def alpha_compose(self, src_ctx, dst_ctx):
    # Assume src_ctx has fields: bg_image, box
    # Assume dst_ctx is from val_imdb
    src_img = cv2.imread(src_ctx['bg_image'], cv2.IMREAD_COLOR)
    dst_img = cv2.imread(dst_ctx['image'], cv2.IMREAD_COLOR)
    dst_alpha = cv2.imread(dst_ctx['alpha'], cv2.IMREAD_GRAYSCALE)

    src_xyxy = src_ctx['box']
    dst_xyxy = dst_ctx['box']

    src_width = src_img.shape[1]
    src_height = src_img.shape[0]
    dst_width = dst_img.shape[1]
    dst_height = dst_img.shape[0]

    # resize the target image to align the heights of the bboxes
    factor = float(src_xyxy[3] - src_xyxy[1] + 1) / \
             float(dst_xyxy[3] - dst_xyxy[1] + 1)
    dst_width = int(dst_width * factor)
    dst_height = int(dst_height * factor)
    dst_img = cv2.resize(dst_img, (dst_width, dst_height))
    dst_alpha = cv2.resize(dst_alpha, (dst_width, dst_height))
    dst_alpha = dst_alpha.astype(np.float) / 255.0
    dst_xyxy = factor * dst_xyxy

    src_xywh = ds_utils.xyxy_to_xywh(src_xyxy.reshape((1, 4))).squeeze()
    dst_xywh = ds_utils.xyxy_to_xywh(dst_xyxy.reshape((1, 4))).squeeze()

    # anchors that should match (the standing points)
    src_anchor = src_xywh[:2]
    dst_anchor = dst_xywh[:2]
    offset = (src_anchor - dst_anchor).astype(np.int)

    # dilate the target patch a bit to include the blending region
    dst_bb = ds_utils.expand_xyxy(dst_xyxy.reshape((1, 4)),
                                  dst_width, dst_height,
                                  ratio=0.2).squeeze().astype(np.int)
    src_bb = dst_bb.copy()
    src_bb[:2] = dst_bb[:2] + offset
    src_bb[2:] = dst_bb[2:] + offset

    # in case the bbox of the target object is beyond the boundaries
    # of the source image
    if src_bb[0] < 0:
        dst_bb[0] -= src_bb[0]
        src_bb[0] = 0
    if src_bb[1] < 0:
        dst_bb[1] -= src_bb[1]
        src_bb[1] = 0
    if src_bb[2] > src_width - 1:
        dst_bb[2] -= src_bb[2] - src_width + 1
        src_bb[2] = src_width - 1
    if src_bb[3] > src_height - 1:
        dst_bb[3] -= src_bb[3] - src_height + 1
        src_bb[3] = src_height - 1

    output_mask = np.zeros((src_height, src_width), dtype=np.float)
    output_image = src_img.copy()

    alpha_patch = dst_alpha[dst_bb[1]:(dst_bb[3] + 1),
                            dst_bb[0]:(dst_bb[2] + 1)]
    src_patch = src_img[src_bb[1]:(src_bb[3] + 1),
                        src_bb[0]:(src_bb[2] + 1), :]
    dst_patch = dst_img[dst_bb[1]:(dst_bb[3] + 1),
                        dst_bb[0]:(dst_bb[2] + 1), :]

    output_mask[src_bb[1]:(src_bb[3] + 1),
                src_bb[0]:(src_bb[2] + 1)] = alpha_patch
    output_image[src_bb[1]:(src_bb[3] + 1), src_bb[0]:(src_bb[2] + 1), :] = \
        np.expand_dims(1.0 - alpha_patch, axis=-1) * src_patch + \
        np.expand_dims(alpha_patch, axis=-1) * dst_patch

    # cv2.rectangle(output_image, (src_xyxy[0], src_xyxy[1]),
    #               (src_xyxy[2], src_xyxy[3]), (255, 0, 0), 1)

    return output_image.astype(np.uint8), output_mask
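# Usage sketch (not from the original repo; `composer`, the paths and the box
# values below are hypothetical): alpha_compose pastes the retrieved object
# from dst_ctx into the src_ctx background, scaling it so the two boxes have
# equal height and aligning their standing points.
#
#   src_ctx = {'bg_image': 'scenes/000001.jpg',
#              'box': np.array([120.0, 200.0, 260.0, 420.0])}
#   dst_ctx = val_imdb.objdb[42]   # provides 'image', 'alpha', 'box'
#   image, mask = composer.alpha_compose(src_ctx, dst_ctx)
#   cv2.imwrite('composite.jpg', image)
#   cv2.imwrite('composite_mask.png', (255.0 * mask).astype(np.uint8))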
def build_search_tree(self, ctxdb, mode=0):
    num_samples = len(ctxdb)
    if mode == 0:
        # hybrid mode
        X = np.zeros((num_samples, 4 + 2 * cfg.FEAT_DIMS[-1]))
    else:
        X = np.zeros((num_samples, 4 + cfg.FEAT_DIMS[-1]))

    for i in range(num_samples):
        ctx = ctxdb[i]
        box = ctx['box'].copy()
        xywh = ds_utils.xyxy_to_xywh(box.reshape((1, 4))).squeeze().astype(np.float)
        with open(ctx['crop_feat'], 'rb') as fid:
            crop_feat = cPickle.load(fid).flatten()
        with open(ctx['full_feat'], 'rb') as fid:
            full_feat = cPickle.load(fid).flatten()

        if mode == 0:
            X[i, :] = np.concatenate((xywh, crop_feat, full_feat))
        elif mode == 1:
            X[i, :] = np.concatenate((xywh, crop_feat))
        else:
            X[i, :] = np.concatenate((xywh, full_feat))

    # if mode == 0:
    #     return BallTree(X, leaf_size=30, metric=cus_distance_hybrid)
    # else:
    #     return BallTree(X, leaf_size=30, metric=cus_distance)
    return BallTree(X, leaf_size=30, metric=cus_distance)
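# `cus_distance` is defined elsewhere in the repo. A minimal sketch of a
# BallTree-compatible callable metric, assuming the first 4 dimensions are
# the xywh geometry and the rest are CNN features; the 0.3/0.7 weighting is
# illustrative only, not the repo's actual choice:
#
#   def cus_distance(x, y):
#       geo = np.linalg.norm(x[:4] - y[:4])
#       app = np.linalg.norm(x[4:] - y[4:])
#       return 0.3 * geo + 0.7 * app
#
# sklearn's BallTree accepts a Python callable as `metric` (as used above),
# though callable metrics are evaluated pairwise in Python and can be slow
# for large ctxdbs.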
def inference_ctx(self, ctx, mode, ctx_tree, K):
    # Assume ctx has fields: bg_image, box
    full_resolution = cfg.RETRIEVAL_RESOLUTION
    crop_resolution = [full_resolution[0] / 2,
                       full_resolution[1] / 2,
                       full_resolution[2]]

    box = ctx['box'].copy().astype(np.int)
    img = cv2.imread(ctx['bg_image'], cv2.IMREAD_COLOR)
    # if ctx.get('flipped', False):
    #     img = cv2.flip(img, 1)

    full_img = cv2.resize(img, (full_resolution[1], full_resolution[0]))
    full_img = np.expand_dims(full_img, axis=0) - \
               cfg.PIXEL_MEANS.reshape((1, 1, 1, 3))
    full_feat = self.model.predict(full_img).flatten()

    # img[box[1]:(box[3] + 1), box[0]:(box[2] + 1), :] = \
    #     cfg.PIXEL_MEANS.reshape((1, 1, 3))
    crop_img = ds_utils.crop_and_resize(img, box.astype(np.float),
                                        full_resolution, crop_resolution)
    crop_img = np.expand_dims(crop_img, axis=0) - \
               cfg.PIXEL_MEANS.reshape((1, 1, 1, 3))
    crop_feat = self.model.predict(crop_img).flatten()

    xywh = ds_utils.xyxy_to_xywh(box.reshape((1, 4))).squeeze().astype(np.float)

    if mode == 0:
        feat = np.concatenate((xywh, crop_feat, full_feat))
    elif mode == 1:
        feat = np.concatenate((xywh, crop_feat))
    else:
        feat = np.concatenate((xywh, full_feat))

    return self.inference_feature(feat, ctx_tree, K)
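# `inference_feature` is not shown in this section. A plausible sketch,
# assuming it simply returns the indices of the K nearest database entries
# (sklearn's BallTree.query returns distances and indices):
#
#   def inference_feature(self, feat, ctx_tree, K):
#       dists, inds = ctx_tree.query(feat.reshape((1, -1)), k=K)
#       return inds.flatten()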
def draw_position_histogram(self, ctxdb=None):
    resolution = [15, 15]
    bins = np.zeros((resolution[0], resolution[1]))
    X = []
    Y = []

    if ctxdb is None:
        ctxdb = self.objdb
    num_samples = len(ctxdb)

    for i in range(num_samples):
        entry = ctxdb[i]
        xyxy = np.array(entry['box']).copy()
        width = entry['width']
        height = entry['height']

        max_dim = np.maximum(width, height)
        ox = int(0.5 * (max_dim - width))
        oy = int(0.5 * (max_dim - height))
        xyxy[0] += ox
        xyxy[1] += oy
        xyxy[2] += ox
        xyxy[3] += oy

        xywh = ds_utils.xyxy_to_xywh(xyxy.reshape((1, 4))).flatten()
        xywh /= float(max_dim)

        scaled_xy = np.ceil(xywh[:2] * resolution[0])
        scaled_xy = np.maximum(0, scaled_xy - 1).astype(np.int)
        bins[scaled_xy[1], scaled_xy[0]] += 1.0

        X = X + [xywh[0]]
        Y = Y + [1.0 - xywh[1]]

        if i % 1000 == 0:
            print i

    plt.switch_backend('agg')
    fig = plt.figure()
    plt.hist2d(X, Y, 15, range=[[0.0, 1.0], [0.0, 1.0]])
    plt.colorbar()
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    # plt.title('offset: %s vs %s' % (self.classes[i], self.classes[j]))
    plt.grid(True)
    fig.savefig('gt_pos_hist.jpg', bbox_inches='tight')
    plt.close(fig)
    print 'Done'
def _filter_crowd_proposals(roidb, crowd_thresh):
    """
    Finds proposals that are inside crowd regions and marks them with
    overlap = -1 (for all gt rois), which means they will be excluded from
    training.
    """
    for ix, entry in enumerate(roidb):
        overlaps = entry['gt_overlaps'].toarray()
        crowd_inds = np.where(overlaps.max(axis=1) == -1)[0]
        non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
        if len(crowd_inds) == 0 or len(non_gt_inds) == 0:
            continue
        iscrowd = [int(True) for _ in xrange(len(crowd_inds))]
        crowd_boxes = ds_utils.xyxy_to_xywh(entry['boxes'][crowd_inds, :])
        non_gt_boxes = ds_utils.xyxy_to_xywh(entry['boxes'][non_gt_inds, :])
        ious = COCOmask.iou(non_gt_boxes, crowd_boxes, iscrowd)
        bad_inds = np.where(ious.max(axis=1) > crowd_thresh)[0]
        overlaps[non_gt_inds[bad_inds], :] = -1
        roidb[ix]['gt_overlaps'] = scipy.sparse.csr_matrix(overlaps)
    return roidb
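# `COCOmask` here is pycocotools.mask. When the iscrowd flag is set, iou()
# divides the intersection by the area of the proposal instead of the union,
# so a proposal lying fully inside a crowd region scores 1.0 no matter how
# large the crowd box is. Toy example (xywh boxes, values illustrative):
#
#   proposals = np.array([[10.0, 10.0, 20.0, 20.0]])  # inside the crowd
#   crowds = np.array([[0.0, 0.0, 100.0, 100.0]])
#   ious = COCOmask.iou(proposals, crowds, [1])       # -> [[1.0]]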
def get_minibatch(self, square=True):
    # outputs: resized images, normalized xywhs, grids
    batch_size = cfg.TRAIN.BATCH_SIZE
    grid_shape = cfg.GRID_SHAPE
    resolution = cfg.RESOLUTION

    ###########################################################################
    # indices of the minibatch
    if self.objdb_cur + batch_size >= len(self.objdb):
        self.permute_objdb_indices()
    db_inds = self.objdb_perm[self.objdb_cur:self.objdb_cur + batch_size]
    self.objdb_cur += batch_size
    ###########################################################################

    images = np.zeros((batch_size, resolution[0],
                       resolution[1], resolution[2]), dtype=np.float32)
    grids = np.zeros((batch_size, 2))
    boxes = np.zeros((batch_size, 4))

    for i in range(batch_size):
        obj = self.objdb[db_inds[i]]
        im_path = obj['background']
        width = obj['width']
        height = obj['height']
        box = obj['box'].copy()

        # image data, flip if necessary
        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        if obj['flipped']:
            # print('flipped %d' % i)
            img = cv2.flip(img, 1)

        xywh = ds_utils.xyxy_to_xywh(box.reshape((1, 4))).squeeze()

        # if we need square images
        if square:
            img, offset_x, offset_y = \
                ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
            xywh[0] += offset_x
            xywh[1] += offset_y
            width = height = img.shape[0]

        nxywh = ds_utils.normalize_xywh(xywh.reshape((1, 4)),
                                        width, height).squeeze()
        # discretized output positions
        grid = ds_utils.boxes_to_indices(nxywh.reshape((1, 4)),
                                         grid_shape).squeeze()

        # images of the same shape
        images[i, :, :, :] = cv2.resize(img, (resolution[1], resolution[0]))
        grids[i, :] = grid
        boxes[i, :] = nxywh

    return images, boxes, grids
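# Usage sketch (`loader` is a hypothetical instance of this class): each
# minibatch pairs a resized background with the normalized xywh box of the
# object slot and its discrete grid index, e.g. in a training loop:
#
#   images, boxes, grids = loader.get_minibatch(square=True)
#   # images: (B, H, W, 3) float32
#   # boxes:  (B, 4) normalized xywh
#   # grids:  (B, 2) discrete indices from ds_utils.boxes_to_indices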
def draw_heatmap(self, ctxdb=None):
    if ctxdb is None:
        ctxdb = self.objdb

    scale = 15
    areas = np.zeros((scale,))
    count = cfg.EPS * np.ones((scale,))
    num_samples = len(ctxdb)

    for i in range(num_samples):
        entry = ctxdb[i]
        xyxy = np.array(entry['box']).copy()
        width = entry['width']
        height = entry['height']

        max_dim = np.maximum(width, height)
        ox = int(0.5 * (max_dim - width))
        oy = int(0.5 * (max_dim - height))
        xyxy[0] += ox
        xyxy[1] += oy
        xyxy[2] += ox
        xyxy[3] += oy

        xywh = ds_utils.xyxy_to_xywh(xyxy.reshape((1, 4))).flatten()
        xywh /= float(max_dim)
        area = xywh[2] * xywh[3]

        scaled_xy = np.ceil(xywh[:2] * scale)
        scaled_xy = np.maximum(0, scaled_xy - 1).astype(np.int)

        areas[scaled_xy[1]] += area
        count[scaled_xy[1]] += 1.0

        if i % 1000 == 0:
            print i

    areas = np.divide(areas, count)
    heatmap = self.areas_to_heatmap(areas)
    heatmap = cv2.resize(heatmap, (512, 512))
    cv2.imwrite('heatmap.png', heatmap)
def draw_ratio_histogram(self, ctxdb=None):
    if ctxdb is None:
        ctxdb = self.objdb

    R = []
    num_samples = len(ctxdb)

    for i in range(num_samples):
        entry = ctxdb[i]
        xyxy = np.array(entry['box']).copy()
        width = entry['width']
        height = entry['height']

        max_dim = np.maximum(width, height)
        ox = int(0.5 * (max_dim - width))
        oy = int(0.5 * (max_dim - height))
        xyxy[0] += ox
        xyxy[1] += oy
        xyxy[2] += ox
        xyxy[3] += oy

        xywh = ds_utils.xyxy_to_xywh(xyxy.reshape((1, 4))).flatten()
        xywh /= float(max_dim)
        ratio = np.log(xywh[3] / xywh[2])
        R = R + [ratio]

        if i % 1000 == 0:
            print i

    plt.switch_backend('agg')
    fig = plt.figure()
    plt.hist(R, 100, range=[0.0, 2.0])
    plt.xlim([0.0, 2.0])
    fig.savefig('gt_logratio_hist.jpg', bbox_inches='tight')
    plt.close(fig)
    print 'Done'
def get_rnn_minibatch(self, max_seq_len, square=True, vis=False):
    ###########################################################################
    # rename the config parameters to make the code clearer
    batch_size = cfg.TRAIN.BATCH_SIZE
    resolution = cfg.RESOLUTION
    grid_shape = cfg.GRID_SHAPE
    ###########################################################################

    ###########################################################################
    # indices of the minibatch
    if self.roidb_cur + batch_size >= len(self.roidb):
        self.permute_roidb_indices()
    db_inds = self.roidb_perm[self.roidb_cur:self.roidb_cur + batch_size]
    self.roidb_cur += batch_size
    ###########################################################################

    ###########################################################################
    # to be returned
    objects = []
    centers = []
    ratios = []
    masks = []
    # normalized xywh representation
    bboxes = np.zeros((batch_size, max_seq_len, 4), dtype=np.float32)
    # grid box offset
    deltas = np.zeros((batch_size, max_seq_len, 4), dtype=np.float32)
    images = np.zeros((batch_size, resolution[0],
                       resolution[1], resolution[2]), dtype=np.float32)
    ###########################################################################

    for i in xrange(batch_size):
        rois = self.roidb[db_inds[i]]
        im_path = rois['image']
        width = rois['width']
        height = rois['height']
        gt_boxes = rois['boxes'].copy()
        gt_cats = rois['clses'].copy()
        areas = rois['seg_areas']

        # number of instances should not exceed max_seq_len
        num_instances = min(gt_boxes.shape[0], max_seq_len)

        # image data, flip if necessary
        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        if rois['flipped']:
            # print('flipped %d' % i)
            img = cv2.flip(img, 1)

        # sort the objects in the sequence based on their areas
        order = np.argsort(areas)[::-1]
        gt_boxes = gt_boxes[order, :]
        gt_cats = gt_cats[order]
        areas = areas[order]
        # print areas

        # [x1, y1, x2, y2] to [x, y, w, h]
        gt_boxes = ds_utils.xyxy_to_xywh(gt_boxes)

        # if we need square images
        if square:
            img, offset_x, offset_y = \
                ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
            gt_boxes[:, 0] += offset_x
            gt_boxes[:, 1] += offset_y
            width = height = img.shape[0]

        # normalize
        gt_boxes = ds_utils.normalize_xywh(gt_boxes, width, height)
        # truncate the sequences
        gt_boxes = gt_boxes[:num_instances, :]

        # discretized output positions
        grid_indices = ds_utils.xywh_to_index(gt_boxes,
                                              grid_shape[1], grid_shape[0])
        # deltas between grid boxes and ground truth boxes
        grid_boxes = ds_utils.index_to_xywh(grid_indices,
                                            grid_shape[1], grid_shape[0])
        grid_deltas = ds_utils.bbox_transform(grid_boxes, gt_boxes)

        # images of the same shape
        images[i, :, :, :] = cv2.resize(img, (resolution[1], resolution[0]))
        # use the first 'num_instances' (largest) objects
        bboxes[i, :num_instances, :] = np.expand_dims(gt_boxes, axis=0)
        # grid offsets
        deltas[i, :num_instances, :] = np.expand_dims(grid_deltas, axis=0)
        # object indicators
        objects.append(gt_cats[:num_instances].tolist())
        # masks for loss function
        masks.append(np.ones((num_instances,)).tolist())
        # grid centers and sizes
        centers.append(grid_indices[:, 0].tolist())
        ratios.append(grid_indices[:, 1].tolist())

    # padding
    objects = pad_sequences(objects, maxlen=max_seq_len,
                            padding='post', truncating='post', value=0.)
    centers = pad_sequences(centers, maxlen=max_seq_len,
                            padding='post', truncating='post', value=0.)
    ratios = pad_sequences(ratios, maxlen=max_seq_len,
                           padding='post', truncating='post', value=0.)
    masks = pad_sequences(masks, maxlen=max_seq_len,
                          padding='post', truncating='post', value=0.)

    if vis:
        output_dir = osp.abspath(osp.join(cfg.ROOT_DIR, 'output',
                                          cfg.EXP_DIR, self.name,
                                          'rnn_minibatch'))
        if not osp.exists(output_dir):
            os.makedirs(output_dir)

        for i in xrange(batch_size):
            rois = self.roidb[db_inds[i]]
            im_name, im_ext = osp.splitext(osp.basename(rois['image']))
            msk = masks[i, :]

            # ground truth boxes
            ibb = bboxes[i, :, :].copy()
            iid = objects[i, :].copy()
            iim = images[i, :, :, :].copy()

            # grid bboxes
            grid_indices = np.vstack((centers[i, :],
                                      ratios[i, :])).transpose()
            gbb = ds_utils.index_to_xywh(grid_indices,
                                         grid_shape[1], grid_shape[0])
            # regressed bboxes
            rbb = ds_utils.bbox_transform_inv(gbb, deltas[i, :, :])

            # Denormalize
            ibb = ds_utils.denormalize_xywh(ibb, resolution[1], resolution[0])
            gbb = ds_utils.denormalize_xywh(gbb, resolution[1], resolution[0])
            rbb = ds_utils.denormalize_xywh(rbb, resolution[1], resolution[0])

            ibb = ds_utils.xywh_to_xyxy(ibb, resolution[1], resolution[0])
            gbb = ds_utils.xywh_to_xyxy(gbb, resolution[1], resolution[0])
            rbb = ds_utils.xywh_to_xyxy(rbb, resolution[1], resolution[0])

            # fontScale = 0.0007 * math.sqrt(float(
            #     resolution[0] * resolution[0] + resolution[1] * resolution[1]))

            for j in xrange(ibb.shape[0]):
                if msk[j] == 0:
                    break
                id = iid[j]
                cls = self.classes[id]
                # ground truth boxes
                bb = ibb[j, :].astype(np.int16)
                cv2.rectangle(iim, (bb[0], bb[1]), (bb[2], bb[3]),
                              (0, 255, 0), 2)
                # grid boxes
                bb = gbb[j, :].astype(np.int16)
                cv2.rectangle(iim, (bb[0], bb[1]), (bb[2], bb[3]),
                              (255, 0, 0), 1)
                # regressed boxes
                bb = rbb[j, :].astype(np.int16)
                cv2.rectangle(iim, (bb[0], bb[1]), (bb[2], bb[3]),
                              (0, 0, 255), 1)
                # cv2.putText(iim, '{:}_{:}'.format(j, cls),
                #             (bb[0], bb[1] - 2),
                #             cv2.FONT_HERSHEY_SIMPLEX,
                #             fontScale, (0, 0, 255), 1)

            output_path = osp.join(output_dir, '%06d_' % i + im_name + '.jpg')
            cv2.imwrite(output_path, iim)

    return images, objects, bboxes, deltas, centers, ratios, masks
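# `pad_sequences` behaves like the Keras keras.preprocessing.sequence
# version: with padding='post' / truncating='post' every per-image list is
# right-padded (or right-truncated) to max_seq_len, e.g.
#
#   pad_sequences([[3, 7], [5, 1, 9, 2]], maxlen=3,
#                 padding='post', truncating='post', value=0.)
#   # -> [[3, 7, 0],
#   #     [5, 1, 9]]
#
# which is why masks[i, j] == 0 marks the padded (invalid) steps in the loss.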
def get_scene_minibatch(self, square=True):
    # outputs: resized images, layouts, segmentations, normalized xywhs, grids
    batch_size = cfg.TRAIN.BATCH_SIZE
    resolution = cfg.PREDICT_RESOLUTION
    grid_shape = cfg.GRID_SHAPE
    num_clses = self.num_classes - 1

    ###########################################################################
    # indices of the minibatch
    if self.objdb_cur + batch_size >= len(self.objdb):
        self.permute_objdb_indices()
    db_inds = self.objdb_perm[self.objdb_cur:self.objdb_cur + batch_size]
    self.objdb_cur += batch_size
    ###########################################################################

    images = np.zeros((batch_size, resolution[0],
                       resolution[1], resolution[2]), dtype=np.float32)
    scenes = np.zeros((batch_size, resolution[0],
                       resolution[1], num_clses), dtype=np.float32)
    segs = np.zeros((batch_size, resolution[0],
                     resolution[1], resolution[2]), dtype=np.float32)
    grids = np.zeros((batch_size, 2))
    boxes = np.zeros((batch_size, 4))

    for i in range(batch_size):
        obj = self.objdb[db_inds[i]]
        im_path = obj['background']
        seg_path = obj['out_seg']
        width = obj['width']
        height = obj['height']
        box = obj['box'].copy()
        all_boxes = obj['all_boxes'].copy().reshape((-1, 4)).astype(np.int)
        all_clses = obj['all_clses'].copy().flatten()

        # image data, flip if necessary
        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        seg = cv2.imread(seg_path, cv2.IMREAD_COLOR)
        if obj['flipped']:
            # print('flipped %d' % i)
            img = cv2.flip(img, 1)
            seg = cv2.flip(seg, 1)

        xywh = ds_utils.xyxy_to_xywh(box.reshape((1, 4))).squeeze()
        ex_box = box.copy().flatten().astype(np.int)

        # if we need square images
        if square:
            img, offset_x, offset_y = \
                ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
            xywh[0] += offset_x
            xywh[1] += offset_y
            ex_box[0] += offset_x
            ex_box[1] += offset_y
            ex_box[2] += offset_x
            ex_box[3] += offset_y
            all_boxes[:, 0] += offset_x
            all_boxes[:, 1] += offset_y
            all_boxes[:, 2] += offset_x
            all_boxes[:, 3] += offset_y
            width = height = img.shape[0]

            seg, offset_x, offset_y = \
                ds_utils.create_squared_image(seg, cfg.PIXEL_MEANS)

        nxywh = ds_utils.normalize_xywh(xywh.reshape((1, 4)),
                                        width, height).squeeze()
        # discretized output positions
        grid = ds_utils.boxes_to_indices(nxywh.reshape((1, 4)),
                                         grid_shape).squeeze()

        # images of the same shape
        images[i] = cv2.resize(img, (resolution[1], resolution[0]))
        segs[i] = cv2.resize(seg, (resolution[1], resolution[0]))

        factor = float(resolution[0]) / width
        all_boxes = (factor * all_boxes).astype(np.int)
        ex_box = (factor * ex_box).astype(np.int)
        scenes[i] = ds_utils.create_scenes(resolution[1], resolution[0],
                                           all_boxes, all_clses,
                                           ex_box=ex_box, n_cls=num_clses)

        grids[i, :] = grid
        boxes[i, :] = nxywh

    return images, scenes, segs, boxes, grids
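# `ds_utils.create_scenes` is defined elsewhere. A rough sketch of a layout
# rasterizer consistent with how it is called here, assuming one channel per
# non-background class; the exact handling of ex_box (the slot being
# predicted) is a guess:
#
#   def create_scenes(width, height, boxes, clses, ex_box, n_cls):
#       scene = np.zeros((height, width, n_cls), dtype=np.float32)
#       for bb, c in zip(boxes, clses):
#           scene[bb[1]:bb[3] + 1, bb[0]:bb[2] + 1, c - 1] = 1.0
#       scene[ex_box[1]:ex_box[3] + 1, ex_box[0]:ex_box[2] + 1, :] = 0.0
#       return scene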
def draw_binary_correlation_stat_graph(self, output_dir, roidb=None):
    # Create the output directory if necessary
    if not osp.exists(output_dir):
        os.makedirs(output_dir)
    if not osp.exists(osp.join(output_dir, 'images')):
        os.makedirs(osp.join(output_dir, 'images'))

    # Cache files
    present_cache_file = osp.join(self.cache_path,
                                  self.name + '_present_stats.pkl')
    correlation_cache_file = osp.join(self.cache_path,
                                      self.name + '_correlation_stats.pkl')

    # Load cache files if they exist
    if osp.exists(present_cache_file) and osp.exists(correlation_cache_file):
        with open(present_cache_file, 'rb') as fid:
            present_stats = cPickle.load(fid)
        print '{} present stats loaded from {}'.format(self.name,
                                                       present_cache_file)
        with open(correlation_cache_file, 'rb') as fid:
            correlation_stats = cPickle.load(fid)
        print '{} correlation stats loaded from {}'.format(
            self.name, correlation_cache_file)
    # Otherwise, create them
    else:
        if roidb is None:
            roidb = self.roidb
        num_rois = len(roidb)

        # present_stats: the number of pairs
        present_stats = np.zeros((self.num_classes, self.num_classes))
        correlation_stats = [[np.zeros((6, 0)) for j in xrange(self.num_classes)]
                             for i in xrange(self.num_classes)]

        for i in xrange(num_rois):
            rois = roidb[i]
            im_width = float(rois['width'])
            im_height = float(rois['height'])
            bboxes = rois['boxes'].copy()
            classes = rois['clses']

            # At least 2 objects
            if bboxes.shape[0] < 2:
                continue

            # Assume squared images
            max_dim = np.maximum(im_width, im_height)
            nfactor = np.array([max_dim, max_dim,
                                max_dim, max_dim]).reshape((1, 4))

            # Change representations from xyxy to xywh
            bboxes = ds_utils.xyxy_to_xywh(bboxes)
            # Normalize
            bboxes = np.divide(bboxes, nfactor)
            # Area
            areas = np.multiply(bboxes[:, 2], bboxes[:, 3]).squeeze()
            # Aspect ratio
            ratios = np.divide(bboxes[:, 2], bboxes[:, 3]).squeeze()

            for j in xrange(bboxes.shape[0] - 1):
                cls1 = classes[j]
                bbox1 = bboxes[j, :].squeeze()
                for k in xrange(j + 1, bboxes.shape[0]):
                    cls2 = classes[k]
                    bbox2 = bboxes[k, :].squeeze()
                    offset = bbox2[:2] - bbox1[:2]

                    correlation21 = np.array([offset[0], offset[1],
                                              areas[j], areas[k],
                                              ratios[j], ratios[k]]).reshape((6, 1))
                    correlation12 = np.array([-offset[0], -offset[1],
                                              areas[k], areas[j],
                                              ratios[k], ratios[j]]).reshape((6, 1))

                    correlation_stats[cls1][cls2] = \
                        np.hstack((correlation_stats[cls1][cls2], correlation21))
                    correlation_stats[cls2][cls1] = \
                        np.hstack((correlation_stats[cls2][cls1], correlation12))

                    present_stats[cls1, cls2] += 1
                    present_stats[cls2, cls1] += 1
            print i

        with open(present_cache_file, 'wb') as fid:
            cPickle.dump(present_stats, fid, cPickle.HIGHEST_PROTOCOL)
        print 'wrote present stats to {}'.format(present_cache_file)
        with open(correlation_cache_file, 'wb') as fid:
            cPickle.dump(correlation_stats, fid, cPickle.HIGHEST_PROTOCOL)
        print 'wrote correlation stats to {}'.format(correlation_cache_file)

    plt.switch_backend('agg')
    for i in xrange(1, self.num_classes):
        for j in xrange(1, self.num_classes):
            correlation = correlation_stats[i][j]

            fig = plt.figure()
            plt.hist2d(correlation[0, :], correlation[1, :], 20,
                       range=[[-1.0, 1.0], [-1.0, 1.0]])
            plt.colorbar()
            plt.xlim([-1.0, 1.0])
            plt.ylim([-1.0, 1.0])
            plt.title('offset: %s vs %s' % (self.classes[i], self.classes[j]))
            plt.grid(True)
            fig.savefig(osp.join(output_dir,
                                 'images/offset_%02d_%02d.jpg' % (i, j)),
                        bbox_inches='tight')
            plt.close(fig)

            fig = plt.figure()
            plt.hist2d(correlation[2, :], correlation[3, :], 20,
                       range=[[0, 0.05], [0, 0.05]])
            plt.colorbar()
            plt.xlim([0, 0.05])
            plt.ylim([0, 0.05])
            plt.title('area: %s vs %s' % (self.classes[i], self.classes[j]))
            plt.grid(True)
            fig.savefig(osp.join(output_dir,
                                 'images/area_%02d_%02d.jpg' % (i, j)),
                        bbox_inches='tight')
            plt.close(fig)

            fig = plt.figure()
            plt.hist2d(correlation[4, :], correlation[5, :], 20,
                       range=[[0, 4.0], [0, 4.0]])
            plt.colorbar()
            plt.xlim([0, 4.0])
            plt.ylim([0, 4.0])
            plt.title('aspect ratio: %s vs %s' % (self.classes[i],
                                                  self.classes[j]))
            plt.grid(True)
            fig.savefig(osp.join(output_dir,
                                 'images/ratio_%02d_%02d.jpg' % (i, j)),
                        bbox_inches='tight')
            plt.close(fig)

            # stitch the three plots into one image for easier browsing
            im1 = cv2.resize(cv2.imread(osp.join(
                output_dir, 'images/offset_%02d_%02d.jpg' % (i, j))), (648, 545))
            im2 = cv2.resize(cv2.imread(osp.join(
                output_dir, 'images/area_%02d_%02d.jpg' % (i, j))), (648, 545))
            im3 = cv2.resize(cv2.imread(osp.join(
                output_dir, 'images/ratio_%02d_%02d.jpg' % (i, j))), (648, 545))

            im = np.zeros((545, 648 * 3, 3), dtype=np.int16)
            im[:, :648, :] = im1
            im[:, 648:2 * 648, :] = im2
            im[:, 2 * 648:3 * 648, :] = im3
            cv2.imwrite(osp.join(output_dir,
                                 'images/%02d_%02d.jpg' % (i, j)), im)
            print i, j