def single_sample(self, entry, K=3):
    """Predict the K x K most likely boxes for a single test entry.

    Args:
        entry: mapping holding the image paths 'bg_image' and 'bg_layout'.
        K: number of top centers kept, and of top sizes kept per center.

    Returns:
        (xywhs, grids, heatmap). ``grids`` has one (center index,
        size index) row per candidate, ``xywhs`` is
        ``ds_utils.indices_to_boxes(grids, cfg.GRID_SHAPE)`` and ``heatmap``
        is the center probability map resized to the squared image, scaled
        to uint8, histogram-equalized and repeated over three channels.
    """
    # The network input is the squared image resized to PREDICT_RESOLUTION.
    out_res = cfg.PREDICT_RESOLUTION
    grid_shape = cfg.GRID_SHAPE
    state_dims = cfg.STATE_DIMS
    net_size = (out_res[1], out_res[0])

    img = cv2.imread(entry['bg_image'], cv2.IMREAD_COLOR)
    lyo = cv2.imread(entry['bg_layout'], cv2.IMREAD_COLOR)
    # Only the squared images are needed here, not the padding offsets.
    img, _, _ = ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
    lyo, _, _ = ds_utils.create_squared_image(lyo, cfg.PIXEL_MEANS)

    input_img = np.expand_dims(cv2.resize(img, net_size), 0)
    input_lyo = np.expand_dims(cv2.resize(lyo, net_size), 0)
    input_img, input_lyo = self.preprocess_input_images(input_img, input_lyo)

    # Center branch: shared features plus a probability per grid cell.
    feat, cen_probs = self.center_inference.predict([input_img, input_lyo])
    cen_probs = cen_probs.squeeze()
    ranked_centers = np.argsort(cen_probs)[::-1]
    cen_inds = np.array(ranked_centers[:K])
    rois = ds_utils.centers_to_rois(cen_inds, grid_shape[:2], grid_shape[:2])

    # Size branch: for every kept center, keep its K most probable sizes.
    rows = []
    for i in range(K):
        input_roi = rois[i].reshape((1, state_dims[0], state_dims[1]))
        size_probs = self.size_inference.predict([feat, input_roi]).squeeze()
        ranked_sizes = np.argsort(size_probs.squeeze())[::-1]
        size_inds = np.array(ranked_sizes[:K])
        for j in range(K):
            rows.append(np.array([cen_inds[i], size_inds[j]]).reshape((1, 2)))
    # Stacking onto an empty float array keeps the (N, 2) float layout.
    grids = np.vstack([np.zeros((0, 2))] + rows)
    xywhs = ds_utils.indices_to_boxes(grids, grid_shape)

    # Visualisation of the center distribution over the squared image.
    prob_map = cen_probs.reshape((grid_shape[0], grid_shape[1]))
    heatmap = cv2.resize(prob_map, (img.shape[1], img.shape[0]))
    heatmap = cv2.equalizeHist((255 * heatmap).astype(np.uint8))
    heatmap = np.repeat(np.expand_dims(heatmap, axis=-1), 3, axis=-1)

    return xywhs, grids, heatmap
def get_minibatch(self, square=True):
    """Assemble one minibatch of single-object samples from the objdb.

    Args:
        square: when true, each image is squared with
            ``ds_utils.create_squared_image`` and the box is shifted by the
            returned offsets before normalization.

    Returns:
        (images, boxes, grids): the images resized to ``cfg.RESOLUTION``,
        the normalized xywh boxes and the grid indices computed from them.
    """
    batch_size = cfg.TRAIN.BATCH_SIZE
    grid_shape = cfg.GRID_SHAPE
    resolution = cfg.RESOLUTION

    # Pick the database indices of this minibatch; re-permute first when
    # the cursor would reach the end of the database.
    if self.objdb_cur + batch_size >= len(self.objdb):
        self.permute_objdb_indices()
    first = self.objdb_cur
    db_inds = self.objdb_perm[first:first + batch_size]
    self.objdb_cur += batch_size

    images = np.zeros(
        (batch_size, resolution[0], resolution[1], resolution[2]),
        dtype=np.float32)
    grids = np.zeros((batch_size, 2))
    boxes = np.zeros((batch_size, 4))
    out_size = (resolution[1], resolution[0])

    for i in range(batch_size):
        record = self.objdb[db_inds[i]]
        im_path = record['background']
        width = record['width']
        height = record['height']
        box = record['box'].copy()

        # Load the background and mirror it when the entry is flipped.
        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        if record['flipped']:
            img = cv2.flip(img, 1)

        xywh = ds_utils.xyxy_to_xywh(box.reshape((1, 4))).squeeze()

        if square:
            img, offset_x, offset_y = ds_utils.create_squared_image(
                img, cfg.PIXEL_MEANS)
            xywh[0] += offset_x
            xywh[1] += offset_y
            # The squared image has equal sides.
            width = height = img.shape[0]

        nxywh = ds_utils.normalize_xywh(
            xywh.reshape((1, 4)), width, height).squeeze()
        # Discretized output position of the box.
        grid = ds_utils.boxes_to_indices(
            nxywh.reshape((1, 4)), grid_shape).squeeze()

        images[i, :, :, :] = cv2.resize(img, out_size)
        grids[i, :] = grid
        boxes[i, :] = nxywh

    return images, boxes, grids
def single_sample(self, entry, K=3):
    """Predict the K x K most likely boxes for a single test entry.

    Args:
        entry: mapping holding the image paths 'bg_image' and 'bg_layout'.
        K: number of top centers kept, and of top sizes kept per center.

    Returns:
        (xywhs, grids, heatmap). ``grids`` has one (center index,
        size index) row per candidate, ``xywhs`` is
        ``ds_utils.indices_to_boxes(grids, cfg.GRID_SHAPE)`` and ``heatmap``
        is the center probability map resized to the squared image, scaled
        to uint8, histogram-equalized and repeated over three channels.
    """
    # The network input is the squared image resized to PREDICT_RESOLUTION.
    out_res = cfg.PREDICT_RESOLUTION
    grid_shape = cfg.GRID_SHAPE
    state_dims = cfg.STATE_DIMS
    net_size = (out_res[1], out_res[0])

    img = cv2.imread(entry['bg_image'], cv2.IMREAD_COLOR)
    lyo = cv2.imread(entry['bg_layout'], cv2.IMREAD_COLOR)
    # Only the squared images are needed here, not the padding offsets.
    img, _, _ = ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
    lyo, _, _ = ds_utils.create_squared_image(lyo, cfg.PIXEL_MEANS)

    input_img = np.expand_dims(cv2.resize(img, net_size), 0)
    input_lyo = np.expand_dims(cv2.resize(lyo, net_size), 0)
    input_img, input_lyo = self.preprocess_input_images(input_img, input_lyo)

    # Center branch: shared features plus a probability per grid cell.
    feat, cen_probs = self.center_inference.predict([input_img, input_lyo])
    cen_probs = cen_probs.squeeze()
    ranked_centers = np.argsort(cen_probs)[::-1]
    cen_inds = np.array(ranked_centers[:K])
    rois = ds_utils.centers_to_rois(cen_inds, grid_shape[:2], grid_shape[:2])

    # Size branch: for every kept center, keep its K most probable sizes.
    rows = []
    for i in range(K):
        input_roi = rois[i].reshape((1, state_dims[0], state_dims[1]))
        size_probs = self.size_inference.predict([feat, input_roi]).squeeze()
        ranked_sizes = np.argsort(size_probs.squeeze())[::-1]
        size_inds = np.array(ranked_sizes[:K])
        for j in range(K):
            rows.append(np.array([cen_inds[i], size_inds[j]]).reshape((1, 2)))
    # Stacking onto an empty float array keeps the (N, 2) float layout.
    grids = np.vstack([np.zeros((0, 2))] + rows)
    xywhs = ds_utils.indices_to_boxes(grids, grid_shape)

    # Visualisation of the center distribution over the squared image.
    prob_map = cen_probs.reshape((grid_shape[0], grid_shape[1]))
    heatmap = cv2.resize(prob_map, (img.shape[1], img.shape[0]))
    heatmap = cv2.equalizeHist((255 * heatmap).astype(np.uint8))
    heatmap = np.repeat(np.expand_dims(heatmap, axis=-1), 3, axis=-1)

    return xywhs, grids, heatmap
def get_minibatch(self, square=True):
    """Assemble one minibatch of single-object samples from the objdb.

    Args:
        square: when true, each image is squared with
            ``ds_utils.create_squared_image`` and the box is shifted by the
            returned offsets before normalization.

    Returns:
        (images, boxes, grids): the images resized to ``cfg.RESOLUTION``,
        the normalized xywh boxes and the grid indices computed from them.
    """
    batch_size = cfg.TRAIN.BATCH_SIZE
    grid_shape = cfg.GRID_SHAPE
    resolution = cfg.RESOLUTION

    # Pick the database indices of this minibatch; re-permute first when
    # the cursor would reach the end of the database.
    if self.objdb_cur + batch_size >= len(self.objdb):
        self.permute_objdb_indices()
    first = self.objdb_cur
    db_inds = self.objdb_perm[first:first + batch_size]
    self.objdb_cur += batch_size

    images = np.zeros(
        (batch_size, resolution[0], resolution[1], resolution[2]),
        dtype=np.float32)
    grids = np.zeros((batch_size, 2))
    boxes = np.zeros((batch_size, 4))
    out_size = (resolution[1], resolution[0])

    for i in range(batch_size):
        record = self.objdb[db_inds[i]]
        im_path = record['background']
        width = record['width']
        height = record['height']
        box = record['box'].copy()

        # Load the background and mirror it when the entry is flipped.
        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        if record['flipped']:
            img = cv2.flip(img, 1)

        xywh = ds_utils.xyxy_to_xywh(box.reshape((1, 4))).squeeze()

        if square:
            img, offset_x, offset_y = ds_utils.create_squared_image(
                img, cfg.PIXEL_MEANS)
            xywh[0] += offset_x
            xywh[1] += offset_y
            # The squared image has equal sides.
            width = height = img.shape[0]

        nxywh = ds_utils.normalize_xywh(
            xywh.reshape((1, 4)), width, height).squeeze()
        # Discretized output position of the box.
        grid = ds_utils.boxes_to_indices(
            nxywh.reshape((1, 4)), grid_shape).squeeze()

        images[i, :, :, :] = cv2.resize(img, out_size)
        grids[i, :] = grid
        boxes[i, :] = nxywh

    return images, boxes, grids
def draw_roidb_bboxes(self, output_dir, roidb=None):
    """Draw the boxes of every roidb entry and save the annotated images.

    Each image is read from ``roi['image']``, mirrored when the entry is
    flipped, squared with ``ds_utils.create_squared_image``, annotated with
    one green rectangle and a red ``<index>_<class>`` label per box, and
    written to ``<output_dir>/roidb_boxes/<image basename>``.

    Args:
        output_dir: root directory for the rendered images.
        roidb: entries to draw; defaults to ``self._roidb``.
    """
    # Create and write to the SAME directory. The previous code created
    # 'roidb_boxes' but wrote into 'roidb_bboxes', which never existed, so
    # cv2.imwrite failed silently.
    box_dir = osp.join(output_dir, 'roidb_boxes')
    ds_utils.maybe_create(output_dir)
    ds_utils.maybe_create(box_dir)

    if roidb is None:
        roidb = self._roidb

    for i, roi in enumerate(roidb):
        im_path = roi['image']
        # Work on a copy so that the database boxes are not shifted.
        bboxes = roi['boxes'].copy()
        clses = roi['clses']

        # Image data, flipped if necessary.
        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        if roi['flipped']:
            img = cv2.flip(img, 1)

        # Move the boxes into the coordinate frame of the squared image.
        img, offset_x, offset_y = ds_utils.create_squared_image(
            img, cfg.PIXEL_MEANS)
        bboxes[:, [0, 2]] += offset_x
        bboxes[:, [1, 3]] += offset_y

        # Font size proportional to the diagonal of the squared image.
        fontScale = 0.0007 * math.sqrt(2 * img.shape[0] * img.shape[0])

        for j in range(bboxes.shape[0]):
            # Plain Python ints; a full-width integer cast avoids the
            # wrap-around of int16 on very large images.
            x1, y1, x2, y2 = [int(v) for v in bboxes[j, :].astype(int)]
            cls = self.classes[clses[j]]
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 1)
            cv2.putText(img, '{:}_{:}'.format(j, cls), (x1, y1 - 2),
                        cv2.FONT_HERSHEY_SIMPLEX, fontScale, (0, 0, 255), 1)

        output_path = osp.join(box_dir, osp.basename(im_path))
        cv2.imwrite(output_path, img)
        # Progress indicator; the call form works on Python 2 and 3.
        print(i)
def draw_objdb_bboxes(self, output_dir, objdb=None):
    """Draw the box of every objdb entry and save the annotated images.

    Each image is read from ``obj['image']``, mirrored when the entry is
    flipped, squared with ``ds_utils.create_squared_image``, annotated with
    a green rectangle and a red ``<obj_id>_<cls>`` label, and written to
    ``<output_dir>/objdb_boxes/<image name>_<zero-padded obj_id><ext>``.

    Args:
        output_dir: root directory for the rendered images.
        objdb: entries to draw; defaults to ``self._objdb``.
    """
    box_dir = osp.join(output_dir, 'objdb_boxes')
    ds_utils.maybe_create(output_dir)
    ds_utils.maybe_create(box_dir)

    if objdb is None:
        objdb = self._objdb

    for i, obj in enumerate(objdb):
        im_path = obj['image']
        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        # Copy the box: shifting obj['box'] in place would corrupt the
        # database entry every time this method is called.
        box = obj['box'].copy()
        cls = obj['cls']
        aid = obj['obj_id']

        if obj['flipped']:
            img = cv2.flip(img, 1)

        # Move the box into the coordinate frame of the squared image.
        img, offset_x, offset_y = ds_utils.create_squared_image(
            img, cfg.PIXEL_MEANS)
        box[0] += offset_x
        box[1] += offset_y
        box[2] += offset_x
        box[3] += offset_y

        # The builtin int replaces the np.int alias removed from NumPy.
        x1, y1, x2, y2 = [int(v) for v in box.astype(int)]

        # Font size proportional to the diagonal of the squared image.
        fontScale = 0.0007 * math.sqrt(2 * img.shape[0] * img.shape[0])
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 1)
        # The label previously used an undefined name 'j'; the object id
        # identifies the single box of this entry.
        cv2.putText(img, '{:}_{:}'.format(aid, cls), (x1, y1 - 2),
                    cv2.FONT_HERSHEY_SIMPLEX, fontScale, (0, 0, 255), 1)

        im_name, im_ext = osp.splitext(osp.basename(im_path))
        output_path = osp.join(
            box_dir, im_name + '_' + str(aid).zfill(12) + im_ext)
        cv2.imwrite(output_path, img)
        # Progress indicator; the call form works on Python 2 and 3.
        print(i)
def draw_roidb_bboxes(self, output_dir, roidb=None):
    """Draw the boxes of every roidb entry and save the annotated images.

    Each image is read from ``roi['image']``, mirrored when the entry is
    flipped, squared with ``ds_utils.create_squared_image``, annotated with
    one green rectangle and a red ``<index>_<class>`` label per box, and
    written to ``<output_dir>/roidb_boxes/<image basename>``.

    Args:
        output_dir: root directory for the rendered images.
        roidb: entries to draw; defaults to ``self._roidb``.
    """
    # Create and write to the SAME directory. The previous code created
    # 'roidb_boxes' but wrote into 'roidb_bboxes', which never existed, so
    # cv2.imwrite failed silently.
    box_dir = osp.join(output_dir, 'roidb_boxes')
    ds_utils.maybe_create(output_dir)
    ds_utils.maybe_create(box_dir)

    if roidb is None:
        roidb = self._roidb

    for i, roi in enumerate(roidb):
        im_path = roi['image']
        # Work on a copy so that the database boxes are not shifted.
        bboxes = roi['boxes'].copy()
        clses = roi['clses']

        # Image data, flipped if necessary.
        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        if roi['flipped']:
            img = cv2.flip(img, 1)

        # Move the boxes into the coordinate frame of the squared image.
        img, offset_x, offset_y = ds_utils.create_squared_image(
            img, cfg.PIXEL_MEANS)
        bboxes[:, [0, 2]] += offset_x
        bboxes[:, [1, 3]] += offset_y

        # Font size proportional to the diagonal of the squared image.
        fontScale = 0.0007 * math.sqrt(2 * img.shape[0] * img.shape[0])

        for j in range(bboxes.shape[0]):
            # Plain Python ints; a full-width integer cast avoids the
            # wrap-around of int16 on very large images.
            x1, y1, x2, y2 = [int(v) for v in bboxes[j, :].astype(int)]
            cls = self.classes[clses[j]]
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 1)
            cv2.putText(img, '{:}_{:}'.format(j, cls), (x1, y1 - 2),
                        cv2.FONT_HERSHEY_SIMPLEX, fontScale, (0, 0, 255), 1)

        output_path = osp.join(box_dir, osp.basename(im_path))
        cv2.imwrite(output_path, img)
        # Progress indicator; the call form works on Python 2 and 3.
        print(i)
def draw_objdb_bboxes(self, output_dir, objdb=None):
    """Draw the box of every objdb entry and save the annotated images.

    Each image is read from ``obj['image']``, mirrored when the entry is
    flipped, squared with ``ds_utils.create_squared_image``, annotated with
    a green rectangle and a red ``<obj_id>_<cls>`` label, and written to
    ``<output_dir>/objdb_boxes/<image name>_<zero-padded obj_id><ext>``.

    Args:
        output_dir: root directory for the rendered images.
        objdb: entries to draw; defaults to ``self._objdb``.
    """
    box_dir = osp.join(output_dir, 'objdb_boxes')
    ds_utils.maybe_create(output_dir)
    ds_utils.maybe_create(box_dir)

    if objdb is None:
        objdb = self._objdb

    for i, obj in enumerate(objdb):
        im_path = obj['image']
        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        # Copy the box: shifting obj['box'] in place would corrupt the
        # database entry every time this method is called.
        box = obj['box'].copy()
        cls = obj['cls']
        aid = obj['obj_id']

        if obj['flipped']:
            img = cv2.flip(img, 1)

        # Move the box into the coordinate frame of the squared image.
        img, offset_x, offset_y = ds_utils.create_squared_image(
            img, cfg.PIXEL_MEANS)
        box[0] += offset_x
        box[1] += offset_y
        box[2] += offset_x
        box[3] += offset_y

        # The builtin int replaces the np.int alias removed from NumPy.
        x1, y1, x2, y2 = [int(v) for v in box.astype(int)]

        # Font size proportional to the diagonal of the squared image.
        fontScale = 0.0007 * math.sqrt(2 * img.shape[0] * img.shape[0])
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 1)
        # The label previously used an undefined name 'j'; the object id
        # identifies the single box of this entry.
        cv2.putText(img, '{:}_{:}'.format(aid, cls), (x1, y1 - 2),
                    cv2.FONT_HERSHEY_SIMPLEX, fontScale, (0, 0, 255), 1)

        im_name, im_ext = osp.splitext(osp.basename(im_path))
        output_path = osp.join(
            box_dir, im_name + '_' + str(aid).zfill(12) + im_ext)
        cv2.imwrite(output_path, img)
        # Progress indicator; the call form works on Python 2 and 3.
        print(i)
def sampler(self, test_db, epoch=0, K=3, vis=False):
    """Run box prediction on every test entry and store the results.

    For each entry the K x K candidates of ``single_sample`` are converted
    to pixel xyxy boxes in the frame of the original (unsquared) image,
    clipped to it, and dumped to
    ``<self.output_dir>/prediction_jsons/<image name>.json``.

    Args:
        test_db: sequence of entries with fields 'bg_image' and 'bg_layout'.
        epoch: number used as a prefix of the visualisation file names.
        K: forwarded to ``single_sample``.
        vis: when true, also write an overlay image (``_ol``) and the
            heatmap (``_hm``) to ``<self.output_dir>/prediction_vis``.

    Returns:
        A list with one dict per predicted box holding 'bg_image', 'name',
        'box' (a row of the xyxy array) and 'rank' (its index in that array).
    """
    # Sync the inference branches with the current training weights.
    self.center_inference.set_weights(
        self.get_center_branch_weights(self.model))
    self.size_inference.set_weights(
        self.get_size_branch_weights(self.model))

    output_dir = osp.join(self.output_dir, 'prediction_jsons')
    ds_utils.maybe_create(output_dir)
    if vis:
        vis_dir = osp.join(self.output_dir, 'prediction_vis')
        ds_utils.maybe_create(vis_dir)

    res_db = []
    for sample in test_db:
        im_path = sample['bg_image']
        im_name, im_ext = osp.splitext(osp.basename(im_path))

        ori_img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        img, ox, oy = ds_utils.create_squared_image(ori_img, cfg.PIXEL_MEANS)
        width = img.shape[1]
        height = img.shape[0]

        xywhs, grids, heatmap = self.single_sample(sample, K=K)
        xywhs = ds_utils.denormalize_xywh(xywhs, width, height)
        xyxys = ds_utils.xywh_to_xyxy(xywhs, width, height)
        # Undo the squaring offsets to get back to the original frame.
        xyxys[:, [0, 2]] -= ox
        xyxys[:, [1, 3]] -= oy
        xyxys = ds_utils.clip_boxes(
            xyxys, ori_img.shape[1], ori_img.shape[0])
        # Crop the heatmap to the region covered by the original image.
        heatmap = heatmap[oy:(oy + ori_img.shape[0]),
                          ox:(ox + ori_img.shape[1]), :]

        res = {
            'bg_image': im_path,
            'name': im_name,
            'boxes': xyxys.tolist(),
        }
        json_path = osp.join(output_dir, im_name + '.json')
        with open(json_path, 'w') as res_file:
            json.dump(res, res_file, indent=4, separators=(',', ': '))

        if vis:
            # Boxes are drawn directly onto the loaded image.
            vis_img = ori_img
            for j in range(xyxys.shape[0]):
                # Plain Python ints for the OpenCV drawing call.
                x1, y1, x2, y2 = [int(v) for v in xyxys[j].astype(int)]
                color = self.palette[j % len(self.palette)]
                cv2.rectangle(vis_img, (x1, y1), (x2, y2), color, 4)

            # Boost channel 1 of the image by the heatmap intensity. The
            # builtin float replaces the np.float alias removed from NumPy.
            gain = np.ones_like(heatmap, dtype=float)
            gain[:, :, 1] += heatmap[:, :, 1] / 255.0
            overlay = np.multiply(vis_img, gain)
            overlay = np.minimum(overlay, 255).astype(np.uint8)

            prefix = '%04d_' % epoch + im_name
            cv2.imwrite(osp.join(vis_dir, prefix + '_ol' + im_ext), overlay)
            cv2.imwrite(osp.join(vis_dir, prefix + '_hm' + im_ext), heatmap)

        # One flat record per predicted box, ranked by its position.
        for j in range(len(res['boxes'])):
            res_db.append({
                'bg_image': im_path,
                'name': im_name,
                'box': xyxys[j],
                'rank': j,
            })

    return res_db
def sampler(self, test_db, epoch=0, K=3, vis=False):
    """Run box prediction on every test entry and store the results.

    For each entry the K x K candidates of ``single_sample`` are converted
    to pixel xyxy boxes in the frame of the original (unsquared) image,
    clipped to it, and dumped to
    ``<self.output_dir>/prediction_jsons/<image name>.json``.

    Args:
        test_db: sequence of entries with fields 'bg_image' and 'bg_layout'.
        epoch: number used as a prefix of the visualisation file names.
        K: forwarded to ``single_sample``.
        vis: when true, also write an overlay image (``_ol``) and the
            heatmap (``_hm``) to ``<self.output_dir>/prediction_vis``.

    Returns:
        A list with one dict per predicted box holding 'bg_image', 'name',
        'box' (a row of the xyxy array) and 'rank' (its index in that array).
    """
    # Sync the inference branches with the current training weights.
    self.center_inference.set_weights(
        self.get_center_branch_weights(self.model))
    self.size_inference.set_weights(
        self.get_size_branch_weights(self.model))

    output_dir = osp.join(self.output_dir, 'prediction_jsons')
    ds_utils.maybe_create(output_dir)
    if vis:
        vis_dir = osp.join(self.output_dir, 'prediction_vis')
        ds_utils.maybe_create(vis_dir)

    res_db = []
    for sample in test_db:
        im_path = sample['bg_image']
        im_name, im_ext = osp.splitext(osp.basename(im_path))

        ori_img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        img, ox, oy = ds_utils.create_squared_image(ori_img, cfg.PIXEL_MEANS)
        width = img.shape[1]
        height = img.shape[0]

        xywhs, grids, heatmap = self.single_sample(sample, K=K)
        xywhs = ds_utils.denormalize_xywh(xywhs, width, height)
        xyxys = ds_utils.xywh_to_xyxy(xywhs, width, height)
        # Undo the squaring offsets to get back to the original frame.
        xyxys[:, [0, 2]] -= ox
        xyxys[:, [1, 3]] -= oy
        xyxys = ds_utils.clip_boxes(
            xyxys, ori_img.shape[1], ori_img.shape[0])
        # Crop the heatmap to the region covered by the original image.
        heatmap = heatmap[oy:(oy + ori_img.shape[0]),
                          ox:(ox + ori_img.shape[1]), :]

        res = {
            'bg_image': im_path,
            'name': im_name,
            'boxes': xyxys.tolist(),
        }
        json_path = osp.join(output_dir, im_name + '.json')
        with open(json_path, 'w') as res_file:
            json.dump(res, res_file, indent=4, separators=(',', ': '))

        if vis:
            # Boxes are drawn directly onto the loaded image.
            vis_img = ori_img
            for j in range(xyxys.shape[0]):
                # Plain Python ints for the OpenCV drawing call.
                x1, y1, x2, y2 = [int(v) for v in xyxys[j].astype(int)]
                color = self.palette[j % len(self.palette)]
                cv2.rectangle(vis_img, (x1, y1), (x2, y2), color, 4)

            # Boost channel 1 of the image by the heatmap intensity. The
            # builtin float replaces the np.float alias removed from NumPy.
            gain = np.ones_like(heatmap, dtype=float)
            gain[:, :, 1] += heatmap[:, :, 1] / 255.0
            overlay = np.multiply(vis_img, gain)
            overlay = np.minimum(overlay, 255).astype(np.uint8)

            prefix = '%04d_' % epoch + im_name
            cv2.imwrite(osp.join(vis_dir, prefix + '_ol' + im_ext), overlay)
            cv2.imwrite(osp.join(vis_dir, prefix + '_hm' + im_ext), heatmap)

        # One flat record per predicted box, ranked by its position.
        for j in range(len(res['boxes'])):
            res_db.append({
                'bg_image': im_path,
                'name': im_name,
                'box': xyxys[j],
                'rank': j,
            })

    return res_db
def get_scene_minibatch(self, square=True):
    """Assemble one minibatch with images, scene layouts and segmentations.

    Args:
        square: when true, image and segmentation are squared with
            ``ds_utils.create_squared_image`` and all boxes are shifted by
            the offsets returned for the image.

    Returns:
        (images, scenes, segs, boxes, grids): images and segmentations
        resized to ``cfg.PREDICT_RESOLUTION``, the layouts built by
        ``ds_utils.create_scenes``, the normalized xywh boxes and the grid
        indices computed from them.
    """
    batch_size = cfg.TRAIN.BATCH_SIZE
    resolution = cfg.PREDICT_RESOLUTION
    grid_shape = cfg.GRID_SHAPE
    num_clses = self.num_classes - 1

    # Indices of the minibatch; re-permute first when the cursor would
    # reach the end of the database.
    if self.objdb_cur + batch_size >= len(self.objdb):
        self.permute_objdb_indices()
    db_inds = self.objdb_perm[self.objdb_cur:self.objdb_cur + batch_size]
    self.objdb_cur += batch_size

    images = np.zeros(
        (batch_size, resolution[0], resolution[1], resolution[2]),
        dtype=np.float32)
    scenes = np.zeros(
        (batch_size, resolution[0], resolution[1], num_clses),
        dtype=np.float32)
    segs = np.zeros(
        (batch_size, resolution[0], resolution[1], resolution[2]),
        dtype=np.float32)
    grids = np.zeros((batch_size, 2))
    boxes = np.zeros((batch_size, 4))
    out_size = (resolution[1], resolution[0])

    for i in range(batch_size):
        obj = self.objdb[db_inds[i]]
        im_path = obj['background']
        seg_path = obj['out_seg']
        width = obj['width']
        height = obj['height']
        box = obj['box'].copy()
        # The builtin int replaces the np.int alias removed from NumPy.
        all_boxes = obj['all_boxes'].copy().reshape((-1, 4)).astype(int)
        all_clses = obj['all_clses'].copy().flatten()

        # Image data, flipped if necessary.
        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        seg = cv2.imread(seg_path, cv2.IMREAD_COLOR)
        if obj['flipped']:
            img = cv2.flip(img, 1)
            seg = cv2.flip(seg, 1)

        xywh = ds_utils.xyxy_to_xywh(box.reshape((1, 4))).squeeze()
        # Integer xyxy copy of the target box, passed on as 'ex_box'.
        ex_box = box.copy().flatten().astype(int)

        if square:
            img, offset_x, offset_y = ds_utils.create_squared_image(
                img, cfg.PIXEL_MEANS)
            xywh[0] += offset_x
            xywh[1] += offset_y
            ex_box[0] += offset_x
            ex_box[1] += offset_y
            ex_box[2] += offset_x
            ex_box[3] += offset_y
            all_boxes[:, 0] += offset_x
            all_boxes[:, 1] += offset_y
            all_boxes[:, 2] += offset_x
            all_boxes[:, 3] += offset_y
            # The squared image has equal sides.
            width = height = img.shape[0]
            # The offsets of the segmentation are not used afterwards.
            seg, _, _ = ds_utils.create_squared_image(seg, cfg.PIXEL_MEANS)

        nxywh = ds_utils.normalize_xywh(
            xywh.reshape((1, 4)), width, height).squeeze()
        # Discretized output position of the box.
        grid = ds_utils.boxes_to_indices(
            nxywh.reshape((1, 4)), grid_shape).squeeze()

        images[i] = cv2.resize(img, out_size)
        segs[i] = cv2.resize(seg, out_size)

        # Scale the pixel boxes to the output resolution. A single factor
        # derived from the width is applied to both axes.
        factor = float(resolution[0]) / width
        all_boxes = (factor * all_boxes).astype(int)
        ex_box = (factor * ex_box).astype(int)
        scenes[i] = ds_utils.create_scenes(
            resolution[1], resolution[0], all_boxes, all_clses,
            ex_box=ex_box, n_cls=num_clses)

        grids[i, :] = grid
        boxes[i, :] = nxywh

    return images, scenes, segs, boxes, grids
def get_rnn_minibatch(self, max_seq_len, square=True, vis=False):
    """Assemble one minibatch of per-image object sequences from the roidb.

    The objects of every image are sorted by decreasing ``seg_areas`` and
    truncated to at most ``max_seq_len`` entries.

    Args:
        max_seq_len: maximum number of objects kept per image; also the
            padded sequence length of the returned arrays.
        square: when true, each image is squared with
            ``ds_utils.create_squared_image`` and the boxes are shifted by
            the returned offsets.
        vis: when true, write debug images with the ground-truth (green),
            grid (blue) and regressed (red) boxes, colors given in BGR, to
            ``<cfg.ROOT_DIR>/output/<cfg.EXP_DIR>/<self.name>/rnn_minibatch``.

    Returns:
        (images, objects, bboxes, deltas, centers, ratios, masks): resized
        images, padded class ids, normalized xywh boxes, grid-box deltas,
        padded grid center and size indices, and a padded mask that is 1
        for real objects and 0 for padding.
    """
    batch_size = cfg.TRAIN.BATCH_SIZE
    resolution = cfg.RESOLUTION
    grid_shape = cfg.GRID_SHAPE

    # Indices of the minibatch; re-permute first when the cursor would
    # reach the end of the database.
    if self.roidb_cur + batch_size >= len(self.roidb):
        self.permute_roidb_indices()
    db_inds = self.roidb_perm[self.roidb_cur:self.roidb_cur + batch_size]
    self.roidb_cur += batch_size

    # Variable-length per-image sequences, padded after the loop.
    objects = []
    centers = []
    ratios = []
    masks = []
    # Normalized xywh representation.
    bboxes = np.zeros((batch_size, max_seq_len, 4), dtype=np.float32)
    # Offsets between grid boxes and ground-truth boxes.
    deltas = np.zeros((batch_size, max_seq_len, 4), dtype=np.float32)
    images = np.zeros(
        (batch_size, resolution[0], resolution[1], resolution[2]),
        dtype=np.float32)

    for i in range(batch_size):
        rois = self.roidb[db_inds[i]]
        im_path = rois['image']
        width = rois['width']
        height = rois['height']
        gt_boxes = rois['boxes'].copy()
        gt_cats = rois['clses'].copy()
        areas = rois['seg_areas']

        # The number of instances must not exceed max_seq_len.
        num_instances = min(gt_boxes.shape[0], max_seq_len)

        # Image data, flipped if necessary.
        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        if rois['flipped']:
            img = cv2.flip(img, 1)

        # Sort the objects by decreasing area.
        order = np.argsort(areas)[::-1]
        gt_boxes = gt_boxes[order, :]
        gt_cats = gt_cats[order]

        # [x1, y1, x2, y2] to [x, y, w, h].
        gt_boxes = ds_utils.xyxy_to_xywh(gt_boxes)

        if square:
            img, offset_x, offset_y = ds_utils.create_squared_image(
                img, cfg.PIXEL_MEANS)
            gt_boxes[:, 0] += offset_x
            gt_boxes[:, 1] += offset_y
            # The squared image has equal sides.
            width = height = img.shape[0]

        gt_boxes = ds_utils.normalize_xywh(gt_boxes, width, height)
        # Truncate the sequence: keep the first (largest) objects.
        gt_boxes = gt_boxes[:num_instances, :]

        # Discretized output positions.
        grid_indices = ds_utils.xywh_to_index(
            gt_boxes, grid_shape[1], grid_shape[0])
        # Deltas between grid boxes and ground-truth boxes.
        grid_boxes = ds_utils.index_to_xywh(
            grid_indices, grid_shape[1], grid_shape[0])
        grid_deltas = ds_utils.bbox_transform(grid_boxes, gt_boxes)

        images[i, :, :, :] = cv2.resize(img, (resolution[1], resolution[0]))
        # Fill the first 'num_instances' slots; the rest stays zero.
        bboxes[i, :num_instances, :] = gt_boxes
        deltas[i, :num_instances, :] = grid_deltas
        # Object indicators.
        objects.append(gt_cats[:num_instances].tolist())
        # Masks for the loss function.
        masks.append(np.ones((num_instances, )).tolist())
        # Grid centers and sizes.
        centers.append(grid_indices[:, 0].tolist())
        ratios.append(grid_indices[:, 1].tolist())

    # Pad every sequence with zeros at the end up to max_seq_len.
    objects = pad_sequences(objects, maxlen=max_seq_len,
                            padding='post', truncating='post', value=0.)
    centers = pad_sequences(centers, maxlen=max_seq_len,
                            padding='post', truncating='post', value=0.)
    ratios = pad_sequences(ratios, maxlen=max_seq_len,
                           padding='post', truncating='post', value=0.)
    masks = pad_sequences(masks, maxlen=max_seq_len,
                          padding='post', truncating='post', value=0.)

    if vis:
        output_dir = osp.abspath(osp.join(
            cfg.ROOT_DIR, 'output', cfg.EXP_DIR, self.name, 'rnn_minibatch'))
        if not osp.exists(output_dir):
            os.makedirs(output_dir)

        for i in range(batch_size):
            rois = self.roidb[db_inds[i]]
            im_name = osp.splitext(osp.basename(rois['image']))[0]
            msk = masks[i, :]

            # Ground-truth boxes and a private copy of the image to draw on.
            ibb = bboxes[i, :, :].copy()
            iim = images[i, :, :, :].copy()

            # Grid boxes.
            grid_indices = np.vstack(
                (centers[i, :], ratios[i, :])).transpose()
            gbb = ds_utils.index_to_xywh(
                grid_indices, grid_shape[1], grid_shape[0])
            # Regressed boxes.
            rbb = ds_utils.bbox_transform_inv(gbb, deltas[i, :, :])

            # Denormalize to pixels of the resized image, then to xyxy.
            ibb = ds_utils.denormalize_xywh(ibb, resolution[1], resolution[0])
            gbb = ds_utils.denormalize_xywh(gbb, resolution[1], resolution[0])
            rbb = ds_utils.denormalize_xywh(rbb, resolution[1], resolution[0])
            ibb = ds_utils.xywh_to_xyxy(ibb, resolution[1], resolution[0])
            gbb = ds_utils.xywh_to_xyxy(gbb, resolution[1], resolution[0])
            rbb = ds_utils.xywh_to_xyxy(rbb, resolution[1], resolution[0])

            # (boxes, BGR color, thickness) in drawing order.
            layers = (
                (ibb, (0, 255, 0), 2),  # ground truth
                (gbb, (255, 0, 0), 1),  # grid boxes
                (rbb, (0, 0, 255), 1),  # regressed boxes
            )
            for j in range(ibb.shape[0]):
                # The mask is zero from the first padded slot onwards.
                if msk[j] == 0:
                    break
                for layer_boxes, color, thickness in layers:
                    bb = [int(v) for v in layer_boxes[j, :].astype(np.int16)]
                    cv2.rectangle(iim, (bb[0], bb[1]), (bb[2], bb[3]),
                                  color, thickness)

            output_path = osp.join(output_dir, '%06d_' % i + im_name + '.jpg')
            cv2.imwrite(output_path, iim)

    return images, objects, bboxes, deltas, centers, ratios, masks
def get_scene_minibatch(self, square=True):
    """Assemble one minibatch with images, scene layouts and segmentations.

    Args:
        square: when true, image and segmentation are squared with
            ``ds_utils.create_squared_image`` and all boxes are shifted by
            the offsets returned for the image.

    Returns:
        (images, scenes, segs, boxes, grids): images and segmentations
        resized to ``cfg.PREDICT_RESOLUTION``, the layouts built by
        ``ds_utils.create_scenes``, the normalized xywh boxes and the grid
        indices computed from them.
    """
    batch_size = cfg.TRAIN.BATCH_SIZE
    resolution = cfg.PREDICT_RESOLUTION
    grid_shape = cfg.GRID_SHAPE
    num_clses = self.num_classes - 1

    # Indices of the minibatch; re-permute first when the cursor would
    # reach the end of the database.
    if self.objdb_cur + batch_size >= len(self.objdb):
        self.permute_objdb_indices()
    db_inds = self.objdb_perm[self.objdb_cur:self.objdb_cur + batch_size]
    self.objdb_cur += batch_size

    images = np.zeros(
        (batch_size, resolution[0], resolution[1], resolution[2]),
        dtype=np.float32)
    scenes = np.zeros(
        (batch_size, resolution[0], resolution[1], num_clses),
        dtype=np.float32)
    segs = np.zeros(
        (batch_size, resolution[0], resolution[1], resolution[2]),
        dtype=np.float32)
    grids = np.zeros((batch_size, 2))
    boxes = np.zeros((batch_size, 4))
    out_size = (resolution[1], resolution[0])

    for i in range(batch_size):
        obj = self.objdb[db_inds[i]]
        im_path = obj['background']
        seg_path = obj['out_seg']
        width = obj['width']
        height = obj['height']
        box = obj['box'].copy()
        # The builtin int replaces the np.int alias removed from NumPy.
        all_boxes = obj['all_boxes'].copy().reshape((-1, 4)).astype(int)
        all_clses = obj['all_clses'].copy().flatten()

        # Image data, flipped if necessary.
        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        seg = cv2.imread(seg_path, cv2.IMREAD_COLOR)
        if obj['flipped']:
            img = cv2.flip(img, 1)
            seg = cv2.flip(seg, 1)

        xywh = ds_utils.xyxy_to_xywh(box.reshape((1, 4))).squeeze()
        # Integer xyxy copy of the target box, passed on as 'ex_box'.
        ex_box = box.copy().flatten().astype(int)

        if square:
            img, offset_x, offset_y = ds_utils.create_squared_image(
                img, cfg.PIXEL_MEANS)
            xywh[0] += offset_x
            xywh[1] += offset_y
            ex_box[0] += offset_x
            ex_box[1] += offset_y
            ex_box[2] += offset_x
            ex_box[3] += offset_y
            all_boxes[:, 0] += offset_x
            all_boxes[:, 1] += offset_y
            all_boxes[:, 2] += offset_x
            all_boxes[:, 3] += offset_y
            # The squared image has equal sides.
            width = height = img.shape[0]
            # The offsets of the segmentation are not used afterwards.
            seg, _, _ = ds_utils.create_squared_image(seg, cfg.PIXEL_MEANS)

        nxywh = ds_utils.normalize_xywh(
            xywh.reshape((1, 4)), width, height).squeeze()
        # Discretized output position of the box.
        grid = ds_utils.boxes_to_indices(
            nxywh.reshape((1, 4)), grid_shape).squeeze()

        images[i] = cv2.resize(img, out_size)
        segs[i] = cv2.resize(seg, out_size)

        # Scale the pixel boxes to the output resolution. A single factor
        # derived from the width is applied to both axes.
        factor = float(resolution[0]) / width
        all_boxes = (factor * all_boxes).astype(int)
        ex_box = (factor * ex_box).astype(int)
        scenes[i] = ds_utils.create_scenes(
            resolution[1], resolution[0], all_boxes, all_clses,
            ex_box=ex_box, n_cls=num_clses)

        grids[i, :] = grid
        boxes[i, :] = nxywh

    return images, scenes, segs, boxes, grids
def get_rnn_minibatch(self, max_seq_len, square=True, vis=False):
    """Assemble one minibatch of per-image object sequences from the roidb.

    The objects of every image are sorted by decreasing ``seg_areas`` and
    truncated to at most ``max_seq_len`` entries.

    Args:
        max_seq_len: maximum number of objects kept per image; also the
            padded sequence length of the returned arrays.
        square: when true, each image is squared with
            ``ds_utils.create_squared_image`` and the boxes are shifted by
            the returned offsets.
        vis: when true, write debug images with the ground-truth (green),
            grid (blue) and regressed (red) boxes, colors given in BGR, to
            ``<cfg.ROOT_DIR>/output/<cfg.EXP_DIR>/<self.name>/rnn_minibatch``.

    Returns:
        (images, objects, bboxes, deltas, centers, ratios, masks): resized
        images, padded class ids, normalized xywh boxes, grid-box deltas,
        padded grid center and size indices, and a padded mask that is 1
        for real objects and 0 for padding.
    """
    batch_size = cfg.TRAIN.BATCH_SIZE
    resolution = cfg.RESOLUTION
    grid_shape = cfg.GRID_SHAPE

    # Indices of the minibatch; re-permute first when the cursor would
    # reach the end of the database.
    if self.roidb_cur + batch_size >= len(self.roidb):
        self.permute_roidb_indices()
    db_inds = self.roidb_perm[self.roidb_cur:self.roidb_cur + batch_size]
    self.roidb_cur += batch_size

    # Variable-length per-image sequences, padded after the loop.
    objects = []
    centers = []
    ratios = []
    masks = []
    # Normalized xywh representation.
    bboxes = np.zeros((batch_size, max_seq_len, 4), dtype=np.float32)
    # Offsets between grid boxes and ground-truth boxes.
    deltas = np.zeros((batch_size, max_seq_len, 4), dtype=np.float32)
    images = np.zeros(
        (batch_size, resolution[0], resolution[1], resolution[2]),
        dtype=np.float32)

    for i in range(batch_size):
        rois = self.roidb[db_inds[i]]
        im_path = rois['image']
        width = rois['width']
        height = rois['height']
        gt_boxes = rois['boxes'].copy()
        gt_cats = rois['clses'].copy()
        areas = rois['seg_areas']

        # The number of instances must not exceed max_seq_len.
        num_instances = min(gt_boxes.shape[0], max_seq_len)

        # Image data, flipped if necessary.
        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        if rois['flipped']:
            img = cv2.flip(img, 1)

        # Sort the objects by decreasing area.
        order = np.argsort(areas)[::-1]
        gt_boxes = gt_boxes[order, :]
        gt_cats = gt_cats[order]

        # [x1, y1, x2, y2] to [x, y, w, h].
        gt_boxes = ds_utils.xyxy_to_xywh(gt_boxes)

        if square:
            img, offset_x, offset_y = ds_utils.create_squared_image(
                img, cfg.PIXEL_MEANS)
            gt_boxes[:, 0] += offset_x
            gt_boxes[:, 1] += offset_y
            # The squared image has equal sides.
            width = height = img.shape[0]

        gt_boxes = ds_utils.normalize_xywh(gt_boxes, width, height)
        # Truncate the sequence: keep the first (largest) objects.
        gt_boxes = gt_boxes[:num_instances, :]

        # Discretized output positions.
        grid_indices = ds_utils.xywh_to_index(
            gt_boxes, grid_shape[1], grid_shape[0])
        # Deltas between grid boxes and ground-truth boxes.
        grid_boxes = ds_utils.index_to_xywh(
            grid_indices, grid_shape[1], grid_shape[0])
        grid_deltas = ds_utils.bbox_transform(grid_boxes, gt_boxes)

        images[i, :, :, :] = cv2.resize(img, (resolution[1], resolution[0]))
        # Fill the first 'num_instances' slots; the rest stays zero.
        bboxes[i, :num_instances, :] = gt_boxes
        deltas[i, :num_instances, :] = grid_deltas
        # Object indicators.
        objects.append(gt_cats[:num_instances].tolist())
        # Masks for the loss function.
        masks.append(np.ones((num_instances, )).tolist())
        # Grid centers and sizes.
        centers.append(grid_indices[:, 0].tolist())
        ratios.append(grid_indices[:, 1].tolist())

    # Pad every sequence with zeros at the end up to max_seq_len.
    objects = pad_sequences(objects, maxlen=max_seq_len,
                            padding='post', truncating='post', value=0.)
    centers = pad_sequences(centers, maxlen=max_seq_len,
                            padding='post', truncating='post', value=0.)
    ratios = pad_sequences(ratios, maxlen=max_seq_len,
                           padding='post', truncating='post', value=0.)
    masks = pad_sequences(masks, maxlen=max_seq_len,
                          padding='post', truncating='post', value=0.)

    if vis:
        output_dir = osp.abspath(osp.join(
            cfg.ROOT_DIR, 'output', cfg.EXP_DIR, self.name, 'rnn_minibatch'))
        if not osp.exists(output_dir):
            os.makedirs(output_dir)

        for i in range(batch_size):
            rois = self.roidb[db_inds[i]]
            im_name = osp.splitext(osp.basename(rois['image']))[0]
            msk = masks[i, :]

            # Ground-truth boxes and a private copy of the image to draw on.
            ibb = bboxes[i, :, :].copy()
            iim = images[i, :, :, :].copy()

            # Grid boxes.
            grid_indices = np.vstack(
                (centers[i, :], ratios[i, :])).transpose()
            gbb = ds_utils.index_to_xywh(
                grid_indices, grid_shape[1], grid_shape[0])
            # Regressed boxes.
            rbb = ds_utils.bbox_transform_inv(gbb, deltas[i, :, :])

            # Denormalize to pixels of the resized image, then to xyxy.
            ibb = ds_utils.denormalize_xywh(ibb, resolution[1], resolution[0])
            gbb = ds_utils.denormalize_xywh(gbb, resolution[1], resolution[0])
            rbb = ds_utils.denormalize_xywh(rbb, resolution[1], resolution[0])
            ibb = ds_utils.xywh_to_xyxy(ibb, resolution[1], resolution[0])
            gbb = ds_utils.xywh_to_xyxy(gbb, resolution[1], resolution[0])
            rbb = ds_utils.xywh_to_xyxy(rbb, resolution[1], resolution[0])

            # (boxes, BGR color, thickness) in drawing order.
            layers = (
                (ibb, (0, 255, 0), 2),  # ground truth
                (gbb, (255, 0, 0), 1),  # grid boxes
                (rbb, (0, 0, 255), 1),  # regressed boxes
            )
            for j in range(ibb.shape[0]):
                # The mask is zero from the first padded slot onwards.
                if msk[j] == 0:
                    break
                for layer_boxes, color, thickness in layers:
                    bb = [int(v) for v in layer_boxes[j, :].astype(np.int16)]
                    cv2.rectangle(iim, (bb[0], bb[1]), (bb[2], bb[3]),
                                  color, thickness)

            output_path = osp.join(output_dir, '%06d_' % i + im_name + '.jpg')
            cv2.imwrite(output_path, iim)

    return images, objects, bboxes, deltas, centers, ratios, masks