def train(self, train_imdb, val_imdb, test_db):
    """Fit `self.model` on `train_imdb`, validating on `val_imdb`.

    Checkpoints go to <self.output_dir>/prediction_ckpts and TensorBoard
    logs to <self.output_dir>/prediction_logs.  At the end of every epoch
    `self.sampler` is run on `test_db` with visualisation enabled.
    """
    batch_size = cfg.TRAIN.BATCH_SIZE

    ckpt_dir = osp.join(self.output_dir, 'prediction_ckpts')
    ds_utils.maybe_create(ckpt_dir)
    # Adjacent literals concatenate into the original single template.
    ckpt_name = ("weights.{epoch:02d}"
                 "-{val_output_cens_loss:.2f}"
                 "-{val_output_sizes_loss:.2f}"
                 "-{val_output_cens_mcl_accu:.2f}"
                 "-{val_output_sizes_categorical_accuracy:.2f}"
                 ".hdf5")
    ckpt_path = osp.join(ckpt_dir, ckpt_name)

    log_dir = osp.join(self.output_dir, 'prediction_logs')
    ds_utils.maybe_create(log_dir)

    def _sample_after_epoch(epoch, logs):
        # `logs` is part of the callback signature but is not used.
        return self.sampler(test_db, epoch, vis=True)

    callbacks = [
        ModelCheckpoint(filepath=ckpt_path, verbose=1,
                        save_weights_only=False),
        TensorBoard(log_dir=log_dir),
        LambdaCallback(on_epoch_end=_sample_after_epoch),
    ]

    def _num_steps(imdb):
        # Number of whole batches available in the database.
        return int(len(imdb.objdb) / batch_size)

    self.model.fit_generator(
        self.generator(train_imdb, batch_size),
        steps_per_epoch=_num_steps(train_imdb),
        epochs=cfg.TRAIN.EPOCH,
        callbacks=callbacks,
        validation_data=self.generator(val_imdb, batch_size),
        validation_steps=_num_steps(val_imdb))
def get_minibatch(self, imdb, vis=False):
    """Fetch one background minibatch from `imdb` and build its targets.

    Returns the tuple (images, layouts, rois, cens_onehot, sizes_onehot),
    where the one-hot targets come from the two columns of the grid indices
    returned by `imdb.get_background_minibatch()`.

    When `vis` is true, every sample of the batch is additionally written to
    <self.output_dir>/minibatch as img_*.jpg, lyo_*.jpg and roi_*.jpg, with
    the true box and the grid-quantised box drawn on image and layout.
    """
    batch_size = cfg.TRAIN.BATCH_SIZE
    resolution = cfg.PREDICT_RESOLUTION
    grid_shape = cfg.GRID_SHAPE

    images, layouts, boxes, grids = imdb.get_background_minibatch()
    center_ids = grids[:, 0]
    size_ids = grids[:, 1]
    rois = ds_utils.centers_to_rois(center_ids, grid_shape[:2], grid_shape[:2])
    cens_onehot = to_categorical(center_ids, self.cen_dims)
    sizes_onehot = to_categorical(size_ids, self.size_dims)

    if not vis:
        return images, layouts, rois, cens_onehot, sizes_onehot

    dump_dir = osp.join(self.output_dir, 'minibatch')
    ds_utils.maybe_create(dump_dir)
    width, height = resolution[1], resolution[0]
    true_color, grid_color = (0, 255, 0), (255, 0, 0)

    for k in range(batch_size):
        img_canvas = images[k].copy()
        lyo_canvas = layouts[k].copy()
        cen_id = np.argmax(cens_onehot[k, :])
        size_id = np.argmax(sizes_onehot[k, :])

        # Ground-truth box in pixel corner coordinates.
        true_xywh = ds_utils.denormalize_xywh(
            boxes[k, :].reshape((1, 4)), width, height)
        true_xyxy = ds_utils.xywh_to_xyxy(true_xywh, width, height).squeeze()

        # Box recovered from the (center, size) grid indices.
        grid_xywh = ds_utils.indices_to_boxes(
            np.array([cen_id, size_id]).reshape((1, 2)), grid_shape)
        grid_xywh = ds_utils.denormalize_xywh(grid_xywh, width, height)
        grid_xyxy = ds_utils.xywh_to_xyxy(grid_xywh, width, height).squeeze()

        for canvas in (img_canvas, lyo_canvas):
            for xyxy, color in ((true_xyxy, true_color),
                                (grid_xyxy, grid_color)):
                cv2.rectangle(canvas, (xyxy[0], xyxy[1]),
                              (xyxy[2], xyxy[3]), color, 1)

        roi_canvas = cv2.resize((rois[k].copy() * 255).astype(np.uint8),
                                (width, height))

        for template, picture in (('img_%06d.jpg', img_canvas),
                                  ('lyo_%06d.jpg', lyo_canvas),
                                  ('roi_%06d.jpg', roi_canvas)):
            cv2.imwrite(osp.join(dump_dir, template % k), picture)

    return images, layouts, rois, cens_onehot, sizes_onehot
def dump_full_features(self, output_dir, image_encoder, ctxdb=None):
    """Encode each context image as a whole and pickle the feature vector.

    For every entry of `ctxdb` (default: `self.objdb`) the image at
    entry['image'] is resized to cfg.RETRIEVAL_RESOLUTION, has
    cfg.PIXEL_MEANS subtracted, is passed through `image_encoder.predict`
    and flattened.  The result is written with cPickle to

        <output_dir>/full/<cfg.LAYER>/<image_set + year>/
            <image name>_<obj_id zero-padded to 12>.npy

    NOTE: despite the .npy extension the files are cPickle dumps.
    """
    full_resolution = cfg.RETRIEVAL_RESOLUTION
    if ctxdb is None:
        ctxdb = self.objdb

    # Create the directory tree one level at a time.
    ds_utils.maybe_create(output_dir)
    for part in ('full', cfg.LAYER,
                 '{}'.format(self._image_set + self._year)):
        output_dir = osp.join(output_dir, part)
        ds_utils.maybe_create(output_dir)

    # Loop invariants: target (width, height) and the broadcastable mean.
    target_size = (full_resolution[1], full_resolution[0])
    pixel_means = cfg.PIXEL_MEANS.reshape((1, 1, 1, 3))

    for i, ctx in enumerate(ctxdb):
        im_path = ctx['image']
        ann_id = ctx['obj_id']

        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        img = cv2.resize(img, target_size)
        x = np.expand_dims(img.astype(np.float64), axis=0) - pixel_means
        features = image_encoder.predict(x).flatten()

        im_name, _ = osp.splitext(osp.basename(im_path))
        output_path = osp.join(
            output_dir, im_name + '_' + str(ann_id).zfill(12) + '.npy')
        with open(output_path, 'wb') as fid:
            cPickle.dump(features, fid, cPickle.HIGHEST_PROTOCOL)
        print(i)
def dump_crop_features(self, output_dir, image_encoder, ctxdb=None):
    """Encode the surroundings of each object box and pickle the features.

    For every entry of `ctxdb` (default: `self.objdb`) the pixels inside
    entry['box'] (inclusive) are overwritten with cfg.PIXEL_MEANS, the image
    is passed through `ds_utils.crop_and_resize`, mean-subtracted, encoded
    with `image_encoder.predict` and flattened.  The result is written with
    cPickle to

        <output_dir>/crop/<cfg.LAYER>/<image_set + year>/
            <image name>_<obj_id zero-padded to 12>.npy

    NOTE: despite the .npy extension the files are cPickle dumps.
    """
    full_resolution = [224, 224, 3]
    crop_resolution = [112, 112, 3]
    if ctxdb is None:
        ctxdb = self.objdb

    # Create the directory tree one level at a time.
    ds_utils.maybe_create(output_dir)
    for part in ('crop', cfg.LAYER,
                 '{}'.format(self._image_set + self._year)):
        output_dir = osp.join(output_dir, part)
        ds_utils.maybe_create(output_dir)

    for i, ctx in enumerate(ctxdb):
        im_path = ctx['image']
        ann_id = ctx['obj_id']
        # Builtin int instead of the np.int alias, which newer NumPy removed.
        bb = ctx['box'].copy().astype(int)

        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        # Hide the object itself so only its context is encoded.
        img[bb[1]:(bb[3] + 1), bb[0]:(bb[2] + 1), :] = \
            cfg.PIXEL_MEANS.reshape((1, 1, 3))
        img = ds_utils.crop_and_resize(img, bb, full_resolution,
                                       crop_resolution)
        x = np.expand_dims(img.astype(np.float64), axis=0) \
            - cfg.PIXEL_MEANS.reshape((1, 1, 1, 3))
        features = image_encoder.predict(x).flatten()

        im_name, _ = osp.splitext(osp.basename(im_path))
        output_path = osp.join(
            output_dir, im_name + '_' + str(ann_id).zfill(12) + '.npy')
        with open(output_path, 'wb') as fid:
            cPickle.dump(features, fid, cPickle.HIGHEST_PROTOCOL)
        print(i)
def draw_objdb_layouts(self, color_palette, output_dir, objdb=None):
    """Render and save one layout image per object entry.

    Each entry of `objdb` (default: `self.objdb`) is rendered with
    `self.render_entry_boxes(entry, entry['box'], color_palette)` and written
    to <output_dir>/<image_set + year>/<image name>_<obj_id padded to 12><ext>.
    """
    if objdb is None:
        objdb = self.objdb

    layout_dir = osp.join(output_dir, self._image_set + self._year)
    ds_utils.maybe_create(layout_dir)

    for i, entry in enumerate(objdb):
        output_img = self.render_entry_boxes(entry, entry['box'],
                                             color_palette)
        im_name, im_ext = osp.splitext(osp.basename(entry['image']))
        output_path = osp.join(
            layout_dir,
            im_name + '_' + str(entry['obj_id']).zfill(12) + im_ext)
        cv2.imwrite(output_path, output_img)
        print(i)
def draw_roidb_bboxes(self, output_dir, roidb=None):
    """Draw every labelled box of each roidb image and save the result.

    For each entry of `roidb` (default: `self._roidb`) the image is read,
    flipped horizontally when entry['flipped'] is set, padded to a square by
    `ds_utils.create_squared_image`, and annotated with one rectangle and a
    '<index>_<class name>' label per box.  Images are written to
    <output_dir>/roidb_bboxes/<image basename>.
    """
    # Create exactly the directory the images are written to; a mismatch
    # between the created and the written directory makes the writes fail.
    bbox_dir = osp.join(output_dir, 'roidb_bboxes')
    ds_utils.maybe_create(output_dir)
    ds_utils.maybe_create(bbox_dir)

    if roidb is None:
        roidb = self._roidb

    for i, roi in enumerate(roidb):
        im_path = roi['image']
        # Copy so that shifting the boxes does not alter the roidb entry.
        bboxes = roi['boxes'].copy()
        clses = roi['clses']

        # image data, flip if necessary
        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        if roi['flipped']:
            img = cv2.flip(img, 1)

        img, offset_x, offset_y = \
            ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
        # Move the boxes by the offsets returned for the squared image.
        bboxes[:, 0] += offset_x
        bboxes[:, 1] += offset_y
        bboxes[:, 2] += offset_x
        bboxes[:, 3] += offset_y

        fontScale = 0.0007 * math.sqrt(2 * img.shape[0] * img.shape[0])

        for j in range(bboxes.shape[0]):
            bb = bboxes[j, :].astype(np.int16)
            cls = self.classes[clses[j]]
            cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]),
                          (0, 255, 0), 1)
            cv2.putText(img, '{:}_{:}'.format(j, cls),
                        (bb[0], bb[1] - 2),
                        cv2.FONT_HERSHEY_SIMPLEX, fontScale, (0, 0, 255), 1)

        cv2.imwrite(osp.join(bbox_dir, osp.basename(im_path)), img)
        print(i)
def draw_objdb_layouts(self, color_palette, output_dir, objdb=None):
    """Write a rendered layout image for every entry of the object database.

    `objdb` defaults to `self.objdb`.  Each entry is drawn by
    `self.render_entry_boxes` using its own 'box' and `color_palette`, then
    saved under <output_dir>/<image_set + year>/ with the file name
    <image name>_<obj_id zero-padded to 12><original extension>.
    """
    if objdb is None:
        objdb = self.objdb

    layout_dir = osp.join(output_dir, self._image_set + self._year)
    ds_utils.maybe_create(layout_dir)

    for i, entry in enumerate(objdb):
        cur_box = entry['box']
        ann_id = entry['obj_id']
        output_img = self.render_entry_boxes(entry, cur_box, color_palette)

        im_name, im_ext = osp.splitext(osp.basename(entry['image']))
        file_name = im_name + '_' + str(ann_id).zfill(12) + im_ext
        cv2.imwrite(osp.join(layout_dir, file_name), output_img)
        print(i)
def sample(self, src_ctxdb, dst_ctxdb, K, mode=0, show_gt=False):
    """Retrieve candidates for each source context and save the composites.

    A search structure is built over `dst_ctxdb`; for every entry of
    `src_ctxdb`, `self.inference_ctx` returns a candidate list whose items
    hold (destination index, distance).  Each candidate is composed with the
    source via `self.alpha_compose`; the colour composite is written to
    <self.output_dir>/composite_colors and the mask (scaled by 255) to
    <self.output_dir>/composite_masks.  With `show_gt` the source background
    image is concatenated to the left of the composite.
    """
    # Create output directories
    comp_dir = osp.join(self.output_dir, 'composite_colors')
    mask_dir = osp.join(self.output_dir, 'composite_masks')
    for folder in (comp_dir, mask_dir):
        ds_utils.maybe_create(folder)

    # Build the search structure over the destination contexts
    dst_tree = self.build_search_tree(dst_ctxdb, mode)

    # Retrieval
    for i, src_ctx in enumerate(src_ctxdb):
        cand_list = self.inference_ctx(src_ctx, mode, dst_tree, K)

        # composition
        for j, cand in enumerate(cand_list):
            # The distance is read but not used further.
            dst_index, dst_dist = cand[0], cand[1]
            dst_ctx = dst_ctxdb[dst_index]
            composite_image, composite_mask = self.alpha_compose(
                src_ctx, dst_ctx)

            if show_gt:
                src_img = cv2.imread(src_ctx['bg_image'], cv2.IMREAD_COLOR)
                composite_image = np.concatenate(
                    (src_img, composite_image), axis=1)

            im_name, im_ext = osp.splitext(
                osp.basename(src_ctx['bg_image']))
            rank = src_ctx['rank']
            file_name = ''.join([
                im_name,
                '_%02d' % rank,
                '_%02d' % j,
                '_',
                str(dst_ctx['obj_id']).zfill(12),
                im_ext,
            ])

            cv2.imwrite(osp.join(comp_dir, file_name), composite_image)
            cv2.imwrite(osp.join(mask_dir, file_name),
                        (composite_mask * 255).astype(np.uint8))
        print(i)
def draw_roidb_bboxes(self, output_dir, roidb=None):
    """Save each roidb image with its boxes and class labels drawn on it.

    `roidb` defaults to `self._roidb`.  Every image is loaded, mirrored when
    the entry is marked 'flipped', squared with
    `ds_utils.create_squared_image`, and gets a rectangle plus a
    '<box index>_<class name>' caption per box.  Output files go to
    <output_dir>/roidb_bboxes/ under the image's base name.
    """
    ds_utils.maybe_create(output_dir)
    # The directory created here must be the one written to below; creating
    # a differently named directory leaves the write target missing.
    bbox_dir = osp.join(output_dir, 'roidb_bboxes')
    ds_utils.maybe_create(bbox_dir)

    if roidb is None:
        roidb = self._roidb

    for i, roi in enumerate(roidb):
        im_path = roi['image']
        bboxes = roi['boxes'].copy()  # keep the roidb entry untouched
        clses = roi['clses']

        # image data, flip if necessary
        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        if roi['flipped']:
            img = cv2.flip(img, 1)

        img, offset_x, offset_y = \
            ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
        # Apply the offsets returned for the squared image to both corners.
        bboxes[:, 0] += offset_x
        bboxes[:, 2] += offset_x
        bboxes[:, 1] += offset_y
        bboxes[:, 3] += offset_y

        fontScale = 0.0007 * math.sqrt(2 * img.shape[0] * img.shape[0])

        for j in range(bboxes.shape[0]):
            bb = bboxes[j, :].astype(np.int16)
            label = '{:}_{:}'.format(j, self.classes[clses[j]])
            cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]),
                          (0, 255, 0), 1)
            cv2.putText(img, label, (bb[0], bb[1] - 2),
                        cv2.FONT_HERSHEY_SIMPLEX, fontScale, (0, 0, 255), 1)

        output_path = osp.join(bbox_dir, osp.basename(im_path))
        cv2.imwrite(output_path, img)
        print(i)
def draw_objdb_bboxes(self, output_dir, objdb=None):
    """Draw the box of each object entry on its image and save the result.

    For each entry of `objdb` (default: `self._objdb`) the image is read,
    flipped horizontally when entry['flipped'] is set, padded to a square by
    `ds_utils.create_squared_image`, and annotated with the entry's box and
    a '<entry index>_<cls>' label.  Images are written to
    <output_dir>/objdb_boxes/<image name>_<obj_id padded to 12><ext>.
    """
    box_dir = osp.join(output_dir, 'objdb_boxes')
    ds_utils.maybe_create(output_dir)
    ds_utils.maybe_create(box_dir)

    if objdb is None:
        objdb = self._objdb

    for i, obj in enumerate(objdb):
        im_path = obj['image']
        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        # Work on a copy: shifting obj['box'] in place would change the
        # database entry on every call.
        box = obj['box'].copy()
        cls = obj['cls']
        aid = obj['obj_id']

        if obj['flipped']:
            img = cv2.flip(img, 1)

        img, offset_x, offset_y = \
            ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
        box[0] += offset_x
        box[1] += offset_y
        box[2] += offset_x
        box[3] += offset_y
        # Builtin int instead of the np.int alias, which newer NumPy removed.
        bb = box.astype(int)

        fontScale = 0.0007 * math.sqrt(2 * img.shape[0] * img.shape[0])
        cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]), (0, 255, 0), 1)
        # Label with the entry index `i`; there is no inner box index in
        # this function, so `j` would be undefined here.
        cv2.putText(img, '{:}_{:}'.format(i, cls), (bb[0], bb[1] - 2),
                    cv2.FONT_HERSHEY_SIMPLEX, fontScale, (0, 0, 255), 1)

        im_name, im_ext = osp.splitext(osp.basename(im_path))
        output_path = osp.join(
            box_dir, im_name + '_' + str(aid).zfill(12) + im_ext)
        cv2.imwrite(output_path, img)
        print(i)
def draw_images(self, output_dir, roidb=None):
    """Re-save every roidb image into <output_dir>/images.

    `roidb` defaults to `self._roidb`.  Each image is read in colour and
    written unchanged under its original base name; the entry index and
    the base name are printed after each write.
    """
    ds_utils.maybe_create(output_dir)
    image_dir = osp.join(output_dir, 'images')
    ds_utils.maybe_create(image_dir)

    if roidb is None:
        roidb = self._roidb

    for idx, roi in enumerate(roidb):
        src_path = roi['image']
        base_name = osp.basename(src_path)
        picture = cv2.imread(src_path, cv2.IMREAD_COLOR)
        cv2.imwrite(osp.join(image_dir, base_name), picture)
        # Index and name separated by one space, as `print a, b` would give.
        print('{} {}'.format(idx, osp.basename(src_path)))
def train(self, train_imdb, val_imdb, test_db):
    """Run the training loop of `self.model` with `fit_generator`.

    Training batches come from `self.generator(train_imdb, ...)` and
    validation batches from `self.generator(val_imdb, ...)`, both with
    cfg.TRAIN.BATCH_SIZE.  Three callbacks are attached: a ModelCheckpoint
    writing into <self.output_dir>/prediction_ckpts, a TensorBoard writer
    for <self.output_dir>/prediction_logs, and a LambdaCallback that calls
    `self.sampler(test_db, epoch, vis=True)` after each epoch.
    """
    out_root = self.output_dir

    # Checkpoints
    ckpt_dir = osp.join(out_root, 'prediction_ckpts')
    ds_utils.maybe_create(ckpt_dir)
    # Adjacent literals concatenate into the original single template.
    ckpt_path = osp.join(
        ckpt_dir,
        "weights.{epoch:02d}-{val_output_cens_loss:.2f}"
        "-{val_output_sizes_loss:.2f}-{val_output_cens_mcl_accu:.2f}"
        "-{val_output_sizes_categorical_accuracy:.2f}.hdf5")
    checkpointer = ModelCheckpoint(
        filepath=ckpt_path, verbose=1, save_weights_only=False)

    # Logs
    log_dir = osp.join(out_root, 'prediction_logs')
    ds_utils.maybe_create(log_dir)
    logwriter = TensorBoard(log_dir=log_dir)

    # Per-epoch sampling on the test database
    vis_sampler = LambdaCallback(
        on_epoch_end=lambda epoch, logs: self.sampler(
            test_db, epoch, vis=True))

    fit_kwargs = dict(
        steps_per_epoch=int(len(train_imdb.objdb) / cfg.TRAIN.BATCH_SIZE),
        epochs=cfg.TRAIN.EPOCH,
        callbacks=[checkpointer, logwriter, vis_sampler],
        validation_data=self.generator(val_imdb, cfg.TRAIN.BATCH_SIZE),
        validation_steps=int(len(val_imdb.objdb) / cfg.TRAIN.BATCH_SIZE),
    )
    self.model.fit_generator(
        self.generator(train_imdb, cfg.TRAIN.BATCH_SIZE), **fit_kwargs)
def draw_objdb_bboxes(self, output_dir, objdb=None):
    """Save each object's image with its bounding box and label drawn on it.

    `objdb` defaults to `self._objdb`.  Every image is loaded, mirrored when
    the entry is marked 'flipped', squared with
    `ds_utils.create_squared_image`, and annotated with the entry's box and
    the caption '<entry index>_<cls>'.  Output files are
    <output_dir>/objdb_boxes/<image name>_<obj_id padded to 12><ext>.
    """
    ds_utils.maybe_create(output_dir)
    box_dir = osp.join(output_dir, 'objdb_boxes')
    ds_utils.maybe_create(box_dir)

    if objdb is None:
        objdb = self._objdb

    for i, obj in enumerate(objdb):
        im_path = obj['image']
        cls = obj['cls']
        aid = obj['obj_id']
        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        if obj['flipped']:
            img = cv2.flip(img, 1)

        img, offset_x, offset_y = \
            ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)

        # Shift a copy of the box; adding the offsets to obj['box'] itself
        # would permanently move the stored annotation.
        box = obj['box'].copy()
        box[0] += offset_x
        box[2] += offset_x
        box[1] += offset_y
        box[3] += offset_y
        bb = box.astype(int)  # builtin int; the np.int alias is gone in newer NumPy

        fontScale = 0.0007 * math.sqrt(2 * img.shape[0] * img.shape[0])
        cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]), (0, 255, 0), 1)
        # The caption uses the entry index `i`; no `j` exists in this scope.
        caption = '{:}_{:}'.format(i, cls)
        cv2.putText(img, caption, (bb[0], bb[1] - 2),
                    cv2.FONT_HERSHEY_SIMPLEX, fontScale, (0, 0, 255), 1)

        im_name, im_ext = osp.splitext(osp.basename(im_path))
        file_name = im_name + '_' + str(aid).zfill(12) + im_ext
        cv2.imwrite(osp.join(box_dir, file_name), img)
        print(i)
def dump_gist_features(self, output_dir, ctxdb=None):
    """Extract a GIST feature per context entry and pickle it to disk.

    For every entry of `ctxdb` (default: `self.objdb`),
    `self.extract_gist_feature` is called on entry['image'] and the result
    is written with cPickle to

        <output_dir>/gist/<image_set + year>/
            <image name>_<obj_id zero-padded to 12>.npy

    NOTE: despite the .npy extension the files are cPickle dumps.
    """
    if ctxdb is None:
        ctxdb = self.objdb

    # Create the directory tree one level at a time.
    ds_utils.maybe_create(output_dir)
    for part in ('gist', '{}'.format(self._image_set + self._year)):
        output_dir = osp.join(output_dir, part)
        ds_utils.maybe_create(output_dir)

    for i, ctx in enumerate(ctxdb):
        im_path = ctx['image']
        ann_id = ctx['obj_id']
        gist_feat = self.extract_gist_feature(im_path)

        im_name, _ = osp.splitext(osp.basename(im_path))
        output_path = osp.join(
            output_dir, im_name + '_' + str(ann_id).zfill(12) + '.npy')
        with open(output_path, 'wb') as fid:
            cPickle.dump(gist_feat, fid, cPickle.HIGHEST_PROTOCOL)
        print(i)
def draw_objdb_scenes(self, output_dir, objdb=None):
    """Render a scene volume per object entry; save it as pickle and preview.

    For every entry of `objdb` (default: `self.objdb`) the volume returned
    by `self.render_entry_scenes(entry, obj_id)` is pickled to
    <output_dir>/<image name>_<obj_id padded to 12>.pkl, and its first
    channel is saved next to it as a .jpg preview.
    """
    if objdb is None:
        objdb = self.objdb
    ds_utils.maybe_create(output_dir)

    for i, entry in enumerate(objdb):
        ann_id = entry['obj_id']
        output_vol = self.render_entry_scenes(entry, ann_id)

        im_name, _ = osp.splitext(osp.basename(entry['image']))
        new_name = im_name + '_' + str(ann_id).zfill(12)

        output_path = osp.join(output_dir, new_name + '.pkl')
        with open(output_path, 'wb') as fid:
            cPickle.dump(output_vol, fid, cPickle.HIGHEST_PROTOCOL)
        print(i)

        # Keep the preview in output_dir as well; a bare file name would
        # land in whatever the current working directory happens to be.
        output_path = osp.join(output_dir, new_name + '.jpg')
        cv2.imwrite(output_path, output_vol[:, :, 0].astype(np.uint8))
def draw_objdb_scenes(self, output_dir, objdb=None):
    """Dump the rendered scene volume of every object entry.

    `objdb` defaults to `self.objdb`.  Each entry is rendered with
    `self.render_entry_scenes(entry, entry['obj_id'])`.  Two files named
    <image name>_<obj_id zero-padded to 12> are written into `output_dir`:
    a .pkl holding the full volume (cPickle) and a .jpg holding channel 0
    cast to uint8.
    """
    if objdb is None:
        objdb = self.objdb
    ds_utils.maybe_create(output_dir)

    for i, entry in enumerate(objdb):
        ann_id = entry['obj_id']
        output_vol = self.render_entry_scenes(entry, ann_id)

        im_path = entry['image']
        im_name, im_ext = osp.splitext(osp.basename(im_path))
        new_name = im_name + '_' + str(ann_id).zfill(12)

        pkl_path = osp.join(output_dir, new_name + '.pkl')
        with open(pkl_path, 'wb') as fid:
            cPickle.dump(output_vol, fid, cPickle.HIGHEST_PROTOCOL)
        print(i)

        # Join with output_dir so the preview does not depend on the
        # process's current working directory.
        jpg_path = osp.join(output_dir, new_name + '.jpg')
        cv2.imwrite(jpg_path, output_vol[:, :, 0].astype(np.uint8))
def draw_objdb_masks(self, output_dir, objdb=None):
    """Save a binary mask and the source image for every object entry.

    For each entry of `objdb` (default: `self.objdb`) the mask is decoded
    from entry['poly'] with `COCOmask.decode`, reduced over its last axis,
    and binarised to 0/255.  The mask is written to
    <output_dir>/<image_set>_objdb_masks/ and the unmodified image to
    <output_dir>/<image_set>_objdb_imgs/, both as
    <image name>_<obj_id zero-padded to 12><ext>.
    """
    if objdb is None:
        objdb = self.objdb

    mask_dir = osp.join(output_dir, '{}_objdb_masks'.format(self._image_set))
    img_dir = osp.join(output_dir, '{}_objdb_imgs'.format(self._image_set))
    ds_utils.maybe_create(output_dir)
    ds_utils.maybe_create(mask_dir)
    ds_utils.maybe_create(img_dir)

    for i, obj in enumerate(objdb):
        im_path = obj['image']
        ann_id = obj['obj_id']

        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        msk = np.amax(COCOmask.decode(obj['poly']), axis=2)
        # binarize the mask
        msk = msk * 255
        _, msk = cv2.threshold(msk, 127, 255, cv2.THRESH_BINARY)
        msk = msk.astype(np.uint8)

        im_name, im_ext = osp.splitext(osp.basename(im_path))
        file_name = im_name + '_' + str(ann_id).zfill(12) + im_ext
        cv2.imwrite(osp.join(mask_dir, file_name), msk)
        cv2.imwrite(osp.join(img_dir, file_name), img)
        print(i)
def sample(self, src_ctxdb, dst_ctxdb, K, mode=0, show_gt=False):
    """Compose each source context with its retrieved destination objects.

    `self.build_search_tree(dst_ctxdb, mode)` is built once; then
    `self.inference_ctx(src_ctx, mode, tree, K)` yields, per source entry, a
    list of (destination index, distance) candidates.  Each candidate is
    merged with the source by `self.alpha_compose`.  Results are saved as
    <bg image name>_<rank>_<candidate no.>_<obj_id padded to 12><ext> in
    <self.output_dir>/composite_colors (image) and
    <self.output_dir>/composite_masks (mask * 255 as uint8).  If `show_gt`
    is set, the source background is prepended horizontally to the image.
    """
    # Create output directories
    comp_dir = osp.join(self.output_dir, 'composite_colors')
    mask_dir = osp.join(self.output_dir, 'composite_masks')
    ds_utils.maybe_create(comp_dir)
    ds_utils.maybe_create(mask_dir)

    # Build the search structure once for all queries
    dst_tree = self.build_search_tree(dst_ctxdb, mode)

    def _compose(src_ctx, j, cand):
        """Compose one candidate with `src_ctx` and write both outputs."""
        dst_index = cand[0]
        dst_dist = cand[1]  # read but not used further
        dst_ctx = dst_ctxdb[dst_index]
        composite_image, composite_mask = self.alpha_compose(src_ctx, dst_ctx)

        bg_path = src_ctx['bg_image']
        if show_gt:
            src_img = cv2.imread(bg_path, cv2.IMREAD_COLOR)
            composite_image = np.concatenate(
                (src_img, composite_image), axis=1)

        im_name, im_ext = osp.splitext(osp.basename(bg_path))
        rank = src_ctx['rank']
        file_name = (im_name + '_%02d' % rank + '_%02d' % j + '_'
                     + str(dst_ctx['obj_id']).zfill(12) + im_ext)

        cv2.imwrite(osp.join(comp_dir, file_name), composite_image)
        cv2.imwrite(osp.join(mask_dir, file_name),
                    (composite_mask * 255).astype(np.uint8))

    # Retrieval
    for i, src_ctx in enumerate(src_ctxdb):
        cand_list = self.inference_ctx(src_ctx, mode, dst_tree, K)
        for j, cand in enumerate(cand_list):
            _compose(src_ctx, j, cand)
        print(i)
def draw_objdb_masks(self, output_dir, objdb=None):
    """Write the binarised mask and the image of each object entry.

    `objdb` defaults to `self.objdb`.  The mask comes from
    `COCOmask.decode(entry['poly'])`, max-reduced over axis 2, scaled by 255
    and thresholded at 127.  Masks go to <output_dir>/<image_set>_objdb_masks
    and images to <output_dir>/<image_set>_objdb_imgs; both files are named
    <image name>_<obj_id zero-padded to 12><ext>.
    """
    if objdb is None:
        objdb = self.objdb

    mask_dir = osp.join(output_dir,
                        '{}_objdb_masks'.format(self._image_set))
    img_dir = osp.join(output_dir,
                       '{}_objdb_imgs'.format(self._image_set))
    for folder in (output_dir, mask_dir, img_dir):
        ds_utils.maybe_create(folder)

    for i, obj in enumerate(objdb):
        im_path = obj['image']
        ann_id = obj['obj_id']
        poly = obj['poly']

        img = cv2.imread(im_path, cv2.IMREAD_COLOR)

        # binarize the mask
        msk = np.amax(COCOmask.decode(poly), axis=2) * 255
        _, msk = cv2.threshold(msk, 127, 255, cv2.THRESH_BINARY)
        msk = msk.astype(np.uint8)

        im_name, im_ext = osp.splitext(osp.basename(im_path))
        file_name = im_name + '_' + str(ann_id).zfill(12) + im_ext

        output_path = osp.join(mask_dir, file_name)
        cv2.imwrite(output_path, msk)
        output_path = osp.join(img_dir, file_name)
        cv2.imwrite(output_path, img)
        print(i)
def draw_roidb_masks(self, output_dir, roidb=None):
    """Save one merged, dilated mask plus the image for every roidb entry.

    For each entry of `roidb` (default: `self.roidb`) every item of
    entry['polys'] is decoded with `COCOmask.decode`, binarised to 0/255,
    dilated via `ds_utils.dilate_mask(mask, 9)` and merged into one mask
    with an element-wise maximum.  The mask is written to
    <output_dir>/<image_set>_roidb_masks/ and the unmodified image to
    <output_dir>/<image_set>_roidb_imgs/, both under the image's base name.
    """
    mask_dir = osp.join(output_dir, '{}_roidb_masks'.format(self._image_set))
    img_dir = osp.join(output_dir, '{}_roidb_imgs'.format(self._image_set))
    ds_utils.maybe_create(output_dir)
    ds_utils.maybe_create(mask_dir)
    ds_utils.maybe_create(img_dir)

    if roidb is None:
        roidb = self.roidb

    for i, rois in enumerate(roidb):
        im_path = rois['image']
        width = rois['width']
        height = rois['height']

        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        msk = np.zeros((height, width), dtype=np.uint8)

        for rle in rois['polys']:
            tmp = np.amax(COCOmask.decode(rle), axis=2) * 255
            _, tmp = cv2.threshold(tmp, 127, 255, cv2.THRESH_BINARY)
            tmp = tmp.astype(np.uint8)
            tmp = ds_utils.dilate_mask(tmp, 9)
            msk = np.maximum(msk, tmp)

        base_name = osp.basename(im_path)
        cv2.imwrite(osp.join(mask_dir, base_name), msk)
        cv2.imwrite(osp.join(img_dir, base_name), img)
        print(i)
def draw_roidb_masks(self, output_dir, roidb=None):
    """Write the union of all object masks, and the image, per roidb entry.

    `roidb` defaults to `self.roidb`.  Each element of entry['polys'] is
    decoded by `COCOmask.decode`, max-reduced over axis 2, thresholded to
    0/255 and dilated with `ds_utils.dilate_mask(mask, 9)`; the per-object
    masks are combined with `np.maximum`.  Outputs use the image's base name
    in <output_dir>/<image_set>_roidb_masks and
    <output_dir>/<image_set>_roidb_imgs.
    """
    mask_dir = osp.join(output_dir,
                        '{}_roidb_masks'.format(self._image_set))
    img_dir = osp.join(output_dir,
                       '{}_roidb_imgs'.format(self._image_set))
    for folder in (output_dir, mask_dir, img_dir):
        ds_utils.maybe_create(folder)

    if roidb is None:
        roidb = self.roidb

    for i, rois in enumerate(roidb):
        im_path = rois['image']
        rles = rois['polys']
        img = cv2.imread(im_path, cv2.IMREAD_COLOR)

        # Start from an empty mask with the entry's recorded dimensions.
        msk = np.zeros((rois['height'], rois['width']), dtype=np.uint8)
        for rle in rles:
            tmp = np.amax(COCOmask.decode(rle), axis=2) * 255
            _, tmp = cv2.threshold(tmp, 127, 255, cv2.THRESH_BINARY)
            tmp = ds_utils.dilate_mask(tmp.astype(np.uint8), 9)
            msk = np.maximum(msk, tmp)

        output_path = osp.join(mask_dir, osp.basename(im_path))
        cv2.imwrite(output_path, msk)
        output_path = osp.join(img_dir, osp.basename(im_path))
        cv2.imwrite(output_path, img)
        print(i)
def dump_gist_features(self, output_dir, ctxdb=None):
    """Compute and store the GIST feature of every context entry's image.

    `ctxdb` defaults to `self.objdb`.  The feature returned by
    `self.extract_gist_feature(entry['image'])` is serialised with cPickle
    into <output_dir>/gist/<image_set + year>/ under the name
    <image name>_<obj_id zero-padded to 12>.npy.

    NOTE: despite the .npy extension the files are cPickle dumps.
    """
    if ctxdb is None:
        ctxdb = self.objdb

    # Each directory level is created explicitly before descending.
    ds_utils.maybe_create(output_dir)
    output_dir = osp.join(output_dir, 'gist')
    ds_utils.maybe_create(output_dir)
    output_dir = osp.join(output_dir,
                          '{}'.format(self._image_set + self._year))
    ds_utils.maybe_create(output_dir)

    for i, ctx in enumerate(ctxdb):
        im_path = ctx['image']
        gist_feat = self.extract_gist_feature(im_path)

        im_name, _ = osp.splitext(osp.basename(im_path))
        file_name = im_name + '_' + str(ctx['obj_id']).zfill(12) + '.npy'
        with open(osp.join(output_dir, file_name), 'wb') as fid:
            cPickle.dump(gist_feat, fid, cPickle.HIGHEST_PROTOCOL)
        print(i)
def sampler(self, test_db, epoch=0, K=3, vis=False):
    """Predict K boxes for every background image in `test_db`.

    The inference branches are first synchronised with the current weights
    of `self.model`.  For each entry, `self.single_sample(entry, K=K)`
    yields normalised boxes and a heatmap; boxes are mapped to the squared
    image, shifted back by the squaring offsets and clipped to the original
    image size.  Per image a JSON file with keys 'bg_image', 'name' and
    'boxes' is written to <self.output_dir>/prediction_jsons.

    When `vis` is true, an overlay (*_ol) and the heatmap (*_hm) are written
    to <self.output_dir>/prediction_vis, prefixed with the 4-digit `epoch`.

    Returns a flat list with one dict per predicted box, holding
    'bg_image', 'name', 'box' and 'rank' (the box's index for its image).
    """
    # assume each entry in test_db has field: 'bg_image', 'bg_layout'
    self.center_inference.set_weights(
        self.get_center_branch_weights(self.model))
    self.size_inference.set_weights(
        self.get_size_branch_weights(self.model))

    output_dir = osp.join(self.output_dir, 'prediction_jsons')
    ds_utils.maybe_create(output_dir)
    if vis:
        vis_dir = osp.join(self.output_dir, 'prediction_vis')
        ds_utils.maybe_create(vis_dir)

    res_db = []
    for entry in test_db:
        im_path = entry['bg_image']
        im_name, im_ext = osp.splitext(osp.basename(im_path))
        ori_img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        ori_height, ori_width = ori_img.shape[0], ori_img.shape[1]

        img, ox, oy = ds_utils.create_squared_image(ori_img, cfg.PIXEL_MEANS)
        width, height = img.shape[1], img.shape[0]

        xywhs, _, heatmap = self.single_sample(entry, K=K)
        xywhs = ds_utils.denormalize_xywh(xywhs, width, height)
        xyxys = ds_utils.xywh_to_xyxy(xywhs, width, height)
        # Shift from squared-image coordinates back to the original image.
        xyxys[:, 0] -= ox
        xyxys[:, 1] -= oy
        xyxys[:, 2] -= ox
        xyxys[:, 3] -= oy
        xyxys = ds_utils.clip_boxes(xyxys, ori_width, ori_height)
        heatmap = heatmap[oy:(oy + ori_height), ox:(ox + ori_width), :]

        res = {}
        res['bg_image'] = im_path
        res['name'] = im_name
        res['boxes'] = xyxys.tolist()
        json_path = osp.join(output_dir, im_name + '.json')
        with open(json_path, 'w') as res_file:
            json.dump(res, res_file, indent=4, separators=(',', ': '))

        if vis:
            # NOTE: this aliases ori_img, so the boxes are drawn in place.
            vis_img = ori_img
            for j in range(xyxys.shape[0]):
                bb = xyxys[j]
                color = self.palette[j % len(self.palette)]
                cv2.rectangle(vis_img, (bb[0], bb[1]), (bb[2], bb[3]),
                              color, 4)

            # Scale the image by (1 + heatmap channel 1 / 255) on channel 1.
            # Builtin float replaces the np.float alias removed from NumPy.
            tmp = np.ones_like(heatmap, dtype=float)
            tmp[:, :, 1] += heatmap[:, :, 1] / 255.0
            overlay = np.multiply(vis_img, tmp)
            overlay = np.minimum(overlay, 255).astype(np.uint8)

            output_path = osp.join(
                vis_dir, '%04d_' % epoch + im_name + '_ol' + im_ext)
            cv2.imwrite(output_path, overlay)
            output_path = osp.join(
                vis_dir, '%04d_' % epoch + im_name + '_hm' + im_ext)
            cv2.imwrite(output_path, heatmap)

        for j in range(len(res['boxes'])):
            record = {}
            record['bg_image'] = im_path
            record['name'] = im_name
            record['box'] = xyxys[j]
            record['rank'] = j
            res_db.append(record)

    return res_db
return output if __name__ == '__main__': import argparse np.random.seed(cfg.RNG_SEED) parser = argparse.ArgumentParser() parser.add_argument('--input_images_dir', help='directory of the input color images') parser.add_argument('--input_detections_dir', help='directory of the input detection files') parser.add_argument('--output_layouts_dir', help='directory of the output layout images') opt, unparsed = parser.parse_known_args() imgs_dir = opt.input_images_dir dets_dir = opt.input_detections_dir layouts_dir = opt.output_layouts_dir ds_utils.maybe_create(layouts_dir) palette_path = osp.join(cfg.DATA_DIR, 'coco', 'color_palette.json') color_palette = json.loads(open(palette_path,'r').read()) img_paths = sorted(glob(osp.join(imgs_dir, '*'))) img_names = [osp.splitext(osp.basename(x))[0] for x in img_paths] for i in range(len(img_names)): x = img_names[i] output = render_layout(x, color_palette) output_path = osp.join(layouts_dir, x+'.jpg') cv2.imwrite(output_path, output) print i
def sampler(self, test_db, epoch=0, K=3, vis=False):
    """Run box prediction on `test_db` and return one record per box.

    Steps per entry: read entry['bg_image'], square it with
    `ds_utils.create_squared_image`, call `self.single_sample(entry, K=K)`,
    convert the predicted boxes to corner coordinates of the original image
    (denormalise, subtract the squaring offsets, clip), and dump
    {'bg_image', 'name', 'boxes'} as JSON into
    <self.output_dir>/prediction_jsons/<image name>.json.

    With `vis` set, <self.output_dir>/prediction_vis receives
    '<epoch:04d>_<name>_ol<ext>' (image with boxes, modulated by the heatmap)
    and '<epoch:04d>_<name>_hm<ext>' (the cropped heatmap).

    Returns a list of dicts with keys 'bg_image', 'name', 'box', 'rank'.
    Before sampling, `self.center_inference` and `self.size_inference` are
    given the corresponding branch weights of `self.model`.
    """
    # assume each entry in test_db has field: 'bg_image', 'bg_layout'
    self.center_inference.set_weights(
        self.get_center_branch_weights(self.model))
    self.size_inference.set_weights(
        self.get_size_branch_weights(self.model))

    output_dir = osp.join(self.output_dir, 'prediction_jsons')
    ds_utils.maybe_create(output_dir)
    if vis:
        vis_dir = osp.join(self.output_dir, 'prediction_vis')
        ds_utils.maybe_create(vis_dir)

    res_db = []
    for entry in test_db:
        im_path = entry['bg_image']
        im_name, im_ext = osp.splitext(osp.basename(im_path))
        ori_img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        img, ox, oy = ds_utils.create_squared_image(
            ori_img, cfg.PIXEL_MEANS)
        width = img.shape[1]
        height = img.shape[0]

        # The grid indices returned alongside the boxes are not needed here.
        xywhs, _, heatmap = self.single_sample(entry, K=K)
        xywhs = ds_utils.denormalize_xywh(xywhs, width, height)
        xyxys = ds_utils.xywh_to_xyxy(xywhs, width, height)
        # Remove the squaring offsets from both corners of every box.
        xyxys[:, 0] -= ox
        xyxys[:, 2] -= ox
        xyxys[:, 1] -= oy
        xyxys[:, 3] -= oy
        xyxys = ds_utils.clip_boxes(xyxys, ori_img.shape[1],
                                    ori_img.shape[0])
        heatmap = heatmap[oy:(oy + ori_img.shape[0]),
                          ox:(ox + ori_img.shape[1]), :]

        res = {}
        res['bg_image'] = im_path
        res['name'] = im_name
        res['boxes'] = xyxys.tolist()
        json_path = osp.join(output_dir, im_name + '.json')
        with open(json_path, 'w') as res_file:
            json.dump(res, res_file, indent=4, separators=(',', ': '))

        if vis:
            vis_img = ori_img  # same array: rectangles are drawn in place
            num_colors = len(self.palette)
            for j in range(xyxys.shape[0]):
                bb = xyxys[j]
                cv2.rectangle(vis_img, (bb[0], bb[1]), (bb[2], bb[3]),
                              self.palette[j % num_colors], 4)

            # dtype=float is what the removed np.float alias referred to.
            tmp = np.ones_like(heatmap, dtype=float)
            tmp[:, :, 1] += heatmap[:, :, 1] / 255.0
            overlay = np.minimum(np.multiply(vis_img, tmp),
                                 255).astype(np.uint8)

            prefix = '%04d_' % epoch + im_name
            cv2.imwrite(osp.join(vis_dir, prefix + '_ol' + im_ext), overlay)
            cv2.imwrite(osp.join(vis_dir, prefix + '_hm' + im_ext), heatmap)

        # A distinct name avoids shadowing the loop variable `entry`.
        for j in range(len(res['boxes'])):
            box_record = {
                'bg_image': im_path,
                'name': im_name,
                'box': xyxys[j],
                'rank': j,
            }
            res_db.append(box_record)

    return res_db
def get_minibatch(self, imdb, vis=False):
    """Return one training minibatch built from `imdb`.

    `imdb.get_background_minibatch()` supplies images, layouts, boxes and
    grid indices.  Column 0 of the grid indices is turned into ROIs
    (`ds_utils.centers_to_rois`) and a one-hot vector of `self.cen_dims`
    classes; column 1 into a one-hot vector of `self.size_dims` classes.

    With `vis` enabled, cfg.TRAIN.BATCH_SIZE samples are dumped to
    <self.output_dir>/minibatch: image and layout with the true box and the
    grid-decoded box drawn on them, and the ROI map resized to
    cfg.PREDICT_RESOLUTION.

    Returns (images, layouts, rois, cens_onehot, sizes_onehot).
    """
    batch_size = cfg.TRAIN.BATCH_SIZE
    resolution = cfg.PREDICT_RESOLUTION
    grid_shape = cfg.GRID_SHAPE

    images, layouts, boxes, grids = imdb.get_background_minibatch()
    rois = ds_utils.centers_to_rois(
        grids[:, 0], grid_shape[:2], grid_shape[:2])
    cens_onehot = to_categorical(grids[:, 0], self.cen_dims)
    sizes_onehot = to_categorical(grids[:, 1], self.size_dims)

    def _to_pixel_xyxy(xywh):
        # Normalised xywh (1 x 4) -> squeezed pixel corner coordinates.
        pixel_xywh = ds_utils.denormalize_xywh(
            xywh, resolution[1], resolution[0])
        return ds_utils.xywh_to_xyxy(
            pixel_xywh, resolution[1], resolution[0]).squeeze()

    def _outline(canvas, true_xyxy, grid_xyxy):
        # True box first, grid-decoded box second.
        cv2.rectangle(canvas, (true_xyxy[0], true_xyxy[1]),
                      (true_xyxy[2], true_xyxy[3]), (0, 255, 0), 1)
        cv2.rectangle(canvas, (grid_xyxy[0], grid_xyxy[1]),
                      (grid_xyxy[2], grid_xyxy[3]), (255, 0, 0), 1)

    if vis:
        output_dir = osp.join(self.output_dir, 'minibatch')
        ds_utils.maybe_create(output_dir)

        for n in range(batch_size):
            img = images[n].copy()
            lyo = layouts[n].copy()

            indices = np.array([np.argmax(cens_onehot[n, :]),
                                np.argmax(sizes_onehot[n, :])])

            true_xyxy = _to_pixel_xyxy(boxes[n, :].reshape((1, 4)))
            grid_xyxy = _to_pixel_xyxy(ds_utils.indices_to_boxes(
                indices.reshape((1, 2)), grid_shape))

            _outline(img, true_xyxy, grid_xyxy)
            _outline(lyo, true_xyxy, grid_xyxy)

            roi = rois[n].copy()
            roi = cv2.resize((roi * 255).astype(np.uint8),
                             (resolution[1], resolution[0]))

            cv2.imwrite(osp.join(output_dir, 'img_%06d.jpg' % n), img)
            cv2.imwrite(osp.join(output_dir, 'lyo_%06d.jpg' % n), lyo)
            cv2.imwrite(osp.join(output_dir, 'roi_%06d.jpg' % n), roi)

    return images, layouts, rois, cens_onehot, sizes_onehot