def load_datasets(gt_dataset_file, pred_dataset_file):
    """Load the ground-truth and predicted (results) datasets.

    Both files are parsed with ``ImageDataset.from_json``. After loading, the
    two datasets are required to have the same number of images and the same
    total number of objects (summed over every image's 'object_infos').

    Args:
        gt_dataset_file: Path to the ground-truth ImageDataset JSON file.
        pred_dataset_file: Path to the predicted ImageDataset JSON file.

    Returns:
        Tuple ``(gt_dataset, pred_dataset)``.

    Raises:
        AssertionError: If either path does not exist, or if the datasets
            disagree in image count or total object count.
    """
    assert osp.exists(gt_dataset_file), \
        "ImageDataset filepath {} does not exist..".format(gt_dataset_file)
    assert osp.exists(pred_dataset_file), \
        "ImageDataset filepath {} does not exist.".format(pred_dataset_file)

    print('Loading groundtruth dataset from {}'.format(gt_dataset_file))
    gt_dataset = ImageDataset.from_json(gt_dataset_file)
    print('Loaded {} dataset with {} annotations'.format(
        gt_dataset.name(), gt_dataset.num_of_images()))

    print('Loading predited dataset from {}'.format(pred_dataset_file))
    pred_dataset = ImageDataset.from_json(pred_dataset_file)
    print('Loaded {} dataset with {} annotations'.format(
        pred_dataset.name(), pred_dataset.num_of_images()))

    assert gt_dataset.num_of_images() == pred_dataset.num_of_images()

    num_of_objects_gt = sum(
        len(image_info['object_infos'])
        for image_info in gt_dataset.image_infos())
    # Count the objects of the *predicted* dataset here; counting the
    # ground truth twice would make the check below vacuous.
    num_of_objects_pred = sum(
        len(image_info['object_infos'])
        for image_info in pred_dataset.image_infos())
    assert num_of_objects_gt == num_of_objects_pred, "{} != {}".format(
        num_of_objects_gt, num_of_objects_pred)

    return gt_dataset, pred_dataset
def main():
    """Interactively browse the predictions of an ImageDataset JSON file.

    Command-line arguments:
        pred_dataset_file: Path to the predicted (results) JSON dataset file.
        -s / --score_threshold: Objects that carry a 'score' lower than this
            value are skipped (default 0.1).

    For the image currently selected by the ``WaitKeyNavigator``, every
    object that has a 'bbx_visible' entry (and passes the score threshold)
    gets its box drawn; when the object also has a 'category', the category
    text is drawn at the floored top-left corner of the box on a filled
    black background. The loop ends when ``wait_nav.process_key()`` returns
    a truthy value.

    Raises:
        AssertionError: If the dataset file does not exist or an image of
            the dataset cannot be read.
    """
    parser = argparse.ArgumentParser(description="Visualize Results")
    parser.add_argument("pred_dataset_file",
                        help="Path to predicted (results) JSON dataset file")
    parser.add_argument("-s", "--score_threshold", default=0.1, type=float,
                        help="Score thresold")
    args = parser.parse_args()

    assert osp.exists(args.pred_dataset_file), \
        "ImageDataset filepath {} does not exist.".format(
            args.pred_dataset_file)

    print('Loading predited dataset from {}'.format(args.pred_dataset_file))
    pred_dataset = ImageDataset.from_json(args.pred_dataset_file)
    print('Loaded {} dataset with {} annotations'.format(
        pred_dataset.name(), pred_dataset.num_of_images()))
    print("score_threshold = {}".format(args.score_threshold))

    cv2.namedWindow('image', cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)
    cv2.resizeWindow('image', 2048, 1024)

    wait_nav = WaitKeyNavigator(pred_dataset.num_of_images())
    wait_nav.print_key_map()

    # Label style is the same for every object, so set it up once.
    font_face = cv2.FONT_HERSHEY_PLAIN
    font_scale = 0.8
    thickness = 1

    quit_viz = False
    while not quit_viz:
        image_info = pred_dataset.image_infos()[wait_nav.index]
        img_path = osp.join(pred_dataset.rootdir(), image_info['image_file'])
        image = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than deep inside drawing.
        assert image is not None, "Failed to read image {}".format(img_path)

        for obj_info in image_info['object_infos']:
            if 'bbx_visible' not in obj_info:
                continue
            # Objects without a score are always shown.
            if 'score' in obj_info and obj_info['score'] < args.score_threshold:
                continue

            draw_bbx(image, obj_info['bbx_visible'])

            if 'category' not in obj_info:
                continue
            obj_text = obj_info['category']
            # Top-left corner of the box, floored to integer pixels.
            tl = tuple(np.floor(obj_info['bbx_visible'][:2]).astype(int))
            ts, baseline = cv2.getTextSize(obj_text, font_face, font_scale,
                                           thickness)
            # Filled black rectangle behind the label text.
            cv2.rectangle(image, (tl[0], tl[1] + baseline),
                          (tl[0] + ts[0], tl[1] - ts[1]), (0, 0, 0),
                          cv2.FILLED)
            cv2.addText(image, obj_text, tl, 'times', color=(0, 255, 0))

        cv2.displayOverlay(
            'image',
            'Image: {}'.format(osp.splitext(osp.basename(img_path))[0]))
        cv2.imshow('image', image)

        quit_viz = wait_nav.process_key()
def main():
    """Test a Fast-RCNN style data layer against an ImageDataset.

    Command-line arguments:
        dataset: ImageDataset JSON file.
        -n / --net_file: Net (prototxt) file (required).
        -g / --gpu: Gpu Id (default 0).
        -e / --epochs: Number of epochs (default 2).
        -p / --pause: Milliseconds passed to ``cv2.waitKey`` after each
            image; 0 pauses indefinitely (default 0).

    The first layer of the net (``net.layers[0]``) is fed the dataset, then
    for every epoch and batch a forward pass is run and the output blobs
    ('image_scales', 'image_flippings', 'roi', optional 'gt_bbx_amodal' /
    'gt_bbx_crop', 'gt_center_proj', 'gt_viewpoint') are checked for shape
    and value sanity. Each image of the batch is shown next to the image
    blob with its ROIs drawn. ESC aborts the current epoch, 'p' toggles the
    pause value. Finally the layer's ``data_samples`` are compared field by
    field with the (filtered) dataset.

    Raises:
        AssertionError: If the net file is missing or any consistency check
            fails.
    """
    description = ('Test Fast-RCNN style datalayer')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("dataset", help="ImageDataset JSON file")
    parser.add_argument("-n", "--net_file", required=True,
                        help="Net (prototxt) file")
    parser.add_argument("-g", "--gpu", type=int, default=0, help="Gpu Id.")
    parser.add_argument("-e", "--epochs", type=int, default=2,
                        help="Number of epochs")
    parser.add_argument(
        "-p", "--pause", default=0, type=int,
        help="Set number of milliseconds to pause. Use 0 to pause indefinitely"
    )
    args = parser.parse_args()

    # init caffe
    caffe.set_device(args.gpu)
    caffe.set_mode_gpu()

    assert osp.exists(args.net_file), 'Net file "{}" do not exist'.format(
        args.net_file)
    net = caffe.Net(args.net_file, caffe.TEST)

    print('Loading dataset from {}'.format(args.dataset))
    dataset = ImageDataset.from_json(args.dataset)
    print('Loaded {} dataset with {} annotations'.format(
        dataset.name(), dataset.num_of_images()))

    # The data layer under test is the first layer of the net.
    data_layer = net.layers[0]
    data_layer.add_dataset(dataset)
    data_layer.print_params()
    data_layer.generate_datum_ids()

    required_object_info_fields = data_layer.required_object_info_fields
    print(
        "required_object_info_fields = {}".format(required_object_info_fields))

    # Make sure we remove bad objects like the data layer does
    filter_dataset(dataset, required_object_info_fields)

    number_of_images = dataset.num_of_images()
    assert data_layer.number_of_datapoints() == number_of_images

    num_of_layer_objects = sum(
        len(img_info['object_infos']) for img_info in data_layer.data_samples)
    num_of_dataset_objects = sum(
        len(img_info['object_infos']) for img_info in dataset.image_infos())
    assert num_of_layer_objects == num_of_dataset_objects, "{} != {}".format(
        num_of_layer_objects, num_of_dataset_objects)

    cv2.namedWindow('blob_image', cv2.WINDOW_AUTOSIZE)
    cv2.namedWindow('original_image', cv2.WINDOW_AUTOSIZE)

    image_blob_shape = net.blobs['input_image'].data.shape
    assert len(image_blob_shape) == 4, 'Expects 4D data blob'
    assert image_blob_shape[1] == 3, \
        'Expects 2nd channel to be 3 for BGR image'

    batch_size = image_blob_shape[0]
    num_of_batches = int(np.ceil(dataset.num_of_images() / float(batch_size)))

    exit_loop = False
    for epoch_id in xrange(args.epochs):
        print("-----------------------Epoch # {} / {} -----------------------------".format(
            epoch_id, args.epochs))
        for b in trange(num_of_batches):
            start_idx = batch_size * b
            end_idx = min(batch_size * (b + 1), number_of_images)

            # Run forward pass
            _ = net.forward()

            # Get image_scales and image_flippings
            image_scales = net.blobs['image_scales'].data
            # Builtin bool: the np.bool alias no longer exists in current
            # NumPy releases and was always the same type.
            image_flippings = net.blobs['image_flippings'].data.astype(bool)
            assert image_scales.shape == image_flippings.shape == (
                batch_size, )

            # Get roi_blob and from that determine number_of_rois
            roi_blob = net.blobs['roi'].data
            assert roi_blob.ndim == 2 and roi_blob.shape[1] == 5
            number_of_rois = roi_blob.shape[0]
            for roi_id in xrange(number_of_rois):
                # Column 0 is the index of the image within the batch, so
                # the valid range is [0, batch_size).
                roi_batch_index = roi_blob[roi_id, 0]
                assert 0 <= roi_batch_index < batch_size
                assert_bbx(roi_blob[roi_id, -4:])

            # Check the bbx blobs
            for bbx_blob_name in ['gt_bbx_amodal', 'gt_bbx_crop']:
                if bbx_blob_name in net.blobs:
                    bbx_blob = net.blobs[bbx_blob_name].data
                    assert bbx_blob.shape == (number_of_rois, 4)
                    for roi_id in xrange(number_of_rois):
                        assert_bbx(bbx_blob[roi_id, :])

            # Check the center proj blobs
            center_proj_blob = net.blobs['gt_center_proj'].data
            assert center_proj_blob.shape == (number_of_rois, 2)

            # Check vp blobs
            vp_blob = net.blobs['gt_viewpoint'].data
            assert vp_blob.shape == (number_of_rois, 3), \
                "Weird vp shape = {}".format(vp_blob.shape)
            assert (vp_blob >= -np.pi).all() and (vp_blob < np.pi).all(), \
                "Bad vp = \n{}".format(vp_blob)

            for i in xrange(start_idx, end_idx):
                # Position of image i inside the current batch.
                batch_pos = i - start_idx

                original_image = cv2.imread(
                    osp.join(dataset.rootdir(),
                             dataset.image_infos()[i]['image_file']))
                cv2.imshow('original_image', original_image)

                image_blob = net.blobs['input_image'].data[batch_pos]
                image_blob_bgr8 = data_layer.make_bgr8_from_blob(
                    image_blob).copy()

                # Draw only the ROIs that belong to this image.
                for roi_id in xrange(roi_blob.shape[0]):
                    roi_batch_index = roi_blob[roi_id, 0]
                    if roi_batch_index == batch_pos:
                        bbx_roi = roi_blob[roi_id, -4:].astype(np.float32)
                        cv2.rectangle(image_blob_bgr8, tuple(bbx_roi[:2]),
                                      tuple(bbx_roi[2:]), (0, 255, 0), 1)

                cv2.imshow('blob_image', image_blob_bgr8)
                cv2.displayOverlay(
                    'blob_image',
                    'Flipped' if image_flippings[batch_pos] else 'Original')

                key = cv2.waitKey(args.pause)
                if key == 27:
                    # Key code 27 is ESC.
                    cv2.destroyAllWindows()
                    exit_loop = True
                    break
                elif key == ord('p'):
                    # `not` turns the millisecond value into a bool: a
                    # non-zero pause becomes False (0, i.e. pause
                    # indefinitely) and 0 becomes True (1).
                    args.pause = not args.pause

            if exit_loop is True:
                print('User presessed ESC. Exiting epoch {}'.format(epoch_id))
                # Reset the flag so the next epoch runs normally.
                exit_loop = False
                break
        print("-----------------------End of epoch -----------------------------")

    # Now check the data_layer.data_samples
    # Trailing comma: Python 2 print statement, suppresses the newline so
    # that "Done." ends up on the same line.
    print("Verifying data_samples ..."),
    for im_info_layer, im_info_dataset in zip(data_layer.data_samples,
                                              dataset.image_infos()):
        for im_field in ['image_size', 'image_intrinsic']:
            if im_field in im_info_dataset:
                assert np.all(
                    im_info_layer[im_field] == im_info_dataset[im_field])

        layer_obj_infos = im_info_layer['object_infos']
        dataset_obj_infos = im_info_dataset['object_infos']

        assert len(layer_obj_infos) == len(
            dataset_obj_infos), "{} != {}".format(len(layer_obj_infos),
                                                  len(dataset_obj_infos))
        for obj_info_layer, obj_info_dataset in zip(layer_obj_infos,
                                                    dataset_obj_infos):
            assert obj_info_layer['id'] == obj_info_dataset['id']
            assert obj_info_layer['category'] == obj_info_dataset['category']

            for obj_field in required_object_info_fields:
                assert np.all(obj_info_layer[obj_field] == np.array(obj_info_dataset[obj_field])), \
                    "For obj_field '{}': {} vs {}".format(obj_field, obj_info_layer[obj_field], obj_info_dataset[obj_field])
    print("Done.")
def main():
    """Interactively visualize the annotations of an ImageDataset JSON file.

    Command-line arguments:
        image_dataset_file: Path to ImageDataset JSON file.

    For the image currently selected by the ``WaitKeyNavigator``, each
    object is drawn in a random color: its 'bbx_visible' and 'bbx_amodal'
    boxes and its 'center_proj' point, whichever are present. When an object
    has both 'viewpoint' and 'center_proj', three lines (red, green, blue in
    BGR order for the x, y, z unit points) are drawn from the projected
    object origin. The loop ends when ``wait_nav.process_key()`` returns a
    truthy value.

    Raises:
        AssertionError: If an image file is missing or its shape does not
            match the 'image_size' stored in the dataset.
    """
    parser = argparse.ArgumentParser(description="Visualize Image dataset")
    parser.add_argument("image_dataset_file",
                        help="Path to ImageDataset JSON file")
    args = parser.parse_args()

    # Trailing comma: Python 2 print statement, suppresses the newline so
    # that "Done." ends up on the same line.
    print('Loading image dataset from {} ...'.format(args.image_dataset_file)),
    dataset = ImageDataset.from_json(args.image_dataset_file)
    print('Done.')
    print(dataset)

    cv2.namedWindow('Image', cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)
    cv2.resizeWindow('Image', 2048, 1024)

    wait_nav = WaitKeyNavigator(dataset.num_of_images())
    wait_nav.print_key_map()

    quit_viz = False
    while not quit_viz:
        image_id = wait_nav.index
        image_info = dataset.image_infos()[image_id]
        W, H = image_info['image_size']

        img_path = osp.join(dataset.rootdir(), image_info['image_file'])
        assert osp.exists(img_path)
        image = cv2.imread(img_path)
        assert image.shape == (H, W, 3)

        if 'image_intrinsic' in image_info:
            # Builtin float: the np.float alias no longer exists in current
            # NumPy releases and was always the same type.
            K = np.array(image_info['image_intrinsic'], dtype=float)
        else:
            # No intrinsics stored: fall back to a focal length of 200 with
            # the principal point at the image center.
            f = 200.
            K = np.array([[f, 0., W / 2.], [0., f, H / 2.], [0., 0., 1.]])

        K_inv = np.linalg.inv(K)

        for obj_info in image_info['object_infos']:
            # Random hue with saturation in [0.5, 1) and lightness in
            # [0.4, 0.6).
            hue = random.random()
            saturation = 0.5 + random.random() / 2.0
            lightness = 0.4 + random.random() / 5.0
            color = [
                int(256 * channel)
                for channel in colorsys.hls_to_rgb(hue, lightness, saturation)
            ]

            if 'bbx_visible' in obj_info:
                draw_bbx(image, obj_info['bbx_visible'], color=color,
                         thickness=1)
            if 'bbx_amodal' in obj_info:
                draw_bbx(image, obj_info['bbx_amodal'], color=color,
                         thickness=1)

            # Reset per object so a center from a previous object can never
            # leak into the viewpoint drawing below.
            center_proj = None
            if 'center_proj' in obj_info:
                center_proj = np.array(obj_info['center_proj'], dtype=float)
                cv2.circle(image, tuple(center_proj.astype(np.float32)), 3,
                           color, -1)

            # The axes are anchored on the ray through center_proj, so they
            # can only be drawn when this object provides it.
            if 'viewpoint' in obj_info and center_proj is not None:
                vp = np.array(obj_info['viewpoint'], dtype=float)
                R_vp = rotation_from_viewpoint(vp)
                distance = obj_info.get('center_dist', 10.)
                obj_pose = Pose(R=R_vp, t=np.array([0., 0., distance]))

                # Rotate the pose so that the +z direction lines up with the
                # back-projected center_proj ray.
                center_proj_ray = K_inv.dot(np.append(center_proj, 1))
                delta_rot = rotation_from_two_vectors(
                    np.array([0., 0., 1.]), center_proj_ray)
                obj_pose.R = delta_rot.dot(obj_pose.R)
                obj_pose.t = delta_rot.dot(obj_pose.t)

                obj_center_proj = project_point(
                    K, obj_pose * (np.array([0., 0., 0.]))).astype(np.float32)
                obj_x_proj = project_point(
                    K, obj_pose * np.array([1., 0., 0.])).astype(np.float32)
                obj_y_proj = project_point(
                    K, obj_pose * np.array([0., 1., 0.])).astype(np.float32)
                obj_z_proj = project_point(
                    K, obj_pose * np.array([0., 0., 1.])).astype(np.float32)

                cv2.line(image, tuple(obj_center_proj), tuple(obj_x_proj),
                         (0, 0, 255), 2, cv2.LINE_AA)
                cv2.line(image, tuple(obj_center_proj), tuple(obj_y_proj),
                         (0, 255, 0), 2, cv2.LINE_AA)
                cv2.line(image, tuple(obj_center_proj), tuple(obj_z_proj),
                         (255, 0, 0), 2, cv2.LINE_AA)

        cv2.displayOverlay(
            'Image',
            'Image: {}'.format(osp.splitext(osp.basename(img_path))[0]))
        cv2.imshow('Image', image)

        quit_viz = wait_nav.process_key()