# Module-level imports assumed by the functions below. io_util, preprocess
# and annotation_stats are local modules of this repository; `dataset` is a
# module-level dict of per-dataset configuration that these functions read
# and mutate (see the sketch after _populate_datasets_info).
import glob
import logging
import os
import pickle

import cv2
import numpy as np

import annotation_stats
import io_util
import preprocess

logger = logging.getLogger(__name__)


def visualize_annotations_in_frame_sequence(frame_sequence_dir,
                                            annotation_dir,
                                            output_dir,
                                            labels,
                                            video_id=""):
    frame_file_list = sorted(glob.glob(os.path.join(frame_sequence_dir, '*')))
    annotations = io_util.load_annotation_from_dir(
        annotation_dir, io_util.parse_vatic_annotation_file)
    io_util.create_dir_if_not_exist(output_dir)
    if video_id:
        print("filtering through video_id: {}".format(video_id))
        annotations = annotations[annotations['videoid'] == video_id]
    labels = labels.split(',')
    # keep only annotations with a requested label that are not marked
    # "lost" (out of frame) in the VATIC annotation format
    lost_mask = (annotations['lost'] == 0)
    label_mask = annotations['label'].isin(labels)
    mask = label_mask & lost_mask
    annotations = annotations[mask]
    assert len(annotations) > 0
    for frame_file in frame_file_list:
        frame_base_file = os.path.basename(frame_file)
        (frame_seq, ext) = os.path.splitext(frame_base_file)
        frameid = int(frame_seq)
        print("processing: {}".format(frameid))
        frame_annotations = annotations[annotations['frameid'] == frameid]
        output_frame_path = os.path.join(output_dir, frame_base_file)
        draw_annotations_on_image(frame_file, frame_annotations,
                                  output_frame_path)
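# A usage sketch for the function above (the paths and video id are
# hypothetical placeholders, not files shipped with this repository):
def _example_visualize_annotations():
    visualize_annotations_in_frame_sequence(
        frame_sequence_dir='elephant/images/video1',
        annotation_dir='elephant/annotations',
        output_dir='/tmp/video1_annotated',
        labels='elephant',  # comma-separated label string, split internally
        video_id='video1')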
def setup_train_dir_with_extra_negative():
    for dataset_name, dataset_info in dataset.items():
        output_path = dataset_info['experiment_dir']
        logger.debug('setting up training directory for {} at {}'.format(
            dataset_name, output_path))
        io_util.create_dir_if_not_exist(output_path)
        for split in ['train', 'test']:
            output_file_path = os.path.join(output_path,
                                            '{}.pkl'.format(split))
            preprocess.sample_train_test_frames_with_extra_negative(
                dataset_name,
                dataset_info['tile_classification_annotation_dir'],
                dataset_info['sample_num_per_video'], dataset_info[split],
                dataset_info['extra_negative_dataset'],
                dataset_info['extra_negative_annotation_dirs'],
                dataset_info['extra_negative_sample_num_per_video'],
                dataset_info['extra_negative_video_ids'], output_file_path)
        # create a photo dir with links to each dataset's images
        io_util.create_dir_if_not_exist(os.path.join(output_path, 'photos'))
        # use distinct loop variables here so the enclosing loop's
        # dataset_name/dataset_info are not shadowed
        for link_dataset_name, link_dataset_info in dataset.items():
            link_path = os.path.join(output_path, 'photos',
                                     link_dataset_name)
            if not os.path.exists(link_path):
                os.symlink(
                    os.path.abspath(link_dataset_info['image_dir']),
                    link_path)
            else:
                logger.info('{} exists. skip linking'.format(link_path))
def _populate_datasets_info():
    for dataset_name, dataset_info in dataset.items():
        dataset_info['tile_classification_annotation_dir'] = os.path.join(
            dataset_name, 'classification_448_224_224_224_annotations')
        dataset_info['sample_num_per_video'] = 2000
        dataset_info['extra_negative_annotation_dirs'] = []
        dataset_info['extra_negative_video_ids'] = []
        for extra_negative_dataset_name in dataset_info[
                'extra_negative_dataset']:
            dataset_info['extra_negative_annotation_dirs'].append(
                os.path.join(extra_negative_dataset_name,
                             'classification_448_224_224_224_annotations'))
            # negatives are drawn from the other dataset's training videos
            dataset_info['extra_negative_video_ids'].append(
                dataset[extra_negative_dataset_name]['train'])
        dataset_info['extra_negative_sample_num_per_video'] = 2000
        dataset_info['image_dir'] = os.path.join(dataset_name,
                                                 'images_448_224')
        dataset_info['experiment_dir'] = os.path.join(
            dataset_name, 'experiments',
            'classification_448_224_224_224_extra_negative')
        dataset_info['checkpoint_dir'] = os.path.join(
            dataset_info['experiment_dir'], 'logs_all_layers_40000')
        dataset_info['test_inference_dir'] = os.path.join(
            dataset_info['experiment_dir'], 'test_inference')
        io_util.create_dir_if_not_exist(dataset_info['test_inference_dir'])
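# The module-level `dataset` dict mutated above is assumed to be pre-seeded
# with at least 'train', 'test' and 'extra_negative_dataset' keys, since
# those are read before anything is written. A sketch of the assumed shape
# (dataset names and video ids are placeholders):
#
# dataset = {
#     'elephant': {
#         'train': ['video1', 'video2'],
#         'test': ['video3'],
#         'extra_negative_dataset': ['raft'],
#     },
#     'raft': {
#         'train': ['videoA'],
#         'test': ['videoB'],
#         'extra_negative_dataset': ['elephant'],
#     },
# }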
def visualize_predictions_in_frame_sequence(frame_sequence_dir,
                                            result_dir,
                                            output_dir,
                                            video_id="",
                                            prediction_threshold=0.5,
                                            long_edge_ratio=0.5,
                                            short_edge_ratio=1):
    frame_file_list = sorted(glob.glob(os.path.join(frame_sequence_dir, '*')))
    predictions = io_util.load_all_pickles_from_dir(result_dir)
    io_util.create_dir_if_not_exist(output_dir)
    if video_id:
        print("filtering through video_id: {}".format(video_id))
        video_predictions_ids = _get_keys_by_id_prefix(predictions, video_id)
        predictions = {
            k: v
            for k, v in predictions.items() if k in video_predictions_ids
        }
    for frame_file in frame_file_list:
        frame_base_file = os.path.basename(frame_file)
        (frame_seq, ext) = os.path.splitext(frame_base_file)
        frame_id = int(frame_seq)
        print("processing: {}".format(frame_id))
        prediction_image_id = '{}_{}'.format(video_id, frame_id)
        prediction_tile_ids = _get_keys_by_id_prefix(predictions,
                                                     prediction_image_id)
        im = cv2.imread(frame_file)
        orig_im = np.copy(im)
        output_frame_path = os.path.join(output_dir, frame_base_file)
        tile_height, tile_width = _get_tile_size_from_ratio(
            im, long_edge_ratio, short_edge_ratio)
        for prediction_tile_id in prediction_tile_ids:
            # black out tiles whose score (index 1, the positive-class
            # probability) falls below the threshold, leaving only
            # likely-positive tiles visible
            if predictions[prediction_tile_id][1] < prediction_threshold:
                grid_x = int(prediction_tile_id.split('_')[-2])
                grid_y = int(prediction_tile_id.split('_')[-1])
                tile_x = grid_x * tile_width
                tile_y = grid_y * tile_height
                # an earlier variant greyed the tile out instead:
                # tile_to_grey = np.copy(
                #     im[tile_y:tile_y + tile_height,
                #        tile_x:tile_x + tile_width])
                # tile_to_grey = cv2.cvtColor(tile_to_grey,
                #                             cv2.COLOR_BGR2GRAY)
                # # make it back to 3 channels so that we can put it in
                # # the original image
                # tile_to_grey = cv2.cvtColor(tile_to_grey,
                #                             cv2.COLOR_GRAY2BGR)
                # im[tile_y:tile_y + tile_height,
                #    tile_x:tile_x + tile_width] = tile_to_grey
                im[tile_y:tile_y + tile_height,
                   tile_x:tile_x + tile_width] = 0
        # stack the original frame above the masked frame for comparison
        combined_im = np.concatenate((orig_im, im), axis=0)
        cv2.imwrite(output_frame_path, combined_im)
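# A usage sketch for the prediction overlay above; result_dir is assumed
# to hold pickles keyed by tile ids of the form
# '<video_id>_<frame_id>_<grid_x>_<grid_y>' (the paths are placeholders):
def _example_visualize_predictions():
    visualize_predictions_in_frame_sequence(
        frame_sequence_dir='elephant/images/video1',
        result_dir='/path/to/test_inference',
        output_dir='/tmp/video1_predictions',
        video_id='video1',
        prediction_threshold=0.5)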
def encode_datasets(dataset_ids, output_dir, crf=23):
    video_iter = get_all_video_iter(dataset_ids)
    procs = []
    io_util.create_dir_if_not_exist(output_dir)
    # launch one encoder process per video without blocking...
    for dataset_video_id, frame_sequence_dir in video_iter:
        output_file_path = os.path.join(output_dir, dataset_video_id + '.mp4')
        procs.append(
            encode_images_to_h264(frame_sequence_dir, output_file_path,
                                  crf=crf))
    # ...then wait for all of them to finish and log their output
    for proc in procs:
        stdout_value, stderr_value = proc.communicate()
        ret_val = proc.returncode
        logger.debug('{}: returns {}\n{}\n{}'.format(proc, ret_val,
                                                     stdout_value,
                                                     stderr_value))
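# encode_datasets expects encode_images_to_h264 to return an already
# started subprocess whose communicate()/returncode it can inspect. A
# minimal sketch of such a helper, assuming ffmpeg is on PATH and frames
# are zero-padded JPEGs (the '%010d.jpg' pattern is an assumption based on
# the frame naming used elsewhere in this file):
import subprocess


def _example_encode_images_to_h264(frame_sequence_dir, output_file_path,
                                   crf=23):
    cmd = [
        'ffmpeg', '-y', '-i',
        os.path.join(frame_sequence_dir, '%010d.jpg'), '-c:v', 'libx264',
        '-crf', str(crf), output_file_path
    ]
    # launch without blocking so many videos can be encoded concurrently
    return subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)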
def fix_tpod_annotation_for_original_resolution(tpod_annotation_dir,
                                                dataset_name, output_dir):
    assert dataset_name in ['elephant', 'raft']
    if dataset_name == 'elephant':
        original_resolution_lut = (
            annotation_stats.elephant_video_id_to_original_resolution)
        tpod_resolution_lut = (
            annotation_stats.elephant_video_id_to_tpod_resolution)
    elif dataset_name == 'raft':
        original_resolution_lut = (
            annotation_stats.raft_video_id_to_original_resolution)
        tpod_resolution_lut = (
            annotation_stats.raft_video_id_to_tpod_resolution)
    annotations = io_util.load_annotation_from_dir(
        tpod_annotation_dir, io_util.parse_vatic_annotation_file)
    video_ids = set(annotations['videoid'])
    for video_id in video_ids:
        print('working on {}'.format(video_id))
        video_annotations = annotations[annotations['videoid'] == video_id]
        (original_width, original_height) = original_resolution_lut[video_id]
        (image_width, image_height) = tpod_resolution_lut[video_id]
        # use float division so the ratios are not silently truncated to
        # integers under Python 2
        resize_width_ratio = float(original_width) / image_width
        resize_height_ratio = float(original_height) / image_height
        # scale each box back to the original resolution and clip it to
        # the image bounds
        annotations.loc[annotations.videoid == video_id, 'xmin'] = (
            video_annotations['xmin'] * resize_width_ratio).clip(
                lower=0, upper=original_width).astype(int)
        annotations.loc[annotations.videoid == video_id, 'xmax'] = (
            video_annotations['xmax'] * resize_width_ratio).clip(
                lower=0, upper=original_width).astype(int)
        annotations.loc[annotations.videoid == video_id, 'ymin'] = (
            video_annotations['ymin'] * resize_height_ratio).clip(
                lower=0, upper=original_height).astype(int)
        annotations.loc[annotations.videoid == video_id, 'ymax'] = (
            video_annotations['ymax'] * resize_height_ratio).clip(
                lower=0, upper=original_height).astype(int)
    io_util.create_dir_if_not_exist(output_dir)
    output_file_path = os.path.join(output_dir, 'cache.pkl')
    annotations.to_pickle(output_file_path)
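# A usage sketch for the TPOD-resolution fix above (the input directory is
# a placeholder; dataset_name must be 'elephant' or 'raft'):
def _example_fix_tpod_annotation():
    fix_tpod_annotation_for_original_resolution(
        tpod_annotation_dir='elephant/tpod_annotations',
        dataset_name='elephant',
        output_dir='elephant/annotations_original_resolution')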
def get_tile_classification_annotation(
        annotation_dir, func_load_annotation_dir,
        video_id_to_original_resolution, video_id_to_frame_num, labels,
        resized_long_edge, resized_short_edge, tile_width, tile_height,
        output_dir, video_ids):
    """Get tile classification annotations for resized images.

    The aspect ratio doesn't matter: resized_long_edge and
    resized_short_edge are applied to whichever dimension of the original
    image is longer/shorter. A tile is labeled positive when
    is_small_bx_in_big_bx judges a (resized) annotation box to lie within
    that tile. One pickle per video, mapping tile ids to boolean labels,
    is written to output_dir; nothing is returned.

    Args:
        annotation_dir: directory containing the source annotations.
        func_load_annotation_dir: callable loading annotation_dir into a
            dataframe.
        video_id_to_original_resolution: video id -> (width, height) of
            the original video.
        video_id_to_frame_num: video id -> number of frames.
        labels: annotation labels treated as positives.
        resized_long_edge: long edge of the resized image.
        resized_short_edge: short edge of the resized image.
        tile_width: tile width in resized-image pixels.
        tile_height: tile height in resized-image pixels.
        output_dir: directory to write per-video pickles to.
        video_ids: video ids to process.
    """
    assert video_id_to_frame_num
    assert labels
    assert resized_long_edge >= resized_short_edge
    annotations = func_load_annotation_dir(annotation_dir)
    annotations = filter_annotation_by_label(annotations, labels=labels)
    annotations = annotations[annotations['videoid'].isin(video_ids)]
    annotations['imageid'] = (
        annotations['videoid'] + '_' + annotations['frameid'].astype(str))
    print('total {} annotations'.format(len(annotations)))
    io_util.create_dir_if_not_exist(output_dir)
    for video_id in video_ids:
        original_image_resolution = video_id_to_original_resolution[video_id]
        original_width, original_height = original_image_resolution
        # map the resized long/short edges onto width/height according to
        # the original orientation
        if original_width >= original_height:
            resized_image_width = resized_long_edge
            resized_image_height = resized_short_edge
        else:
            resized_image_width = resized_short_edge
            resized_image_height = resized_long_edge
        grid_h = int(resized_image_height / tile_height)
        grid_w = int(resized_image_width / tile_width)
        print('{} is divided into {}x{} tiles'.format(video_id, grid_w,
                                                      grid_h))
        print('working on {}'.format(video_id))
        tile_id_to_classification_label = {}
        frame_num = video_id_to_frame_num[video_id]
        positive_num = 0
        for frame_id in range(frame_num):
            image_id = video_id + '_' + str(frame_id)
            image_annotations = annotations[annotations['imageid'] ==
                                            image_id]
            _initialize_tile_id_to_classification_label(
                tile_id_to_classification_label, grid_w, grid_h, image_id)
            if len(image_annotations) > 0:
                # debug aid: load the resized frame so the boxes can be
                # drawn and dumped to /tmp below. NB: the image path is
                # hardcoded to the stanford dataset layout; frame files
                # are 1-indexed.
                im = cv2.imread('stanford/images_{}_{}/{}/{:010d}.jpg'.format(
                    resized_long_edge, resized_short_edge, video_id,
                    frame_id + 1))
            for _, image_annotation in image_annotations.iterrows():
                image_resolution = (resized_image_width,
                                    resized_image_height)
                xmin, ymin, xmax, ymax = _get_resized_annotation(
                    image_annotation, original_image_resolution,
                    image_resolution)
                cv2.rectangle(im, (int(xmin), int(ymin)),
                              (int(xmax), int(ymax)), (0, 255, 0), 1)
                # check the tiles under the box's four corners:
                # upper left, upper right, lower left, lower right
                key_points = [(xmin, ymin), (xmax, ymin), (xmin, ymax),
                              (xmax, ymax)]
                roi = (xmin, ymin, xmax, ymax)
                for (x, y) in key_points:
                    grid_x, grid_y = int(x / tile_width), int(
                        y / tile_height)
                    grid_x = min(grid_w - 1, max(grid_x, 0))
                    grid_y = min(grid_h - 1, max(grid_y, 0))
                    tile = (grid_x * tile_width, grid_y * tile_height,
                            (grid_x + 1) * tile_width,
                            (grid_y + 1) * tile_height)
                    if is_small_bx_in_big_bx(roi, tile):
                        tile_id = str(
                            image_id + '_{}_{}'.format(grid_x, grid_y))
                        if not tile_id_to_classification_label[tile_id]:
                            tile_id_to_classification_label[tile_id] = True
                            positive_num += 1
            if len(image_annotations) > 0:
                cv2.imwrite('/tmp/{}.jpg'.format(image_id), im)
        negative_num = int(frame_num * grid_w * grid_h) - positive_num
        print('For {}, number of positives: {}, number of negatives: {}'.
              format(video_id, positive_num, negative_num))
        negative_num_in_lut = len([
            k for k, v in tile_id_to_classification_label.items() if not v
        ])
        assert negative_num == negative_num_in_lut, (
            'Number of negative examples does not match!')
        output_file_path = os.path.join(output_dir,
                                        '{}.pkl'.format(video_id))
        print('saving to {}'.format(output_file_path))
        with open(output_file_path, 'wb') as f:
            pickle.dump(tile_id_to_classification_label, f)
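# A usage sketch for get_tile_classification_annotation, using the 448x224
# resize and 224x224 tiles suggested by the directory names in
# _populate_datasets_info; the annotation directory, lookup tables, label
# and video id are placeholders:
def _example_get_tile_classification_annotation():
    get_tile_classification_annotation(
        annotation_dir='stanford/annotations',
        func_load_annotation_dir=lambda d: io_util.load_annotation_from_dir(
            d, io_util.parse_vatic_annotation_file),
        video_id_to_original_resolution={'video0': (1920, 1080)},
        video_id_to_frame_num={'video0': 100},
        labels=['Car'],
        resized_long_edge=448,
        resized_short_edge=224,
        tile_width=224,
        tile_height=224,
        output_dir='stanford/classification_448_224_224_224_annotations',
        video_ids=['video0'])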