Example #1
def visualize_annotations_in_frame_sequence(frame_sequence_dir,
                                            annotation_dir,
                                            output_dir,
                                            labels,
                                            video_id=""):
    frame_file_list = sorted(glob.glob(os.path.join(frame_sequence_dir, '*')))
    annotations = io_util.load_annotation_from_dir(
        annotation_dir, io_util.parse_vatic_annotation_file)
    io_util.create_dir_if_not_exist(output_dir)
    if video_id:
        print("filtering through video_id: {}".format(video_id))
        annotations = annotations[annotations['videoid'] == video_id]
    labels = labels.split(',')
    lost_mask = (annotations['lost'] == 0)
    label_mask = annotations['label'].isin(labels)
    mask = label_mask & lost_mask
    annotations = annotations[mask]
    assert len(annotations) > 0

    for frame_file in frame_file_list:
        frame_base_file = os.path.basename(frame_file)
        (frame_seq, ext) = os.path.splitext(frame_base_file)
        frameid = int(frame_seq)

        print("processing: %s" % frameid)
        frame_annotations = annotations[annotations['frameid'] == frameid]

        output_frame_path = os.path.join(output_dir, frame_base_file)
        draw_annotations_on_image(frame_file, frame_annotations,
                                  output_frame_path)
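
A hypothetical invocation of the function above (the directory layout, label string, and video id are made-up values for illustration, not part of the original snippet):

# Draw all non-lost 'elephant' boxes of video 'video1' onto its frames and
# write the annotated copies to /tmp/annotated_frames (hypothetical paths).
visualize_annotations_in_frame_sequence(
    frame_sequence_dir='stanford/images/video1',
    annotation_dir='stanford/annotations',
    output_dir='/tmp/annotated_frames',
    labels='elephant',   # comma-separated string; split inside the function
    video_id='video1')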
Example #2
def setup_train_dir_with_extra_negative():
    for dataset_name, dataset_info in dataset.items():
        output_path = dataset_info['experiment_dir']
        logger.debug('setting up training directory for {} at {}'.format(
            dataset_name, output_path))
        io_util.create_dir_if_not_exist(output_path)
        for split in ['train', 'test']:
            output_file_path = os.path.join(output_path,
                                            '{}.pkl'.format(split))
            preprocess.sample_train_test_frames_with_extra_negative(
                dataset_name,
                dataset_info['tile_classification_annotation_dir'],
                dataset_info['sample_num_per_video'], dataset_info[split],
                dataset_info['extra_negative_dataset'],
                dataset_info['extra_negative_annotation_dirs'],
                dataset_info['extra_negative_sample_num_per_video'],
                dataset_info['extra_negative_video_ids'], output_file_path)

        # create photo dir links to images
        io_util.create_dir_if_not_exist(os.path.join(output_path, 'photos'))
        for dataset_name, dataset_info in dataset.items():
            link_path = os.path.join(output_path, 'photos', dataset_name)
            if not os.path.exists(link_path):
                os.symlink(
                    os.path.abspath(dataset_info['image_dir']),
                    link_path)
            else:
                logger.info('{} exists. skip linking'.format(link_path))
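
Every example in this listing calls io_util.create_dir_if_not_exist. The real io_util module is not shown here; a minimal sketch of what such a helper typically looks like, under that assumption:

import errno
import os


def create_dir_if_not_exist(directory):
    # Create the directory (including parents) and ignore the error
    # raised when it already exists.
    try:
        os.makedirs(directory)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise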
Example #3
def _populate_datasets_info():
    for dataset_name, dataset_info in dataset.items():
        dataset_info['tile_classification_annotation_dir'] = os.path.join(
            dataset_name, 'classification_448_224_224_224_annotations')
        dataset_info['sample_num_per_video'] = 2000
        dataset_info['extra_negative_annotation_dirs'] = []
        dataset_info['extra_negative_video_ids'] = []
        for extra_negative_dataset_name in dataset_info[
                'extra_negative_dataset']:
            dataset_info['extra_negative_annotation_dirs'].append(
                os.path.join(extra_negative_dataset_name,
                             'classification_448_224_224_224_annotations'))
            dataset_info['extra_negative_video_ids'].append(
                dataset[extra_negative_dataset_name]['train'])
        dataset_info['extra_negative_sample_num_per_video'] = 2000
        dataset_info['image_dir'] = os.path.join(dataset_name,
                                                 'images_448_224')
        dataset_info['experiment_dir'] = os.path.join(
            dataset_name, 'experiments',
            'classification_448_224_224_224_extra_negative')
        dataset_info['checkpoint_dir'] = os.path.join(
            dataset_info['experiment_dir'], 'logs_all_layers_40000')
        dataset_info['test_inference_dir'] = os.path.join(
            dataset_info['experiment_dir'], 'test_inference')
        io_util.create_dir_if_not_exist(dataset_info['test_inference_dir'])
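
Examples #2 and #3 both mutate a module-level dataset dictionary that is not shown. A sketch of its assumed shape, inferred only from the keys these snippets read before _populate_datasets_info fills in the rest (the dataset names and video ids are made up):

dataset = {
    'elephant': {
        'train': ['elephant_video_1'],       # video ids used for training
        'test': ['elephant_video_2'],        # video ids used for testing
        'extra_negative_dataset': ['raft'],  # datasets mined for extra negatives
    },
    'raft': {
        'train': ['raft_video_1'],
        'test': ['raft_video_2'],
        'extra_negative_dataset': ['elephant'],
    },
}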
Example #4
def visualize_predictions_in_frame_sequence(frame_sequence_dir,
                                            result_dir,
                                            output_dir,
                                            video_id="",
                                            prediction_threshold=0.5,
                                            long_edge_ratio=0.5,
                                            short_edge_ratio=1):
    frame_file_list = sorted(glob.glob(os.path.join(frame_sequence_dir, '*')))
    predictions = io_util.load_all_pickles_from_dir(result_dir)

    io_util.create_dir_if_not_exist(output_dir)
    if video_id:
        print("filtering through video_id: {}".format(video_id))
        video_predictions_ids = _get_keys_by_id_prefix(predictions, video_id)
        predictions = {
            k: v
            for k, v in predictions.items() if k in video_predictions_ids
        }
    for frame_file in frame_file_list:
        frame_base_file = os.path.basename(frame_file)
        (frame_seq, ext) = os.path.splitext(frame_base_file)
        frame_id = int(frame_seq)

        print("processing: %s" % frame_id)
        prediction_image_id = '{}_{}'.format(video_id, frame_id)
        prediction_tile_ids = _get_keys_by_id_prefix(predictions,
                                                     prediction_image_id)
        im = cv2.imread(frame_file)
        orig_im = np.copy(im)
        output_frame_path = os.path.join(output_dir, frame_base_file)
        tile_height, tile_width = _get_tile_size_from_ratio(
            im, long_edge_ratio, short_edge_ratio)
        for prediction_tile_id in prediction_tile_ids:
            if predictions[prediction_tile_id][1] < prediction_threshold:
                tile_grid = prediction_tile_id.split('_')
                grid_x, grid_y = int(tile_grid[-2]), int(tile_grid[-1])
                tile_x = grid_x * tile_width
                tile_y = grid_y * tile_height
                # tile_to_grey = np.copy(im[tile_y:tile_y + tile_height, tile_x:
                #                           tile_x + tile_width])
                # tile_to_grey = cv2.cvtColor(tile_to_grey, cv2.COLOR_BGR2GRAY)
                # # make it back to 3 channels so that
                # # we can put it in the original image
                # tile_to_grey = cv2.cvtColor(tile_to_grey, cv2.COLOR_GRAY2BGR)
                # im[tile_y:tile_y + tile_height, tile_x:
                #    tile_x + tile_width] = tile_to_grey
                im[tile_y:tile_y + tile_height, tile_x:tile_x + tile_width] = 0
        combined_im = np.concatenate((orig_im, im), axis=0)
        cv2.imwrite(output_frame_path, combined_im)
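
Example #4 depends on two helpers that are not shown: _get_keys_by_id_prefix and _get_tile_size_from_ratio. A sketch of how they could be defined, assuming that tile ids end in '_{grid_x}_{grid_y}' (as the parsing above implies) and that the ratios are fractions of the image's long and short edges (an assumption consistent with the 448_224_224_224 naming used in Examples #2 and #3):

def _get_keys_by_id_prefix(dictionary, id_prefix):
    # Keep the keys whose id starts with the given prefix.
    return [key for key in dictionary if key.startswith(id_prefix)]


def _get_tile_size_from_ratio(im, long_edge_ratio, short_edge_ratio):
    # im is an HxWx3 BGR array from cv2.imread.
    height, width = im.shape[:2]
    if width >= height:
        tile_width = int(width * long_edge_ratio)
        tile_height = int(height * short_edge_ratio)
    else:
        tile_width = int(width * short_edge_ratio)
        tile_height = int(height * long_edge_ratio)
    # Return (tile_height, tile_width) to match the caller's unpacking order.
    return tile_height, tile_width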
Example #5
def encode_datasets(dataset_ids, output_dir, crf=23):
    video_iter = get_all_video_iter(dataset_ids)
    procs = []
    io_util.create_dir_if_not_exist(output_dir)
    for dataset_video_id, frame_sequence_dir in video_iter:
        output_file_path = os.path.join(output_dir, dataset_video_id + '.mp4')
        procs.append(
            encode_images_to_h264(frame_sequence_dir,
                                  output_file_path,
                                  crf=crf))

    for proc in procs:
        stdout_value, stderr_value = proc.communicate()
        ret_val = proc.returncode
        logger.debug('{}: returns {}\n{}\n{}'.format(proc, ret_val,
                                                     stdout_value,
                                                     stderr_value))
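
encode_images_to_h264 is not shown either; from the way the caller collects stdout, stderr, and the return code via communicate(), it presumably launches an encoder process asynchronously. A minimal sketch under that assumption, using ffmpeg (the %010d.jpg frame naming is taken from the {:010d}.jpg convention in Example #7):

import os
import subprocess


def encode_images_to_h264(frame_sequence_dir, output_file_path, crf=23):
    # Start ffmpeg without waiting; the caller gathers output via communicate().
    cmd = [
        'ffmpeg', '-y',
        '-i', os.path.join(frame_sequence_dir, '%010d.jpg'),
        '-c:v', 'libx264', '-crf', str(crf),
        output_file_path,
    ]
    return subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)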
Example #6
def fix_tpod_annotation_for_original_resolution(tpod_annotation_dir,
                                                dataset_name, output_dir):
    assert dataset_name in ['elephant', 'raft']
    if dataset_name == 'elephant':
        original_resolution_lut = annotation_stats.elephant_video_id_to_original_resolution
        tpod_resolution_lut = annotation_stats.elephant_video_id_to_tpod_resolution
    elif dataset_name == 'raft':
        original_resolution_lut = annotation_stats.raft_video_id_to_original_resolution
        tpod_resolution_lut = annotation_stats.raft_video_id_to_tpod_resolution

    annotations = io_util.load_annotation_from_dir(
        tpod_annotation_dir, io_util.parse_vatic_annotation_file)

    video_ids = set(annotations['videoid'])
    for video_id in video_ids:
        print('working on {}'.format(video_id))
        video_annotations = annotations[annotations['videoid'] == video_id]
        (original_width, original_height) = original_resolution_lut[video_id]
        (image_width, image_height) = tpod_resolution_lut[video_id]
        resize_width_ratio = float(original_width) / image_width
        resize_height_ratio = float(original_height) / image_height
        annotations.loc[annotations.videoid == video_id, 'xmin'] = (
            video_annotations['xmin'] * resize_width_ratio).clip(
                lower=0, upper=original_width).astype(int)
        annotations.loc[annotations.videoid == video_id, 'xmax'] = (
            video_annotations['xmax'] * resize_width_ratio).clip(
                lower=0, upper=original_width).astype(int)
        annotations.loc[annotations.videoid == video_id, 'ymin'] = (
            video_annotations['ymin'] * resize_height_ratio).clip(
                lower=0, upper=original_height).astype(int)
        annotations.loc[annotations.videoid == video_id, 'ymax'] = (
            video_annotations['ymax'] * resize_height_ratio).clip(
                lower=0, upper=original_height).astype(int)

    io_util.create_dir_if_not_exist(output_dir)
    output_file_path = os.path.join(output_dir, 'cache.pkl')
    annotations.to_pickle(output_file_path)
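
A small worked example of the rescaling in Example #6, with made-up resolutions: if TPOD annotated a video at 720x480 while the original frames are 1920x1080, every x coordinate is scaled by 1920/720 and every y coordinate by 1080/480, then clipped to the original frame and cast to int:

original_width, original_height = 1920, 1080   # made-up original resolution
image_width, image_height = 720, 480           # made-up TPOD resolution
resize_width_ratio = float(original_width) / image_width     # ~2.667
resize_height_ratio = float(original_height) / image_height  # 2.25
xmin_tpod = 100
xmin_original = int(min(max(xmin_tpod * resize_width_ratio, 0), original_width))
print(xmin_original)  # 266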
Example #7
def get_tile_classification_annotation(
        annotation_dir, func_load_annotation_dir,
        video_id_to_original_resolution, video_id_to_frame_num, labels,
        resized_long_edge, resized_short_edge, tile_width, tile_height,
        output_dir, video_ids):
    """Get tile classificaiton annotation for resized images.

    The aspect ratio doesn't matter.
    resized_long_edge and resized_short_edge are treated as long edge and short edge.

    Args:
      annotation_dir: 
      func_load_annotation_dir: 
      video_id_to_original_resolution: 
      video_id_to_frame_num: 
      labels: 
      resized_long_edge: 
      resized_short_edge: 
      tile_width: 
      tile_height: 
      output_dir: 
      video_ids: 

    Returns:

    """
    assert video_id_to_frame_num
    assert labels
    assert resized_long_edge >= resized_short_edge

    annotations = func_load_annotation_dir(annotation_dir)
    annotations = filter_annotation_by_label(annotations, labels=labels)
    annotations = annotations[annotations['videoid'].isin(video_ids)]
    annotations['imageid'] = (
        annotations['videoid'] + '_' + annotations['frameid'].astype(str))

    print('total {} annotations'.format(len(annotations)))
    io_util.create_dir_if_not_exist(output_dir)
    for video_id in video_ids:
        original_image_resolution = (video_id_to_original_resolution[video_id])
        original_width, original_height = original_image_resolution
        if original_width >= original_height:
            resized_image_width = resized_long_edge
            resized_image_height = resized_short_edge
        else:
            resized_image_width = resized_short_edge
            resized_image_height = resized_long_edge

        grid_h, grid_w = int(resized_image_height / tile_height), int(
            resized_image_width / tile_width)
        print('{} is divided into {}x{} tiles'.format(video_id, grid_w,
                                                      grid_h))

        print('working on {}'.format(video_id))
        tile_id_to_classification_label = {}
        frame_num = video_id_to_frame_num[video_id]
        positive_num = 0
        for frame_id in range(frame_num):
            image_id = video_id + '_' + str(frame_id)
            image_annotations = annotations[annotations['imageid'] == image_id]

            _initialize_tile_id_to_classification_label(
                tile_id_to_classification_label, grid_w, grid_h, image_id)

            # debug: draw the resized boxes and save frames that have annotations
            if len(image_annotations) > 0:
                im = cv2.imread('stanford/images_{}_{}/{}/{:010d}.jpg'.format(
                    resized_long_edge, resized_short_edge, video_id,
                    frame_id + 1))

            for _, image_annotation in image_annotations.iterrows():
                image_resolution = (resized_image_width, resized_image_height)
                xmin, ymin, xmax, ymax = _get_resized_annotation(
                    image_annotation, original_image_resolution,
                    image_resolution)

                cv2.rectangle(im, (int(xmin), int(ymin)),
                              (int(xmax), int(ymax)), (0, 255, 0), 1)

                # upper left, upper right, lower left, lower right
                key_points = [(xmin, ymin), (xmax, ymin),
                              (xmin, ymax), (xmax, ymax)]
                roi = (xmin, ymin, xmax, ymax)
                for (x, y) in key_points:
                    grid_x, grid_y = int(x / tile_width), int(y / tile_height)
                    grid_x = min(grid_w - 1, max(grid_x, 0))
                    grid_y = min(grid_h - 1, max(grid_y, 0))
                    tile = (grid_x * tile_width, grid_y * tile_height,
                            (grid_x + 1) * tile_width,
                            (grid_y + 1) * tile_height)
                    if is_small_bx_in_big_bx(roi, tile):
                        tile_id = str(
                            image_id + '_{}_{}'.format(grid_x, grid_y))
                        if not tile_id_to_classification_label[tile_id]:
                            tile_id_to_classification_label[tile_id] = True
                            positive_num += 1

            if len(image_annotations) > 0:
                cv2.imwrite('/tmp/{}.jpg'.format(image_id), im)

        negative_num = int(frame_num * grid_w * grid_h) - positive_num
        print(
            'For {}, number of positives: {}, number of negatives: {}'.format(
                video_id, positive_num, negative_num))
        negative_num_in_lut = len(
            [k for k, v in tile_id_to_classification_label.items() if not v])
        assert negative_num == negative_num_in_lut, (
            'Number of negative examples does not match!')

        output_file_path = os.path.join(output_dir, '{}.pkl'.format(video_id))
        print('saving to {}'.format(output_file_path))
        with open(output_file_path, 'wb') as f:
            pickle.dump(tile_id_to_classification_label, f)
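
Example #7 also relies on the helper _initialize_tile_id_to_classification_label, which is not shown. Given how the lookup table is used above (boolean values keyed by '{image_id}_{grid_x}_{grid_y}', with the False entries counted as negatives), a plausible sketch:

def _initialize_tile_id_to_classification_label(tile_id_to_classification_label,
                                                grid_w, grid_h, image_id):
    # Mark every tile of this frame as negative until an annotation covers it.
    for grid_x in range(grid_w):
        for grid_y in range(grid_h):
            tile_id = '{}_{}_{}'.format(image_id, grid_x, grid_y)
            tile_id_to_classification_label[tile_id] = False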