def get_image_dims(mega_db_seqs, dataset_name, datasets_table, n_cores):
    """Look up dimensions for bbox-annotated images and merge the results into the sequences.

    Every image whose 'bbox' list has more than one entry is passed to
    `_get_image_dims` together with the dataset's storage client and path
    prefix. The objects it returns replace the corresponding entries in each
    sequence's 'images' list, matched on the image 'id'.

    Args:
        mega_db_seqs: list of sequence dicts; each must contain 'seq_id' and
            'images', and every image must contain 'file'.
        dataset_name: key into `datasets_table` for the dataset being processed.
        datasets_table: mapping of dataset name to dataset properties; the
            'path_prefix' entry of the selected dataset is read here.
        n_cores: if truthy, size of the worker pool used to process images
            concurrently; if falsy, images are processed one by one.

    Returns:
        The same `mega_db_seqs` object. It is modified in place: selected
        images that lack an 'id' get a freshly generated one, and each
        sequence's 'images' list is replaced by the merged list.
    """
    images_to_get_dims_for = []

    for seq in tqdm(mega_db_seqs):
        assert 'seq_id' in seq and 'images' in seq
        # Validate the whole sequence before mutating any of its images.
        for im in seq['images']:
            assert 'file' in im

        for im in seq['images']:
            if 'bbox' in im and len(im['bbox']) > 1:
                # An id is needed to match the updated object back to its slot.
                if 'id' not in im:
                    im['id'] = str(uuid.uuid1())
                images_to_get_dims_for.append(im)

    print('Getting the dimensions for {} images'.format(len(images_to_get_dims_for)))

    storage_client = MegadbUtils.get_storage_client(datasets_table, dataset_name)
    path_prefix = datasets_table[dataset_name]['path_prefix']

    if n_cores:
        print('Using threads to download images')
        # NOTE(review): the pool is never closed here; confirm whether
        # `workerpool` needs an explicit close()/join().
        pool = workerpool(n_cores)
        updated_im_objects = pool.map(partial(_get_image_dims, storage_client, path_prefix),
                                       images_to_get_dims_for)
        print('pool.map has returned')
    else:
        print('Downloading images sequentially')
        # Use the same per-image worker as the threaded branch. Calling
        # get_image_dims here would recurse with the wrong arguments.
        updated_im_objects = [
            _get_image_dims(storage_client, path_prefix, image_obj)
            for image_obj in tqdm(images_to_get_dims_for)
        ]
    print('Successfully updated {} images.'.format(len(updated_im_objects)))
    updated_by_id = {im['id']: im for im in updated_im_objects}

    # update the sequences
    print('Updating the sequence objects...')
    for seq in tqdm(mega_db_seqs):
        merged_images = []
        for im in seq['images']:
            # Images with a short bbox list may never have been given an id,
            # so look it up defensively instead of indexing.
            im_id = im.get('id')
            if 'bbox' in im and im_id in updated_by_id:
                merged_images.append(updated_by_id[im_id])
            else:
                merged_images.append(im)
        seq['images'] = merged_images

    return mega_db_seqs
# Example no. 2
# 0
def visualize_sequences(datasets_table, sequences, args):
    """Render images from the given sequences and write an HTML index of them.

    For each selected image a rendering job is collected (storage client,
    blob path, bbox list, output file name) and handed to `render_image_info`;
    an 'index.html' listing the rendered images is then written to
    `args.output_dir`.

    Args:
        datasets_table: passed to `MegadbUtils` to resolve full blob paths and
            storage clients for a dataset.
        sequences: iterable of sequence dicts. Each must contain 'dataset' and
            'seq_id'; sequences without an 'images' key are skipped.
        args: options object. Reads `trim_to_images_bboxes_labeled`,
            `pathsep_replacement`, `num_to_visualize` and `output_dir`, and is
            forwarded to `render_image_info`.
    """
    num_images = 0

    images_html = []
    rendering_info = []

    for seq in sequences:
        if 'images' not in seq:
            continue

        # dataset and seq_id are required fields
        dataset_name = seq['dataset']
        seq_id = seq['seq_id']

        # Sort the images in the sequence by frame number. frame_num is
        # treated as optional below, so fall back to the same -1 default here
        # rather than raising KeyError on images that lack it.
        images_in_seq = seq['images']
        if len(images_in_seq) > 1:
            images_in_seq = sorted(images_in_seq,
                                   key=lambda x: x.get('frame_num', -1))

        for im in images_in_seq:
            if args.trim_to_images_bboxes_labeled and 'bbox' not in im:
                continue

            num_images += 1

            blob_path = MegadbUtils.get_full_path(datasets_table, dataset_name,
                                                  im['file'])
            frame_num = im.get('frame_num', -1)

            # if no class label on the image, show class label on the sequence
            im_class = im.get('class', None)
            if im_class is None:
                im_class = seq.get('class', [])

            # Flatten the blob path into a single file name for the output dir.
            annotated_img_name = 'anno_' + blob_path.replace(
                '/', args.pathsep_replacement).replace(
                    '\\', args.pathsep_replacement)

            rendering = {
                'storage_client': MegadbUtils.get_storage_client(
                    datasets_table, dataset_name),
                'blob_path': blob_path,
                'bbox': im.get('bbox', []),
                'annotated_img_name': annotated_img_name,
            }
            rendering_info.append(rendering)

            images_html.append({
                'filename':
                'rendered_images/{}'.format(annotated_img_name),
                'title':
                'Seq ID: {}. Frame number: {}<br/> Image file: {}<br/> number of boxes: {}, image class labels: {}'
                .format(seq_id, frame_num, blob_path, len(rendering['bbox']),
                        im_class),
                'textStyle':
                'font-family:verdana,arial,calibri;font-size:80%;text-align:left;margin-top:20;margin-bottom:5'
            })

        # The limit is checked per sequence, so a sequence is never cut short
        # and the final count may exceed num_to_visualize.
        if num_images >= args.num_to_visualize:
            print('num_images visualized is {}'.format(num_images))
            break

    # Rendering is done sequentially, one image at a time.
    for rendering in tqdm(rendering_info):
        render_image_info(rendering, args=args)

    print('Making HTML...')

    html_path = os.path.join(args.output_dir, 'index.html')
    write_html_image_list(filename=html_path, images=images_html)
def visualize_incoming_annotations(args):
    """Render a sample of incoming bbox annotations onto their images and write an HTML index.

    The incoming annotation file refers to images by a file name of the form
    `<dataset>.seq<seq_id>.frame<frame_num>.<ext>`. Each name is resolved to a
    path in blob storage via the MegaDB entries, the image is downloaded, the
    annotated boxes are drawn on it, and the result is saved under
    `<output_dir>/rendered_images`. Finally `<output_dir>/index.html` is
    written, listing the rendered images sorted by file name.

    Args:
        args: options object. Reads `megadb_entries` (path to a JSON list of
            sequences), `incoming_annotation` (path given to `IndexedJsonDb`),
            `num_to_visualize` (-1 means no sampling),
            `trim_to_images_bboxes_labeled`, `output_image_width` and
            `output_dir`.
    """
    print('Connecting to MegaDB to get the datasets table...')
    megadb_utils = MegadbUtils()
    datasets_table = megadb_utils.get_datasets_table()

    print('Loading the MegaDB entries...')
    with open(args.megadb_entries) as f:
        sequences = json.load(f)
    print(f'Total number of sequences: {len(sequences)}')
    # dataset name -> seq_id -> list of image entries of that sequence
    dataset_seq_images = defaultdict(dict)
    for seq in sequences:
        dataset_seq_images[seq['dataset']][seq['seq_id']] = seq['images']

    print('Loading incoming annotation entries...')
    incoming = IndexedJsonDb(args.incoming_annotation)
    print(
        f'Number of images in this annotation file: {len(incoming.image_id_to_image)}'
    )

    # Compare the requested sample size against the collection that is
    # actually sampled: it can hold fewer entries than image_id_to_image,
    # and random.sample raises ValueError when asked for more than exist.
    annotated_items = list(incoming.image_id_to_annotations.items())
    if (args.num_to_visualize != -1
            and args.num_to_visualize <= len(annotated_items)):
        incoming_id_to_anno = sample(annotated_items, args.num_to_visualize)
    else:
        incoming_id_to_anno = annotated_items

    # The file_name field in the incoming json looks like alka_squirrels.seq2020_05_07_25C.frame119221.jpg
    # we need to use the dataset, sequence and frame info to find the actual path in blob storage
    # using the sequences
    images_html = []
    for image_id, annotations in tqdm(incoming_id_to_anno):
        if args.trim_to_images_bboxes_labeled and annotations[0][
                'category_id'] == 5:
            # category_id 5 is No Object Visible
            continue

        anno_file_name = incoming.image_id_to_image[image_id]['file_name']
        parts = anno_file_name.split('.')
        dataset_name = parts[0]
        # Split only once so a sequence id that itself contains "seq" (or a
        # frame token containing "frame") is not truncated.
        seq_id = parts[1].split('seq', 1)[1]
        frame_num = int(parts[2].split('frame', 1)[1])

        im_rel_path = get_image_rel_path(dataset_seq_images, dataset_name,
                                         seq_id, frame_num)
        if im_rel_path is None:
            print(f'Not found in megadb entries: dataset {dataset_name},'
                  f' seq_id {seq_id}, frame_num {frame_num}')
            continue

        im_full_path = megadb_utils.get_full_path(datasets_table, dataset_name,
                                                  im_rel_path)

        # download the image into memory
        container_client = megadb_utils.get_storage_client(
            datasets_table, dataset_name)
        downloader = container_client.download_blob(im_full_path)
        image_file = io.BytesIO()
        downloader.download_to_stream(image_file)
        image = vis_utils.open_image(image_file)

        boxes = [anno['bbox'] for anno in annotations]
        classes = [anno['category_id'] for anno in annotations]

        vis_utils.render_iMerit_boxes(boxes,
                                      classes,
                                      image,
                                      label_map=incoming.cat_id_to_name)

        # '/' is not allowed inside a single file name, so replace it.
        file_name = '{}_gtbbox.jpg'.format(
            os.path.splitext(anno_file_name)[0].replace('/', '~'))
        image = vis_utils.resize_image(image, args.output_image_width)
        image.save(os.path.join(args.output_dir, 'rendered_images', file_name))

        # Only non-empty bbox entries count as boxes in the caption.
        num_boxes = len([b for b in boxes if len(b) > 0])
        images_html.append({
            'filename':
            '{}/{}'.format('rendered_images', file_name),
            'title':
            '{}, number of boxes: {}'.format(anno_file_name, num_boxes),
            'textStyle':
            'font-family:verdana,arial,calibri;font-size:80%;text-align:left;margin-top:20;margin-bottom:5'
        })

    # Write to HTML
    images_html = sorted(images_html, key=lambda x: x['filename'])
    write_html_image_list(filename=os.path.join(args.output_dir, 'index.html'),
                          images=images_html,
                          options={
                              'headerHtml':
                              '<h1>Sample annotations from {}</h1>'.format(
                                  args.incoming_annotation)
                          })

    print('Visualized {} images.'.format(len(images_html)))