Ejemplo n.º 1
0
def get_example_images(megadb_utils: MegadbUtils, dataset_name: str,
                       class_name: str) -> List[Optional[str]]:
    """Gets SAS URLs for images of a particular class from a given dataset.

    Queries the sequences table (using the dataset as the partition key) for
    sequences whose sequence-level or image-level `class` array contains
    `class_name`, randomly samples up to NUMBER_EXAMPLES_PER_SPECIES of the
    returned sequences, and picks one random image from each sampled sequence.

    Args:
        megadb_utils: MegaDB helper used to fetch the datasets table and to
            run the sequences query.
        dataset_name: name of the dataset; used as the query partition key
            and as the key into the datasets table.
        class_name: class label to search for.

    Returns:
        A list with exactly NUMBER_EXAMPLES_PER_SPECIES entries. Each found
        example is an image URL with the dataset's container SAS key appended;
        if fewer examples were found, the list is padded at the end with None.
    """
    datasets_table = megadb_utils.get_datasets_table()

    # this query should be fairly fast, ~1 sec
    # NOTE(review): class_name is interpolated straight into the query text;
    # confirm it never comes from untrusted input.
    query_both_levels = f'''
    SELECT TOP {NUMBER_SEQUENCES_TO_QUERY} VALUE seq
    FROM seq
    WHERE ARRAY_CONTAINS(seq.class, "{class_name}")
        OR (SELECT VALUE COUNT(im)
            FROM im IN seq.images
            WHERE ARRAY_CONTAINS(im.class, "{class_name}")) > 0
    '''
    sequences = megadb_utils.query_sequences_table(
        query_both_levels, partition_key=dataset_name)

    # random.sample() raises if asked for more items than are available
    num_samples = min(len(sequences), NUMBER_EXAMPLES_PER_SPECIES)
    sample_seqs = sample(sequences, num_samples)

    image_urls: List[Optional[str]] = []
    for seq in sample_seqs:
        sample_image = sample(seq['images'], 1)[0]  # sample 1 img per sequence
        img_path = MegadbUtils.get_full_path(
            datasets_table, dataset_name, sample_image['file'])
        # Percent-encode the blob path for use in the URL *path*. quote_plus()
        # targets query strings and turns spaces into '+', which is a literal
        # plus sign inside a path; quote(..., safe='') encodes spaces as %20
        # and is otherwise identical (including '/' -> %2F).
        img_path = urllib.parse.quote(img_path, safe='')

        dataset_info = datasets_table[dataset_name]
        img_url = 'https://{}.blob.core.windows.net/{}/{}{}'.format(
            dataset_info["storage_account"],
            dataset_info["container"],
            img_path,
            dataset_info["container_sas_key"])
        image_urls.append(img_url)

    # pad with None so callers always get a fixed-length list
    num_missing = NUMBER_EXAMPLES_PER_SPECIES - len(image_urls)
    if num_missing > 0:
        image_urls.extend([None] * num_missing)
    assert len(image_urls) == NUMBER_EXAMPLES_PER_SPECIES
    return image_urls
Ejemplo n.º 2
0
def get_example_images(megadb_utils, dataset_name, class_name):
    """Gets URLs for example images of a particular class from a dataset.

    Queries the sequences table (using the dataset as the partition key) for
    sequences whose sequence-level or image-level `class` array contains
    `class_name`, randomly samples up to NUMBER_EXAMPLES_PER_SPECIES of the
    returned sequences, and picks one random image from each sampled sequence.

    Args:
        megadb_utils: MegaDB helper used to fetch the datasets table and to
            run the sequences query.
        dataset_name: name of the dataset; used as the query partition key
            and as the key into the datasets table.
        class_name: class label to search for.

    Returns:
        A list with exactly NUMBER_EXAMPLES_PER_SPECIES entries. Each found
        example is an image URL with the dataset's container SAS key appended;
        if fewer examples were found, the list is padded at the end with None.
    """
    datasets_table = megadb_utils.get_datasets_table()

    # NOTE(review): class_name is interpolated straight into the query text;
    # confirm it never comes from untrusted input.
    query_both_levels = '''
    SELECT TOP {} VALUE seq
    FROM seq
    WHERE ARRAY_CONTAINS(seq.class, "{}") OR (SELECT VALUE COUNT(im) FROM im IN seq.images WHERE ARRAY_CONTAINS(im.class, "{}")) > 0
    '''.format(NUMBER_SEQUENCES_TO_QUERY, class_name, class_name)

    sequences = megadb_utils.query_sequences_table(query_both_levels,
                                                   partition_key=dataset_name)

    # sample NUMBER_EXAMPLES_PER_SPECIES sequences if that many are available;
    # random.sample() raises if asked for more items than exist
    num_samples = min(len(sequences), NUMBER_EXAMPLES_PER_SPECIES)
    sample_seqs = sample(sequences, num_samples)

    image_urls = []
    for seq in sample_seqs:
        # sample one image from each sequence
        sample_image = sample(seq['images'], 1)[0]
        img_path = MegadbUtils.get_full_path(datasets_table, dataset_name,
                                             sample_image['file'])
        # Percent-encode the blob path for use in the URL *path*. quote_plus()
        # targets query strings and turns spaces into '+', which is a literal
        # plus sign inside a path; quote(..., safe='') encodes spaces as %20
        # and is otherwise identical (including '/' -> %2F).
        img_path = urllib.parse.quote(img_path, safe='')

        dataset_info = datasets_table[dataset_name]
        img_url = 'https://{}.blob.core.windows.net/{}/{}{}'.format(
            dataset_info["storage_account"], dataset_info["container"],
            img_path, dataset_info["container_sas_key"])
        image_urls.append(img_url)

    # pad with None so callers always get a fixed-length list
    num_missing = NUMBER_EXAMPLES_PER_SPECIES - len(image_urls)
    if num_missing > 0:
        image_urls.extend([None] * num_missing)
    assert len(image_urls) == NUMBER_EXAMPLES_PER_SPECIES
    return image_urls
Ejemplo n.º 3
0
def visualize_sequences(datasets_table, sequences, args):
    """Render images from the given sequences and write an HTML index page.

    Walks `sequences` in order and, for each image that is not filtered out,
    records what is needed to render it plus an entry for the HTML page. Then
    renders every recorded image via `render_image_info` and writes
    `index.html` into `args.output_dir`.

    The image limit is checked once per sequence (after all of its images
    were collected), so every qualifying image of the sequence that reaches
    the limit is included.

    Args:
        datasets_table: mapping from dataset name to dataset info; the
            'container' entry is read here and the table is also passed to
            the MegadbUtils helpers.
        sequences: iterable of sequence dicts. 'dataset' and 'seq_id' are
            required; sequences without an 'images' key are skipped.
        args: options object; reads `trim_to_images_bboxes_labeled`,
            `pathsep_replacement`, `num_to_visualize` and `output_dir`, and
            is forwarded to `render_image_info`.
    """
    num_images = 0

    images_html = []
    rendering_info = []

    for seq in sequences:
        if 'images' not in seq:
            continue

        # dataset and seq_id are required fields
        dataset_name = seq['dataset']
        seq_id = seq['seq_id']

        # Sort the images in the sequence by frame number. 'frame_num' is
        # treated as optional below (default -1), so use the same default
        # here instead of raising KeyError on images that lack it.
        images_in_seq = seq['images']
        if len(images_in_seq) > 1:
            images_in_seq = sorted(
                images_in_seq, key=lambda im: im.get('frame_num', -1))

        for im in images_in_seq:
            if args.trim_to_images_bboxes_labeled and 'bbox' not in im:
                continue

            num_images += 1

            blob_path = MegadbUtils.get_full_path(
                datasets_table, dataset_name, im['file'])
            frame_num = im.get('frame_num', -1)
            im_class = im.get('class', None)
            if im_class is None:
                # no class label on the image, show the label on the sequence
                im_class = seq.get('class', [])

            # flatten the blob path into a single file name
            annotated_img_name = 'anno_' + blob_path.replace(
                '/', args.pathsep_replacement).replace(
                    '\\', args.pathsep_replacement)

            rendering = {
                'blob_service': MegadbUtils.get_blob_service(
                    datasets_table, dataset_name),
                'container_name': datasets_table[dataset_name]['container'],
                'blob_path': blob_path,
                'bbox': im.get('bbox', []),
                'annotated_img_name': annotated_img_name,
            }
            rendering_info.append(rendering)

            images_html.append({
                'filename': 'rendered_images/{}'.format(annotated_img_name),
                'title': 'Seq ID: {}. Frame number: {}<br/> Image file: {}<br/> number of boxes: {}, image class labels: {}'.format(seq_id, frame_num, blob_path, len(rendering['bbox']), im_class),
                'textStyle': 'font-family:verdana,arial,calibri;font-size:80%;text-align:left;margin-top:20;margin-bottom:5'
            })

        if num_images >= args.num_to_visualize:
            print('num_images visualized is {}'.format(num_images))
            break

    # images are rendered serially, one at a time
    render_image_info_partial = partial(render_image_info, args=args)
    for rendering in tqdm(rendering_info):
        render_image_info_partial(rendering)

    print('Making HTML...')

    html_path = os.path.join(args.output_dir, 'index.html')
    write_html_image_list(
        filename=html_path,
        images=images_html
    )
def visualize_incoming_annotations(args):
    """Render a sample of incoming bounding-box annotations to an HTML page.

    Loads the MegaDB sequence entries and the incoming annotation file,
    optionally samples `args.num_to_visualize` annotated images, downloads
    each image from blob storage, draws its annotated boxes, saves the result
    under `<output_dir>/rendered_images/` and writes `<output_dir>/index.html`.

    Args:
        args: options object; reads `megadb_entries` (path to a JSON file
            with a list of sequences), `incoming_annotation`,
            `num_to_visualize` (-1 means visualize everything),
            `trim_to_images_bboxes_labeled`, `output_image_width` and
            `output_dir`.
    """
    print('Connecting to MegaDB to get the datasets table...')
    megadb_utils = MegadbUtils()
    datasets_table = megadb_utils.get_datasets_table()

    print('Loading the MegaDB entries...')
    with open(args.megadb_entries) as f:
        sequences = json.load(f)
    print(f'Total number of sequences: {len(sequences)}')
    # dataset name -> seq_id -> list of image entries
    dataset_seq_images = defaultdict(dict)
    for seq in sequences:
        dataset_seq_images[seq['dataset']][seq['seq_id']] = seq['images']

    print('Loading incoming annotation entries...')
    incoming = IndexedJsonDb(args.incoming_annotation)
    print(
        f'Number of images in this annotation file: {len(incoming.image_id_to_image)}'
    )

    # Bound-check against the same collection we sample from: the number of
    # images with annotations can differ from the number of images, and
    # random.sample() raises ValueError when asked for more items than exist.
    incoming_id_to_anno = list(incoming.image_id_to_annotations.items())
    if (args.num_to_visualize != -1
            and args.num_to_visualize <= len(incoming_id_to_anno)):
        incoming_id_to_anno = sample(incoming_id_to_anno,
                                     args.num_to_visualize)

    # The file_name field in the incoming json looks like alka_squirrels.seq2020_05_07_25C.frame119221.jpg
    # we need to use the dataset, sequence and frame info to find the actual path in blob storage
    # using the sequences
    images_html = []
    for image_id, annotations in tqdm(incoming_id_to_anno):
        if args.trim_to_images_bboxes_labeled and annotations[0][
                'category_id'] == 5:
            # category_id 5 is No Object Visible
            continue

        anno_file_name = incoming.image_id_to_image[image_id]['file_name']
        parts = anno_file_name.split('.')
        dataset_name = parts[0]
        # strip only the leading 'seq' / 'frame' markers; maxsplit=1 keeps
        # sequence IDs that themselves contain 'seq' intact
        seq_id = parts[1].split('seq', 1)[1]
        frame_num = int(parts[2].split('frame', 1)[1])

        im_rel_path = get_image_rel_path(dataset_seq_images, dataset_name,
                                         seq_id, frame_num)
        if im_rel_path is None:
            print(f'Not found in megadb entries: dataset {dataset_name},'
                  f' seq_id {seq_id}, frame_num {frame_num}')
            continue

        im_full_path = megadb_utils.get_full_path(datasets_table, dataset_name,
                                                  im_rel_path)

        # download the image into memory
        container_client = megadb_utils.get_storage_client(
            datasets_table, dataset_name)
        downloader = container_client.download_blob(im_full_path)
        image_file = io.BytesIO()
        downloader.download_to_stream(image_file)
        image = vis_utils.open_image(image_file)

        boxes = [anno['bbox'] for anno in annotations]
        classes = [anno['category_id'] for anno in annotations]

        vis_utils.render_iMerit_boxes(boxes,
                                      classes,
                                      image,
                                      label_map=incoming.cat_id_to_name)

        file_name = '{}_gtbbox.jpg'.format(
            os.path.splitext(anno_file_name)[0].replace('/', '~'))
        image = vis_utils.resize_image(image, args.output_image_width)
        image.save(os.path.join(args.output_dir, 'rendered_images', file_name))

        # only non-empty bbox entries count as boxes in the title
        num_boxes = sum(1 for b in boxes if len(b) > 0)
        images_html.append({
            'filename':
            '{}/{}'.format('rendered_images', file_name),
            'title':
            '{}, number of boxes: {}'.format(anno_file_name, num_boxes),
            'textStyle':
            'font-family:verdana,arial,calibri;font-size:80%;text-align:left;margin-top:20;margin-bottom:5'
        })

    # Write to HTML
    images_html = sorted(images_html, key=lambda x: x['filename'])
    write_html_image_list(filename=os.path.join(args.output_dir, 'index.html'),
                          images=images_html,
                          options={
                              'headerHtml':
                              '<h1>Sample annotations from {}</h1>'.format(
                                  args.incoming_annotation)
                          })

    print('Visualized {} images.'.format(len(images_html)))