def get_image_dims(mega_db_seqs, dataset_name, datasets_table, n_cores):
    """Run `_get_image_dims` over bbox-carrying images and write the results back.

    Every image whose 'bbox' list has more than one entry is collected (and
    given a UUID 'id' if it has none), passed to `_get_image_dims` together
    with the dataset's storage client and path prefix, and then replaced in
    its sequence by the object that `_get_image_dims` returned.

    Args:
        mega_db_seqs: list of sequence dicts. Each must contain 'seq_id' and
            'images'; each image dict must contain 'file'.
        dataset_name: key into `datasets_table` for the dataset being processed.
        datasets_table: mapping from dataset name to its table entry; the
            entry's 'path_prefix' is read here, and the table is also handed
            to `MegadbUtils.get_storage_client`.
        n_cores: if truthy, size of the `workerpool` used to process images;
            otherwise images are processed one by one.

    Returns:
        The same `mega_db_seqs` list; each sequence's 'images' list is
        replaced in place.
    """
    images_to_get_dims_for = []

    for seq in tqdm(mega_db_seqs):
        assert 'seq_id' in seq and 'images' in seq
        assert all('file' in im for im in seq['images'])

        for im in seq['images']:
            # NOTE(review): `> 1` skips images with exactly one bbox entry -
            # confirm this threshold is intended.
            if 'bbox' in im and len(im['bbox']) > 1:
                if 'id' not in im:
                    # the id is the join key used to write results back below
                    im['id'] = str(uuid.uuid1())
                images_to_get_dims_for.append(im)

    print('Getting the dimensions for {} images'.format(len(images_to_get_dims_for)))

    storage_client = MegadbUtils.get_storage_client(datasets_table, dataset_name)
    path_prefix = datasets_table[dataset_name]['path_prefix']

    if n_cores:
        print('Using threads to download images')
        pool = workerpool(n_cores)
        updated_im_objects = pool.map(
            partial(_get_image_dims, storage_client, path_prefix),
            images_to_get_dims_for)
        print('pool.map has returned')
    else:
        print('Downloading images sequentially')
        updated_im_objects = []
        for image_obj in tqdm(images_to_get_dims_for):
            # Call the per-image worker, not this function: the previous
            # self-call passed three arguments to a four-parameter signature.
            updated_im_objects.append(
                _get_image_dims(storage_client, path_prefix, image_obj))

    print('Successfully updated {} images.'.format(len(updated_im_objects)))
    updated_im_objects = {i['id']: i for i in updated_im_objects}

    # update the sequences
    print('Updating the sequence objects...')
    for seq in tqdm(mega_db_seqs):
        updated_images = []
        for im in seq['images']:
            # .get(): images with zero or one bbox entry were never assigned
            # an 'id' above, so indexing im['id'] would raise KeyError.
            if 'bbox' in im and im.get('id') in updated_im_objects:
                updated_images.append(updated_im_objects[im['id']])
            else:
                updated_images.append(im)
        seq['images'] = updated_images

    return mega_db_seqs
def visualize_sequences(datasets_table, sequences, args):
    """Render images from MegaDB sequences and write an HTML index of them.

    For each sequence that has an 'images' field, its images are ordered by
    'frame_num', a rendering job is queued for each one, and a matching HTML
    entry is recorded. All queued jobs are then passed to `render_image_info`
    one at a time, and `write_html_image_list` writes `index.html` into
    `args.output_dir`.

    Args:
        datasets_table: datasets table passed to `MegadbUtils.get_full_path`
            and `MegadbUtils.get_storage_client`.
        sequences: iterable of sequence dicts; 'dataset' and 'seq_id' are
            required, 'images' and 'class' are optional.
        args: options object. Attributes read here:
            `trim_to_images_bboxes_labeled` (skip images without a 'bbox'),
            `pathsep_replacement` (replaces '/' and '\\' in output names),
            `num_to_visualize` (image budget; -1 means no limit) and
            `output_dir`. It is also forwarded to `render_image_info`.
    """
    num_images = 0
    images_html = []
    rendering_info = []

    for seq in sequences:
        if 'images' not in seq:
            continue

        # dataset and seq_id are required fields
        dataset_name = seq['dataset']
        seq_id = seq['seq_id']

        # Sort the images in the sequence. frame_num is optional (it is read
        # with a -1 default below), so the sort key must tolerate its absence
        # instead of raising KeyError.
        images_in_seq = sorted(seq['images'],
                               key=lambda x: x.get('frame_num', -1))

        for im in images_in_seq:
            if args.trim_to_images_bboxes_labeled and 'bbox' not in im:
                continue

            num_images += 1

            blob_path = MegadbUtils.get_full_path(datasets_table, dataset_name,
                                                  im['file'])
            frame_num = im.get('frame_num', -1)

            # if no class label on the image, show class label on the sequence
            im_class = im.get('class', None)
            if im_class is None:
                im_class = seq.get('class', [])

            annotated_img_name = 'anno_' + blob_path.replace(
                '/', args.pathsep_replacement).replace(
                '\\', args.pathsep_replacement)

            rendering = {
                'storage_client': MegadbUtils.get_storage_client(
                    datasets_table, dataset_name),
                'blob_path': blob_path,
                'bbox': im.get('bbox', []),
                'annotated_img_name': annotated_img_name,
            }
            rendering_info.append(rendering)

            images_html.append({
                'filename': 'rendered_images/{}'.format(annotated_img_name),
                'title': 'Seq ID: {}. Frame number: {}<br/> Image file: {}<br/> number of boxes: {}, image class labels: {}'
                .format(seq_id, frame_num, blob_path, len(rendering['bbox']),
                        im_class),
                'textStyle': 'font-family:verdana,arial,calibri;font-size:80%;text-align:left;margin-top:20;margin-bottom:5'
            })

        # The budget is checked once per sequence, so a sequence is never cut
        # short and the final count may exceed num_to_visualize. -1 means
        # "no limit", as in visualize_incoming_annotations; without the
        # explicit check, `num_images >= -1` would stop after one sequence.
        if (args.num_to_visualize != -1
                and num_images >= args.num_to_visualize):
            print('num_images visualized is {}'.format(num_images))
            break

    # Rendering runs sequentially, one job at a time.
    for rendering in tqdm(rendering_info):
        render_image_info(rendering, args=args)

    print('Making HTML...')
    html_path = os.path.join(args.output_dir, 'index.html')
    write_html_image_list(filename=html_path, images=images_html)
def _parse_incoming_file_name(anno_file_name):
    """Split an incoming annotation file name into (dataset, seq_id, frame_num).

    The name is expected to look like
    ``alka_squirrels.seq2020_05_07_25C.frame119221.jpg``: dot-separated
    dataset name, 'seq'-prefixed sequence ID and 'frame'-prefixed frame number.
    """
    parts = anno_file_name.split('.')
    dataset_name = parts[0]
    # maxsplit=1 strips only the leading marker, so a sequence ID that itself
    # contains the text "seq" is kept whole.
    seq_id = parts[1].split('seq', 1)[1]
    frame_num = int(parts[2].split('frame', 1)[1])
    return dataset_name, seq_id, frame_num


def visualize_incoming_annotations(args):
    """Render a sample of incoming bbox annotations onto their source images.

    Loads the MegaDB sequence entries from `args.megadb_entries` and the
    incoming annotations from `args.incoming_annotation`, resolves each
    annotated image to its blob path via the sequence entries, downloads it,
    draws the annotated boxes, saves the result under
    `<args.output_dir>/rendered_images/` and writes
    `<args.output_dir>/index.html` listing all rendered images.

    Args:
        args: options object. Attributes read here: `megadb_entries` (path to
            a JSON list of sequences), `incoming_annotation` (passed to
            `IndexedJsonDb`), `num_to_visualize` (sample size; -1 means all),
            `trim_to_images_bboxes_labeled` (skip images whose first
            annotation is category 5), `output_image_width` and `output_dir`.
    """
    print('Connecting to MegaDB to get the datasets table...')
    megadb_utils = MegadbUtils()
    datasets_table = megadb_utils.get_datasets_table()

    print('Loading the MegaDB entries...')
    with open(args.megadb_entries) as f:
        sequences = json.load(f)
    print(f'Total number of sequences: {len(sequences)}')

    # dataset name -> seq_id -> list of image entries
    dataset_seq_images = defaultdict(dict)
    for seq in sequences:
        dataset_seq_images[seq['dataset']][seq['seq_id']] = seq['images']

    print('Loading incoming annotation entries...')
    incoming = IndexedJsonDb(args.incoming_annotation)
    print(
        f'Number of images in this annotation file: {len(incoming.image_id_to_image)}'
    )

    # Compare the sample size against the collection actually sampled from:
    # random.sample raises ValueError if asked for more items than it has.
    annotated_items = list(incoming.image_id_to_annotations.items())
    if (args.num_to_visualize != -1
            and args.num_to_visualize <= len(annotated_items)):
        incoming_id_to_anno = sample(annotated_items, args.num_to_visualize)
    else:
        incoming_id_to_anno = annotated_items

    rendered_dir = os.path.join(args.output_dir, 'rendered_images')
    os.makedirs(rendered_dir, exist_ok=True)

    # The file_name field in the incoming json encodes the dataset, sequence
    # and frame; use them to find the actual path in blob storage via the
    # sequence entries loaded above.
    images_html = []
    for image_id, annotations in tqdm(incoming_id_to_anno):
        # category_id 5 is No Object Visible; an empty annotation list has no
        # labeled boxes either, and must not reach annotations[0].
        if args.trim_to_images_bboxes_labeled and (
                not annotations or annotations[0]['category_id'] == 5):
            continue

        anno_file_name = incoming.image_id_to_image[image_id]['file_name']
        dataset_name, seq_id, frame_num = _parse_incoming_file_name(
            anno_file_name)

        im_rel_path = get_image_rel_path(dataset_seq_images, dataset_name,
                                         seq_id, frame_num)
        if im_rel_path is None:
            print(f'Not found in megadb entries: dataset {dataset_name},'
                  f' seq_id {seq_id}, frame_num {frame_num}')
            continue

        im_full_path = megadb_utils.get_full_path(datasets_table, dataset_name,
                                                  im_rel_path)

        # download the image into memory
        container_client = megadb_utils.get_storage_client(
            datasets_table, dataset_name)
        downloader = container_client.download_blob(im_full_path)
        image_file = io.BytesIO()
        downloader.download_to_stream(image_file)
        image = vis_utils.open_image(image_file)

        boxes = [anno['bbox'] for anno in annotations]
        classes = [anno['category_id'] for anno in annotations]

        vis_utils.render_iMerit_boxes(boxes, classes, image,
                                      label_map=incoming.cat_id_to_name)

        file_name = '{}_gtbbox.jpg'.format(
            os.path.splitext(anno_file_name)[0].replace('/', '~'))
        image = vis_utils.resize_image(image, args.output_image_width)
        image.save(os.path.join(rendered_dir, file_name))

        num_boxes = sum(1 for b in boxes if len(b) > 0)
        images_html.append({
            'filename': '{}/{}'.format('rendered_images', file_name),
            'title': '{}, number of boxes: {}'.format(anno_file_name,
                                                      num_boxes),
            'textStyle': 'font-family:verdana,arial,calibri;font-size:80%;text-align:left;margin-top:20;margin-bottom:5'
        })

    # Write to HTML
    images_html = sorted(images_html, key=lambda x: x['filename'])
    write_html_image_list(filename=os.path.join(args.output_dir, 'index.html'),
                          images=images_html,
                          options={
                              'headerHtml':
                              '<h1>Sample annotations from {}</h1>'.format(
                                  args.incoming_annotation)
                          })

    print('Visualized {} images.'.format(len(images_html)))