def new_database(back):  # File -> New Database
    """Interactively create and open a new database.

    Prompts for a database name and a destination directory, validates the
    target, then creates the directory and opens it via
    ``back.open_database``.  Returns None when the user aborts or the
    target is invalid and the user declines to retry.
    """
    new_db = back.user_input('Enter the new database name')
    msg_put = 'Where should I put %r?' % new_db
    opt_put = ['Choose Directory', 'My Work Dir']
    reply = back.user_option(msg_put, 'options', opt_put, True)
    if reply == opt_put[1]:
        put_dir = back.get_work_directory()
    elif reply == opt_put[0]:
        msg = 'Select where to put the new database'
        put_dir = guitools.select_directory(msg)
    elif reply is None:
        back.user_info('No Reply. Aborting new database')
        print('[*back] abort new database()')
        return None
    else:
        raise Exception('Unknown reply=%r' % reply)
    new_db_dir = join(put_dir, new_db)
    # Check the put directory exists and the new database does not exist
    msg_try = None
    if not exists(put_dir):
        msg_try = 'Directory %r does not exist.' % put_dir
    elif exists(new_db_dir):
        msg_try = 'New Database %r already exists.' % new_db_dir
    if msg_try is not None:
        opt_try = ['Try Again']
        title_try = 'New Database Failed'
        try_again = back.user_option(msg_try, title_try, opt_try, False)
        if try_again == 'Try Again':
            return back.new_database()
        # BUGFIX: previously fell through on any other reply and created
        # the database anyway (clobbering an existing db or writing into
        # a nonexistent directory).  Abort instead.
        return None
    print('[*back] valid new_db_dir = %r' % new_db_dir)
    helpers.ensurepath(new_db_dir)
    back.open_database(new_db_dir)
def convert_named_chips(db_dir, img_dpath=None):
    """Build hotspotter tables from images named ``<name>_<num>.jpg``.

    Every image becomes one full-frame chip whose name is parsed from the
    filename.  Writes chip/name/image tables into the database's internal
    directory.

    Args:
        db_dir: database root directory.
        img_dpath: image directory; defaults to ``db_dir + '/images'``.
    """
    print('\n --- Convert Named Chips ---')
    # --- Initialize ---
    gt_format = '{}_{:d}.jpg'
    print('gt_format (name, num) = %r' % gt_format)
    if img_dpath is None:
        img_dpath = db_dir + '/images'
    print('Converting db_dir=%r and img_dpath=%r' % (db_dir, img_dpath))
    # --- Build Image Table ---
    # BUGFIX: this message used to say 'Building name table: ', duplicating
    # the name-table message below even though this section builds the
    # image table.
    helpers.print_('Building image table: ')
    gx2_gname = helpers.list_images(img_dpath)
    gx2_gid = range(1, len(gx2_gname) + 1)
    print('There are %d images' % len(gx2_gname))
    # ---- Build Name Table ---
    helpers.print_('Building name table: ')
    name_set = set([])
    for gx, gname in enumerate(gx2_gname):
        name, num = parse.parse(gt_format, gname)
        name_set.add(name)
    # Two '____' placeholders both map to nid 1 (unknown-name convention
    # used by the other converters in this file).
    nx2_name = ['____', '____'] + list(name_set)
    nx2_nid = [1, 1] + range(2, len(name_set) + 2)
    print('There are %d names' % (len(nx2_name) - 2))
    # ---- Build Chip Table ---
    print('[converdb] Building chip table: ')
    cx2_cid = []
    cx2_theta = []
    cx2_roi = []
    cx2_nx = []
    cx2_gx = []
    cid = 1

    def add_to_hs_tables(gname, name, roi, theta=0):
        # Append one chip row to the enclosing tables; cids are 1-indexed.
        cid = len(cx2_cid) + 1
        nx = nx2_name.index(name)
        gx = gx2_gname.index(gname)
        cx2_cid.append(cid)
        cx2_roi.append(roi)
        cx2_nx.append(nx)
        cx2_gx.append(gx)
        cx2_theta.append(theta)
        return cid

    for gx, gname in enumerate(gx2_gname):
        name, num = parse.parse(gt_format, gname)
        img_fpath = join(img_dpath, gname)
        (w, h) = Image.open(img_fpath).size
        roi = [1, 1, w, h]
        cid = add_to_hs_tables(gname, name, roi)
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    # BUGFIX: report the actual number of chips; '% (cid - 1)' undercounted
    # by one whenever at least one chip was added.
    print('There are %d chips' % len(cx2_cid))
    # Write tables
    internal_dir = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi, cx2_theta)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)
def init_database_from_images(db_dir, img_dpath=None, gt_format=None, allow_unknown_chips=False):
    """Initialize hotspotter tables for a database from a directory of images.

    Each readable image becomes one full-frame chip.  If ``gt_format`` is
    given, the chip name is parsed from the filename; otherwise every chip
    is unknown ('____').  Unknown chips are skipped unless
    ``allow_unknown_chips`` is True.

    Args:
        db_dir: database root directory.
        img_dpath: image directory; defaults to ``db_dir + '/images'``.
        gt_format: optional ``parse`` format string, e.g. '{}_{:d}.jpg'.
        allow_unknown_chips: keep chips whose name is '____'.

    Returns:
        True on completion.
    """
    # --- Initialize ---
    if img_dpath is None:
        img_dpath = db_dir + '/images'
    print('Converting db_dir=%r and img_dpath=%r' % (db_dir, img_dpath))
    gx2_gid, gx2_gname = imagetables_from_img_dpath(img_dpath)
    name_set = groundtruth_from_imagenames(gx2_gname, gt_format)
    nx2_name, nx2_nid = nametables_from_nameset(name_set)
    # ---- Build Chip Table ---
    helpers.print_('Building chip table: ')
    cx2_cid = []
    cx2_theta = []
    cx2_roi = []
    cx2_nx = []
    cx2_gx = []
    cid = 1

    def add_to_hs_tables(gname, name, roi, theta=0):
        # Append one chip row to the enclosing tables; cids are 1-indexed.
        cid = len(cx2_cid) + 1
        nx = nx2_name.index(name)
        gx = gx2_gname.index(gname)
        cx2_cid.append(cid)
        cx2_roi.append(roi)
        cx2_nx.append(nx)
        cx2_gx.append(gx)
        cx2_theta.append(theta)
        return cid

    for gx, gname in enumerate(gx2_gname):
        if gt_format is None:
            name = '____'
        else:
            name, num = parse.parse(gt_format, gname)
        if name == '____' and not allow_unknown_chips:
            continue
        img_fpath = join(img_dpath, gname)
        roi = roi_from_imgsize(img_fpath)
        # Idiom fix: was 'if not roi is None' (PEP 8: use 'is not None').
        if roi is not None:
            cid = add_to_hs_tables(gname, name, roi)
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    # BUGFIX: report the actual number of chips; '% (cid - 1)' undercounted
    # by one whenever at least one chip was added.
    print('There are %d chips' % len(cx2_cid))
    # Write tables
    internal_dir = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi, cx2_theta)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)
    return True
def save_if_requested(hs, subdir):
    """Save the current figure under the result dir when --save-figures is set.

    Does nothing unless ``hs.args.save_figures`` is truthy.  When ``subdir``
    is given it is sanitized and appended to the result directory.
    """
    if not hs.args.save_figures:
        return
    #print('[viz] Dumping Image')
    fpath = hs.dirs.result_dir
    # Idiom fix: was 'if not subdir is None' (PEP 8: use 'is not None').
    if subdir is not None:
        subdir = helpers.sanatize_fname2(subdir)
        fpath = join(fpath, subdir)
    helpers.ensurepath(fpath)
    df2.save_figure(fpath=fpath, usetitle=True)
    df2.reset()
def add_images(hs, fpath_list, move_images=True):
    """Add new images to the hotspotter database.

    When ``move_images`` is True the files are copied into the database's
    image directory (existing destinations are skipped); otherwise the
    original paths are indexed in place.  Already-indexed images are not
    re-added.

    Returns:
        Number of newly indexed images.
    """
    nImages = len(fpath_list)
    print('[hs.add_imgs] adding %d images' % nImages)
    img_dir = hs.dirs.img_dir
    copy_list = []
    helpers.ensurepath(img_dir)
    if move_images:
        # Build lists of where the new images will be
        fpath_list2 = [join(img_dir, split(fpath)[1]) for fpath in fpath_list]
        copy_iter = izip(fpath_list, fpath_list2)
        copy_list = [(src, dst) for src, dst in copy_iter if not exists(dst)]
        nExist = len(fpath_list2) - len(copy_list)
        print('[hs] copying %d images' % len(copy_list))
        print('[hs] %d images already exist' % nExist)
        # RCOS TODO: Copying like this should be a helper function.
        # It appears in multiple places
        # Also there should be the option of parallelization? IDK, these are
        # disk writes, but it still might help.
        mark_progress, end_progress = helpers.progress_func(len(copy_list),
                                                            lbl='Copying Image')
        for count, (src, dst) in enumerate(copy_list):
            shutil.copy(src, dst)
            mark_progress(count)
        end_progress()
    else:
        print('[hs.add_imgs] using original image paths')
        fpath_list2 = fpath_list
    # Get location of the new images relative to the image dir
    gx2_gname = hs.tables.gx2_gname.tolist()
    gx2_aif = hs.tables.gx2_aif.tolist()
    relpath_list = [relpath(fpath, img_dir) for fpath in fpath_list2]
    current_gname_set = set(gx2_gname)
    # Check to make sure the gnames are not currently indexed
    new_gnames = [gname for gname in relpath_list
                  if not gname in current_gname_set]
    new_aifs = [False] * len(new_gnames)
    nNewImages = len(new_gnames)
    nIndexed = nImages - nNewImages
    print('[hs.add_imgs] new_gnames:\n' + '\n'.join(new_gnames))
    print('[hs.add_imgs] %d images already indexed.' % nIndexed)
    # BUGFIX: this line printed nIndexed instead of nNewImages.
    print('[hs.add_imgs] Added %d new images.' % nNewImages)
    # Append the new gnames to the hotspotter table
    hs.tables.gx2_gname = np.array(gx2_gname + new_gnames)
    hs.tables.gx2_aif = np.array(gx2_aif + new_aifs)
    hs.update_samples()
    return nNewImages
def dump(hs, subdir=None, quality=False, overwrite=False):
    """Save the current figure into the result directory.

    The ``quality`` flag selects between two preset figure-size/DPI/font
    configurations in ``df2`` before saving.  An optional ``subdir`` is
    appended to the result directory, which is created if missing.
    """
    # Only the exact singletons True and False reconfigure df2; any other
    # value (e.g. None) leaves the current settings untouched.
    if quality is True:
        df2.FIGSIZE = df2.golden_wh2(12)
        df2.DPI = 120
        df2.FONTS.figtitle = df2.FONTS.small
    elif quality is False:
        df2.FIGSIZE = df2.golden_wh2(8)
        df2.DPI = 90
        df2.FONTS.figtitle = df2.FONTS.smaller
    #print('[viz] Dumping Image')
    dump_dir = hs.dirs.result_dir
    if subdir is not None:
        dump_dir = join(dump_dir, subdir)
    helpers.ensurepath(dump_dir)
    df2.save_figure(fpath=dump_dir, usetitle=True, overwrite=overwrite)
    df2.reset()
def __dump_text_report(allres, report_type=None):
    """Write one of the precomputed report strings to CSV files.

    The report string is looked up by attribute name on ``allres`` and
    written both to the result directory and to a timestamped copy under
    'timestamped_results'.

    Args:
        allres: results object holding report strings and hs dirs.
        report_type: attribute name of the report; defaults to 'rankres_str'.
    """
    # BUGFIX: the old guard `if not 'report_type' in vars()` could never
    # fire -- report_type is a parameter, so it is always present in
    # vars().  Use a real default value instead (backward compatible:
    # existing callers still pass report_type positionally).
    if report_type is None:
        report_type = 'rankres_str'
    print('[rr2] Dumping textfile: ' + report_type)
    report_str = allres.__dict__[report_type]
    # Get directories
    result_dir = allres.hs.dirs.result_dir
    timestamp_dir = join(result_dir, 'timestamped_results')
    helpers.ensurepath(timestamp_dir)
    helpers.ensurepath(result_dir)
    # Write to timestamp and result dir
    timestamp = helpers.get_timestamp()
    csv_timestamp_fname = report_type + allres.title_suffix + timestamp + '.csv'
    csv_timestamp_fpath = join(timestamp_dir, csv_timestamp_fname)
    csv_fname = report_type + allres.title_suffix + '.csv'
    csv_fpath = join(result_dir, csv_fname)
    helpers.write_to(csv_fpath, report_str)
    helpers.write_to(csv_timestamp_fpath, report_str)
def add_images(hs, fpath_list, move_images=True):
    """Add new images to the hotspotter database.

    When ``move_images`` is True the files are copied into the database's
    image directory (existing destinations are skipped); otherwise the
    original paths are indexed in place.  Already-indexed images are not
    re-added.

    Returns:
        Number of newly indexed images.
    """
    nImages = len(fpath_list)
    print('[hs.add_imgs] adding %d images' % nImages)
    img_dir = hs.dirs.img_dir
    copy_list = []
    helpers.ensurepath(img_dir)
    if move_images:
        # Build lists of where the new images will be
        fpath_list2 = [join(img_dir, split(fpath)[1]) for fpath in fpath_list]
        copy_iter = izip(fpath_list, fpath_list2)
        copy_list = [(src, dst) for src, dst in copy_iter if not exists(dst)]
        nExist = len(fpath_list2) - len(copy_list)
        print('[hs] copying %d images' % len(copy_list))
        print('[hs] %d images already exist' % nExist)
        # RCOS TODO: Copying like this should be a helper function.
        # It appears in multiple places
        # Also there should be the option of parallelization? IDK, these are
        # disk writes, but it still might help.
        mark_progress, end_progress = helpers.progress_func(len(copy_list),
                                                            lbl='Copying Image')
        for count, (src, dst) in enumerate(copy_list):
            shutil.copy(src, dst)
            mark_progress(count)
        end_progress()
    else:
        print('[hs.add_imgs] using original image paths')
        fpath_list2 = fpath_list
    # Get location of the new images relative to the image dir
    gx2_gname = hs.tables.gx2_gname.tolist()
    gx2_aif = hs.tables.gx2_aif.tolist()
    relpath_list = [relpath(fpath, img_dir) for fpath in fpath_list2]
    current_gname_set = set(gx2_gname)
    # Check to make sure the gnames are not currently indexed
    new_gnames = [gname for gname in relpath_list
                  if not gname in current_gname_set]
    new_aifs = [False] * len(new_gnames)
    nNewImages = len(new_gnames)
    nIndexed = nImages - nNewImages
    print('[hs.add_imgs] new_gnames:\n' + '\n'.join(new_gnames))
    print('[hs.add_imgs] %d images already indexed.' % nIndexed)
    # BUGFIX: this line printed nIndexed instead of nNewImages.
    print('[hs.add_imgs] Added %d new images.' % nNewImages)
    # Append the new gnames to the hotspotter table
    hs.tables.gx2_gname = np.array(gx2_gname + new_gnames)
    hs.tables.gx2_aif = np.array(gx2_aif + new_aifs)
    hs.update_samples()
    return nNewImages
# NOTE(review): this span is a script fragment (spyder-style '#%%' cells).
# The leading 'return roi' is the tail of a definition that starts outside
# this chunk -- it is kept verbatim here; confirm against the full file.
    return roi
#%%
# =============================================================================
# Initialization - dataset
# =============================================================================
# NOTE(review): 'dpath', 'new_db', 'Flag_new_db', 'io', and
# 'parse_arguments' are presumably defined in earlier script cells --
# verify before running this cell in isolation.
db_dir = join(dpath, new_db)
#------ function:[hsgui]-[guitools]- def select_directory
caption = 'Select Directory'
print('Selected Directory: %r' % dpath)
# Remember the last selection directory in the global cache.
io.global_cache_write('select_directory', split(dpath)[0])
print('[*back] valid new_db_dir = %r' % db_dir)
io.global_cache_write('db_dir', db_dir)
helpers.ensurepath(db_dir)
# Bit 0 of Flag_new_db requests a fresh database: wipe any existing one.
if Flag_new_db & 1:
    if exists(db_dir):
        shutil.rmtree(db_dir)
defaultdb = None
preload = False
args = parse_arguments(defaultdb, defaultdb == 'cache')
# --- Build HotSpotter API ---
hs = open_database(db_dir)
#%%
# =============================================================================
# Initialization - images
def wildid_to_tables(db_dir, img_dpath, column_labels, column_list):
    """Convert WildID-style spreadsheet columns into hotspotter tables.

    Each spreadsheet row names one animal and references ``chips_per_name``
    images; extra columns become per-chip or pairwise properties.  Images
    not referenced by any row are added as unknown ('____') chips.  Writes
    chip/name/image tables into the database's internal directory.

    Args:
        db_dir: database root directory.
        img_dpath: directory containing the referenced images.
        column_labels: spreadsheet column header strings.
        column_list: one list of cell values per column (equal lengths).
    """
    row_lengths = [len(col) for col in column_list]
    num_rows = row_lengths[0]
    assert all([num_rows == rowlen for rowlen in row_lengths]), 'number of rows in xlsx file must be consistent'
    #header = 'Converted from: '+repr(xlsx_fpath)
    #csv_string = ld2.make_csv_table(column_labels, column_list, header)
    # Get Image set
    print('[convert] Building image table')
    gx2_gid, gx2_gname = imagetables_from_img_dpath(img_dpath)
    # Get name set
    print('[convert] Building name table')

    def get_lbl_pos(column_labels, valid_labels):
        # Return the column index of the first label found; raise otherwise.
        for lbl in valid_labels:
            index = helpers.listfind(column_labels, lbl)
            if index is not None:
                return index
        raise Exception('There is no valid label')
    name_colx = get_lbl_pos(column_labels, ['ANIMAL_ID', 'AnimalID'])
    name_set = set(column_list[name_colx])
    nx2_name, nx2_nid = nametables_from_nameset(name_set)
    # Get chip set
    print('[convert] build chip table')
    # ---------
    # This format has multiple images per row
    chips_per_name = 2  # this is apparently always 2

    def get_multiprop_colx_list(prefix):
        # Column indexes for 'prefix1', 'prefix2', ... one per chip slot.
        colx_list = []
        for num in xrange(chips_per_name):
            lbl = prefix + str(num + 1)
            colx = get_lbl_pos(column_labels, [lbl])
            colx_list.append(colx)
        return colx_list
    # ---------
    # Essential properties
    #prop2_colx_list = {}
    try:
        image_colx_list = get_multiprop_colx_list('IMAGE_')
    except Exception:
        image_colx_list = get_multiprop_colx_list('Image')
    # ---------
    # Nonessential multi-properties (one column per chip slot)
    try_multiprops = ['DATE_NO']
    multiprop2_colx = {}
    for key in try_multiprops:
        try:
            multiprop2_colx[key] = get_multiprop_colx_list(key)
        except Exception:
            pass
    # ---------
    # Nonessential single-properties (one column per row)
    try_props = ['SEX']
    prop2_colx = {}
    for key in try_props:
        try:
            other_colx = get_lbl_pos(column_labels, [key])
            prop2_colx[key] = other_colx
        except Exception:
            pass
    # ---------
    # Nonessential pairwise-properties (apply to the chip pair of a row)
    try_match_props = ['matches', 'WildID_score']
    pairprop2_colx = {}
    for key in try_match_props:
        try:
            other_colx = get_lbl_pos(column_labels, [key])
            pairprop2_colx[key] = other_colx
        except Exception:
            pass
    # ---------
    # Build tables
    cx2_cid = []
    cx2_theta = []
    cx2_roi = []
    cx2_nx = []
    cx2_gx = []
    prop_dict = {}
    pairwise_dict = {}
    gnameroi_to_cid = {}
    for key in prop2_colx.keys():
        prop_dict[key] = []
    for key in multiprop2_colx.keys():
        prop_dict[key] = []
    cid = 1

    def wildid_add_to_hs_tables(gname, name, roi, theta=0, **kwargs):
        # Append one chip row; kwargs become per-chip property values.
        cid = len(cx2_cid) + 1
        nx = nx2_name.index(name)
        gx = gx2_gname.index(gname)
        cx2_cid.append(cid)
        cx2_roi.append(roi)
        cx2_nx.append(nx)
        cx2_gx.append(gx)
        for key, val in kwargs.iteritems():
            prop_dict[key].append(val)
        cx2_theta.append(theta)
        # In-place progress readout on stdout.
        sys.stdout.write(('\b' * 10) + 'cid = %4d' % cid)
        return cid
    # ---------
    # Wildid parsing
    bad_rows = 0
    for rowx in xrange(num_rows):
        name = column_list[name_colx][rowx]
        tbl_kwargs2 = {key: column_list[val][rowx] for key, val in prop2_colx.iteritems()}
        pairwise_vals = [column_list[colx][rowx] for colx in pairprop2_colx.values()]
        cid_tup = []
        for num in xrange(chips_per_name):  # TODO: This is always just pairwise
            img_colx = image_colx_list[num]
            gname = column_list[img_colx][rowx]
            tbl_kwargs1 = {key: column_list[val[num]][rowx] for key, val in multiprop2_colx.iteritems()}
            tbl_kwargs = dict(tbl_kwargs1.items() + tbl_kwargs2.items())
            roi = roi_from_imgsize(join(img_dpath, gname), silent=True)
            if roi is None:
                # Image is missing or unreadable; count the row as bad.
                img_fpath = join(img_dpath, gname)
                bad_rows += 1
                if not exists(img_fpath):
                    print('nonexistant image: %r' % gname)
                else:
                    print('corrupted image: %r' % gname)
                continue
            # Deduplicate: the same (image, roi) may appear in several rows.
            gnameroi = (gname, tuple(roi))
            if gnameroi in gnameroi_to_cid.keys():
                cid = gnameroi_to_cid[gnameroi]
                cid_tup.append(cid)
                continue
            cid = wildid_add_to_hs_tables(gname, name, roi, **tbl_kwargs)
            gnameroi_to_cid[gnameroi] = cid
            cid_tup.append(cid)
        pairwise_dict[tuple(cid_tup)] = pairwise_vals
    print('bad_rows = %r ' % bad_rows)
    print('num_rows = %r ' % num_rows)
    print('chips_per_name = %r ' % chips_per_name)
    print('cid = %r ' % cid)
    print('num pairwise properties: %r' % len(pairwise_dict))
    print('implementation of pairwise properties does not yet exist')
    num_known_chips = len(cx2_cid)
    print('[convert] Added %r known chips.' % num_known_chips)
    # Add the rest of the nongroundtruthed chips
    print('[convert] Adding unknown images to table')
    # Check that images were unique
    unique_gx = np.unique(np.array(cx2_gx))
    print('len(cx2_gx)=%r' % len(cx2_gx))
    print('len(unique_gx)=%r' % len(unique_gx))
    assert len(cx2_gx) == len(unique_gx), \
        'There are images specified twice'
    # Check that cids were unique
    cx2_cid_arr = np.array(cx2_cid)
    valid_cids = cx2_cid_arr[np.where(cx2_cid_arr > 0)[0]]
    unique_cids = np.unique(valid_cids)
    print('len(cx2_cid) = %r' % len(cx2_cid))
    print('len(valid_cids) = %r' % len(valid_cids))
    print('len(unique_cids) = %r' % len(unique_cids))
    assert len(valid_cids) == len(unique_cids), \
        'There are chipids specified twice'
    known_gx_set = set(cx2_gx)
    for gx, gname in enumerate(gx2_gname):
        if gx in known_gx_set:
            continue
        name = '____'
        roi = roi_from_imgsize(join(img_dpath, gname), silent=False)
        # Unknown chips get 'NA' placeholders for every property column.
        tbl_kwargs1 = {key: 'NA' for key, val in multiprop2_colx.iteritems()}
        tbl_kwargs2 = {key: 'NA' for key, val in prop2_colx.iteritems()}
        tbl_kwargs = dict(tbl_kwargs1.items() + tbl_kwargs2.items())
        if not roi is None:
            cid = wildid_add_to_hs_tables(gname, name, roi, **tbl_kwargs)
    num_unknown_chips = len(cx2_cid) - num_known_chips
    print('[convert] Added %r more unknown chips.' % num_unknown_chips)
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    # NOTE(review): '% (cid - 1)' looks off by one relative to len(cx2_cid)
    # -- confirm intended semantics before changing.
    print('[convert] There are %d chips' % (cid - 1))
    #
    # Write tables
    internal_dir = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi, cx2_theta, prop_dict)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)
    print('[convert] finished conversion')
def convert_from_oxford_style(db_dir):
    """Convert an Oxford/Paris-style groundtruth dataset into hotspotter tables.

    Reads groundtruth files from ``db_dir/oxford_style_gt``, skipping files
    listed in 'corrupted_files.txt', builds query and database chips, and
    writes chip/name/image tables into the database's internal directory.
    Images without groundtruth are added as unknown ('____') full-frame
    chips.
    """
    # Get directories for the oxford groundtruth
    oxford_gt_dpath = join(db_dir, 'oxford_style_gt')
    helpers.assertpath(oxford_gt_dpath)
    # Check for corrupted files (Looking at your Paris Buildings Dataset)
    corrupted_file_fpath = join(oxford_gt_dpath, 'corrupted_files.txt')
    corrupted_gname_set = set([])
    if helpers.checkpath(corrupted_file_fpath):
        with open(corrupted_file_fpath) as f:
            corrupted_gname_list = f.read().splitlines()
        corrupted_gname_set = set(corrupted_gname_list)
    # Recursively get relative path of all files in img_dpath
    print('Loading Oxford Style Images from: ' + db_dir)
    img_dpath = join(db_dir, 'images')
    helpers.assertpath(img_dpath)
    gname_list_ = [join(relpath(root, img_dpath), fname).replace('\\', '/').replace('./', '')
                   for (root, dlist, flist) in os.walk(img_dpath)
                   for fname in iter(flist)]
    gname_list = [gname for gname in iter(gname_list_)
                  if not gname in corrupted_gname_set and helpers.matches_image(gname)]
    print(' * num_images = %d ' % len(gname_list))
    # Read the Oxford Style Groundtruth files
    print('Loading Oxford Style Names and Chips')
    gt_fname_list = os.listdir(oxford_gt_dpath)
    num_gt_files = len(gt_fname_list)
    query_chips = []
    gname2_chips_raw = collections.defaultdict(list)
    name_set = set([])
    print(' * num_gt_files = %d ' % num_gt_files)
    sys.stdout.write('parsed: 0000/%4d' % (num_gt_files))
    for gtx, gt_fname in enumerate(gt_fname_list):
        # In-place progress readout.
        sys.stdout.write(('\b' * 9) + '%4d/%4d' % (gtx + 1, num_gt_files))
        # NOTE(review): this flushes when gtx % 10 == 1, not every 10th
        # iteration exactly -- presumably just a throttle; confirm intent.
        if gtx % 10 - 1 == 0:
            sys.stdout.flush()
        if gt_fname == 'corrupted_files.txt':
            continue
        #Get name, quality, and num from fname
        (name, num, quality) = __oxgtfile2_oxsty_gttup(gt_fname)
        gt_fpath = join(oxford_gt_dpath, gt_fname)
        name_set.add(name)
        oxsty_chip_info_sublist = __read_oxsty_gtfile(gt_fpath, name, quality,
                                                      img_dpath, corrupted_gname_set)
        if quality == 'query':
            for (gname, roi) in iter(oxsty_chip_info_sublist):
                query_chips.append((gname, roi, name, num))
        else:
            for (gname, roi) in iter(oxsty_chip_info_sublist):
                gname2_chips_raw[gname].append((name, roi, quality))
    sys.stdout.write('\n')
    print(' * num_query images = %d ' % len(query_chips))
    # Remove duplicates img.jpg : (*1.txt, *2.txt, ...) -> (*.txt)
    gname2_chips = collections.defaultdict(list)
    multinamed_gname_list = []
    for gname, val in gname2_chips_raw.iteritems():
        # Deduplicate identical (name, roi, quality) triples via repr.
        val_repr = map(repr, val)
        unique_reprs = set(val_repr)
        unique_indexes = [val_repr.index(urep) for urep in unique_reprs]
        for ux in unique_indexes:
            gname2_chips[gname].append(val[ux])
        if len(gname2_chips[gname]) > 1:
            multinamed_gname_list.append(gname)
    # print some statistics
    query_gname_list = [tup[0] for tup in query_chips]
    gname_with_groundtruth_list = gname2_chips.keys()
    gname_without_groundtruth_list = np.setdiff1d(gname_list, gname_with_groundtruth_list)
    print(' * num_images = %d ' % len(gname_list))
    print(' * images with groundtruth = %d ' % len(gname_with_groundtruth_list))
    print(' * images without groundtruth = %d ' % len(gname_without_groundtruth_list))
    print(' * images with multi-groundtruth = %d ' % len(multinamed_gname_list))
    #make sure all queries have ground truth and there are no duplicate queries
    assert len(query_gname_list) == len(np.intersect1d(query_gname_list,
                                                       gname_with_groundtruth_list))
    assert len(query_gname_list) == len(set(query_gname_list))
    # build hotspotter tables
    print('adding to table: ')
    gx2_gname = gname_list
    # Two '____' placeholders share nid 1 (unknown-name convention).
    nx2_name = ['____', '____'] + list(name_set)
    nx2_nid = [1, 1] + range(2, len(name_set) + 2)
    gx2_gid = range(1, len(gx2_gname) + 1)
    cx2_cid = []
    cx2_theta = []
    cx2_quality = []
    cx2_roi = []
    cx2_nx = []
    cx2_gx = []
    prop_dict = {'oxnum': []}

    def add_to_hs_tables(gname, name, roi, quality, num=''):
        # Append one chip row; cids are 1-indexed.
        cid = len(cx2_cid) + 1
        nx = nx2_name.index(name)
        # Index 0 is the first '____' placeholder; remap to the canonical
        # unknown slot at index 1.
        if nx == 0:
            nx = 1
        gx = gx2_gname.index(gname)
        cx2_cid.append(cid)
        cx2_roi.append(roi)
        cx2_quality.append(quality)
        cx2_nx.append(nx)
        cx2_gx.append(gx)
        cx2_theta.append(0)
        prop_dict['oxnum'].append(num)
        sys.stdout.write(('\b' * 10) + 'cid = %4d' % cid)
        return cid
    for gname, roi, name, num in query_chips:
        add_to_hs_tables(gname, name, roi, 'query', num)
    for gname in gname2_chips.keys():
        if len(gname2_chips[gname]) == 1:
            (name, roi, quality) = gname2_chips[gname][0]
            add_to_hs_tables(gname, name, roi, quality)
        else:
            # just take the first name. This is foobar
            names, rois, qualities = zip(*gname2_chips[gname])
            add_to_hs_tables(gname, names[0], rois[0], qualities[0])
    for gname in gname_without_groundtruth_list:
        gpath = join(img_dpath, gname)
        try:
            (w, h) = Image.open(gpath).size
            roi = [0, 0, w, h]
            add_to_hs_tables(gname, '____', roi, 'unknown')
        except Exception as ex:
            # Best-effort: skip unreadable images rather than abort.
            print('Exception ex=%r' % ex)
            print('Not adding gname=%r' % gname)
            print('----')
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    #
    # Write tables
    internal_dir = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi,
                     cx2_theta, prop_dict)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)
def convert_from_oxford_style(db_dir):
    """Convert an Oxford/Paris-style groundtruth dataset into hotspotter tables.

    Reads groundtruth files from ``db_dir/oxford_style_gt``, skipping files
    listed in 'corrupted_files.txt', builds query and database chips, and
    writes chip/name/image tables into the database's internal directory.
    Images without groundtruth are added as unknown ('____') full-frame
    chips.

    NOTE(review): this definition duplicates an earlier copy of
    ``convert_from_oxford_style`` in this file; the later one wins at
    import time.  Consider deleting one of them.
    """
    # Get directories for the oxford groundtruth
    oxford_gt_dpath = join(db_dir, 'oxford_style_gt')
    helpers.assertpath(oxford_gt_dpath)
    # Check for corrupted files (Looking at your Paris Buildings Dataset)
    corrupted_file_fpath = join(oxford_gt_dpath, 'corrupted_files.txt')
    corrupted_gname_set = set([])
    if helpers.checkpath(corrupted_file_fpath):
        with open(corrupted_file_fpath) as f:
            corrupted_gname_list = f.read().splitlines()
        corrupted_gname_set = set(corrupted_gname_list)
    # Recursively get relative path of all files in img_dpath
    print('Loading Oxford Style Images from: ' + db_dir)
    img_dpath = join(db_dir, 'images')
    helpers.assertpath(img_dpath)
    gname_list_ = [join(relpath(root, img_dpath), fname).replace('\\', '/').replace('./', '')
                   for (root, dlist, flist) in os.walk(img_dpath)
                   for fname in iter(flist)]
    gname_list = [gname for gname in iter(gname_list_)
                  if not gname in corrupted_gname_set and helpers.matches_image(gname)]
    print(' * num_images = %d ' % len(gname_list))
    # Read the Oxford Style Groundtruth files
    print('Loading Oxford Style Names and Chips')
    gt_fname_list = os.listdir(oxford_gt_dpath)
    num_gt_files = len(gt_fname_list)
    query_chips = []
    gname2_chips_raw = collections.defaultdict(list)
    name_set = set([])
    print(' * num_gt_files = %d ' % num_gt_files)
    sys.stdout.write('parsed: 0000/%4d' % (num_gt_files))
    for gtx, gt_fname in enumerate(gt_fname_list):
        # In-place progress readout.
        sys.stdout.write(('\b' * 9) + '%4d/%4d' % (gtx + 1, num_gt_files))
        # NOTE(review): this flushes when gtx % 10 == 1, not every 10th
        # iteration exactly -- presumably just a throttle; confirm intent.
        if gtx % 10 - 1 == 0:
            sys.stdout.flush()
        if gt_fname == 'corrupted_files.txt':
            continue
        #Get name, quality, and num from fname
        (name, num, quality) = __oxgtfile2_oxsty_gttup(gt_fname)
        gt_fpath = join(oxford_gt_dpath, gt_fname)
        name_set.add(name)
        oxsty_chip_info_sublist = __read_oxsty_gtfile(gt_fpath, name, quality,
                                                      img_dpath, corrupted_gname_set)
        if quality == 'query':
            for (gname, roi) in iter(oxsty_chip_info_sublist):
                query_chips.append((gname, roi, name, num))
        else:
            for (gname, roi) in iter(oxsty_chip_info_sublist):
                gname2_chips_raw[gname].append((name, roi, quality))
    sys.stdout.write('\n')
    print(' * num_query images = %d ' % len(query_chips))
    # Remove duplicates img.jpg : (*1.txt, *2.txt, ...) -> (*.txt)
    gname2_chips = collections.defaultdict(list)
    multinamed_gname_list = []
    for gname, val in gname2_chips_raw.iteritems():
        # Deduplicate identical (name, roi, quality) triples via repr.
        val_repr = map(repr, val)
        unique_reprs = set(val_repr)
        unique_indexes = [val_repr.index(urep) for urep in unique_reprs]
        for ux in unique_indexes:
            gname2_chips[gname].append(val[ux])
        if len(gname2_chips[gname]) > 1:
            multinamed_gname_list.append(gname)
    # print some statistics
    query_gname_list = [tup[0] for tup in query_chips]
    gname_with_groundtruth_list = gname2_chips.keys()
    gname_without_groundtruth_list = np.setdiff1d(gname_list, gname_with_groundtruth_list)
    print(' * num_images = %d ' % len(gname_list))
    print(' * images with groundtruth = %d ' % len(gname_with_groundtruth_list))
    print(' * images without groundtruth = %d ' % len(gname_without_groundtruth_list))
    print(' * images with multi-groundtruth = %d ' % len(multinamed_gname_list))
    #make sure all queries have ground truth and there are no duplicate queries
    assert len(query_gname_list) == len(
        np.intersect1d(query_gname_list, gname_with_groundtruth_list))
    assert len(query_gname_list) == len(set(query_gname_list))
    # build hotspotter tables
    print('adding to table: ')
    gx2_gname = gname_list
    # Two '____' placeholders share nid 1 (unknown-name convention).
    nx2_name = ['____', '____'] + list(name_set)
    nx2_nid = [1, 1] + range(2, len(name_set) + 2)
    gx2_gid = range(1, len(gx2_gname) + 1)
    cx2_cid = []
    cx2_theta = []
    cx2_quality = []
    cx2_roi = []
    cx2_nx = []
    cx2_gx = []
    prop_dict = {'oxnum': []}

    def add_to_hs_tables(gname, name, roi, quality, num=''):
        # Append one chip row; cids are 1-indexed.
        cid = len(cx2_cid) + 1
        nx = nx2_name.index(name)
        # Index 0 is the first '____' placeholder; remap to the canonical
        # unknown slot at index 1.
        if nx == 0:
            nx = 1
        gx = gx2_gname.index(gname)
        cx2_cid.append(cid)
        cx2_roi.append(roi)
        cx2_quality.append(quality)
        cx2_nx.append(nx)
        cx2_gx.append(gx)
        cx2_theta.append(0)
        prop_dict['oxnum'].append(num)
        sys.stdout.write(('\b' * 10) + 'cid = %4d' % cid)
        return cid
    for gname, roi, name, num in query_chips:
        add_to_hs_tables(gname, name, roi, 'query', num)
    for gname in gname2_chips.keys():
        if len(gname2_chips[gname]) == 1:
            (name, roi, quality) = gname2_chips[gname][0]
            add_to_hs_tables(gname, name, roi, quality)
        else:
            # just take the first name. This is foobar
            names, rois, qualities = zip(*gname2_chips[gname])
            add_to_hs_tables(gname, names[0], rois[0], qualities[0])
    for gname in gname_without_groundtruth_list:
        gpath = join(img_dpath, gname)
        try:
            (w, h) = Image.open(gpath).size
            roi = [0, 0, w, h]
            add_to_hs_tables(gname, '____', roi, 'unknown')
        except Exception as ex:
            # Best-effort: skip unreadable images rather than abort.
            print('Exception ex=%r' % ex)
            print('Not adding gname=%r' % gname)
            print('----')
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    #
    # Write tables
    internal_dir = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi,
                     cx2_theta, prop_dict)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)
def wildid_to_tables(db_dir, img_dpath, column_labels, column_list):
    """Convert WildID-style spreadsheet columns into hotspotter tables.

    Each spreadsheet row names one animal and references ``chips_per_name``
    images; extra columns become per-chip or pairwise properties.  Images
    not referenced by any row are added as unknown ('____') chips.  Writes
    chip/name/image tables into the database's internal directory.

    NOTE(review): this definition duplicates an earlier copy of
    ``wildid_to_tables`` in this file; the later one wins at import time.
    Consider deleting one of them.

    Args:
        db_dir: database root directory.
        img_dpath: directory containing the referenced images.
        column_labels: spreadsheet column header strings.
        column_list: one list of cell values per column (equal lengths).
    """
    row_lengths = [len(col) for col in column_list]
    num_rows = row_lengths[0]
    assert all([num_rows == rowlen for rowlen in row_lengths]), 'number of rows in xlsx file must be consistent'
    #header = 'Converted from: '+repr(xlsx_fpath)
    #csv_string = ld2.make_csv_table(column_labels, column_list, header)
    # Get Image set
    print('[convert] Building image table')
    gx2_gid, gx2_gname = imagetables_from_img_dpath(img_dpath)
    # Get name set
    print('[convert] Building name table')

    def get_lbl_pos(column_labels, valid_labels):
        # Return the column index of the first label found; raise otherwise.
        for lbl in valid_labels:
            index = helpers.listfind(column_labels, lbl)
            if index is not None:
                return index
        raise Exception('There is no valid label')
    name_colx = get_lbl_pos(column_labels, ['ANIMAL_ID', 'AnimalID'])
    name_set = set(column_list[name_colx])
    nx2_name, nx2_nid = nametables_from_nameset(name_set)
    # Get chip set
    print('[convert] build chip table')
    # ---------
    # This format has multiple images per row
    chips_per_name = 2  # this is apparently always 2

    def get_multiprop_colx_list(prefix):
        # Column indexes for 'prefix1', 'prefix2', ... one per chip slot.
        colx_list = []
        for num in xrange(chips_per_name):
            lbl = prefix + str(num + 1)
            colx = get_lbl_pos(column_labels, [lbl])
            colx_list.append(colx)
        return colx_list
    # ---------
    # Essential properties
    #prop2_colx_list = {}
    try:
        image_colx_list = get_multiprop_colx_list('IMAGE_')
    except Exception:
        image_colx_list = get_multiprop_colx_list('Image')
    # ---------
    # Nonessential multi-properties (one column per chip slot)
    try_multiprops = ['DATE_NO']
    multiprop2_colx = {}
    for key in try_multiprops:
        try:
            multiprop2_colx[key] = get_multiprop_colx_list(key)
        except Exception:
            pass
    # ---------
    # Nonessential single-properties (one column per row)
    try_props = ['SEX']
    prop2_colx = {}
    for key in try_props:
        try:
            other_colx = get_lbl_pos(column_labels, [key])
            prop2_colx[key] = other_colx
        except Exception:
            pass
    # ---------
    # Nonessential pairwise-properties (apply to the chip pair of a row)
    try_match_props = ['matches', 'WildID_score']
    pairprop2_colx = {}
    for key in try_match_props:
        try:
            other_colx = get_lbl_pos(column_labels, [key])
            pairprop2_colx[key] = other_colx
        except Exception:
            pass
    # ---------
    # Build tables
    cx2_cid = []
    cx2_theta = []
    cx2_roi = []
    cx2_nx = []
    cx2_gx = []
    prop_dict = {}
    pairwise_dict = {}
    gnameroi_to_cid = {}
    for key in prop2_colx.keys():
        prop_dict[key] = []
    for key in multiprop2_colx.keys():
        prop_dict[key] = []
    cid = 1

    def wildid_add_to_hs_tables(gname, name, roi, theta=0, **kwargs):
        # Append one chip row; kwargs become per-chip property values.
        cid = len(cx2_cid) + 1
        nx = nx2_name.index(name)
        gx = gx2_gname.index(gname)
        cx2_cid.append(cid)
        cx2_roi.append(roi)
        cx2_nx.append(nx)
        cx2_gx.append(gx)
        for key, val in kwargs.iteritems():
            prop_dict[key].append(val)
        cx2_theta.append(theta)
        # In-place progress readout on stdout.
        sys.stdout.write(('\b' * 10) + 'cid = %4d' % cid)
        return cid
    # ---------
    # Wildid parsing
    bad_rows = 0
    for rowx in xrange(num_rows):
        name = column_list[name_colx][rowx]
        tbl_kwargs2 = {key: column_list[val][rowx] for key, val in prop2_colx.iteritems()}
        pairwise_vals = [column_list[colx][rowx] for colx in pairprop2_colx.values()]
        cid_tup = []
        for num in xrange(chips_per_name):  # TODO: This is always just pairwise
            img_colx = image_colx_list[num]
            gname = column_list[img_colx][rowx]
            tbl_kwargs1 = {key: column_list[val[num]][rowx] for key, val in multiprop2_colx.iteritems()}
            tbl_kwargs = dict(tbl_kwargs1.items() + tbl_kwargs2.items())
            roi = roi_from_imgsize(join(img_dpath, gname), silent=True)
            if roi is None:
                # Image is missing or unreadable; count the row as bad.
                img_fpath = join(img_dpath, gname)
                bad_rows += 1
                if not exists(img_fpath):
                    print('nonexistant image: %r' % gname)
                else:
                    print('corrupted image: %r' % gname)
                continue
            # Deduplicate: the same (image, roi) may appear in several rows.
            gnameroi = (gname, tuple(roi))
            if gnameroi in gnameroi_to_cid.keys():
                cid = gnameroi_to_cid[gnameroi]
                cid_tup.append(cid)
                continue
            cid = wildid_add_to_hs_tables(gname, name, roi, **tbl_kwargs)
            gnameroi_to_cid[gnameroi] = cid
            cid_tup.append(cid)
        pairwise_dict[tuple(cid_tup)] = pairwise_vals
    print('bad_rows = %r ' % bad_rows)
    print('num_rows = %r ' % num_rows)
    print('chips_per_name = %r ' % chips_per_name)
    print('cid = %r ' % cid)
    print('num pairwise properties: %r' % len(pairwise_dict))
    print('implementation of pairwise properties does not yet exist')
    num_known_chips = len(cx2_cid)
    print('[convert] Added %r known chips.' % num_known_chips)
    # Add the rest of the nongroundtruthed chips
    print('[convert] Adding unknown images to table')
    # Check that images were unique
    unique_gx = np.unique(np.array(cx2_gx))
    print('len(cx2_gx)=%r' % len(cx2_gx))
    print('len(unique_gx)=%r' % len(unique_gx))
    assert len(cx2_gx) == len(unique_gx), \
        'There are images specified twice'
    # Check that cids were unique
    cx2_cid_arr = np.array(cx2_cid)
    valid_cids = cx2_cid_arr[np.where(cx2_cid_arr > 0)[0]]
    unique_cids = np.unique(valid_cids)
    print('len(cx2_cid) = %r' % len(cx2_cid))
    print('len(valid_cids) = %r' % len(valid_cids))
    print('len(unique_cids) = %r' % len(unique_cids))
    assert len(valid_cids) == len(unique_cids), \
        'There are chipids specified twice'
    known_gx_set = set(cx2_gx)
    for gx, gname in enumerate(gx2_gname):
        if gx in known_gx_set:
            continue
        name = '____'
        roi = roi_from_imgsize(join(img_dpath, gname), silent=False)
        # Unknown chips get 'NA' placeholders for every property column.
        tbl_kwargs1 = {key: 'NA' for key, val in multiprop2_colx.iteritems()}
        tbl_kwargs2 = {key: 'NA' for key, val in prop2_colx.iteritems()}
        tbl_kwargs = dict(tbl_kwargs1.items() + tbl_kwargs2.items())
        if not roi is None:
            cid = wildid_add_to_hs_tables(gname, name, roi, **tbl_kwargs)
    num_unknown_chips = len(cx2_cid) - num_known_chips
    print('[convert] Added %r more unknown chips.' % num_unknown_chips)
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    # NOTE(review): '% (cid - 1)' looks off by one relative to len(cx2_cid)
    # -- confirm intended semantics before changing.
    print('[convert] There are %d chips' % (cid - 1))
    #
    # Write tables
    internal_dir = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi, cx2_theta, prop_dict)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)
    print('[convert] finished conversion')