def is_imgdir(path):
    """Return True if `path` looks like a directory of images.

    A path qualifies when it is a directory and either

    * it contains an entry named ``images``, or
    * at least one of its direct entries is accepted by
      ``helpers.matches_image`` (called with the joined path).

    Args:
        path: filesystem path to inspect.

    Returns:
        bool: False when `path` is not a directory or neither condition
        above holds, True otherwise.
    """
    if not isdir(path):
        return False
    if exists(join(path, 'images')):
        return True
    # Only direct children are inspected; the first match short-circuits.
    return any(helpers.matches_image(join(path, name))
               for name in os.listdir(path))
def convert_from_oxford_style(db_dir):
    """Convert an Oxford-style dataset into hotspotter chip/name/image tables.

    Images are read from ``<db_dir>/images`` (recursively) and groundtruth
    files from ``<db_dir>/oxford_style_gt``. An optional
    ``corrupted_files.txt`` in the groundtruth directory lists image names
    to ignore. The resulting tables are written with ``write_chip_table``,
    ``write_name_table`` and ``write_image_table`` into
    ``join(db_dir, ld2.RDIR_INTERNAL2)``.

    NOTE(review): this file appears to define ``convert_from_oxford_style``
    a second time further down; if both are at module level the later
    definition is the one callers get. Confirm and remove one copy.

    Args:
        db_dir: root directory of the dataset.

    Returns:
        None. Results are written to disk and progress is printed to stdout.

    Raises:
        AssertionError: if some query image has no non-query groundtruth
            entry, or the same image appears in more than one query chip.
        ValueError: from ``list.index`` if a groundtruth name or image name
            is missing from the tables (not caught for query/groundtruth
            chips).
    """
    # Get directories for the oxford groundtruth
    oxford_gt_dpath = join(db_dir, 'oxford_style_gt')
    helpers.assertpath(oxford_gt_dpath)
    # Check for corrupted files (Looking at your Paris Buildings Dataset)
    corrupted_file_fpath = join(oxford_gt_dpath, 'corrupted_files.txt')
    corrupted_gname_set = set()
    if helpers.checkpath(corrupted_file_fpath):
        with open(corrupted_file_fpath) as f:
            corrupted_gname_set = set(f.read().splitlines())

    # Recursively get relative path of all files in img_dpath
    print('Loading Oxford Style Images from: ' + db_dir)
    img_dpath = join(db_dir, 'images')
    helpers.assertpath(img_dpath)
    # Normalise to forward slashes and drop the './' prefix that relpath
    # produces for files directly inside img_dpath.
    gname_list_ = [
        join(relpath(root, img_dpath), fname).replace('\\', '/').replace('./', '')
        for (root, dlist, flist) in os.walk(img_dpath)
        for fname in flist
    ]
    gname_list = [
        gname for gname in gname_list_
        if gname not in corrupted_gname_set and helpers.matches_image(gname)
    ]
    print(' * num_images = %d ' % len(gname_list))

    # Read the Oxford Style Groundtruth files
    print('Loading Oxford Style Names and Chips')
    gt_fname_list = os.listdir(oxford_gt_dpath)
    num_gt_files = len(gt_fname_list)
    query_chips = []
    gname2_chips_raw = collections.defaultdict(list)
    name_set = set()
    print(' * num_gt_files = %d ' % num_gt_files)
    sys.stdout.write('parsed: 0000/%4d' % (num_gt_files))
    for gtx, gt_fname in enumerate(gt_fname_list):
        # Backspace over the previous 'NNNN/NNNN' counter and redraw it.
        sys.stdout.write(('\b' * 9) + '%4d/%4d' % (gtx + 1, num_gt_files))
        if gtx % 10 == 1:
            sys.stdout.flush()
        if gt_fname == 'corrupted_files.txt':
            continue
        # Get name, quality, and num from fname
        (name, num, quality) = __oxgtfile2_oxsty_gttup(gt_fname)
        gt_fpath = join(oxford_gt_dpath, gt_fname)
        name_set.add(name)
        oxsty_chip_info_sublist = __read_oxsty_gtfile(
            gt_fpath, name, quality, img_dpath, corrupted_gname_set)
        if quality == 'query':
            for (gname, roi) in oxsty_chip_info_sublist:
                query_chips.append((gname, roi, name, num))
        else:
            for (gname, roi) in oxsty_chip_info_sublist:
                gname2_chips_raw[gname].append((name, roi, quality))
    sys.stdout.write('\n')
    print(' * num_query images = %d ' % len(query_chips))

    # Remove duplicates img.jpg : (*1.txt, *2.txt, ...) -> (*.txt)
    # Entries are compared by repr (rois may be unhashable). First-seen order
    # is preserved so the chip chosen for multi-named images is deterministic.
    gname2_chips = collections.defaultdict(list)
    multinamed_gname_list = []
    for gname, raw_chips in gname2_chips_raw.items():
        seen_reprs = set()
        for chip in raw_chips:
            chip_repr = repr(chip)
            if chip_repr not in seen_reprs:
                seen_reprs.add(chip_repr)
                gname2_chips[gname].append(chip)
        if len(gname2_chips[gname]) > 1:
            multinamed_gname_list.append(gname)

    # print some statistics
    query_gname_list = [tup[0] for tup in query_chips]
    # Materialise the keys: numpy cannot consume a dict view on Python 3.
    gname_with_groundtruth_list = list(gname2_chips.keys())
    gname_without_groundtruth_list = np.setdiff1d(
        gname_list, gname_with_groundtruth_list)
    print(' * num_images = %d ' % len(gname_list))
    print(' * images with groundtruth = %d ' % len(gname_with_groundtruth_list))
    print(' * images without groundtruth = %d ' % len(gname_without_groundtruth_list))
    print(' * images with multi-groundtruth = %d ' % len(multinamed_gname_list))
    # make sure all queries have ground truth and there are no duplicate queries
    assert len(query_gname_list) == len(
        np.intersect1d(query_gname_list, gname_with_groundtruth_list))
    assert len(query_gname_list) == len(set(query_gname_list))

    # build hotspotter tables
    print('adding to table: ')
    gx2_gname = gname_list
    # Index 0 and 1 both hold the placeholder name and share nid 1.
    nx2_name = ['____', '____'] + list(name_set)
    nx2_nid = [1, 1] + list(range(2, len(name_set) + 2))
    gx2_gid = list(range(1, len(gx2_gname) + 1))
    cx2_cid = []
    cx2_theta = []
    cx2_quality = []  # collected, but not passed to any writer below
    cx2_roi = []
    cx2_nx = []
    cx2_gx = []
    prop_dict = {'oxnum': []}

    def add_to_hs_tables(gname, name, roi, quality, num=''):
        """Append one chip row to the cx2_* lists and return its chip id."""
        cid = len(cx2_cid) + 1
        nx = nx2_name.index(name)
        if nx == 0:
            # The placeholder name resolves to slot 0; use slot 1 instead.
            nx = 1
        gx = gx2_gname.index(gname)
        cx2_cid.append(cid)
        cx2_roi.append(roi)
        cx2_quality.append(quality)
        cx2_nx.append(nx)
        cx2_gx.append(gx)
        cx2_theta.append(0)
        prop_dict['oxnum'].append(num)
        sys.stdout.write(('\b' * 10) + 'cid = %4d' % cid)
        return cid

    for gname, roi, name, num in query_chips:
        add_to_hs_tables(gname, name, roi, 'query', num)
    for gname, chips in gname2_chips.items():
        # Images with several distinct groundtruth entries just take the
        # first one. This is foobar
        (name, roi, quality) = chips[0]
        add_to_hs_tables(gname, name, roi, quality)
    for gname in gname_without_groundtruth_list:
        gpath = join(img_dpath, gname)
        try:
            # Images without groundtruth get a full-image roi.
            (w, h) = Image.open(gpath).size
            roi = [0, 0, w, h]
            add_to_hs_tables(gname, '____', roi, 'unknown')
        except Exception as ex:
            # Best effort: report the unreadable image and keep going.
            print('Exception ex=%r' % ex)
            print('Not adding gname=%r' % gname)
            print('----')
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    #
    # Write tables
    internal_dir = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi,
                     cx2_theta, prop_dict)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)
def convert_from_oxford_style(db_dir):
    """Convert an Oxford-style dataset into hotspotter chip/name/image tables.

    Images are read from ``<db_dir>/images`` (recursively) and groundtruth
    files from ``<db_dir>/oxford_style_gt``. An optional
    ``corrupted_files.txt`` in the groundtruth directory lists image names
    to ignore. The resulting tables are written with ``write_chip_table``,
    ``write_name_table`` and ``write_image_table`` into
    ``join(db_dir, ld2.RDIR_INTERNAL2)``.

    NOTE(review): an earlier definition with the same name appears above in
    this file; if both are at module level this later one shadows it.
    Confirm and remove one copy.

    Args:
        db_dir: root directory of the dataset.

    Returns:
        None. Results are written to disk and progress is printed to stdout.

    Raises:
        AssertionError: if some query image has no non-query groundtruth
            entry, or the same image appears in more than one query chip.
        ValueError: from ``list.index`` if a groundtruth name or image name
            is missing from the tables (not caught for query/groundtruth
            chips).
    """
    # Get directories for the oxford groundtruth
    oxford_gt_dpath = join(db_dir, 'oxford_style_gt')
    helpers.assertpath(oxford_gt_dpath)
    # Check for corrupted files (Looking at your Paris Buildings Dataset)
    corrupted_file_fpath = join(oxford_gt_dpath, 'corrupted_files.txt')
    corrupted_gname_set = set()
    if helpers.checkpath(corrupted_file_fpath):
        with open(corrupted_file_fpath) as f:
            corrupted_gname_set = set(f.read().splitlines())

    # Recursively get relative path of all files in img_dpath
    print('Loading Oxford Style Images from: ' + db_dir)
    img_dpath = join(db_dir, 'images')
    helpers.assertpath(img_dpath)
    # Normalise to forward slashes and drop the './' prefix that relpath
    # produces for files directly inside img_dpath.
    gname_list_ = [
        join(relpath(root, img_dpath), fname).replace('\\', '/').replace('./', '')
        for (root, dlist, flist) in os.walk(img_dpath)
        for fname in flist
    ]
    gname_list = [
        gname for gname in gname_list_
        if gname not in corrupted_gname_set and helpers.matches_image(gname)
    ]
    print(' * num_images = %d ' % len(gname_list))

    # Read the Oxford Style Groundtruth files
    print('Loading Oxford Style Names and Chips')
    gt_fname_list = os.listdir(oxford_gt_dpath)
    num_gt_files = len(gt_fname_list)
    query_chips = []
    gname2_chips_raw = collections.defaultdict(list)
    name_set = set()
    print(' * num_gt_files = %d ' % num_gt_files)
    sys.stdout.write('parsed: 0000/%4d' % (num_gt_files))
    for gtx, gt_fname in enumerate(gt_fname_list):
        # Backspace over the previous 'NNNN/NNNN' counter and redraw it.
        sys.stdout.write(('\b' * 9) + '%4d/%4d' % (gtx + 1, num_gt_files))
        if gtx % 10 == 1:
            sys.stdout.flush()
        if gt_fname == 'corrupted_files.txt':
            continue
        # Get name, quality, and num from fname
        (name, num, quality) = __oxgtfile2_oxsty_gttup(gt_fname)
        gt_fpath = join(oxford_gt_dpath, gt_fname)
        name_set.add(name)
        oxsty_chip_info_sublist = __read_oxsty_gtfile(
            gt_fpath, name, quality, img_dpath, corrupted_gname_set)
        if quality == 'query':
            for (gname, roi) in oxsty_chip_info_sublist:
                query_chips.append((gname, roi, name, num))
        else:
            for (gname, roi) in oxsty_chip_info_sublist:
                gname2_chips_raw[gname].append((name, roi, quality))
    sys.stdout.write('\n')
    print(' * num_query images = %d ' % len(query_chips))

    # Remove duplicates img.jpg : (*1.txt, *2.txt, ...) -> (*.txt)
    # Entries are compared by repr (rois may be unhashable). First-seen order
    # is preserved so the chip chosen for multi-named images is deterministic.
    gname2_chips = collections.defaultdict(list)
    multinamed_gname_list = []
    for gname, raw_chips in gname2_chips_raw.items():
        seen_reprs = set()
        for chip in raw_chips:
            chip_repr = repr(chip)
            if chip_repr not in seen_reprs:
                seen_reprs.add(chip_repr)
                gname2_chips[gname].append(chip)
        if len(gname2_chips[gname]) > 1:
            multinamed_gname_list.append(gname)

    # print some statistics
    query_gname_list = [tup[0] for tup in query_chips]
    # Materialise the keys: numpy cannot consume a dict view on Python 3.
    gname_with_groundtruth_list = list(gname2_chips.keys())
    gname_without_groundtruth_list = np.setdiff1d(
        gname_list, gname_with_groundtruth_list)
    print(' * num_images = %d ' % len(gname_list))
    print(' * images with groundtruth = %d ' % len(gname_with_groundtruth_list))
    print(' * images without groundtruth = %d ' % len(gname_without_groundtruth_list))
    print(' * images with multi-groundtruth = %d ' % len(multinamed_gname_list))
    # make sure all queries have ground truth and there are no duplicate queries
    assert len(query_gname_list) == len(
        np.intersect1d(query_gname_list, gname_with_groundtruth_list))
    assert len(query_gname_list) == len(set(query_gname_list))

    # build hotspotter tables
    print('adding to table: ')
    gx2_gname = gname_list
    # Index 0 and 1 both hold the placeholder name and share nid 1.
    nx2_name = ['____', '____'] + list(name_set)
    nx2_nid = [1, 1] + list(range(2, len(name_set) + 2))
    gx2_gid = list(range(1, len(gx2_gname) + 1))
    cx2_cid = []
    cx2_theta = []
    cx2_quality = []  # collected, but not passed to any writer below
    cx2_roi = []
    cx2_nx = []
    cx2_gx = []
    prop_dict = {'oxnum': []}

    def add_to_hs_tables(gname, name, roi, quality, num=''):
        """Append one chip row to the cx2_* lists and return its chip id."""
        cid = len(cx2_cid) + 1
        nx = nx2_name.index(name)
        if nx == 0:
            # The placeholder name resolves to slot 0; use slot 1 instead.
            nx = 1
        gx = gx2_gname.index(gname)
        cx2_cid.append(cid)
        cx2_roi.append(roi)
        cx2_quality.append(quality)
        cx2_nx.append(nx)
        cx2_gx.append(gx)
        cx2_theta.append(0)
        prop_dict['oxnum'].append(num)
        sys.stdout.write(('\b' * 10) + 'cid = %4d' % cid)
        return cid

    for gname, roi, name, num in query_chips:
        add_to_hs_tables(gname, name, roi, 'query', num)
    for gname, chips in gname2_chips.items():
        # Images with several distinct groundtruth entries just take the
        # first one. This is foobar
        (name, roi, quality) = chips[0]
        add_to_hs_tables(gname, name, roi, quality)
    for gname in gname_without_groundtruth_list:
        gpath = join(img_dpath, gname)
        try:
            # Images without groundtruth get a full-image roi.
            (w, h) = Image.open(gpath).size
            roi = [0, 0, w, h]
            add_to_hs_tables(gname, '____', roi, 'unknown')
        except Exception as ex:
            # Best effort: report the unreadable image and keep going.
            print('Exception ex=%r' % ex)
            print('Not adding gname=%r' % gname)
            print('----')
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    #
    # Write tables
    internal_dir = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi,
                     cx2_theta, prop_dict)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)