def sequential_feat_load(feat_cfg, feat_fpath_list):
    '''Load keypoint/descriptor pairs from a list of .npz feature files.

    Each file is expected to contain two arrays: arr_0 (keypoints) and
    arr_1 (descriptors).  Returns (kpts_list, desc_list); descriptors are
    whitened first when feat_cfg.whiten is set.

    Raises whatever np.load raises on a bad path (after printing
    diagnostics) and re-raises MemoryError with context.
    '''
    kpts_list = []
    desc_list = []
    # FIX: feat_path was previously unbound if MemoryError struck before the
    # first loop iteration, turning the handler's diagnostic into a NameError.
    feat_path = None
    # Debug loading (seems to use lots of memory)
    print('\n')
    try:
        nFeats = len(feat_fpath_list)
        prog_label = '[fc2] Loading feature: '
        mark_progress, end_progress = helpers.progress_func(nFeats, prog_label)
        for count, feat_path in enumerate(feat_fpath_list):
            try:
                # mmap_mode=None forces a full in-memory read
                npz = np.load(feat_path, mmap_mode=None)
            except IOError:
                print('\n')
                helpers.checkpath(feat_path, verbose=True)
                print('IOError on feat_path=%r' % feat_path)
                raise
            try:
                kpts = npz['arr_0']
                desc = npz['arr_1']
            finally:
                # FIX: close the npz handle even when an expected array is missing
                npz.close()
            kpts_list.append(kpts)
            desc_list.append(desc)
            mark_progress(count)
        end_progress()
        print('[fc2] Finished load of individual kpts and desc')
    except MemoryError:
        print('\n------------')
        print('[fc2] Out of memory')
        print('[fc2] Trying to read: %r' % feat_path)
        print('[fc2] len(kpts_list) = %d' % len(kpts_list))
        print('[fc2] len(desc_list) = %d' % len(desc_list))
        raise
    if feat_cfg.whiten:
        desc_list = whiten_features(desc_list)
    return kpts_list, desc_list
def assign_alternate(tblname):
    '''Resolve *tblname* to an existing file, checking the database root
    first and then the .hs_internals subdirectory.  Raises if neither
    location exists.  Relies on the surrounding scope's db_dir.'''
    candidates = (join(db_dir, tblname),
                  join(db_dir, '.hs_internals', tblname))
    for candidate in candidates:
        if helpers.checkpath(candidate, verbose=True):
            return candidate
    raise Exception('bad state=%r' % tblname)
def precompute_flann(data, cache_dir=None, uid='', flann_params=None, force_recompute=False):
    ''' Tries to load a cached flann index before doing anything

    The cache filename is derived from the flann parameters and a hash of
    *data*, prefixed with the user label *uid*.  On a cache miss (or when
    force_recompute is set) the index is rebuilt and saved back to cache.

    Returns the ready-to-use pyflann.FLANN instance.
    '''
    print('[algos] precompute_flann(%r): ' % uid)
    cache_dir = '.' if cache_dir is None else cache_dir
    # FIX: the declared default flann_params=None previously crashed below on
    # flann_params.values(); treat None as "no parameters".
    flann_params = {} if flann_params is None else flann_params
    # Generate a unique filename for data and flann parameters
    fparams_uid = helpers.remove_chars(str(flann_params.values()), ', \'[]')
    data_uid = helpers.hashstr_arr(data, 'dID')  # flann is dependent on the data
    flann_suffix = '_' + fparams_uid + '_' + data_uid + '.flann'
    # Append any user labels
    flann_fname = 'flann_index_' + uid + flann_suffix
    flann_fpath = os.path.normpath(join(cache_dir, flann_fname))
    # Load the index if it exists
    flann = pyflann.FLANN()
    load_success = False
    if helpers.checkpath(flann_fpath) and not force_recompute:
        try:
            #print('[algos] precompute_flann(): #trying to load: %r ' % flann_fname)
            flann.load_index(flann_fpath, data)
            print('[algos]...flann cache hit')
            load_success = True
        except Exception as ex:
            print('[algos] precompute_flann(): ...cannot load index')
            print('[algos] precompute_flann(): ...caught ex=\n%r' % (ex,))
    if not load_success:
        # Rebuild the index otherwise
        with helpers.Timer(msg='compute FLANN', newline=False):
            flann.build_index(data, **flann_params)
        print('[algos] precompute_flann(): save_index(%r)' % flann_fname)
        flann.save_index(flann_fpath)
    return flann
def precompute_flann(data, cache_dir=None, uid='', flann_params=None, force_recompute=False):
    ''' Tries to load a cached flann index before doing anything

    NOTE(review): this is a verbatim duplicate of the other
    precompute_flann definition in this file; consider removing one.
    '''
    print('[algos] precompute_flann(%r): ' % uid)
    if cache_dir is None:
        cache_dir = '.'
    # Derive a unique cache filename from the flann parameters and a hash
    # of the data (the index is only valid for the exact data it was built on).
    fparams_uid = helpers.remove_chars(str(flann_params.values()), ', \'[]')
    data_uid = helpers.hashstr_arr(data, 'dID')
    flann_suffix = '_' + fparams_uid + '_' + data_uid + '.flann'
    flann_fname = 'flann_index_' + uid + flann_suffix
    flann_fpath = os.path.normpath(join(cache_dir, flann_fname))
    flann = pyflann.FLANN()
    # Attempt to reuse a cached index unless a rebuild was requested
    loaded = False
    if helpers.checkpath(flann_fpath) and not force_recompute:
        try:
            flann.load_index(flann_fpath, data)
            print('[algos]...flann cache hit')
            loaded = True
        except Exception as ex:
            print('[algos] precompute_flann(): ...cannot load index')
            print('[algos] precompute_flann(): ...caught ex=\n%r' % (ex, ))
    if loaded:
        return flann
    # Cache miss: build the index from scratch and store it for next time
    with helpers.Timer(msg='compute FLANN', newline=False):
        flann.build_index(data, **flann_params)
    print('[algos] precompute_flann(): save_index(%r)' % flann_fname)
    flann.save_index(flann_fpath)
    return flann
def detect_checkpath(dir_):
    '''Check that *dir_* exists, printing details only when the module's
    VERBOSE_DETERMINE_VERSION flag is on.'''
    be_verbose = VERBOSE_DETERMINE_VERSION
    exists_flag = helpers.checkpath(dir_, verbose=be_verbose)
    return exists_flag
def convert_from_oxford_style(db_dir):
    '''Convert an Oxford/Paris-style groundtruth database into hotspotter
    csv tables.

    Expects db_dir to contain an 'oxford_style_gt' directory of *.txt
    groundtruth files and an 'images' directory.  Writes chip, name, and
    image tables into the hotspotter internal directory.

    NOTE(review): Python 2 only — relies on dict.iteritems, list-returning
    map(), and list-returning range().
    '''
    # Get directories for the oxford groundtruth
    oxford_gt_dpath = join(db_dir, 'oxford_style_gt')
    helpers.assertpath(oxford_gt_dpath)
    # Check for corrupted files (Looking at your Paris Buildings Dataset)
    corrupted_file_fpath = join(oxford_gt_dpath, 'corrupted_files.txt')
    corrupted_gname_set = set([])
    if helpers.checkpath(corrupted_file_fpath):
        with open(corrupted_file_fpath) as f:
            corrupted_gname_list = f.read().splitlines()
        corrupted_gname_set = set(corrupted_gname_list)
    # Recursively get relative path of all files in img_dpath
    print('Loading Oxford Style Images from: ' + db_dir)
    img_dpath = join(db_dir, 'images')
    helpers.assertpath(img_dpath)
    # Paths are normalized to forward slashes relative to img_dpath
    gname_list_ = [
        join(relpath(root, img_dpath), fname).replace('\\', '/').replace('./', '')
        for (root, dlist, flist) in os.walk(img_dpath)
        for fname in iter(flist)
    ]
    # Drop corrupted entries and anything that is not an image file
    gname_list = [
        gname for gname in iter(gname_list_)
        if not gname in corrupted_gname_set and helpers.matches_image(gname)
    ]
    print(' * num_images = %d ' % len(gname_list))
    # Read the Oxford Style Groundtruth files
    print('Loading Oxford Style Names and Chips')
    gt_fname_list = os.listdir(oxford_gt_dpath)
    num_gt_files = len(gt_fname_list)
    query_chips = []  # (gname, roi, name, num) tuples from 'query' gt files
    gname2_chips_raw = collections.defaultdict(list)  # image -> gt chips
    name_set = set([])
    print(' * num_gt_files = %d ' % num_gt_files)
    sys.stdout.write('parsed: 0000/%4d' % (num_gt_files))
    for gtx, gt_fname in enumerate(gt_fname_list):
        # In-place progress counter (backspaces over the previous count)
        sys.stdout.write(('\b' * 9) + '%4d/%4d' % (gtx + 1, num_gt_files))
        # NOTE(review): precedence makes this (gtx % 10) - 1 == 0, i.e. a
        # flush when gtx % 10 == 1 — possibly meant every 10th file; confirm.
        if gtx % 10 - 1 == 0:
            sys.stdout.flush()
        if gt_fname == 'corrupted_files.txt':
            continue
        #Get name, quality, and num from fname
        (name, num, quality) = __oxgtfile2_oxsty_gttup(gt_fname)
        gt_fpath = join(oxford_gt_dpath, gt_fname)
        name_set.add(name)
        oxsty_chip_info_sublist = __read_oxsty_gtfile(gt_fpath, name, quality,
                                                      img_dpath, corrupted_gname_set)
        if quality == 'query':
            for (gname, roi) in iter(oxsty_chip_info_sublist):
                query_chips.append((gname, roi, name, num))
        else:
            for (gname, roi) in iter(oxsty_chip_info_sublist):
                gname2_chips_raw[gname].append((name, roi, quality))
    sys.stdout.write('\n')
    print(' * num_query images = %d ' % len(query_chips))
    # Remove duplicates img.jpg : (*1.txt, *2.txt, ...) -> (*.txt)
    # Deduplicate by repr() of the chip tuple, keeping first occurrences
    gname2_chips = collections.defaultdict(list)
    multinamed_gname_list = []  # images annotated with more than one chip
    for gname, val in gname2_chips_raw.iteritems():
        val_repr = map(repr, val)
        unique_reprs = set(val_repr)
        unique_indexes = [val_repr.index(urep) for urep in unique_reprs]
        for ux in unique_indexes:
            gname2_chips[gname].append(val[ux])
        if len(gname2_chips[gname]) > 1:
            multinamed_gname_list.append(gname)
    # print some statistics
    query_gname_list = [tup[0] for tup in query_chips]
    gname_with_groundtruth_list = gname2_chips.keys()
    gname_without_groundtruth_list = np.setdiff1d(gname_list,
                                                  gname_with_groundtruth_list)
    print(' * num_images = %d ' % len(gname_list))
    print(' * images with groundtruth    = %d ' % len(gname_with_groundtruth_list))
    print(' * images without groundtruth = %d ' % len(gname_without_groundtruth_list))
    print(' * images with multi-groundtruth = %d ' % len(multinamed_gname_list))
    #make sure all queries have ground truth and there are no duplicate queries
    assert len(query_gname_list) == len(
        np.intersect1d(query_gname_list, gname_with_groundtruth_list))
    assert len(query_gname_list) == len(set(query_gname_list))
    # build hotspotter tables
    print('adding to table: ')
    gx2_gname = gname_list
    # Two '____' placeholder name slots; real names get ids starting at 2
    nx2_name = ['____', '____'] + list(name_set)
    nx2_nid = [1, 1] + range(2, len(name_set) + 2)
    gx2_gid = range(1, len(gx2_gname) + 1)
    cx2_cid = []
    cx2_theta = []
    # NOTE(review): cx2_quality is collected below but never written to any
    # table — confirm whether it is intentionally dropped.
    cx2_quality = []
    cx2_roi = []
    cx2_nx = []
    cx2_gx = []
    prop_dict = {'oxnum': []}

    def add_to_hs_tables(gname, name, roi, quality, num=''):
        '''Append one chip row to the accumulating cx2_* lists; returns the
        new chip id.  Name index 0 is remapped to 1 (the two leading slots
        are both the unknown-name placeholder).'''
        cid = len(cx2_cid) + 1
        nx = nx2_name.index(name)
        if nx == 0:
            nx = 1
        gx = gx2_gname.index(gname)
        cx2_cid.append(cid)
        cx2_roi.append(roi)
        cx2_quality.append(quality)
        cx2_nx.append(nx)
        cx2_gx.append(gx)
        cx2_theta.append(0)
        prop_dict['oxnum'].append(num)
        sys.stdout.write(('\b' * 10) + 'cid = %4d' % cid)
        return cid

    # Queries first, then groundtruthed images, then images with no gt at all
    for gname, roi, name, num in query_chips:
        add_to_hs_tables(gname, name, roi, 'query', num)
    for gname in gname2_chips.keys():
        if len(gname2_chips[gname]) == 1:
            (name, roi, quality) = gname2_chips[gname][0]
            add_to_hs_tables(gname, name, roi, quality)
        else:
            # just take the first name. This is foobar
            names, rois, qualities = zip(*gname2_chips[gname])
            add_to_hs_tables(gname, names[0], rois[0], qualities[0])
    for gname in gname_without_groundtruth_list:
        gpath = join(img_dpath, gname)
        try:
            # Unknown chips get a full-image ROI
            (w, h) = Image.open(gpath).size
            roi = [0, 0, w, h]
            add_to_hs_tables(gname, '____', roi, 'unknown')
        except Exception as ex:
            # Best-effort: unreadable images are skipped with a warning
            print('Exception ex=%r' % ex)
            print('Not adding gname=%r' % gname)
            print('----')
    # Map per-chip indexes back to persistent ids for the csv writers
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    #
    # Write tables
    internal_dir = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi,
                     cx2_theta, prop_dict)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)
def has_v1_gt(path):
    '''Return True when *path* holds a hotspotter-v1 groundtruth table
    (animal_info_table.csv).'''
    candidate = join(path, 'animal_info_table.csv')
    print(candidate)
    return helpers.checkpath(candidate, verbose=True)
def has_ss_gt(path):
    '''Return True when *path* holds a StripeSpotter groundtruth file
    (SightingData.csv).'''
    candidate = join(path, 'SightingData.csv')
    print(candidate)
    return helpers.checkpath(candidate, verbose=True)
def convert_from_oxford_style(db_dir):
    '''Convert an Oxford/Paris-style groundtruth database into hotspotter
    csv tables.

    Expects db_dir to contain an 'oxford_style_gt' directory of *.txt
    groundtruth files and an 'images' directory.  Writes chip, name, and
    image tables into the hotspotter internal directory.

    NOTE(review): this appears to be a verbatim duplicate of another
    convert_from_oxford_style definition in this file.  Python 2 only
    (dict.iteritems, list-returning map/range).
    '''
    # Get directories for the oxford groundtruth
    oxford_gt_dpath = join(db_dir, 'oxford_style_gt')
    helpers.assertpath(oxford_gt_dpath)
    # Check for corrupted files (Looking at your Paris Buildings Dataset)
    corrupted_file_fpath = join(oxford_gt_dpath, 'corrupted_files.txt')
    corrupted_gname_set = set([])
    if helpers.checkpath(corrupted_file_fpath):
        with open(corrupted_file_fpath) as f:
            corrupted_gname_list = f.read().splitlines()
        corrupted_gname_set = set(corrupted_gname_list)
    # Recursively get relative path of all files in img_dpath
    print('Loading Oxford Style Images from: ' + db_dir)
    img_dpath = join(db_dir, 'images')
    helpers.assertpath(img_dpath)
    # Paths are normalized to forward slashes relative to img_dpath
    gname_list_ = [join(relpath(root, img_dpath), fname).replace('\\', '/').replace('./', '')
                   for (root, dlist, flist) in os.walk(img_dpath)
                   for fname in iter(flist)]
    # Drop corrupted entries and anything that is not an image file
    gname_list = [gname for gname in iter(gname_list_)
                  if not gname in corrupted_gname_set and helpers.matches_image(gname)]
    print(' * num_images = %d ' % len(gname_list))
    # Read the Oxford Style Groundtruth files
    print('Loading Oxford Style Names and Chips')
    gt_fname_list = os.listdir(oxford_gt_dpath)
    num_gt_files = len(gt_fname_list)
    query_chips = []  # (gname, roi, name, num) tuples from 'query' gt files
    gname2_chips_raw = collections.defaultdict(list)  # image -> gt chips
    name_set = set([])
    print(' * num_gt_files = %d ' % num_gt_files)
    sys.stdout.write('parsed: 0000/%4d' % (num_gt_files))
    for gtx, gt_fname in enumerate(gt_fname_list):
        # In-place progress counter (backspaces over the previous count)
        sys.stdout.write(('\b' * 9) + '%4d/%4d' % (gtx + 1, num_gt_files))
        # NOTE(review): precedence makes this (gtx % 10) - 1 == 0, i.e. a
        # flush when gtx % 10 == 1 — possibly meant every 10th file; confirm.
        if gtx % 10 - 1 == 0:
            sys.stdout.flush()
        if gt_fname == 'corrupted_files.txt':
            continue
        #Get name, quality, and num from fname
        (name, num, quality) = __oxgtfile2_oxsty_gttup(gt_fname)
        gt_fpath = join(oxford_gt_dpath, gt_fname)
        name_set.add(name)
        oxsty_chip_info_sublist = __read_oxsty_gtfile(gt_fpath, name, quality,
                                                      img_dpath, corrupted_gname_set)
        if quality == 'query':
            for (gname, roi) in iter(oxsty_chip_info_sublist):
                query_chips.append((gname, roi, name, num))
        else:
            for (gname, roi) in iter(oxsty_chip_info_sublist):
                gname2_chips_raw[gname].append((name, roi, quality))
    sys.stdout.write('\n')
    print(' * num_query images = %d ' % len(query_chips))
    # Remove duplicates img.jpg : (*1.txt, *2.txt, ...) -> (*.txt)
    # Deduplicate by repr() of the chip tuple, keeping first occurrences
    gname2_chips = collections.defaultdict(list)
    multinamed_gname_list = []  # images annotated with more than one chip
    for gname, val in gname2_chips_raw.iteritems():
        val_repr = map(repr, val)
        unique_reprs = set(val_repr)
        unique_indexes = [val_repr.index(urep) for urep in unique_reprs]
        for ux in unique_indexes:
            gname2_chips[gname].append(val[ux])
        if len(gname2_chips[gname]) > 1:
            multinamed_gname_list.append(gname)
    # print some statistics
    query_gname_list = [tup[0] for tup in query_chips]
    gname_with_groundtruth_list = gname2_chips.keys()
    gname_without_groundtruth_list = np.setdiff1d(gname_list,
                                                  gname_with_groundtruth_list)
    print(' * num_images = %d ' % len(gname_list))
    print(' * images with groundtruth    = %d ' % len(gname_with_groundtruth_list))
    print(' * images without groundtruth = %d ' % len(gname_without_groundtruth_list))
    print(' * images with multi-groundtruth = %d ' % len(multinamed_gname_list))
    #make sure all queries have ground truth and there are no duplicate queries
    assert len(query_gname_list) == len(np.intersect1d(query_gname_list,
                                                       gname_with_groundtruth_list))
    assert len(query_gname_list) == len(set(query_gname_list))
    # build hotspotter tables
    print('adding to table: ')
    gx2_gname = gname_list
    # Two '____' placeholder name slots; real names get ids starting at 2
    nx2_name = ['____', '____'] + list(name_set)
    nx2_nid = [1, 1] + range(2, len(name_set) + 2)
    gx2_gid = range(1, len(gx2_gname) + 1)
    cx2_cid = []
    cx2_theta = []
    # NOTE(review): cx2_quality is collected below but never written to any
    # table — confirm whether it is intentionally dropped.
    cx2_quality = []
    cx2_roi = []
    cx2_nx = []
    cx2_gx = []
    prop_dict = {'oxnum': []}

    def add_to_hs_tables(gname, name, roi, quality, num=''):
        '''Append one chip row to the accumulating cx2_* lists; returns the
        new chip id.  Name index 0 is remapped to 1 (the two leading slots
        are both the unknown-name placeholder).'''
        cid = len(cx2_cid) + 1
        nx = nx2_name.index(name)
        if nx == 0:
            nx = 1
        gx = gx2_gname.index(gname)
        cx2_cid.append(cid)
        cx2_roi.append(roi)
        cx2_quality.append(quality)
        cx2_nx.append(nx)
        cx2_gx.append(gx)
        cx2_theta.append(0)
        prop_dict['oxnum'].append(num)
        sys.stdout.write(('\b' * 10) + 'cid = %4d' % cid)
        return cid

    # Queries first, then groundtruthed images, then images with no gt at all
    for gname, roi, name, num in query_chips:
        add_to_hs_tables(gname, name, roi, 'query', num)
    for gname in gname2_chips.keys():
        if len(gname2_chips[gname]) == 1:
            (name, roi, quality) = gname2_chips[gname][0]
            add_to_hs_tables(gname, name, roi, quality)
        else:
            # just take the first name. This is foobar
            names, rois, qualities = zip(*gname2_chips[gname])
            add_to_hs_tables(gname, names[0], rois[0], qualities[0])
    for gname in gname_without_groundtruth_list:
        gpath = join(img_dpath, gname)
        try:
            # Unknown chips get a full-image ROI
            (w, h) = Image.open(gpath).size
            roi = [0, 0, w, h]
            add_to_hs_tables(gname, '____', roi, 'unknown')
        except Exception as ex:
            # Best-effort: unreadable images are skipped with a warning
            print('Exception ex=%r' % ex)
            print('Not adding gname=%r' % gname)
            print('----')
    # Map per-chip indexes back to persistent ids for the csv writers
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    #
    # Write tables
    internal_dir = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi,
                     cx2_theta, prop_dict)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)
def load_csv_tables(db_dir, allow_new_dir=True):
    ''' Big function which loads the csv tables from a datatabase directory
    Returns HotspotterDirs and HotspotterTables

    Detects and handles several database layouts: the current format,
    hotspotter-v1/v2 legacy tables, and StripeSpotter SightingData.csv.
    Returns (hs_dirs, hs_tables, db_version) where db_version is one of
    'current', 'hotspotter-v1', 'hotspotter-v2', 'stripespotter', 'newdb'.
    '''
    if 'vdd' in sys.argv:
        helpers.vd(db_dir)
    print('\n=============================')
    print('[ld2] Loading hotspotter csv tables: %r' % db_dir)
    print('=============================')
    hs_dirs = ds.HotspotterDirs(db_dir)
    hs_tables = ds.HotspotterTables()
    #exec(hs_dirs.execstr('hs_dirs'))
    #print(hs_dirs.execstr('hs_dirs'))
    img_dir = hs_dirs.img_dir
    internal_dir = hs_dirs.internal_dir
    db_dir = hs_dirs.db_dir
    # --- Table Names ---
    chip_table = join(internal_dir, CHIP_TABLE_FNAME)
    name_table = join(internal_dir, NAME_TABLE_FNAME)
    image_table = join(internal_dir, IMAGE_TABLE_FNAME)  # TODO: Make optional
    # --- CHECKS ---
    has_dbdir = helpers.checkpath(db_dir)
    has_imgdir = helpers.checkpath(img_dir)
    has_chiptbl = helpers.checkpath(chip_table)
    has_nametbl = helpers.checkpath(name_table)
    has_imgtbl = helpers.checkpath(image_table)
    # ChipTable Header Markers
    header_numdata = '# NumData '
    header_csvformat_re = '# *ChipID,'
    v12_csvformat_re = r'#[0-9]*\) '
    # Default ChipTable Header Variables
    chip_csv_format = ['ChipID', 'ImgID', 'NameID', 'roi[tl_x tl_y w h]', 'theta']
    v12_csv_format = ['instance_id', 'image_id', 'name_id', 'roi']
    # TODO DETECT OLD FORMATS HERE
    db_version = 'current'
    isCurrentVersion = all([has_dbdir, has_imgdir, has_chiptbl, has_nametbl, has_imgtbl])
    print('[ld2] isCurrentVersion=%r' % isCurrentVersion)
    IS_VERSION_1_OR_2 = False
    if not isCurrentVersion:
        # Re-check with verbosity so the user sees which piece is missing
        helpers.checkpath(db_dir, verbose=True)
        helpers.checkpath(img_dir, verbose=True)
        helpers.checkpath(chip_table, verbose=True)
        helpers.checkpath(name_table, verbose=True)
        helpers.checkpath(image_table, verbose=True)
        import db_info

        def assign_alternate(tblname):
            '''Resolve a table filename against db_dir, falling back to the
            .hs_internals subdirectory; raises when neither exists.'''
            path = join(db_dir, tblname)
            if helpers.checkpath(path, verbose=True):
                return path
            path = join(db_dir, '.hs_internals', tblname)
            if helpers.checkpath(path, verbose=True):
                return path
            raise Exception('bad state=%r' % tblname)
        #
        if db_info.has_v2_gt(db_dir):
            IS_VERSION_1_OR_2 = True
            db_version = 'hotspotter-v2'
            chip_csv_format = []  # header fields are parsed from the file
            header_csvformat_re = v12_csvformat_re
            chip_table = assign_alternate('instance_table.csv')
            name_table = assign_alternate('name_table.csv')
            image_table = assign_alternate('image_table.csv')
        #
        elif db_info.has_v1_gt(db_dir):
            IS_VERSION_1_OR_2 = True
            db_version = 'hotspotter-v1'
            chip_csv_format = []
            header_csvformat_re = v12_csvformat_re
            chip_table = assign_alternate('animal_info_table.csv')
            name_table = assign_alternate('name_table.csv')
            image_table = assign_alternate('image_table.csv')
        #
        elif db_info.has_ss_gt(db_dir):
            db_version = 'stripespotter'
            chip_table = join(db_dir, 'SightingData.csv')
            chip_csv_format = ['imgindex', 'original_filepath', 'roi', 'animal_name']
            header_csvformat_re = '#imgindex,'
            #raise NotImplementedError('stripe spotter conversion')
            if not helpers.checkpath(chip_table, verbose=True):
                raise Exception('bad state chip_table=%r' % chip_table)
        else:
            try:
                db_version = 'current'  # Well almost
                chip_table = assign_alternate(CHIP_TABLE_FNAME)
                name_table = assign_alternate(NAME_TABLE_FNAME)
                image_table = assign_alternate(IMAGE_TABLE_FNAME)
            except Exception:
                if db_info.has_partial_gt(db_dir):
                    print('[ld2] detected incomplete database')
                    raise NotImplementedError('partial database recovery')
                elif allow_new_dir:
                    # Nothing on disk yet: initialize a fresh database layout
                    print('[ld2] detected new dir')
                    hs_dirs.ensure_dirs()
                    return hs_dirs, hs_tables, 'newdb'
                else:
                    import traceback
                    print(traceback.format_exc())
                    print('[ld2] I AM IN A BAD STATE!')
                    errmsg = ''
                    errmsg += ('\n\n!!!!!\n\n')
                    errmsg += (' ! The data tables seem to not be loaded')
                    errmsg += (' Files in internal dir: %r' % internal_dir)
                    for fname in os.listdir(internal_dir):
                        # NOTE(review): 'fname' here is a string literal, not
                        # the loop variable — the filenames are never actually
                        # interpolated into the error message; looks like a bug.
                        errmsg += (' ! fname')
                    errmsg += ('\n\n!!!!!\n\n')
                    print(errmsg)
                    raise Exception(errmsg)
    if not helpers.checkpath(chip_table):
        raise Exception('bad state chip_table=%r' % chip_table)
    print('[ld2] detected %r' % db_version)
    hs_dirs.ensure_dirs()
    print('-------------------------')
    print('[ld2] Loading database tables: ')
    cid_lines = []
    line_num = 0
    csv_line = ''
    csv_fields = []
    # RCOS TODO: We need a more general csv read function
    # which can handle all of these little corner cases delt with here.
    try:
        # ------------------
        # --- READ NAMES ---
        # ------------------
        print('[ld2] Loading name table: %r' % name_table)
        # Name ids 0 and 1 both map to the unknown-name placeholder
        nx2_name = [UNKNOWN_NAME, UNKNOWN_NAME]
        nid2_nx = {0: 0, 1: 1}
        name_lines = open(name_table, 'r')
        for line_num, csv_line in enumerate(name_lines):
            csv_line = csv_line.strip('\n\r\t ')
            # Skip blank lines and comment/header lines
            if len(csv_line) == 0 or csv_line.find('#') == 0:
                continue
            csv_fields = [_.strip(' ') for _ in csv_line.strip('\n\r ').split(',')]
            nid = int(csv_fields[0])
            name = csv_fields[1]
            nid2_nx[nid] = len(nx2_name)
            nx2_name.append(name)
        name_lines.close()
        if VERBOSE_LOAD_DATA:
            print('[ld2] * Loaded %r names (excluding unknown names)' % (len(nx2_name) - 2))
            print('[ld2] * Done loading name table')
    except IOError as ex:
        # A missing name table is tolerated; names are discovered later
        print('IOError: %r' % ex)
        print('[ld2.name] loading without name table')
        #raise
    except Exception as ex:
        print('[ld2.name] ERROR %r' % ex)
        #print('[ld2.name] ERROR name_tbl parsing: %s' % (''.join(cid_lines)))
        print('[ld2.name] ERROR on line number: %r' % (line_num))
        print('[ld2.name] ERROR on line: %r' % (csv_line))
        print('[ld2.name] ERROR on fields: %r' % (csv_fields))
    try:
        # -------------------
        # --- READ IMAGES ---
        # -------------------
        gx2_gname = []
        gx2_aif = []
        gid2_gx = {}  # this is not used. It can probably be removed

        def add_image(gname, aif, gid):
            '''Append one image row; records gid->gx when a gid is given.'''
            gx = len(gx2_gname)
            gx2_gname.append(gname)
            gx2_aif.append(aif)
            if gid is not None:
                # this is not used. It can probably be removed
                gid2_gx[gid] = gx
        print('[ld2] Loading images')
        # Load Image Table
        # <LEGACY CODE>
        if VERBOSE_LOAD_DATA:
            print('[ld2] * Loading image table: %r' % image_table)
        gid_lines = open(image_table, 'r').readlines()
        for line_num, csv_line in enumerate(gid_lines):
            csv_line = csv_line.strip('\n\r\t ')
            if len(csv_line) == 0 or csv_line.find('#') == 0:
                continue
            csv_fields = [_.strip(' ') for _ in csv_line.strip('\n\r ').split(',')]
            gid = int(csv_fields[0])
            # You have 3 csv files. Format == gid, gname.ext, aif
            if len(csv_fields) == 3:
                gname = csv_fields[1]
                aif = csv_fields[2].lower() in ['true', '1']  # convert to bool correctly
            # You have 4 csv fields. Format == gid, gname, ext, aif
            if len(csv_fields) == 4:
                gname = '.'.join(csv_fields[1:3])
                aif = csv_fields[3].lower() in ['true', '1']
            add_image(gname, aif, gid)
        nTableImgs = len(gx2_gname)
        fromTableNames = set(gx2_gname)
        if VERBOSE_LOAD_DATA:
            print('[ld2] * table specified %r images' % nTableImgs)
        # </LEGACY CODE>
        # Load Image Directory
        # Pick up images on disk that the table did not mention
        print('[ld2] * Loading image directory: %r' % img_dir)
        nDirImgs = 0
        nDirImgsAlready = 0
        for fname in os.listdir(img_dir):
            # NOTE(review): the fname[-4:] check cannot match '.tiff'
            # (5 chars) — confirm whether .tiff support actually works.
            if len(fname) > 4 and fname[-4:].lower() in ['.jpg', '.png', '.tiff']:
                if fname in fromTableNames:
                    nDirImgsAlready += 1
                    continue
                add_image(fname, False, None)
                nDirImgs += 1
        if VERBOSE_LOAD_DATA:
            print('[ld2] * dir specified %r images' % nDirImgs)
            print('[ld2] * %r were already specified in the table' % nDirImgsAlready)
            print('[ld2] * Loaded %r images' % len(gx2_gname))
            print('[ld2] * Done loading images')
    except IOError:
        # NOTE(review): 'ex' is not bound by this handler — this print will
        # raise NameError unless a previous handler happened to bind ex.
        print('IOError: %r' % ex)
        print('[ld2.img] loading without image table')
        #raise
    except Exception as ex:
        print('[ld2!.img] ERROR %r' % ex)
        #print('[ld2.img] ERROR image_tbl parsing: %s' % (''.join(cid_lines)))
        print('[ld2!.img] ERROR on line number: %r' % (line_num))
        print('[ld2!.img] ERROR on line: %r' % (csv_line))
        print('[ld2!.img] ERROR on fields: %r' % (csv_fields))
        raise
    try:
        # ------------------
        # --- READ CHIPS ---
        # ------------------
        print('[ld2] Loading chip table: %r' % chip_table)
        # Load Chip Table Header
        cid_lines = open(chip_table, 'r').readlines()
        num_data = -1
        # Parse Chip Table Header
        for line_num, csv_line in enumerate(cid_lines):
            #print('[LINE %4d] %r' % (line_num, csv_line))
            csv_line = csv_line.strip('\n\r\t ')
            if len(csv_line) == 0:
                #print('[LINE %4d] BROKEN' % (line_num))
                continue
            csv_line = csv_line.strip('\n')
            if csv_line.find('#') != 0:
                #print('[LINE %4d] BROKEN' % (line_num))
                break  # Break after header
            if re.search(header_csvformat_re, csv_line) is not None:
                #print('[LINE %4d] SEARCH' % (line_num))
                # Specified Header Variables
                if IS_VERSION_1_OR_2:
                    # v1/v2 headers look like '#N) fieldname - description';
                    # slice out the fieldname portion
                    #print(csv_line)
                    end_ = csv_line.find('-')
                    if end_ != -1:
                        end_ = end_ - 1
                        #print('end_=%r' % end_)
                        fieldname = csv_line[5:end_]
                    else:
                        fieldname = csv_line[5:]
                    #print(fieldname)
                    chip_csv_format += [fieldname]
                else:
                    chip_csv_format = [_.strip() for _ in csv_line.strip('#').split(',')]
                #print('[ld2] read chip_csv_format: %r' % chip_csv_format)
            if csv_line.find(header_numdata) == 0:
                #print('[LINE %4d] NUM_DATA' % (line_num))
                num_data = int(csv_line.replace(header_numdata, ''))
        if IS_VERSION_1_OR_2 and len(chip_csv_format) == 0:
            # Header parsing found nothing; fall back to the known v1/v2 layout
            chip_csv_format = v12_csv_format
        if VERBOSE_LOAD_DATA:
            print('[ld2] * num_chips: %r' % num_data)
            print('[ld2] * chip_csv_format: %r ' % chip_csv_format)
        #print('[ld2.chip] Header Columns: %s\n ' % '\n '.join(chip_csv_format))
        # Column indexes for each known field (-1 when absent)
        cid_x = tryindex(chip_csv_format, 'ChipID', 'imgindex', 'instance_id')
        gid_x = tryindex(chip_csv_format, 'ImgID', 'image_id')
        nid_x = tryindex(chip_csv_format, 'NameID', 'name_id')
        roi_x = tryindex(chip_csv_format, 'roi[tl_x tl_y w h]', 'roi')
        theta_x = tryindex(chip_csv_format, 'theta')
        # new fields
        gname_x = tryindex(chip_csv_format, 'Image', 'original_filepath')
        name_x = tryindex(chip_csv_format, 'Name', 'animal_name')
        required_x = [cid_x, gid_x, gname_x, nid_x, name_x, roi_x, theta_x]
        # Hotspotter Chip Tables
        cx2_cid = []
        cx2_nx = []
        cx2_gx = []
        cx2_roi = []
        cx2_theta = []
        # x is a csv field index in this context
        # get csv indexes which are unknown properties
        prop_x_list = np.setdiff1d(range(len(chip_csv_format)), required_x).tolist()
        px2_prop_key = [chip_csv_format[x] for x in prop_x_list]
        prop_dict = {}
        for prop in iter(px2_prop_key):
            prop_dict[prop] = []
        if VERBOSE_LOAD_DATA:
            print('[ld2] * num_user_properties: %r' % (len(prop_dict.keys())))
        # Parse Chip Table
        for line_num, csv_line in enumerate(cid_lines):
            csv_line = csv_line.strip('\n\r\t ')
            if len(csv_line) == 0 or csv_line.find('#') == 0:
                continue
            csv_fields = [_.strip(' ') for _ in csv_line.strip('\n\r ').split(',')]
            #
            # Load Chip ID
            try:
                cid = int(csv_fields[cid_x])
            except ValueError:
                print('[ld2!] cid_x = %r' % cid_x)
                print('[ld2!] csv_fields = %r' % csv_fields)
                print('[ld2!] csv_fields[cid_x] = %r' % csv_fields[cid_x])
                print(chip_csv_format)
                raise
            #
            # Load Chip ROI Info
            if roi_x != -1:
                roi_str = csv_fields[roi_x].strip('[').strip(']')
                roi = [int(round(float(_))) for _ in roi_str.split()]
            #
            # Load Chip theta Info
            if theta_x != -1:
                theta = float(csv_fields[theta_x])
            else:
                theta = 0
            #
            # Load Image ID/X
            if gid_x != -1:
                gid = int(csv_fields[gid_x])
                gx = gid2_gx[gid]
            elif gname_x != -1:
                gname = csv_fields[gname_x]
                if db_version == 'stripespotter':
                    # StripeSpotter stores absolute paths; fall back to the
                    # conventional img-NNNNNNN.jpg name and a full-image roi
                    if not exists(gname):
                        gname = 'img-%07d.jpg' % cid
                        gpath = join(db_dir, 'images', gname)
                        w, h = Image.open(gpath).size
                        roi = [1, 1, w, h]
                try:
                    gx = gx2_gname.index(gname)
                except ValueError:
                    # Image not seen before: register it on the fly
                    gx = len(gx2_gname)
                    gx2_gname.append(gname)
            #
            # Load Name ID/X
            if nid_x != -1:
                #print('namedbg csv_fields=%r' % csv_fields)
                #print('namedbg nid_x = %r' % nid_x)
                nid = int(csv_fields[nid_x])
                #print('namedbg %r' % nid)
                nx = nid2_nx[nid]
            elif name_x != -1:
                name = csv_fields[name_x]
                try:
                    nx = nx2_name.index(name)
                except ValueError:
                    # Name not seen before: register it on the fly
                    nx = len(nx2_name)
                    nx2_name.append(name)
            # Append info to cid lists
            cx2_cid.append(cid)
            cx2_gx.append(gx)
            cx2_nx.append(nx)
            cx2_roi.append(roi)
            cx2_theta.append(theta)
            # Unknown columns are collected as user properties
            for px, x in enumerate(prop_x_list):
                prop = px2_prop_key[px]
                prop_val = csv_fields[x]
                prop_dict[prop].append(prop_val)
    except Exception as ex:
        print('[chip.ld2] ERROR %r' % ex)
        #print('[chip.ld2] ERROR parsing: %s' % (''.join(cid_lines)))
        print('[chip.ld2] ERROR reading header: %r' % (line_num))
        print('[chip.ld2] ERROR on line number: %r' % (line_num))
        print('[chip.ld2] ERROR on line: %r' % (csv_line))
        print('[chip.ld2] ERROR on fields: %r' % (csv_fields))
        raise
    if VERBOSE_LOAD_DATA:
        print('[ld2] * Loaded: %r chips' % (len(cx2_cid)))
        print('[ld2] * Done loading chip table')
    # Return all information from load_tables
    #hs_tables.gid2_gx = gid2_gx
    #hs_tables.nid2_nx = nid2_nx
    hs_tables.init(gx2_gname, gx2_aif, nx2_name, cx2_cid, cx2_nx, cx2_gx,
                   cx2_roi, cx2_theta, prop_dict)
    print('[ld2] Done Loading hotspotter csv tables: %r' % (db_dir))
    if 'vcd' in sys.argv:
        helpers.vd(hs_dirs.computed_dir)
    return hs_dirs, hs_tables, db_version
def load_csv_tables(db_dir, allow_new_dir=True):
    '''Load the csv tables from a database directory.

    Detects which on-disk format the directory uses (current, hotspotter-v1,
    hotspotter-v2, or stripespotter), then parses the name table, image
    table, and chip table into HotspotterTables.

    Args:
        db_dir: path to a hotspotter database directory.
        allow_new_dir: if True, an empty/unrecognized directory is
            initialized as a new database instead of raising.

    Returns:
        (hs_dirs, hs_tables, db_version) where db_version is one of
        'current', 'hotspotter-v1', 'hotspotter-v2', 'stripespotter',
        or 'newdb' (fresh directory; tables are empty).

    Raises:
        Exception: when required tables cannot be located and the directory
            is neither a partial database nor (if allowed) a new one, or when
            chip-table parsing fails.
    '''
    if 'vdd' in sys.argv:
        helpers.vd(db_dir)
    print('\n=============================')
    print('[ld2] Loading hotspotter csv tables: %r' % db_dir)
    print('=============================')
    hs_dirs = ds.HotspotterDirs(db_dir)
    hs_tables = ds.HotspotterTables()
    img_dir = hs_dirs.img_dir
    internal_dir = hs_dirs.internal_dir
    db_dir = hs_dirs.db_dir
    # --- Table Names ---
    chip_table = join(internal_dir, CHIP_TABLE_FNAME)
    name_table = join(internal_dir, NAME_TABLE_FNAME)
    image_table = join(internal_dir, IMAGE_TABLE_FNAME)  # TODO: Make optional
    # --- CHECKS ---
    has_dbdir = helpers.checkpath(db_dir)
    has_imgdir = helpers.checkpath(img_dir)
    has_chiptbl = helpers.checkpath(chip_table)
    has_nametbl = helpers.checkpath(name_table)
    has_imgtbl = helpers.checkpath(image_table)
    # ChipTable Header Markers
    header_numdata = '# NumData '
    header_csvformat_re = '# *ChipID,'
    v12_csvformat_re = r'#[0-9]*\) '
    # Default ChipTable Header Variables
    chip_csv_format = ['ChipID', 'ImgID', 'NameID', 'roi[tl_x tl_y w h]',
                       'theta']
    v12_csv_format = ['instance_id', 'image_id', 'name_id', 'roi']
    # TODO DETECT OLD FORMATS HERE
    db_version = 'current'
    isCurrentVersion = all(
        [has_dbdir, has_imgdir, has_chiptbl, has_nametbl, has_imgtbl])
    print('[ld2] isCurrentVersion=%r' % isCurrentVersion)
    IS_VERSION_1_OR_2 = False
    if not isCurrentVersion:
        # Print which of the expected paths are missing (verbose checkpath)
        helpers.checkpath(db_dir, verbose=True)
        helpers.checkpath(img_dir, verbose=True)
        helpers.checkpath(chip_table, verbose=True)
        helpers.checkpath(name_table, verbose=True)
        helpers.checkpath(image_table, verbose=True)
        import db_info

        def assign_alternate(tblname):
            # Look for a table at the db root, then under .hs_internals
            path = join(db_dir, tblname)
            if helpers.checkpath(path, verbose=True):
                return path
            path = join(db_dir, '.hs_internals', tblname)
            if helpers.checkpath(path, verbose=True):
                return path
            raise Exception('bad state=%r' % tblname)
        #
        if db_info.has_v2_gt(db_dir):
            IS_VERSION_1_OR_2 = True
            db_version = 'hotspotter-v2'
            chip_csv_format = []
            header_csvformat_re = v12_csvformat_re
            chip_table = assign_alternate('instance_table.csv')
            name_table = assign_alternate('name_table.csv')
            image_table = assign_alternate('image_table.csv')
        #
        elif db_info.has_v1_gt(db_dir):
            IS_VERSION_1_OR_2 = True
            db_version = 'hotspotter-v1'
            chip_csv_format = []
            header_csvformat_re = v12_csvformat_re
            chip_table = assign_alternate('animal_info_table.csv')
            name_table = assign_alternate('name_table.csv')
            image_table = assign_alternate('image_table.csv')
        #
        elif db_info.has_ss_gt(db_dir):
            db_version = 'stripespotter'
            chip_table = join(db_dir, 'SightingData.csv')
            chip_csv_format = ['imgindex', 'original_filepath', 'roi',
                               'animal_name']
            header_csvformat_re = '#imgindex,'
            #raise NotImplementedError('stripe spotter conversion')
            if not helpers.checkpath(chip_table, verbose=True):
                raise Exception('bad state chip_table=%r' % chip_table)
        else:
            try:
                db_version = 'current'  # Well almost
                chip_table = assign_alternate(CHIP_TABLE_FNAME)
                name_table = assign_alternate(NAME_TABLE_FNAME)
                image_table = assign_alternate(IMAGE_TABLE_FNAME)
            except Exception:
                if db_info.has_partial_gt(db_dir):
                    print('[ld2] detected incomplete database')
                    raise NotImplementedError('partial database recovery')
                elif allow_new_dir:
                    print('[ld2] detected new dir')
                    hs_dirs.ensure_dirs()
                    return hs_dirs, hs_tables, 'newdb'
                else:
                    import traceback
                    print(traceback.format_exc())
                    print('[ld2] I AM IN A BAD STATE!')
                    errmsg = ''
                    errmsg += ('\n\n!!!!!\n\n')
                    errmsg += ('  ! The data tables seem to not be loaded')
                    errmsg += (' Files in internal dir: %r' % internal_dir)
                    for fname in os.listdir(internal_dir):
                        # BUGFIX: previously appended the literal string
                        # 'fname'; now reports the actual file name
                        errmsg += ('  ! %s' % fname)
                    errmsg += ('\n\n!!!!!\n\n')
                    print(errmsg)
                    raise Exception(errmsg)
    if not helpers.checkpath(chip_table):
        raise Exception('bad state chip_table=%r' % chip_table)
    print('[ld2] detected %r' % db_version)
    hs_dirs.ensure_dirs()
    print('-------------------------')
    print('[ld2] Loading database tables: ')
    cid_lines = []
    line_num = 0
    csv_line = ''
    csv_fields = []
    # RCOS TODO: We need a more general csv read function
    # which can handle all of these little corner cases delt with here.
    try:
        # ------------------
        # --- READ NAMES ---
        # ------------------
        print('[ld2] Loading name table: %r' % name_table)
        # Two leading UNKNOWN_NAME slots reserve nid 0 and 1
        nx2_name = [UNKNOWN_NAME, UNKNOWN_NAME]
        nid2_nx = {0: 0, 1: 1}
        name_lines = open(name_table, 'r')
        for line_num, csv_line in enumerate(name_lines):
            csv_line = csv_line.strip('\n\r\t ')
            # Skip blank lines and comment/header lines
            if len(csv_line) == 0 or csv_line.find('#') == 0:
                continue
            csv_fields = [_.strip(' ')
                          for _ in csv_line.strip('\n\r ').split(',')]
            nid = int(csv_fields[0])
            name = csv_fields[1]
            nid2_nx[nid] = len(nx2_name)
            nx2_name.append(name)
        name_lines.close()
        if VERBOSE_LOAD_DATA:
            print('[ld2] * Loaded %r names (excluding unknown names)'
                  % (len(nx2_name) - 2))
            print('[ld2] * Done loading name table')
    except IOError as ex:
        print('IOError: %r' % ex)
        print('[ld2.name] loading without name table')
        #raise
    except Exception as ex:
        print('[ld2.name] ERROR %r' % ex)
        #print('[ld2.name] ERROR name_tbl parsing: %s' % (''.join(cid_lines)))
        print('[ld2.name] ERROR on line number: %r' % (line_num))
        print('[ld2.name] ERROR on line: %r' % (csv_line))
        print('[ld2.name] ERROR on fields: %r' % (csv_fields))
    try:
        # -------------------
        # --- READ IMAGES ---
        # -------------------
        gx2_gname = []
        gx2_aif = []
        gid2_gx = {}  # this is not used. It can probably be removed

        def add_image(gname, aif, gid):
            # Register one image; gid is None for images found on disk only
            gx = len(gx2_gname)
            gx2_gname.append(gname)
            gx2_aif.append(aif)
            if gid is not None:
                # this is not used. It can probably be removed
                gid2_gx[gid] = gx
        print('[ld2] Loading images')
        # Load Image Table
        # <LEGACY CODE>
        if VERBOSE_LOAD_DATA:
            print('[ld2] * Loading image table: %r' % image_table)
        gid_lines = open(image_table, 'r').readlines()
        for line_num, csv_line in enumerate(gid_lines):
            csv_line = csv_line.strip('\n\r\t ')
            if len(csv_line) == 0 or csv_line.find('#') == 0:
                continue
            csv_fields = [_.strip(' ')
                          for _ in csv_line.strip('\n\r ').split(',')]
            gid = int(csv_fields[0])
            # You have 3 csv files. Format == gid, gname.ext, aif
            if len(csv_fields) == 3:
                gname = csv_fields[1]
                # convert to bool correctly
                aif = csv_fields[2].lower() in ['true', '1']
            # You have 4 csv fields. Format == gid, gname, ext, aif
            if len(csv_fields) == 4:
                gname = '.'.join(csv_fields[1:3])
                aif = csv_fields[3].lower() in ['true', '1']
            # NOTE(review): a row with neither 3 nor 4 fields silently reuses
            # gname/aif from the previous row — confirm this is intended
            add_image(gname, aif, gid)
        nTableImgs = len(gx2_gname)
        fromTableNames = set(gx2_gname)
        if VERBOSE_LOAD_DATA:
            print('[ld2] * table specified %r images' % nTableImgs)
        # </LEGACY CODE>
        # Load Image Directory
        print('[ld2] * Loading image directory: %r' % img_dir)
        nDirImgs = 0
        nDirImgsAlready = 0
        for fname in os.listdir(img_dir):
            if len(fname) > 4 and fname[-4:].lower() in ['.jpg', '.png',
                                                         '.tiff']:
                if fname in fromTableNames:
                    # Already specified in the image table
                    nDirImgsAlready += 1
                    continue
                add_image(fname, False, None)
                nDirImgs += 1
        if VERBOSE_LOAD_DATA:
            print('[ld2] * dir specified %r images' % nDirImgs)
            print('[ld2] * %r were already specified in the table'
                  % nDirImgsAlready)
            print('[ld2] * Loaded %r images' % len(gx2_gname))
            print('[ld2] * Done loading images')
    except IOError as ex:
        # BUGFIX: 'as ex' was missing, so the print below raised NameError
        print('IOError: %r' % ex)
        print('[ld2.img] loading without image table')
        #raise
    except Exception as ex:
        print('[ld2!.img] ERROR %r' % ex)
        #print('[ld2.img] ERROR image_tbl parsing: %s' % (''.join(cid_lines)))
        print('[ld2!.img] ERROR on line number: %r' % (line_num))
        print('[ld2!.img] ERROR on line: %r' % (csv_line))
        print('[ld2!.img] ERROR on fields: %r' % (csv_fields))
        raise
    try:
        # ------------------
        # --- READ CHIPS ---
        # ------------------
        print('[ld2] Loading chip table: %r' % chip_table)
        # Load Chip Table Header
        cid_lines = open(chip_table, 'r').readlines()
        num_data = -1
        # Parse Chip Table Header
        for line_num, csv_line in enumerate(cid_lines):
            csv_line = csv_line.strip('\n\r\t ')
            if len(csv_line) == 0:
                continue
            csv_line = csv_line.strip('\n')
            if csv_line.find('#') != 0:
                break  # Break after header
            if re.search(header_csvformat_re, csv_line) is not None:
                # Specified Header Variables
                if IS_VERSION_1_OR_2:
                    # v1/v2 headers look like '#N) fieldname - description'
                    end_ = csv_line.find('-')
                    if end_ != -1:
                        end_ = end_ - 1
                        fieldname = csv_line[5:end_]
                    else:
                        fieldname = csv_line[5:]
                    chip_csv_format += [fieldname]
                else:
                    chip_csv_format = [_.strip()
                                       for _ in csv_line.strip('#').split(',')]
            if csv_line.find(header_numdata) == 0:
                num_data = int(csv_line.replace(header_numdata, ''))
        if IS_VERSION_1_OR_2 and len(chip_csv_format) == 0:
            chip_csv_format = v12_csv_format
        if VERBOSE_LOAD_DATA:
            print('[ld2] * num_chips: %r' % num_data)
            print('[ld2] * chip_csv_format: %r ' % chip_csv_format)
        # Column indexes (-1 when a column is absent)
        cid_x = tryindex(chip_csv_format, 'ChipID', 'imgindex', 'instance_id')
        gid_x = tryindex(chip_csv_format, 'ImgID', 'image_id')
        nid_x = tryindex(chip_csv_format, 'NameID', 'name_id')
        roi_x = tryindex(chip_csv_format, 'roi[tl_x tl_y w h]', 'roi')
        theta_x = tryindex(chip_csv_format, 'theta')
        # new fields
        gname_x = tryindex(chip_csv_format, 'Image', 'original_filepath')
        name_x = tryindex(chip_csv_format, 'Name', 'animal_name')
        required_x = [cid_x, gid_x, gname_x, nid_x, name_x, roi_x, theta_x]
        # Hotspotter Chip Tables
        cx2_cid = []
        cx2_nx = []
        cx2_gx = []
        cx2_roi = []
        cx2_theta = []
        # x is a csv field index in this context
        # get csv indexes which are unknown properties
        prop_x_list = np.setdiff1d(range(len(chip_csv_format)),
                                   required_x).tolist()
        px2_prop_key = [chip_csv_format[x] for x in prop_x_list]
        prop_dict = {}
        for prop in iter(px2_prop_key):
            prop_dict[prop] = []
        if VERBOSE_LOAD_DATA:
            print('[ld2] * num_user_properties: %r' % (len(prop_dict.keys())))
        # Parse Chip Table
        for line_num, csv_line in enumerate(cid_lines):
            csv_line = csv_line.strip('\n\r\t ')
            if len(csv_line) == 0 or csv_line.find('#') == 0:
                continue
            csv_fields = [_.strip(' ')
                          for _ in csv_line.strip('\n\r ').split(',')]
            # Load Chip ID
            try:
                cid = int(csv_fields[cid_x])
            except ValueError:
                print('[ld2!] cid_x = %r' % cid_x)
                print('[ld2!] csv_fields = %r' % csv_fields)
                print('[ld2!] csv_fields[cid_x] = %r' % csv_fields[cid_x])
                print(chip_csv_format)
                raise
            # Load Chip ROI Info
            if roi_x != -1:
                roi_str = csv_fields[roi_x].strip('[').strip(']')
                roi = [int(round(float(_))) for _ in roi_str.split()]
            # Load Chip theta Info
            if theta_x != -1:
                theta = float(csv_fields[theta_x])
            else:
                theta = 0
            # Load Image ID/X
            if gid_x != -1:
                gid = int(csv_fields[gid_x])
                gx = gid2_gx[gid]
            elif gname_x != -1:
                gname = csv_fields[gname_x]
                if db_version == 'stripespotter':
                    if not exists(gname):
                        # Fall back to the canonical stripespotter filename
                        # and use the whole image as the roi
                        # NOTE(review): nesting reconstructed — confirm the
                        # fallback roi is only meant for missing filepaths
                        gname = 'img-%07d.jpg' % cid
                        gpath = join(db_dir, 'images', gname)
                        w, h = Image.open(gpath).size
                        roi = [1, 1, w, h]
                try:
                    gx = gx2_gname.index(gname)
                except ValueError:
                    gx = len(gx2_gname)
                    gx2_gname.append(gname)
            # Load Name ID/X
            if nid_x != -1:
                nid = int(csv_fields[nid_x])
                nx = nid2_nx[nid]
            elif name_x != -1:
                name = csv_fields[name_x]
                try:
                    nx = nx2_name.index(name)
                except ValueError:
                    nx = len(nx2_name)
                    nx2_name.append(name)
            # Append info to cid lists
            cx2_cid.append(cid)
            cx2_gx.append(gx)
            cx2_nx.append(nx)
            cx2_roi.append(roi)
            cx2_theta.append(theta)
            for px, x in enumerate(prop_x_list):
                prop = px2_prop_key[px]
                prop_val = csv_fields[x]
                prop_dict[prop].append(prop_val)
    except Exception as ex:
        print('[chip.ld2] ERROR %r' % ex)
        #print('[chip.ld2] ERROR parsing: %s' % (''.join(cid_lines)))
        print('[chip.ld2] ERROR reading header: %r' % (line_num))
        print('[chip.ld2] ERROR on line number: %r' % (line_num))
        print('[chip.ld2] ERROR on line: %r' % (csv_line))
        print('[chip.ld2] ERROR on fields: %r' % (csv_fields))
        raise
    if VERBOSE_LOAD_DATA:
        print('[ld2] * Loaded: %r chips' % (len(cx2_cid)))
        print('[ld2] * Done loading chip table')
    # Return all information from load_tables
    #hs_tables.gid2_gx = gid2_gx
    #hs_tables.nid2_nx = nid2_nx
    hs_tables.init(gx2_gname, gx2_aif, nx2_name,
                   cx2_cid, cx2_nx, cx2_gx, cx2_roi, cx2_theta, prop_dict)
    print('[ld2] Done Loading hotspotter csv tables: %r' % (db_dir))
    if 'vcd' in sys.argv:
        helpers.vd(hs_dirs.computed_dir)
    return hs_dirs, hs_tables, db_version
def load_chips(hs, cx_list=None, **kwargs):
    '''Precompute (extract + resize + filter) chips and register their paths.

    Args:
        hs: the HotSpotter api object; chip config is read from
            hs.prefs.chip_cfg and results are written into hs.cpaths.
        cx_list: chip index / iterable of chip indexes to compute; None
            means all valid chip indexes.
        **kwargs: unused (kept for caller compatibility).

    Side effects:
        Writes chip images to hs.dirs.chip_dir (via parallel_compute) and
        fills hs.cpaths.cx2_rchip_path / cx2_rchip_size.
    '''
    print('\n=============================')
    print('[cc2] Precomputing chips and loading chip paths: %r'
          % hs.get_db_name())
    print('=============================')
    #----------------
    # COMPUTE SETUP
    #----------------
    chip_cfg = hs.prefs.chip_cfg
    chip_uid = chip_cfg.get_uid()
    if hs.cpaths.chip_uid != '' and hs.cpaths.chip_uid != chip_uid:
        # Config changed since last load; cached chip info is stale
        print('[cc2] Disagreement: OLD_chip_uid = %r' % hs.cpaths.chip_uid)
        print('[cc2] Disagreement: NEW_chip_uid = %r' % chip_uid)
        print('[cc2] Unloading all chip information')
        hs.unload_all()
    print('[cc2] chip_uid = %r' % chip_uid)
    # Get the list of chips paths to load
    cx_list = hs.get_valid_cxs() if cx_list is None else cx_list
    if not np.iterable(cx_list):
        cx_list = [cx_list]
    if len(cx_list) == 0:
        return  # HACK
    cx_list = np.array(cx_list)  # HACK
    hs.cpaths.chip_uid = chip_uid
    # Get table information
    try:
        gx_list = hs.tables.cx2_gx[cx_list]
        cid_list = hs.tables.cx2_cid[cx_list]
        theta_list = hs.tables.cx2_theta[cx_list]
        roi_list = hs.tables.cx2_roi[cx_list]
    except IndexError as ex:
        print(repr(ex))
        print(hs.tables)
        print('cx_list=%r' % (cx_list,))
        raise
    # Get ChipConfig Parameters
    sqrt_area = chip_cfg['chip_sqrt_area']
    # Filters are applied to each chip in order after extraction
    filter_list = []
    if chip_cfg['adapteq']:
        filter_list.append(adapteq_fn)
    if chip_cfg['histeq']:
        filter_list.append(histeq_fn)
    if chip_cfg['region_norm']:
        filter_list.append(region_norm_fn)
    #if chip_cfg['maxcontrast']: filter_list.append(maxcontr_fn)
    #if chip_cfg['rank_eq']: filter_list.append(rankeq_fn)
    #if chip_cfg['local_eq']: filter_list.append(localeq_fn)
    if chip_cfg['grabcut']:
        filter_list.append(grabcut_fn)
    #---------------------------
    # ___Normalized Chip Args___
    #---------------------------
    # Full Image Paths: where to extract the chips from
    gfpath_list = hs.gx2_gname(gx_list, full=True)
    # Chip Paths: where to write extracted chips to
    _cfname_fmt = 'cid%d' + chip_uid + '.png'
    _cfpath_fmt = join(hs.dirs.chip_dir, _cfname_fmt)
    cfpath_list = [_cfpath_fmt % cid for cid in iter(cid_list)]
    # Normalized Chip Sizes: ensure chips have about sqrt_area squared pixels
    chipsz_list = compute_uniform_area_chip_sizes(roi_list, sqrt_area)
    #--------------------------
    # EXTRACT AND RESIZE CHIPS
    #--------------------------
    pcc_kwargs = {
        'arg_list': [gfpath_list, cfpath_list, roi_list, theta_list,
                     chipsz_list],
        'lazy': not hs.args.nocache_chips,
        'num_procs': hs.args.num_procs,
        'common_args': [filter_list]
    }
    # Compute all chips with paramatarized filters
    parallel_compute(compute_chip, **pcc_kwargs)
    # Read sizes.
    # BUGFIX: this was a list comprehension whose loop variable 'path' was
    # referenced in the except-handler below; comprehension variables do not
    # leak in Python 3, so the handler itself raised NameError. An explicit
    # loop keeps 'path' bound to the offending file.
    path = None
    rsize_list = []
    try:
        for path in iter(cfpath_list):
            rsize_list.append((None, None) if path is None
                              else Image.open(path).size)
    except IOError as ex:
        import gc
        gc.collect()
        print('[cc] ex=%r' % ex)
        print('path=%r' % path)
        if helpers.checkpath(path, verbose=True):
            import time
            time.sleep(1)  # delays for 1 seconds
            print('[cc] file exists but cause IOError?')
            print('[cc] probably corrupted. Removing it')
            try:
                helpers.remove_file(path)
            except OSError:
                print('Something bad happened')
                raise
        raise
    #----------------------
    # UPDATE API VARIABLES
    #----------------------
    print('[cc2] Done Precomputing chips and loading chip paths')
    # Extend the datastructure if needed
    list_size = max(cx_list) + 1
    #helpers.ensure_list_size(hs.cpaths.cx2_chip_path, list_size)
    helpers.ensure_list_size(hs.cpaths.cx2_rchip_path, list_size)
    helpers.ensure_list_size(hs.cpaths.cx2_rchip_size, list_size)
    # Copy the values into the ChipPaths object
    for lx, cx in enumerate(cx_list):
        hs.cpaths.cx2_rchip_path[cx] = cfpath_list[lx]
    for lx, cx in enumerate(cx_list):
        hs.cpaths.cx2_rchip_size[cx] = rsize_list[lx]
    #hs.load_cx2_rchip_size()  # TODO: Loading rchip size should be handled
    # more robustly
    print('[cc2]=============================')
def load_chips(hs, cx_list=None, **kwargs):
    '''Precompute (extract + resize + filter) chips and register their paths.

    NOTE(review): this is a byte-duplicate of an earlier load_chips
    definition in this file; this later definition is the one that wins at
    import time. Consider deleting one copy.

    Args:
        hs: the HotSpotter api object; chip config is read from
            hs.prefs.chip_cfg and results are written into hs.cpaths.
        cx_list: chip index / iterable of chip indexes to compute; None
            means all valid chip indexes.
        **kwargs: unused (kept for caller compatibility).

    Side effects:
        Writes chip images to hs.dirs.chip_dir (via parallel_compute) and
        fills hs.cpaths.cx2_rchip_path / cx2_rchip_size.
    '''
    print('\n=============================')
    print('[cc2] Precomputing chips and loading chip paths: %r'
          % hs.get_db_name())
    print('=============================')
    #----------------
    # COMPUTE SETUP
    #----------------
    chip_cfg = hs.prefs.chip_cfg
    chip_uid = chip_cfg.get_uid()
    if hs.cpaths.chip_uid != '' and hs.cpaths.chip_uid != chip_uid:
        # Config changed since last load; cached chip info is stale
        print('[cc2] Disagreement: OLD_chip_uid = %r' % hs.cpaths.chip_uid)
        print('[cc2] Disagreement: NEW_chip_uid = %r' % chip_uid)
        print('[cc2] Unloading all chip information')
        hs.unload_all()
    print('[cc2] chip_uid = %r' % chip_uid)
    # Get the list of chips paths to load
    cx_list = hs.get_valid_cxs() if cx_list is None else cx_list
    if not np.iterable(cx_list):
        cx_list = [cx_list]
    if len(cx_list) == 0:
        return  # HACK
    cx_list = np.array(cx_list)  # HACK
    hs.cpaths.chip_uid = chip_uid
    # Get table information
    try:
        gx_list = hs.tables.cx2_gx[cx_list]
        cid_list = hs.tables.cx2_cid[cx_list]
        theta_list = hs.tables.cx2_theta[cx_list]
        roi_list = hs.tables.cx2_roi[cx_list]
    except IndexError as ex:
        print(repr(ex))
        print(hs.tables)
        print('cx_list=%r' % (cx_list,))
        raise
    # Get ChipConfig Parameters
    sqrt_area = chip_cfg['chip_sqrt_area']
    # Filters are applied to each chip in order after extraction
    filter_list = []
    if chip_cfg['adapteq']:
        filter_list.append(adapteq_fn)
    if chip_cfg['histeq']:
        filter_list.append(histeq_fn)
    if chip_cfg['region_norm']:
        filter_list.append(region_norm_fn)
    #if chip_cfg['maxcontrast']: filter_list.append(maxcontr_fn)
    #if chip_cfg['rank_eq']: filter_list.append(rankeq_fn)
    #if chip_cfg['local_eq']: filter_list.append(localeq_fn)
    if chip_cfg['grabcut']:
        filter_list.append(grabcut_fn)
    #---------------------------
    # ___Normalized Chip Args___
    #---------------------------
    # Full Image Paths: where to extract the chips from
    gfpath_list = hs.gx2_gname(gx_list, full=True)
    # Chip Paths: where to write extracted chips to
    _cfname_fmt = 'cid%d' + chip_uid + '.png'
    _cfpath_fmt = join(hs.dirs.chip_dir, _cfname_fmt)
    cfpath_list = [_cfpath_fmt % cid for cid in iter(cid_list)]
    # Normalized Chip Sizes: ensure chips have about sqrt_area squared pixels
    chipsz_list = compute_uniform_area_chip_sizes(roi_list, sqrt_area)
    #--------------------------
    # EXTRACT AND RESIZE CHIPS
    #--------------------------
    pcc_kwargs = {
        'arg_list': [gfpath_list, cfpath_list, roi_list, theta_list,
                     chipsz_list],
        'lazy': not hs.args.nocache_chips,
        'num_procs': hs.args.num_procs,
        'common_args': [filter_list]
    }
    # Compute all chips with paramatarized filters
    parallel_compute(compute_chip, **pcc_kwargs)
    # Read sizes.
    # BUGFIX: this was a list comprehension whose loop variable 'path' was
    # referenced in the except-handler below; comprehension variables do not
    # leak in Python 3, so the handler itself raised NameError. An explicit
    # loop keeps 'path' bound to the offending file.
    path = None
    rsize_list = []
    try:
        for path in iter(cfpath_list):
            rsize_list.append((None, None) if path is None
                              else Image.open(path).size)
    except IOError as ex:
        import gc
        gc.collect()
        print('[cc] ex=%r' % ex)
        print('path=%r' % path)
        if helpers.checkpath(path, verbose=True):
            import time
            time.sleep(1)  # delays for 1 seconds
            print('[cc] file exists but cause IOError?')
            print('[cc] probably corrupted. Removing it')
            try:
                helpers.remove_file(path)
            except OSError:
                print('Something bad happened')
                raise
        raise
    #----------------------
    # UPDATE API VARIABLES
    #----------------------
    print('[cc2] Done Precomputing chips and loading chip paths')
    # Extend the datastructure if needed
    list_size = max(cx_list) + 1
    #helpers.ensure_list_size(hs.cpaths.cx2_chip_path, list_size)
    helpers.ensure_list_size(hs.cpaths.cx2_rchip_path, list_size)
    helpers.ensure_list_size(hs.cpaths.cx2_rchip_size, list_size)
    # Copy the values into the ChipPaths object
    for lx, cx in enumerate(cx_list):
        hs.cpaths.cx2_rchip_path[cx] = cfpath_list[lx]
    for lx, cx in enumerate(cx_list):
        hs.cpaths.cx2_rchip_size[cx] = rsize_list[lx]
    #hs.load_cx2_rchip_size()  # TODO: Loading rchip size should be handled
    # more robustly
    print('[cc2]=============================')