def get_annotmatch_rowid_from_undirected_superkey(ibs, aids1, aids2):
    # The directed nature of this makes a few things difficult and may cause
    # odd behavior
    am_rowids = ibs.get_annotmatch_rowid_from_superkey(aids1, aids2)
    idxs = ut.where([r is None for r in am_rowids])
    # Check which ones are None
    aids1_ = ut.take(aids1, idxs)
    aids2_ = ut.take(aids2, idxs)
    am_rowids_ = ibs.get_annotmatch_rowid_from_superkey(aids2_, aids1_)
    # Use the other rowid if found
    for idx, rowid in zip(idxs, am_rowids_):
        am_rowids[idx] = rowid
    return am_rowids
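# --- Hedged usage sketch (not part of the ibeis API) ---
# A minimal illustration of the forward/backward lookup pattern used above,
# with a plain dict standing in for the annotmatch table. The helper name
# `_undirected_lookup_sketch` and the dict-based "table" are assumptions made
# for this example only.
def _undirected_lookup_sketch(table, aids1, aids2):
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> # edge (1, 2) was stored in the reversed direction as (2, 1)
        >>> table = {(2, 1): 10, (3, 4): 11}
        >>> assert _undirected_lookup_sketch(table, [1, 3], [2, 4]) == [10, 11]
    """
    rowids = [table.get((a1, a2), None) for a1, a2 in zip(aids1, aids2)]
    # Retry the reversed direction for any edge that was not found
    idxs = [i for i, r in enumerate(rowids) if r is None]
    backward = [table.get((aids2[i], aids1[i]), None) for i in idxs]
    for idx, rowid in zip(idxs, backward):
        rowids[idx] = rowid
    return rowids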
def expand_input(inputs, index, inplace=False):
    """
    Pushes the rootmost inputs all the way up to the sources of the graph

    CommandLine:
        python -m dtool.input_helpers expand_input

    Example:
        >>> # ENABLE_DOCTEST
        >>> from dtool.input_helpers import *  # NOQA
        >>> from dtool.example_depcache2 import *  # NOQA
        >>> depc = testdata_depc4()
        >>> inputs = depc['smk_match'].rootmost_inputs
        >>> inputs = depc['neighbs'].rootmost_inputs
        >>> print('(pre-expand) inputs = %r' % (inputs,))
        >>> index = 'indexer'
        >>> inputs2 = inputs.expand_input(index)
        >>> print('(post-expand) inputs2 = %r' % (inputs2,))
        >>> assert 'indexer' in str(inputs), 'missing indexer1'
        >>> assert 'indexer' not in str(inputs2), (
        >>>     '(2) unexpected indexer in %s' % (inputs2,))
    """
    if isinstance(index, six.string_types):
        index_list = ut.where(
            [rmi.tablename == index for rmi in inputs.rmi_list])
        if len(index_list) == 0:
            index = 0
        else:
            index = index_list[0]

    rmi = inputs.rmi_list[index]
    parent_level = rmi.parent_level()
    if len(parent_level) == 0:
        # raise AssertionError('no parents to expand')
        new_rmi_list = inputs.rmi_list[:]
    else:
        new_rmi_list = ut.insert_values(inputs.rmi_list, index, parent_level,
                                        inplace)
        new_rmi_list = ut.unique(new_rmi_list)

    if inplace:
        inputs.rmi_list = new_rmi_list
        new_inputs = inputs
    else:
        new_inputs = TableInput(new_rmi_list, inputs.exi_graph, inputs.table)
    return new_inputs
def purge_ensure_one_annot_per_images(ibs):
    """
    pip install Pipe
    """
    # Purge all but one annotation
    images = ibs.images()
    # images.aids
    groups = images._annot_groups
    import numpy as np

    # Take all but the largest annotation per image
    large_masks = [
        ut.index_to_boolmask([np.argmax(x)], len(x))
        for x in groups.bbox_area
    ]
    small_masks = ut.lmap(ut.not_list, large_masks)
    # Remove all but the largest annotation
    small_aids = ut.zipcompress(groups.aid, small_masks)
    small_aids = ut.flatten(small_aids)

    # Fix any empty images
    images = ibs.images()
    empty_images = ut.where(np.array(images.num_annotations) == 0)
    logger.info('empty_images = %r' % (empty_images, ))

    # list(map(basename, map(dirname, images.uris_original)))

    def VecPipe(func):
        import pipe

        @pipe.Pipe
        def wrapped(sequence):
            return map(func, sequence)
            # return (None if item is None else func(item) for item in sequence)
        return wrapped

    name_list = list(images.uris_original | VecPipe(dirname) | VecPipe(basename))
    aids_list = images.aids
    ut.assert_all_eq(list(aids_list | VecPipe(len)))
    annots = ibs.annots(ut.flatten(aids_list))
    annots.names = name_list
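# --- Hedged usage sketch (illustration only) ---
# Shows how the "keep only the largest annotation per image" masks above are
# built. The bbox-area groups and aid groups here are invented for this
# example; only ut.index_to_boolmask, ut.lmap, ut.not_list, ut.zipcompress,
# and ut.flatten from utool are assumed.
def _largest_annot_mask_sketch():
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> small_aids = _largest_annot_mask_sketch()
        >>> assert small_aids == [2, 3, 4]
    """
    import numpy as np
    import utool as ut
    # Two images: the first has three annots, the second has two
    bbox_area_groups = [np.array([9.0, 4.0, 1.0]), np.array([2.0, 5.0])]
    aid_groups = [[1, 2, 3], [4, 5]]
    large_masks = [ut.index_to_boolmask([np.argmax(x)], len(x))
                   for x in bbox_area_groups]
    small_masks = ut.lmap(ut.not_list, large_masks)
    # aids that would be purged (everything but the largest per image)
    small_aids = ut.flatten(ut.zipcompress(aid_groups, small_masks))
    return small_aids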
def add_annotmatch_undirected(ibs, aids1, aids2, **kwargs):
    if len(aids1) == 0 and len(aids2) == 0:
        return []
    edges = list(zip(aids1, aids2))
    from ibeis.algo.graph import nx_utils as nxu
    # Enforce new undirected constraint
    edges = ut.estarmap(nxu.e_, edges)
    aids1, aids2 = list(zip(*edges))

    am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(aids1, aids2)
    idxs = ut.where([r is None for r in am_rowids])
    # Check which ones are None
    aids1_ = ut.take(aids1, idxs)
    aids2_ = ut.take(aids2, idxs)
    # Create anything that is None
    am_rowids_ = ibs.add_annotmatch(aids2_, aids1_)
    # Fill in the newly created rowids
    for idx, rowid in zip(idxs, am_rowids_):
        am_rowids[idx] = rowid
    return am_rowids
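# --- Hedged usage sketch (illustration only) ---
# The undirected constraint above canonicalizes each edge so that (a, b) and
# (b, a) map to the same key before the superkey lookup. This standalone
# sketch mimics that normalization with a plain sorted-tuple helper; it is an
# assumption made for illustration, not the nx_utils.e_ implementation itself.
def _canonicalize_edges_sketch(aids1, aids2):
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> aids1, aids2 = _canonicalize_edges_sketch([3, 1, 5], [2, 4, 5])
        >>> assert aids1 == (2, 1, 5) and aids2 == (3, 4, 5)
    """
    edges = [tuple(sorted(edge)) for edge in zip(aids1, aids2)]
    aids1, aids2 = zip(*edges)
    return aids1, aids2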
def compress(self, flags):
    idxs = ut.where(flags)
    return self.take(idxs)
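# --- Hedged usage sketch (illustration only) ---
# compress() above is just take() driven by the indices of the truthy flags.
# The list values here are made up; only ut.where and ut.take from utool are
# assumed.
def _compress_sketch(items, flags):
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> assert _compress_sketch(['a', 'b', 'c', 'd'], [1, 0, 1, 0]) == ['a', 'c']
    """
    import utool as ut
    idxs = ut.where(flags)
    return ut.take(items, idxs)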
def fuzzy_find_colxs(self, pat):
    import utool as ut
    colxs = ut.where(
        ut.filterflags_general_tags(self.header_tags, in_any=[pat]))
    return colxs
def get_col(table, tbl_rowids, colnames=None):
    """
    colnames = ('mask', 'size')

    FIXME: unpacking is confusing with sql controller
    """
    # print('Get prop of %r, colnames=%r' % (table, colnames))
    try:
        request_unpack = False
        if colnames is None:
            colnames = table.data_colnames
            # table._internal_data_colnames
        else:
            if isinstance(colnames, six.text_type):
                request_unpack = True
                colnames = (colnames,)
        # print('* colnames = %r' % (colnames,))

        eager = True
        nInput = None

        total = 0
        intern_colnames = []
        extern_resolve_colxs = []
        nesting_xs = []

        for c in colnames:
            if c in table.external_to_internal:
                intern_colnames.append([table.external_to_internal[c]])
                read_func = table.extern_read_funcs[c]
                extern_resolve_colxs.append((total, read_func))
                nesting_xs.append(total)
                total += 1
            elif c in table.nested_to_flat:
                nest = table.nested_to_flat[c]
                nesting_xs.append(list(range(total, total + len(nest))))
                intern_colnames.append(nest)
                total += len(nest)
            else:
                nesting_xs.append(total)
                intern_colnames.append([c])
                total += 1

        flat_intern_colnames = tuple(ut.flatten(intern_colnames))

        # do sql read
        # FIXME: understand unpack_scalars and keepwrap
        raw_prop_list = table.get_internal_columns(
            tbl_rowids, flat_intern_colnames, eager, nInput,
            unpack_scalars=True, keepwrap=True)
        # unpack_scalars=not request_unpack)
        # print('depth(raw_prop_list) = %r' % (ut.depth_profile(raw_prop_list),))

        prop_listT = list(zip(*raw_prop_list))
        for extern_colx, read_func in extern_resolve_colxs:
            data_list = []
            for uri in prop_listT[extern_colx]:
                try:
                    # FIXME: only do this for a localpath
                    uri1 = ut.unixjoin(table.depc.cache_dpath, uri)
                    data = read_func(uri1)
                except Exception as ex:
                    ut.printex(ex, 'failed to load external data',
                               iswarning=False)
                    raise
                    # FIXME
                    # data = None
                data_list.append(data)
            prop_listT[extern_colx] = data_list

        nested_proplistT = ut.list_unflat_take(prop_listT, nesting_xs)

        for tx in ut.where([isinstance(xs, list) for xs in nesting_xs]):
            nested_proplistT[tx] = list(zip(*nested_proplistT[tx]))

        prop_list = list(zip(*nested_proplistT))

        if request_unpack:
            prop_list = [None if p is None else p[0] for p in prop_list]
    except Exception as ex:
        ut.printex(ex, 'failed in get col', keys=[
            'table.tablename',
            'request_unpack',
            'tbl_rowids',
            'colnames',
            'raw_prop_list',
            (ut.depth_profile, 'raw_prop_list'),
            'prop_listT',
            (ut.depth_profile, 'prop_listT'),
            'nesting_xs',
            'nested_proplistT',
            'prop_list'])
        raise
    return prop_list
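# --- Hedged usage sketch (illustration only) ---
# Illustrates the nesting bookkeeping above with pure Python: a nested column
# that expands to two internal columns gets regrouped into one tuple per row,
# while scalar columns pass through unchanged. Column layout and row values
# are invented for this example.
def _regroup_nested_cols_sketch():
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> prop_list = _regroup_nested_cols_sketch()
        >>> assert prop_list == [('m0', (10, 20)), ('m1', (30, 40))]
    """
    # flat internal columns: mask, width, height (width/height come from one
    # nested external column)
    raw_prop_list = [('m0', 10, 20), ('m1', 30, 40)]
    nesting_xs = [0, [1, 2]]
    prop_listT = list(zip(*raw_prop_list))
    nested_proplistT = [
        prop_listT[xs] if not isinstance(xs, list)
        else list(zip(*[prop_listT[x] for x in xs]))
        for xs in nesting_xs
    ]
    prop_list = list(zip(*nested_proplistT))
    return prop_list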
def add_rows_from_parent(table, parent_rowids, config=None, verbose=True,
                         return_num_dirty=False):
    """
    Lazy addition
    """
    try:
        # Get requested configuration id
        config_rowid = table.get_config_rowid(config)
        # Find leaf rowids that need to be computed
        initial_rowid_list = table._get_rowid_from_superkey(parent_rowids,
                                                            config=config)
        # Get corresponding "dirty" parent rowids
        isdirty_list = ut.flag_None_items(initial_rowid_list)
        dirty_parent_rowids = ut.compress(parent_rowids, isdirty_list)
        num_dirty = len(dirty_parent_rowids)
        num_total = len(parent_rowids)
        if num_dirty > 0:
            if verbose:
                fmtstr = 'adding %d / %d new props to %r for config_rowid=%r'
                print(fmtstr % (num_dirty, num_total, table.tablename,
                                config_rowid))
            args = zip(*dirty_parent_rowids)
            if table._asobject:
                # Convenience
                args = [table.depc.get_obj(parent, rowids)
                        for parent, rowids in zip(table.parents, args)]
            # CALL EXTERNAL PREPROCESSING / GENERATION FUNCTION
            proptup_gen = table.preproc_func(table.depc, *args, config=config)
            # proptup_gen = list(proptup_gen)

            if len(table._nested_idxs) > 0:
                # TODO: rewrite
                nested_nCols = len(table.data_colnames)
                idxs1 = table._nested_idxs
                mask1 = ut.index_to_boolmask(idxs1, nested_nCols)
                mask2 = ut.not_list(mask1)
                idxs2 = ut.where(mask2)

                def unnest_data(data):
                    unnested_cols = list(zip(ut.take(data, idxs2)))
                    nested_cols = ut.take(data, idxs1)
                    grouped_items = [nested_cols, unnested_cols]
                    groupxs = [idxs1, idxs2]
                    unflat = ut.ungroup(grouped_items, groupxs,
                                        nested_nCols - 1)
                    return tuple(ut.flatten(unflat))
                # Hack when a sql schema has tuples defined in it
                proptup_gen = (unnest_data(data) for data in proptup_gen)
                # proptup_gen = list(proptup_gen)

            dirty_params_iter = (
                parent_rowids + (config_rowid,) + data_cols
                for parent_rowids, data_cols in zip(dirty_parent_rowids,
                                                    proptup_gen))
            # dirty_params_iter = list(dirty_params_iter)
            # print('dirty_params_iter = %s' % (ut.repr2(dirty_params_iter, nl=1),))
            CHUNKED_ADD = table.chunksize is not None
            if CHUNKED_ADD:
                for dirty_params_chunk in ut.ichunks(
                        dirty_params_iter, chunksize=table.chunksize):
                    table.db._add(table.tablename, table._table_colnames,
                                  dirty_params_chunk,
                                  nInput=len(dirty_params_chunk))
            else:
                nInput = num_dirty
                table.db._add(table.tablename, table._table_colnames,
                              dirty_params_iter, nInput=nInput)
            # Now that the dirty params are added get the correct order of rowids
            rowid_list = table._get_rowid_from_superkey(parent_rowids,
                                                        config=config)
        else:
            rowid_list = initial_rowid_list
        if return_num_dirty:
            return rowid_list, num_dirty
        else:
            return rowid_list
    except Exception as ex:
        ut.printex(ex, 'error in add_rowids', keys=[
            'table', 'parent_rowids', 'config', 'args',
            'dirty_parent_rowids', 'table.preproc_func'])
        raise
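# --- Hedged usage sketch (illustration only) ---
# Demonstrates the lazy "compute only dirty rows" bookkeeping used above with
# a plain dict acting as the cache table. The cache dict and helper name are
# invented for this sketch; only ut.flag_None_items and ut.compress from
# utool are assumed.
def _dirty_rowid_sketch():
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> dirty = _dirty_rowid_sketch()
        >>> assert dirty == [(2,), (4,)]
    """
    import utool as ut
    cache = {(1,): 100, (3,): 101}  # parent rowids that already have results
    parent_rowids = [(1,), (2,), (3,), (4,)]
    initial_rowid_list = [cache.get(key, None) for key in parent_rowids]
    isdirty_list = ut.flag_None_items(initial_rowid_list)
    dirty_parent_rowids = ut.compress(parent_rowids, isdirty_list)
    return dirty_parent_rowids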
def convert_hsdb_to_ibeis(hsdir, dbdir=None, **kwargs):
    r"""
    Args:
        hsdir (str): Directory to folder *containing* _hsdb
        dbdir (str): Output directory (defaults to same as hsdb)

    CommandLine:
        python -m ibeis convert_hsdb_to_ibeis --dbdir ~/work/Frogs
        python -m ibeis convert_hsdb_to_ibeis --hsdir "/raid/raw/RotanTurtles/Roatan HotSpotter Nov_21_2016"

    Ignore:
        from ibeis.dbio.ingest_hsdb import *  # NOQA
        hsdir = "/raid/raw/RotanTurtles/Roatan HotSpotter Nov_21_2016"
        dbdir = "~/work/RotanTurtles"

    Example:
        >>> # SCRIPT
        >>> from ibeis.dbio.ingest_hsdb import *  # NOQA
        >>> dbdir = ut.get_argval('--dbdir', type_=str, default=None)
        >>> hsdir = ut.get_argval('--hsdir', type_=str, default=dbdir)
        >>> result = convert_hsdb_to_ibeis(hsdir)
        >>> print(result)
    """
    from ibeis.control import IBEISControl
    import utool as ut
    if dbdir is None:
        dbdir = hsdir
    print('[ingest] Ingesting hsdb: %r -> %r' % (hsdir, dbdir))

    assert is_hsdb(hsdir), (
        'not a hotspotter database. cannot even force convert: hsdir=%r' % (hsdir,))
    assert not is_succesful_convert(dbdir), (
        'hsdir=%r is already converted' % (hsdir,))
    # print('FORCE DELETE: %r' % (hsdir,))
    # ibsfuncs.delete_ibeis_database(hsdir)
    imgdir = join(hsdir, 'images')

    internal_dir = get_hsinternal(hsdir)
    nametbl_fpath = join(internal_dir, 'name_table.csv')
    imgtbl_fpath = join(internal_dir, 'image_table.csv')
    chiptbl_fpath = join(internal_dir, 'chip_table.csv')

    # READ NAME TABLE
    name_text_list = ['____']
    name_hs_nid_list = [0]
    with open(nametbl_fpath, 'r') as nametbl_file:
        name_reader = csv.reader(nametbl_file)
        for ix, row in enumerate(name_reader):
            # if ix >= 3:
            if len(row) == 0 or row[0].strip().startswith('#'):
                continue
            else:
                hs_nid = int(row[0])
                name = row[1].strip()
                name_text_list.append(name)
                name_hs_nid_list.append(hs_nid)

    # READ IMAGE TABLE
    image_hs_gid_list = []
    image_gname_list = []
    image_reviewed_list = []
    with open(imgtbl_fpath, 'r') as imgtb_file:
        image_reader = csv.reader(imgtb_file)
        for ix, row in enumerate(image_reader):
            if len(row) == 0 or row[0].strip().startswith('#'):
                continue
            else:
                hs_gid = int(row[0])
                gname_ = row[1].strip()
                # aif in hotspotter is equivalent to reviewed in IBEIS
                reviewed = bool(row[2])
                image_hs_gid_list.append(hs_gid)
                image_gname_list.append(gname_)
                image_reviewed_list.append(reviewed)

    image_gpath_list = [join(imgdir, gname) for gname in image_gname_list]

    ut.debug_duplicate_items(image_gpath_list)
    # print(image_gpath_list)
    image_exist_flags = list(map(exists, image_gpath_list))
    missing_images = []
    for image_gpath, flag in zip(image_gpath_list, image_exist_flags):
        if not flag:
            missing_images.append(image_gpath)
            print('Image does not exist: %s' % image_gpath)

    if not all(image_exist_flags):
        print('Only %d / %d images exist' % (sum(image_exist_flags),
                                             len(image_exist_flags)))

    SEARCH_FOR_IMAGES = False
    if SEARCH_FOR_IMAGES:
        # Hack to try and find the missing images
        from os.path import basename
        subfiles = ut.glob(hsdir, '*', recursive=True, fullpath=True,
                           with_files=True)
        basename_to_existing = ut.group_items(subfiles,
                                              ut.lmap(basename, subfiles))

        can_copy_list = []
        for gpath in missing_images:
            gname = basename(gpath)
            if gname not in basename_to_existing:
                print('gname = %r' % (gname,))
                pass
            else:
                existing = basename_to_existing[gname]
                can_choose = True
                if len(existing) > 1:
                    if not ut.allsame(ut.lmap(ut.get_file_uuid, existing)):
                        can_choose = False
                if can_choose:
                    found = existing[0]
                    can_copy_list.append((found, gpath))
                else:
                    print(existing)

        src, dst = ut.listT(can_copy_list)
        ut.copy_list(src, dst)

    # READ CHIP TABLE
    chip_bbox_list = []
    chip_theta_list = []
    chip_hs_nid_list = []
    chip_hs_gid_list = []
    chip_note_list = []
    with open(chiptbl_fpath, 'r') as chiptbl_file:
        chip_reader = csv.reader(chiptbl_file)
        for ix, row in enumerate(chip_reader):
            if len(row) == 0 or row[0].strip().startswith('#'):
                continue
            else:
                hs_gid = int(row[1])
                hs_nid = int(row[2])
                bbox_text = row[3]
                theta = float(row[4])
                notes = '<COMMA>'.join([item.strip() for item in row[5:]])

                bbox_text = bbox_text.replace('[', '').replace(']', '').strip()
                bbox_text = re.sub('  *', ' ', bbox_text)
                bbox_strlist = bbox_text.split(' ')
                bbox = tuple(map(int, bbox_strlist))
                # bbox = [int(item) for item in bbox_strlist]
                chip_hs_nid_list.append(hs_nid)
                chip_hs_gid_list.append(hs_gid)
                chip_bbox_list.append(bbox)
                chip_theta_list.append(theta)
                chip_note_list.append(notes)

    names = ut.ColumnLists({
        'hs_nid': name_hs_nid_list,
        'text': name_text_list,
    })

    images = ut.ColumnLists({
        'hs_gid': image_hs_gid_list,
        'gpath': image_gpath_list,
        'reviewed': image_reviewed_list,
        'exists': image_exist_flags,
    })

    chips = ut.ColumnLists({
        'hs_gid': chip_hs_gid_list,
        'hs_nid': chip_hs_nid_list,
        'bbox': chip_bbox_list,
        'theta': chip_theta_list,
        'note': chip_note_list,
    })

    IGNORE_MISSING_IMAGES = True
    if IGNORE_MISSING_IMAGES:
        # Ignore missing information
        print('pre')
        print('chips = %r' % (chips,))
        print('images = %r' % (images,))
        print('names = %r' % (names,))

        missing_gxs = ut.where(ut.not_list(images['exists']))
        missing_gids = ut.take(images['hs_gid'], missing_gxs)
        gid_to_cxs = ut.dzip(*chips.group_indicies('hs_gid'))
        missing_cxs = ut.flatten(ut.take(gid_to_cxs, missing_gids))
        # Remove missing images and dependent chips
        images = images.remove(missing_gxs)
        chips = chips.remove(missing_cxs)

        valid_nids = set(chips['hs_nid'] + [0])
        isvalid = [nid in valid_nids for nid in names['hs_nid']]
        names = names.compress(isvalid)

        print('post')
        print('chips = %r' % (chips,))
        print('images = %r' % (images,))
        print('names = %r' % (names,))

    assert all(images['exists']), 'some images dont exist'

    # if gid is None:
    #     print('Not adding the ix=%r-th Chip. Its image is corrupted image.' % (ix,))
    #     continue

    # Build mappings to new indexes
    # names_nid_to_nid = {names_nid: nid for (names_nid, nid) in zip(hs_nid_list, nid_list)}
    # names_nid_to_nid[1] = names_nid_to_nid[0]  # hsdb unknown is 0 or 1
    # images_gid_to_gid = {images_gid: gid for (images_gid, gid) in zip(hs_gid_list, gid_list)}

    ibs = IBEISControl.request_IBEISController(dbdir=dbdir, check_hsdb=False,
                                               **kwargs)
    assert len(ibs.get_valid_gids()) == 0, 'target database is not empty'

    # Add names, images, and annotations
    names['ibs_nid'] = ibs.add_names(names['text'])
    images['ibs_gid'] = ibs.add_images(images['gpath'])  # any failed gids will be None

    if True:
        # Remove corrupted images
        print('pre')
        print('chips = %r' % (chips,))
        print('images = %r' % (images,))
        print('names = %r' % (names,))

        missing_gxs = ut.where(ut.flag_None_items(images['ibs_gid']))
        missing_gids = ut.take(images['hs_gid'], missing_gxs)
        gid_to_cxs = ut.dzip(*chips.group_indicies('hs_gid'))
        missing_cxs = ut.flatten(ut.take(gid_to_cxs, missing_gids))
        # Remove missing images and dependent chips
        chips = chips.remove(missing_cxs)
        images = images.remove(missing_gxs)

        print('post')
        print('chips = %r' % (chips,))
        print('images = %r' % (images,))
        print('names = %r' % (names,))

    # Index chips using new ibs rowids
    ibs_gid_lookup = ut.dzip(images['hs_gid'], images['ibs_gid'])
    ibs_nid_lookup = ut.dzip(names['hs_nid'], names['ibs_nid'])
    try:
        chips['ibs_gid'] = ut.take(ibs_gid_lookup, chips['hs_gid'])
    except KeyError:
        chips['ibs_gid'] = [ibs_gid_lookup.get(index, None)
                            for index in chips['hs_gid']]
    try:
        chips['ibs_nid'] = ut.take(ibs_nid_lookup, chips['hs_nid'])
    except KeyError:
        chips['ibs_nid'] = [ibs_nid_lookup.get(index, None)
                            for index in chips['hs_nid']]

    ibs.add_annots(chips['ibs_gid'], bbox_list=chips['bbox'],
                   theta_list=chips['theta'], nid_list=chips['ibs_nid'],
                   notes_list=chips['note'])

    # aid_list = ibs.get_valid_aids()
    # flag_list = [True] * len(aid_list)
    # ibs.set_annot_exemplar_flags(aid_list, flag_list)
    # assert(all(ibs.get_annot_exemplar_flags(aid_list))), 'exemplars not set correctly'

    # Write file flagging successful conversion
    with open(join(ibs.get_ibsdir(), SUCCESS_FLAG_FNAME), 'w') as file_:
        file_.write('Successfully converted hsdir=%r' % (hsdir,))
    print('finished ingest')
    return ibs
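# --- Hedged usage sketch (illustration only) ---
# Shows the lookup-with-fallback used when remapping hotspotter ids to ibeis
# rowids above: a dict lookup that tolerates ids with no new rowid by falling
# back to None. The id values are invented; only ut.dzip and ut.take from
# utool are assumed.
def _remap_ids_sketch():
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> assert _remap_ids_sketch() == [7, None, 9]
    """
    import utool as ut
    hs_gids = [1, 2, 3]
    ibs_gids = [7, None, 9]  # the second image failed to be added
    lookup = ut.dzip(hs_gids, ibs_gids)
    requested = [1, 2, 3]
    try:
        return ut.take(lookup, requested)
    except KeyError:
        return [lookup.get(index, None) for index in requested]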