def permute_columns(self, new_order, inplace=True):
    import utool as ut
    self.header = ut.take(self.header, new_order)
    self.header_tags = ut.take(self.header_tags, new_order)
    self.row_data = ut.listT(ut.take(ut.listT(self.row_data), new_order))
    if self.short_header is not None:
        self.short_header = ut.take(self.short_header, new_order)
    return self
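# Illustrative sketch (not part of the original module): permute_columns relies on a
# transpose/take/transpose idiom, where ut.listT turns a list of rows into a list of
# columns and ut.take reorders them. The helper below shows the same pattern with
# plain Python; the function name is hypothetical and only here for clarity.
def _example_permute_columns_pattern():
    rows = [[1, 'a', True], [2, 'b', False]]
    new_order = [2, 0, 1]
    cols = list(map(list, zip(*rows)))   # transpose rows -> columns (like ut.listT)
    cols = [cols[i] for i in new_order]  # reorder the columns (like ut.take)
    rows = list(map(list, zip(*cols)))   # transpose back to rows
    assert rows == [[True, 1, 'a'], [False, 2, 'b']]
    return rows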
def fuzzy_filter_columns(self, fuzzy_headers):
    import utool as ut
    col_flags = ut.filterflags_general_tags(
        self.header_tags, logic='or', in_any=fuzzy_headers)
    self.header = ut.compress(self.header, col_flags)
    self.header_tags = ut.compress(self.header_tags, col_flags)
    self.row_data = ut.listT(ut.compress(ut.listT(self.row_data), col_flags))
    if self.short_header is not None:
        self.short_header = ut.compress(self.short_header, col_flags)
def make_standard_csv(column_list, column_lbls=None):
    from six.moves import cStringIO as StringIO
    import six  # needed for the six.PY2 check below
    import utool as ut
    import csv
    stream = StringIO()
    row_list = ut.listT(column_list)
    if six.PY2:
        row_list = [[ut.ensure_unicode(c).encode('utf-8') for c in r]
                    for r in row_list]
        if column_lbls is not None:
            column_lbls = [ut.ensure_unicode(c).encode('utf-8')
                           for c in column_lbls]
    writer = csv.writer(stream, dialect=csv.excel)
    if column_lbls is not None:
        writer.writerow(column_lbls)
    writer.writerows(row_list)
    csv_str = stream.getvalue()
    return csv_str
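# Illustrative usage sketch (added for clarity): make_standard_csv expects a list of
# *columns* of equal length and transposes them into rows internally. The sample data
# below is made up; the helper name is hypothetical.
def _example_make_standard_csv():
    column_list = [[1, 2, 3], ['a', 'b', 'c']]
    csv_str = make_standard_csv(column_list, column_lbls=['id', 'label'])
    print(csv_str)
    # Expected shape (the excel dialect uses \r\n line endings):
    # id,label
    # 1,a
    # 2,b
    # 3,c
    return csv_str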
def get_annotmatch_rowids_between(ibs, aids1, aids2, method=None):
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.annotmatch_funcs import *  # NOQA
        >>> import wbia
        >>> import ubelt as ub
        >>> ibs = wbia.opendb('PZ_MTEST')
        >>> aids1 = aids2 = [1, 2, 3, 4, 5, 6]
        >>> rowids_between = ibs.get_annotmatch_rowids_between
        >>> ams1 = sorted(rowids_between(aids1, aids2, method=1))
        >>> ams2 = sorted(rowids_between(aids1, aids2, method=2))
        >>> assert len(ub.find_duplicates(ams1)) == 0
        >>> assert len(ub.find_duplicates(ams2)) == 0
        >>> assert sorted(ams2) == sorted(ams1)
    """
    if method is None:
        if len(aids1) * len(aids2) > 5000:
            method = 1
        else:
            method = 2
    if method == 1:
        # Strategy 1: get all existing rows and see what intersects
        # This is better when the enumerated set of rows would be larger than
        # the database size
        unflat_rowids1L = ibs.get_annotmatch_rowids_from_aid1(aids1)
        unflat_rowids1R = ibs.get_annotmatch_rowids_from_aid2(aids1)
        unflat_rowids2L = ibs.get_annotmatch_rowids_from_aid1(aids2)
        unflat_rowids2R = ibs.get_annotmatch_rowids_from_aid2(aids2)

        am_rowids1L = {r for r in ut.iflatten(unflat_rowids1L) if r is not None}
        am_rowids1R = {r for r in ut.iflatten(unflat_rowids1R) if r is not None}
        am_rowids2L = {r for r in ut.iflatten(unflat_rowids2L) if r is not None}
        am_rowids2R = {r for r in ut.iflatten(unflat_rowids2R) if r is not None}

        ams12 = am_rowids1L.intersection(am_rowids2R)
        ams21 = am_rowids2L.intersection(am_rowids1R)
        ams = sorted(ams12.union(ams21))
    elif method == 2:
        # Strategy 2: enumerate what rows could exist and see what does exist
        # This is better when the enumerated set of rows would be smaller than
        # the database size
        edges = list(ut.product_nonsame(aids1, aids2))
        if len(edges) == 0:
            ams = []
        else:
            aids1_, aids2_ = ut.listT(edges)
            # ams = ibs.get_annotmatch_rowid_from_undirected_superkey(aids1_, aids2_)
            ams = ibs.get_annotmatch_rowid_from_superkey(aids1_, aids2_)
            if ams is None:
                ams = []
            ams = ut.filter_Nones(ams)
    return ams
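# Illustrative sketch (not part of the API above): the two strategies trade
# enumeration cost against lookup cost. Strategy 2 enumerates every candidate
# (aid1, aid2) pair, which is only cheap when len(aids1) * len(aids2) is small;
# strategy 1 instead intersects the sets of match rowids already attached to either
# side. The plain-Python analogue below (hypothetical helper, made-up arguments)
# shows why both directions agree on the final answer.
def _example_strategy_tradeoff(aids1, aids2, existing_edges):
    # Strategy 2 analogue: enumerate candidate pairs, then keep those that exist.
    candidates = {(a1, a2) for a1 in aids1 for a2 in aids2 if a1 != a2}
    by_enumeration = candidates & set(existing_edges)
    # Strategy 1 analogue: filter the existing edges by membership on both sides.
    s1, s2 = set(aids1), set(aids2)
    by_intersection = {(a1, a2) for (a1, a2) in existing_edges
                       if a1 != a2 and a1 in s1 and a2 in s2}
    assert by_enumeration == by_intersection
    return by_intersection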
def nice_table(self):
    import utool as ut
    return ut.make_csv_table(ut.listT(self.row_data), raw=True)
def make_annot_inference_dict(infr, internal=False):
    def convert_to_name_uuid(nid):
        text = ibs.get_name_texts(nid, apply_fix=False)
        if text is None:
            text = 'NEWNAME_%s' % (str(nid),)
        return text

    ibs = infr.qreq_.ibs
    if internal:
        get_annot_uuids = ut.identity
    else:
        get_annot_uuids = ibs.get_annot_uuids

    # Compile the cluster_dict
    col_list = ['aid_list', 'orig_nid_list', 'new_nid_list',
                'exemplar_flag_list', 'error_flag_list']
    cluster_dict = dict(zip(col_list, ut.listT(infr.cluster_tuples)))
    cluster_dict['annot_uuid_list'] = get_annot_uuids(cluster_dict['aid_list'])
    # We store the name's UUID as the name's text
    cluster_dict['orig_name_list'] = [convert_to_name_uuid(nid)
                                      for nid in cluster_dict['orig_nid_list']]
    cluster_dict['new_name_list'] = [convert_to_name_uuid(nid)
                                     for nid in cluster_dict['new_nid_list']]
    # Filter out only the keys we want to send back in the dictionary
    key_list = ['annot_uuid_list', 'orig_name_list', 'new_name_list',
                'exemplar_flag_list', 'error_flag_list']
    cluster_dict = ut.dict_subset(cluster_dict, key_list)

    # Compile the annot_pair_dict
    col_list = ['aid_1_list', 'aid_2_list', 'p_same_list',
                'confidence_list', 'raw_score_list']
    annot_pair_dict = dict(zip(col_list, ut.listT(infr.needs_review_list)))
    annot_pair_dict['annot_uuid_1_list'] = get_annot_uuids(
        annot_pair_dict['aid_1_list'])
    annot_pair_dict['annot_uuid_2_list'] = get_annot_uuids(
        annot_pair_dict['aid_2_list'])
    zipped = zip(annot_pair_dict['annot_uuid_1_list'],
                 annot_pair_dict['annot_uuid_2_list'],
                 annot_pair_dict['p_same_list'])
    annot_pair_dict['review_pair_list'] = [
        {
            'annot_uuid_key': annot_uuid_1,
            'annot_uuid_1': annot_uuid_1,
            'annot_uuid_2': annot_uuid_2,
            'prior_matching_state': {
                'p_match': p_same,
                'p_nomatch': 1.0 - p_same,
                'p_notcomp': 0.0,
            }
        }
        for (annot_uuid_1, annot_uuid_2, p_same) in zipped
    ]
    # Filter out only the keys we want to send back in the dictionary
    key_list = ['review_pair_list', 'confidence_list']
    annot_pair_dict = ut.dict_subset(annot_pair_dict, key_list)

    # Compile the inference dict
    inference_dict = ut.odict([
        ('cluster_dict', cluster_dict),
        ('annot_pair_dict', annot_pair_dict),
        ('_internal_state', None),
    ])
    return inference_dict
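# Note added for readability: the structure returned by make_annot_inference_dict,
# as assembled above, has roughly the following shape. Values here are placeholders,
# not real data.
#
# {
#     'cluster_dict': {
#         'annot_uuid_list': [...], 'orig_name_list': [...], 'new_name_list': [...],
#         'exemplar_flag_list': [...], 'error_flag_list': [...],
#     },
#     'annot_pair_dict': {
#         'review_pair_list': [
#             {'annot_uuid_key': ..., 'annot_uuid_1': ..., 'annot_uuid_2': ...,
#              'prior_matching_state': {'p_match': 0.9, 'p_nomatch': 0.1,
#                                       'p_notcomp': 0.0}},
#         ],
#         'confidence_list': [...],
#     },
#     '_internal_state': None,
# }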
def fix_annotmatch_pzmaster1():
    """
    PZ_Master1 had annotmatch rowids that did not agree with the current name
    labeling. Looking at the inconsistencies in the graph interface was too
    cumbersome, because over 3000 annots were incorrectly grouped together.

    This function deletes any annotmatch rowid that is not consistent with the
    current labeling so we can go forward with using the new AnnotInference
    object
    """
    import wbia

    ibs = wbia.opendb('PZ_Master1')
    infr = wbia.AnnotInference(ibs=ibs, aids=ibs.get_valid_aids(), verbose=5)
    infr.initialize_graph()
    annots = ibs.annots()
    aid_to_nid = ut.dzip(annots.aids, annots.nids)

    if False:
        infr.reset_feedback()
        infr.ensure_mst()
        infr.apply_feedback_edges()
        infr.relabel_using_reviews()
        infr.start_qt_interface()

    # Get annotmatch rowids that agree with current labeling
    if False:
        annotmatch = ibs.db.get_table_as_pandas('annotmatch')
        import pandas as pd
        flags1 = pd.isnull(annotmatch['annotmatch_evidence_decision'])
        flags2 = annotmatch['annotmatch_tag_text'] == ''
        bad_part = annotmatch[flags1 & flags2]
        rowids = bad_part.index.tolist()
        ibs.delete_annotmatch(rowids)

    if False:
        # Delete bidirectional annotmatches
        annotmatch = ibs.db.get_table_as_pandas('annotmatch')
        df = annotmatch.set_index(['annot_rowid1', 'annot_rowid2'])

        # Find entries that have both directions
        pairs1 = annotmatch[['annot_rowid1', 'annot_rowid2']].values
        f_edges = {tuple(p) for p in pairs1}
        b_edges = {tuple(p[::-1]) for p in pairs1}
        isect_edges = {tuple(sorted(p)) for p in b_edges.intersection(f_edges)}
        isect_edges1 = list(isect_edges)
        isect_edges2 = [p[::-1] for p in isect_edges]

        # cols = ['annotmatch_evidence_decision', 'annotmatch_tag_text']
        import pandas as pd
        custom_ = {
            (559, 4909): (False, ['photobomb']),
            (7918, 8041): (False, ['photobomb']),
            (6634, 6754): (False, ['photobomb']),
            (3707, 3727): (False, ['photobomb']),
            (86, 103): (False, ['photobomb']),
        }
        extra_ = {}
        fixme_edges = []

        d1 = df.loc[isect_edges1].reset_index(drop=False)
        d2 = df.loc[isect_edges2].reset_index(drop=False)
        flags = (d1['annotmatch_evidence_decision'] !=
                 d2['annotmatch_evidence_decision'])
        from wbia.tag_funcs import _parse_tags
        for f, r1, r2 in zip(flags, d1.iterrows(), d2.iterrows()):
            v1, v2 = r1[1], r2[1]
            aid1 = v1['annot_rowid1']
            aid2 = v1['annot_rowid2']
            truth_real = (ibs.const.EVIDENCE_DECISION.POSITIVE
                          if aid_to_nid[aid1] == aid_to_nid[aid2]
                          else ibs.const.EVIDENCE_DECISION.NEGATIVE)
            truth1 = v1['annotmatch_evidence_decision']
            truth2 = v2['annotmatch_evidence_decision']
            t1 = _parse_tags(v1['annotmatch_tag_text'])
            t2 = _parse_tags(v2['annotmatch_tag_text'])
            newtag = ut.union_ordered(t1, t2)
            if (aid1, aid2) in custom_:
                continue
            fixme_flag = False
            if not pd.isnull(truth1):
                if truth_real != truth1:
                    fixme_flag = True
            if not pd.isnull(truth2):
                if truth_real != truth2:
                    fixme_flag = True
            if fixme_flag:
                logger.info('newtag = %r' % (newtag,))
                logger.info('truth_real = %r' % (truth_real,))
                logger.info('truth1 = %r' % (truth1,))
                logger.info('truth2 = %r' % (truth2,))
                logger.info('aid1 = %r' % (aid1,))
                logger.info('aid2 = %r' % (aid2,))
                fixme_edges.append((aid1, aid2))
            else:
                extra_[(aid1, aid2)] = (truth_real, newtag)

        extra_.update(custom_)
        new_pairs = extra_.keys()
        new_truths = ut.take_column(ut.dict_take(extra_, new_pairs), 0)
        new_tags = ut.take_column(ut.dict_take(extra_, new_pairs), 1)
        new_tag_texts = [';'.join(t) for t in new_tags]
        aids1, aids2 = ut.listT(new_pairs)

        # Delete the old
        ibs.delete_annotmatch(d1['annotmatch_rowid'].values.tolist() +
                              d2['annotmatch_rowid'].values.tolist())

        # Add the new
        ams = ibs.add_annotmatch_undirected(aids1, aids2)
        ibs.set_annotmatch_evidence_decision(ams, new_truths)
        ibs.set_annotmatch_tag_text(ams, new_tag_texts)

        if False:
            import wbia.guitool as gt
            gt.ensure_qapp()
            ut.qtensure()
            from wbia.gui import inspect_gui
            inspect_gui.show_vsone_tuner(ibs, aid1, aid2)

    # Tally agreement / disagreement between annotmatch decisions and the
    # current name labeling
    x = ut.ddict(list)
    annotmatch = ibs.db.get_table_as_pandas('annotmatch')
    import ubelt as ub
    _iter = annotmatch.iterrows()
    prog = ub.ProgIter(_iter, length=len(annotmatch))
    for k, m in prog:
        aid1 = m['annot_rowid1']
        aid2 = m['annot_rowid2']
        if m['annotmatch_evidence_decision'] == ibs.const.EVIDENCE_DECISION.POSITIVE:
            if aid_to_nid[aid1] == aid_to_nid[aid2]:
                x['agree1'].append(k)
            else:
                x['disagree1'].append(k)
        elif m['annotmatch_evidence_decision'] == ibs.const.EVIDENCE_DECISION.NEGATIVE:
            if aid_to_nid[aid1] == aid_to_nid[aid2]:
                x['disagree2'].append(k)
            else:
                x['agree2'].append(k)

    ub.map_vals(len, x)
    ut.dict_hist(annotmatch.loc[x['disagree1']]['annotmatch_tag_text'])
    disagree1 = annotmatch.loc[x['disagree1']]
    pb_disagree1 = disagree1[disagree1['annotmatch_tag_text'] == 'photobomb']
    aids1 = pb_disagree1['annot_rowid1'].values.tolist()
    aids2 = pb_disagree1['annot_rowid2'].values.tolist()
    aid_pairs = list(zip(aids1, aids2))
    infr = wbia.AnnotInference.from_pairs(aid_pairs, ibs=ibs, verbose=5)
    if False:
        feedback = infr.read_wbia_annotmatch_feedback(edges=infr.edges())
        infr.external_feedback = feedback
        infr.apply_feedback_edges()
        infr.start_qt_interface(loop=False)

    # Delete these values
    if False:
        nonpb_disagree1 = disagree1[disagree1['annotmatch_tag_text'] != 'photobomb']
        disagree2 = annotmatch.loc[x['disagree2']]
        ibs.delete_annotmatch(nonpb_disagree1['annotmatch_rowid'])
        ibs.delete_annotmatch(disagree2['annotmatch_rowid'])

    # ut.dict_hist(disagree1['annotmatch_tag_text'])
    import networkx as nx
    graph = nx.Graph()
    graph.add_edges_from(
        zip(pb_disagree1['annot_rowid1'], pb_disagree1['annot_rowid2']))
    list(nx.connected_components(graph))

    set(annotmatch.loc[x['disagree2']]['annotmatch_tag_text'])
def fix_bidirectional_annotmatch(ibs):
    import wbia

    infr = wbia.AnnotInference(ibs=ibs, aids='all', verbose=5)
    infr.initialize_graph()
    annots = ibs.annots()
    aid_to_nid = ut.dzip(annots.aids, annots.nids)

    # Delete bidirectional annotmatches
    annotmatch = ibs.db.get_table_as_pandas('annotmatch')
    df = annotmatch.set_index(['annot_rowid1', 'annot_rowid2'])

    # Find entries that have both directions
    pairs1 = annotmatch[['annot_rowid1', 'annot_rowid2']].values
    f_edges = {tuple(p) for p in pairs1}
    b_edges = {tuple(p[::-1]) for p in pairs1}
    isect_edges = {tuple(sorted(p)) for p in b_edges.intersection(f_edges)}
    logger.info('Found %d bidirectional edges' % len(isect_edges))
    isect_edges1 = list(isect_edges)
    isect_edges2 = [p[::-1] for p in isect_edges]

    import pandas as pd
    extra_ = {}
    fixme_edges = []

    d1 = df.loc[isect_edges1].reset_index(drop=False)
    d2 = df.loc[isect_edges2].reset_index(drop=False)
    flags = (d1['annotmatch_evidence_decision'] !=
             d2['annotmatch_evidence_decision'])
    from wbia.tag_funcs import _parse_tags
    for f, r1, r2 in zip(flags, d1.iterrows(), d2.iterrows()):
        v1, v2 = r1[1], r2[1]
        aid1 = v1['annot_rowid1']
        aid2 = v1['annot_rowid2']
        truth_real = (ibs.const.EVIDENCE_DECISION.POSITIVE
                      if aid_to_nid[aid1] == aid_to_nid[aid2]
                      else ibs.const.EVIDENCE_DECISION.NEGATIVE)
        truth1 = v1['annotmatch_evidence_decision']
        truth2 = v2['annotmatch_evidence_decision']
        t1 = _parse_tags(v1['annotmatch_tag_text'])
        t2 = _parse_tags(v2['annotmatch_tag_text'])
        newtag = ut.union_ordered(t1, t2)
        fixme_flag = False
        if not pd.isnull(truth1):
            if truth_real != truth1:
                fixme_flag = True
        if not pd.isnull(truth2):
            if truth_real != truth2:
                fixme_flag = True
        if fixme_flag:
            logger.info('--')
            logger.info('t1, t2 = %r, %r' % (t1, t2))
            logger.info('newtag = %r' % (newtag,))
            logger.info('truth_real, truth1, truth2 = %r, %r, %r' % (
                truth_real, truth1, truth2))
            logger.info('aid1, aid2 = %r, %r' % (aid1, aid2))
            fixme_edges.append(tuple(sorted((aid1, aid2))))
        else:
            extra_[(aid1, aid2)] = (truth_real, newtag)

    if len(fixme_edges) > 0:
        # need to manually fix these edges
        fix_infr = wbia.AnnotInference.from_pairs(fixme_edges, ibs=ibs, verbose=5)
        feedback = fix_infr.read_wbia_annotmatch_feedback(only_existing_edges=True)
        infr = fix_infr
        fix_infr.external_feedback = feedback
        fix_infr.apply_feedback_edges()
        fix_infr.start_qt_interface(loop=False)
        # DELETE OLD EDGES TWICE
        ams = ibs.get_annotmatch_rowid_from_edges(fixme_edges)
        ibs.delete_annotmatch(ams)
        ams = ibs.get_annotmatch_rowid_from_edges(fixme_edges)
        ibs.delete_annotmatch(ams)
        # MANUALLY CALL THIS ONCE FINISHED
        # TO ONLY CHANGE ANNOTMATCH EDGES
        infr.write_wbia_staging_feedback()
        infr.write_wbia_annotmatch_feedback()

    new_pairs = extra_.keys()
    new_truths = ut.take_column(ut.dict_take(extra_, new_pairs), 0)
    new_tags = ut.take_column(ut.dict_take(extra_, new_pairs), 1)
    new_tag_texts = [';'.join(t) for t in new_tags]
    aids1, aids2 = ut.listT(new_pairs)

    # Delete the old
    ibs.delete_annotmatch(d1['annotmatch_rowid'].values.tolist() +
                          d2['annotmatch_rowid'].values.tolist())

    # Add the new
    ams = ibs.add_annotmatch_undirected(aids1, aids2)
    ibs.set_annotmatch_evidence_decision(ams, new_truths)
    ibs.set_annotmatch_tag_text(ams, new_tag_texts)

    if False:
        import wbia.guitool as gt
        gt.ensure_qapp()
        ut.qtensure()
        from wbia.gui import inspect_gui
        inspect_gui.show_vsone_tuner(ibs, aid1, aid2)
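# Illustrative sketch (not part of the fix scripts above): the bidirectional-edge
# detection used in both fix functions reduces to intersecting the forward and
# reversed edge sets, then reporting each undirected pair once. The helper name is
# hypothetical.
def _example_find_bidirectional_edges(pairs):
    f_edges = {tuple(p) for p in pairs}
    b_edges = {tuple(p[::-1]) for p in pairs}
    # Pairs that appear in both directions, each reported once in sorted order.
    return {tuple(sorted(p)) for p in f_edges & b_edges}

# e.g. _example_find_bidirectional_edges([(1, 2), (2, 1), (3, 4)]) == {(1, 2)}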
def convert_hsdb_to_ibeis(hsdir, dbdir=None, **kwargs):
    r"""
    Args:
        hsdir (str): Directory to folder *containing* _hsdb
        dbdir (str): Output directory (defaults to same as hsdb)

    CommandLine:
        python -m ibeis convert_hsdb_to_ibeis --dbdir ~/work/Frogs
        python -m ibeis convert_hsdb_to_ibeis --hsdir "/raid/raw/RotanTurtles/Roatan HotSpotter Nov_21_2016"

    Ignore:
        from ibeis.dbio.ingest_hsdb import *  # NOQA
        hsdir = "/raid/raw/RotanTurtles/Roatan HotSpotter Nov_21_2016"
        dbdir = "~/work/RotanTurtles"

    Example:
        >>> # SCRIPT
        >>> from ibeis.dbio.ingest_hsdb import *  # NOQA
        >>> dbdir = ut.get_argval('--dbdir', type_=str, default=None)
        >>> hsdir = ut.get_argval('--hsdir', type_=str, default=dbdir)
        >>> result = convert_hsdb_to_ibeis(hsdir)
        >>> print(result)
    """
    from ibeis.control import IBEISControl
    import utool as ut
    if dbdir is None:
        dbdir = hsdir
    print('[ingest] Ingesting hsdb: %r -> %r' % (hsdir, dbdir))

    assert is_hsdb(hsdir), (
        'not a hotspotter database. cannot even force convert: hsdir=%r' % (hsdir,))
    assert not is_succesful_convert(dbdir), (
        'hsdir=%r is already converted' % (hsdir,))

    imgdir = join(hsdir, 'images')
    internal_dir = get_hsinternal(hsdir)
    nametbl_fpath = join(internal_dir, 'name_table.csv')
    imgtbl_fpath = join(internal_dir, 'image_table.csv')
    chiptbl_fpath = join(internal_dir, 'chip_table.csv')

    # READ NAME TABLE
    name_text_list = ['____']
    name_hs_nid_list = [0]
    with open(nametbl_fpath, 'r') as nametbl_file:
        name_reader = csv.reader(nametbl_file)
        for ix, row in enumerate(name_reader):
            if len(row) == 0 or row[0].strip().startswith('#'):
                continue
            else:
                hs_nid = int(row[0])
                name = row[1].strip()
                name_text_list.append(name)
                name_hs_nid_list.append(hs_nid)

    # READ IMAGE TABLE
    image_hs_gid_list = []
    image_gname_list = []
    image_reviewed_list = []
    with open(imgtbl_fpath, 'r') as imgtb_file:
        image_reader = csv.reader(imgtb_file)
        for ix, row in enumerate(image_reader):
            if len(row) == 0 or row[0].strip().startswith('#'):
                continue
            else:
                hs_gid = int(row[0])
                gname_ = row[1].strip()
                # aif in hotspotter is equivalent to reviewed in IBEIS
                reviewed = bool(row[2])
                image_hs_gid_list.append(hs_gid)
                image_gname_list.append(gname_)
                image_reviewed_list.append(reviewed)

    image_gpath_list = [join(imgdir, gname) for gname in image_gname_list]

    ut.debug_duplicate_items(image_gpath_list)
    image_exist_flags = list(map(exists, image_gpath_list))
    missing_images = []
    for image_gpath, flag in zip(image_gpath_list, image_exist_flags):
        if not flag:
            missing_images.append(image_gpath)
            print('Image does not exist: %s' % image_gpath)

    if not all(image_exist_flags):
        print('Only %d / %d images exist' % (sum(image_exist_flags),
                                             len(image_exist_flags)))

    SEARCH_FOR_IMAGES = False
    if SEARCH_FOR_IMAGES:
        # Hack to try and find the missing images
        from os.path import basename
        subfiles = ut.glob(hsdir, '*', recursive=True, fullpath=True,
                           with_files=True)
        basename_to_existing = ut.group_items(subfiles,
                                              ut.lmap(basename, subfiles))

        can_copy_list = []
        for gpath in missing_images:
            gname = basename(gpath)
            if gname not in basename_to_existing:
                print('gname = %r' % (gname,))
            else:
                existing = basename_to_existing[gname]
                can_choose = True
                if len(existing) > 1:
                    if not ut.allsame(ut.lmap(ut.get_file_uuid, existing)):
                        can_choose = False
                if can_choose:
                    found = existing[0]
                    can_copy_list.append((found, gpath))
                else:
                    print(existing)

        src, dst = ut.listT(can_copy_list)
        ut.copy_list(src, dst)

    # READ CHIP TABLE
    chip_bbox_list = []
    chip_theta_list = []
    chip_hs_nid_list = []
    chip_hs_gid_list = []
    chip_note_list = []
    with open(chiptbl_fpath, 'r') as chiptbl_file:
        chip_reader = csv.reader(chiptbl_file)
        for ix, row in enumerate(chip_reader):
            if len(row) == 0 or row[0].strip().startswith('#'):
                continue
            else:
                hs_gid = int(row[1])
                hs_nid = int(row[2])
                bbox_text = row[3]
                theta = float(row[4])
                notes = '<COMMA>'.join([item.strip() for item in row[5:]])

                bbox_text = bbox_text.replace('[', '').replace(']', '').strip()
                bbox_text = re.sub(' +', ' ', bbox_text)  # collapse runs of spaces
                bbox_strlist = bbox_text.split(' ')
                bbox = tuple(map(int, bbox_strlist))
                chip_hs_nid_list.append(hs_nid)
                chip_hs_gid_list.append(hs_gid)
                chip_bbox_list.append(bbox)
                chip_theta_list.append(theta)
                chip_note_list.append(notes)

    names = ut.ColumnLists({
        'hs_nid': name_hs_nid_list,
        'text': name_text_list,
    })

    images = ut.ColumnLists({
        'hs_gid': image_hs_gid_list,
        'gpath': image_gpath_list,
        'reviewed': image_reviewed_list,
        'exists': image_exist_flags,
    })

    chips = ut.ColumnLists({
        'hs_gid': chip_hs_gid_list,
        'hs_nid': chip_hs_nid_list,
        'bbox': chip_bbox_list,
        'theta': chip_theta_list,
        'note': chip_note_list,
    })

    IGNORE_MISSING_IMAGES = True
    if IGNORE_MISSING_IMAGES:
        # Ignore missing information
        print('pre')
        print('chips = %r' % (chips,))
        print('images = %r' % (images,))
        print('names = %r' % (names,))
        missing_gxs = ut.where(ut.not_list(images['exists']))
        missing_gids = ut.take(images['hs_gid'], missing_gxs)
        gid_to_cxs = ut.dzip(*chips.group_indicies('hs_gid'))
        missing_cxs = ut.flatten(ut.take(gid_to_cxs, missing_gids))
        # Remove missing images and dependent chips
        images = images.remove(missing_gxs)
        chips = chips.remove(missing_cxs)

        valid_nids = set(chips['hs_nid'] + [0])
        isvalid = [nid in valid_nids for nid in names['hs_nid']]
        names = names.compress(isvalid)
        print('post')
        print('chips = %r' % (chips,))
        print('images = %r' % (images,))
        print('names = %r' % (names,))

    assert all(images['exists']), 'some images dont exist'

    ibs = IBEISControl.request_IBEISController(dbdir=dbdir, check_hsdb=False,
                                               **kwargs)
    assert len(ibs.get_valid_gids()) == 0, 'target database is not empty'

    # Add names, images, and annotations
    names['ibs_nid'] = ibs.add_names(names['text'])
    images['ibs_gid'] = ibs.add_images(images['gpath'])  # any failed gids will be None

    if True:
        # Remove corrupted images
        print('pre')
        print('chips = %r' % (chips,))
        print('images = %r' % (images,))
        print('names = %r' % (names,))
        missing_gxs = ut.where(ut.flag_None_items(images['ibs_gid']))
        missing_gids = ut.take(images['hs_gid'], missing_gxs)
        gid_to_cxs = ut.dzip(*chips.group_indicies('hs_gid'))
        missing_cxs = ut.flatten(ut.take(gid_to_cxs, missing_gids))
        # Remove missing images and dependent chips
        chips = chips.remove(missing_cxs)
        images = images.remove(missing_gxs)
        print('post')
        print('chips = %r' % (chips,))
        print('images = %r' % (images,))
        print('names = %r' % (names,))

    # Index chips using new ibs rowids
    ibs_gid_lookup = ut.dzip(images['hs_gid'], images['ibs_gid'])
    ibs_nid_lookup = ut.dzip(names['hs_nid'], names['ibs_nid'])
    try:
        chips['ibs_gid'] = ut.take(ibs_gid_lookup, chips['hs_gid'])
    except KeyError:
        chips['ibs_gid'] = [ibs_gid_lookup.get(index, None)
                            for index in chips['hs_gid']]
    try:
        chips['ibs_nid'] = ut.take(ibs_nid_lookup, chips['hs_nid'])
    except KeyError:
        chips['ibs_nid'] = [ibs_nid_lookup.get(index, None)
                            for index in chips['hs_nid']]

    ibs.add_annots(chips['ibs_gid'], bbox_list=chips['bbox'],
                   theta_list=chips['theta'], nid_list=chips['ibs_nid'],
                   notes_list=chips['note'])

    # Write file flagging successful conversion
    with open(join(ibs.get_ibsdir(), SUCCESS_FLAG_FNAME), 'w') as file_:
        file_.write('Successfully converted hsdir=%r' % (hsdir,))
    print('finished ingest')
    return ibs
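# Illustrative sketch (not part of the converter): the chip-table parsing above turns
# HotSpotter's bracketed, space-separated bbox text into an int tuple. A minimal
# standalone version of that step, with a made-up input string:
def _example_parse_hs_bbox(bbox_text):
    import re
    bbox_text = bbox_text.replace('[', '').replace(']', '').strip()
    bbox_text = re.sub(' +', ' ', bbox_text)  # collapse runs of spaces
    return tuple(int(tok) for tok in bbox_text.split(' '))

# _example_parse_hs_bbox('[ 10  20 300 400]') == (10, 20, 300, 400)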