def modify_tags(tags_list, direct_map=None, regex_map=None, regex_aug=None,
                delete_unmapped=False, return_unmapped=False,
                return_map=False):
    import utool as ut
    tag_vocab = ut.unique(ut.flatten(tags_list))
    alias_map = ut.odict()
    if regex_map is not None:
        alias_map.update(**ut.build_alias_map(regex_map, tag_vocab))
    if direct_map is not None:
        alias_map.update(ut.odict(direct_map))

    new_tags_list = tags_list
    new_tags_list = ut.alias_tags(new_tags_list, alias_map)

    if regex_aug is not None:
        alias_aug = ut.build_alias_map(regex_aug, tag_vocab)
        aug_tags_list = ut.alias_tags(new_tags_list, alias_aug)
        new_tags_list = [ut.unique(t1 + t2)
                         for t1, t2 in zip(new_tags_list, aug_tags_list)]

    unmapped = list(set(tag_vocab) - set(alias_map.keys()))
    if delete_unmapped:
        new_tags_list = [ut.setdiff(tags, unmapped) for tags in new_tags_list]

    toreturn = None
    if return_map:
        toreturn = (alias_map,)
    if return_unmapped:
        # works even when return_map is False
        toreturn = (tuple() if toreturn is None else toreturn) + (unmapped,)
    if toreturn is None:
        toreturn = new_tags_list
    else:
        toreturn = (new_tags_list,) + toreturn
    return toreturn
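# A minimal usage sketch of modify_tags. The tag names and regex patterns are
# hypothetical, and this assumes utool is importable and that
# ut.build_alias_map maps vocabulary entries matching each regex onto its alias.
#
#     tags_list = [['photo-bomb'], ['split case', 'occlusion'], []]
#     new_tags, alias_map, unmapped = modify_tags(
#         tags_list,
#         direct_map=[('photo-bomb', 'photobomb')],
#         regex_map=[('split.*', 'splitcase')],
#         return_map=True, return_unmapped=True,
#     )
#     # expected roughly: new_tags == [['photobomb'], ['splitcase', 'occlusion'], []]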
def export_annots(ibs, aid_list, new_dbpath=None):
    r"""
    exports a subset of annotations and other required info

    TODO:
        PZ_Master1 needs to backproject information back on to
        NNP_Master3 and PZ_Master0

    Args:
        ibs (IBEISController): ibeis controller object
        aid_list (list): list of annotation rowids
        new_dbpath (None): (default = None)

    Returns:
        str: new_dbpath

    CommandLine:
        python -m ibeis.dbio.export_subset export_annots
        python -m ibeis.dbio.export_subset export_annots --db NNP_Master3 \
            -a viewpoint_compare --nocache-aid --verbtd --new_dbpath=PZ_ViewPoints

        python -m ibeis.expt.experiment_helpers get_annotcfg_list:0 \
            --db NNP_Master3 \
            -a viewpoint_compare --nocache-aid --verbtd

        python -m ibeis.expt.experiment_helpers get_annotcfg_list:0 --db NNP_Master3 \
            -a viewpoint_compare --nocache-aid --verbtd
        python -m ibeis.expt.experiment_helpers get_annotcfg_list:0 --db NNP_Master3 \
            -a default:aids=all,is_known=True,view_pername=#primary>0&#primary1>0,per_name=4,size=200
        python -m ibeis.expt.experiment_helpers get_annotcfg_list:0 --db NNP_Master3 \
            -a default:aids=all,is_known=True,view_pername='#primary>0&#primary1>0',per_name=4,size=200 --acfginfo
        python -m ibeis.expt.experiment_helpers get_annotcfg_list:0 --db PZ_Master1 \
            -a default:has_any=photobomb --acfginfo

    Example:
        >>> # SCRIPT
        >>> from ibeis.dbio.export_subset import *  # NOQA
        >>> import ibeis
        >>> from ibeis.expt import experiment_helpers
        >>> ibs = ibeis.opendb(defaultdb='NNP_Master3')
        >>> acfg_name_list = ut.get_argval(('--aidcfg', '--acfg', '-a'), type_=list, default=[''])
        >>> acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list(ibs, acfg_name_list)
        >>> aid_list = expanded_aids_list[0][0]
        >>> ibs.print_annot_stats(aid_list, viewcode_isect=True, per_image=True)
        >>> # Expand to get all annots in each chosen image
        >>> gid_list = ut.unique_ordered(ibs.get_annot_gids(aid_list))
        >>> aid_list = ut.flatten(ibs.get_image_aids(gid_list))
        >>> ibs.print_annot_stats(aid_list, viewcode_isect=True, per_image=True)
        >>> new_dbpath = ut.get_argval('--new-dbpath', default='PZ_ViewPoints')
        >>> new_dbpath = export_annots(ibs, aid_list, new_dbpath)
        >>> result = ('new_dbpath = %s' % (str(new_dbpath),))
        >>> print(result)
    """
    print('Exporting annotations aid_list=%r' % (aid_list,))
    if new_dbpath is None:
        new_dbpath = make_new_dbpath(ibs, 'aid', aid_list)
    gid_list = ut.unique(ibs.get_annot_gids(aid_list))
    nid_list = ut.unique(ibs.get_annot_nids(aid_list))
    return export_data(ibs, gid_list, aid_list, nid_list, new_dbpath=new_dbpath)
def color_by_nids(graph, unique_nids=None, ibs=None, nid2_color_=None):
    """ Colors edges and nodes by nid """
    # TODO use ut.color_nodes
    import plottool as pt

    ensure_graph_nid_labels(graph, unique_nids, ibs=ibs)
    node_to_nid = nx.get_node_attributes(graph, 'nid')
    unique_nids = ut.unique(node_to_nid.values())
    ncolors = len(unique_nids)
    if ncolors == 1:
        unique_colors = [pt.UNKNOWN_PURP]
    else:
        if nid2_color_ is not None:
            unique_colors = pt.distinct_colors(ncolors + len(nid2_color_) * 2)
        else:
            unique_colors = pt.distinct_colors(ncolors)
    # Find edges and aids strictly between two nids
    nid_to_color = dict(zip(unique_nids, unique_colors))
    if nid2_color_ is not None:
        # HACK NEED TO ENSURE COLORS ARE NOT REUSED
        nid_to_color.update(nid2_color_)
    edge_aids = list(graph.edges())
    edge_nids = ut.unflat_take(node_to_nid, edge_aids)
    flags = [nids[0] == nids[1] for nids in edge_nids]
    flagged_edge_aids = ut.compress(edge_aids, flags)
    flagged_edge_nids = ut.compress(edge_nids, flags)
    flagged_edge_colors = [nid_to_color[nids[0]] for nids in flagged_edge_nids]
    edge_to_color = dict(zip(flagged_edge_aids, flagged_edge_colors))
    node_to_color = ut.map_dict_vals(ut.partial(ut.take, nid_to_color),
                                     node_to_nid)
    nx.set_edge_attributes(graph, 'color', edge_to_color)
    nx.set_node_attributes(graph, 'color', node_to_color)
def __add__(self, other):
    assert self.__class__ is other.__class__, 'incompatible'
    assert self._ibs is other._ibs, 'incompatible'
    assert self._config is other._config, 'incompatible'
    rowids = ut.unique(self._rowids + other._rowids)
    new = self.__class__(rowids, self._ibs, self._config)
    return new
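# A hypothetical usage sketch: combining two subsets created from the same
# controller and config (assumes ibs.annots() returns objects of this class
# with _rowids, _ibs, and _config attributes).
#
#     annots_a = ibs.annots([1, 2, 3])
#     annots_b = ibs.annots([3, 4])
#     merged = annots_a + annots_b   # duplicate rowids collapse: [1, 2, 3, 4]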
def _cm_breaking(infr, cm_list=None, review_cfg={}):
    """
    >>> from wbia.algo.graph.core import *  # NOQA
    >>> review_cfg = {}
    """
    if cm_list is None:
        cm_list = infr.cm_list
    ranks_top = review_cfg.get('ranks_top', None)
    ranks_bot = review_cfg.get('ranks_bot', None)

    # Construct K-broken graph
    edges = []

    if ranks_bot is None:
        ranks_bot = 0

    for count, cm in enumerate(cm_list):
        score_list = cm.annot_score_list
        rank_list = ut.argsort(score_list)[::-1]
        sortx = ut.argsort(rank_list)

        top_sortx = sortx[:ranks_top]
        bot_sortx = sortx[len(sortx) - ranks_bot:]
        short_sortx = ut.unique(top_sortx + bot_sortx)

        daid_list = ut.take(cm.daid_list, short_sortx)
        for daid in daid_list:
            u, v = (cm.qaid, daid)
            if v < u:
                u, v = v, u
            edges.append((u, v))
    return edges
def get_name_image_closure(self):
    ibs = self._ibs
    aids = self.aids
    old_aids = []
    while len(old_aids) != len(aids):
        old_aids = aids
        gids = ut.unique(ibs.get_annot_gids(aids))
        other_aids = list(set(ut.flatten(ibs.get_image_aids(gids))))
        other_nids = list(set(ibs.get_annot_nids(other_aids)))
        aids = ut.flatten(ibs.get_name_aids(other_nids))
    return aids
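# The loop above is a fixed-point expansion: it alternately follows
# annot->image and annot->name links until the annotation set stops growing.
# A minimal standalone sketch of the same idea (plain Python, no controller;
# the toy `neighbors` relation is illustrative).

def _closure_sketch(seed, expand):
    """Grow a set by repeatedly applying `expand` until nothing new appears."""
    items = set(seed)
    while True:
        grown = items | set(expand(items))
        if grown == items:
            return items
        items = grown

# neighbors = {1: [2], 2: [3], 3: [], 4: [1]}
# _closure_sketch([1], lambda s: [n for i in s for n in neighbors.get(i, [])])
# -> {1, 2, 3}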
def newFileDialog(directory_, other_sidebar_dpaths=[], use_sidebar_cwd=True):
    qdlg = QtGui.QFileDialog()
    sidebar_urls = qdlg.sidebarUrls()[:]
    if use_sidebar_cwd:
        sidebar_urls.append(QtCore.QUrl.fromLocalFile(os.getcwd()))
    if directory_ is not None:
        sidebar_urls.append(QtCore.QUrl.fromLocalFile(directory_))
    sidebar_urls.extend(list(map(QtCore.QUrl.fromUserInput, other_sidebar_dpaths)))
    sidebar_urls = ut.unique(sidebar_urls)
    #print('sidebar_urls = %r' % (sidebar_urls,))
    qdlg.setSidebarUrls(sidebar_urls)
    return qdlg
def _set_dialog_sidebar(
    qdlg, directory_=None, use_sidebar_cwd=True, other_sidebar_dpaths=[]
):
    sidebar_urls = qdlg.sidebarUrls()[:]
    if use_sidebar_cwd:
        sidebar_urls.append(QtCore.QUrl.fromLocalFile(os.getcwd()))
    if directory_ is not None:
        sidebar_urls.append(QtCore.QUrl.fromLocalFile(directory_))
    sidebar_urls.extend(list(map(QtCore.QUrl.fromUserInput, other_sidebar_dpaths)))
    sidebar_urls = ut.unique(sidebar_urls)
    # print('sidebar_urls = %r' % (sidebar_urls,))
    qdlg.setSidebarUrls(sidebar_urls)
def aliases(repo):
    aliases = []
    if repo._modname is not None:
        aliases.append(repo._modname)
    aliases.extend(repo._modname_hints[:])
    if repo.dpath and exists(repo.dpath):
        reponame = repo._find_modname_from_repo()
        if reponame is not None:
            aliases.append(reponame)
    aliases.append(repo.reponame)
    import utool as ut
    aliases = ut.unique(aliases)
    return aliases
def color_nodes(graph, labelattr='label'):
    """ Colors edges and nodes by nid """
    import plottool as pt
    import utool as ut
    import networkx as nx
    node_to_lbl = nx.get_node_attributes(graph, labelattr)
    unique_lbls = ut.unique(node_to_lbl.values())
    ncolors = len(unique_lbls)
    if ncolors == 1:
        unique_colors = [pt.NEUTRAL_BLUE]
    else:
        unique_colors = pt.distinct_colors(ncolors)
    # Find edges and aids strictly between two nids
    lbl_to_color = dict(zip(unique_lbls, unique_colors))
    node_to_color = {node: lbl_to_color[lbl] for node, lbl in node_to_lbl.items()}
    nx.set_node_attributes(graph, 'color', node_to_color)
    ut.nx_ensure_agraph_color(graph)
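# A hypothetical usage sketch for color_nodes. It assumes plottool/utool are
# importable and an older networkx whose set_node_attributes signature is
# (graph, name, values), matching the calls above; the labels are illustrative.
#
#     import networkx as nx
#     graph = nx.Graph([(1, 2), (2, 3), (3, 4)])
#     nx.set_node_attributes(graph, 'label', {1: 'a', 2: 'a', 3: 'b', 4: 'b'})
#     color_nodes(graph, labelattr='label')
#     # nodes sharing a label now share a 'color' attribute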
def expand_input(inputs, index, inplace=False):
    """
    Pushes the rootmost inputs all the way up to the sources of the graph

    CommandLine:
        python -m dtool.input_helpers expand_input

    Example:
        >>> # ENABLE_DOCTEST
        >>> from dtool.input_helpers import *  # NOQA
        >>> from dtool.example_depcache2 import *  # NOQA
        >>> depc = testdata_depc4()
        >>> inputs = depc['smk_match'].rootmost_inputs
        >>> inputs = depc['neighbs'].rootmost_inputs
        >>> print('(pre-expand) inputs = %r' % (inputs,))
        >>> index = 'indexer'
        >>> inputs2 = inputs.expand_input(index)
        >>> print('(post-expand) inputs2 = %r' % (inputs2,))
        >>> assert 'indexer' in str(inputs), 'missing indexer1'
        >>> assert 'indexer' not in str(inputs2), (
        >>>     '(2) unexpected indexer in %s' % (inputs2,))
    """
    if isinstance(index, six.string_types):
        index_list = ut.where([rmi.tablename == index
                               for rmi in inputs.rmi_list])
        if len(index_list) == 0:
            index = 0
        else:
            index = index_list[0]

    rmi = inputs.rmi_list[index]
    parent_level = rmi.parent_level()
    if len(parent_level) == 0:
        #raise AssertionError('no parents to expand')
        new_rmi_list = inputs.rmi_list[:]
    else:
        new_rmi_list = ut.insert_values(inputs.rmi_list, index, parent_level,
                                        inplace)
        new_rmi_list = ut.unique(new_rmi_list)
    if inplace:
        inputs.rmi_list = new_rmi_list
        new_inputs = inputs
    else:
        new_inputs = TableInput(new_rmi_list, inputs.exi_graph, inputs.table)
    return new_inputs
def append_annot_case_tags(ibs, aid_list, tag_list):
    """
    Generally appends tags to annotations. Careful not to introduce too many
    random tags. Maybe we should just let that happen and introduce tag-aliases

    Note: this is more of a set add rather than a list append

    TODO: remove
    """
    # Ensure each item is a list
    #tags_list = [tag if isinstance(tag, list) else [tag] for tag in tag_list]
    if isinstance(tag_list, six.string_types):
        # Apply single tag to everybody
        tag_list = [tag_list] * len(aid_list)
    tags_list = [ut.ensure_iterable(tag) for tag in tag_list]
    text_list = ibs.get_annot_tag_text(aid_list)
    orig_tags_list = [[] if note is None else _parse_tags(note)
                      for note in text_list]
    new_tags_list = [ut.unique(t1 + t2)
                     for t1, t2 in zip(tags_list, orig_tags_list)]
    ibs.set_annot_case_tags(aid_list, new_tags_list)
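# A hypothetical usage sketch against an open controller (the aids and tag
# names are made up). Existing tags are preserved because the new and old tag
# lists are unioned before being written back.
#
#     # broadcast one tag to several annotations
#     append_annot_case_tags(ibs, [1, 2, 3], 'photobomb')
#     # or give each annotation its own tag list
#     append_annot_case_tags(ibs, [1, 2], [['occlusion'], ['blurry', 'shadow']])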
def flat_compute_order(inputs):
    """
    This is basically the scheduler

    TODO:
        We need to verify the correctness of this logic. It seems to
        not be deterministic between versions of python.

    CommandLine:
        python -m dtool.input_helpers flat_compute_order

    Example:
        >>> # xdoctest: +REQUIRES(--fixme)
        >>> from wbia.dtool.input_helpers import *  # NOQA
        >>> from wbia.dtool.example_depcache2 import *  # NOQA
        >>> depc = testdata_depc4()
        >>> inputs = depc['feat'].rootmost_inputs.total_expand()
        >>> flat_compute_order = inputs.flat_compute_order()
        >>> result = ut.repr2(flat_compute_order)
        ...
        >>> print(result)
        [chip[t, t:1, 1:1], probchip[t, t:1, 1:1], feat[t, t:1]]
    """
    # Compute the order in which all nodes must be evaluated
    import networkx as nx  # NOQA

    ordered_compute_nodes = [rmi.compute_order() for rmi in inputs.rmi_list]
    flat_node_order_ = ut.unique(ut.flatten(ordered_compute_nodes))

    rgraph = inputs.exi_graph.reverse()
    toprank = ut.nx_topsort_rank(rgraph, flat_node_order_)
    sortx = ut.argsort(toprank)[::-1]
    flat_compute_order = ut.take(flat_node_order_, sortx)
    # Inputs are pre-computed.
    for rmi in inputs.rmi_list:
        try:
            flat_compute_order.remove(rmi.node)
        except ValueError as ex:
            ut.printex(ex, 'something is wrong', keys=['rmi.node'])
            raise
    return flat_compute_order
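# The scheduling step amounts to a topological ordering of the compute graph,
# with pre-computed inputs removed from the front. A toy sketch of that idea
# with plain networkx (the table names here are illustrative, not the real
# depcache tables):

def _flat_order_sketch():
    import networkx as nx

    g = nx.DiGraph([('chip', 'feat'), ('probchip', 'feat'), ('feat', 'smk_match')])
    order = list(nx.topological_sort(g))
    # parents ('chip', 'probchip') come before 'feat', which comes before 'smk_match'
    precomputed = {'chip'}
    return [node for node in order if node not in precomputed]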
def __debug_win_msvcr():
    import utool as ut
    fname = 'msvcr*.dll'
    key_list = ['PATH']
    found = ut.search_env_paths(fname, key_list)
    fpaths = ut.unique(ut.flatten(found.values()))
    fpaths = ut.lmap(ut.ensure_unixslash, fpaths)
    from os.path import basename
    dllnames = [basename(x) for x in fpaths]
    grouped = dict(ut.group_items(fpaths, dllnames))
    print(ut.dict_str(grouped, nl=4))

    keytoid = {}

    for key, vals in grouped.items():
        infos = ut.lmap(ut.get_file_nBytes, vals)
        #infos = ut.lmap(ut.get_file_uuid, vals)
        #uuids = [ut.get_file_uuid(val) for val in vals]
        keytoid[key] = list(zip(infos, vals))
    ut.print_dict(keytoid, nl=2)
def get_annotmatch_rowids_from_aid(ibs, aid_list, eager=True, nInput=None,
                                   force_method=None):
    """
    Undirected version
    Returns a list of the aids that were reviewed as candidate matches to the
    input aid

    aid_list = ibs.get_valid_aids()

    CommandLine:
        python -m wbia.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid
        python -m wbia.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid:1 --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.annotmatch_funcs import *  # NOQA
        >>> import wbia
        >>> ibs = wbia.opendb(defaultdb='testdb1')
        >>> ut.exec_funckw(ibs.get_annotmatch_rowids_from_aid, globals())
        >>> aid_list = ibs.get_valid_aids()[0:4]
        >>> annotmatch_rowid_list = ibs.get_annotmatch_rowids_from_aid(aid_list,
        >>>                                                            eager, nInput)
        >>> result = ('annotmatch_rowid_list = %s' % (str(annotmatch_rowid_list),))
        >>> print(result)
    """
    # from wbia.control import manual_annotmatch_funcs
    if nInput is None:
        nInput = len(aid_list)
    if nInput == 0:
        return []
    rowids1 = ibs.get_annotmatch_rowids_from_aid1(aid_list)
    rowids2 = ibs.get_annotmatch_rowids_from_aid2(aid_list)
    annotmatch_rowid_list = [ut.unique(ut.flatten(p))
                             for p in zip(rowids1, rowids2)]
    # Ensure function output is consistent
    annotmatch_rowid_list = list(map(sorted, annotmatch_rowid_list))
    return annotmatch_rowid_list
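# A toy illustration of the undirected merge performed above (plain Python;
# the rowid values are made up): per-aid match rowids from the aid1 and aid2
# directions are unioned, then sorted for deterministic output.
#
#     rowids1 = [[10, 11], [], [12]]
#     rowids2 = [[11], [13], []]
#     merged = [sorted(set(a + b)) for a, b in zip(rowids1, rowids2)]
#     # merged == [[10, 11], [13], [12]]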
def fix_path(r):
    """ Removes duplicates from the PATH variable """
    PATH_SEP = os.path.pathsep
    pathstr = robos.get_env_var('PATH')
    import utool as ut
    pathlist = ut.unique(pathstr.split(PATH_SEP))
    new_path = ''
    failed_bit = False
    for p in pathlist:
        if os.path.exists(p):
            new_path = new_path + p + PATH_SEP
        elif p == '':
            pass
        elif p.find('%') > -1 or p.find('$') > -1:
            print('PATH=%s has an envvar. Not checking existence' % p)
            new_path = new_path + p + PATH_SEP
        else:
            print('PATH=%s does not exist!!' % p)
            failed_bit = True
    #remove trailing semicolons
    if failed_bit:
        ans = input('Should I overwrite the path? yes/no?')
        if ans == 'yes':
            failed_bit = False
    if len(new_path) > 0 and new_path[-1] == PATH_SEP:
        new_path = new_path[0:-1]
    if failed_bit is True:
        print("Path FIXING Failed. A Good path should be: \n%s" % new_path)
        print("\n\n====\n\n The old path was:\n%s" % pathstr)
    elif pathstr == new_path:
        print("The path was already clean")
    else:
        robos.set_env_var('PATH', new_path)
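# A read-only, standard-library sketch of the same PATH cleanup. It assumes
# nothing about the `robos` helper and only returns the cleaned value instead
# of writing it back to the environment.

def _cleaned_path_sketch():
    import os

    seen, kept = set(), []
    for p in os.environ.get('PATH', '').split(os.pathsep):
        # keep existing dirs, skip empties and duplicates, trust env-var entries
        keep = p and p not in seen and (os.path.exists(p) or '%' in p or '$' in p)
        if keep:
            seen.add(p)
            kept.append(p)
    return os.pathsep.join(kept)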
def get_dbinfo( ibs, verbose=True, with_imgsize=False, with_bytes=False, with_contrib=False, with_agesex=False, with_header=True, short=False, tag='dbinfo', aid_list=None, aids=None, ): """ Returns dictionary of digestable database information Infostr is a string summary of all the stats. Prints infostr in addition to returning locals Args: ibs (IBEISController): verbose (bool): with_imgsize (bool): with_bytes (bool): Returns: dict: SeeAlso: python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --use-hist=True --old=False --per_name_vpedge=False python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --all CommandLine: python -m wbia.other.dbinfo --exec-get_dbinfo:0 python -m wbia.other.dbinfo --test-get_dbinfo:1 python -m wbia.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3 python -m wbia.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1 python -m wbia.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1 python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db LF_Bajo_bonito -a default python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db DETECT_SEATURTLES -a default --readonly python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a ctrl python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0 python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0 Example1: >>> # SCRIPT >>> from wbia.other.dbinfo import * # NOQA >>> import wbia >>> defaultdb = 'testdb1' >>> ibs, aid_list = wbia.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1') >>> kwargs = ut.get_kwdefaults(get_dbinfo) >>> kwargs['verbose'] = False >>> kwargs['aid_list'] = aid_list >>> kwargs = ut.parse_dict_from_argv(kwargs) >>> output = get_dbinfo(ibs, **kwargs) >>> result = (output['info_str']) >>> print(result) >>> #ibs = wbia.opendb(defaultdb='testdb1') >>> # <HACK FOR FILTERING> >>> #from wbia.expt import cfghelpers >>> #from wbia.expt import annotation_configs >>> #from wbia.init import filter_annots >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__, >>> # annotation_configs.TEST_NAMES) >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES, >>> # ut.get_list_column(named_defaults_dict, 'qcfg'))) >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0] >>> #aid_list = ibs.get_valid_aids() >>> # </HACK FOR FILTERING> Example1: >>> # ENABLE_DOCTEST >>> from wbia.other.dbinfo import * # NOQA >>> import wbia >>> verbose = True >>> short = True >>> #ibs = wbia.opendb(db='GZ_ALL') >>> #ibs = wbia.opendb(db='PZ_Master0') >>> ibs = wbia.opendb('testdb1') >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS' >>> ibs.delete_contributors(ibs.get_valid_contributor_rowids()) >>> ibs.delete_empty_nids() >>> #ibs = wbia.opendb(db='PZ_MTEST') >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True) >>> result = (output['info_str']) >>> print(result) +============================ DB Info: testdb1 DB Notes: None DB NumContrib: 0 ---------- # Names = 7 # Names (unassociated) = 0 # 
Names (singleton) = 5 # Names (multiton) = 2 ---------- # Annots = 13 # Annots (unknown) = 4 # Annots (singleton) = 5 # Annots (multiton) = 4 ---------- # Img = 13 L============================ """ # TODO Database size in bytes # TODO: occurrence, contributors, etc... if aids is not None: aid_list = aids # Basic variables request_annot_subset = False _input_aid_list = aid_list # NOQA if aid_list is None: valid_aids = ibs.get_valid_aids() valid_nids = ibs.get_valid_nids() valid_gids = ibs.get_valid_gids() else: if isinstance(aid_list, str): # Hack to get experiment stats on aids acfg_name_list = [aid_list] logger.info('Specified custom aids via acfgname %s' % (acfg_name_list,)) from wbia.expt import experiment_helpers acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list( ibs, acfg_name_list ) aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list))))) # aid_list = if verbose: logger.info('Specified %d custom aids' % (len(aid_list,))) request_annot_subset = True valid_aids = aid_list valid_nids = list( set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False)) - {const.UNKNOWN_NAME_ROWID} ) valid_gids = list(set(ibs.get_annot_gids(aid_list))) # associated_nids = ibs.get_valid_nids(filter_empty=True) # nids with at least one annotation valid_images = ibs.images(valid_gids) valid_annots = ibs.annots(valid_aids) # Image info if verbose: logger.info('Checking Image Info') gx2_aids = valid_images.aids if request_annot_subset: # remove annots not in this subset valid_aids_set = set(valid_aids) gx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in gx2_aids] gx2_nAnnots = np.array(list(map(len, gx2_aids))) image_without_annots = len(np.where(gx2_nAnnots == 0)[0]) gx2_nAnnots_stats = ut.repr4( ut.get_stats(gx2_nAnnots, use_median=True), nl=0, precision=2, si=True ) image_reviewed_list = ibs.get_image_reviewed(valid_gids) # Name stats if verbose: logger.info('Checking Name Info') nx2_aids = ibs.get_name_aids(valid_nids) if request_annot_subset: # remove annots not in this subset valid_aids_set = set(valid_aids) nx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in nx2_aids] associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids))) ibs.check_name_mapping_consistency(nx2_aids) if False: # Occurrence Info def compute_annot_occurrence_ids(ibs, aid_list): from wbia.algo.preproc import preproc_occurrence gid_list = ibs.get_annot_gids(aid_list) gid2_aids = ut.group_items(aid_list, gid_list) config = {'seconds_thresh': 4 * 60 * 60} flat_imgsetids, flat_gids = preproc_occurrence.wbia_compute_occurrences( ibs, gid_list, config=config, verbose=False ) occurid2_gids = ut.group_items(flat_gids, flat_imgsetids) occurid2_aids = { oid: ut.flatten(ut.take(gid2_aids, gids)) for oid, gids in occurid2_gids.items() } return occurid2_aids import utool with utool.embed_on_exception_context: occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids) occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values()) occur_unique_nids = [ut.unique(nids) for nids in occur_nids] nid2_occurxs = ut.ddict(list) for occurx, nids in enumerate(occur_unique_nids): for nid in nids: nid2_occurxs[nid].append(occurx) nid2_occurx_single = { nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1 } nid2_occurx_resight = { nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1 } singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys()) singlesight_annot_stats = ut.get_stats( list(map(len, 
singlesight_encounters)), use_median=True, use_sum=True ) resight_name_stats = ut.get_stats( list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True ) # Encounter Info def break_annots_into_encounters(aids): from wbia.algo.preproc import occurrence_blackbox import datetime thresh_sec = datetime.timedelta(minutes=30).seconds posixtimes = np.array(ibs.get_annot_image_unixtimes_asfloat(aids)) # latlons = ibs.get_annot_image_gps(aids) labels = occurrence_blackbox.cluster_timespace2( posixtimes, None, thresh_sec=thresh_sec ) return labels # ave_enc_time = [np.mean(times) for lbl, times in ut.group_items(posixtimes, labels).items()] # ut.square_pdist(ave_enc_time) try: am_rowids = ibs.get_annotmatch_rowids_between_groups([valid_aids], [valid_aids])[ 0 ] aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0, am_rowids=am_rowids) undirected_tags = ibs.get_aidpair_tags( aid_pairs.T[0], aid_pairs.T[1], directed=False ) tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags)) tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags) pair_tag_info = ut.map_dict_vals(len, tag_dict) except Exception: pair_tag_info = {} # logger.info(ut.repr2(pair_tag_info)) # Annot Stats # TODO: number of images where chips cover entire image # TODO: total image coverage of annotation # TODO: total annotation overlap """ ax2_unknown = ibs.is_aid_unknown(valid_aids) ax2_nid = ibs.get_annot_name_rowids(valid_aids) assert all([nid < 0 if unknown else nid > 0 for nid, unknown in zip(ax2_nid, ax2_unknown)]), 'bad annot nid' """ # if verbose: logger.info('Checking Annot Species') unknown_annots = valid_annots.compress(ibs.is_aid_unknown(valid_annots)) species_list = valid_annots.species_texts species2_annots = valid_annots.group_items(valid_annots.species_texts) species2_nAids = {key: len(val) for key, val in species2_annots.items()} if verbose: logger.info('Checking Multiton/Singleton Species') nx2_nAnnots = np.array(list(map(len, nx2_aids))) # Seperate singleton / multitons multiton_nxs = np.where(nx2_nAnnots > 1)[0] singleton_nxs = np.where(nx2_nAnnots == 1)[0] unassociated_nxs = np.where(nx2_nAnnots == 0)[0] assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names' valid_nxs = np.hstack([multiton_nxs, singleton_nxs]) num_names_with_gt = len(multiton_nxs) # Annot Info if verbose: logger.info('Checking Annot Info') multiton_aids_list = ut.take(nx2_aids, multiton_nxs) assert len(set(multiton_nxs)) == len(multiton_nxs) if len(multiton_aids_list) == 0: multiton_aids = np.array([], dtype=np.int) else: multiton_aids = np.hstack(multiton_aids_list) assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot' singleton_aids = ut.take(nx2_aids, singleton_nxs) multiton_nid2_nannots = list(map(len, multiton_aids_list)) # Image size stats if with_imgsize: if verbose: logger.info('Checking ImageSize Info') gpath_list = ibs.get_image_paths(valid_gids) def wh_print_stats(wh_list): if len(wh_list) == 0: return '{empty}' wh_list = np.asarray(wh_list) stat_dict = collections.OrderedDict( [ ('max', wh_list.max(0)), ('min', wh_list.min(0)), ('mean', wh_list.mean(0)), ('std', wh_list.std(0)), ] ) def arr2str(var): return '[' + (', '.join(list(map(lambda x: '%.1f' % x, var)))) + ']' ret = ',\n '.join( ['%s:%s' % (key, arr2str(val)) for key, val in stat_dict.items()] ) return '{\n ' + ret + '\n}' logger.info('reading image sizes') # Image size stats img_size_list = ibs.get_image_sizes(valid_gids) img_size_stats = wh_print_stats(img_size_list) # Chip size stats annotation_bbox_list = 
ibs.get_annot_bboxes(valid_aids) annotation_bbox_arr = np.array(annotation_bbox_list) if len(annotation_bbox_arr) == 0: annotation_size_list = [] else: annotation_size_list = annotation_bbox_arr[:, 2:4] chip_size_stats = wh_print_stats(annotation_size_list) imgsize_stat_lines = [ (' # Img in dir = %d' % len(gpath_list)), (' Image Size Stats = %s' % (img_size_stats,)), (' * Chip Size Stats = %s' % (chip_size_stats,)), ] else: imgsize_stat_lines = [] if verbose: logger.info('Building Stats String') multiton_stats = ut.repr3( ut.get_stats(multiton_nid2_nannots, use_median=True), nl=0, precision=2, si=True ) # Time stats unixtime_list = valid_images.unixtime2 # valid_unixtime_list = [time for time in unixtime_list if time != -1] # unixtime_statstr = ibs.get_image_time_statstr(valid_gids) if ut.get_argflag('--hackshow-unixtime'): show_time_distributions(ibs, unixtime_list) ut.show_if_requested() unixtime_statstr = ut.repr3(ut.get_timestats_dict(unixtime_list, full=True), si=True) # GPS stats gps_list_ = ibs.get_image_gps(valid_gids) gpsvalid_list = [gps != (-1, -1) for gps in gps_list_] gps_list = ut.compress(gps_list_, gpsvalid_list) def get_annot_age_stats(aid_list): annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list) annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list) age_dict = ut.ddict((lambda: 0)) for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max): if max_age is None: max_age = min_age if min_age is None: min_age = max_age if max_age is None and min_age is None: logger.info('Found UNKNOWN Age: %r, %r' % (min_age, max_age,)) age_dict['UNKNOWN'] += 1 elif (min_age is None or min_age < 12) and max_age < 12: age_dict['Infant'] += 1 elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36: age_dict['Juvenile'] += 1 elif 36 <= min_age and (max_age is None or 36 <= max_age): age_dict['Adult'] += 1 return age_dict def get_annot_sex_stats(aid_list): annot_sextext_list = ibs.get_annot_sex_texts(aid_list) sextext2_aids = ut.group_items(aid_list, annot_sextext_list) sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys()) assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str( set(annot_sextext_list) - set(sex_keys) ) sextext2_nAnnots = ut.odict( [(key, len(sextext2_aids.get(key, []))) for key in sex_keys] ) # Filter 0's sextext2_nAnnots = { key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0 } return sextext2_nAnnots def get_annot_qual_stats(ibs, aid_list): annots = ibs.annots(aid_list) qualtext2_nAnnots = ut.order_dict_by( ut.map_vals(len, annots.group_items(annots.quality_texts)), list(ibs.const.QUALITY_TEXT_TO_INT.keys()), ) return qualtext2_nAnnots def get_annot_viewpoint_stats(ibs, aid_list): annots = ibs.annots(aid_list) viewcode2_nAnnots = ut.order_dict_by( ut.map_vals(len, annots.group_items(annots.viewpoint_code)), list(ibs.const.VIEW.CODE_TO_INT.keys()) + [None], ) return viewcode2_nAnnots if verbose: logger.info('Checking Other Annot Stats') qualtext2_nAnnots = get_annot_qual_stats(ibs, valid_aids) viewcode2_nAnnots = get_annot_viewpoint_stats(ibs, valid_aids) agetext2_nAnnots = get_annot_age_stats(valid_aids) sextext2_nAnnots = get_annot_sex_stats(valid_aids) if verbose: logger.info('Checking Contrib Stats') # Contributor Statistics # hack remove colon for image alignment def fix_tag_list(tag_list): return [None if tag is None else tag.replace(':', ';') for tag in tag_list] image_contributor_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids)) annot_contributor_tags = 
fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids)) contributor_tag_to_gids = ut.group_items(valid_gids, image_contributor_tags) contributor_tag_to_aids = ut.group_items(valid_aids, annot_contributor_tags) contributor_tag_to_qualstats = { key: get_annot_qual_stats(ibs, aids) for key, aids in six.iteritems(contributor_tag_to_aids) } contributor_tag_to_viewstats = { key: get_annot_viewpoint_stats(ibs, aids) for key, aids in six.iteritems(contributor_tag_to_aids) } contributor_tag_to_nImages = { key: len(val) for key, val in six.iteritems(contributor_tag_to_gids) } contributor_tag_to_nAnnots = { key: len(val) for key, val in six.iteritems(contributor_tag_to_aids) } if verbose: logger.info('Summarizing') # Summarize stats num_names = len(valid_nids) num_names_unassociated = len(valid_nids) - len(associated_nids) num_names_singleton = len(singleton_nxs) num_names_multiton = len(multiton_nxs) num_singleton_annots = len(singleton_aids) num_multiton_annots = len(multiton_aids) num_unknown_annots = len(unknown_annots) num_annots = len(valid_aids) if with_bytes: if verbose: logger.info('Checking Disk Space') ibsdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir())) dbdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir())) imgdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir())) cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir())) if True: if verbose: logger.info('Check asserts') try: bad_aids = np.intersect1d(multiton_aids, unknown_annots) _num_names_total_check = ( num_names_singleton + num_names_unassociated + num_names_multiton ) _num_annots_total_check = ( num_unknown_annots + num_singleton_annots + num_multiton_annots ) assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids' assert _num_names_total_check == num_names, 'inconsistent num names' # if not request_annot_subset: # dont check this if you have an annot subset assert _num_annots_total_check == num_annots, 'inconsistent num annots' except Exception as ex: ut.printex( ex, keys=[ '_num_names_total_check', 'num_names', '_num_annots_total_check', 'num_annots', 'num_names_singleton', 'num_names_multiton', 'num_unknown_annots', 'num_multiton_annots', 'num_singleton_annots', ], ) raise # Get contributor statistics contributor_rowids = ibs.get_valid_contributor_rowids() num_contributors = len(contributor_rowids) # print num_tabs = 5 def align2(str_): return ut.align(str_, ':', ' :') def align_dict2(dict_): str_ = ut.repr2(dict_, si=True) return align2(str_) header_block_lines = [('+============================')] + ( [ ('+ singleton := single sighting'), ('+ multiton := multiple sightings'), ('--' * num_tabs), ] if not short and with_header else [] ) source_block_lines = [ ('DB Info: ' + ibs.get_dbname()), ('DB Notes: ' + ibs.get_dbnotes()), ('DB NumContrib: %d' % num_contributors), ] bytes_block_lines = ( [ ('--' * num_tabs), ('DB Bytes: '), (' +- dbdir nBytes: ' + dbdir_space), (' | +- _ibsdb nBytes: ' + ibsdir_space), (' | | +-imgdir nBytes: ' + imgdir_space), (' | | +-cachedir nBytes: ' + cachedir_space), ] if with_bytes else [] ) name_block_lines = [ ('--' * num_tabs), ('# Names = %d' % num_names), ('# Names (unassociated) = %d' % num_names_unassociated), ('# Names (singleton) = %d' % num_names_singleton), ('# Names (multiton) = %d' % num_names_multiton), ] subset_str = ' ' if not request_annot_subset else '(SUBSET)' annot_block_lines = [ ('--' * num_tabs), ('# Annots %s = %d' % (subset_str, num_annots,)), ('# Annots (unknown) = %d' % num_unknown_annots), ('# Annots (singleton) 
= %d' % num_singleton_annots), ('# Annots (multiton) = %d' % num_multiton_annots), ] annot_per_basic_block_lines = ( [ ('--' * num_tabs), ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)), ('# Annots per Image = %s' % (align2(gx2_nAnnots_stats),)), ('# Annots per Species = %s' % (align_dict2(species2_nAids),)), ] if not short else [] ) occurrence_block_lines = ( [ ('--' * num_tabs), # ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)), # ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)), ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)), ] if not short else [] ) annot_per_qualview_block_lines = [ None if short else '# Annots per Viewpoint = %s' % align_dict2(viewcode2_nAnnots), None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots), ] annot_per_agesex_block_lines = ( [ '# Annots per Age = %s' % align_dict2(agetext2_nAnnots), '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots), ] if not short and with_agesex else [] ) contributor_block_lines = ( [ '# Images per contributor = ' + align_dict2(contributor_tag_to_nImages), '# Annots per contributor = ' + align_dict2(contributor_tag_to_nAnnots), '# Quality per contributor = ' + ut.repr2(contributor_tag_to_qualstats, sorted_=True), '# Viewpoint per contributor = ' + ut.repr2(contributor_tag_to_viewstats, sorted_=True), ] if with_contrib else [] ) img_block_lines = [ ('--' * num_tabs), ('# Img = %d' % len(valid_gids)), None if short else ('# Img reviewed = %d' % sum(image_reviewed_list)), None if short else ('# Img with gps = %d' % len(gps_list)), # ('# Img with timestamp = %d' % len(valid_unixtime_list)), None if short else ('Img Time Stats = %s' % (align2(unixtime_statstr),)), ] info_str_lines = ( header_block_lines + bytes_block_lines + source_block_lines + name_block_lines + annot_block_lines + annot_per_basic_block_lines + occurrence_block_lines + annot_per_qualview_block_lines + annot_per_agesex_block_lines + img_block_lines + contributor_block_lines + imgsize_stat_lines + [('L============================')] ) info_str = '\n'.join(ut.filter_Nones(info_str_lines)) info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag)) if verbose: logger.info(info_str2) locals_ = locals() return locals_
def bigcache_vsone(qreq_, hyper_params):
    """
    Cached output of one-vs-one matches

    >>> from wbia.scripts.script_vsone import *  # NOQA
    >>> self = OneVsOneProblem()
    >>> qreq_ = self.qreq_
    >>> hyper_params = self.hyper_params
    """
    import vtool as vt
    import wbia

    # Get a set of training pairs
    ibs = qreq_.ibs
    cm_list = qreq_.execute()
    infr = wbia.AnnotInference.from_qreq_(qreq_, cm_list, autoinit=True)

    # Per query choose a set of correct, incorrect, and random training pairs
    aid_pairs_ = infr._cm_training_pairs(
        rng=np.random.RandomState(42), **hyper_params.pair_sample
    )

    aid_pairs_ = vt.unique_rows(np.array(aid_pairs_), directed=False).tolist()

    pb_aid_pairs_ = photobomb_samples(ibs)

    # TODO: try to add in more non-comparable samples
    aid_pairs_ = pb_aid_pairs_ + aid_pairs_
    aid_pairs_ = vt.unique_rows(np.array(aid_pairs_))

    # ======================================
    # Compute one-vs-one scores and local_measures
    # ======================================

    # Prepare lazy attributes for annotations
    qreq_ = infr.qreq_
    ibs = qreq_.ibs
    qconfig2_ = qreq_.extern_query_config2
    dconfig2_ = qreq_.extern_data_config2
    qannot_cfg = ibs.depc.stacked_config(None, 'featweight', qconfig2_)
    dannot_cfg = ibs.depc.stacked_config(None, 'featweight', dconfig2_)

    # Remove any pairs missing features
    if dannot_cfg == qannot_cfg:
        unique_annots = ibs.annots(np.unique(np.array(aid_pairs_)), config=dannot_cfg)
        bad_aids = unique_annots.compress(~np.array(unique_annots.num_feats) > 0).aids
        bad_aids = set(bad_aids)
    else:
        annots1_ = ibs.annots(ut.unique(ut.take_column(aid_pairs_, 0)), config=qannot_cfg)
        annots2_ = ibs.annots(ut.unique(ut.take_column(aid_pairs_, 1)), config=dannot_cfg)
        bad_aids1 = annots1_.compress(~np.array(annots1_.num_feats) > 0).aids
        bad_aids2 = annots2_.compress(~np.array(annots2_.num_feats) > 0).aids
        bad_aids = set(bad_aids1 + bad_aids2)
    subset_idxs = np.where(
        [not (a1 in bad_aids or a2 in bad_aids) for a1, a2 in aid_pairs_]
    )[0]
    # Keep only a random subset
    if hyper_params.subsample:
        rng = np.random.RandomState(3104855634)
        num_max = hyper_params.subsample
        if num_max < len(subset_idxs):
            subset_idxs = rng.choice(subset_idxs, size=num_max, replace=False)
            subset_idxs = sorted(subset_idxs)

    # Take the current selection
    aid_pairs = ut.take(aid_pairs_, subset_idxs)

    if True:
        # NEW WAY
        config = hyper_params.vsone_assign
        # TODO: ensure annot probs like chips and features can be appropriately
        # set via qreq_ config or whatever
        matches = infr.exec_vsone_subset(aid_pairs, config=config)
    else:
        query_aids = ut.take_column(aid_pairs, 0)
        data_aids = ut.take_column(aid_pairs, 1)
        # OLD WAY
        # Determine a unique set of annots per config
        configured_aids = ut.ddict(set)
        configured_aids[qannot_cfg].update(query_aids)
        configured_aids[dannot_cfg].update(data_aids)

        # Make efficient annot-object representation
        configured_obj_annots = {}
        for config, aids in configured_aids.items():
            annots = ibs.annots(sorted(list(aids)), config=config)
            configured_obj_annots[config] = annots

        annots1 = configured_obj_annots[qannot_cfg].loc(query_aids)
        annots2 = configured_obj_annots[dannot_cfg].loc(data_aids)

        # Get hash based on visual annotation appearance of each pair
        # as well as algorithm configurations used to compute those properties
        qvuuids = annots1.visual_uuids
        dvuuids = annots2.visual_uuids
        qcfgstr = annots1._config.get_cfgstr()
        dcfgstr = annots2._config.get_cfgstr()
        annots_cfgstr = ut.hashstr27(qcfgstr) + ut.hashstr27(dcfgstr)
        vsone_uuids = [
            ut.combine_uuids(uuids, salt=annots_cfgstr)
            for uuids in ut.ProgIter(
                zip(qvuuids, dvuuids), length=len(qvuuids), label='hashing ids'
            )
        ]

        # Combine into a big cache for the entire 1-v-1 matching run
        big_uuid = ut.hashstr_arr27(vsone_uuids, '', pathsafe=True)
        cacher = ut.Cacher('vsone_v7', cfgstr=str(big_uuid), appname='vsone_rf_train')

        cached_data = cacher.tryload()
        if cached_data is not None:
            # Caching doesn't work 100% for PairwiseMatch object, so we need to do
            # some postprocessing
            configured_lazy_annots = ut.ddict(dict)
            for config, annots in configured_obj_annots.items():
                annot_dict = configured_lazy_annots[config]
                for _annot in ut.ProgIter(annots.scalars(), label='make lazy dict'):
                    annot_dict[_annot.aid] = _annot._make_lazy_dict()

            # Extract pairs of annot objects (with shared caches)
            lazy_annots1 = ut.take(configured_lazy_annots[qannot_cfg], query_aids)
            lazy_annots2 = ut.take(configured_lazy_annots[dannot_cfg], data_aids)

            # Create a set of PairwiseMatches with the correct annot properties
            matches = [
                vt.PairwiseMatch(annot1, annot2)
                for annot1, annot2 in zip(lazy_annots1, lazy_annots2)
            ]

            # Updating a new matches dictionary ensures the annot1/annot2
            # properties are set correctly
            for key, cached_matches in list(cached_data.items()):
                fixed_matches = [match.copy() for match in matches]
                for fixed, internal in zip(fixed_matches, cached_matches):
                    dict_ = internal.__dict__
                    ut.delete_dict_keys(dict_, ['annot1', 'annot2'])
                    fixed.__dict__.update(dict_)
                cached_data[key] = fixed_matches
        else:
            cached_data = vsone_(
                qreq_,
                query_aids,
                data_aids,
                qannot_cfg,
                dannot_cfg,
                configured_obj_annots,
                hyper_params,
            )
            cacher.save(cached_data)

        # key_ = 'SV_LNBNN'
        key_ = 'RAT_SV'
        # for key in list(cached_data.keys()):
        #     if key != 'SV_LNBNN':
        #         del cached_data[key]
        matches = cached_data[key_]
    return matches, infr
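# The disk-caching pattern used above, shown in isolation. It mirrors the
# ut.Cacher calls in the function (tryload/save); the cache name, cfgstr, and
# the expensive step are placeholders, not values from the original code.
#
#     import utool as ut
#     cacher = ut.Cacher('my_step', cfgstr='config-hash', appname='my_app')
#     data = cacher.tryload()
#     if data is None:
#         data = {'result': 42}   # stand-in for the expensive computation
#         cacher.save(data)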
def remerge_subset(): """ Assumes ibs1 is an updated subset of ibs2. Re-merges ibs1 back into ibs2. TODO: annotmatch table must have non-directional edges for this to work. I.e. u < v Ignore: # Ensure annotmatch and names are up to date with staging # Load graph import ibei ibs = wbia.opendb('PZ_PB_RF_TRAIN') infr = wbia.AnnotInference(aids='all', ibs=ibs, verbose=3) infr.reset_feedback('staging', apply=True) infr.relabel_using_reviews() # Check deltas infr.wbia_name_group_delta_info() infr.wbia_delta_info() # Write if it looks good infr.write_wbia_annotmatch_feedback() infr.write_wbia_name_assignment() Ignore: import wbia ibs = wbia.opendb('PZ_Master1') infr = wbia.AnnotInference(ibs, 'all') infr.reset_feedback('annotmatch', apply=True) CommandLine: python -m wbia.dbio.export_subset remerge_subset """ import wbia ibs1 = wbia.opendb('PZ_PB_RF_TRAIN') ibs2 = wbia.opendb('PZ_Master1') gids1, gids2 = ibs1.images(), ibs2.images() idxs1, idxs2 = ut.isect_indices(gids1.uuids, gids2.uuids) isect_gids1, isect_gids2 = gids1.take(idxs1), gids2.take(idxs2) assert all( set.issubset(set(a1), set(a2)) for a1, a2 in zip(isect_gids1.annot_uuids, isect_gids2.annot_uuids)) annot_uuids = ut.flatten(isect_gids1.annot_uuids) # aids1 = ibs1.annots(ibs1.get_annot_aids_from_uuid(annot_uuids), asarray=True) # aids2 = ibs2.annots(ibs2.get_annot_aids_from_uuid(annot_uuids), asarray=True) aids1 = ibs1.annots(uuids=annot_uuids, asarray=True) aids2 = ibs2.annots(uuids=annot_uuids, asarray=True) import numpy as np to_aids2 = dict(zip(aids1, aids2)) # to_aids1 = dict(zip(aids2, aids1)) # Step 1) Update individual annot properties # These annots need updates # np.where(aids1.visual_uuids != aids2.visual_uuids) # np.where(aids1.semantic_uuids != aids2.semantic_uuids) annot_unary_props = [ # 'yaws', 'bboxes', 'thetas', 'qual', 'species', 'unary_tags'] 'yaws', 'bboxes', 'thetas', 'qual', 'species', 'case_tags', 'multiple', 'age_months_est_max', 'age_months_est_min', # 'sex_texts' ] to_change = {} for key in annot_unary_props: prop1 = getattr(aids1, key) prop2 = getattr(aids2, key) diff_idxs = set(np.where(prop1 != prop2)[0]) if diff_idxs: diff_prop1 = ut.take(prop1, diff_idxs) diff_prop2 = ut.take(prop2, diff_idxs) logger.info('key = %r' % (key, )) logger.info('diff_prop1 = %r' % (diff_prop1, )) logger.info('diff_prop2 = %r' % (diff_prop2, )) to_change[key] = diff_idxs if to_change: changed_idxs = ut.unique(ut.flatten(to_change.values())) logger.info('Found %d annots that need updated properties' % len(changed_idxs)) logger.info('changing unary attributes: %r' % (to_change, )) if False and ut.are_you_sure('apply change'): for key, idxs in to_change.items(): subaids1 = aids1.take(idxs) subaids2 = aids2.take(idxs) prop1 = getattr(subaids1, key) # prop2 = getattr(subaids2, key) setattr(subaids2, key, prop1) else: logger.info('Annot properties are in sync. Nothing to change') # Step 2) Update annotmatch - pairwise relationships infr1 = wbia.AnnotInference(aids=aids1.aids, ibs=ibs1, verbose=3, autoinit=False) # infr2 = wbia.AnnotInference(aids=ibs2.annots().aids, ibs=ibs2, verbose=3) aids2 = ibs2.get_valid_aids(is_known=True) infr2 = wbia.AnnotInference(aids=aids2, ibs=ibs2, verbose=3) infr2.reset_feedback('annotmatch', apply=True) # map feedback from ibs1 onto ibs2 using ibs2 aids. 
fb1 = infr1.read_wbia_annotmatch_feedback() fb1_t = {(to_aids2[u], to_aids2[v]): val for (u, v), val in fb1.items()} fb1_df_t = infr2._pandas_feedback_format(fb1_t).drop('am_rowid', axis=1) # Add transformed feedback into ibs2 infr2.add_feedback_from(fb1_df_t) # Now ensure that dummy connectivity exists to preserve origninal names # from wbia.algo.graph import nx_utils # for (u, v) in infr2.find_mst_edges('name_label'): # infr2.draw_aids((u, v)) # cc1 = infr2.pos_graph.connected_to(u) # cc2 = infr2.pos_graph.connected_to(v) # logger.info(nx_utils.edges_cross(infr2.graph, cc1, cc2)) # infr2.neg_redundancy(cc1, cc2) # infr2.pos_redundancy(cc2) infr2.relabel_using_reviews(rectify=True) infr2.apply_nondynamic_update() if False: infr2.wbia_delta_info() infr2.wbia_name_group_delta_info() if len(list(infr2.inconsistent_components())) > 0: raise NotImplementedError('need to fix inconsistencies first') # Make it so it just loops until inconsistencies are resolved infr2.prioritize() infr2.qt_review_loop() else: infr2.write_wbia_staging_feedback() infr2.write_wbia_annotmatch_feedback() infr2.write_wbia_name_assignment() # if False: # # Fix any inconsistency # infr2.start_qt_interface(loop=False) # test_nodes = [5344, 5430, 5349, 5334, 5383, 2280, 2265, 2234, 5399, # 5338, 2654] # import networkx as nx # nx.is_connected(infr2.graph.subgraph(test_nodes)) # # infr = wbia.AnnotInference(aids=test_nodes, ibs=ibs2, verbose=5) # # randomly sample some new labels to verify # import wbia.guitool as gt # from wbia.gui import inspect_gui # gt.ensure_qapp() # ut.qtensure() # old_groups = ut.group_items(name_delta.index.tolist(), name_delta['old_name']) # del old_groups['____'] # new_groups = ut.group_items(name_delta.index.tolist(), name_delta['new_name']) # from wbia.algo.hots import simulate # c = simulate.compare_groups( # list(new_groups.values()), # list(old_groups.values()), # ) # ut.map_vals(len, c) # for aids in c['pred_splits']: # old_nids = ibs2.get_annot_nids(aids) # new_nids = ut.take_column(infr2.gen_node_attrs('name_label', aids), 1) # split_aids = ut.take_column(ut.group_items(aids, new_nids).values(), 0) # aid1, aid2 = split_aids[0:2] # if False: # inspect_gui.show_vsone_tuner(ibs2, aid1, aid2) # infr2.start_qt_interface(loop=False) # if False: # # import wbia # ibs1 = wbia.opendb('PZ_PB_RF_TRAIN') # infr1 = wbia.AnnotInference(aids='all', ibs=ibs1, verbose=3) # infr1.initialize_graph() # # infr1.reset_feedback('staging') # infr1.reset_feedback('annotmatch') # infr1.apply_feedback_edges() # infr1.relabel_using_reviews() # infr1.apply_review_inference() # infr1.start_qt_interface(loop=False) # delta = infr2.match_state_delta() # logger.info('delta = %r' % (delta,)) # infr2.ensure_mst() # infr2.relabel_using_reviews() # infr2.apply_review_inference() # mst_edges = infr2.find_mst_edges() # set(infr2.graph.edges()).intersection(mst_edges) return """
def split_analysis(ibs): """ CommandLine: python -m ibeis.other.dbinfo split_analysis --show python -m ibeis split_analysis --show python -m ibeis split_analysis --show --good Ignore: # mount sshfs -o idmap=user lev:/ ~/lev # unmount fusermount -u ~/lev Example: >>> # DISABLE_DOCTEST GGR >>> from ibeis.other.dbinfo import * # NOQA >>> import ibeis >>> dbdir = '/media/danger/GGR/GGR-IBEIS' >>> dbdir = dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS') >>> ibs = ibeis.opendb(dbdir=dbdir, allow_newdir=False) >>> import guitool_ibeis as gt >>> gt.ensure_qtapp() >>> win = split_analysis(ibs) >>> ut.quit_if_noshow() >>> import plottool_ibeis as pt >>> gt.qtapp_loop(qwin=win) >>> #ut.show_if_requested() """ #nid_list = ibs.get_valid_nids(filter_empty=True) import datetime day1 = datetime.date(2016, 1, 30) day2 = datetime.date(2016, 1, 31) filter_kw = { 'multiple': None, #'view': ['right'], #'minqual': 'good', 'is_known': True, 'min_pername': 1, } aids1 = ibs.filter_annots_general(filter_kw=ut.dict_union( filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 1.0)), }) ) aids2 = ibs.filter_annots_general(filter_kw=ut.dict_union( filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)), }) ) all_aids = aids1 + aids2 all_annots = ibs.annots(all_aids) print('%d annots on day 1' % (len(aids1)) ) print('%d annots on day 2' % (len(aids2)) ) print('%d annots overall' % (len(all_annots)) ) print('%d names overall' % (len(ut.unique(all_annots.nids))) ) nid_list, annots_list = all_annots.group(all_annots.nids) REVIEWED_EDGES = True if REVIEWED_EDGES: aids_list = [annots.aids for annots in annots_list] #aid_pairs = [annots.get_am_aidpairs() for annots in annots_list] # Slower aid_pairs = ibs.get_unflat_am_aidpairs(aids_list) # Faster else: # ALL EDGES aid_pairs = [annots.get_aidpairs() for annots in annots_list] speeds_list = ibs.unflat_map(ibs.get_annotpair_speeds, aid_pairs) import vtool_ibeis as vt max_speeds = np.array([vt.safe_max(s, nans=False) for s in speeds_list]) nan_idx = np.where(np.isnan(max_speeds))[0] inf_idx = np.where(np.isinf(max_speeds))[0] bad_idx = sorted(ut.unique(ut.flatten([inf_idx, nan_idx]))) ok_idx = ut.index_complement(bad_idx, len(max_speeds)) print('#nan_idx = %r' % (len(nan_idx),)) print('#inf_idx = %r' % (len(inf_idx),)) print('#ok_idx = %r' % (len(ok_idx),)) ok_speeds = max_speeds[ok_idx] ok_nids = ut.take(nid_list, ok_idx) ok_annots = ut.take(annots_list, ok_idx) sortx = np.argsort(ok_speeds)[::-1] sorted_speeds = np.array(ut.take(ok_speeds, sortx)) sorted_annots = np.array(ut.take(ok_annots, sortx)) sorted_nids = np.array(ut.take(ok_nids, sortx)) # NOQA sorted_speeds = np.clip(sorted_speeds, 0, 100) #idx = vt.find_elbow_point(sorted_speeds) #EXCESSIVE_SPEED = sorted_speeds[idx] # http://www.infoplease.com/ipa/A0004737.html # http://www.speedofanimals.com/animals/zebra #ZEBRA_SPEED_MAX = 64 # km/h #ZEBRA_SPEED_RUN = 50 # km/h ZEBRA_SPEED_SLOW_RUN = 20 # km/h #ZEBRA_SPEED_FAST_WALK = 10 # km/h #ZEBRA_SPEED_WALK = 7 # km/h MAX_SPEED = ZEBRA_SPEED_SLOW_RUN #MAX_SPEED = ZEBRA_SPEED_WALK #MAX_SPEED = EXCESSIVE_SPEED flags = sorted_speeds > MAX_SPEED flagged_ok_annots = ut.compress(sorted_annots, flags) inf_annots = ut.take(annots_list, inf_idx) flagged_annots = inf_annots + flagged_ok_annots print('MAX_SPEED = %r km/h' % (MAX_SPEED,)) print('%d annots 
with infinite speed' % (len(inf_annots),)) print('%d annots with large speed' % (len(flagged_ok_annots),)) print('Marking all pairs of annots above the threshold as non-matching') from ibeis.algo.graph import graph_iden import networkx as nx progkw = dict(freq=1, bs=True, est_window=len(flagged_annots)) bad_edges_list = [] good_edges_list = [] for annots in ut.ProgIter(flagged_annots, lbl='flag speeding names', **progkw): edge_to_speeds = annots.get_speeds() bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED] good_edges = [edge for edge, speed in edge_to_speeds.items() if speed <= MAX_SPEED] bad_edges_list.append(bad_edges) good_edges_list.append(good_edges) all_bad_edges = ut.flatten(bad_edges_list) good_edges_list = ut.flatten(good_edges_list) print('num_bad_edges = %r' % (len(ut.flatten(bad_edges_list)),)) print('num_bad_edges = %r' % (len(ut.flatten(good_edges_list)),)) if 1: from ibeis.viz import viz_graph2 import guitool_ibeis as gt gt.ensure_qtapp() if ut.get_argflag('--good'): print('Looking at GOOD (no speed problems) edges') aid_pairs = good_edges_list else: print('Looking at BAD (speed problems) edges') aid_pairs = all_bad_edges aids = sorted(list(set(ut.flatten(aid_pairs)))) infr = graph_iden.AnnotInference(ibs, aids, verbose=False) infr.initialize_graph() # Use random scores to randomize sort order rng = np.random.RandomState(0) scores = (-rng.rand(len(aid_pairs)) * 10).tolist() infr.graph.add_edges_from(aid_pairs) if True: edge_sample_size = 250 pop_nids = ut.unique(ibs.get_annot_nids(ut.unique(ut.flatten(aid_pairs)))) sorted_pairs = ut.sortedby(aid_pairs, scores)[::-1][0:edge_sample_size] sorted_nids = ibs.get_annot_nids(ut.take_column(sorted_pairs, 0)) sample_size = len(ut.unique(sorted_nids)) am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(*zip(*sorted_pairs)) flags = ut.not_list(ut.flag_None_items(am_rowids)) #am_rowids = ut.compress(am_rowids, flags) positive_tags = ['SplitCase', 'Photobomb'] flags_list = [ut.replace_nones(ibs.get_annotmatch_prop(tag, am_rowids), 0) for tag in positive_tags] print('edge_case_hist: ' + ut.repr3( ['%s %s' % (txt, sum(flags_)) for flags_, txt in zip(flags_list, positive_tags)])) is_positive = ut.or_lists(*flags_list) num_positive = sum(ut.lmap(any, ut.group_items(is_positive, sorted_nids).values())) pop = len(pop_nids) print('A positive is any edge flagged as a %s' % (ut.conj_phrase(positive_tags, 'or'),)) print('--- Sampling wrt edges ---') print('edge_sample_size = %r' % (edge_sample_size,)) print('edge_population_size = %r' % (len(aid_pairs),)) print('num_positive_edges = %r' % (sum(is_positive))) print('--- Sampling wrt names ---') print('name_population_size = %r' % (pop,)) vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=.95) nx.set_edge_attributes(infr.graph, name='score', values=dict(zip(aid_pairs, scores))) win = viz_graph2.AnnotGraphWidget(infr=infr, use_image=False, init_mode=None) win.populate_edge_model() win.show() return win # Make review interface for only bad edges infr_list = [] iter_ = list(zip(flagged_annots, bad_edges_list)) for annots, bad_edges in ut.ProgIter(iter_, lbl='creating inference', **progkw): aids = annots.aids nids = [1] * len(aids) infr = graph_iden.AnnotInference(ibs, aids, nids, verbose=False) infr.initialize_graph() infr.reset_feedback() infr_list.append(infr) # Check which ones are user defined as incorrect #num_positive = 0 #for infr in infr_list: # flag = np.any(infr.get_feedback_probs()[0] == 0) # num_positive += flag 
#print('num_positive = %r' % (num_positive,)) #pop = len(infr_list) #print('pop = %r' % (pop,)) iter_ = list(zip(infr_list, bad_edges_list)) for infr, bad_edges in ut.ProgIter(iter_, lbl='adding speed edges', **progkw): flipped_edges = [] for aid1, aid2 in bad_edges: if infr.graph.has_edge(aid1, aid2): flipped_edges.append((aid1, aid2)) infr.add_feedback((aid1, aid2), NEGTV) nx.set_edge_attributes(infr.graph, name='_speed_split', values='orig') nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'new' for edge in bad_edges}) nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'flip' for edge in flipped_edges}) #for infr in ut.ProgIter(infr_list, lbl='flagging speeding edges', **progkw): # annots = ibs.annots(infr.aids) # edge_to_speeds = annots.get_speeds() # bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED] def inference_stats(infr_list_): relabel_stats = [] for infr in infr_list_: num_ccs, num_inconsistent = infr.relabel_using_reviews() state_hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, 'decision').values()) if POSTV not in state_hist: state_hist[POSTV] = 0 hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, '_speed_split').values()) subgraphs = infr.positive_connected_compoments() subgraph_sizes = [len(g) for g in subgraphs] info = ut.odict([ ('num_nonmatch_edges', state_hist[NEGTV]), ('num_match_edges', state_hist[POSTV]), ('frac_nonmatch_edges', state_hist[NEGTV] / (state_hist[POSTV] + state_hist[NEGTV])), ('num_inconsistent', num_inconsistent), ('num_ccs', num_ccs), ('edges_flipped', hist.get('flip', 0)), ('edges_unchanged', hist.get('orig', 0)), ('bad_unreviewed_edges', hist.get('new', 0)), ('orig_size', len(infr.graph)), ('new_sizes', subgraph_sizes), ]) relabel_stats.append(info) return relabel_stats relabel_stats = inference_stats(infr_list) print('\nAll Split Info:') lines = [] for key in relabel_stats[0].keys(): data = ut.take_column(relabel_stats, key) if key == 'new_sizes': data = ut.flatten(data) lines.append('stats(%s) = %s' % (key, ut.repr2(ut.get_stats(data, use_median=True), precision=2))) print('\n'.join(ut.align_lines(lines, '='))) num_incon_list = np.array(ut.take_column(relabel_stats, 'num_inconsistent')) can_split_flags = num_incon_list == 0 print('Can trivially split %d / %d' % (sum(can_split_flags), len(can_split_flags))) splittable_infrs = ut.compress(infr_list, can_split_flags) relabel_stats = inference_stats(splittable_infrs) print('\nTrival Split Info:') lines = [] for key in relabel_stats[0].keys(): if key in ['num_inconsistent']: continue data = ut.take_column(relabel_stats, key) if key == 'new_sizes': data = ut.flatten(data) lines.append('stats(%s) = %s' % ( key, ut.repr2(ut.get_stats(data, use_median=True), precision=2))) print('\n'.join(ut.align_lines(lines, '='))) num_match_edges = np.array(ut.take_column(relabel_stats, 'num_match_edges')) num_nonmatch_edges = np.array(ut.take_column(relabel_stats, 'num_nonmatch_edges')) flags1 = np.logical_and(num_match_edges > num_nonmatch_edges, num_nonmatch_edges < 3) reasonable_infr = ut.compress(splittable_infrs, flags1) new_sizes_list = ut.take_column(relabel_stats, 'new_sizes') flags2 = [len(sizes) == 2 and sum(sizes) > 4 and (min(sizes) / max(sizes)) > .3 for sizes in new_sizes_list] reasonable_infr = ut.compress(splittable_infrs, flags2) print('#reasonable_infr = %r' % (len(reasonable_infr),)) for infr in ut.InteractiveIter(reasonable_infr): annots = ibs.annots(infr.aids) edge_to_speeds = annots.get_speeds() print('max_speed = %r' 
% (max(edge_to_speeds.values())),) infr.initialize_visual_node_attrs() infr.show_graph(use_image=True, only_reviewed=True) rest = ~np.logical_or(flags1, flags2) nonreasonable_infr = ut.compress(splittable_infrs, rest) rng = np.random.RandomState(0) random_idx = ut.random_indexes(len(nonreasonable_infr) - 1, 15, rng=rng) random_infr = ut.take(nonreasonable_infr, random_idx) for infr in ut.InteractiveIter(random_infr): annots = ibs.annots(infr.aids) edge_to_speeds = annots.get_speeds() print('max_speed = %r' % (max(edge_to_speeds.values())),) infr.initialize_visual_node_attrs() infr.show_graph(use_image=True, only_reviewed=True) #import scipy.stats as st #conf_interval = .95 #st.norm.cdf(conf_interval) # view-source:http://www.surveysystem.com/sscalc.htm #zval = 1.96 # 95 percent confidence #zValC = 3.8416 # #zValC = 6.6564 #import statsmodels.stats.api as sms #es = sms.proportion_effectsize(0.5, 0.75) #sms.NormalIndPower().solve_power(es, power=0.9, alpha=0.05, ratio=1) pop = 279 num_positive = 3 sample_size = 15 conf_level = .95 #conf_level = .99 vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level) print('---') vt.calc_error_bars_from_sample(sample_size + 38, num_positive, pop, conf_level) print('---') vt.calc_error_bars_from_sample(sample_size + 38 / 3, num_positive, pop, conf_level) print('---') vt.calc_error_bars_from_sample(15 + 38, num_positive=3, pop=675, conf_level=.95) vt.calc_error_bars_from_sample(15, num_positive=3, pop=675, conf_level=.95) pop = 279 #err_frac = .05 # 5% err_frac = .10 # 10% conf_level = .95 vt.calc_sample_from_error_bars(err_frac, pop, conf_level) pop = 675 vt.calc_sample_from_error_bars(err_frac, pop, conf_level) vt.calc_sample_from_error_bars(.05, pop, conf_level=.95, prior=.1) vt.calc_sample_from_error_bars(.05, pop, conf_level=.68, prior=.2) vt.calc_sample_from_error_bars(.10, pop, conf_level=.68) vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.95) vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.68)
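# The vt.calc_error_bars_from_sample / vt.calc_sample_from_error_bars calls
# above are treated as black boxes. As a hedged sketch (an assumption, not the
# vtool implementation), they behave like the normal-approximation proportion
# interval with a finite-population correction used by the surveysystem.com
# calculator referenced above (z = 1.96 for 95% confidence):
import numpy as np


def approx_error_bars(sample_size, num_positive, pop, z=1.96):
    # Estimated proportion and its margin of error when sampling without
    # replacement from a finite population of size `pop`.
    p = num_positive / sample_size
    fpc = np.sqrt((pop - sample_size) / (pop - 1))
    return p, z * np.sqrt(p * (1 - p) / sample_size) * fpc


def approx_sample_size(err_frac, pop, z=1.96, prior=0.5):
    # Sample size needed for a target margin of error `err_frac`, given a
    # prior guess `prior` for the proportion (0.5 is the most conservative).
    n0 = (z ** 2) * prior * (1 - prior) / (err_frac ** 2)
    return n0 / (1 + (n0 - 1) / pop)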
def estimate_twoday_count(ibs, day1, day2, filter_kw): #gid_list = ibs.get_valid_gids() all_images = ibs.images() dates = [dt.date() for dt in all_images.datetime] date_to_images = all_images.group_items(dates) date_to_images = ut.sort_dict(date_to_images) #date_hist = ut.map_dict_vals(len, date2_gids) #print('date_hist = %s' % (ut.repr2(date_hist, nl=2),)) verbose = 0 visit_dates = [day1, day2] visit_info_list_ = [] for day in visit_dates: images = date_to_images[day] aids = ut.flatten(images.aids) aids = ibs.filter_annots_general(aids, filter_kw=filter_kw, verbose=verbose) nids = ibs.get_annot_name_rowids(aids) grouped_aids = ut.group_items(aids, nids) unique_nids = ut.unique(list(grouped_aids.keys())) if False: aids_list = ut.take(grouped_aids, unique_nids) for aids in aids_list: if len(aids) > 30: break timedeltas_list = ibs.get_unflat_annots_timedelta_list(aids_list) # Do the five second rule marked_thresh = 5 flags = [] for nid, timedeltas in zip(unique_nids, timedeltas_list): flags.append(timedeltas.max() > marked_thresh) print('Unmarking %d names' % (len(flags) - sum(flags))) unique_nids = ut.compress(unique_nids, flags) grouped_aids = ut.dict_subset(grouped_aids, unique_nids) unique_aids = ut.flatten(list(grouped_aids.values())) info = { 'unique_nids': unique_nids, 'grouped_aids': grouped_aids, 'unique_aids': unique_aids, } visit_info_list_.append(info) # Estimate statistics from ibeis.other import dbinfo aids_day1, aids_day2 = ut.take_column(visit_info_list_, 'unique_aids') nids_day1, nids_day2 = ut.take_column(visit_info_list_, 'unique_nids') resight_nids = ut.isect(nids_day1, nids_day2) nsight1 = len(nids_day1) nsight2 = len(nids_day2) resight = len(resight_nids) lp_index, lp_error = dbinfo.sight_resight_count(nsight1, nsight2, resight) if False: from ibeis.other import dbinfo print('DAY 1 STATS:') _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day1) # NOQA print('DAY 2 STATS:') _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day2) # NOQA print('COMBINED STATS:') _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day1 + aids_day2) # NOQA print('%d annots on day 1' % (len(aids_day1)) ) print('%d annots on day 2' % (len(aids_day2)) ) print('%d names on day 1' % (nsight1,)) print('%d names on day 2' % (nsight2,)) print('resight = %r' % (resight,)) print('lp_index = %r ± %r' % (lp_index, lp_error)) return nsight1, nsight2, resight, lp_index, lp_error
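# dbinfo.sight_resight_count is used above as a black box. A minimal sketch of
# the classic Lincoln-Petersen mark-recapture estimate it is assumed to return
# (the exact error formula inside dbinfo may differ):
import math


def lincoln_petersen(nsight1, nsight2, resight):
    # Two-occasion population estimate from the number of names seen on day 1,
    # on day 2, and on both days.
    lp_index = nsight1 * nsight2 / resight
    # One common variance approximation for the Petersen estimator.
    var = (nsight1 * nsight2 * (nsight1 - resight) * (nsight2 - resight)
           / resight ** 3)
    return lp_index, math.sqrt(var)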
def ggr_random_name_splits(): """ CommandLine: python -m wbia.viz.viz_graph2 ggr_random_name_splits --show Ignore: sshfs -o idmap=user lev:/ ~/lev Example: >>> # DISABLE_DOCTEST >>> from wbia.viz.viz_graph2 import * # NOQA >>> ggr_random_name_splits() """ import wbia.guitool as gt gt.ensure_qtapp() # nid_list = ibs.get_valid_nids(filter_empty=True) import wbia dbdir = '/media/danger/GGR/GGR-IBEIS' dbdir = (dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS')) ibs = wbia.opendb(dbdir=dbdir, allow_newdir=False) import datetime day1 = datetime.date(2016, 1, 30) day2 = datetime.date(2016, 1, 31) orig_filter_kw = { 'multiple': None, # 'view': ['right'], # 'minqual': 'good', 'is_known': True, 'min_pername': 2, } orig_aids = ibs.filter_annots_general(filter_kw=ut.dict_union( orig_filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)), }, )) orig_all_annots = ibs.annots(orig_aids) orig_unique_nids, orig_grouped_annots_ = orig_all_annots.group( orig_all_annots.nids) # Ensure we get everything orig_grouped_annots = [ ibs.annots(aids_) for aids_ in ibs.get_name_aids(orig_unique_nids) ] # pip install quantumrandom if False: import quantumrandom data = quantumrandom.uint16() seed = data.sum() print('seed = %r' % (seed, )) # import Crypto.Random # from Crypto import Random # quantumrandom.get_data() # StrongRandom = Crypto.Random.random.StrongRandom # aes.reseed(3340258) # chars = [str(chr(x)) for x in data.view(np.uint8)] # aes_seed = str('').join(chars) # aes = Crypto.Random.Fortuna.FortunaGenerator.AESGenerator() # aes.reseed(aes_seed) # aes.pseudo_random_data(10) orig_rand_idxs = ut.random_indexes(len(orig_grouped_annots), seed=3340258) orig_sample_size = 75 random_annot_groups = ut.take(orig_grouped_annots, orig_rand_idxs) orig_annot_sample = random_annot_groups[:orig_sample_size] # OOOPS MADE ERROR REDO ---- filter_kw = { 'multiple': None, 'view': ['right'], 'minqual': 'good', 'is_known': True, 'min_pername': 2, } filter_kw_ = ut.dict_union( filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)), }, ) refiltered_sample = [ ibs.filter_annots_general(annot.aids, filter_kw=filter_kw_) for annot in orig_annot_sample ] is_ok = np.array(ut.lmap(len, refiltered_sample)) >= 2 ok_part_orig_sample = ut.compress(orig_annot_sample, is_ok) ok_part_orig_nids = [x.nids[0] for x in ok_part_orig_sample] # Now compute real sample aids = ibs.filter_annots_general(filter_kw=filter_kw_) all_annots = ibs.annots(aids) unique_nids, grouped_annots_ = all_annots.group(all_annots.nids) grouped_annots = grouped_annots_ # Ensure we get everything # grouped_annots = [ibs.annots(aids_) for aids_ in ibs.get_name_aids(unique_nids)] pop = len(grouped_annots) pername_list = ut.lmap(len, grouped_annots) groups = wbia.annots.AnnotGroups(grouped_annots, ibs) match_tags = [ut.unique(ut.flatten(t)) for t in groups.match_tags] tag_case_hist = ut.dict_hist(ut.flatten(match_tags)) print('name_pop = %r' % (pop, )) print('Annots per Multiton Name' + ut.repr3(ut.get_stats(pername_list, use_median=True))) print('Name Tag Hist ' + ut.repr3(tag_case_hist)) print('Percent Photobomb: %.2f%%' % (tag_case_hist['photobomb'] / pop * 100)) print('Percent Split: %.2f%%' % (tag_case_hist['splitcase'] / pop * 100)) # Remove the ok part from this sample remain_unique_nids = ut.setdiff(unique_nids, ok_part_orig_nids) 
remain_grouped_annots = [ ibs.annots(aids_) for aids_ in ibs.get_name_aids(remain_unique_nids) ] sample_size = 75 import vtool as vt vt.calc_sample_from_error_bars(0.05, pop, conf_level=0.95, prior=0.05) remain_rand_idxs = ut.random_indexes(len(remain_grouped_annots), seed=3340258) remain_sample_size = sample_size - len(ok_part_orig_nids) remain_random_annot_groups = ut.take(remain_grouped_annots, remain_rand_idxs) remain_annot_sample = remain_random_annot_groups[:remain_sample_size] annot_sample_nofilter = ok_part_orig_sample + remain_annot_sample # Filter out all bad parts annot_sample_filter = [ ibs.annots(ibs.filter_annots_general(annot.aids, filter_kw=filter_kw_)) for annot in annot_sample_nofilter ] annot_sample = annot_sample_filter win = None from wbia.viz import viz_graph2 for annots in ut.InteractiveIter(annot_sample): if win is not None: win.close() win = viz_graph2.make_qt_graph_interface(ibs, aids=annots.aids, init_mode='rereview') print(win) sample_groups = wbia.annots.AnnotGroups(annot_sample, ibs) flat_tags = [ut.unique(ut.flatten(t)) for t in sample_groups.match_tags] print('Using Split and Photobomb') is_positive = ['photobomb' in t or 'splitcase' in t for t in flat_tags] num_positive = sum(is_positive) vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=0.95) print('Only Photobomb') is_positive = ['photobomb' in t for t in flat_tags] num_positive = sum(is_positive) vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=0.95) print('Only SplitCase') is_positive = ['splitcase' in t for t in flat_tags] num_positive = sum(is_positive) vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=0.95)
def dans_splits(ibs): """ python -m ibeis dans_splits --show Example: >>> # DISABLE_DOCTEST GGR >>> from ibeis.other.dbinfo import * # NOQA >>> import ibeis >>> dbdir = '/media/danger/GGR/GGR-IBEIS' >>> dbdir = dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS') >>> ibs = ibeis.opendb(dbdir=dbdir, allow_newdir=False) >>> import guitool_ibeis as gt >>> gt.ensure_qtapp() >>> win = dans_splits(ibs) >>> ut.quit_if_noshow() >>> import plottool_ibeis as pt >>> gt.qtapp_loop(qwin=win) """ #pair = 9262, 932 dans_aids = [26548, 2190, 9418, 29965, 14738, 26600, 3039, 2742, 8249, 20154, 8572, 4504, 34941, 4040, 7436, 31866, 28291, 16009, 7378, 14453, 2590, 2738, 22442, 26483, 21640, 19003, 13630, 25395, 20015, 14948, 21429, 19740, 7908, 23583, 14301, 26912, 30613, 19719, 21887, 8838, 16184, 9181, 8649, 8276, 14678, 21950, 4925, 13766, 12673, 8417, 2018, 22434, 21149, 14884, 5596, 8276, 14650, 1355, 21725, 21889, 26376, 2867, 6906, 4890, 21524, 6690, 14738, 1823, 35525, 9045, 31723, 2406, 5298, 15627, 31933, 19535, 9137, 21002, 2448, 32454, 12615, 31755, 20015, 24573, 32001, 23637, 3192, 3197, 8702, 1240, 5596, 33473, 23874, 9558, 9245, 23570, 33075, 23721, 24012, 33405, 23791, 19498, 33149, 9558, 4971, 34183, 24853, 9321, 23691, 9723, 9236, 9723, 21078, 32300, 8700, 15334, 6050, 23277, 31164, 14103, 21231, 8007, 10388, 33387, 4319, 26880, 8007, 31164, 32300, 32140] is_hyrbid = [7123, 7166, 7157, 7158, ] # NOQA needs_mask = [26836, 29742] # NOQA justfine = [19862] # NOQA annots = ibs.annots(dans_aids) unique_nids = ut.unique(annots.nids) grouped_aids = ibs.get_name_aids(unique_nids) annot_groups = ibs._annot_groups(grouped_aids) split_props = {'splitcase', 'photobomb'} needs_tag = [len(split_props.intersection(ut.flatten(tags))) == 0 for tags in annot_groups.match_tags] num_needs_tag = sum(needs_tag) num_had_split = len(needs_tag) - num_needs_tag print('num_had_split = %r' % (num_had_split,)) print('num_needs_tag = %r' % (num_needs_tag,)) #all_annot_groups = ibs._annot_groups(ibs.group_annots_by_name(ibs.get_valid_aids())[0]) #all_has_split = [len(split_props.intersection(ut.flatten(tags))) > 0 for tags in all_annot_groups.match_tags] #num_nondan = sum(all_has_split) - num_had_split #print('num_nondan = %r' % (num_nondan,)) from ibeis.algo.graph import graph_iden from ibeis.viz import viz_graph2 import guitool_ibeis as gt import plottool_ibeis as pt pt.qt4ensure() gt.ensure_qtapp() aids_list = ut.compress(grouped_aids, needs_tag) aids_list = [a for a in aids_list if len(a) > 1] print('len(aids_list) = %r' % (len(aids_list),)) for aids in aids_list: infr = graph_iden.AnnotInference(ibs, aids) infr.initialize_graph() win = viz_graph2.AnnotGraphWidget(infr=infr, use_image=False, init_mode='rereview') win.populate_edge_model() win.show() return win assert False
def find_consistent_labeling_old(grouped_oldnames, extra_prefix='_extra_name', verbose=False): import numpy as np import scipy.optimize unique_old_names = ut.unique(ut.flatten(grouped_oldnames)) # TODO: find names that are only used once, and just ignore those for # optimization. # unique_set = set(unique_old_names) oldname_sets = list(map(set, grouped_oldnames)) usage_hist = ut.dict_hist(ut.flatten(oldname_sets)) conflicts = {k for k, v in usage_hist.items() if v > 1} # nonconflicts = {k for k, v in usage_hist.items() if v == 1} conflict_groups = [] orig_idxs = [] assignment = [None] * len(grouped_oldnames) ntrivial = 0 for idx, group in enumerate(grouped_oldnames): if set(group).intersection(conflicts): orig_idxs.append(idx) conflict_groups.append(group) else: ntrivial += 1 if len(group) > 0: h = ut.dict_hist(group) hitems = list(h.items()) hvals = [i[1] for i in hitems] maxval = max(hvals) g = min([k for k, v in hitems if v == maxval]) assignment[idx] = g else: assignment[idx] = None if verbose: print('rectify %d non-trivial groups' % (len(conflict_groups), )) print('rectify %d trivial groups' % (ntrivial, )) num_extra = 0 if len(conflict_groups) > 0: grouped_oldnames_ = conflict_groups unique_old_names = ut.unique(ut.flatten(grouped_oldnames_)) num_new_names = len(grouped_oldnames_) num_old_names = len(unique_old_names) extra_oldnames = [] # Create padded dummy values. This accounts for the case where it is # impossible to uniquely map to the old db num_extra = num_new_names - num_old_names if num_extra > 0: extra_oldnames = [ '%s%d' % ( extra_prefix, count, ) for count in range(num_extra) ] elif num_extra < 0: pass else: extra_oldnames = [] assignable_names = unique_old_names + extra_oldnames total = len(assignable_names) # Allocate assignment matrix # Start with a large negative value indicating # that you must select from your assignments only profit_matrix = -np.ones((total, total), dtype=np.int) * (2 * total) # Populate assignment profit matrix oldname2_idx = ut.make_index_lookup(assignable_names) name_freq_list = [ut.dict_hist(names) for names in grouped_oldnames_] # Initialize base profit for using a previously used name for rowx, name_freq in enumerate(name_freq_list): for name, freq in name_freq.items(): colx = oldname2_idx[name] profit_matrix[rowx, colx] = 1 # Now add in the real profit for rowx, name_freq in enumerate(name_freq_list): for name, freq in name_freq.items(): colx = oldname2_idx[name] profit_matrix[rowx, colx] += freq # Set a small profit for using an extra name extra_colxs = ut.take(oldname2_idx, extra_oldnames) profit_matrix[:, extra_colxs] = 1 # Convert to minimization problem big_value = (profit_matrix.max()) - (profit_matrix.min()) cost_matrix = big_value - profit_matrix # Don't use munkres, it is pure python and very slow. Use scipy instead indexes = list(zip(*scipy.optimize.linear_sum_assignment(cost_matrix))) # Map output to be aligned with input rx2_cx = dict(indexes) assignment_ = [ assignable_names[rx2_cx[rx]] for rx in range(num_new_names) ] # Reintegrate trivial values for idx, g in zip(orig_idxs, assignment_): assignment[idx] = g for idx, val in enumerate(assignment): if val is None: assignment[idx] = '%s%d' % ( extra_prefix, num_extra, ) num_extra += 1 return assignment
def make_name_graph_interaction(ibs, nids=None, aids=None, selected_aids=[], with_all=True, invis_edges=None, ensure_edges=None, use_image=False, temp_nids=None, **kwargs): """ CommandLine: python -m ibeis --tf make_name_graph_interaction --db PZ_MTEST \ --aids=1,2,3,4,5,6,7,8,9 --show python -m ibeis --tf make_name_graph_interaction --db LEWA_splits \ --nids=1 --show --split Example: >>> # DISABLE_DOCTEST >>> from ibeis.viz.viz_graph import * # NOQA >>> import ibeis >>> import plottool as pt >>> exec(ut.execstr_funckw(make_name_graph_interaction), globals()) >>> defaultdb='testdb1' >>> ibs = ibeis.opendb(defaultdb=defaultdb) >>> aids = ut.get_argval('--aids', type_=list, default=None) >>> nids = ut.get_argval('--nids', type_=list, default=ibs.get_valid_nids()[0:5]) >>> nids = None if aids is not None else nids >>> with_all = not ut.get_argflag('--no-with-all') >>> make_name_graph_interaction(ibs, nids, aids, with_all=with_all) >>> #pt.zoom_factory() >>> ut.show_if_requested() """ if aids is None and nids is not None: aids = ut.flatten(ibs.get_name_aids(nids)) elif nids is not None and aids is not None: aids += ibs.get_name_aids(nids) aids = ut.unique(aids) if with_all: nids = ut.unique(ibs.get_annot_name_rowids(aids)) aids = ut.flatten(ibs.get_name_aids(nids)) #aids = aids[0:10] nids = ibs.get_annot_name_rowids(aids) #from ibeis.algo.graph import graph_iden #infr = graph_iden.AnnotInference(aids, nids, temp_nids) # NOQA #import utool #utool.embed() from ibeis.algo.graph import graph_iden infr = graph_iden.AnnotInference(ibs, aids, nids, temp_nids) infr.initialize_graph() #infr.apply_scores() #infr.apply_weights() if ut.get_argflag('--cut'): infr.apply_all() #import guitool as gt #gt.ensure_qtapp() #print('infr = %r' % (infr,)) #win = test_qt_graphs(infr=infr, use_image=use_image) #self = win #gt.qtapp_loop(qwin=win, freq=10) self = AnnotGraphInteraction(infr, selected_aids=selected_aids, use_image=use_image) self.show_page() self.show() return self
def find_consistent_labeling(grouped_oldnames, extra_prefix='_extra_name', verbose=False): r""" Solves a a maximum bipirtite matching problem to find a consistent name assignment that minimizes the number of annotations with different names. For each new grouping of annotations we assign For each group of annotations we must assign them all the same name, either from To reduce the running time Args: gropued_oldnames (list): A group of old names where the grouping is based on new names. For instance: Given: aids = [1, 2, 3, 4, 5] old_names = [0, 1, 1, 1, 0] new_names = [0, 0, 1, 1, 0] The grouping is [[0, 1, 0], [1, 1]] This lets us keep the old names in a split case and re-use exising names and make minimal changes to current annotation names while still being consistent with the new and improved grouping. The output will be: [0, 1] Meaning that all annots in the first group are assigned the name 0 and all annots in the second group are assigned the name 1. References: http://stackoverflow.com/questions/1398822/assignment-problem-numpy CommandLine: python -m ibeis.scripts.name_recitifer find_consistent_labeling Example: >>> # ENABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> grouped_oldnames = testdata_oldnames(25, 15, 5, n_per_incon=5) >>> new_names = find_consistent_labeling(grouped_oldnames, verbose=1) >>> grouped_oldnames = testdata_oldnames(0, 15, 5, n_per_incon=1) >>> new_names = find_consistent_labeling(grouped_oldnames, verbose=1) >>> grouped_oldnames = testdata_oldnames(0, 0, 0, n_per_incon=1) >>> new_names = find_consistent_labeling(grouped_oldnames, verbose=1) Example: >>> # ENABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> ydata = [] >>> xdata = list(range(10, 150, 50)) >>> for x in xdata: >>> print('x = %r' % (x,)) >>> grouped_oldnames = testdata_oldnames(x, 15, 5, n_per_incon=5) >>> t = ut.Timerit(3, verbose=1) >>> for timer in t: >>> with timer: >>> new_names = find_consistent_labeling(grouped_oldnames) >>> ydata.append(t.ave_secs) >>> ut.quit_if_noshow() >>> import plottool_ibeis as pt >>> pt.qtensure() >>> pt.multi_plot(xdata, [ydata]) >>> ut.show_if_requested() Example: >>> # ENABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> grouped_oldnames = [['a', 'b', 'c'], ['b', 'c'], ['c', 'e', 'e']] >>> new_names = find_consistent_labeling(grouped_oldnames, verbose=1) >>> result = ut.repr2(new_names) >>> print(new_names) ['a', 'b', 'e'] Example: >>> # ENABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> grouped_oldnames = [['a', 'b'], ['a', 'a', 'b'], ['a']] >>> new_names = find_consistent_labeling(grouped_oldnames) >>> result = ut.repr2(new_names) >>> print(new_names) ['b', 'a', '_extra_name0'] Example: >>> # ENABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> grouped_oldnames = [['a', 'b'], ['e'], ['a', 'a', 'b'], [], ['a'], ['d']] >>> new_names = find_consistent_labeling(grouped_oldnames) >>> result = ut.repr2(new_names) >>> print(new_names) ['b', 'e', 'a', '_extra_name0', '_extra_name1', 'd'] Example: >>> # ENABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> grouped_oldnames = [[], ['a', 'a'], [], >>> ['a', 'a', 'a', 'a', 'a', 'a', 'a', 'b'], ['a']] >>> new_names = find_consistent_labeling(grouped_oldnames) >>> result = ut.repr2(new_names) >>> print(new_names) ['_extra_name0', 'a', '_extra_name1', 'b', '_extra_name2'] """ unique_old_names = ut.unique(ut.flatten(grouped_oldnames)) n_old_names = len(unique_old_names) n_new_names = 
len(grouped_oldnames) # Initialize assignment to all Nones assignment = [None for _ in range(n_new_names)] if verbose: print('finding maximally consistent labeling') print('n_old_names = %r' % (n_old_names, )) print('n_new_names = %r' % (n_new_names, )) # For each old_name, determine now many new_names use it. oldname_sets = list(map(set, grouped_oldnames)) oldname_usage = ut.dict_hist(ut.flatten(oldname_sets)) # Any name used more than once is a conflict and must be resolved conflict_oldnames = {k for k, v in oldname_usage.items() if v > 1} # Partition into trivial and non-trivial cases nontrivial_oldnames = [] nontrivial_new_idxs = [] trivial_oldnames = [] trivial_new_idxs = [] for new_idx, group in enumerate(grouped_oldnames): if set(group).intersection(conflict_oldnames): nontrivial_oldnames.append(group) nontrivial_new_idxs.append(new_idx) else: trivial_oldnames.append(group) trivial_new_idxs.append(new_idx) # Rectify trivial cases # Any new-name that does not share any of its old-names with other # new-names can be resolved trivially n_trivial_unchanged = 0 n_trivial_ignored = 0 n_trivial_merges = 0 for group, new_idx in zip(trivial_oldnames, trivial_new_idxs): if len(group) > 0: # new-names that use more than one old-name are simple merges h = ut.dict_hist(group) if len(h) > 1: n_trivial_merges += 1 else: n_trivial_unchanged += 1 hitems = list(h.items()) hvals = [i[1] for i in hitems] maxval = max(hvals) g = min([k for k, v in hitems if v == maxval]) assignment[new_idx] = g else: # new-names that use no old-names can be ignored n_trivial_ignored += 1 if verbose: n_trivial = len(trivial_oldnames) n_nontrivial = len(nontrivial_oldnames) print('rectify %d trivial groups' % (n_trivial, )) print(' * n_trivial_unchanged = %r' % (n_trivial_unchanged, )) print(' * n_trivial_merges = %r' % (n_trivial_merges, )) print(' * n_trivial_ignored = %r' % (n_trivial_ignored, )) print('rectify %d non-trivial groups' % (n_nontrivial, )) # Partition nontrivial_oldnames into smaller disjoint sets nontrivial_oldnames_sets = list(map(set, nontrivial_oldnames)) import networkx as nx g = nx.Graph() g.add_nodes_from(range(len(nontrivial_oldnames_sets))) for u, group1 in enumerate(nontrivial_oldnames_sets): rest = nontrivial_oldnames_sets[u + 1:] for v, group2 in enumerate(rest, start=u + 1): if group1.intersection(group2): g.add_edge(u, v) nontrivial_partition = list(nx.connected_components(g)) if verbose: print(' * partitioned non-trivial into %d subgroups' % (len(nontrivial_partition))) part_size_stats = ut.get_stats(map(len, nontrivial_partition)) stats_str = ut.repr2(part_size_stats, precision=2, strkeys=True) print(' * partition size stats = %s' % (stats_str, )) # Rectify nontrivial cases for part_idxs in ut.ProgIter(nontrivial_partition, labels='rectify parts', enabled=verbose): part_oldnames = ut.take(nontrivial_oldnames, part_idxs) part_newidxs = ut.take(nontrivial_new_idxs, part_idxs) # Rectify this part assignment_ = simple_munkres(part_oldnames) for new_idx, new_name in zip(part_newidxs, assignment_): assignment[new_idx] = new_name # Any unassigned name is now given a new unique label with a prefix if extra_prefix is not None: num_extra = 0 for idx, val in enumerate(assignment): if val is None: assignment[idx] = '%s%d' % ( extra_prefix, num_extra, ) num_extra += 1 return assignment
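# The partition step in find_consistent_labeling only connects two new-name
# groups when they share a conflicting old name, so each connected component
# is an independent (and much smaller) assignment problem. A standalone sketch
# of that idea with made-up group data:
import networkx as nx

groups = [['a', 'b'], ['b'], ['c'], ['c', 'd'], ['e']]
g = nx.Graph()
g.add_nodes_from(range(len(groups)))
for u in range(len(groups)):
    for v in range(u + 1, len(groups)):
        if set(groups[u]).intersection(groups[v]):
            g.add_edge(u, v)
# Three independent subproblems: [{0, 1}, {2, 3}, {4}]
print(list(nx.connected_components(g)))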
def find_consistent_labeling(grouped_oldnames): """ Solves a a maximum bipirtite matching problem to find a consistent name assignment. Notes: # Install module containing the Hungarian algorithm for matching pip install munkres Example: >>> # DISABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> grouped_oldnames = [['a', 'b'], ['b', 'c'], ['c', 'a', 'a']] >>> new_names = find_consistent_labeling(grouped_oldnames) >>> print(new_names) [u'b', u'c', u'a'] Example: >>> # DISABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> grouped_oldnames = [['a', 'b', 'c'], ['b', 'c'], ['c', 'e', 'e']] >>> new_names = find_consistent_labeling(grouped_oldnames) >>> print(new_names) [u'a', u'b', u'e'] Example: >>> # DISABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> grouped_oldnames = [['a', 'b'], ['a', 'a', 'b'], ['a']] >>> new_names = find_consistent_labeling(grouped_oldnames) >>> print(new_names) [u'a', u'b', u'e'] """ import numpy as np try: import munkres except ImportError: print('Need to install Hungrian algorithm bipartite matching solver.') print('Run:') print('pip install munkres') raise unique_old_names = ut.unique(ut.flatten(grouped_oldnames)) num_new_names = len(grouped_oldnames) num_old_names = len(unique_old_names) extra_oldnames = [] # Create padded dummy values. This accounts for the case where it is # impossible to uniquely map to the old db num_extra = num_new_names - num_old_names if num_extra > 0: extra_oldnames = ['_extra_name%d' % (count,) for count in range(num_extra)] elif num_extra < 0: pass else: extra_oldnames = [] assignable_names = unique_old_names + extra_oldnames total = len(assignable_names) # Allocate assignment matrix profit_matrix = np.zeros((total, total), dtype=np.int) # Populate assignment profit matrix oldname2_idx = ut.make_index_lookup(assignable_names) name_freq_list = [ut.dict_hist(names) for names in grouped_oldnames] for rowx, name_freq in enumerate(name_freq_list): for name, freq in name_freq.items(): colx = oldname2_idx[name] profit_matrix[rowx, colx] += freq # Add extra profit for using a previously used name profit_matrix[profit_matrix > 0] += 2 # Add small profit for using an extra name extra_colxs = ut.take(oldname2_idx, extra_oldnames) profit_matrix[:, extra_colxs] += 1 # Convert to minimization problem big_value = (profit_matrix.max()) cost_matrix = big_value - profit_matrix m = munkres.Munkres() indexes = m.compute(cost_matrix) # Map output to be aligned with input rx2_cx = dict(indexes) assignment = [assignable_names[rx2_cx[rx]] for rx in range(num_new_names)] return assignment
def make_graph(infr, show=False): import networkx as nx import itertools cm_list = infr.cm_list unique_nids, prob_names = infr.make_prob_names() thresh = infr.choose_thresh() # Simply cut any edge with a weight less than a threshold qaid_list = [cm.qaid for cm in cm_list] postcut = prob_names > thresh qxs, nxs = np.where(postcut) if False: kw = dict(precision=2, max_line_width=140, suppress_small=True) print( ut.hz_str('prob_names = ', ut.array2string2((prob_names), **kw))) print( ut.hz_str('postcut = ', ut.array2string2((postcut).astype(np.int), **kw))) matching_qaids = ut.take(qaid_list, qxs) matched_nids = ut.take(unique_nids, nxs) qreq_ = infr.qreq_ nodes = ut.unique(qreq_.qaids.tolist() + qreq_.daids.tolist()) if not hasattr(qreq_, 'dnids'): qreq_.dnids = qreq_.ibs.get_annot_nids(qreq_.daids) qreq_.qnids = qreq_.ibs.get_annot_nids(qreq_.qaids) dnid2_daids = ut.group_items(qreq_.daids, qreq_.dnids) grouped_aids = dnid2_daids.values() matched_daids = ut.take(dnid2_daids, matched_nids) name_cliques = [ list(itertools.combinations(aids, 2)) for aids in grouped_aids ] aid_matches = [ list(ut.product([qaid], daids)) for qaid, daids in zip(matching_qaids, matched_daids) ] graph = nx.Graph() graph.add_nodes_from(nodes) graph.add_edges_from(ut.flatten(name_cliques)) graph.add_edges_from(ut.flatten(aid_matches)) #matchless_quries = ut.take(qaid_list, ut.index_complement(qxs, len(qaid_list))) name_nodes = [('nid', l) for l in qreq_.dnids] db_aid_nid_edges = list(zip(qreq_.daids, name_nodes)) #query_aid_nid_edges = list(zip(matching_qaids, [('nid', l) for l in matched_nids])) #G = nx.Graph() #G.add_nodes_from(matchless_quries) #G.add_edges_from(db_aid_nid_edges) #G.add_edges_from(query_aid_nid_edges) graph.add_edges_from(db_aid_nid_edges) if infr.user_feedback is not None: user_feedback = ut.map_dict_vals(np.array, infr.user_feedback) p_bg = 0.0 part1 = user_feedback['p_match'] * (1 - user_feedback['p_notcomp']) part2 = p_bg * user_feedback['p_notcomp'] p_same_list = part1 + part2 for aid1, aid2, p_same in zip(user_feedback['aid1'], user_feedback['aid2'], p_same_list): if p_same > .5: if not graph.has_edge(aid1, aid2): graph.add_edge(aid1, aid2) else: if graph.has_edge(aid1, aid2): graph.remove_edge(aid1, aid2) if show: import plottool as pt nx.set_node_attributes(graph, 'color', {aid: pt.LIGHT_PINK for aid in qreq_.daids}) nx.set_node_attributes(graph, 'color', {aid: pt.TRUE_BLUE for aid in qreq_.qaids}) nx.set_node_attributes( graph, 'color', { aid: pt.LIGHT_PURPLE for aid in np.intersect1d(qreq_.qaids, qreq_.daids) }) nx.set_node_attributes( graph, 'label', {node: 'n%r' % (node[1], ) for node in name_nodes}) nx.set_node_attributes( graph, 'color', {node: pt.LIGHT_GREEN for node in name_nodes}) if show: import plottool as pt pt.show_nx(graph, layoutkw={'prog': 'neato'}, verbose=False) return graph
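# In the user-feedback block above, the probability that a reviewed pair is
# the same animal marginalizes over comparability; with the background prior
# p_bg pinned to 0.0, an incomparable pair contributes nothing. A tiny numeric
# check of that combination rule (the values here are made up):
import numpy as np

p_match = np.array([0.9, 0.9, 0.2])
p_notcomp = np.array([0.0, 1.0, 0.0])
p_bg = 0.0
p_same = p_match * (1 - p_notcomp) + p_bg * p_notcomp
# -> [0.9, 0.0, 0.2]: edges above 0.5 are added, the rest removed if present.
print(p_same)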
def intraoccurrence_connected(): r""" CommandLine: python -m ibeis.scripts.specialdraw intraoccurrence_connected --show python -m ibeis.scripts.specialdraw intraoccurrence_connected --show --postcut python -m ibeis.scripts.specialdraw intraoccurrence_connected --show --smaller Example: >>> # DISABLE_DOCTEST >>> from ibeis.scripts.specialdraw import * # NOQA >>> result = intraoccurrence_connected() >>> print(result) >>> ut.quit_if_noshow() >>> import plottool as pt >>> ut.show_if_requested() """ import ibeis import plottool as pt from ibeis.viz import viz_graph import networkx as nx pt.ensure_pylab_qt4() ibs = ibeis.opendb(defaultdb='PZ_Master1') nid2_aid = { #4880: [3690, 3696, 3703, 3706, 3712, 3721], 4880: [3690, 3696, 3703], 6537: [3739], 6653: [7671], 6610: [7566, 7408], #6612: [7664, 7462, 7522], #6624: [7465, 7360], #6625: [7746, 7383, 7390, 7477, 7376, 7579], 6630: [7586, 7377, 7464, 7478], #6677: [7500] } nid2_dbaids = {4880: [33, 6120, 7164], 6537: [7017, 7206], 6653: [7660]} if ut.get_argflag('--small') or ut.get_argflag('--smaller'): del nid2_aid[6630] del nid2_aid[6537] del nid2_dbaids[6537] if ut.get_argflag('--smaller'): nid2_dbaids[4880].remove(33) nid2_aid[4880].remove(3690) nid2_aid[6610].remove(7408) #del nid2_aid[4880] #del nid2_dbaids[4880] aids = ut.flatten(nid2_aid.values()) temp_nids = [1] * len(aids) postcut = ut.get_argflag('--postcut') aids_list = ibs.group_annots_by_name(aids)[0] ensure_edges = 'all' if True or not postcut else None unlabeled_graph = viz_graph.make_netx_graph_from_aid_groups( ibs, aids_list, #invis_edges=invis_edges, ensure_edges=ensure_edges, temp_nids=temp_nids) viz_graph.color_by_nids(unlabeled_graph, unique_nids=[1] * len(list(unlabeled_graph.nodes()))) viz_graph.ensure_node_images(ibs, unlabeled_graph) nx.set_node_attributes(unlabeled_graph, 'shape', 'rect') #unlabeled_graph = unlabeled_graph.to_undirected() # Find the "database exemplars for these annots" if False: gt_aids = ibs.get_annot_groundtruth(aids) gt_aids = [ut.setdiff(s, aids) for s in gt_aids] dbaids = ut.unique(ut.flatten(gt_aids)) dbaids = ibs.filter_annots_general(dbaids, minqual='good') ibs.get_annot_quality_texts(dbaids) else: dbaids = ut.flatten(nid2_dbaids.values()) exemplars = nx.DiGraph() #graph = exemplars # NOQA exemplars.add_nodes_from(dbaids) def add_clique(graph, nodes, edgeattrs={}, nodeattrs={}): edge_list = ut.upper_diag_self_prodx(nodes) graph.add_edges_from(edge_list, **edgeattrs) return edge_list for aids_, nid in zip(*ibs.group_annots_by_name(dbaids)): add_clique(exemplars, aids_) viz_graph.ensure_node_images(ibs, exemplars) viz_graph.color_by_nids(exemplars, ibs=ibs) nx.set_node_attributes(unlabeled_graph, 'framewidth', False) nx.set_node_attributes(exemplars, 'framewidth', 4.0) nx.set_node_attributes(unlabeled_graph, 'group', 'unlab') nx.set_node_attributes(exemplars, 'group', 'exemp') #big_graph = nx.compose_all([unlabeled_graph]) big_graph = nx.compose_all([exemplars, unlabeled_graph]) # add sparse connections from unlabeled to exemplars import numpy as np rng = np.random.RandomState(0) if True or not postcut: for aid_ in unlabeled_graph.nodes(): flags = rng.rand(len(exemplars)) > .5 nid_ = ibs.get_annot_nids(aid_) exnids = np.array(ibs.get_annot_nids(list(exemplars.nodes()))) flags = np.logical_or(exnids == nid_, flags) exmatches = ut.compress(list(exemplars.nodes()), flags) big_graph.add_edges_from(list(ut.product([aid_], exmatches)), color=pt.ORANGE, implicit=True) else: for aid_ in unlabeled_graph.nodes(): flags = rng.rand(len(exemplars)) > .5 
exmatches = ut.compress(list(exemplars.nodes()), flags) nid_ = ibs.get_annot_nids(aid_) exnids = np.array(ibs.get_annot_nids(exmatches)) exmatches = ut.compress(exmatches, exnids == nid_) big_graph.add_edges_from(list(ut.product([aid_], exmatches))) pass nx.set_node_attributes(big_graph, 'shape', 'rect') #if False and postcut: # ut.nx_delete_node_attr(big_graph, 'nid') # ut.nx_delete_edge_attr(big_graph, 'color') # viz_graph.ensure_graph_nid_labels(big_graph, ibs=ibs) # viz_graph.color_by_nids(big_graph, ibs=ibs) # big_graph = big_graph.to_undirected() layoutkw = { 'sep': 1 / 5, 'prog': 'neato', 'overlap': 'false', #'splines': 'ortho', 'splines': 'spline', } as_directed = False #as_directed = True #hacknode = True hacknode = 0 graph = big_graph ut.nx_ensure_agraph_color(graph) if hacknode: nx.set_edge_attributes(graph, 'taillabel', {e: str(e[0]) for e in graph.edges()}) nx.set_edge_attributes(graph, 'headlabel', {e: str(e[1]) for e in graph.edges()}) explicit_graph = pt.get_explicit_graph(graph) _, layout_info = pt.nx_agraph_layout(explicit_graph, orig_graph=graph, inplace=True, **layoutkw) if ut.get_argflag('--smaller'): graph.node[7660]['pos'] = np.array([550, 350]) graph.node[6120]['pos'] = np.array([200, 600]) + np.array([350, -400]) graph.node[7164]['pos'] = np.array([200, 480]) + np.array([350, -400]) nx.set_node_attributes(graph, 'pin', 'true') _, layout_info = pt.nx_agraph_layout(graph, inplace=True, **layoutkw) elif ut.get_argflag('--small'): graph.node[7660]['pos'] = np.array([750, 350]) graph.node[33]['pos'] = np.array([300, 600]) + np.array([350, -400]) graph.node[6120]['pos'] = np.array([500, 600]) + np.array([350, -400]) graph.node[7164]['pos'] = np.array([410, 480]) + np.array([350, -400]) nx.set_node_attributes(graph, 'pin', 'true') _, layout_info = pt.nx_agraph_layout(graph, inplace=True, **layoutkw) if not postcut: #pt.show_nx(graph.to_undirected(), layout='agraph', layoutkw=layoutkw, # as_directed=False) #pt.show_nx(graph, layout='agraph', layoutkw=layoutkw, # as_directed=as_directed, hacknode=hacknode) pt.show_nx(graph, layout='custom', layoutkw=layoutkw, as_directed=as_directed, hacknode=hacknode) else: #explicit_graph = pt.get_explicit_graph(graph) #_, layout_info = pt.nx_agraph_layout(explicit_graph, orig_graph=graph, # **layoutkw) #layout_info['edge']['alpha'] = .8 #pt.apply_graph_layout_attrs(graph, layout_info) #graph_layout_attrs = layout_info['graph'] ##edge_layout_attrs = layout_info['edge'] ##node_layout_attrs = layout_info['node'] #for key, vals in layout_info['node'].items(): # #print('[special] key = %r' % (key,)) # nx.set_node_attributes(graph, key, vals) #for key, vals in layout_info['edge'].items(): # #print('[special] key = %r' % (key,)) # nx.set_edge_attributes(graph, key, vals) #nx.set_edge_attributes(graph, 'alpha', .8) #graph.graph['splines'] = graph_layout_attrs.get('splines', 'line') #graph.graph['splines'] = 'polyline' # graph_layout_attrs.get('splines', 'line') #graph.graph['splines'] = 'line' cut_graph = graph.copy() edge_list = list(cut_graph.edges()) edge_nids = np.array(ibs.unflat_map(ibs.get_annot_nids, edge_list)) cut_flags = edge_nids.T[0] != edge_nids.T[1] cut_edges = ut.compress(edge_list, cut_flags) cut_graph.remove_edges_from(cut_edges) ut.nx_delete_node_attr(cut_graph, 'nid') viz_graph.ensure_graph_nid_labels(cut_graph, ibs=ibs) #ut.nx_get_default_node_attributes(exemplars, 'color', None) ut.nx_delete_node_attr(cut_graph, 'color', nodes=unlabeled_graph.nodes()) aid2_color = ut.nx_get_default_node_attributes(cut_graph, 'color', None) 
nid2_colors = ut.group_items(aid2_color.values(), ibs.get_annot_nids(aid2_color.keys())) nid2_colors = ut.map_dict_vals(ut.filter_Nones, nid2_colors) nid2_colors = ut.map_dict_vals(ut.unique, nid2_colors) #for val in nid2_colors.values(): # assert len(val) <= 1 # Get initial colors nid2_color_ = { nid: colors_[0] for nid, colors_ in nid2_colors.items() if len(colors_) == 1 } graph = cut_graph viz_graph.color_by_nids(cut_graph, ibs=ibs, nid2_color_=nid2_color_) nx.set_node_attributes(cut_graph, 'framewidth', 4) pt.show_nx(cut_graph, layout='custom', layoutkw=layoutkw, as_directed=as_directed, hacknode=hacknode) pt.zoom_factory()
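# The add_clique helper above enumerates every unordered pair of exemplar
# annotations for a name via ut.upper_diag_self_prodx; for reference,
# itertools.combinations should yield the same edge set:
import itertools

assert list(itertools.combinations([1, 2, 3], 2)) == [(1, 2), (1, 3), (2, 3)]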
def add_column_names(self, new_colnames):
    col_name_list = ut.unique(self.col_name_list + new_colnames)
    self.update_column_names(col_name_list)
def simple_munkres(part_oldnames): """ Defines a munkres problem to solve name rectification. Notes: We create a matrix where each rows represents a group of annotations in the same PCC and each column represents an original name. If there are more PCCs than original names the columns are padded with extra values. The matrix is first initialized to be negative infinity representing impossible assignments. Then for each column representing a padded name, we set we its value to $1$ indicating that each new name could be assigned to a padded name for some small profit. Finally, let $f_{rc}$ be the the number of annotations in row $r$ with an original name of $c$. Each matrix value $(r, c)$ is set to $f_{rc} + 1$ if $f_{rc} > 0$, to represent how much each name ``wants'' to be labeled with a particular original name, and the extra one ensures that these original names are always preferred over padded names. CommandLine: python -m ibeis.scripts.name_recitifer simple_munkres Example: >>> # ENABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> part_oldnames = [['a', 'b'], ['b', 'c'], ['c', 'a', 'a']] >>> new_names = simple_munkres(part_oldnames) >>> result = ut.repr2(new_names) >>> print(new_names) ['b', 'c', 'a'] Example: >>> # ENABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> part_oldnames = [[], ['a', 'a'], [], >>> ['a', 'a', 'a', 'a', 'a', 'a', 'a', 'b'], ['a']] >>> new_names = simple_munkres(part_oldnames) >>> result = ut.repr2(new_names) >>> print(new_names) [None, 'a', None, 'b', None] Example: >>> # ENABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> part_oldnames = [[], ['b'], ['a', 'b', 'c'], ['b', 'c'], ['c', 'e', 'e']] >>> new_names = find_consistent_labeling(part_oldnames) >>> result = ut.repr2(new_names) >>> print(new_names) ['_extra_name0', 'b', 'a', 'c', 'e'] Profit Matrix b a c e _0 0 -10 -10 -10 -10 1 1 2 -10 -10 -10 1 2 2 2 2 -10 1 3 2 -10 2 -10 1 4 -10 -10 2 3 1 """ import numpy as np import scipy.optimize unique_old_names = ut.unique(ut.flatten(part_oldnames)) num_new_names = len(part_oldnames) num_old_names = len(unique_old_names) # Create padded dummy values. This accounts for the case where it is # impossible to uniquely map to the old db num_pad = max(num_new_names - num_old_names, 0) total = num_old_names + num_pad shape = (total, total) # Allocate assignment matrix. # rows are new-names and cols are old-names. # Initially the profit of any assignment is effectively -inf # This effectively marks all assignments as invalid profit_matrix = np.full(shape, -2 * total, dtype=np.int) # Overwrite valid assignments with positive profits oldname2_idx = ut.make_index_lookup(unique_old_names) name_freq_list = [ut.dict_hist(names) for names in part_oldnames] # Initialize profit of a valid assignment as 1 + freq # This incentivizes using a previously used name for rowx, name_freq in enumerate(name_freq_list): for name, freq in name_freq.items(): colx = oldname2_idx[name] profit_matrix[rowx, colx] = freq + 1 # Set a much smaller profit for using an extra name # This allows the solution to always exist profit_matrix[:, num_old_names:total] = 1 # Convert to minimization problem big_value = (profit_matrix.max()) - (profit_matrix.min()) cost_matrix = big_value - profit_matrix # Use scipy implementation of munkres algorithm. 
rx2_cx = dict(zip(*scipy.optimize.linear_sum_assignment(cost_matrix))) # Each row (new-name) has now been assigned a column (old-name) # Map this back to the input-space (using None to indicate extras) cx2_name = dict(enumerate(unique_old_names)) if False: import pandas as pd columns = unique_old_names + ['_%r' % x for x in range(num_pad)] print('Profit Matrix') print(pd.DataFrame(profit_matrix, columns=columns)) print('Cost Matrix') print(pd.DataFrame(cost_matrix, columns=columns)) assignment_ = [ cx2_name.get(rx2_cx[rx], None) for rx in range(num_new_names) ] return assignment_
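# A standalone check of the assignment machinery used by simple_munkres:
# scipy.optimize.linear_sum_assignment minimizes total cost, so subtracting
# the profit matrix from (max - min) turns "most profitable" into "cheapest".
import numpy as np
import scipy.optimize

profit = np.array([[2, 1, 1],
                   [1, 2, 1],
                   [1, 1, 3]])
cost = (profit.max() - profit.min()) - profit
rows, cols = scipy.optimize.linear_sum_assignment(cost)
# Each row keeps its own best column, i.e. the diagonal assignment.
assert list(cols) == [0, 1, 2]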
def main(bib_fpath=None): r""" intro point to fixbib script CommmandLine: fixbib python -m fixtex bib python -m fixtex bib --dryrun python -m fixtex bib --dryrun --debug """ if bib_fpath is None: bib_fpath = 'My Library.bib' # DEBUG = ub.argflag('--debug') # Read in text and ensure ascii format dirty_text = ut.readfrom(bib_fpath) from fixtex.fix_tex import find_used_citations, testdata_fpaths if exists('custom_extra.bib'): extra_parser = bparser.BibTexParser(ignore_nonstandard_types=False) parser = bparser.BibTexParser() ut.delete_keys(parser.alt_dict, ['url', 'urls']) print('Parsing extra bibtex file') extra_text = ut.readfrom('custom_extra.bib') extra_database = extra_parser.parse(extra_text, partial=False) print('Finished parsing extra') extra_dict = extra_database.get_entry_dict() else: extra_dict = None #udata = dirty_text.decode("utf-8") #dirty_text = udata.encode("ascii", "ignore") #dirty_text = udata # parser = bparser.BibTexParser() # bib_database = parser.parse(dirty_text) # d = bib_database.get_entry_dict() print('BIBTEXPARSER LOAD') parser = bparser.BibTexParser(ignore_nonstandard_types=False, common_strings=True) ut.delete_keys(parser.alt_dict, ['url', 'urls']) print('Parsing bibtex file') bib_database = parser.parse(dirty_text, partial=False) print('Finished parsing') bibtex_dict = bib_database.get_entry_dict() old_keys = list(bibtex_dict.keys()) new_keys = [] for key in ub.ProgIter(old_keys, label='fixing keys'): new_key = key new_key = new_key.replace(':', '') new_key = new_key.replace('-', '_') new_key = re.sub('__*', '_', new_key) new_keys.append(new_key) # assert len(ut.find_duplicate_items(new_keys)) == 0, 'new keys created conflict' assert len(ub.find_duplicates(new_keys)) == 0, 'new keys created conflict' for key, new_key in zip(old_keys, new_keys): if key != new_key: entry = bibtex_dict[key] entry['ID'] = new_key bibtex_dict[new_key] = entry del bibtex_dict[key] # The bibtext is now clean. 
Print it to stdout #print(clean_text) verbose = None if verbose is None: verbose = 1 # Find citations from the tex documents key_list = None if key_list is None: cacher = ub.Cacher('texcite1', enabled=0) data = cacher.tryload() if data is None: fpaths = testdata_fpaths() key_list, inverse = find_used_citations(fpaths, return_inverse=True) # ignore = ['JP', '?', 'hendrick'] # for item in ignore: # try: # key_list.remove(item) # except ValueError: # pass if verbose: print('Found %d citations used in the document' % (len(key_list), )) data = key_list, inverse cacher.save(data) key_list, inverse = data # else: # key_list = None unknown_pubkeys = [] debug_author = ub.argval('--debug-author', default=None) # ./fix_bib.py --debug_author=Kappes if verbose: print('Fixing %d/%d bibtex entries' % (len(key_list), len(bibtex_dict))) # debug = True debug = False if debug_author is not None: debug = False known_keys = list(bibtex_dict.keys()) missing_keys = set(key_list) - set(known_keys) if extra_dict is not None: missing_keys.difference_update(set(extra_dict.keys())) if missing_keys: print('The library is missing keys found in tex files %s' % (ub.repr2(missing_keys), )) # Search for possible typos: candidate_typos = {} sedlines = [] for key in missing_keys: candidates = ut.closet_words(key, known_keys, num=3, subset=True) if len(candidates) > 1: top = candidates[0] if ut.edit_distance(key, top) == 1: # "sed -i -e 's/{}/{}/g' *.tex".format(key, top) import os replpaths = ' '.join( [relpath(p, os.getcwd()) for p in inverse[key]]) sedlines.append("sed -i -e 's/{}/{}/g' {}".format( key, top, replpaths)) candidate_typos[key] = candidates print('Cannot find key = %r' % (key, )) print('Did you mean? %r' % (candidates, )) print('Quick fixes') print('\n'.join(sedlines)) # group by file just = max([0] + list(map(len, missing_keys))) missing_fpaths = [inverse[key] for key in missing_keys] for fpath in sorted(set(ub.flatten(missing_fpaths))): # ut.fix_embed_globals() subkeys = [k for k in missing_keys if fpath in inverse[k]] print('') ut.cprint('--- Missing Keys ---', 'blue') ut.cprint('fpath = %r' % (fpath, ), 'blue') ut.cprint('{} | {}'.format('Missing'.ljust(just), 'Did you mean?'), 'blue') for key in subkeys: print('{} | {}'.format(ut.highlight_text(key.ljust(just), 'red'), ' '.join(candidate_typos[key]))) # for key in list(bibtex_dict.keys()): if extra_dict is not None: # Extra database takes precidence over regular key_list = list(ut.unique(key_list + list(extra_dict.keys()))) for k, v in extra_dict.items(): bibtex_dict[k] = v full = ub.argflag('--full') for key in key_list: try: entry = bibtex_dict[key] except KeyError: continue self = BibTexCleaner(key, entry, full=full) if debug_author is not None: debug = debug_author in entry.get('author', '') if debug: ut.cprint(' --- ENTRY ---', 'yellow') print(ub.repr2(entry, nl=1)) entry = self.fix() # self.clip_abstract() # self.shorten_keys() # self.fix_authors() # self.fix_year() # old_pubval = self.fix_pubkey() # if old_pubval: # unknown_pubkeys.append(old_pubval) # self.fix_arxiv() # self.fix_general() # self.fix_paper_types() if debug: print(ub.repr2(entry, nl=1)) ut.cprint(' --- END ENTRY ---', 'yellow') bibtex_dict[key] = entry unwanted_keys = set(bibtex_dict.keys()) - set(key_list) if verbose: print('Removing unwanted %d entries' % (len(unwanted_keys))) ut.delete_dict_keys(bibtex_dict, unwanted_keys) if 0: d1 = bibtex_dict.copy() full = True for key, entry in d1.items(): self = BibTexCleaner(key, entry, full=full) pub = self.publication() if pub is None: 
print(self.entry['ENTRYTYPE']) old = self.fix_pubkey() x1 = self._pubval() x2 = self.standard_pubval(full=full) # if x2 is not None and len(x2) > 5: # print(ub.repr2(self.entry)) if x1 != x2: print('x2 = %r' % (x2, )) print('x1 = %r' % (x1, )) print(ub.repr2(self.entry)) # if 'CVPR' in self.entry.get('booktitle', ''): # if 'CVPR' != self.entry.get('booktitle', ''): # break if old: print('old = %r' % (old, )) d1[key] = self.entry if full: d1 = bibtex_dict.copy() import numpy as np import pandas as pd df = pd.DataFrame.from_dict(d1, orient='index') paged_items = df[~pd.isnull(df['pub_accro'])] has_pages = ~pd.isnull(paged_items['pages']) print('have pages {} / {}'.format(has_pages.sum(), len(has_pages))) print(ub.repr2(paged_items[~has_pages]['title'].values.tolist())) entrytypes = dict(list(df.groupby('pub_type'))) if False: # entrytypes['misc'] g = entrytypes['online'] g = g[g.columns[~np.all(pd.isnull(g), axis=0)]] entrytypes['book'] entrytypes['thesis'] g = entrytypes['article'] g = entrytypes['incollection'] g = entrytypes['conference'] def lookup_pub(e): if e == 'article': return 'journal', 'journal' elif e == 'incollection': return 'booksection', 'booktitle' elif e == 'conference': return 'conference', 'booktitle' return None, None for e, g in entrytypes.items(): print('e = %r' % (e, )) g = g[g.columns[~np.all(pd.isnull(g), axis=0)]] if 'pub_full' in g.columns: place_title = g['pub_full'].tolist() print(ub.repr2(ub.dict_hist(place_title))) else: print('Unknown publications') if 'report' in entrytypes: g = entrytypes['report'] missing = g[pd.isnull(g['title'])] if len(missing): print('Missing Title') print(ub.repr2(missing[['title', 'author']].values.tolist())) if 'journal' in entrytypes: g = entrytypes['journal'] g = g[g.columns[~np.all(pd.isnull(g), axis=0)]] missing = g[pd.isnull(g['journal'])] if len(missing): print('Missing Journal') print(ub.repr2(missing[['title', 'author']].values.tolist())) if 'conference' in entrytypes: g = entrytypes['conference'] g = g[g.columns[~np.all(pd.isnull(g), axis=0)]] missing = g[pd.isnull(g['booktitle'])] if len(missing): print('Missing Booktitle') print(ub.repr2(missing[['title', 'author']].values.tolist())) if 'incollection' in entrytypes: g = entrytypes['incollection'] g = g[g.columns[~np.all(pd.isnull(g), axis=0)]] missing = g[pd.isnull(g['booktitle'])] if len(missing): print('Missing Booktitle') print(ub.repr2(missing[['title', 'author']].values.tolist())) if 'thesis' in entrytypes: g = entrytypes['thesis'] g = g[g.columns[~np.all(pd.isnull(g), axis=0)]] missing = g[pd.isnull(g['institution'])] if len(missing): print('Missing Institution') print(ub.repr2(missing[['title', 'author']].values.tolist())) # import utool # utool.embed() # Overwrite BibDatabase structure bib_database._entries_dict = bibtex_dict bib_database.entries = list(bibtex_dict.values()) #conftitle_to_types_set_hist = {key: set(val) for key, val in conftitle_to_types_hist.items()} #print(ub.repr2(conftitle_to_types_set_hist)) print('Unknown conference keys:') print(ub.repr2(sorted(unknown_pubkeys))) print('len(unknown_pubkeys) = %r' % (len(unknown_pubkeys), )) writer = BibTexWriter() writer.contents = ['comments', 'entries'] writer.indent = ' ' writer.order_entries_by = ('type', 'author', 'year') new_bibtex_str = bibtexparser.dumps(bib_database, writer) # Need to check #jegou_aggregating_2012 # Fix the Journal Abreviations # References: # https://www.ieee.org/documents/trans_journal_names.pdf # Write out clean bibfile in ascii format clean_bib_fpath = 
ub.augpath(bib_fpath.replace(' ', '_'), suffix='_clean') if not ub.argflag('--dryrun'): ut.writeto(clean_bib_fpath, new_bibtex_str)
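# A standalone sketch of the citation-key normalization performed near the top
# of main() (colons removed, dashes mapped to underscores, underscore runs
# collapsed):
import re


def normalize_bibtex_key(key):
    new_key = key.replace(':', '').replace('-', '_')
    return re.sub('__*', '_', new_key)


assert normalize_bibtex_key('smith:2019--deep_learning') == 'smith2019_deep_learning'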
def get_dbinfo(ibs, verbose=True, with_imgsize=False, with_bytes=False, with_contrib=False, with_agesex=False, with_header=True, short=False, tag='dbinfo', aid_list=None): """ Returns dictionary of digestable database information Infostr is a string summary of all the stats. Prints infostr in addition to returning locals Args: ibs (IBEISController): verbose (bool): with_imgsize (bool): with_bytes (bool): Returns: dict: CommandLine: python -m ibeis.other.dbinfo --exec-get_dbinfo:0 python -m ibeis.other.dbinfo --test-get_dbinfo:1 python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3 python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1 python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1 python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a ctrl python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0 python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0 Example1: >>> # SCRIPT >>> from ibeis.other.dbinfo import * # NOQA >>> import ibeis >>> defaultdb = 'testdb1' >>> ibs, aid_list = ibeis.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1') >>> kwargs = ut.get_kwdefaults(get_dbinfo) >>> kwargs['verbose'] = False >>> kwargs['aid_list'] = aid_list >>> kwargs = ut.parse_dict_from_argv(kwargs) >>> output = get_dbinfo(ibs, **kwargs) >>> result = (output['info_str']) >>> print(result) >>> #ibs = ibeis.opendb(defaultdb='testdb1') >>> # <HACK FOR FILTERING> >>> #from ibeis.expt import cfghelpers >>> #from ibeis.expt import annotation_configs >>> #from ibeis.init import filter_annots >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__, >>> # annotation_configs.TEST_NAMES) >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES, >>> # ut.get_list_column(named_defaults_dict, 'qcfg'))) >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0] >>> #aid_list = ibs.get_valid_aids() >>> # </HACK FOR FILTERING> Example1: >>> # ENABLE_DOCTEST >>> from ibeis.other.dbinfo import * # NOQA >>> import ibeis >>> verbose = True >>> short = True >>> #ibs = ibeis.opendb(db='GZ_ALL') >>> #ibs = ibeis.opendb(db='PZ_Master0') >>> ibs = ibeis.opendb('testdb1') >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS' >>> ibs.delete_contributors(ibs.get_valid_contrib_rowids()) >>> ibs.delete_empty_nids() >>> #ibs = ibeis.opendb(db='PZ_MTEST') >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True) >>> result = (output['info_str']) >>> print(result) +============================ DB Info: testdb1 DB Notes: None DB NumContrib: 0 ---------- # Names = 7 # Names (unassociated) = 0 # Names (singleton) = 5 # Names (multiton) = 2 ---------- # Annots = 13 # Annots (unknown) = 4 # Annots (singleton) = 5 # Annots (multiton) = 4 ---------- # Img = 13 L============================ """ # TODO Database size in bytes # TODO: occurrence, contributors, etc... 
# Basic variables request_annot_subset = False _input_aid_list = aid_list # NOQA if aid_list is None: valid_aids = ibs.get_valid_aids() valid_nids = ibs.get_valid_nids() valid_gids = ibs.get_valid_gids() else: if isinstance(aid_list, str): # Hack to get experiment stats on aids acfg_name_list = [aid_list] print('Specified custom aids via acfgname %s' % (acfg_name_list,)) from ibeis.expt import experiment_helpers acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list( ibs, acfg_name_list) aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list))))) #aid_list = if verbose: print('Specified %d custom aids' % (len(aid_list,))) request_annot_subset = True valid_aids = aid_list valid_nids = list( set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False)) - {const.UNKNOWN_NAME_ROWID} ) valid_gids = list(set(ibs.get_annot_gids(aid_list))) #associated_nids = ibs.get_valid_nids(filter_empty=True) # nids with at least one annotation FILTER_HACK = True if FILTER_HACK: # HUGE HACK - get only images and names with filtered aids valid_aids_ = ibs.filter_aids_custom(valid_aids) valid_nids_ = ibs.filter_nids_custom(valid_nids) valid_gids_ = ibs.filter_gids_custom(valid_gids) if verbose: print('Filtered %d names' % (len(valid_nids) - len(valid_nids_))) print('Filtered %d images' % (len(valid_gids) - len(valid_gids_))) print('Filtered %d annots' % (len(valid_aids) - len(valid_aids_))) valid_gids = valid_gids_ valid_nids = valid_nids_ valid_aids = valid_aids_ #associated_nids = ut.compress(associated_nids, map(any, #ibs.unflat_map(ibs.get_annot_custom_filterflags, # ibs.get_name_aids(associated_nids)))) # Image info if verbose: print('Checking Image Info') gx2_aids = ibs.get_image_aids(valid_gids) if FILTER_HACK: gx2_aids = [ibs.filter_aids_custom(aids) for aids in gx2_aids] # HACK FOR FILTER if request_annot_subset: # remove annots not in this subset valid_aids_set = set(valid_aids) gx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in gx2_aids] gx2_nAnnots = np.array(list(map(len, gx2_aids))) image_without_annots = len(np.where(gx2_nAnnots == 0)[0]) gx2_nAnnots_stats = ut.get_stats_str(gx2_nAnnots, newlines=True, use_median=True) image_reviewed_list = ibs.get_image_reviewed(valid_gids) # Name stats if verbose: print('Checking Name Info') nx2_aids = ibs.get_name_aids(valid_nids) if FILTER_HACK: nx2_aids = [ibs.filter_aids_custom(aids) for aids in nx2_aids] # HACK FOR FILTER if request_annot_subset: # remove annots not in this subset valid_aids_set = set(valid_aids) nx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in nx2_aids] associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids))) ibs.check_name_mapping_consistency(nx2_aids) # Occurrence Info def compute_annot_occurrence_ids(ibs, aid_list): from ibeis.algo.preproc import preproc_occurrence gid_list = ibs.get_annot_gids(aid_list) gid2_aids = ut.group_items(aid_list, gid_list) flat_imgsetids, flat_gids = preproc_occurrence.ibeis_compute_occurrences(ibs, gid_list, seconds_thresh=4 * 60 * 60, verbose=False) occurid2_gids = ut.group_items(flat_gids, flat_imgsetids) occurid2_aids = {oid: ut.flatten(ut.take(gid2_aids, gids)) for oid, gids in occurid2_gids.items()} return occurid2_aids import utool with utool.embed_on_exception_context: occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids) occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values()) occur_unique_nids = [ut.unique(nids) for nids in occur_nids] nid2_occurxs = ut.ddict(list) for occurx, nids in 
enumerate(occur_unique_nids): for nid in nids: nid2_occurxs[nid].append(occurx) nid2_occurx_single = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1} nid2_occurx_resight = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1} singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys()) singlesight_annot_stats = ut.get_stats(list(map(len, singlesight_encounters)), use_median=True, use_sum=True) resight_name_stats = ut.get_stats(list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True) try: aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0) undirected_tags = ibs.get_aidpair_tags(aid_pairs.T[0], aid_pairs.T[1], directed=False) tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags)) tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags) pair_tag_info = ut.map_dict_vals(len, tag_dict) num_reviewed_pairs = sum(ibs.get_annot_pair_is_reviewed(aid_pairs.T[0], aid_pairs.T[1])) pair_tag_info['num_reviewed'] = num_reviewed_pairs except Exception: pair_tag_info = {} #print(ut.dict_str(pair_tag_info)) # Annot Stats # TODO: number of images where chips cover entire image # TODO: total image coverage of annotation # TODO: total annotation overlap """ ax2_unknown = ibs.is_aid_unknown(valid_aids) ax2_nid = ibs.get_annot_name_rowids(valid_aids) assert all([nid < 0 if unknown else nid > 0 for nid, unknown in zip(ax2_nid, ax2_unknown)]), 'bad annot nid' """ # if verbose: print('Checking Annot Species') unknown_aids = ut.compress(valid_aids, ibs.is_aid_unknown(valid_aids)) species_list = ibs.get_annot_species_texts(valid_aids) species2_aids = ut.group_items(valid_aids, species_list) species2_nAids = {key: len(val) for key, val in species2_aids.items()} if verbose: print('Checking Multiton/Singleton Species') nx2_nAnnots = np.array(list(map(len, nx2_aids))) # Seperate singleton / multitons multiton_nxs = np.where(nx2_nAnnots > 1)[0] singleton_nxs = np.where(nx2_nAnnots == 1)[0] unassociated_nxs = np.where(nx2_nAnnots == 0)[0] assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names' valid_nxs = np.hstack([multiton_nxs, singleton_nxs]) num_names_with_gt = len(multiton_nxs) # Annot Info if verbose: print('Checking Annot Info') multiton_aids_list = ut.take(nx2_aids, multiton_nxs) assert len(set(multiton_nxs)) == len(multiton_nxs) if len(multiton_aids_list) == 0: multiton_aids = np.array([], dtype=np.int) else: multiton_aids = np.hstack(multiton_aids_list) assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot' singleton_aids = ut.take(nx2_aids, singleton_nxs) multiton_nid2_nannots = list(map(len, multiton_aids_list)) # Image size stats if with_imgsize: if verbose: print('Checking ImageSize Info') gpath_list = ibs.get_image_paths(valid_gids) def wh_print_stats(wh_list): if len(wh_list) == 0: return '{empty}' wh_list = np.asarray(wh_list) stat_dict = OrderedDict( [( 'max', wh_list.max(0)), ( 'min', wh_list.min(0)), ('mean', wh_list.mean(0)), ( 'std', wh_list.std(0))]) def arr2str(var): return ('[' + ( ', '.join(list(map(lambda x: '%.1f' % x, var))) ) + ']') ret = (',\n '.join([ '%s:%s' % (key, arr2str(val)) for key, val in stat_dict.items() ])) return '{\n ' + ret + '\n}' print('reading image sizes') # Image size stats img_size_list = ibs.get_image_sizes(valid_gids) img_size_stats = wh_print_stats(img_size_list) # Chip size stats annotation_bbox_list = ibs.get_annot_bboxes(valid_aids) annotation_bbox_arr = np.array(annotation_bbox_list) if len(annotation_bbox_arr) == 0: annotation_size_list 
= [] else: annotation_size_list = annotation_bbox_arr[:, 2:4] chip_size_stats = wh_print_stats(annotation_size_list) imgsize_stat_lines = [ (' # Img in dir = %d' % len(gpath_list)), (' Image Size Stats = %s' % (img_size_stats,)), (' * Chip Size Stats = %s' % (chip_size_stats,)), ] else: imgsize_stat_lines = [] if verbose: print('Building Stats String') multiton_stats = ut.get_stats_str(multiton_nid2_nannots, newlines=True, use_median=True) # Time stats unixtime_list = ibs.get_image_unixtime(valid_gids) unixtime_list = ut.list_replace(unixtime_list, -1, float('nan')) #valid_unixtime_list = [time for time in unixtime_list if time != -1] #unixtime_statstr = ibs.get_image_time_statstr(valid_gids) if ut.get_argflag('--hackshow-unixtime'): show_time_distributions(ibs, unixtime_list) ut.show_if_requested() unixtime_statstr = ut.get_timestats_str(unixtime_list, newlines=True, full=True) # GPS stats gps_list_ = ibs.get_image_gps(valid_gids) gpsvalid_list = [gps != (-1, -1) for gps in gps_list_] gps_list = ut.compress(gps_list_, gpsvalid_list) def get_annot_age_stats(aid_list): annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list) annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list) age_dict = ut.ddict((lambda : 0)) for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max): if (min_age is None or min_age < 12) and max_age < 12: age_dict['Infant'] += 1 elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36: age_dict['Juvenile'] += 1 elif 36 <= min_age and (36 <= max_age or max_age is None): age_dict['Adult'] += 1 else: print('Found UNKNOWN Age: %r, %r' % (min_age, max_age, )) age_dict['UNKNOWN'] += 1 return age_dict def get_annot_sex_stats(aid_list): annot_sextext_list = ibs.get_annot_sex_texts(aid_list) sextext2_aids = ut.group_items(aid_list, annot_sextext_list) sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys()) assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str(set(annot_sextext_list) - set(sex_keys)) sextext2_nAnnots = ut.odict([(key, len(sextext2_aids.get(key, []))) for key in sex_keys]) # Filter 0's sextext2_nAnnots = {key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0} return sextext2_nAnnots if verbose: print('Checking Other Annot Stats') qualtext2_nAnnots = ibs.get_annot_qual_stats(valid_aids) yawtext2_nAnnots = ibs.get_annot_yaw_stats(valid_aids) agetext2_nAnnots = get_annot_age_stats(valid_aids) sextext2_nAnnots = get_annot_sex_stats(valid_aids) if verbose: print('Checking Contrib Stats') # Contributor Statistics # hack remove colon for image alignment def fix_tag_list(tag_list): return [None if tag is None else tag.replace(':', ';') for tag in tag_list] image_contrib_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids)) annot_contrib_tags = fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids)) contrib_tag_to_gids = ut.group_items(valid_gids, image_contrib_tags) contrib_tag_to_aids = ut.group_items(valid_aids, annot_contrib_tags) contrib_tag_to_qualstats = {key: ibs.get_annot_qual_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)} contrib_tag_to_viewstats = {key: ibs.get_annot_yaw_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)} contrib_tag_to_nImages = {key: len(val) for key, val in six.iteritems(contrib_tag_to_gids)} contrib_tag_to_nAnnots = {key: len(val) for key, val in six.iteritems(contrib_tag_to_aids)} if verbose: print('Summarizing') # Summarize stats num_names = len(valid_nids) num_names_unassociated = len(valid_nids) - 
len(associated_nids) num_names_singleton = len(singleton_nxs) num_names_multiton = len(multiton_nxs) num_singleton_annots = len(singleton_aids) num_multiton_annots = len(multiton_aids) num_unknown_annots = len(unknown_aids) num_annots = len(valid_aids) if with_bytes: if verbose: print('Checking Disk Space') ibsdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir())) dbdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir())) imgdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir())) cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir())) if True: if verbose: print('Check asserts') try: bad_aids = np.intersect1d(multiton_aids, unknown_aids) _num_names_total_check = num_names_singleton + num_names_unassociated + num_names_multiton _num_annots_total_check = num_unknown_annots + num_singleton_annots + num_multiton_annots assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids' assert _num_names_total_check == num_names, 'inconsistent num names' #if not request_annot_subset: # dont check this if you have an annot subset assert _num_annots_total_check == num_annots, 'inconsistent num annots' except Exception as ex: ut.printex(ex, keys=[ '_num_names_total_check', 'num_names', '_num_annots_total_check', 'num_annots', 'num_names_singleton', 'num_names_multiton', 'num_unknown_annots', 'num_multiton_annots', 'num_singleton_annots', ]) raise # Get contributor statistics contrib_rowids = ibs.get_valid_contrib_rowids() num_contributors = len(contrib_rowids) # print num_tabs = 5 def align2(str_): return ut.align(str_, ':', ' :') def align_dict2(dict_): str_ = ut.dict_str(dict_) return align2(str_) header_block_lines = ( [('+============================'), ] + ( [ ('+ singleton := single sighting'), ('+ multiton := multiple sightings'), ('--' * num_tabs), ] if not short and with_header else [] ) ) source_block_lines = [ ('DB Info: ' + ibs.get_dbname()), ('DB Notes: ' + ibs.get_dbnotes()), ('DB NumContrib: %d' % num_contributors), ] bytes_block_lines = [ ('--' * num_tabs), ('DB Bytes: '), (' +- dbdir nBytes: ' + dbdir_space), (' | +- _ibsdb nBytes: ' + ibsdir_space), (' | | +-imgdir nBytes: ' + imgdir_space), (' | | +-cachedir nBytes: ' + cachedir_space), ] if with_bytes else [] name_block_lines = [ ('--' * num_tabs), ('# Names = %d' % num_names), ('# Names (unassociated) = %d' % num_names_unassociated), ('# Names (singleton) = %d' % num_names_singleton), ('# Names (multiton) = %d' % num_names_multiton), ] subset_str = ' ' if not request_annot_subset else '(SUBSET)' annot_block_lines = [ ('--' * num_tabs), ('# Annots %s = %d' % (subset_str, num_annots,)), ('# Annots (unknown) = %d' % num_unknown_annots), ('# Annots (singleton) = %d' % num_singleton_annots), ('# Annots (multiton) = %d' % num_multiton_annots), ] annot_per_basic_block_lines = [ ('--' * num_tabs), ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)), ('# Annots per Image = %s' % (align2(gx2_nAnnots_stats),)), ('# Annots per Species = %s' % (align_dict2(species2_nAids),)), ] if not short else [] occurrence_block_lines = [ ('--' * num_tabs), ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)), ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)), ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)), ] if not short else [] annot_per_qualview_block_lines = [ None if short else '# Annots per Viewpoint = %s' % align_dict2(yawtext2_nAnnots), None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots), 
] annot_per_agesex_block_lines = [ '# Annots per Age = %s' % align_dict2(agetext2_nAnnots), '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots), ] if not short and with_agesex else [] contrib_block_lines = [ '# Images per contributor = ' + align_dict2(contrib_tag_to_nImages), '# Annots per contributor = ' + align_dict2(contrib_tag_to_nAnnots), '# Quality per contributor = ' + ut.dict_str(contrib_tag_to_qualstats, sorted_=True), '# Viewpoint per contributor = ' + ut.dict_str(contrib_tag_to_viewstats, sorted_=True), ] if with_contrib else [] img_block_lines = [ ('--' * num_tabs), ('# Img = %d' % len(valid_gids)), None if short else ('# Img reviewed = %d' % sum(image_reviewed_list)), None if short else ('# Img with gps = %d' % len(gps_list)), #('# Img with timestamp = %d' % len(valid_unixtime_list)), None if short else ('Img Time Stats = %s' % (align2(unixtime_statstr),)), ] info_str_lines = ( header_block_lines + bytes_block_lines + source_block_lines + name_block_lines + annot_block_lines + annot_per_basic_block_lines + occurrence_block_lines + annot_per_qualview_block_lines + annot_per_agesex_block_lines + img_block_lines + contrib_block_lines + imgsize_stat_lines + [('L============================'), ] ) info_str = '\n'.join(ut.filter_Nones(info_str_lines)) info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag)) if verbose: print(info_str2) locals_ = locals() return locals_
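# --- Illustrative sketch (not part of the original module) ---
# get_dbinfo groups annotations into "occurrences" by clustering their images
# in time (preproc_occurrence is called with a 4-hour gap threshold) and then
# regrouping the per-image annotations by occurrence id.  The self-contained
# toy version below shows the same regrouping idea without ibeis; the input
# data and the simple sequential gap-based clustering are hypothetical
# stand-ins for the real occurrence computation.
def _demo_group_annots_by_occurrence(annot_gids, gid_to_unixtime, seconds_thresh=4 * 60 * 60):
    """
    annot_gids      : {aid: gid} mapping of annotations to their images
    gid_to_unixtime : {gid: unixtime} capture time of each image

    Example:
        >>> annot_gids = {1: 10, 2: 10, 3: 11, 4: 12}
        >>> gid_to_unixtime = {10: 0, 11: 600, 12: 100000}
        >>> sorted(_demo_group_annots_by_occurrence(annot_gids, gid_to_unixtime).items())
        [(0, [1, 2, 3]), (1, [4])]
    """
    # Group annotations by image (analogous to ut.group_items(aid_list, gid_list))
    gid_to_aids = {}
    for aid, gid in annot_gids.items():
        gid_to_aids.setdefault(gid, []).append(aid)
    # Cluster images into occurrences: a new occurrence starts whenever the
    # time gap to the previous image exceeds the threshold.
    occurid_of_gid = {}
    occurid = -1
    prev_time = None
    for gid in sorted(gid_to_unixtime, key=gid_to_unixtime.get):
        time = gid_to_unixtime[gid]
        if prev_time is None or (time - prev_time) > seconds_thresh:
            occurid += 1
        occurid_of_gid[gid] = occurid
        prev_time = time
    # Regroup annotations by occurrence id (analogous to occurid2_aids)
    occurid_to_aids = {}
    for gid, aids in gid_to_aids.items():
        occurid_to_aids.setdefault(occurid_of_gid[gid], []).extend(aids)
    return occurid_to_aids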
def clean_tags():
    zotero = get_libzotero()
    # dict of all zotero items
    # items = zotero.index
    # get sql cursor
    cur = zotero.cur
    if False:
        sorted(ut.util_sqlite.get_tablenames(cur))
        ut.print_database_structure(cur)
        # Debug info about tags table in sql
        # The `tags` table stores all tags
        # The itemTags table stores the association between items and tags
        ut.get_table_columninfo_list(cur, 'fields')
        # ut.get_table_columninfo_list(cur, 'relations')
        ut.get_table_columninfo_list(cur, 'fieldsCombined')
        ut.get_table_columninfo_list(cur, 'itemData')
        ut.get_table_columninfo_list(cur, 'itemDataValues')
        ut.get_table_columninfo_list(cur, 'tags')
        ut.get_table_columninfo_list(cur, 'itemTags')

    import pandas as pd
    pd.options.display.max_colwidth = 40
    pd.options.display.max_rows = 20

    def pandas_sql(table, columns):
        return pd.DataFrame(ut.get_table_rows(cur, table, columns), columns=columns)

    item_df = pandas_sql('items', ('itemID', 'itemTypeID', 'libraryID', 'key')).set_index('itemID', drop=False)
    tags_df = pandas_sql('tags', ('tagID', 'name', 'type', 'libraryID', 'key')).set_index('tagID', drop=False)
    itemData_df = pandas_sql('itemData', ('itemID', 'fieldID', 'valueID'))
    itemTag_df = pandas_sql('itemTags', ('itemID', 'tagID'))
    itemDataValues_df = pandas_sql('itemDataValues', ('valueID', 'value')).set_index('valueID')
    field_df = pandas_sql('fields', ('fieldID', 'fieldName', 'fieldFormatID')).set_index('fieldID')

    itemData_df['value'] = itemDataValues_df['value'].loc[itemData_df['valueID'].values].values
    itemData_df['fieldName'] = field_df['fieldName'].loc[itemData_df['fieldID'].values].values

    titles = itemData_df[itemData_df['fieldName'] == 'title']
    assert len(ut.unique(ut.map_vals(len, titles.groupby('itemID').indices).values())) == 1
    # itemTag_df.groupby('itemID').count()

    # Find how often each tag is used
    tagid_to_count = itemTag_df.groupby('tagID').count()
    tagid_to_count = tagid_to_count.rename(columns={'itemID': 'nItems'})
    tagid_to_count['name'] = tags_df.loc[tagid_to_count.index]['name']
    tagid_to_count = tagid_to_count.sort_values('nItems')
    bad_tags = tagid_to_count[tagid_to_count['nItems'] == 1]

    tagid_to_count['tag_ncharsize'] = tagid_to_count['name'].apply(len)
    tagid_to_count = tagid_to_count.sort_values('tag_ncharsize')
    bad_tags = tagid_to_count[tagid_to_count['tag_ncharsize'] > 25]['name'].values.tolist()

    def clean_tags2():
        api_key = 'fBDBqRPwW9O3mYyNLiksBKZy'
        base_url = 'https://api.zotero.org'
        library_id = '1279414'
        library_type = 'user'
        from pyzotero import zotero
        zot = zotero.Zotero(library_id, library_type, api_key)
        for chunk in ut.ProgChunks(bad_tags, 50):
            zot.delete_tags(*chunk)

    if False:
        api_key = 'fBDBqRPwW9O3mYyNLiksBKZy'
        base_url = 'https://api.zotero.org'
        user_id = '1279414'
        userOrGroupPrefix = '/users/' + user_id
        params = {'v': 3, 'key': api_key}
        items_resp = requests.get(base_url + userOrGroupPrefix + '/items', params=params)
        print(items_resp.content)
        print(items_resp)

        json_tags = []
        get_url = base_url + userOrGroupPrefix + '/tags'
        while True:
            print('get_url = %r' % (get_url,))
            tag_resp = requests.get(get_url, params=params)
            if tag_resp.status_code != 200:
                break
            json_tags.extend(tag_resp.json())
            if 'next' in tag_resp.links:
                get_url = tag_resp.links['next']['url']
            else:
                break

        version_to_tags = ut.ddict(list)
        bad_tags = []
        for tag in ut.ProgIter(json_tags, label='parsing tags'):
            # x = requests.get(tag['links']['self']['href'], params=params)
            if tag['meta']['numItems'] == 1:
                import urllib2
                try:
                    bad_tags.append(urllib2.quote(tag['tag']))
                except Exception as ex:
                    print('cant encode tag=%r' % (tag,))
                    pass

        for chunk in ut.ProgIter(ut.ichunks(bad_tags, 50), length=len(bad_tags) / 50):
            search_url = base_url + userOrGroupPrefix + '/items?tag=' + ' || '.join(chunk)
            r = requests.get(search_url, params=params)
            matching_items = r.json()
            # assert len(matching_items) == 1
            for item in matching_items:
                version = item['version']
                version_to_tags[item['version']].append(tag['tag'])

        # DELETE MULTIPLE TAGS
        import requests
        for chunk in ut.ichunks(bad_tags['name'], 50):
            import urllib2
            encoded_chunk = []
            for t in chunk:
                try:
                    encoded_chunk.append(urllib2.quote(t))
                except Exception:
                    print(t)
            suffix = ' || '.join(encoded_chunk)
            delete_url = base_url + userOrGroupPrefix + '/tags?' + suffix
            print('delete_url = %r' % (delete_url,))
            resp = requests.delete(delete_url, params=params)

        bad_tags = tagid_to_count[tagid_to_count['nItems'] == 1]
        bad_tags['tagID'] = bad_tags.index
        for tagid in bad_tags:
            # The original scratch note here was raw SQL (not valid Python);
            # it is kept as a comment for reference:
            # delete from itemTags where tagID in (select tagID from tags where type=1);
            pass
        # Unfinished fragment from the original scratch code (references an
        # undefined `k`); kept as a comment:
        # for name in k['name'].values.tolist():

    item_df['title'] = titles.set_index('itemID')['value']
    for idx, item in zotero.index.items():
        sql_title = item_df.loc[item.id]['title']
        if item.title != sql_title:
            if pd.isnull(sql_title) and item.title is not None:
                print(item.__dict__)
                print(item_df.loc[item.id])
            print('item.title = %r' % (item.title,))
            print('sql_title = %r' % (sql_title,))
            assert False

    duplicate_tags = [
        (name, idxs) for name, idxs in tags_df.groupby('name', sort=True).indices.items()
        if len(idxs) > 2
    ]
    tagname_to_tagid = tags_df.groupby('name', sort=True).first()
    new_to_oldtags = {}

    # Determine which tagid to use for each name
    for tagname, idxs in duplicate_tags:
        tags_subdf = tags_df.iloc[idxs]
        mapping = itemTag_df[itemTag_df['tagID'].isin(tags_subdf['tagID'])]
        tag_hist = mapping.groupby('tagID').count()
        best_tagid = tag_hist['itemID'].idxmax()
        new_to_oldtags[best_tagid] = set(tag_hist['itemID'].values) - {best_tagid}
        tagname_to_tagid.loc[tagname] = tags_df.loc[best_tagid]
        # for col in tagname_to_tagid.columns:
        #     tagname_to_tagid.loc[tagname][col] = tags_df.loc[best_tagid][col]
        # tags_df.loc[best_tagid]

    if False:
        # Update tagIds
        for newid, oldids in new_to_oldtags.items():
            for oldid in oldids:
                # cur.execute('SELECT itemID, tagID FROM itemTags WHERE tagID=?', (oldid,))
                import sqlite3
                try:
                    cmd = 'UPDATE itemTags SET tagID=? WHERE tagID=?'
                    args = (newid, oldid)
                    print('(%s) args = %r' % (cmd, args,))
                    cur.execute(cmd, args)
                    print(cur.fetchall())
                except sqlite3.IntegrityError:
                    print('error')
                    pass

    # tags_df.groupby('name', sort=True)
    # itemTag_df.groupby('itemID')
    # duptags = tags_df.iloc[tags_df.groupby('name', sort=True).indices['animals']]
    # duptags['tagID']
    # flags = itemTag_df['tagID'].isin(duptags['tagID'])
    # dup_rel = itemTag_df[flags]
    # item_df['title'].loc[dup_rel['itemID']].values
    # tags_df.iloc[tags_df.groupby('name', sort=True).indices['animals']]
    # tags_df[tags_df['type'] == 1]
    # tags_df[tags_df['type'] == 0]
    # tags_df['libraryID'].unique()
    # tags_df['type'].unique()
    '''
    SELECT
    SELECT FROM itemTags WHERE name in (animals)
    '''
    item_tag_pairs = ut.get_table_rows(cur, 'itemTags', ('itemID', 'tagID'))
    # Group tags by item
    itemid_to_tagids = ut.group_pairs(item_tag_pairs)
    # Group items by tags
    tagid_to_itemids = ut.group_pairs(map(tuple, map(reversed, item_tag_pairs)))
    # mapping from tagid to name
    tagid_to_name = dict(ut.get_table_rows(cur, 'tags', ('tagID', 'name')))
    tagid_freq = list(ut.sort_dict(ut.map_vals(len, tagid_to_itemids), 'vals').items())
    ut.sort_dict(ut.map_vals(sum, ut.group_pairs([
        (freq, tagid_to_name.get(tagid, tagid))
        for tagid, freq in tagid_freq])), 'vals')
    tagname_freq = ut.map_keys(lambda k: tagid_to_name.get(k, k), tagid_freq)
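# --- Illustrative sketch (not part of the original module) ---
# The tag-merging logic above picks, for each duplicated tag name, the tagID
# attached to the most items and repoints the other tagIDs to it.  This toy
# in-memory sqlite example demonstrates that pattern end to end; the table
# names mirror Zotero's `tags` / `itemTags` schema, but all rows and ids here
# are made up for illustration.
def _demo_merge_duplicate_tags():
    import sqlite3
    con = sqlite3.connect(':memory:')
    cur = con.cursor()
    cur.execute('CREATE TABLE tags (tagID INTEGER PRIMARY KEY, name TEXT)')
    cur.execute('CREATE TABLE itemTags (itemID INTEGER, tagID INTEGER)')
    cur.executemany('INSERT INTO tags VALUES (?, ?)',
                    [(1, 'animals'), (2, 'animals'), (3, 'graphs')])
    cur.executemany('INSERT INTO itemTags VALUES (?, ?)',
                    [(10, 1), (11, 2), (12, 2), (13, 3)])
    # Count how many items use each tagID (analogous to tagid_to_count)
    cur.execute('''
        SELECT t.name, t.tagID, COUNT(it.itemID) AS nItems
        FROM tags t LEFT JOIN itemTags it ON it.tagID = t.tagID
        GROUP BY t.tagID
    ''')
    by_name = {}
    for name, tagid, nitems in cur.fetchall():
        by_name.setdefault(name, []).append((nitems, tagid))
    # For each duplicated name keep the most-used tagID and merge the rest
    for name, counts in by_name.items():
        counts.sort()
        best_tagid = counts[-1][1]
        for _, oldid in counts[:-1]:
            # Repoint item associations, then drop the redundant tag row
            cur.execute('UPDATE itemTags SET tagID=? WHERE tagID=?', (best_tagid, oldid))
            cur.execute('DELETE FROM tags WHERE tagID=?', (oldid,))
    con.commit()
    cur.execute('SELECT tagID, name FROM tags ORDER BY tagID')
    print(cur.fetchall())   # -> [(2, 'animals'), (3, 'graphs')]
    cur.execute('SELECT itemID, tagID FROM itemTags ORDER BY itemID')
    print(cur.fetchall())   # -> [(10, 2), (11, 2), (12, 2), (13, 3)]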
def intraoccurrence_connected(): r""" CommandLine: python -m ibeis.scripts.specialdraw intraoccurrence_connected --show python -m ibeis.scripts.specialdraw intraoccurrence_connected --show --postcut python -m ibeis.scripts.specialdraw intraoccurrence_connected --show --smaller Example: >>> # DISABLE_DOCTEST >>> from ibeis.scripts.specialdraw import * # NOQA >>> result = intraoccurrence_connected() >>> print(result) >>> ut.quit_if_noshow() >>> import plottool as pt >>> ut.show_if_requested() """ import ibeis import plottool as pt from ibeis.viz import viz_graph import networkx as nx pt.ensure_pylab_qt4() ibs = ibeis.opendb(defaultdb='PZ_Master1') nid2_aid = { #4880: [3690, 3696, 3703, 3706, 3712, 3721], 4880: [3690, 3696, 3703], 6537: [3739], 6653: [7671], 6610: [7566, 7408], #6612: [7664, 7462, 7522], #6624: [7465, 7360], #6625: [7746, 7383, 7390, 7477, 7376, 7579], 6630: [7586, 7377, 7464, 7478], #6677: [7500] } nid2_dbaids = { 4880: [33, 6120, 7164], 6537: [7017, 7206], 6653: [7660] } if ut.get_argflag('--small') or ut.get_argflag('--smaller'): del nid2_aid[6630] del nid2_aid[6537] del nid2_dbaids[6537] if ut.get_argflag('--smaller'): nid2_dbaids[4880].remove(33) nid2_aid[4880].remove(3690) nid2_aid[6610].remove(7408) #del nid2_aid[4880] #del nid2_dbaids[4880] aids = ut.flatten(nid2_aid.values()) temp_nids = [1] * len(aids) postcut = ut.get_argflag('--postcut') aids_list = ibs.group_annots_by_name(aids)[0] ensure_edges = 'all' if True or not postcut else None unlabeled_graph = viz_graph.make_netx_graph_from_aid_groups( ibs, aids_list, #invis_edges=invis_edges, ensure_edges=ensure_edges, temp_nids=temp_nids) viz_graph.color_by_nids(unlabeled_graph, unique_nids=[1] * len(list(unlabeled_graph.nodes()))) viz_graph.ensure_node_images(ibs, unlabeled_graph) nx.set_node_attributes(unlabeled_graph, 'shape', 'rect') #unlabeled_graph = unlabeled_graph.to_undirected() # Find the "database exemplars for these annots" if False: gt_aids = ibs.get_annot_groundtruth(aids) gt_aids = [ut.setdiff(s, aids) for s in gt_aids] dbaids = ut.unique(ut.flatten(gt_aids)) dbaids = ibs.filter_annots_general(dbaids, minqual='good') ibs.get_annot_quality_texts(dbaids) else: dbaids = ut.flatten(nid2_dbaids.values()) exemplars = nx.DiGraph() #graph = exemplars # NOQA exemplars.add_nodes_from(dbaids) def add_clique(graph, nodes, edgeattrs={}, nodeattrs={}): edge_list = ut.upper_diag_self_prodx(nodes) graph.add_edges_from(edge_list, **edgeattrs) return edge_list for aids_, nid in zip(*ibs.group_annots_by_name(dbaids)): add_clique(exemplars, aids_) viz_graph.ensure_node_images(ibs, exemplars) viz_graph.color_by_nids(exemplars, ibs=ibs) nx.set_node_attributes(unlabeled_graph, 'framewidth', False) nx.set_node_attributes(exemplars, 'framewidth', 4.0) nx.set_node_attributes(unlabeled_graph, 'group', 'unlab') nx.set_node_attributes(exemplars, 'group', 'exemp') #big_graph = nx.compose_all([unlabeled_graph]) big_graph = nx.compose_all([exemplars, unlabeled_graph]) # add sparse connections from unlabeled to exemplars import numpy as np rng = np.random.RandomState(0) if True or not postcut: for aid_ in unlabeled_graph.nodes(): flags = rng.rand(len(exemplars)) > .5 nid_ = ibs.get_annot_nids(aid_) exnids = np.array(ibs.get_annot_nids(list(exemplars.nodes()))) flags = np.logical_or(exnids == nid_, flags) exmatches = ut.compress(list(exemplars.nodes()), flags) big_graph.add_edges_from(list(ut.product([aid_], exmatches)), color=pt.ORANGE, implicit=True) else: for aid_ in unlabeled_graph.nodes(): flags = rng.rand(len(exemplars)) > .5 
exmatches = ut.compress(list(exemplars.nodes()), flags) nid_ = ibs.get_annot_nids(aid_) exnids = np.array(ibs.get_annot_nids(exmatches)) exmatches = ut.compress(exmatches, exnids == nid_) big_graph.add_edges_from(list(ut.product([aid_], exmatches))) pass nx.set_node_attributes(big_graph, 'shape', 'rect') #if False and postcut: # ut.nx_delete_node_attr(big_graph, 'nid') # ut.nx_delete_edge_attr(big_graph, 'color') # viz_graph.ensure_graph_nid_labels(big_graph, ibs=ibs) # viz_graph.color_by_nids(big_graph, ibs=ibs) # big_graph = big_graph.to_undirected() layoutkw = { 'sep' : 1 / 5, 'prog': 'neato', 'overlap': 'false', #'splines': 'ortho', 'splines': 'spline', } as_directed = False #as_directed = True #hacknode = True hacknode = 0 graph = big_graph ut.nx_ensure_agraph_color(graph) if hacknode: nx.set_edge_attributes(graph, 'taillabel', {e: str(e[0]) for e in graph.edges()}) nx.set_edge_attributes(graph, 'headlabel', {e: str(e[1]) for e in graph.edges()}) explicit_graph = pt.get_explicit_graph(graph) _, layout_info = pt.nx_agraph_layout(explicit_graph, orig_graph=graph, inplace=True, **layoutkw) if ut.get_argflag('--smaller'): graph.node[7660]['pos'] = np.array([550, 350]) graph.node[6120]['pos'] = np.array([200, 600]) + np.array([350, -400]) graph.node[7164]['pos'] = np.array([200, 480]) + np.array([350, -400]) nx.set_node_attributes(graph, 'pin', 'true') _, layout_info = pt.nx_agraph_layout(graph, inplace=True, **layoutkw) elif ut.get_argflag('--small'): graph.node[7660]['pos'] = np.array([750, 350]) graph.node[33]['pos'] = np.array([300, 600]) + np.array([350, -400]) graph.node[6120]['pos'] = np.array([500, 600]) + np.array([350, -400]) graph.node[7164]['pos'] = np.array([410, 480]) + np.array([350, -400]) nx.set_node_attributes(graph, 'pin', 'true') _, layout_info = pt.nx_agraph_layout(graph, inplace=True, **layoutkw) if not postcut: #pt.show_nx(graph.to_undirected(), layout='agraph', layoutkw=layoutkw, # as_directed=False) #pt.show_nx(graph, layout='agraph', layoutkw=layoutkw, # as_directed=as_directed, hacknode=hacknode) pt.show_nx(graph, layout='custom', layoutkw=layoutkw, as_directed=as_directed, hacknode=hacknode) else: #explicit_graph = pt.get_explicit_graph(graph) #_, layout_info = pt.nx_agraph_layout(explicit_graph, orig_graph=graph, # **layoutkw) #layout_info['edge']['alpha'] = .8 #pt.apply_graph_layout_attrs(graph, layout_info) #graph_layout_attrs = layout_info['graph'] ##edge_layout_attrs = layout_info['edge'] ##node_layout_attrs = layout_info['node'] #for key, vals in layout_info['node'].items(): # #print('[special] key = %r' % (key,)) # nx.set_node_attributes(graph, key, vals) #for key, vals in layout_info['edge'].items(): # #print('[special] key = %r' % (key,)) # nx.set_edge_attributes(graph, key, vals) #nx.set_edge_attributes(graph, 'alpha', .8) #graph.graph['splines'] = graph_layout_attrs.get('splines', 'line') #graph.graph['splines'] = 'polyline' # graph_layout_attrs.get('splines', 'line') #graph.graph['splines'] = 'line' cut_graph = graph.copy() edge_list = list(cut_graph.edges()) edge_nids = np.array(ibs.unflat_map(ibs.get_annot_nids, edge_list)) cut_flags = edge_nids.T[0] != edge_nids.T[1] cut_edges = ut.compress(edge_list, cut_flags) cut_graph.remove_edges_from(cut_edges) ut.nx_delete_node_attr(cut_graph, 'nid') viz_graph.ensure_graph_nid_labels(cut_graph, ibs=ibs) #ut.nx_get_default_node_attributes(exemplars, 'color', None) ut.nx_delete_node_attr(cut_graph, 'color', nodes=unlabeled_graph.nodes()) aid2_color = ut.nx_get_default_node_attributes(cut_graph, 'color', None) 
nid2_colors = ut.group_items(aid2_color.values(), ibs.get_annot_nids(aid2_color.keys())) nid2_colors = ut.map_dict_vals(ut.filter_Nones, nid2_colors) nid2_colors = ut.map_dict_vals(ut.unique, nid2_colors) #for val in nid2_colors.values(): # assert len(val) <= 1 # Get initial colors nid2_color_ = {nid: colors_[0] for nid, colors_ in nid2_colors.items() if len(colors_) == 1} graph = cut_graph viz_graph.color_by_nids(cut_graph, ibs=ibs, nid2_color_=nid2_color_) nx.set_node_attributes(cut_graph, 'framewidth', 4) pt.show_nx(cut_graph, layout='custom', layoutkw=layoutkw, as_directed=as_directed, hacknode=hacknode) pt.zoom_factory()
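# --- Illustrative sketch (not part of the original module) ---
# The figure code above composes two graphs (name-labeled exemplar cliques and
# unlabeled query annots with sparse random candidate edges) and, in the
# --postcut variant, removes every edge whose endpoints belong to different
# names.  This standalone toy reproduces that compose-then-cut pattern with
# made-up node ids and name labels; only networkx and numpy are required, and
# it simplifies the original by using undirected graphs and a plain dict of
# name labels instead of ibeis nids.
def _demo_compose_and_cut(rng_seed=0):
    import itertools as it
    import networkx as nx
    import numpy as np
    node_to_nid = {1: 'A', 2: 'A', 3: 'B', 4: 'B', 101: 'A', 102: 'B'}
    exemplars = nx.Graph()
    # Fully connect exemplars that share a name (the add_clique step)
    for nid in set(node_to_nid[n] for n in (1, 2, 3, 4)):
        clique = [n for n in (1, 2, 3, 4) if node_to_nid[n] == nid]
        exemplars.add_edges_from(it.combinations(clique, 2))
    unlabeled = nx.Graph()
    unlabeled.add_nodes_from([101, 102])
    big_graph = nx.compose_all([exemplars, unlabeled])
    # Sparse random candidate edges from each query node to the exemplars
    rng = np.random.RandomState(rng_seed)
    for query in unlabeled.nodes():
        flags = rng.rand(len(exemplars)) > .5
        matches = [n for n, flag in zip(exemplars.nodes(), flags) if flag]
        big_graph.add_edges_from((query, m) for m in matches)
    # "postcut": drop edges whose endpoints have different name labels
    cut_graph = big_graph.copy()
    cut_edges = [(u, v) for u, v in cut_graph.edges()
                 if node_to_nid[u] != node_to_nid[v]]
    cut_graph.remove_edges_from(cut_edges)
    return big_graph, cut_graph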