def get_cfgstr(nnindexer, noquery=False): r""" returns string which uniquely identified configuration and support data Args: noquery (bool): if True cfgstr is only relevant to building the index. No search params are returned (default = False) Returns: str: flann_cfgstr CommandLine: python -m wbia.algo.hots.neighbor_index --test-get_cfgstr Example: >>> # DISABLE_DOCTEST >>> from wbia.algo.hots.neighbor_index import * # NOQA >>> import wbia >>> cfgdict = dict(fg_on=False) >>> qreq_ = wbia.testdata_qreq_(defaultdb='testdb1', p='default:fg_on=False') >>> qreq_.load_indexer() >>> nnindexer = qreq_.indexer >>> noquery = True >>> flann_cfgstr = nnindexer.get_cfgstr(noquery) >>> result = ('flann_cfgstr = %s' % (str(flann_cfgstr),)) >>> print(result) flann_cfgstr = _FLANN((algo=kdtree,seed=42,t=8,))_VECS((11260,128)gj5nea@ni0%f3aja) """ flann_cfgstr_list = [] use_params_hash = True use_data_hash = True if use_params_hash: flann_defaults = vt.get_flann_params( nnindexer.flann_params['algorithm']) # flann_params_clean = flann_defaults.copy() flann_params_clean = ut.sort_dict(flann_defaults) ut.update_existing(flann_params_clean, nnindexer.flann_params) if noquery: ut.delete_dict_keys(flann_params_clean, ['checks']) shortnames = dict(algorithm='algo', checks='chks', random_seed='seed', trees='t') short_params = ut.odict([ (shortnames.get(key, key), str(val)[0:7]) for key, val in six.iteritems(flann_params_clean) ]) flann_valsig_ = ut.repr2(short_params, nl=False, explicit=True, strvals=True) flann_valsig_ = flann_valsig_.lstrip('dict').replace(' ', '') # flann_valsig_ = str(list(flann_params.values())) # flann_valsig = ut.remove_chars(flann_valsig_, ', \'[]') flann_cfgstr_list.append('_FLANN(' + flann_valsig_ + ')') if use_data_hash: vecs_hashstr = ut.hashstr_arr(nnindexer.idx2_vec, '_VECS') flann_cfgstr_list.append(vecs_hashstr) flann_cfgstr = ''.join(flann_cfgstr_list) return flann_cfgstr
def assert_unique(item_list, ignore=[], name='list', verbose=None):
    import utool as ut
    dups = ut.find_duplicate_items(item_list)
    ut.delete_dict_keys(dups, ignore)
    if len(dups) > 0:
        raise AssertionError('Found duplicate items in %s: %s' % (name, ut.repr4(dups)))
    if verbose:
        print('No duplicates found in %s' % (name,))
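Every snippet in this collection leans on ut.delete_dict_keys, so it is worth pinning down its apparent contract. Judging only from the call sites in these snippets (it mutates dups in place here, and its return value is assigned back to a variable elsewhere), it seems to remove the listed keys from a dict in place and return that same dict. A minimal sketch of that assumed behavior, not the actual utool implementation:

# Sketch of the assumed semantics of ut.delete_dict_keys: remove the given
# keys from dict_ in place and return the same dict so calls can be chained
# or assigned (missing keys are assumed to be skipped silently).
def delete_dict_keys_sketch(dict_, keys):
    for key in keys:
        dict_.pop(key, None)
    return dict_

# Example mirroring assert_unique above:
dups = {'a': [1, 3], 'b': [2, 5]}
delete_dict_keys_sketch(dups, ['a'])
assert dups == {'b': [2, 5]}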
def get_cfgstr(nnindexer, noquery=False):
    r"""
    Returns a string which uniquely identifies the configuration and support data.

    Args:
        noquery (bool): if True cfgstr is only relevant to building the index.
            No search params are returned (default = False)

    Returns:
        str: flann_cfgstr

    CommandLine:
        python -m ibeis.algo.hots.neighbor_index --test-get_cfgstr

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.neighbor_index import *  # NOQA
        >>> import ibeis
        >>> cfgdict = dict(fg_on=False)
        >>> qreq_ = ibeis.testdata_qreq_(defaultdb='testdb1', p='default:fg_on=False')
        >>> qreq_.load_indexer()
        >>> nnindexer = qreq_.indexer
        >>> noquery = True
        >>> flann_cfgstr = nnindexer.get_cfgstr(noquery)
        >>> result = ('flann_cfgstr = %s' % (str(flann_cfgstr),))
        >>> print(result)
        flann_cfgstr = _FLANN((algo=kdtree,seed=42,t=8,))_VECS((11260,128)gj5nea@ni0%f3aja)
    """
    flann_cfgstr_list = []
    use_params_hash = True
    use_data_hash = True
    if use_params_hash:
        flann_defaults = vt.get_flann_params(nnindexer.flann_params['algorithm'])
        #flann_params_clean = flann_defaults.copy()
        flann_params_clean = ut.sort_dict(flann_defaults)
        ut.updateif_haskey(flann_params_clean, nnindexer.flann_params)
        if noquery:
            ut.delete_dict_keys(flann_params_clean, ['checks'])
        shortnames = dict(algorithm='algo', checks='chks', random_seed='seed', trees='t')
        short_params = ut.odict([
            (shortnames.get(key, key), str(val)[0:7])
            for key, val in six.iteritems(flann_params_clean)
        ])
        flann_valsig_ = ut.dict_str(short_params, nl=False, explicit=True, strvals=True)
        flann_valsig_ = flann_valsig_.lstrip('dict').replace(' ', '')
        #flann_valsig_ = str(list(flann_params.values()))
        #flann_valsig = ut.remove_chars(flann_valsig_, ', \'[]')
        flann_cfgstr_list.append('_FLANN(' + flann_valsig_ + ')')
    if use_data_hash:
        vecs_hashstr = ut.hashstr_arr(nnindexer.idx2_vec, '_VECS')
        flann_cfgstr_list.append(vecs_hashstr)
    flann_cfgstr = ''.join(flann_cfgstr_list)
    return flann_cfgstr
def on_pick(event, infr=None):
    import wbia.plottool as pt
    logger.info('ON PICK: %r' % (event,))
    artist = event.artist
    plotdat = pt.get_plotdat_dict(artist)
    if plotdat:
        if 'node' in plotdat:
            all_node_data = ut.sort_dict(plotdat['node_data'].copy())
            visual_node_data = ut.dict_subset(all_node_data, infr.visual_node_attrs, None)
            node_data = ut.delete_dict_keys(all_node_data, infr.visual_node_attrs)
            node = plotdat['node']
            node_data['degree'] = infr.graph.degree(node)
            node_label = infr.pos_graph.node_label(node)
            logger.info('visual_node_data: ' + ut.repr2(visual_node_data, nl=1))
            logger.info('node_data: ' + ut.repr2(node_data, nl=1))
            ut.cprint('node: ' + ut.repr2(plotdat['node']), 'blue')
            logger.info('(pcc) node_label = %r' % (node_label,))
            logger.info('artist = %r' % (artist,))
        elif 'edge' in plotdat:
            all_edge_data = ut.sort_dict(plotdat['edge_data'].copy())
            logger.info(infr.repr_edge_data(all_edge_data))
            ut.cprint('edge: ' + ut.repr2(plotdat['edge']), 'blue')
            logger.info('artist = %r' % (artist,))
        else:
            logger.info('???: ' + ut.repr2(plotdat))
    logger.info(ut.get_timestamp())
def shorten_keys(cleaner):
    # Remove Keys
    detail_level = {
        'thesis': 3,
        'journal': 2,
        'conference': 1,
    }
    level = detail_level[cleaner.FOR]
    remove_keys = []
    if level <= 3:
        remove_keys += [
            'note', 'isbn', 'shorttitle', 'copyright', 'language', 'rights',
            'langid', 'keywords', 'shortjournal', 'issn', 'file',
        ]
        remove_keys += [
            # Pretty sure I needed these on for my thesis, but I think they
            # are ugly and don't add much, so turn them off for future work
            'series', 'publisher', 'editor', 'address',
        ]
    if level <= 2:
        remove_keys += ['number', 'pages', 'volume']
    cleaner.entry = ut.delete_dict_keys(cleaner.entry, remove_keys)
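For a concrete sense of what shorten_keys strips at the 'journal' level, here is a toy entry run through the same key list; the entry contents are invented, only the removed keys come from the function above.

# Sketch of the effect of shorten_keys for detail level 2 ('journal');
# the entry values are made-up placeholders.
entry = {
    'title': 'Example Paper', 'author': 'A. Author', 'year': '2016',
    'journal': 'IJCV', 'pages': '1--10', 'volume': '3', 'number': '2',
    'isbn': '000-0-00-000000-0', 'keywords': 'vision', 'publisher': 'X',
}
remove_keys = ['note', 'isbn', 'shorttitle', 'copyright', 'language', 'rights',
               'langid', 'keywords', 'shortjournal', 'issn', 'file',
               'series', 'publisher', 'editor', 'address',
               'number', 'pages', 'volume']  # level <= 3 keys plus level <= 2 keys
for key in remove_keys:
    entry.pop(key, None)
assert sorted(entry) == ['author', 'journal', 'title', 'year']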
def unused_important():
    important_authors = [
        'hinton', 'chum', 'Jegou', 'zisserman', 'schmid', 'sivic', 'matas',
        'lowe', 'perronnin', 'douze',
    ]
    for key in unused_keys:
        entry = bibtex_dict[key]
        author = entry.get('author', '')
        #authors = author.split(' and ')
        hasimportant = any(auth in author.lower() for auth in important_authors)
        if hasimportant or 'smk' in str(entry).lower():
            toremove = [
                'note', 'month', 'type', 'pages', 'urldate', 'language',
                'volume', 'number', 'publisher'
            ]
            entry = ut.delete_dict_keys(entry, toremove)
            print(ut.dict_str(entry, strvals=True, key_order=['title', 'author', 'id']))
def wrp_cache_invalidator(self, *args, **kwargs):
    # the class must have a table_cache property
    colscache_ = self.table_cache[tblname]
    colnames_ = list(six.iterkeys(colscache_)) if colnames is None else colnames
    if DEBUG_API_CACHE:
        indenter = ut.Indenter('[%s]' % (tblname,))
        indenter.start()
        print('+------')
        print('INVALIDATING tblname=%r, colnames=%r, rowidx=%r, force=%r' %
              (tblname, colnames, rowidx, force))
        print('self = %r' % (self,))
        print('args = %r' % (args,))
        print('kwargs = %r' % (kwargs,))
        print('colscache_ = ' + ut.dict_str(colscache_, truncate=1))

    # Clear the cache of any specified colname
    # when the invalidator is called
    if rowidx is None:
        for colname in colnames_:
            kwargs_cache_ = colscache_[colname]
            # We don't know the rowids so clear everything
            for cache_ in six.itervalues(kwargs_cache_):
                cache_.clear()
    else:
        rowid_list = args[rowidx]
        for colname in colnames_:
            kwargs_cache_ = colscache_[colname]
            # We know the rowids to delete
            # iterate over all getter kwargs values
            for cache_ in six.itervalues(kwargs_cache_):
                ut.delete_dict_keys(cache_, rowid_list)

    # Perform set/delete action
    if DEBUG_API_CACHE:
        print('After:')
        print('colscache_ = ' + ut.dict_str(colscache_, truncate=1))
        print('L__________')
    writer_result = writer_func(self, *args, **kwargs)
    if DEBUG_API_CACHE:
        indenter.stop()
    return writer_result
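To make the two invalidation paths above easier to follow, here is a toy version of the nested cache they walk. The table -> column -> getter-kwargs -> rowid layout is an assumption inferred from the loops above, not the actual ibeis/wbia structure.

# Hypothetical cache shape: table_cache[tblname][colname][kwargs_key][rowid] -> value
table_cache = {
    'annotations': {
        'name_text': {
            ('default',): {1: 'zebra_a', 2: 'zebra_b', 3: 'zebra_c'},
        },
    },
}

def invalidate(table_cache, tblname, rowid_list=None):
    for kwargs_cache_ in table_cache[tblname].values():
        for cache_ in kwargs_cache_.values():
            if rowid_list is None:
                cache_.clear()               # unknown rowids: drop everything
            else:
                for rowid in rowid_list:     # known rowids: drop just those
                    cache_.pop(rowid, None)

invalidate(table_cache, 'annotations', rowid_list=[2])
assert table_cache['annotations']['name_text'][('default',)] == {1: 'zebra_a', 3: 'zebra_c'}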
def get_nonvisual_edge_data(infr, edge, on_missing='filter'):
    data = infr.get_edge_data(edge)
    if data is not None:
        data = ut.delete_dict_keys(data.copy(), infr.visual_edge_attrs)
    else:
        if on_missing == 'filter':
            data = None
        elif on_missing == 'default':
            data = {}
        elif on_missing == 'error':
            raise KeyError('graph does not have edge %r ' % (edge,))
    return data
def repr_edge_data(infr, all_edge_data, visual=True):
    visual_edge_data = {
        k: v for k, v in all_edge_data.items() if k in infr.visual_edge_attrs
    }
    edge_data = ut.delete_dict_keys(all_edge_data.copy(), infr.visual_edge_attrs)
    lines = []
    if visual:
        lines += [('visual_edge_data: ' + ut.repr2(visual_edge_data, nl=1))]
    lines += [('edge_data: ' + ut.repr2(edge_data, nl=1))]
    return '\n'.join(lines)
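A toy illustration of the visual / non-visual split these two methods perform; the attribute names here are invented examples, not the real infr.visual_edge_attrs.

# Split edge data into drawing attributes and inference attributes.
visual_edge_attrs = {'color', 'linestyle', 'alpha'}          # assumed examples
all_edge_data = {'color': 'red', 'alpha': 0.5, 'decision': 'match', 'n_reviews': 2}
visual_edge_data = {k: v for k, v in all_edge_data.items() if k in visual_edge_attrs}
edge_data = {k: v for k, v in all_edge_data.items() if k not in visual_edge_attrs}
assert edge_data == {'decision': 'match', 'n_reviews': 2}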
def main(bib_fpath=None): r""" intro point to fixbib script CommmandLine: fixbib python -m fixtex bib python -m fixtex bib --dryrun python -m fixtex bib --dryrun --debug """ if bib_fpath is None: bib_fpath = 'My Library.bib' # DEBUG = ub.argflag('--debug') # Read in text and ensure ascii format dirty_text = ut.readfrom(bib_fpath) from fixtex.fix_tex import find_used_citations, testdata_fpaths if exists('custom_extra.bib'): extra_parser = bparser.BibTexParser(ignore_nonstandard_types=False) parser = bparser.BibTexParser() ut.delete_keys(parser.alt_dict, ['url', 'urls']) print('Parsing extra bibtex file') extra_text = ut.readfrom('custom_extra.bib') extra_database = extra_parser.parse(extra_text, partial=False) print('Finished parsing extra') extra_dict = extra_database.get_entry_dict() else: extra_dict = None #udata = dirty_text.decode("utf-8") #dirty_text = udata.encode("ascii", "ignore") #dirty_text = udata # parser = bparser.BibTexParser() # bib_database = parser.parse(dirty_text) # d = bib_database.get_entry_dict() print('BIBTEXPARSER LOAD') parser = bparser.BibTexParser(ignore_nonstandard_types=False, common_strings=True) ut.delete_keys(parser.alt_dict, ['url', 'urls']) print('Parsing bibtex file') bib_database = parser.parse(dirty_text, partial=False) print('Finished parsing') bibtex_dict = bib_database.get_entry_dict() old_keys = list(bibtex_dict.keys()) new_keys = [] for key in ub.ProgIter(old_keys, label='fixing keys'): new_key = key new_key = new_key.replace(':', '') new_key = new_key.replace('-', '_') new_key = re.sub('__*', '_', new_key) new_keys.append(new_key) # assert len(ut.find_duplicate_items(new_keys)) == 0, 'new keys created conflict' assert len(ub.find_duplicates(new_keys)) == 0, 'new keys created conflict' for key, new_key in zip(old_keys, new_keys): if key != new_key: entry = bibtex_dict[key] entry['ID'] = new_key bibtex_dict[new_key] = entry del bibtex_dict[key] # The bibtext is now clean. 
Print it to stdout #print(clean_text) verbose = None if verbose is None: verbose = 1 # Find citations from the tex documents key_list = None if key_list is None: cacher = ub.Cacher('texcite1', enabled=0) data = cacher.tryload() if data is None: fpaths = testdata_fpaths() key_list, inverse = find_used_citations(fpaths, return_inverse=True) # ignore = ['JP', '?', 'hendrick'] # for item in ignore: # try: # key_list.remove(item) # except ValueError: # pass if verbose: print('Found %d citations used in the document' % (len(key_list), )) data = key_list, inverse cacher.save(data) key_list, inverse = data # else: # key_list = None unknown_pubkeys = [] debug_author = ub.argval('--debug-author', default=None) # ./fix_bib.py --debug_author=Kappes if verbose: print('Fixing %d/%d bibtex entries' % (len(key_list), len(bibtex_dict))) # debug = True debug = False if debug_author is not None: debug = False known_keys = list(bibtex_dict.keys()) missing_keys = set(key_list) - set(known_keys) if extra_dict is not None: missing_keys.difference_update(set(extra_dict.keys())) if missing_keys: print('The library is missing keys found in tex files %s' % (ub.repr2(missing_keys), )) # Search for possible typos: candidate_typos = {} sedlines = [] for key in missing_keys: candidates = ut.closet_words(key, known_keys, num=3, subset=True) if len(candidates) > 1: top = candidates[0] if ut.edit_distance(key, top) == 1: # "sed -i -e 's/{}/{}/g' *.tex".format(key, top) import os replpaths = ' '.join( [relpath(p, os.getcwd()) for p in inverse[key]]) sedlines.append("sed -i -e 's/{}/{}/g' {}".format( key, top, replpaths)) candidate_typos[key] = candidates print('Cannot find key = %r' % (key, )) print('Did you mean? %r' % (candidates, )) print('Quick fixes') print('\n'.join(sedlines)) # group by file just = max([0] + list(map(len, missing_keys))) missing_fpaths = [inverse[key] for key in missing_keys] for fpath in sorted(set(ub.flatten(missing_fpaths))): # ut.fix_embed_globals() subkeys = [k for k in missing_keys if fpath in inverse[k]] print('') ut.cprint('--- Missing Keys ---', 'blue') ut.cprint('fpath = %r' % (fpath, ), 'blue') ut.cprint('{} | {}'.format('Missing'.ljust(just), 'Did you mean?'), 'blue') for key in subkeys: print('{} | {}'.format(ut.highlight_text(key.ljust(just), 'red'), ' '.join(candidate_typos[key]))) # for key in list(bibtex_dict.keys()): if extra_dict is not None: # Extra database takes precidence over regular key_list = list(ut.unique(key_list + list(extra_dict.keys()))) for k, v in extra_dict.items(): bibtex_dict[k] = v full = ub.argflag('--full') for key in key_list: try: entry = bibtex_dict[key] except KeyError: continue self = BibTexCleaner(key, entry, full=full) if debug_author is not None: debug = debug_author in entry.get('author', '') if debug: ut.cprint(' --- ENTRY ---', 'yellow') print(ub.repr2(entry, nl=1)) entry = self.fix() # self.clip_abstract() # self.shorten_keys() # self.fix_authors() # self.fix_year() # old_pubval = self.fix_pubkey() # if old_pubval: # unknown_pubkeys.append(old_pubval) # self.fix_arxiv() # self.fix_general() # self.fix_paper_types() if debug: print(ub.repr2(entry, nl=1)) ut.cprint(' --- END ENTRY ---', 'yellow') bibtex_dict[key] = entry unwanted_keys = set(bibtex_dict.keys()) - set(key_list) if verbose: print('Removing unwanted %d entries' % (len(unwanted_keys))) ut.delete_dict_keys(bibtex_dict, unwanted_keys) if 0: d1 = bibtex_dict.copy() full = True for key, entry in d1.items(): self = BibTexCleaner(key, entry, full=full) pub = self.publication() if pub is None: 
print(self.entry['ENTRYTYPE']) old = self.fix_pubkey() x1 = self._pubval() x2 = self.standard_pubval(full=full) # if x2 is not None and len(x2) > 5: # print(ub.repr2(self.entry)) if x1 != x2: print('x2 = %r' % (x2, )) print('x1 = %r' % (x1, )) print(ub.repr2(self.entry)) # if 'CVPR' in self.entry.get('booktitle', ''): # if 'CVPR' != self.entry.get('booktitle', ''): # break if old: print('old = %r' % (old, )) d1[key] = self.entry if full: d1 = bibtex_dict.copy() import numpy as np import pandas as pd df = pd.DataFrame.from_dict(d1, orient='index') paged_items = df[~pd.isnull(df['pub_accro'])] has_pages = ~pd.isnull(paged_items['pages']) print('have pages {} / {}'.format(has_pages.sum(), len(has_pages))) print(ub.repr2(paged_items[~has_pages]['title'].values.tolist())) entrytypes = dict(list(df.groupby('pub_type'))) if False: # entrytypes['misc'] g = entrytypes['online'] g = g[g.columns[~np.all(pd.isnull(g), axis=0)]] entrytypes['book'] entrytypes['thesis'] g = entrytypes['article'] g = entrytypes['incollection'] g = entrytypes['conference'] def lookup_pub(e): if e == 'article': return 'journal', 'journal' elif e == 'incollection': return 'booksection', 'booktitle' elif e == 'conference': return 'conference', 'booktitle' return None, None for e, g in entrytypes.items(): print('e = %r' % (e, )) g = g[g.columns[~np.all(pd.isnull(g), axis=0)]] if 'pub_full' in g.columns: place_title = g['pub_full'].tolist() print(ub.repr2(ub.dict_hist(place_title))) else: print('Unknown publications') if 'report' in entrytypes: g = entrytypes['report'] missing = g[pd.isnull(g['title'])] if len(missing): print('Missing Title') print(ub.repr2(missing[['title', 'author']].values.tolist())) if 'journal' in entrytypes: g = entrytypes['journal'] g = g[g.columns[~np.all(pd.isnull(g), axis=0)]] missing = g[pd.isnull(g['journal'])] if len(missing): print('Missing Journal') print(ub.repr2(missing[['title', 'author']].values.tolist())) if 'conference' in entrytypes: g = entrytypes['conference'] g = g[g.columns[~np.all(pd.isnull(g), axis=0)]] missing = g[pd.isnull(g['booktitle'])] if len(missing): print('Missing Booktitle') print(ub.repr2(missing[['title', 'author']].values.tolist())) if 'incollection' in entrytypes: g = entrytypes['incollection'] g = g[g.columns[~np.all(pd.isnull(g), axis=0)]] missing = g[pd.isnull(g['booktitle'])] if len(missing): print('Missing Booktitle') print(ub.repr2(missing[['title', 'author']].values.tolist())) if 'thesis' in entrytypes: g = entrytypes['thesis'] g = g[g.columns[~np.all(pd.isnull(g), axis=0)]] missing = g[pd.isnull(g['institution'])] if len(missing): print('Missing Institution') print(ub.repr2(missing[['title', 'author']].values.tolist())) # import utool # utool.embed() # Overwrite BibDatabase structure bib_database._entries_dict = bibtex_dict bib_database.entries = list(bibtex_dict.values()) #conftitle_to_types_set_hist = {key: set(val) for key, val in conftitle_to_types_hist.items()} #print(ub.repr2(conftitle_to_types_set_hist)) print('Unknown conference keys:') print(ub.repr2(sorted(unknown_pubkeys))) print('len(unknown_pubkeys) = %r' % (len(unknown_pubkeys), )) writer = BibTexWriter() writer.contents = ['comments', 'entries'] writer.indent = ' ' writer.order_entries_by = ('type', 'author', 'year') new_bibtex_str = bibtexparser.dumps(bib_database, writer) # Need to check #jegou_aggregating_2012 # Fix the Journal Abreviations # References: # https://www.ieee.org/documents/trans_journal_names.pdf # Write out clean bibfile in ascii format clean_bib_fpath = 
ub.augpath(bib_fpath.replace(' ', '_'), suffix='_clean') if not ub.argflag('--dryrun'): ut.writeto(clean_bib_fpath, new_bibtex_str)
def double_depcache_graph():
    r"""
    CommandLine:
        python -m ibeis.scripts.specialdraw double_depcache_graph --show --testmode
        python -m ibeis.scripts.specialdraw double_depcache_graph --save=figures5/doubledepc.png --dpath ~/latex/cand/ --diskshow --figsize=8,20 --dpi=220 --testmode --show --clipwhite
        python -m ibeis.scripts.specialdraw double_depcache_graph --save=figures5/doubledepc.png --dpath ~/latex/cand/ --diskshow --figsize=8,20 --dpi=220 --testmode --show --clipwhite --arrow-width=.5
        python -m ibeis.scripts.specialdraw double_depcache_graph --save=figures5/doubledepc.png --dpath ~/latex/cand/ --diskshow --figsize=8,20 --dpi=220 --testmode --show --clipwhite --arrow-width=5

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.specialdraw import *  # NOQA
        >>> result = double_depcache_graph()
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    import ibeis
    import networkx as nx
    import plottool as pt
    pt.ensure_pylab_qt4()
    # pt.plt.xkcd()
    ibs = ibeis.opendb('testdb1')
    reduced = True
    implicit = True
    annot_graph = ibs.depc_annot.make_graph(reduced=reduced, implicit=implicit)
    image_graph = ibs.depc_image.make_graph(reduced=reduced, implicit=implicit)
    to_rename = ut.isect(image_graph.nodes(), annot_graph.nodes())
    nx.relabel_nodes(annot_graph, {x: 'annot_' + x for x in to_rename}, copy=False)
    nx.relabel_nodes(image_graph, {x: 'image_' + x for x in to_rename}, copy=False)
    graph = nx.compose_all([image_graph, annot_graph])
    #graph = nx.union_all([image_graph, annot_graph], rename=('image', 'annot'))
    # userdecision = ut.nx_makenode(graph, 'user decision', shape='rect', color=pt.DARK_YELLOW, style='diagonals')
    # userdecision = ut.nx_makenode(graph, 'user decision', shape='circle', color=pt.DARK_YELLOW)
    userdecision = ut.nx_makenode(graph, 'User decision', shape='rect',
                                  #width=100, height=100,
                                  color=pt.YELLOW, style='diagonals')
    #longcat = True
    longcat = False

    #edge = ('feat', 'neighbor_index')
    #data = graph.get_edge_data(*edge)[0]
    #print('data = %r' % (data,))
    #graph.remove_edge(*edge)
    ## hack
    #graph.add_edge('featweight', 'neighbor_index', **data)

    graph.add_edge('detections', userdecision, constraint=longcat, color=pt.PINK)
    graph.add_edge(userdecision, 'annotations', constraint=longcat, color=pt.PINK)
    # graph.add_edge(userdecision, 'annotations', implicit=True, color=[0, 0, 0])
    if not longcat:
        pass
        #graph.add_edge('images', 'annotations', style='invis')
        #graph.add_edge('thumbnails', 'annotations', style='invis')
        #graph.add_edge('thumbnails', userdecision, style='invis')
    graph.remove_node('Has_Notch')
    graph.remove_node('annotmask')
    layoutkw = {
        'ranksep': 5,
        'nodesep': 5,
        'dpi': 96,
        # 'nodesep': 1,
    }
    ns = 1000

    ut.nx_set_default_node_attributes(graph, 'fontsize', 72)
    ut.nx_set_default_node_attributes(graph, 'fontname', 'Ubuntu')
    ut.nx_set_default_node_attributes(graph, 'style', 'filled')

    ut.nx_set_default_node_attributes(graph, 'width', ns * ut.PHI)
    ut.nx_set_default_node_attributes(graph, 'height', ns * (1 / ut.PHI))

    #for u, v, d in graph.edge(data=True):
    for u, vkd in graph.edge.items():
        for v, dk in vkd.items():
            for k, d in dk.items():
                localid = d.get('local_input_id')
                if localid:
                    # d['headlabel'] = localid
                    if localid not in ['1']:
                        d['taillabel'] = localid
                    #d['label'] = localid
                if d.get('taillabel') in {'1'}:
                    del d['taillabel']

    node_alias = {
        'chips': 'Chip', 'images': 'Image', 'feat': 'Feat',
        'featweight': 'Feat Weights', 'thumbnails': 'Thumbnail',
        'detections': 'Detections', 'annotations': 'Annotation',
        'Notch_Tips': 'Notch Tips', 'probchip': 'Prob Chip',
        'Cropped_Chips': 'Cropped Chip', 'Trailing_Edge': 'Trailing\nEdge',
        'Block_Curvature': 'Block\nCurvature',
        # 'BC_DTW': 'block curvature /\n dynamic time warp',
        'BC_DTW': 'DTW Distance', 'vsone': 'Hots vsone',
        'feat_neighbs': 'Nearest\nNeighbors', 'neighbor_index': 'Neighbor\nIndex',
        'vsmany': 'Hots vsmany', 'annot_labeler': 'Annot Labeler',
        'labeler': 'Labeler', 'localizations': 'Localizations',
        'classifier': 'Classifier', 'sver': 'Spatial\nVerification',
        'Classifier': 'Existence', 'image_labeler': 'Image Labeler',
    }
    node_alias = {
        'Classifier': 'existence',
        'feat_neighbs': 'neighbors',
        'sver': 'spatial_verification',
        'Cropped_Chips': 'cropped_chip',
        'BC_DTW': 'dtw_distance',
        'Block_Curvature': 'curvature',
        'Trailing_Edge': 'trailing_edge',
        'Notch_Tips': 'notch_tips',
        'thumbnails': 'thumbnail',
        'images': 'image',
        'annotations': 'annotation',
        'chips': 'chip',
        #userdecision: 'User de'
    }
    node_alias = ut.delete_dict_keys(node_alias,
                                     ut.setdiff(node_alias.keys(), graph.nodes()))
    nx.relabel_nodes(graph, node_alias, copy=False)
    fontkw = dict(fontname='Ubuntu', fontweight='normal', fontsize=12)
    #pt.gca().set_aspect('equal')
    #pt.figure()
    pt.show_nx(graph, layoutkw=layoutkw, fontkw=fontkw)
    pt.zoom_factory()
def get_layer_info(layer): r""" Args: layer (?): Returns: ?: layer_info CommandLine: python -m ibeis_cnn.net_strs get_layer_info --show Example: >>> # DISABLE_DOCTEST >>> from ibeis_cnn.net_strs import * # NOQA >>> from ibeis_cnn import models >>> model = models.mnist.MNISTModel(batch_size=8, data_shape=(24, 24, 1), output_dims=10) >>> model.init_arch() >>> nn_layers = model.get_all_layers() >>> for layer in nn_layers: >>> layer_info = get_layer_info(layer) >>> print(ut.repr3(layer_info, nl=1)) """ import operator import ibeis_cnn.__LASAGNE__ as lasagne # Information that contributes to RAM usage import numpy as np # Get basic layer infos output_shape = lasagne.layers.get_output_shape(layer) input_shape = getattr(layer, 'input_shape', []) # Get number of outputs ignoring the batch size num_outputs = functools.reduce(operator.mul, output_shape[1:]) if len(input_shape): num_inputs = functools.reduce(operator.mul, input_shape[1:]) else: num_inputs = 0 # TODO: if we can ever support non float32 calculations this must change #layer_type = 'float32' layer_dtype = np.dtype('float32') # Get individual param infos param_infos = [] for param, tags in layer.params.items(): value = param.get_value() pbasename = param_basename(layer, param) param_info = ut.odict([ ('name', param.name), ('basename', pbasename), ('tags', tags), ('shape', value.shape), ('size', value.size), ('itemsize', value.dtype.itemsize), ('dtype', str(value.dtype)), ('bytes', value.size * value.dtype.itemsize), ]) def initializer_info(initclass): initclassname = initclass.__class__.__name__ if initclassname == 'Constant': spec = initclass.val else: spec = ut.odict() spec['type'] = initclassname for key, val in initclass.__dict__.items(): if isinstance(val, lasagne.init.Initializer): spec[key] = initializer_info(val) elif isinstance(val, type) and issubclass(val, lasagne.init.Initializer): spec[key] = val.__name__ #initializer_info(val()) else: spec[key] = val return spec if hasattr(layer, '_initializers'): #print('layer = %r' % (layer,)) initclass = layer._initializers[param] spec = initializer_info(initclass) param_info['init'] = spec param_infos.append(param_info) # Combine param infos param_str = surround(', '.join( [paramstr(layer, p, tags) for p, tags in layer.params.items()]), '[]') param_type_str = surround(', '.join( [repr(p.type) for p, tags in layer.params.items()]), '[]') num_params = sum([info['size'] for info in param_infos]) classalias_map = { 'ElemwiseSumLayer': 'ElemwiseSum', 'Conv2DCCLayer' : 'Conv2D', 'Conv2DDNNLayer' : 'Conv2D', 'Conv2DLayer' : 'Conv2D', 'MaxPool2DLayer': 'MaxPool2D', 'MaxPool2DCCLayer' : 'MaxPool2D', 'MaxPool2DDNNLayer' : 'MaxPool2D', 'LeakyRectify' : 'LReLU', 'InputLayer' : 'Input', 'GaussianNoiseLayer': 'Noise', 'DropoutLayer' : 'Dropout', 'DenseLayer' : 'Dense', 'NonlinearityLayer' : 'Nonlinearity', 'FlattenLayer' : 'Flatten', 'L2NormalizeLayer' : 'L2Norm', 'BatchNormLayer' : 'BatchNorm', 'BatchNormLayer2' : 'BatchNorm', } layer_attrs_ignore_dict = { 'MaxPool2D' : ['mode', 'ignore_border'], 'Dropout' : ['rescale'], 'Conv2D' : ['convolution'], 'BatchNorm': ['epsilon', 'mean', 'inv_std', 'axes', 'beta', 'gamma'], 'BatchNorm2': ['epsilon', 'mean', 'inv_std', 'axes', 'beta', 'gamma'], #'ElemwiseSum': ['merge_function', 'cropping'], #'ElemwiseSum': [], 'FeaturePoolLayer': ['axis'], } layer_attrs_dict = { #'ElemwiseSum': ['coeffs'], #'ElemwiseSum': ['coeffs', 'merge_function', 'cropping'], 'Noise' : ['sigma'], 'Input' : ['shape'], 'Dropout' : ['p', 'shared_axes'], 'Conv2D' : ['num_filters', 
'filter_size', 'stride', 'output_shape', 'num_groups'], 'MaxPool2D' : ['stride', 'pool_size', 'output_shape'], # 'mode'], 'Dense' : ['num_units', 'num_leading_axes'], 'SoftMax' : ['num_units', 'num_leading_axes'], 'L2Norm' : ['axis'], 'BatchNorm' : ['alpha'], 'BatchNorm2' : ['alpha'], 'FeaturePoolLayer': ['pool_size', 'pool_function'] } #layer_attrs_dict = {} all_ignore_attrs = ['nonlinearity', 'b', 'W', 'get_output_kwargs', 'name', 'input_shapes', 'input_layers', 'input_shape', 'input_layer', 'input_var', 'untie_biases', '_initializers', 'flip_filters', 'pad', 'params', 'n', '_is_main_layer'] classname = layer.__class__.__name__ classalias = classalias_map.get(classname, classname) #if classalias == 'FeaturePoolLayer' and ut.get_funcname(layer.pool_function) == 'max': # classalias = 'MaxOut' if classalias == 'Dense' and ut.get_funcname(layer.nonlinearity) == 'softmax': classalias = 'SoftMax' layer_attrs = ut.odict([ (key, getattr(layer, key)) for key in layer_attrs_dict.get(classalias, []) ]) ignore_attrs = (all_ignore_attrs + layer_attrs_ignore_dict.get(classalias, [])) if classalias not in layer_attrs_dict or (classalias == classname and len(layer_attrs) == 0): layer_attrs = layer.__dict__.copy() ut.delete_dict_keys(layer_attrs, ignore_attrs) for key in list(layer_attrs.keys()): val = layer_attrs[key] if ut.is_funclike(val): layer_attrs[key] = ut.get_funcname(val) attr_key_list = list(layer_attrs.keys()) missing_keys = (set(layer.__dict__.keys()) - set(ignore_attrs) - set(attr_key_list)) missing_keys = [k for k in missing_keys if not k.startswith('_')] #if layer_type == 'Conv2DCCLayer': # ut.embed() DEBUG = True if DEBUG and len(missing_keys) > 0: print('---') print(' * ' + classname) print(' * missing keys: %r' % (missing_keys,)) print(' * has keys: %r' % (attr_key_list,)) if True: #import utool #with utool.embed_on_exception_context: #raise AssertionError('MISSING KEYS') pass # handle None batch sizes if output_shape[0] is None: size = np.prod(output_shape[1:]) else: size = np.prod(output_shape) layer_info = ut.odict([ ('name', layer.name), ('classname', classname), ('classalias', classalias), ('output_shape', output_shape), ('input_shape', input_shape), ('num_outputs', num_outputs), ('num_inputs', num_inputs), ('size', size), ('itemsize', layer_dtype.itemsize), ('dtype', str(layer_dtype)), ('num_params', num_params), ('param_infos', param_infos), ('param_str', param_str), ('param_type_str', param_type_str), ('layer_attrs', layer_attrs), ('nonlinearity', None), ]) if hasattr(layer, 'nonlinearity'): try: nonlinearity = layer.nonlinearity.__name__ except AttributeError: nonlinearity = layer.nonlinearity.__class__.__name__ layer_info['nonlinearity'] = ut.odict([]) layer_info['nonlinearity']['type'] = nonlinearity layer_info['nonlinearity'].update(layer.nonlinearity.__dict__) #attr_str_list.append('nonlinearity={0}'.format(nonlinearity)) param_bytes = sum([info['bytes'] for info in param_infos]) layer_bytes = layer_info['size'] * layer_info['itemsize'] #if classname in ['BatchNormLayer', 'NonlinearityLayer']: # layer_bytes = 0 layer_info['bytes'] = layer_bytes layer_info['param_bytes'] = param_bytes layer_info['total_bytes'] = layer_bytes + param_bytes layer_info['total_memory'] = ut.byte_str2(layer_info['total_bytes']) return layer_info
def iters_until_threshold(): """ How many iterations of ewma until you hit the poisson / biniomal threshold This establishes a principled way to choose the threshold for the refresh criterion in my thesis. There are paramters --- moving parts --- that we need to work with: `a` the patience, `s` the span, and `mu` our ewma. `s` is a span paramter indicating how far we look back. `mu` is the average number of label-changing reviews in roughly the last `s` manual decisions. These numbers are used to estimate the probability that any of the next `a` manual decisions will be label-chanigng. When that probability falls below a threshold we terminate. The goal is to choose `a`, `s`, and the threshold `t`, such that the probability will fall below the threshold after a maximum of `a` consecutive non-label-chaning reviews. IE we want to tie the patience paramter (how far we look ahead) to how far we actually are willing to go. """ import numpy as np import utool as ut import sympy as sym i = sym.symbols('i', integer=True, nonnegative=True, finite=True) # mu_i = sym.symbols('mu_i', integer=True, nonnegative=True, finite=True) s = sym.symbols('s', integer=True, nonnegative=True, finite=True) # NOQA thresh = sym.symbols('tau', real=True, nonnegative=True, finite=True) # NOQA alpha = sym.symbols('alpha', real=True, nonnegative=True, finite=True) # NOQA c_alpha = sym.symbols('c_alpha', real=True, nonnegative=True, finite=True) # patience a = sym.symbols('a', real=True, nonnegative=True, finite=True) available_subs = { a: 20, s: a, alpha: 2 / (s + 1), c_alpha: (1 - alpha), } def dosubs(expr, d=available_subs): """ recursive expression substitution """ expr1 = expr.subs(d) if expr == expr1: return expr1 else: return dosubs(expr1, d=d) # mu is either the support for the poisson distribution # or is is the p in the binomial distribution # It is updated at timestep i based on ewma, assuming each incoming responce is 0 mu_0 = 1.0 mu_i = c_alpha**i # Estimate probability that any event will happen in the next `a` reviews # at time `i`. 
poisson_i = 1 - sym.exp(-mu_i * a) binom_i = 1 - (1 - mu_i)**a # Expand probabilities to be a function of i, s, and a part = ut.delete_dict_keys(available_subs.copy(), [a, s]) mu_i = dosubs(mu_i, d=part) poisson_i = dosubs(poisson_i, d=part) binom_i = dosubs(binom_i, d=part) if True: # ewma of mu at time i if review is always not label-changing (meaningful) mu_1 = c_alpha * mu_0 # NOQA mu_2 = c_alpha * mu_1 # NOQA if True: i_vals = np.arange(0, 100) mu_vals = np.array( [dosubs(mu_i).subs({ i: i_ }).evalf() for i_ in i_vals]) # NOQA binom_vals = np.array( [dosubs(binom_i).subs({ i: i_ }).evalf() for i_ in i_vals]) # NOQA poisson_vals = np.array( [dosubs(poisson_i).subs({ i: i_ }).evalf() for i_ in i_vals]) # NOQA # Find how many iters it actually takes my expt to terminate thesis_draft_thresh = np.exp(-2) np.where(mu_vals < thesis_draft_thresh)[0] np.where(binom_vals < thesis_draft_thresh)[0] np.where(poisson_vals < thesis_draft_thresh)[0] sym.pprint(sym.simplify(mu_i)) sym.pprint(sym.simplify(binom_i)) sym.pprint(sym.simplify(poisson_i)) # Find the thresholds that force termination after `a` reviews have passed # do this by setting i=a poisson_thresh = poisson_i.subs({i: a}) binom_thresh = binom_i.subs({i: a}) print('Poisson thresh') print(sym.latex(sym.Eq(thresh, poisson_thresh))) print(sym.latex(sym.Eq(thresh, sym.simplify(poisson_thresh)))) poisson_thresh.subs({a: 115, s: 30}).evalf() sym.pprint(sym.Eq(thresh, poisson_thresh)) sym.pprint(sym.Eq(thresh, sym.simplify(poisson_thresh))) print('Binomial thresh') sym.pprint(sym.simplify(binom_thresh)) sym.pprint(sym.simplify(poisson_thresh.subs({s: a}))) def taud(coeff): return coeff * 360 if 'poisson_cache' not in vars(): poisson_cache = {} binom_cache = {} S, A = np.meshgrid(np.arange(1, 150, 1), np.arange(0, 150, 1)) import plottool as pt SA_coords = list(zip(S.ravel(), A.ravel())) for sval, aval in ut.ProgIter(SA_coords): if (sval, aval) not in poisson_cache: poisson_cache[(sval, aval)] = float( poisson_thresh.subs({ a: aval, s: sval }).evalf()) poisson_zdata = np.array([ poisson_cache[(sval, aval)] for sval, aval in SA_coords ]).reshape(A.shape) fig = pt.figure(fnum=1, doclf=True) pt.gca().set_axis_off() pt.plot_surface3d(S, A, poisson_zdata, xlabel='s', ylabel='a', rstride=3, cstride=3, zlabel='poisson', mode='wire', contour=True, title='poisson3d') pt.gca().set_zlim(0, 1) pt.gca().view_init(elev=taud(1 / 16), azim=taud(5 / 8)) fig.set_size_inches(10, 6) fig.savefig('a-s-t-poisson3d.png', dpi=300, bbox_inches=pt.extract_axes_extents(fig, combine=True)) for sval, aval in ut.ProgIter(SA_coords): if (sval, aval) not in binom_cache: binom_cache[(sval, aval)] = float( binom_thresh.subs({ a: aval, s: sval }).evalf()) binom_zdata = np.array([ binom_cache[(sval, aval)] for sval, aval in SA_coords ]).reshape(A.shape) fig = pt.figure(fnum=2, doclf=True) pt.gca().set_axis_off() pt.plot_surface3d(S, A, binom_zdata, xlabel='s', ylabel='a', rstride=3, cstride=3, zlabel='binom', mode='wire', contour=True, title='binom3d') pt.gca().set_zlim(0, 1) pt.gca().view_init(elev=taud(1 / 16), azim=taud(5 / 8)) fig.set_size_inches(10, 6) fig.savefig('a-s-t-binom3d.png', dpi=300, bbox_inches=pt.extract_axes_extents(fig, combine=True)) # Find point on the surface that achieves a reasonable threshold # Sympy can't solve this # sym.solve(sym.Eq(binom_thresh.subs({s: 50}), .05)) # sym.solve(sym.Eq(poisson_thresh.subs({s: 50}), .05)) # Find a numerical solution def solve_numeric(expr, target, solve_for, fixed={}, method=None, bounds=None): """ Args: expr (Expr): 
symbolic expression target (float): numberic value solve_for (sympy.Symbol): The symbol you care about fixed (dict): fixed values of the symbol solve_numeric(poisson_thresh, .05, {s: 30}, method=None) solve_numeric(poisson_thresh, .05, {s: 30}, method='Nelder-Mead') solve_numeric(poisson_thresh, .05, {s: 30}, method='BFGS') """ import scipy.optimize # Find the symbol you want to solve for want_symbols = expr.free_symbols - set(fixed.keys()) # TODO: can probably extend this to multiple params assert len(want_symbols) == 1, 'specify all but one var' assert solve_for == list(want_symbols)[0] fixed_expr = expr.subs(fixed) def func(a1): expr_value = float(fixed_expr.subs({solve_for: a1}).evalf()) return (expr_value - target)**2 if not fixed: a1 = 0 else: a1 = list(fixed.values())[0] # if method is None: # method = 'Nelder-Mead' # method = 'Newton-CG' # method = 'BFGS' result = scipy.optimize.minimize(func, x0=a1, method=method, bounds=bounds) if not result.success: print('\n') print(result) print('\n') return result # Numeric measurments of thie line thresh_vals = [.001, .01, .05, .1, .135] svals = np.arange(1, 100) target_poisson_plots = {} for target in ut.ProgIter(thresh_vals, bs=False, freq=1): poisson_avals = [] for sval in ut.ProgIter(svals, 'poisson', freq=1): expr = poisson_thresh fixed = {s: sval} want = a aval = solve_numeric(expr, target, want, fixed, method='Nelder-Mead').x[0] poisson_avals.append(aval) target_poisson_plots[target] = (svals, poisson_avals) fig = pt.figure(fnum=3) for target, dat in target_poisson_plots.items(): pt.plt.plot(*dat, label='prob={}'.format(target)) pt.gca().set_xlabel('s') pt.gca().set_ylabel('a') pt.legend() pt.gca().set_title('poisson') fig.set_size_inches(5, 3) fig.savefig('a-vs-s-poisson.png', dpi=300, bbox_inches=pt.extract_axes_extents(fig, combine=True)) target_binom_plots = {} for target in ut.ProgIter(thresh_vals, bs=False, freq=1): binom_avals = [] for sval in ut.ProgIter(svals, 'binom', freq=1): aval = solve_numeric(binom_thresh, target, a, { s: sval }, method='Nelder-Mead').x[0] binom_avals.append(aval) target_binom_plots[target] = (svals, binom_avals) fig = pt.figure(fnum=4) for target, dat in target_binom_plots.items(): pt.plt.plot(*dat, label='prob={}'.format(target)) pt.gca().set_xlabel('s') pt.gca().set_ylabel('a') pt.legend() pt.gca().set_title('binom') fig.set_size_inches(5, 3) fig.savefig('a-vs-s-binom.png', dpi=300, bbox_inches=pt.extract_axes_extents(fig, combine=True)) # ---- if True: fig = pt.figure(fnum=5, doclf=True) s_vals = [1, 2, 3, 10, 20, 30, 40, 50] for sval in s_vals: pp = poisson_thresh.subs({s: sval}) a_vals = np.arange(0, 200) pp_vals = np.array( [float(pp.subs({ a: aval }).evalf()) for aval in a_vals]) # NOQA pt.plot(a_vals, pp_vals, label='s=%r' % (sval, )) pt.legend() pt.gca().set_xlabel('a') pt.gca().set_ylabel('poisson prob after a reviews') fig.set_size_inches(5, 3) fig.savefig('a-vs-thresh-poisson.png', dpi=300, bbox_inches=pt.extract_axes_extents(fig, combine=True)) fig = pt.figure(fnum=6, doclf=True) s_vals = [1, 2, 3, 10, 20, 30, 40, 50] for sval in s_vals: pp = binom_thresh.subs({s: sval}) a_vals = np.arange(0, 200) pp_vals = np.array( [float(pp.subs({ a: aval }).evalf()) for aval in a_vals]) # NOQA pt.plot(a_vals, pp_vals, label='s=%r' % (sval, )) pt.legend() pt.gca().set_xlabel('a') pt.gca().set_ylabel('binom prob after a reviews') fig.set_size_inches(5, 3) fig.savefig('a-vs-thresh-binom.png', dpi=300, bbox_inches=pt.extract_axes_extents(fig, combine=True)) # ------- fig = pt.figure(fnum=5, doclf=True) 
a_vals = [1, 2, 3, 10, 20, 30, 40, 50] for aval in a_vals: pp = poisson_thresh.subs({a: aval}) s_vals = np.arange(1, 200) pp_vals = np.array( [float(pp.subs({ s: sval }).evalf()) for sval in s_vals]) # NOQA pt.plot(s_vals, pp_vals, label='a=%r' % (aval, )) pt.legend() pt.gca().set_xlabel('s') pt.gca().set_ylabel('poisson prob') fig.set_size_inches(5, 3) fig.savefig('s-vs-thresh-poisson.png', dpi=300, bbox_inches=pt.extract_axes_extents(fig, combine=True)) fig = pt.figure(fnum=5, doclf=True) a_vals = [1, 2, 3, 10, 20, 30, 40, 50] for aval in a_vals: pp = binom_thresh.subs({a: aval}) s_vals = np.arange(1, 200) pp_vals = np.array( [float(pp.subs({ s: sval }).evalf()) for sval in s_vals]) # NOQA pt.plot(s_vals, pp_vals, label='a=%r' % (aval, )) pt.legend() pt.gca().set_xlabel('s') pt.gca().set_ylabel('binom prob') fig.set_size_inches(5, 3) fig.savefig('s-vs-thresh-binom.png', dpi=300, bbox_inches=pt.extract_axes_extents(fig, combine=True)) #--------------------- # Plot out a table mu_i.subs({s: 75, a: 75}).evalf() poisson_thresh.subs({s: 75, a: 75}).evalf() sval = 50 for target, dat in target_poisson_plots.items(): slope = np.median(np.diff(dat[1])) aval = int(np.ceil(sval * slope)) thresh = float(poisson_thresh.subs({s: sval, a: aval}).evalf()) print('aval={}, sval={}, thresh={}, target={}'.format( aval, sval, thresh, target)) for target, dat in target_binom_plots.items(): slope = np.median(np.diff(dat[1])) aval = int(np.ceil(sval * slope)) pass
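The symbolic derivation in iters_until_threshold boils down to three formulas: the ewma support mu_i = (1 - alpha)^i with alpha = 2 / (s + 1), and the two termination probabilities 1 - exp(-mu_i * a) (Poisson) and 1 - (1 - mu_i)^a (binomial). A quick numeric restatement of those same formulas without sympy, using illustrative values of s and a (the specific numbers are not from the thesis):

# Numeric sketch of the refresh-criterion probabilities derived above.
import numpy as np

def refresh_probs(i, s, a):
    alpha = 2 / (s + 1)              # ewma weight for span s
    mu_i = (1 - alpha) ** i          # ewma after i non-label-changing reviews
    poisson_i = 1 - np.exp(-mu_i * a)
    binom_i = 1 - (1 - mu_i) ** a
    return mu_i, poisson_i, binom_i

# Probability that any of the next a=20 reviews is label-changing,
# after i consecutive meaningless reviews with span s=20:
for i in [0, 10, 20, 40]:
    mu_i, poisson_i, binom_i = refresh_probs(i, s=20, a=20)
    print(i, round(mu_i, 4), round(poisson_i, 4), round(binom_i, 4))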
def start_new_viz(simp, nRows, nCols, fnum=None):
    import plottool as pt
    rchip1, rchip2, kpts1, vecs1, kpts2, vecs2, dlen_sqrd2 = simp.testtup
    fm_ORIG, fs_ORIG, fm_RAT, fs_RAT, fm_SV, fs_SV, H_RAT = simp.basetup
    fm_SC, fs_SC, fm_SCR, fs_SCR, fm_SCRSV, fs_SCRSV, H_SCR = simp.nexttup
    fm_norm_RAT, fm_norm_SV = simp.base_meta
    fm_norm_SC, fm_norm_SCR, fm_norm_SVSCR = simp.next_meta
    locals_ = ut.delete_dict_keys(locals(), ['title'])
    keytitle_tups = [
        ('ORIG', 'initial neighbors'),
        ('RAT', 'ratio filtered'),
        ('SV', 'ratio filtered + SV'),
        ('SC', 'spatially constrained'),
        ('SCR', 'spatially constrained + ratio'),
        ('SCRSV', 'spatially constrained + SV'),
    ]
    keytitle_dict = dict(keytitle_tups)
    key_list = ut.get_list_column(keytitle_tups, 0)
    matchtup_dict = {
        key: (locals_['fm_' + key], locals_['fs_' + key])
        for key in key_list
    }
    normtup_dict = {
        key: locals_.get('fm_norm_' + key, None)
        for key in key_list
    }
    next_pnum = pt.make_pnum_nextgen(nRows=nRows, nCols=nCols)
    if fnum is None:
        fnum = pt.next_fnum()
    INTERACTIVE = True
    if INTERACTIVE:
        from plottool import interact_helpers as ih
        fig = ih.begin_interaction('qres', fnum)
        ih.connect_callback(fig, 'button_press_event', on_single_match_clicked)
    else:
        pt.figure(fnum=fnum, doclf=True, docla=True)

    def show_matches_(key, **kwargs):
        assert key in key_list, 'unknown key=%r' % (key,)
        showkw = locals_.copy()
        pnum = next_pnum()
        showkw['pnum'] = pnum
        showkw['fnum'] = fnum
        showkw.update(kwargs)
        _fm, _fs = matchtup_dict[key]
        title = keytitle_dict[key]
        if kwargs.get('coverage'):
            from vtool import coverage_kpts
            kpts2, rchip2 = ut.dict_get(locals_, ('kpts2', 'rchip2'))
            kpts2_m = kpts2.take(_fm.T[1], axis=0)
            chipshape2 = rchip2.shape
            chipsize2 = chipshape2[0:2][::-1]
            coverage_mask = coverage_kpts.make_kpts_coverage_mask(
                kpts2_m, chipsize2, fx2_score=_fs, resize=True,
                return_patch=False)
            pt.imshow(coverage_mask * 255, pnum=pnum, fnum=fnum)
        else:
            if kwargs.get('norm', False):
                _fm = normtup_dict[key]
                assert _fm is not None, key
                showkw['cmap'] = 'cool'
                title += ' normalizers'
            show_matches(_fm, _fs, title=title, key=key, **showkw)
    # state hack
    #show_matches_.next_pnum = next_pnum
    return show_matches_
def fix_conference_title_names(clean_text, key_list=None): """ mass bibtex fixes CommandLine: ./fix_bib.py """ # Find citations from the tex documents if key_list is None: key_list = find_used_citations(testdata_fpaths()) key_list = list(set(key_list)) ignore = ['JP', '?'] for item in ignore: try: key_list.remove(item) except ValueError: pass unknown_confkeys = [] conference_keys = [ 'journal', 'booktitle', ] ignore_confkey = [] bib_database = bibtexparser.loads(clean_text) bibtex_dict = bib_database.get_entry_dict() isect = set(ignore_confkey).intersection( set(constants_tex_fixes.CONFERENCE_TITLE_MAPS.keys())) assert len(isect) == 0, repr(isect) #ut.embed() #conftitle_to_types_hist = ut.ddict(list) type_key = 'ENTRYTYPE' debug_author = ut.get_argval('--debug-author', type_=str, default=None) # ./fix_bib.py --debug_author=Kappes for key in bibtex_dict.keys(): entry = bibtex_dict[key] if debug_author is not None: debug = debug_author in entry.get('author', '') else: debug = False if debug: print(' --- ENTRY ---') print(ut.repr3(entry)) #if type_key not in entry: # #entry[type_key] = entry['ENTRYTYPE'] # ut.embed() # Clip abstrat if 'abstract' in entry: entry['abstract'] = ' '.join(entry['abstract'].split(' ')[0:7]) # Remove Keys remove_keys = [ 'note', 'urldate', 'series', 'publisher', 'isbn', 'editor', 'shorttitle', 'copyright', 'language', 'month', # These will be put back in #'number', #'pages', #'volume', ] entry = ut.delete_dict_keys(entry, remove_keys) # Fix conference names confkeys = list(set(entry.keys()).intersection(set(conference_keys))) #entry = ut.delete_dict_keys(entry, ['abstract']) # TODO: FIX THESE IF NEEDBE #if len(confkeys) == 0: # print(ut.dict_str(entry)) # print(entry.keys()) if len(confkeys) == 1: confkey = confkeys[0] old_confval = entry[confkey] # Remove curly braces old_confval = old_confval.replace('{', '').replace('}', '') if old_confval in ignore_confkey: print(ut.dict_str(entry)) continue new_confval_candiates = [] if old_confval.startswith('arXiv'): continue # for conf_title, patterns in constants_tex_fixes.CONFERENCE_TITLE_MAPS.items(): for conf in constants_tex_fixes.CONFERENCES: if conf.matches(old_confval): conf_title = conf.accro() if debug: print('old_confval = %r' % (old_confval, )) print('conf_title = %r' % (conf_title, )) new_confval = conf_title new_confval_candiates.append(new_confval) if len(new_confval_candiates) == 0: new_confval = None elif len(new_confval_candiates) == 1: new_confval = new_confval_candiates[0] else: assert False, 'double match' if new_confval is None: if key in key_list: unknown_confkeys.append(old_confval) #print(old_confval) else: # Overwrite old confval entry[confkey] = new_confval # Record info about types of conferneces true_confval = entry[confkey].replace('{', '').replace('}', '') # FIX ENTRIES THAT SHOULD BE CONFERENCES if true_confval in constants_tex_fixes.CONFERENCE_LIST: if entry[type_key] == 'inproceedings': pass #print(confkey) #print(ut.dict_str(entry)) elif entry[type_key] == 'article': entry['booktitle'] = entry['journal'] del entry['journal'] #print(ut.dict_str(entry)) elif entry[type_key] == 'incollection': pass else: raise AssertionError('UNKNOWN TYPE: %r' % (entry[type_key], )) if 'booktitle' not in entry: print('DOES NOT HAVE CORRECT CONFERENCE KEY') print(ut.dict_str(entry)) assert 'journal' not in entry, 'should not have journal' #print(entry['type']) entry[type_key] = 'inproceedings' # FIX ENTRIES THAT SHOULD BE JOURNALS if true_confval in constants_tex_fixes.JOURNAL_LIST: if entry[type_key] == 
'article': pass elif entry[type_key] == 'inproceedings': pass #print(ut.dict_str(entry)) elif entry[type_key] == 'incollection': pass else: raise AssertionError('UNKNOWN TYPE: %r' % (entry['type'], )) if 'journal' not in entry: print('DOES NOT HAVE CORRECT CONFERENCE KEY') print(ut.dict_str(entry)) assert 'booktitle' not in entry, 'should not have booktitle' #print(entry['type']) #entry['type'] = 'article' #conftitle_to_types_hist[true_confval].append(entry['type']) elif len(confkeys) > 1: raise AssertionError('more than one confkey=%r' % (confkeys, )) # Fix Authors if 'author' in entry: authors = six.text_type(entry['author']) for truename, alias_list in constants_tex_fixes.AUTHOR_NAME_MAPS.items( ): pattern = six.text_type( ut.regex_or([ ut.util_regex.whole_word(alias) for alias in alias_list ])) authors = re.sub(pattern, six.text_type(truename), authors, flags=re.UNICODE) entry['author'] = authors """ article = journal inprocedings = converence paper """ #conftitle_to_types_set_hist = {key: set(val) for key, val in conftitle_to_types_hist.items()} #print(ut.dict_str(conftitle_to_types_set_hist)) print(ut.list_str(sorted(unknown_confkeys))) print('len(unknown_confkeys) = %r' % (len(unknown_confkeys), )) writer = BibTexWriter() writer.contents = ['comments', 'entries'] writer.indent = ' ' writer.order_entries_by = ('type', 'author', 'year') new_bibtex_str = bibtexparser.dumps(bib_database, writer) return new_bibtex_str
def bigcache_vsone(qreq_, hyper_params):
    """
    Cached output of one-vs-one matches

    >>> from wbia.scripts.script_vsone import *  # NOQA
    >>> self = OneVsOneProblem()
    >>> qreq_ = self.qreq_
    >>> hyper_params = self.hyper_params
    """
    import vtool as vt
    import wbia

    # Get a set of training pairs
    ibs = qreq_.ibs
    cm_list = qreq_.execute()
    infr = wbia.AnnotInference.from_qreq_(qreq_, cm_list, autoinit=True)

    # Per query choose a set of correct, incorrect, and random training pairs
    aid_pairs_ = infr._cm_training_pairs(
        rng=np.random.RandomState(42), **hyper_params.pair_sample
    )
    aid_pairs_ = vt.unique_rows(np.array(aid_pairs_), directed=False).tolist()
    pb_aid_pairs_ = photobomb_samples(ibs)
    # TODO: try to add in more non-comparable samples
    aid_pairs_ = pb_aid_pairs_ + aid_pairs_
    aid_pairs_ = vt.unique_rows(np.array(aid_pairs_))

    # ======================================
    # Compute one-vs-one scores and local_measures
    # ======================================

    # Prepare lazy attributes for annotations
    qreq_ = infr.qreq_
    ibs = qreq_.ibs
    qconfig2_ = qreq_.extern_query_config2
    dconfig2_ = qreq_.extern_data_config2
    qannot_cfg = ibs.depc.stacked_config(None, 'featweight', qconfig2_)
    dannot_cfg = ibs.depc.stacked_config(None, 'featweight', dconfig2_)

    # Remove any pairs missing features
    if dannot_cfg == qannot_cfg:
        unique_annots = ibs.annots(np.unique(np.array(aid_pairs_)), config=dannot_cfg)
        bad_aids = unique_annots.compress(~np.array(unique_annots.num_feats) > 0).aids
        bad_aids = set(bad_aids)
    else:
        annots1_ = ibs.annots(ut.unique(ut.take_column(aid_pairs_, 0)), config=qannot_cfg)
        annots2_ = ibs.annots(ut.unique(ut.take_column(aid_pairs_, 1)), config=dannot_cfg)
        bad_aids1 = annots1_.compress(~np.array(annots1_.num_feats) > 0).aids
        bad_aids2 = annots2_.compress(~np.array(annots2_.num_feats) > 0).aids
        bad_aids = set(bad_aids1 + bad_aids2)
    subset_idxs = np.where(
        [not (a1 in bad_aids or a2 in bad_aids) for a1, a2 in aid_pairs_]
    )[0]
    # Keep only a random subset
    if hyper_params.subsample:
        rng = np.random.RandomState(3104855634)
        num_max = hyper_params.subsample
        if num_max < len(subset_idxs):
            subset_idxs = rng.choice(subset_idxs, size=num_max, replace=False)
            subset_idxs = sorted(subset_idxs)

    # Take the current selection
    aid_pairs = ut.take(aid_pairs_, subset_idxs)

    if True:
        # NEW WAY
        config = hyper_params.vsone_assign
        # TODO: ensure annot probs like chips and features can be appropriately
        # set via qreq_ config or whatever
        matches = infr.exec_vsone_subset(aid_pairs, config=config)
    else:
        query_aids = ut.take_column(aid_pairs, 0)
        data_aids = ut.take_column(aid_pairs, 1)

        # OLD WAY
        # Determine a unique set of annots per config
        configured_aids = ut.ddict(set)
        configured_aids[qannot_cfg].update(query_aids)
        configured_aids[dannot_cfg].update(data_aids)

        # Make efficient annot-object representation
        configured_obj_annots = {}
        for config, aids in configured_aids.items():
            annots = ibs.annots(sorted(list(aids)), config=config)
            configured_obj_annots[config] = annots

        annots1 = configured_obj_annots[qannot_cfg].loc(query_aids)
        annots2 = configured_obj_annots[dannot_cfg].loc(data_aids)

        # Get hash based on visual annotation appearance of each pair
        # as well as algorithm configurations used to compute those properties
        qvuuids = annots1.visual_uuids
        dvuuids = annots2.visual_uuids
        qcfgstr = annots1._config.get_cfgstr()
        dcfgstr = annots2._config.get_cfgstr()
        annots_cfgstr = ut.hashstr27(qcfgstr) + ut.hashstr27(dcfgstr)
        vsone_uuids = [
            ut.combine_uuids(uuids, salt=annots_cfgstr)
            for uuids in ut.ProgIter(
                zip(qvuuids, dvuuids), length=len(qvuuids), label='hashing ids'
            )
        ]

        # Combine into a big cache for the entire 1-v-1 matching run
        big_uuid = ut.hashstr_arr27(vsone_uuids, '', pathsafe=True)
        cacher = ut.Cacher('vsone_v7', cfgstr=str(big_uuid), appname='vsone_rf_train')

        cached_data = cacher.tryload()
        if cached_data is not None:
            # Caching doesn't work 100% for PairwiseMatch object, so we need to do
            # some postprocessing
            configured_lazy_annots = ut.ddict(dict)
            for config, annots in configured_obj_annots.items():
                annot_dict = configured_lazy_annots[config]
                for _annot in ut.ProgIter(annots.scalars(), label='make lazy dict'):
                    annot_dict[_annot.aid] = _annot._make_lazy_dict()

            # Extract pairs of annot objects (with shared caches)
            lazy_annots1 = ut.take(configured_lazy_annots[qannot_cfg], query_aids)
            lazy_annots2 = ut.take(configured_lazy_annots[dannot_cfg], data_aids)

            # Create a set of PairwiseMatches with the correct annot properties
            matches = [
                vt.PairwiseMatch(annot1, annot2)
                for annot1, annot2 in zip(lazy_annots1, lazy_annots2)
            ]

            # Updating a new matches dictionary ensures the annot1/annot2 properties
            # are set correctly
            for key, cached_matches in list(cached_data.items()):
                fixed_matches = [match.copy() for match in matches]
                for fixed, internal in zip(fixed_matches, cached_matches):
                    dict_ = internal.__dict__
                    ut.delete_dict_keys(dict_, ['annot1', 'annot2'])
                    fixed.__dict__.update(dict_)
                cached_data[key] = fixed_matches
        else:
            cached_data = vsone_(
                qreq_,
                query_aids,
                data_aids,
                qannot_cfg,
                dannot_cfg,
                configured_obj_annots,
                hyper_params,
            )
            cacher.save(cached_data)

        # key_ = 'SV_LNBNN'
        key_ = 'RAT_SV'
        # for key in list(cached_data.keys()):
        #     if key != 'SV_LNBNN':
        #         del cached_data[key]
        matches = cached_data[key_]
    return matches, infr