def on_pick(event, infr=None):
    import wbia.plottool as pt
    logger.info('ON PICK: %r' % (event,))
    artist = event.artist
    plotdat = pt.get_plotdat_dict(artist)
    if plotdat:
        if 'node' in plotdat:
            all_node_data = ut.sort_dict(plotdat['node_data'].copy())
            visual_node_data = ut.dict_subset(all_node_data, infr.visual_node_attrs, None)
            node_data = ut.delete_dict_keys(all_node_data, infr.visual_node_attrs)
            node = plotdat['node']
            node_data['degree'] = infr.graph.degree(node)
            node_label = infr.pos_graph.node_label(node)
            logger.info('visual_node_data: ' + ut.repr2(visual_node_data, nl=1))
            logger.info('node_data: ' + ut.repr2(node_data, nl=1))
            ut.cprint('node: ' + ut.repr2(plotdat['node']), 'blue')
            logger.info('(pcc) node_label = %r' % (node_label,))
            logger.info('artist = %r' % (artist,))
        elif 'edge' in plotdat:
            all_edge_data = ut.sort_dict(plotdat['edge_data'].copy())
            logger.info(infr.repr_edge_data(all_edge_data))
            ut.cprint('edge: ' + ut.repr2(plotdat['edge']), 'blue')
            logger.info('artist = %r' % (artist,))
        else:
            logger.info('???: ' + ut.repr2(plotdat))
    logger.info(ut.get_timestamp())
def cheetah_stats(ibs):
    filters = [
        dict(view=['right', 'frontright', 'backright'], minqual='good'),
        dict(view=['right', 'frontright', 'backright']),
    ]
    for filtkw in filters:
        annots = ibs.annots(ibs.filter_annots_general(**filtkw))
        unique_nids, grouped_annots = annots.group(annots.nids)
        annots_per_name = ut.lmap(len, grouped_annots)
        annots_per_name_freq = ut.dict_hist(annots_per_name)

        def bin_mapper(num):
            if num < 5:
                return (num, num + 1)
            else:
                for bin, mod in [(20, 5), (50, 10)]:
                    if num < bin:
                        low = (num // mod) * mod
                        high = low + mod
                        return (low, high)
                if num >= bin:
                    return (bin, None)
                else:
                    assert False, str(num)

        hist = ut.ddict(lambda: 0)
        for num in annots_per_name:
            hist[bin_mapper(num)] += 1
        hist = ut.sort_dict(hist)

        print('------------')
        print('filters = %s' % ut.repr4(filtkw))
        print('num_annots = %r' % (len(annots)))
        print('num_names = %r' % (len(unique_nids)))
        print('annots_per_name_freq = %s' % (ut.repr4(annots_per_name_freq)))
        print('annots_per_name_freq (ranges) = %s' % (ut.repr4(hist)))
        assert sum(hist.values()) == len(unique_nids)
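# A small standalone sketch of the binning scheme that bin_mapper (above) applies to
# annots-per-name counts: exact unit-width bins below 5, width-5 bins for [5, 20),
# width-10 bins for [20, 50), and an open-ended (50, None) bin beyond that. The helper
# name is hypothetical; it only restates the logic of the inner function for clarity.
def _bin_mapper_sketch(num):
    if num < 5:
        return (num, num + 1)
    for upper, width in [(20, 5), (50, 10)]:
        if num < upper:
            low = (num // width) * width
            return (low, low + width)
    return (50, None)

# e.g. _bin_mapper_sketch(3) -> (3, 4), _bin_mapper_sketch(17) -> (15, 20),
#      _bin_mapper_sketch(34) -> (30, 40), _bin_mapper_sketch(73) -> (50, None)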
def get_cfgstr(nnindexer, noquery=False):
    r"""
    returns string which uniquely identified configuration and support data

    Args:
        noquery (bool): if True cfgstr is only relevant to building the
            index. No search params are returned (default = False)

    Returns:
        str: flann_cfgstr

    CommandLine:
        python -m wbia.algo.hots.neighbor_index --test-get_cfgstr

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots.neighbor_index import *  # NOQA
        >>> import wbia
        >>> cfgdict = dict(fg_on=False)
        >>> qreq_ = wbia.testdata_qreq_(defaultdb='testdb1', p='default:fg_on=False')
        >>> qreq_.load_indexer()
        >>> nnindexer = qreq_.indexer
        >>> noquery = True
        >>> flann_cfgstr = nnindexer.get_cfgstr(noquery)
        >>> result = ('flann_cfgstr = %s' % (str(flann_cfgstr),))
        >>> print(result)
        flann_cfgstr = _FLANN((algo=kdtree,seed=42,t=8,))_VECS((11260,128)gj5nea@ni0%f3aja)
    """
    flann_cfgstr_list = []
    use_params_hash = True
    use_data_hash = True
    if use_params_hash:
        flann_defaults = vt.get_flann_params(nnindexer.flann_params['algorithm'])
        # flann_params_clean = flann_defaults.copy()
        flann_params_clean = ut.sort_dict(flann_defaults)
        ut.update_existing(flann_params_clean, nnindexer.flann_params)
        if noquery:
            ut.delete_dict_keys(flann_params_clean, ['checks'])
        shortnames = dict(algorithm='algo', checks='chks', random_seed='seed', trees='t')
        short_params = ut.odict([
            (shortnames.get(key, key), str(val)[0:7])
            for key, val in six.iteritems(flann_params_clean)
        ])
        flann_valsig_ = ut.repr2(short_params, nl=False, explicit=True, strvals=True)
        flann_valsig_ = flann_valsig_.lstrip('dict').replace(' ', '')
        # flann_valsig_ = str(list(flann_params.values()))
        # flann_valsig = ut.remove_chars(flann_valsig_, ', \'[]')
        flann_cfgstr_list.append('_FLANN(' + flann_valsig_ + ')')
    if use_data_hash:
        vecs_hashstr = ut.hashstr_arr(nnindexer.idx2_vec, '_VECS')
        flann_cfgstr_list.append(vecs_hashstr)
    flann_cfgstr = ''.join(flann_cfgstr_list)
    return flann_cfgstr
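# A minimal, utool-free sketch of how the parameter half of the cfgstr above is
# assembled: keys are shortened via `shortnames`, values are truncated to 7 characters,
# and the result is wrapped in '_FLANN(...)'. The helper name and the toy parameter
# dict are assumptions for illustration only; the real code also merges FLANN defaults
# and optionally drops 'checks'.
def _flann_param_sig_sketch(flann_params):
    shortnames = dict(algorithm='algo', checks='chks', random_seed='seed', trees='t')
    short_params = {
        shortnames.get(key, key): str(val)[0:7]
        for key, val in sorted(flann_params.items())
    }
    body = ','.join('%s=%s' % (key, val) for key, val in short_params.items())
    return '_FLANN((' + body + ',))'

# e.g. _flann_param_sig_sketch({'algorithm': 'kdtree', 'random_seed': 42, 'trees': 8})
# -> '_FLANN((algo=kdtree,seed=42,t=8,))'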
def print_size_info(inva):
    sizes = inva.get_size_info()
    sizes = ut.sort_dict(sizes, 'vals', ut.identity)
    total_nbytes = sum(sizes.values())
    logger.info(
        ut.align(ut.repr3(ut.map_dict_vals(ut.byte_str2, sizes), strvals=True), ':')
    )
    logger.info('total_nbytes = %r' % (ut.byte_str2(total_nbytes),))
def get_cfgstr(nnindexer, noquery=False):
    r"""
    returns string which uniquely identified configuration and support data

    Args:
        noquery (bool): if True cfgstr is only relevant to building the
            index. No search params are returned (default = False)

    Returns:
        str: flann_cfgstr

    CommandLine:
        python -m ibeis.algo.hots.neighbor_index --test-get_cfgstr

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.neighbor_index import *  # NOQA
        >>> import ibeis
        >>> cfgdict = dict(fg_on=False)
        >>> qreq_ = ibeis.testdata_qreq_(defaultdb='testdb1', p='default:fg_on=False')
        >>> qreq_.load_indexer()
        >>> nnindexer = qreq_.indexer
        >>> noquery = True
        >>> flann_cfgstr = nnindexer.get_cfgstr(noquery)
        >>> result = ('flann_cfgstr = %s' % (str(flann_cfgstr),))
        >>> print(result)
        flann_cfgstr = _FLANN((algo=kdtree,seed=42,t=8,))_VECS((11260,128)gj5nea@ni0%f3aja)
    """
    flann_cfgstr_list = []
    use_params_hash = True
    use_data_hash = True
    if use_params_hash:
        flann_defaults = vt.get_flann_params(nnindexer.flann_params['algorithm'])
        # flann_params_clean = flann_defaults.copy()
        flann_params_clean = ut.sort_dict(flann_defaults)
        ut.updateif_haskey(flann_params_clean, nnindexer.flann_params)
        if noquery:
            ut.delete_dict_keys(flann_params_clean, ['checks'])
        shortnames = dict(algorithm='algo', checks='chks', random_seed='seed', trees='t')
        short_params = ut.odict([
            (shortnames.get(key, key), str(val)[0:7])
            for key, val in six.iteritems(flann_params_clean)
        ])
        flann_valsig_ = ut.dict_str(short_params, nl=False, explicit=True, strvals=True)
        flann_valsig_ = flann_valsig_.lstrip('dict').replace(' ', '')
        # flann_valsig_ = str(list(flann_params.values()))
        # flann_valsig = ut.remove_chars(flann_valsig_, ', \'[]')
        flann_cfgstr_list.append('_FLANN(' + flann_valsig_ + ')')
    if use_data_hash:
        vecs_hashstr = ut.hashstr_arr(nnindexer.idx2_vec, '_VECS')
        flann_cfgstr_list.append(vecs_hashstr)
    flann_cfgstr = ''.join(flann_cfgstr_list)
    return flann_cfgstr
def graph_info(graph, verbose=False):
    import utool as ut

    # NOTE: graph.node is the pre-2.4 networkx accessor for per-node attribute
    # dicts; on networkx >= 2.4 the equivalent is graph.nodes
    node_attrs = list(graph.node.values())
    edge_attrs = list(ut.take_column(graph.edges(data=True), 2))
    node_attr_hist = ut.dict_hist(ut.flatten([attr.keys() for attr in node_attrs]))
    edge_attr_hist = ut.dict_hist(ut.flatten([attr.keys() for attr in edge_attrs]))
    node_type_hist = ut.dict_hist(list(map(type, graph.nodes())))
    info_dict = ut.odict([
        ('directed', graph.is_directed()),
        ('multi', graph.is_multigraph()),
        ('num_nodes', len(graph)),
        ('num_edges', len(list(graph.edges()))),
        ('edge_attr_hist', ut.sort_dict(edge_attr_hist)),
        ('node_attr_hist', ut.sort_dict(node_attr_hist)),
        ('node_type_hist', ut.sort_dict(node_type_hist)),
        ('graph_attrs', graph.graph),
        ('graph_name', graph.name),
    ])
    # unique_attrs = ut.map_dict_vals(ut.unique, ut.dict_accum(*node_attrs))
    # ut.dict_isect_combine(*node_attrs))
    # [list(attrs.keys())]
    if verbose:
        print(ut.repr3(info_dict))
    return info_dict
def estimate_twoday_count(ibs, day1, day2, filter_kw):
    # gid_list = ibs.get_valid_gids()
    all_images = ibs.images()
    dates = [dt.date() for dt in all_images.datetime]
    date_to_images = all_images.group_items(dates)
    date_to_images = ut.sort_dict(date_to_images)
    # date_hist = ut.map_dict_vals(len, date2_gids)
    # print('date_hist = %s' % (ut.repr2(date_hist, nl=2),))
    verbose = 0
    visit_dates = [day1, day2]
    visit_info_list_ = []
    for day in visit_dates:
        images = date_to_images[day]
        aids = ut.flatten(images.aids)
        aids = ibs.filter_annots_general(aids, filter_kw=filter_kw, verbose=verbose)
        nids = ibs.get_annot_name_rowids(aids)
        grouped_aids = ut.group_items(aids, nids)
        unique_nids = ut.unique(list(grouped_aids.keys()))

        if False:
            aids_list = ut.take(grouped_aids, unique_nids)
            for aids in aids_list:
                if len(aids) > 30:
                    break
            timedeltas_list = ibs.get_unflat_annots_timedelta_list(aids_list)
            # Do the five second rule
            marked_thresh = 5
            flags = []
            for nid, timedeltas in zip(unique_nids, timedeltas_list):
                flags.append(timedeltas.max() > marked_thresh)
            print('Unmarking %d names' % (len(flags) - sum(flags)))
            unique_nids = ut.compress(unique_nids, flags)
            grouped_aids = ut.dict_subset(grouped_aids, unique_nids)

        unique_aids = ut.flatten(list(grouped_aids.values()))
        info = {
            'unique_nids': unique_nids,
            'grouped_aids': grouped_aids,
            'unique_aids': unique_aids,
        }
        visit_info_list_.append(info)

    # Estimate statistics
    from ibeis.other import dbinfo

    aids_day1, aids_day2 = ut.take_column(visit_info_list_, 'unique_aids')
    nids_day1, nids_day2 = ut.take_column(visit_info_list_, 'unique_nids')
    resight_nids = ut.isect(nids_day1, nids_day2)
    nsight1 = len(nids_day1)
    nsight2 = len(nids_day2)
    resight = len(resight_nids)
    lp_index, lp_error = dbinfo.sight_resight_count(nsight1, nsight2, resight)

    if False:
        from ibeis.other import dbinfo

        print('DAY 1 STATS:')
        _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day1)  # NOQA
        print('DAY 2 STATS:')
        _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day2)  # NOQA
        print('COMBINED STATS:')
        _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day1 + aids_day2)  # NOQA

    print('%d annots on day 1' % (len(aids_day1)))
    print('%d annots on day 2' % (len(aids_day2)))
    print('%d names on day 1' % (nsight1,))
    print('%d names on day 2' % (nsight2,))
    print('resight = %r' % (resight,))
    print('lp_index = %r ± %r' % (lp_index, lp_error))
    return nsight1, nsight2, resight, lp_index, lp_error
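# A minimal standalone sketch of the Lincoln-Petersen sight-resight estimate that
# dbinfo.sight_resight_count is used for above: n1 names seen on day 1, n2 on day 2,
# and m seen on both. The point estimate is the classic n1*n2/m; the error term here
# is one common large-sample standard-error approximation and is an assumption -- the
# library routine may compute its error bound differently.
import math


def _lincoln_petersen_sketch(nsight1, nsight2, resight):
    lp_index = nsight1 * nsight2 / resight
    lp_var = (
        nsight1 * nsight2 * (nsight1 - resight) * (nsight2 - resight)
        / float(resight ** 3)
    )
    return lp_index, math.sqrt(lp_var)

# e.g. _lincoln_petersen_sketch(50, 40, 20) -> (100.0, ~12.2)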
def clean_tags():
    zotero = get_libzotero()
    # dict of all zotero items
    # items = zotero.index
    # get sql cursor
    cur = zotero.cur

    if False:
        sorted(ut.util_sqlite.get_tablenames(cur))
        ut.print_database_structure(cur)
        # Debug info about tags table in sql
        # The `tags` table stores all tags
        # The itemTags table stores the association between items and tags
        ut.get_table_columninfo_list(cur, 'fields')
        # ut.get_table_columninfo_list(cur, 'relations')
        ut.get_table_columninfo_list(cur, 'fieldsCombined')
        ut.get_table_columninfo_list(cur, 'itemData')
        ut.get_table_columninfo_list(cur, 'itemDataValues')
        ut.get_table_columninfo_list(cur, 'tags')
        ut.get_table_columninfo_list(cur, 'itemTags')

    import pandas as pd

    pd.options.display.max_colwidth = 40
    pd.options.display.max_rows = 20

    def pandas_sql(table, columns):
        return pd.DataFrame(ut.get_table_rows(cur, table, columns), columns=columns)

    item_df = pandas_sql('items', ('itemID', 'itemTypeID', 'libraryID', 'key')).set_index('itemID', drop=False)
    tags_df = pandas_sql('tags', ('tagID', 'name', 'type', 'libraryID', 'key')).set_index('tagID', drop=False)

    itemData_df = pandas_sql('itemData', ('itemID', 'fieldID', 'valueID'))
    itemTag_df = pandas_sql('itemTags', ('itemID', 'tagID'))

    itemDataValues_df = pandas_sql('itemDataValues', ('valueID', 'value')).set_index('valueID')
    field_df = pandas_sql('fields', ('fieldID', 'fieldName', 'fieldFormatID')).set_index('fieldID')

    itemData_df['value'] = itemDataValues_df['value'].loc[itemData_df['valueID'].values].values
    itemData_df['fieldName'] = field_df['fieldName'].loc[itemData_df['fieldID'].values].values

    titles = itemData_df[itemData_df['fieldName'] == 'title']
    assert len(ut.unique(ut.map_vals(len, titles.groupby('itemID').indices).values())) == 1

    # itemTag_df.groupby('itemID').count()

    # Find how often each tag is used
    tagid_to_count = itemTag_df.groupby('tagID').count()
    tagid_to_count = tagid_to_count.rename(columns={'itemID': 'nItems'})
    tagid_to_count['name'] = tags_df.loc[tagid_to_count.index]['name']
    tagid_to_count = tagid_to_count.sort_values('nItems')

    bad_tags = tagid_to_count[tagid_to_count['nItems'] == 1]

    tagid_to_count['tag_ncharsize'] = tagid_to_count['name'].apply(len)
    tagid_to_count = tagid_to_count.sort_values('tag_ncharsize')
    bad_tags = tagid_to_count[tagid_to_count['tag_ncharsize'] > 25]['name'].values.tolist()

    def clean_tags2():
        api_key = 'fBDBqRPwW9O3mYyNLiksBKZy'
        base_url = 'https://api.zotero.org'
        library_id = '1279414'
        library_type = 'user'
        from pyzotero import zotero

        zot = zotero.Zotero(library_id, library_type, api_key)
        for chunk in ut.ProgChunks(bad_tags, 50):
            zot.delete_tags(*chunk)

    if False:
        api_key = 'fBDBqRPwW9O3mYyNLiksBKZy'
        base_url = 'https://api.zotero.org'
        user_id = '1279414'
        userOrGroupPrefix = '/users/' + user_id
        params = {'v': 3, 'key': api_key}
        items_resp = requests.get(base_url + userOrGroupPrefix + '/items', params=params)
        print(items_resp.content)
        print(items_resp)

        json_tags = []
        get_url = base_url + userOrGroupPrefix + '/tags'
        while True:
            print('get_url = %r' % (get_url,))
            tag_resp = requests.get(get_url, params=params)
            if tag_resp.status_code != 200:
                break
            json_tags.extend(tag_resp.json())
            if 'next' in tag_resp.links:
                get_url = tag_resp.links['next']['url']
            else:
                break

        version_to_tags = ut.ddict(list)
        bad_tags = []
        for tag in ut.ProgIter(json_tags, label='parsing tags'):
            # x = requests.get(tag['links']['self']['href'], params=params)
            if tag['meta']['numItems'] == 1:
                import urllib2
                try:
                    bad_tags.append(urllib2.quote(tag['tag']))
                except Exception as ex:
                    print('cant encode tag=%r' % (tag,))
                    pass

        for chunk in ut.ProgIter(ut.ichunks(bad_tags, 50), length=len(bad_tags) / 50):
            search_url = base_url + userOrGroupPrefix + '/items?tag=' + ' || '.join(chunk)
            r = requests.get(search_url, params=params)
            matching_items = r.json()
            # assert len(matching_items) == 1
            for item in matching_items:
                version = item['version']
                version_to_tags[item['version']].append(tag['tag'])

        # DELETE MULTIPLE TAGS
        import requests

        for chunk in ut.ichunks(bad_tags['name'], 50):
            import urllib2
            encoded_chunk = []
            for t in chunk:
                try:
                    encoded_chunk.append(urllib2.quote(t))
                except Exception:
                    print(t)
            suffix = ' || '.join(encoded_chunk)
            delete_url = base_url + userOrGroupPrefix + '/tags?' + suffix
            print('delete_url = %r' % (delete_url,))
            resp = requests.delete(delete_url, params=params)

        bad_tags = tagid_to_count[tagid_to_count['nItems'] == 1]
        bad_tags['tagID'] = bad_tags.index
        for tagid in bad_tags:
            # delete from itemTags where tagID in (select tagID from tags where type=1);
            pass
        # for name in bad_tags['name'].values.tolist():
        #     ...

    item_df['title'] = titles.set_index('itemID')['value']
    for idx, item in zotero.index.items():
        sql_title = item_df.loc[item.id]['title']
        if item.title != sql_title:
            if pd.isnull(sql_title) and item.title is not None:
                print(item.__dict__)
                print(item_df.loc[item.id])
                print('item.title = %r' % (item.title,))
                print('sql_title = %r' % (sql_title,))
                assert False

    duplicate_tags = [
        (name, idxs)
        for name, idxs in tags_df.groupby('name', sort=True).indices.items()
        if len(idxs) > 2
    ]
    tagname_to_tagid = tags_df.groupby('name', sort=True).first()
    new_to_oldtags = {}

    # Determine which tagid to use for each name
    for tagname, idxs in duplicate_tags:
        tags_subdf = tags_df.iloc[idxs]
        mapping = itemTag_df[itemTag_df['tagID'].isin(tags_subdf['tagID'])]
        tag_hist = mapping.groupby('tagID').count()
        best_tagid = tag_hist['itemID'].idxmax()
        # map the kept tagID to the other (duplicate) tagIDs for this name
        new_to_oldtags[best_tagid] = set(tag_hist.index.values) - {best_tagid}
        tagname_to_tagid.loc[tagname] = tags_df.loc[best_tagid]
        # for col in tagname_to_tagid.columns:
        #     tagname_to_tagid.loc[tagname][col] = tags_df.loc[best_tagid][col]
        # tags_df.loc[best_tagid]

    if False:
        # Update tagIds
        for newid, oldids in new_to_oldtags.items():
            for oldid in oldids:
                # cur.execute('SELECT itemID, tagID FROM itemTags WHERE tagID=?', (oldid,))
                import sqlite3
                try:
                    cmd = 'UPDATE itemTags SET tagID=? WHERE tagID=?'
                    args = (newid, oldid)
                    print('(%s) args = %r' % (cmd, args,))
                    cur.execute(cmd, args)
                    print(cur.fetchall())
                except sqlite3.IntegrityError:
                    print('error')
                    pass

    # tags_df.groupby('name', sort=True)
    # itemTag_df.groupby('itemID')
    # duptags = tags_df.iloc[tags_df.groupby('name', sort=True).indices['animals']]
    # duptags['tagID']
    # flags = itemTag_df['tagID'].isin(duptags['tagID'])
    # dup_rel = itemTag_df[flags]
    # item_df['title'].loc[dup_rel['itemID']].values
    # tags_df.iloc[tags_df.groupby('name', sort=True).indices['animals']]
    # tags_df[tags_df['type'] == 1]
    # tags_df[tags_df['type'] == 0]
    # tags_df['libraryID'].unique()
    # tags_df['type'].unique()

    '''
    SELECT SELECT FROM itemTags WHERE name in (animals)
    '''

    item_tag_pairs = ut.get_table_rows(cur, 'itemTags', ('itemID', 'tagID'))
    # Group tags by item
    itemid_to_tagids = ut.group_pairs(item_tag_pairs)
    # Group items by tags
    tagid_to_itemids = ut.group_pairs(map(tuple, map(reversed, item_tag_pairs)))
    # mapping from tagid to name
    tagid_to_name = dict(ut.get_table_rows(cur, 'tags', ('tagID', 'name')))
    tagid_freq = list(ut.sort_dict(ut.map_vals(len, tagid_to_itemids), 'vals').items())
    ut.sort_dict(
        ut.map_vals(
            sum,
            ut.group_pairs([
                (freq, tagid_to_name.get(tagid, tagid)) for tagid, freq in tagid_freq
            ]),
        ),
        'vals',
    )
    tagname_freq = ut.map_keys(lambda k: tagid_to_name.get(k, k), tagid_freq)
def get_injured_sharks():
    """
    >>> from wbia.scripts.getshark import *  # NOQA
    """
    import requests

    url = 'http://www.whaleshark.org/getKeywordImages.jsp'
    resp = requests.get(url)
    assert resp.status_code == 200
    keywords = resp.json()['keywords']
    key_list = ut.take_column(keywords, 'indexName')
    key_to_nice = {k['indexName']: k['readableName'] for k in keywords}

    injury_patterns = [
        'injury',
        'net',
        'hook',
        'trunc',
        'damage',
        'scar',
        'nicks',
        'bite',
    ]

    injury_keys = [key for key in key_list if any([pat in key for pat in injury_patterns])]
    noninjury_keys = ut.setdiff(key_list, injury_keys)
    injury_nice = ut.lmap(lambda k: key_to_nice[k], injury_keys)  # NOQA
    noninjury_nice = ut.lmap(lambda k: key_to_nice[k], noninjury_keys)  # NOQA
    key_list = injury_keys

    keyed_images = {}
    for key in ut.ProgIter(key_list, lbl='reading index', bs=True):
        key_url = url + '?indexName={indexName}'.format(indexName=key)
        key_resp = requests.get(key_url)
        assert key_resp.status_code == 200
        key_imgs = key_resp.json()['images']
        keyed_images[key] = key_imgs

    key_hist = {key: len(imgs) for key, imgs in keyed_images.items()}
    key_hist = ut.sort_dict(key_hist, 'vals')
    logger.info(ut.repr3(key_hist))
    nice_key_hist = ut.map_dict_keys(lambda k: key_to_nice[k], key_hist)
    nice_key_hist = ut.sort_dict(nice_key_hist, 'vals')
    logger.info(ut.repr3(nice_key_hist))

    key_to_urls = {key: ut.take_column(vals, 'url') for key, vals in keyed_images.items()}
    overlaps = {}
    import itertools

    overlap_img_list = []
    for k1, k2 in itertools.combinations(key_to_urls.keys(), 2):
        overlap_imgs = ut.isect(key_to_urls[k1], key_to_urls[k2])
        num_overlap = len(overlap_imgs)
        overlaps[(k1, k2)] = num_overlap
        overlaps[(k1, k1)] = len(key_to_urls[k1])
        if num_overlap > 0:
            # logger.info('[%s][%s], overlap=%r' % (k1, k2, num_overlap))
            overlap_img_list.extend(overlap_imgs)

    all_img_urls = list(set(ut.flatten(key_to_urls.values())))
    num_all = len(all_img_urls)  # NOQA
    logger.info('num_all = %r' % (num_all,))

    # Determine super-categories
    categories = ['nicks', 'scar', 'trunc']

    # Force these keys into these categories
    key_to_cat = {'scarbite': 'other_injury'}

    cat_to_keys = ut.ddict(list)

    for key in key_to_urls.keys():
        flag = 1
        if key in key_to_cat:
            cat = key_to_cat[key]
            cat_to_keys[cat].append(key)
            continue
        for cat in categories:
            if cat in key:
                cat_to_keys[cat].append(key)
                flag = 0
        if flag:
            cat = 'other_injury'
            cat_to_keys[cat].append(key)

    cat_urls = ut.ddict(list)
    for cat, keys in cat_to_keys.items():
        for key in keys:
            cat_urls[cat].extend(key_to_urls[key])

    cat_hist = {}
    for cat in list(cat_urls.keys()):
        cat_urls[cat] = list(set(cat_urls[cat]))
        cat_hist[cat] = len(cat_urls[cat])

    logger.info(ut.repr3(cat_to_keys))
    logger.info(ut.repr3(cat_hist))

    key_to_cat = dict([(val, key) for key, vals in cat_to_keys.items() for val in vals])

    # ingestset = {
    #     '__class__': 'ImageSet',
    #     'images': ut.ddict(dict)
    # }
    # for key, key_imgs in keyed_images.items():
    #     for imgdict in key_imgs:
    #         url = imgdict['url']
    #         encid = imgdict['correspondingEncounterNumber']
    #         # Make structure
    #         encdict = encounters[encid]
    #         encdict['__class__'] = 'Encounter'
    #         imgdict = ut.delete_keys(imgdict.copy(), ['correspondingEncounterNumber'])
    #         imgdict['__class__'] = 'Image'
    #         cat = key_to_cat[key]
    #         annotdict = {'relative_bbox': [.01, .01, .98, .98], 'tags': [cat, key]}
    #         annotdict['__class__'] = 'Annotation'
    #         # Ensure structures exist
    #         encdict['images'] = encdict.get('images', [])
    #         imgdict['annots'] = imgdict.get('annots', [])
    #         # Add an image to this encounter
    #         encdict['images'].append(imgdict)
    #         # Add an annotation to this image
    #         imgdict['annots'].append(annotdict)

    # # http://springbreak.wildbook.org/rest/org.ecocean.Encounter/1111
    # get_enc_url = 'http://www.whaleshark.org/rest/org.ecocean.Encounter/%s' % (encid,)
    # resp = requests.get(get_enc_url)
    # logger.info(ut.repr3(encdict))
    # logger.info(ut.repr3(encounters))

    # Download the files to the local disk
    # fpath_list =
    all_urls = ut.unique(
        ut.take_column(
            ut.flatten(
                ut.dict_subset(keyed_images, ut.flatten(cat_to_keys.values())).values()
            ),
            'url',
        )
    )

    dldir = ut.truepath('~/tmpsharks')
    from os.path import commonprefix, basename  # NOQA

    prefix = commonprefix(all_urls)
    suffix_list = [url_[len(prefix):] for url_ in all_urls]
    fname_list = [suffix.replace('/', '--') for suffix in suffix_list]

    fpath_list = []
    for url, fname in ut.ProgIter(zip(all_urls, fname_list), lbl='downloading imgs', freq=1):
        fpath = ut.grab_file_url(url, download_dir=dldir, fname=fname, verbose=False)
        fpath_list.append(fpath)

    # Make sure we keep orig info
    # url_to_keys = ut.ddict(list)
    url_to_info = ut.ddict(dict)
    for key, imgdict_list in keyed_images.items():
        for imgdict in imgdict_list:
            url = imgdict['url']
            info = url_to_info[url]
            for k, v in imgdict.items():
                info[k] = info.get(k, [])
                info[k].append(v)
            info['keys'] = info.get('keys', [])
            info['keys'].append(key)
            # url_to_keys[url].append(key)

    info_list = ut.take(url_to_info, all_urls)
    for info in info_list:
        if len(set(info['correspondingEncounterNumber'])) > 1:
            assert False, 'url with two different encounter nums'

    # Combine duplicate tags
    hashid_list = [
        ut.get_file_uuid(fpath_, stride=8)
        for fpath_ in ut.ProgIter(fpath_list, bs=True)
    ]
    groupxs = ut.group_indices(hashid_list)[1]

    # Group properties by duplicate images
    # groupxs = [g for g in groupxs if len(g) > 1]
    fpath_list_ = ut.take_column(ut.apply_grouping(fpath_list, groupxs), 0)
    url_list_ = ut.take_column(ut.apply_grouping(all_urls, groupxs), 0)
    info_list_ = [
        ut.map_dict_vals(ut.flatten, ut.dict_accum(*info_))
        for info_ in ut.apply_grouping(info_list, groupxs)
    ]

    encid_list_ = [
        ut.unique(info_['correspondingEncounterNumber'])[0] for info_ in info_list_
    ]
    keys_list_ = [ut.unique(info_['keys']) for info_ in info_list_]
    cats_list_ = [ut.unique(ut.take(key_to_cat, keys)) for keys in keys_list_]

    clist = ut.ColumnLists({
        'gpath': fpath_list_,
        'url': url_list_,
        'encid': encid_list_,
        'key': keys_list_,
        'cat': cats_list_,
    })

    # for info_ in ut.apply_grouping(info_list, groupxs):
    #     info = ut.dict_accum(*info_)
    #     info = ut.map_dict_vals(ut.flatten, info)
    #     x = ut.unique(ut.flatten(ut.dict_accum(*info_)['correspondingEncounterNumber']))
    #     if len(x) > 1:
    #         info = info.copy()
    #         del info['keys']
    #         logger.info(ut.repr3(info))

    flags = ut.lmap(ut.fpath_has_imgext, clist['gpath'])
    clist = clist.compress(flags)

    import wbia

    ibs = wbia.opendb('WS_Injury', allow_newdir=True)

    gid_list = ibs.add_images(clist['gpath'])
    clist['gid'] = gid_list

    failed_flags = ut.flag_None_items(clist['gid'])
    logger.info('# failed %s' % (sum(failed_flags),))
    passed_flags = ut.not_list(failed_flags)
    clist = clist.compress(passed_flags)
    ut.assert_all_not_None(clist['gid'])
    # ibs.get_image_uris_original(clist['gid'])
    ibs.set_image_uris_original(clist['gid'], clist['url'], overwrite=True)

    # ut.zipflat(clist['cat'], clist['key'])
    if False:
        # Can run detection instead
        clist['tags'] = ut.zipflat(clist['cat'])
        aid_list = ibs.use_images_as_annotations(
            clist['gid'], adjust_percent=0.01, tags_list=clist['tags']
        )
        aid_list

    import wbia.plottool as pt
    from wbia import core_annots

    pt.qt4ensure()
    # annots = ibs.annots()
    # aids = [1, 2]
    # ibs.depc_annot.get('hog', aids, 'hog')
    # ibs.depc_annot.get('chip', aids, 'img')
    for aid in ut.InteractiveIter(ibs.get_valid_aids()):
        hogs = ibs.depc_annot.d.get_hog_hog([aid])
        chips = ibs.depc_annot.d.get_chips_img([aid])
        chip = chips[0]
        hogimg = core_annots.make_hog_block_image(hogs[0])
        pt.clf()
        pt.imshow(hogimg, pnum=(1, 2, 1))
        pt.imshow(chip, pnum=(1, 2, 2))
        fig = pt.gcf()
        fig.show()
        fig.canvas.draw()

    # logger.info(len(groupxs))

    # if False:
    #     groupxs = ut.find_duplicate_items(ut.lmap(basename, suffix_list)).values()
    #     logger.info(ut.repr3(ut.apply_grouping(all_urls, groupxs)))
    #     # FIX
    #     for fpath, fname in zip(fpath_list, fname_list):
    #         if ut.checkpath(fpath):
    #             ut.move(fpath, join(dirname(fpath), fname))
    #             logger.info('fpath = %r' % (fpath,))

    # import wbia
    # from wbia.dbio import ingest_dataset
    # dbdir = wbia.sysres.lookup_dbdir('WS_ALL')
    # self = ingest_dataset.Ingestable2(dbdir)

    if False:
        # Show overlap matrix
        import wbia.plottool as pt
        import pandas as pd
        import numpy as np

        dict_ = overlaps
        s = pd.Series(dict_, index=pd.MultiIndex.from_tuples(overlaps))
        df = s.unstack()
        lhs, rhs = df.align(df.T)
        df = lhs.add(rhs, fill_value=0).fillna(0)

        label_texts = df.columns.values

        def label_ticks(label_texts):
            import wbia.plottool as pt

            truncated_labels = [repr(lbl[0:100]) for lbl in label_texts]
            ax = pt.gca()
            ax.set_xticks(list(range(len(label_texts))))
            ax.set_xticklabels(truncated_labels)
            [lbl.set_rotation(-55) for lbl in ax.get_xticklabels()]
            [lbl.set_horizontalalignment('left') for lbl in ax.get_xticklabels()]

            # xgrid, ygrid = np.meshgrid(range(len(label_texts)), range(len(label_texts)))
            # pt.plot_surface3d(xgrid, ygrid, disjoint_mat)
            ax.set_yticks(list(range(len(label_texts))))
            ax.set_yticklabels(truncated_labels)
            [lbl.set_horizontalalignment('right') for lbl in ax.get_yticklabels()]
            [lbl.set_verticalalignment('center') for lbl in ax.get_yticklabels()]
            # [lbl.set_rotation(20) for lbl in ax.get_yticklabels()]

        # df = df.sort(axis=0)
        # df = df.sort(axis=1)

        sortx = np.argsort(df.sum(axis=1).values)[::-1]
        df = df.take(sortx, axis=0)
        df = df.take(sortx, axis=1)

        fig = pt.figure(fnum=1)
        fig.clf()
        mat = df.values.astype(np.int32)
        mat[np.diag_indices(len(mat))] = 0
        vmax = mat[(1 - np.eye(len(mat))).astype(np.bool)].max()
        import matplotlib.colors

        norm = matplotlib.colors.Normalize(vmin=0, vmax=vmax, clip=True)
        pt.plt.imshow(mat, cmap='hot', norm=norm, interpolation='none')
        pt.plt.colorbar()
        pt.plt.grid('off')
        label_ticks(label_texts)
        fig.tight_layout()

    # overlap_df = pd.DataFrame.from_dict(overlap_img_list)

    class TmpImage(ut.NiceRepr):
        pass

    from skimage.feature import hog
    from skimage import data, color, exposure

    import wbia.plottool as pt

    image2 = color.rgb2gray(data.astronaut())  # NOQA

    fpath = './GOPR1120.JPG'

    import vtool as vt

    for fpath in [fpath]:
        """
        http://scikit-image.org/docs/dev/auto_examples/plot_hog.html
        """
        image = vt.imread(fpath, grayscale=True)
        image = pt.color_funcs.to_base01(image)

        fig = pt.figure(fnum=2)
        fd, hog_image = hog(
            image,
            orientations=8,
            pixels_per_cell=(16, 16),
            cells_per_block=(1, 1),
            visualise=True,
        )

        fig, (ax1, ax2) = pt.plt.subplots(1, 2, figsize=(8, 4), sharex=True, sharey=True)

        ax1.axis('off')
        ax1.imshow(image, cmap=pt.plt.cm.gray)
        ax1.set_title('Input image')
        ax1.set_adjustable('box-forced')

        # Rescale histogram for better display
        hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 0.02))

        ax2.axis('off')
        ax2.imshow(hog_image_rescaled, cmap=pt.plt.cm.gray)
        ax2.set_title('Histogram of Oriented Gradients')
        ax1.set_adjustable('box-forced')
        pt.plt.show()
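# A tiny standalone sketch of the super-category assignment used inside
# get_injured_sharks above: any index key whose name contains 'nicks', 'scar', or
# 'trunc' goes into that category (possibly several), explicit overrides win, and
# everything else falls into 'other_injury'. The helper name and the example keys
# are illustrative assumptions.
def _categorize_keys_sketch(keys, categories=('nicks', 'scar', 'trunc')):
    # explicit overrides win (mirrors key_to_cat = {'scarbite': 'other_injury'} above)
    overrides = {'scarbite': 'other_injury'}
    cat_to_keys = {}
    for key in keys:
        if key in overrides:
            cat_to_keys.setdefault(overrides[key], []).append(key)
            continue
        matched = [cat for cat in categories if cat in key]
        for cat in (matched or ['other_injury']):
            cat_to_keys.setdefault(cat, []).append(key)
    return cat_to_keys

# e.g. _categorize_keys_sketch(['scarbite', 'truncation', 'nethook'])
# -> {'other_injury': ['scarbite', 'nethook'], 'trunc': ['truncation']}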