def _debug_repr_cpd(cpd):
    import re
    import utool as ut
    code_fmt = ut.codeblock(
        '''
        {variable} = pgmpy.factors.TabularCPD(
            variable={variable_repr},
            variable_card={variable_card_repr},
            values={get_cpd_repr},
            evidence={evidence_repr},
            evidence_card={evidence_card_repr},
        )
        ''')
    keys = ['variable', 'variable_card', 'values', 'evidence', 'evidence_card']
    dict_ = ut.odict(zip(keys, [getattr(cpd, key) for key in keys]))
    # HACK
    dict_['values'] = cpd.get_cpd()
    r = ut.repr2(dict_, explicit=True, nobraces=True, nl=True)
    print(r)
    # Parse props that are needed for this fmtstr
    fmt_keys = [match.groups()[0] for match in re.finditer('{(.*?)}', code_fmt)]
    need_reprs = [key[:-5] for key in fmt_keys if key.endswith('_repr')]
    need_keys = [key for key in fmt_keys if not key.endswith('_repr')]
    # Get corresponding props
    # Call methods if need be
    tmp = [(prop, getattr(cpd, prop)) for prop in need_reprs]
    tmp = [(x, y()) if ut.is_funclike(y) else (x, y) for (x, y) in tmp]
    fmtdict = dict(tmp)
    fmtdict = ut.map_dict_vals(ut.repr2, fmtdict)
    fmtdict = ut.map_dict_keys(lambda x: x + '_repr', fmtdict)
    tmp2 = [(prop, getattr(cpd, prop)) for prop in need_keys]
    fmtdict.update(dict(tmp2))
    code = code_fmt.format(**fmtdict)
    return code
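# Usage sketch (not part of the original source): _debug_repr_cpd targets the
# old pgmpy API, where TabularCPD is importable from pgmpy.factors and exposes
# a get_cpd() method (newer pgmpy moved the class to pgmpy.factors.discrete
# and renamed the getter to get_values()). Under that assumption:
#     >>> import pgmpy.factors
#     >>> cpd = pgmpy.factors.TabularCPD(variable='rain', variable_card=2,
#     ...                                values=[[0.7], [0.3]])
#     >>> print(_debug_repr_cpd(cpd))  # prints code that would rebuild the CPD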
def get_varied_acfg_labels(acfg_list, mainkey='_cfgname', checkname=False):
    """
    >>> from ibeis.expt.annotation_configs import *  # NOQA
    """
    # print(ut.list_str(varied_acfg_list, nl=2))
    for acfg in acfg_list:
        assert acfg['qcfg'][mainkey] == acfg['dcfg'][mainkey], (
            'should be the same for now')
    cfgname_list = [acfg['qcfg'][mainkey] for acfg in acfg_list]
    if checkname and ut.allsame(cfgname_list):
        cfgname_list = [None] * len(cfgname_list)

    # Hack to make common params between q and d appear the same
    _acfg_list = [compress_aidcfg(acfg) for acfg in acfg_list]
    flat_acfg_list = flatten_acfg_list(_acfg_list)
    nonvaried_dict, varied_acfg_list = ut.partition_varied_cfg_list(
        flat_acfg_list)

    SUPER_HACK = True
    if SUPER_HACK:
        # SUPER HACK: recompress and remake the varied list after knowing
        # what is varied
        _varied_keys = list(set(ut.flatten(
            [list(ut.flatten(
                [list(x.keys()) for x in unflatten_acfgdict(cfg).values()]
            )) for cfg in varied_acfg_list]
        )))
        _acfg_list = [
            compress_aidcfg(acfg, force_noncommon=_varied_keys)
            for acfg in acfg_list]
        flat_acfg_list = flatten_acfg_list(_acfg_list)
        nonvaried_dict, varied_acfg_list = ut.partition_varied_cfg_list(
            flat_acfg_list)

    shortened_cfg_list = [
        # {shorten_to_alias_labels(key): val for key, val in _dict.items()}
        ut.map_dict_keys(shorten_to_alias_labels, _dict)
        for _dict in varied_acfg_list]

    nonlbl_keys = ut.INTERNAL_CFGKEYS
    nonlbl_keys = [prefix + key
                   for key in nonlbl_keys
                   for prefix in ['', 'q', 'd']]

    # Hack for sorting by q/d stuff first
    def get_key_order(cfg):
        keys = [k for k in cfg.keys() if k not in nonlbl_keys]
        sortorder = [2 * k.startswith('q') + 1 * k.startswith('d')
                     for k in keys]
        return ut.sortedby(keys, sortorder)[::-1]

    cfglbl_list = [
        ut.get_cfg_lbl(cfg, name, nonlbl_keys, key_order=get_key_order(cfg))
        for cfg, name in zip(shortened_cfg_list, cfgname_list)]

    if checkname:
        cfglbl_list = [x.lstrip(':') for x in cfglbl_list]
    return cfglbl_list
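# Usage sketch (hypothetical data, not part of the original source). Each
# acfg is expected to carry matching 'qcfg'/'dcfg' sub-dicts that agree on
# `mainkey`; the returned labels summarize only the parameters that vary:
#     >>> acfg_list = [
#     ...     {'qcfg': {'_cfgname': 'ctrl', 'min_pername': 1},
#     ...      'dcfg': {'_cfgname': 'ctrl', 'min_pername': 2}},
#     ...     {'qcfg': {'_cfgname': 'ctrl', 'min_pername': 1},
#     ...      'dcfg': {'_cfgname': 'ctrl', 'min_pername': 4}},
#     ... ]
#     >>> cfglbl_list = get_varied_acfg_labels(acfg_list)  # one label per acfg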
def random_test_annot(num_names=5, rng=np.random):
    """
    Create a single test annotation with random properties

    Args:
        num_names (int): (default = 5)
        rng (module): random number generator (default = numpy.random)

    CommandLine:
        python -m ibeis.algo.hots.testem random_test_annot --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.testem import *  # NOQA
        >>> num_names = 5
        >>> rng = np.random.RandomState(0)
        >>> result = random_test_annot(num_names, rng)
        >>> print(result)
        {u'qual': 1, u'yaw': 0.0, u'nfeats': 1529, u'name': 0, u'view': u'R'}
    """
    from ibeis import constants as const
    # num_names = 10
    valid_names = list(range(num_names))
    valid_views = list(const.YAWALIAS.values())
    # valid_views.remove('
    valid_quals = list(const.QUALITY_INT_TO_TEXT.keys())
    ut.delete_list_items(valid_quals, [-1, 0, None])

    def sampleone(list_):
        return ut.random_sample(list_, 1, rng=rng)[0]

    view_to_ori = ut.map_dict_keys(
        lambda x: const.YAWALIAS[x], const.VIEWTEXT_TO_YAW_RADIANS)
    case = {
        'nfeats': np.clip(rng.normal(1000, 300, size=1)[0], 0, np.inf).astype(np.int),
        'name': sampleone(valid_names),
        'view': sampleone(valid_views),
        'qual': sampleone(valid_quals),
    }
    case['yaw'] = view_to_ori[case['view']]
    return case
def make_test_similarity(test_case):
    # toy_params = {
    #     True: {'mu': 0.9, 'sigma': .1},
    #     False: {'mu': 0.1, 'sigma': .4}
    # }
    # tau = np.pi * 2
    from wbia import constants as const
    # view_to_ori = const.VIEWTEXT_TO_YAW_RADIANS
    view_to_ori = ut.map_dict_keys(
        lambda x: const.YAWALIAS[x], const.VIEWTEXT_TO_YAW_RADIANS
    )
    # view_to_ori = {
    #     'F': -1 * tau / 4,
    #     'L': 0 * tau / 4,
    #     'B': 1 * tau / 4,
    #     'R': 2 * tau / 4,
    # }
    import vtool as vt

    nid_list = np.array(ut.dict_take_column(test_case, 'name'))
    yaw_list = np.array(
        ut.dict_take(view_to_ori, ut.dict_take_column(test_case, 'view'))
    )

    rng = np.random.RandomState(0)
    pmat = []
    for idx in range(len(test_case)):
        nid = nid_list[idx]
        yaw = yaw_list[idx]
        p_same = nid == nid_list
        p_comp = 1 - vt.ori_distance(yaw_list, yaw) / np.pi
        # estimate noisy measurements
        p_same_m = np.clip(p_same + rng.normal(0, 0.5, size=len(p_same)), 0, 0.9)
        p_comp_m = np.clip(p_comp + rng.normal(0, 0.5, size=len(p_comp)), 0, 0.9)
        #
        p_same_and_comp = p_same_m * p_comp_m
        pmat.append(p_same_and_comp)
    #
    P = np.array(pmat)
    P[np.diag_indices(len(P))] = 0
    # Symmetrize. Note: the original `P + P.T / 2` halved only P.T due to
    # operator precedence; the parenthesized form averages both halves.
    P = (P + P.T) / 2
    P = np.clip(P, 0.01, 0.99)
    logger.info(ut.hz_str(' P = ', ut.repr2(P, precision=2, max_line_width=140)))
    return P
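# Usage sketch (not part of the original source): test_case is a list of
# annotation dicts with at least 'name' and 'view' keys, e.g. as produced by
# random_test_annot above (assuming the ibeis/wbia constant modules line up):
#     >>> rng = np.random.RandomState(0)
#     >>> test_case = [random_test_annot(num_names=3, rng=rng) for _ in range(4)]
#     >>> P = make_test_similarity(test_case)  # symmetric (4, 4) probability matrix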
def check_database_overlap(ibs1, ibs2):
    """
    CommandLine:
        python -m wbia.other.dbinfo --test-get_dbinfo:1 --db PZ_MTEST
        dev.py -t listdbs
        python -m wbia.dbio.export_subset check_database_overlap --db PZ_MTEST --db2 PZ_MOTHERS

    CommandLine:
        python -m wbia.dbio.export_subset check_database_overlap

        python -m wbia.dbio.export_subset check_database_overlap --db1=PZ_MTEST --db2=PZ_Master0  # NOQA
        python -m wbia.dbio.export_subset check_database_overlap --db1=NNP_Master3 --db2=PZ_Master0  # NOQA

        python -m wbia.dbio.export_subset check_database_overlap --db1=GZ_Master0 --db2=GZ_ALL
        python -m wbia.dbio.export_subset check_database_overlap --db1=GZ_ALL --db2=lewa_grevys

        python -m wbia.dbio.export_subset check_database_overlap --db1=PZ_FlankHack --db2=PZ_Master1
        python -m wbia.dbio.export_subset check_database_overlap --db1=PZ_PB_RF_TRAIN --db2=PZ_Master1

    Example:
        >>> # SCRIPT
        >>> from wbia.dbio.export_subset import *  # NOQA
        >>> import wbia
        >>> import utool as ut
        >>> #ibs1 = wbia.opendb(db='PZ_Master0')
        >>> #ibs2 = wbia.opendb(dbdir='/raid/work2/Turk/PZ_Master')
        >>> db1 = ut.get_argval('--db1', str, default='PZ_MTEST')
        >>> db2 = ut.get_argval('--db2', str, default='testdb1')
        >>> dbdir1 = ut.get_argval('--dbdir1', str, default=None)
        >>> dbdir2 = ut.get_argval('--dbdir2', str, default=None)
        >>> ibs1 = wbia.opendb(db=db1, dbdir=dbdir1)
        >>> ibs2 = wbia.opendb(db=db2, dbdir=dbdir2)
        >>> check_database_overlap(ibs1, ibs2)
    """
    import numpy as np

    def print_isect(items1, items2, lbl=''):
        set1_ = set(items1)
        set2_ = set(items2)
        items_isect = set1_.intersection(set2_)
        fmtkw1 = dict(
            part=1,
            lbl=lbl,
            num=len(set1_),
            num_isect=len(items_isect),
            percent=100 * len(items_isect) / len(set1_),
        )
        fmtkw2 = dict(
            part=2,
            lbl=lbl,
            num=len(set2_),
            num_isect=len(items_isect),
            percent=100 * len(items_isect) / len(set2_),
        )
        fmt_a = ' * Num {lbl} {part}: {num_isect} / {num} = {percent:.2f}%'
        # fmt_b = ' * Num {lbl} isect: {num}'
        logger.info('Checking {lbl} intersection'.format(lbl=lbl))
        logger.info(fmt_a.format(**fmtkw1))
        logger.info(fmt_a.format(**fmtkw2))
        # logger.info(fmt_b.format(lbl=lbl, num=len(items_isect)))
        # items = items_isect
        # list_ = items1
        x_list1 = ut.find_list_indexes(items1, items_isect)
        x_list2 = ut.find_list_indexes(items2, items_isect)
        return x_list1, x_list2

    gids1 = ibs1.images()
    gids2 = ibs2.images()

    # Find common images
    # items1, items2, lbl, = gids1.uuids, gids2.uuids, 'images'
    gx_list1, gx_list2 = print_isect(gids1.uuids, gids2.uuids, 'images')
    gids_isect1 = gids1.take(gx_list1)
    gids_isect2 = gids2.take(gx_list2)
    assert gids_isect2.uuids == gids_isect1.uuids, 'sequence must be aligned'

    SHOW_ISECT_GIDS = False
    if SHOW_ISECT_GIDS:
        if len(gx_list1) > 0:
            logger.info('gids_isect1 = %r' % (gids_isect1,))
            logger.info('gids_isect2 = %r' % (gids_isect2,))
            if False:
                # Debug code
                import wbia.viz
                import wbia.plottool as pt
                gid_pairs = list(zip(gids_isect1, gids_isect2))
                pairs_iter = ut.ichunks(gid_pairs, chunksize=8)
                for fnum, pairs in enumerate(pairs_iter, start=1):
                    pnum_ = pt.make_pnum_nextgen(nRows=len(pairs), nCols=2)
                    for gid1, gid2 in pairs:
                        wbia.viz.show_image(ibs1, gid1, pnum=pnum_(), fnum=fnum)
                        wbia.viz.show_image(ibs2, gid2, pnum=pnum_(), fnum=fnum)

    # if False:
    #     aids1 = ibs1.get_valid_aids()
    #     aids2 = ibs2.get_valid_aids()
    #     ibs1.update_annot_visual_uuids(aids1)
    #     ibs2.update_annot_visual_uuids(aids2)
    #     ibs1.update_annot_semantic_uuids(aids1)
    #     ibs2.update_annot_semantic_uuids(aids2)

    # Check to see which intersecting images have different annotations
    image_aids_isect1 = gids_isect1.aids
    image_aids_isect2 = gids_isect2.aids
    image_avuuids_isect1 = np.array(
        ibs1.unflat_map(ibs1.get_annot_visual_uuids, image_aids_isect1))
    image_avuuids_isect2 = np.array(
        ibs2.unflat_map(ibs2.get_annot_visual_uuids, image_aids_isect2))
    changed_image_xs = np.nonzero(image_avuuids_isect1 != image_avuuids_isect2)[0]

    if len(changed_image_xs) > 0:
        logger.info(
            'There are %d images with changes in annotation visual information'
            % (len(changed_image_xs),))
        changed_gids1 = ut.take(gids_isect1, changed_image_xs)
        changed_gids2 = ut.take(gids_isect2, changed_image_xs)
        SHOW_CHANGED_GIDS = False
        if SHOW_CHANGED_GIDS:
            logger.info('gids_isect1 = %r' % (changed_gids2,))
            logger.info('gids_isect2 = %r' % (changed_gids1,))
            # if False:
            #     # Debug code
            #     import wbia.viz
            #     import wbia.plottool as pt
            #     gid_pairs = list(zip(changed_gids1, changed_gids2))
            #     pairs_iter = ut.ichunks(gid_pairs, chunksize=8)
            #     for fnum, pairs in enumerate(pairs_iter, start=1):
            #         pnum_ = pt.make_pnum_nextgen(nRows=len(pairs), nCols=2)
            #         for gid1, gid2 in pairs:
            #             wbia.viz.show_image(
            #                 ibs1, gid1, pnum=pnum_(), fnum=fnum)
            #             wbia.viz.show_image(
            #                 ibs2, gid2, pnum=pnum_(), fnum=fnum)

    # Check for overlapping annotations (visual info only) in general
    aids1 = ibs1.annots()
    aids2 = ibs2.annots()

    # Check for overlapping annotations (visual + semantic info) in general
    aux_list1, aux_list2 = print_isect(aids1.uuids, aids2.uuids, 'uuids')
    avx_list1, avx_list2 = print_isect(
        aids1.visual_uuids, aids2.visual_uuids, 'vuuids')
    asx_list1, asx_list2 = print_isect(
        aids1.semantic_uuids, aids2.semantic_uuids, 'suuids')

    # Check which annotations with the same visual uuids have different
    # semantic uuids
    changed_ax_list1 = ut.setdiff_ordered(avx_list1, asx_list1)
    changed_ax_list2 = ut.setdiff_ordered(avx_list2, asx_list2)
    assert len(changed_ax_list1) == len(changed_ax_list2)
    assert (ut.take(aids1.visual_uuids, changed_ax_list1) ==
            ut.take(aids2.visual_uuids, changed_ax_list2))

    changed_aids1 = np.array(ut.take(aids1, changed_ax_list1))
    changed_aids2 = np.array(ut.take(aids2, changed_ax_list2))

    changed_sinfo1 = ibs1.get_annot_semantic_uuid_info(changed_aids1)
    changed_sinfo2 = ibs2.get_annot_semantic_uuid_info(changed_aids2)
    sinfo1_arr = np.array(changed_sinfo1)
    sinfo2_arr = np.array(changed_sinfo2)
    is_semantic_diff = sinfo2_arr != sinfo1_arr

    # Inspect semantic differences
    if np.any(is_semantic_diff):
        colxs, rowxs = np.nonzero(is_semantic_diff)
        colx2_rowids = ut.group_items(rowxs, colxs)
        prop2_rowids = ut.map_dict_keys(
            changed_sinfo1._fields.__getitem__, colx2_rowids)
        logger.info('changed_value_counts = ' +
                    ut.repr2(ut.map_dict_vals(len, prop2_rowids)))
        yawx = changed_sinfo1._fields.index('yaw')

        # Show change in viewpoints
        if len(colx2_rowids[yawx]) > 0:
            vp_category_diff = ibsfuncs.viewpoint_diff(
                sinfo1_arr[yawx], sinfo2_arr[yawx]).astype(np.float)
            # Look for category changes
            # any_diff = np.floor(vp_category_diff) > 0
            # _xs = np.nonzero(any_diff)[0]
            # _aids1 = changed_aids1.take(_xs)
            # _aids2 = changed_aids2.take(_xs)
            # Look for significant changes
            is_significant_diff = np.floor(vp_category_diff) > 1
            significant_xs = np.nonzero(is_significant_diff)[0]
            significant_aids1 = changed_aids1.take(significant_xs)
            significant_aids2 = changed_aids2.take(significant_xs)
            logger.info('There are %d significant viewpoint changes'
                        % (len(significant_aids2),))
            # vt.ori_distance(sinfo1_arr[yawx], sinfo2_arr[yawx])
            # zip(ibs1.get_annot_viewpoint_code(significant_aids1),
            #     ibs2.get_annot_viewpoint_code(significant_aids2))
            # logger.info('yawdiff = %r' % )
            # if False:
            #     # Hack: Apply fixes
            #     good_yaws = ibs2.get_annot_yaws(significant_aids2)
            #     ibs1.set_annot_yaws(significant_aids1, good_yaws)
            #     pass
            if False:
                # Debug code
                import wbia.viz
                import wbia.plottool as pt
                # aid_pairs = list(zip(_aids1, _aids2))
                aid_pairs = list(zip(significant_aids1, significant_aids2))
                pairs_iter = ut.ichunks(aid_pairs, chunksize=8)
                for fnum, pairs in enumerate(pairs_iter, start=1):
                    pnum_ = pt.make_pnum_nextgen(nRows=len(pairs), nCols=2)
                    for aid1, aid2 in pairs:
                        wbia.viz.show_chip(
                            ibs1, aid1, pnum=pnum_(), fnum=fnum,
                            show_viewcode=True, nokpts=True,
                        )
                        wbia.viz.show_chip(
                            ibs2, aid2, pnum=pnum_(), fnum=fnum,
                            show_viewcode=True, nokpts=True,
                        )

    #
    nAnnots_per_image1 = np.array(ibs1.get_image_num_annotations(gids1))
    nAnnots_per_image2 = np.array(ibs2.get_image_num_annotations(gids2))
    #
    images_without_annots1 = sum(nAnnots_per_image1 == 0)
    images_without_annots2 = sum(nAnnots_per_image2 == 0)
    logger.info('images_without_annots1 = %r' % (images_without_annots1,))
    logger.info('images_without_annots2 = %r' % (images_without_annots2,))

    nAnnots_per_image1
def get_injured_sharks():
    """
    >>> from wbia.scripts.getshark import *  # NOQA
    """
    import requests

    url = 'http://www.whaleshark.org/getKeywordImages.jsp'
    resp = requests.get(url)
    assert resp.status_code == 200
    keywords = resp.json()['keywords']
    key_list = ut.take_column(keywords, 'indexName')
    key_to_nice = {k['indexName']: k['readableName'] for k in keywords}

    injury_patterns = [
        'injury', 'net', 'hook', 'trunc', 'damage', 'scar', 'nicks', 'bite',
    ]
    injury_keys = [
        key for key in key_list
        if any([pat in key for pat in injury_patterns])
    ]
    noninjury_keys = ut.setdiff(key_list, injury_keys)
    injury_nice = ut.lmap(lambda k: key_to_nice[k], injury_keys)  # NOQA
    noninjury_nice = ut.lmap(lambda k: key_to_nice[k], noninjury_keys)  # NOQA
    key_list = injury_keys

    keyed_images = {}
    for key in ut.ProgIter(key_list, lbl='reading index', bs=True):
        key_url = url + '?indexName={indexName}'.format(indexName=key)
        key_resp = requests.get(key_url)
        assert key_resp.status_code == 200
        key_imgs = key_resp.json()['images']
        keyed_images[key] = key_imgs

    key_hist = {key: len(imgs) for key, imgs in keyed_images.items()}
    key_hist = ut.sort_dict(key_hist, 'vals')
    logger.info(ut.repr3(key_hist))
    nice_key_hist = ut.map_dict_keys(lambda k: key_to_nice[k], key_hist)
    nice_key_hist = ut.sort_dict(nice_key_hist, 'vals')
    logger.info(ut.repr3(nice_key_hist))

    key_to_urls = {
        key: ut.take_column(vals, 'url') for key, vals in keyed_images.items()
    }
    overlaps = {}
    import itertools
    overlap_img_list = []
    for k1, k2 in itertools.combinations(key_to_urls.keys(), 2):
        overlap_imgs = ut.isect(key_to_urls[k1], key_to_urls[k2])
        num_overlap = len(overlap_imgs)
        overlaps[(k1, k2)] = num_overlap
        overlaps[(k1, k1)] = len(key_to_urls[k1])
        if num_overlap > 0:
            # logger.info('[%s][%s], overlap=%r' % (k1, k2, num_overlap))
            overlap_img_list.extend(overlap_imgs)

    all_img_urls = list(set(ut.flatten(key_to_urls.values())))
    num_all = len(all_img_urls)  # NOQA
    logger.info('num_all = %r' % (num_all,))

    # Determine super-categories
    categories = ['nicks', 'scar', 'trunc']

    # Force these keys into these categories
    key_to_cat = {'scarbite': 'other_injury'}

    cat_to_keys = ut.ddict(list)
    for key in key_to_urls.keys():
        flag = 1
        if key in key_to_cat:
            cat = key_to_cat[key]
            cat_to_keys[cat].append(key)
            continue
        for cat in categories:
            if cat in key:
                cat_to_keys[cat].append(key)
                flag = 0
        if flag:
            cat = 'other_injury'
            cat_to_keys[cat].append(key)

    cat_urls = ut.ddict(list)
    for cat, keys in cat_to_keys.items():
        for key in keys:
            cat_urls[cat].extend(key_to_urls[key])

    cat_hist = {}
    for cat in list(cat_urls.keys()):
        cat_urls[cat] = list(set(cat_urls[cat]))
        cat_hist[cat] = len(cat_urls[cat])

    logger.info(ut.repr3(cat_to_keys))
    logger.info(ut.repr3(cat_hist))

    key_to_cat = dict([(val, key)
                       for key, vals in cat_to_keys.items()
                       for val in vals])

    # ingestset = {
    #     '__class__': 'ImageSet',
    #     'images': ut.ddict(dict)
    # }
    # for key, key_imgs in keyed_images.items():
    #     for imgdict in key_imgs:
    #         url = imgdict['url']
    #         encid = imgdict['correspondingEncounterNumber']
    #         # Make structure
    #         encdict = encounters[encid]
    #         encdict['__class__'] = 'Encounter'
    #         imgdict = ut.delete_keys(imgdict.copy(), ['correspondingEncounterNumber'])
    #         imgdict['__class__'] = 'Image'
    #         cat = key_to_cat[key]
    #         annotdict = {'relative_bbox': [.01, .01, .98, .98], 'tags': [cat, key]}
    #         annotdict['__class__'] = 'Annotation'
    #         # Ensure structures exist
    #         encdict['images'] = encdict.get('images', [])
    #         imgdict['annots'] = imgdict.get('annots', [])
    #         # Add an image to this encounter
    #         encdict['images'].append(imgdict)
    #         # Add an annotation to this image
    #         imgdict['annots'].append(annotdict)
    # # http://springbreak.wildbook.org/rest/org.ecocean.Encounter/1111
    # get_enc_url = 'http://www.whaleshark.org/rest/org.ecocean.Encounter/%s' % (encid,)
    # resp = requests.get(get_enc_url)
    # logger.info(ut.repr3(encdict))
    # logger.info(ut.repr3(encounters))

    # Download the files to the local disk
    # fpath_list =
    all_urls = ut.unique(
        ut.take_column(
            ut.flatten(
                ut.dict_subset(
                    keyed_images, ut.flatten(cat_to_keys.values())).values()),
            'url',
        ))
    dldir = ut.truepath('~/tmpsharks')
    from os.path import commonprefix, basename  # NOQA

    prefix = commonprefix(all_urls)
    suffix_list = [url_[len(prefix):] for url_ in all_urls]
    fname_list = [suffix.replace('/', '--') for suffix in suffix_list]

    fpath_list = []
    for url, fname in ut.ProgIter(
            zip(all_urls, fname_list), lbl='downloading imgs', freq=1):
        fpath = ut.grab_file_url(url, download_dir=dldir, fname=fname,
                                 verbose=False)
        fpath_list.append(fpath)

    # Make sure we keep orig info
    # url_to_keys = ut.ddict(list)
    url_to_info = ut.ddict(dict)
    for key, imgdict_list in keyed_images.items():
        for imgdict in imgdict_list:
            url = imgdict['url']
            info = url_to_info[url]
            for k, v in imgdict.items():
                info[k] = info.get(k, [])
                info[k].append(v)
            info['keys'] = info.get('keys', [])
            info['keys'].append(key)
            # url_to_keys[url].append(key)

    info_list = ut.take(url_to_info, all_urls)
    for info in info_list:
        if len(set(info['correspondingEncounterNumber'])) > 1:
            assert False, 'url with two different encounter nums'

    # Combine duplicate tags
    hashid_list = [
        ut.get_file_uuid(fpath_, stride=8)
        for fpath_ in ut.ProgIter(fpath_list, bs=True)
    ]
    groupxs = ut.group_indices(hashid_list)[1]

    # Group properties by duplicate images
    # groupxs = [g for g in groupxs if len(g) > 1]
    fpath_list_ = ut.take_column(ut.apply_grouping(fpath_list, groupxs), 0)
    url_list_ = ut.take_column(ut.apply_grouping(all_urls, groupxs), 0)
    info_list_ = [
        ut.map_dict_vals(ut.flatten, ut.dict_accum(*info_))
        for info_ in ut.apply_grouping(info_list, groupxs)
    ]

    encid_list_ = [
        ut.unique(info_['correspondingEncounterNumber'])[0]
        for info_ in info_list_
    ]
    keys_list_ = [ut.unique(info_['keys']) for info_ in info_list_]
    cats_list_ = [ut.unique(ut.take(key_to_cat, keys)) for keys in keys_list_]

    clist = ut.ColumnLists({
        'gpath': fpath_list_,
        'url': url_list_,
        'encid': encid_list_,
        'key': keys_list_,
        'cat': cats_list_,
    })

    # for info_ in ut.apply_grouping(info_list, groupxs):
    #     info = ut.dict_accum(*info_)
    #     info = ut.map_dict_vals(ut.flatten, info)
    #     x = ut.unique(ut.flatten(ut.dict_accum(*info_)['correspondingEncounterNumber']))
    #     if len(x) > 1:
    #         info = info.copy()
    #         del info['keys']
    #         logger.info(ut.repr3(info))

    flags = ut.lmap(ut.fpath_has_imgext, clist['gpath'])
    clist = clist.compress(flags)

    import wbia

    ibs = wbia.opendb('WS_Injury', allow_newdir=True)
    gid_list = ibs.add_images(clist['gpath'])
    clist['gid'] = gid_list
    failed_flags = ut.flag_None_items(clist['gid'])
    logger.info('# failed %s' % (sum(failed_flags),))
    passed_flags = ut.not_list(failed_flags)
    clist = clist.compress(passed_flags)
    ut.assert_all_not_None(clist['gid'])
    # ibs.get_image_uris_original(clist['gid'])
    ibs.set_image_uris_original(clist['gid'], clist['url'], overwrite=True)

    # ut.zipflat(clist['cat'], clist['key'])
    if False:
        # Can run detection instead
        clist['tags'] = ut.zipflat(clist['cat'])
        aid_list = ibs.use_images_as_annotations(
            clist['gid'], adjust_percent=0.01, tags_list=clist['tags'])
        aid_list

    import wbia.plottool as pt
    from wbia import core_annots
    pt.qt4ensure()
    # annots = ibs.annots()
    # aids = [1, 2]
    # ibs.depc_annot.get('hog', aids, 'hog')
    # ibs.depc_annot.get('chip', aids, 'img')
    for aid in ut.InteractiveIter(ibs.get_valid_aids()):
        hogs = ibs.depc_annot.d.get_hog_hog([aid])
        chips = ibs.depc_annot.d.get_chips_img([aid])
        chip = chips[0]
        hogimg = core_annots.make_hog_block_image(hogs[0])
        pt.clf()
        pt.imshow(hogimg, pnum=(1, 2, 1))
        pt.imshow(chip, pnum=(1, 2, 2))
        fig = pt.gcf()
        fig.show()
        fig.canvas.draw()

    # logger.info(len(groupxs))
    # if False:
    #     groupxs = ut.find_duplicate_items(ut.lmap(basename, suffix_list)).values()
    #     logger.info(ut.repr3(ut.apply_grouping(all_urls, groupxs)))
    #     # FIX
    #     for fpath, fname in zip(fpath_list, fname_list):
    #         if ut.checkpath(fpath):
    #             ut.move(fpath, join(dirname(fpath), fname))
    #             logger.info('fpath = %r' % (fpath,))
    # import wbia
    # from wbia.dbio import ingest_dataset
    # dbdir = wbia.sysres.lookup_dbdir('WS_ALL')
    # self = ingest_dataset.Ingestable2(dbdir)

    if False:
        # Show overlap matrix
        import wbia.plottool as pt
        import pandas as pd
        import numpy as np

        dict_ = overlaps
        s = pd.Series(dict_, index=pd.MultiIndex.from_tuples(overlaps))
        df = s.unstack()
        lhs, rhs = df.align(df.T)
        df = lhs.add(rhs, fill_value=0).fillna(0)

        label_texts = df.columns.values

        def label_ticks(label_texts):
            import wbia.plottool as pt
            truncated_labels = [repr(lbl[0:100]) for lbl in label_texts]
            ax = pt.gca()
            ax.set_xticks(list(range(len(label_texts))))
            ax.set_xticklabels(truncated_labels)
            [lbl.set_rotation(-55) for lbl in ax.get_xticklabels()]
            [lbl.set_horizontalalignment('left')
             for lbl in ax.get_xticklabels()]

            # xgrid, ygrid = np.meshgrid(range(len(label_texts)), range(len(label_texts)))
            # pt.plot_surface3d(xgrid, ygrid, disjoint_mat)
            ax.set_yticks(list(range(len(label_texts))))
            ax.set_yticklabels(truncated_labels)
            [lbl.set_horizontalalignment('right')
             for lbl in ax.get_yticklabels()]
            [lbl.set_verticalalignment('center')
             for lbl in ax.get_yticklabels()]
            # [lbl.set_rotation(20) for lbl in ax.get_yticklabels()]

        # df = df.sort(axis=0)
        # df = df.sort(axis=1)

        sortx = np.argsort(df.sum(axis=1).values)[::-1]
        df = df.take(sortx, axis=0)
        df = df.take(sortx, axis=1)

        fig = pt.figure(fnum=1)
        fig.clf()
        mat = df.values.astype(np.int32)
        mat[np.diag_indices(len(mat))] = 0
        vmax = mat[(1 - np.eye(len(mat))).astype(np.bool)].max()
        import matplotlib.colors
        norm = matplotlib.colors.Normalize(vmin=0, vmax=vmax, clip=True)
        pt.plt.imshow(mat, cmap='hot', norm=norm, interpolation='none')
        pt.plt.colorbar()
        pt.plt.grid('off')
        label_ticks(label_texts)
        fig.tight_layout()

    # overlap_df = pd.DataFrame.from_dict(overlap_img_list)

    class TmpImage(ut.NiceRepr):
        pass

    from skimage.feature import hog
    from skimage import data, color, exposure
    import wbia.plottool as pt

    image2 = color.rgb2gray(data.astronaut())  # NOQA

    fpath = './GOPR1120.JPG'

    import vtool as vt
    for fpath in [fpath]:
        """
        http://scikit-image.org/docs/dev/auto_examples/plot_hog.html
        """
        image = vt.imread(fpath, grayscale=True)
        image = pt.color_funcs.to_base01(image)

        fig = pt.figure(fnum=2)
        fd, hog_image = hog(
            image,
            orientations=8,
            pixels_per_cell=(16, 16),
            cells_per_block=(1, 1),
            visualise=True,
        )

        fig, (ax1, ax2) = pt.plt.subplots(1, 2, figsize=(8, 4),
                                          sharex=True, sharey=True)

        ax1.axis('off')
        ax1.imshow(image, cmap=pt.plt.cm.gray)
        ax1.set_title('Input image')
        ax1.set_adjustable('box-forced')

        # Rescale histogram for better display
        hog_image_rescaled = exposure.rescale_intensity(
            hog_image, in_range=(0, 0.02))

        ax2.axis('off')
        ax2.imshow(hog_image_rescaled, cmap=pt.plt.cm.gray)
        ax2.set_title('Histogram of Oriented Gradients')
        ax1.set_adjustable('box-forced')
        pt.plt.show()
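# Sketch of the whaleshark.org keyword-index exchange the function above
# consumes (endpoint and field names are taken from that code; the service
# itself may have changed since):
#     >>> import requests
#     >>> url = 'http://www.whaleshark.org/getKeywordImages.jsp'
#     >>> keywords = requests.get(url).json()['keywords']
#     >>> key = keywords[0]['indexName']  # each entry also has 'readableName'
#     >>> imgs = requests.get(url + '?indexName=' + key).json()['images']
#     >>> # each image dict carries 'url' and 'correspondingEncounterNumber'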
def findcite():
    """
    Prints info about used and unused citations
    """
    tex_fpath_list = testdata_fpaths()
    citekey_list = find_used_citations(tex_fpath_list)

    # Find uncited entries
    bibtexparser = ut.tryimport('bibtexparser')
    bib_fpath = 'My_Library_clean.bib'
    bibtex_str = ut.read_from(bib_fpath)
    bib_database = bibtexparser.loads(bibtex_str)
    bibtex_dict = bib_database.get_entry_dict()

    for key in bibtex_dict.keys():
        entry = bibtex_dict[key]
        entry = ut.map_dict_keys(six.text_type, entry)
        entry = ut.map_dict_keys(six.text_type.lower, entry)
        bibtex_dict[key] = entry

    print('ALL')
    ignore = ['JP', '?']
    citekey_list = ut.setdiff_ordered(sorted(ut.unique(citekey_list)), ignore)
    # print(ut.indentjoin(citekey_list))
    print('len(citekey_list) = %r' % (len(citekey_list),))

    unknown_keys = list(set(citekey_list) - set(bibtex_dict.keys()))
    unused_keys = list(set(bibtex_dict.keys()) - set(citekey_list))

    try:
        if len(unknown_keys) != 0:
            print('\nUNKNOWN KEYS:')
            print(ut.list_str(unknown_keys))
            raise AssertionError('unknown keys')
    except AssertionError as ex:
        ut.printex(ex, iswarning=True, keys=['unknown_keys'])

    @ut.argv_flag_dec(indent=' ')
    def close_keys():
        if len(unknown_keys) > 0:
            bibtex_dict.keys()
            print('\nDid you mean:')
            for key in unknown_keys:
                print('---')
                print(key)
                print(ut.closet_words(key, bibtex_dict.keys(), 3))
            print('L___')
        else:
            print('no unknown keys')

    close_keys()

    @ut.argv_flag_dec(indent=' ')
    def print_unused():
        print(ut.indentjoin(ut.sortedby(unused_keys, map(len, unused_keys))))
        print('len(unused_keys) = %r' % (len(unused_keys),))

    print_unused()

    all_authors = []
    for key in bibtex_dict.keys():
        entry = bibtex_dict[key]
        toremove = ['author', '{', '}', r'\\textbackslash']
        author = ut.multi_replace(entry.get('author', ''), toremove, '')
        authors = author.split(' and ')
        all_authors.extend(authors)

    @ut.argv_flag_dec(indent=' ')
    def author_hist():
        # print(all_authors)
        hist_ = ut.dict_hist(all_authors, ordered=True)
        hist_[''] = None
        del hist_['']
        print('Author histogram')
        print(ut.dict_str(hist_)[-1000:])

    author_hist()

    @ut.argv_flag_dec(indent=' ')
    def unused_important():
        important_authors = [
            'hinton', 'chum', 'Jegou', 'zisserman', 'schmid', 'sivic',
            'matas', 'lowe', 'perronnin', 'douze',
        ]
        for key in unused_keys:
            entry = bibtex_dict[key]
            author = entry.get('author', '')
            # authors = author.split(' and ')
            hasimportant = any(auth in author.lower()
                               for auth in important_authors)
            if hasimportant or 'smk' in str(entry).lower():
                toremove = [
                    'note', 'month', 'type', 'pages', 'urldate', 'language',
                    'volume', 'number', 'publisher'
                ]
                entry = ut.delete_dict_keys(entry, toremove)
                print(ut.dict_str(
                    entry, strvals=True, key_order=['title', 'author', 'id']))

    unused_important()
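# Minimal sketch of the bibtexparser 1.x calls findcite relies on (the entry
# below is hypothetical):
#     >>> import bibtexparser
#     >>> db = bibtexparser.loads('@article{doe2020, author={Doe, J.}, title={T}}')
#     >>> sorted(db.get_entry_dict().keys())
#     ['doe2020']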