def _make_anygroup_hashes(annots, nids):
    """Map each unique nid to a SHA1 hash of its group's visual uuids.

    Annotations are grouped by ``nids`` and each group is reduced to a
    single combined hash over the *sorted* visual-uuid bytes, so the
    same group membership always produces the same hash.

    Ignore:
        import wbia
        qreq_ = wbia.testdata_qreq_(
            defaultdb='PZ_MTEST',
            qaid_override=[1, 2, 3, 4, 5, 6, 10, 11],
            daid_override=[2, 3, 5, 6, 20, 21, 22, 23, 24],
        )
        import wbia
        qreq_ = wbia.testdata_qreq_(defaultdb='PZ_Master1')
        %timeit qreq_._make_namegroup_data_hashes()
        %timeit qreq_._make_namegroup_data_uuids()
    """
    # Sorting the uuid bytes makes the combined hash independent of the
    # order in which annots appear, so the same assignment gives the
    # same hashes. (annots = qreq_.ibs.annots(sorted(qreq_.daids)))
    unique_nids, groupxs = vt.group_indices(nids)
    uuid_groups = ut.apply_grouping(annots.visual_uuids, groupxs)
    group_hashes = []
    for uuid_group in uuid_groups:
        sorted_bytes = sorted(u.bytes for u in uuid_group)
        group_hashes.append(
            ut.combine_hashes(sorted_bytes, hasher=hashlib.sha1()))
    return dict(zip(unique_nids, group_hashes))
def get_match_results(depc, qaid_list, daid_list, score_list, config): """ converts table results into format for ipython notebook """ #qaid_list, daid_list = request.get_parent_rowids() #score_list = request.score_list #config = request.config unique_qaids, groupxs = ut.group_indices(qaid_list) #grouped_qaids_list = ut.apply_grouping(qaid_list, groupxs) grouped_daids = ut.apply_grouping(daid_list, groupxs) grouped_scores = ut.apply_grouping(score_list, groupxs) ibs = depc.controller unique_qnids = ibs.get_annot_nids(unique_qaids) # FIXME: decision should not be part of the config for the one-vs-one # scores decision_func = getattr(np, config['decision']) _iter = zip(unique_qaids, unique_qnids, grouped_daids, grouped_scores) for qaid, qnid, daids, scores in _iter: dnids = ibs.get_annot_nids(daids) # Remove distance to self annot_scores = np.array(scores) daid_list_ = np.array(daids) dnid_list_ = np.array(dnids) is_valid = (daid_list_ != qaid) daid_list_ = daid_list_.compress(is_valid) dnid_list_ = dnid_list_.compress(is_valid) annot_scores = annot_scores.compress(is_valid) # Hacked in version of creating an annot match object match_result = ibeis.AnnotMatch() match_result.qaid = qaid match_result.qnid = qnid match_result.daid_list = daid_list_ match_result.dnid_list = dnid_list_ match_result._update_daid_index() match_result._update_unique_nid_index() grouped_annot_scores = vt.apply_grouping(annot_scores, match_result.name_groupxs) name_scores = np.array( [decision_func(dists) for dists in grouped_annot_scores]) match_result.set_cannonical_name_score(annot_scores, name_scores) yield match_result
def get_match_results(depc, qaid_list, daid_list, score_list, config): """ converts table results into format for ipython notebook """ #qaid_list, daid_list = request.get_parent_rowids() #score_list = request.score_list #config = request.config unique_qaids, groupxs = ut.group_indices(qaid_list) #grouped_qaids_list = ut.apply_grouping(qaid_list, groupxs) grouped_daids = ut.apply_grouping(daid_list, groupxs) grouped_scores = ut.apply_grouping(score_list, groupxs) ibs = depc.controller unique_qnids = ibs.get_annot_nids(unique_qaids) # FIXME: decision should not be part of the config for the one-vs-one # scores decision_func = getattr(np, config['decision']) _iter = zip(unique_qaids, unique_qnids, grouped_daids, grouped_scores) for qaid, qnid, daids, scores in _iter: dnids = ibs.get_annot_nids(daids) # Remove distance to self annot_scores = np.array(scores) daid_list_ = np.array(daids) dnid_list_ = np.array(dnids) is_valid = (daid_list_ != qaid) daid_list_ = daid_list_.compress(is_valid) dnid_list_ = dnid_list_.compress(is_valid) annot_scores = annot_scores.compress(is_valid) # Hacked in version of creating an annot match object match_result = ibeis.AnnotMatch() match_result.qaid = qaid match_result.qnid = qnid match_result.daid_list = daid_list_ match_result.dnid_list = dnid_list_ match_result._update_daid_index() match_result._update_unique_nid_index() grouped_annot_scores = vt.apply_grouping(annot_scores, match_result.name_groupxs) name_scores = np.array([decision_func(dists) for dists in grouped_annot_scores]) match_result.set_cannonical_name_score(annot_scores, name_scores) yield match_result
def consolodate_duplicates(self):
    """Find groups of files that are true duplicates.

    Files are candidate duplicates when they share a basename; within
    each candidate set, only files with identical uuids are grouped.

    Returns:
        list: groups (each a list of file paths) containing more than
            one file, i.e. the actual duplicate path groups.

    NOTE(review): method name ('consolodate') kept as-is for API
    compatibility with existing callers.
    """
    fnames = map(basename, self.rel_fpath_list)
    duplicate_map = ut.find_duplicate_items(fnames)
    groups = []
    for dupname, idxs in duplicate_map.items():
        # Among same-named files, only identical uuids are real dups
        uuids = self.get_prop('uuids', idxs)
        unique_uuids, groupxs = ut.group_indices(uuids)
        groups.extend(ut.apply_grouping(idxs, groupxs))
    multitons = [g for g in groups if len(g) > 1]
    # singletons = [g for g in groups if len(g) <= 1]
    # BUG FIX: the grouped duplicate paths were previously computed and
    # then silently discarded (function returned None); return them.
    return ut.unflat_take(list(self.fpaths()), multitons)
def as_parts(self):
    """Split this table's latex text into ``(top, header, mid, bot)``.

    Returns:
        tuple: ``(top, header, mid, bot)`` pieces of the tabular
            environment. ``mid`` may become a list of line-groups when
            group separators apply. If ``self.parts`` was precomputed
            it is returned directly.
    """
    if self.parts is not None:
        return self.parts
    text = self.as_text()
    top, header, mid, bot = split_tabular(text)

    # Rebuild the \begin{tabular} line with an explicit column format
    colfmt = self._rectify_colfmt()
    if colfmt is not None:
        top = '\\begin{tabular}{%s}' % (colfmt,)

    if self.theadify:
        # Wrap long header cells with \thead so they can break lines
        import textwrap

        width = self.theadify
        wrapper = textwrap.TextWrapper(width=width, break_long_words=False)

        header_lines = header.split('\n')
        new_lines = []
        for line in header_lines:
            line = line.rstrip('\\')
            headers = [h.strip() for h in line.split('&')]
            headers = ['\\\\'.join(wrapper.wrap(h)) for h in headers]
            headers = [h if h == '{}' else '\\thead{' + h + '}'
                       for h in headers]
            line = ' & '.join(headers) + '\\\\'
            new_lines.append(line)
        new_header = '\n'.join(new_lines)
        header = new_header

    if True:
        groupxs = self.groupxs
        # Put midlines between multi index levels
        if groupxs is None and isinstance(self._data, pd.DataFrame):
            index = self._data.index
            if len(index.names) == 2 and len(mid) == 1:
                # FIX: MultiIndex.labels was removed in pandas 1.0
                # (renamed to .codes); support both old and new pandas.
                codes = index.codes if hasattr(index, 'codes') else index.labels
                groupxs = ut.group_indices(codes[0])[1]
        if groupxs is not None:
            bodylines = mid[0].split('\n')
            mid = ut.apply_grouping(bodylines, groupxs)

    parts = (top, header, mid, bot)
    return parts
def stratified_label_shuffle_split(y, labels, fractions, y_idx=None, rng=None):
    """Stratified shuffle split that keeps label groups intact.

    Modified from sklearn to make n splits instead of 2, while also
    enforcing that items sharing a label are never broken into
    separate splits.

    Args:
        y (ndarray): labels
        labels (?):
        fractions (?):
        rng (RandomState): random number generator(default = None)

    Returns:
        ?: index_sets

    CommandLine:
        python -m ibeis_cnn.dataset stratified_label_shuffle_split --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis_cnn.dataset import *  # NOQA
        >>> y      = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        >>> labels = [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 6, 0, 7, 7, 7, 7]
        >>> fractions = [.7, .3]
        >>> rng = np.random.RandomState(0)
        >>> index_sets = stratified_label_shuffle_split(y, labels, fractions, rng)
    """
    rng = ut.ensure_rng(rng)
    group_labels, groupxs = ut.group_indices(labels)
    ys_per_group = ut.apply_grouping(y, groupxs)
    # Each label group votes for a class by sampling one of its members.
    # TODO: should weight the following selection based on size of group
    #class_weights = [ut.dict_hist(ys) for ys in grouped_ys]
    group_classes = []
    for ys in ys_per_group:
        group_classes.append(ys[rng.randint(0, len(ys))])
    # Split at the group level, then expand back to item indexes
    group_split_idxs = stratified_shuffle_split(group_classes, fractions, rng)
    index_sets = [
        np.array(ut.flatten(ut.take(groupxs, idxs)))
        for idxs in group_split_idxs
    ]
    if y_idx is not None:
        # These indicies subindex into parent set of indicies
        index_sets = [np.take(y_idx, idxs, axis=0) for idxs in index_sets]
    return index_sets
def _make_test_folds(self, X, y=None, groups=None):
    """Assign every sample to a test fold while keeping groups intact.

    Greedily assigns whole groups (largest first) to the fold that best
    preserves the global class distribution while keeping fold sizes
    balanced.

    Args:
        self (?):
        X (ndarray): data (unused here; kept for sklearn API parity)
        y (ndarray): labels(default = None)
        groups (None): group label per sample; samples sharing a group
            always land in the same fold (default = None)

    Returns:
        ?: test_folds -- ndarray of fold index per sample

    CommandLine:
        python -m ibeis.algo.verif.sklearn_utils _make_test_folds

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.verif.sklearn_utils import *  # NOQA
        >>> import utool as ut
        >>> rng = ut.ensure_rng(0)
        >>> groups = [1, 1, 3, 4, 2, 2, 7, 8, 8]
        >>> y      = [1, 1, 1, 1, 2, 2, 2, 3, 3]
        >>> X = np.empty((len(y), 0))
        >>> self = StratifiedGroupKFold(random_state=rng)
        >>> skf_list = list(self.split(X=X, y=y, groups=groups))
    """
    # if self.shuffle:
    #     rng = check_random_state(self.random_state)
    # else:
    #     rng = self.random_state
    n_splits = self.n_splits
    y = np.asarray(y)
    n_samples = y.shape[0]

    import utool as ut

    unique_y, y_inversed = np.unique(y, return_inverse=True)
    n_classes = max(unique_y) + 1
    unique_groups, group_idxs = ut.group_indices(groups)
    # grouped_ids = list(grouping.keys())
    grouped_y = ut.apply_grouping(y, group_idxs)
    # Per-group class histograms.
    # NOTE(review): `bincount` is assumed to be a module-level import of
    # numpy's bincount -- confirm at file top.
    grouped_y_counts = np.array([
        bincount(y_, minlength=n_classes) for y_ in grouped_y])
    target_freq = grouped_y_counts.sum(axis=0)
    target_ratio = target_freq / target_freq.sum()

    # Greedilly choose the split assignment that minimizes the local
    # * squared differences in target from actual frequencies
    # * and best equalizes the number of items per fold
    # Distribute groups with most members first
    split_freq = np.zeros((n_splits, n_classes))
    # split_ratios = split_freq / split_freq.sum(axis=1)
    split_ratios = np.ones(split_freq.shape) / split_freq.shape[1]
    split_diffs = ((split_freq - target_ratio) ** 2).sum(axis=1)
    sortx = np.argsort(grouped_y_counts.sum(axis=1))[::-1]
    grouped_splitx = []
    for count, group_idx in enumerate(sortx):
        group_freq = grouped_y_counts[group_idx]
        cand_freq = split_freq + group_freq
        cand_ratio = cand_freq / cand_freq.sum(axis=1)[:, None]
        cand_diffs = ((cand_ratio - target_ratio) ** 2).sum(axis=1)
        # Compute loss: how unbalanced would the *other* folds remain?
        other_diffs = np.array([
            sum(split_diffs[x + 1:]) + sum(split_diffs[:x])
            for x in range(n_splits)
        ])
        # penalize unbalanced splits
        ratio_loss = other_diffs + cand_diffs
        # penalize heavy splits
        # NOTE(review): on the first iteration split_freq is all zeros, so
        # this is 0/0 -> nan; argmin then resolves to fold 0. Preserved.
        freq_loss = split_freq.sum(axis=1)
        freq_loss = freq_loss / freq_loss.sum()
        losses = ratio_loss + freq_loss
        splitx = np.argmin(losses)
        split_freq[splitx] = cand_freq[splitx]
        split_ratios[splitx] = cand_ratio[splitx]
        split_diffs[splitx] = cand_diffs[splitx]
        grouped_splitx.append(splitx)

    # FIX: np.int was deprecated in numpy 1.20 and removed in 1.24; it
    # was an alias for the builtin int, so `int` yields the same dtype.
    test_folds = np.empty(n_samples, dtype=int)
    for group_idx, splitx in zip(sortx, grouped_splitx):
        idxs = group_idxs[group_idx]
        test_folds[idxs] = splitx
    return test_folds
def stratified_kfold_label_split(y, labels, n_folds=2, y_idx=None, rng=None):
    """Stratified K-fold split that never breaks a label across folds.

    Also enforces that labels are not broken into separate groups.

    Args:
        y (ndarray): labels
        labels (?): group label per item; items with the same label are
            kept inside a single fold
        n_folds (int): number of folds (default = 2)
        y_idx (array): indexes associated with y if it was already presampled
        rng (RandomState): random number generator(default = None)

    Returns:
        ?: folded_index_sets -- one [train_idxs, test_idxs] pair per fold

    CommandLine:
        python -m ibeis_cnn.dataset stratified_kfold_label_split --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis_cnn.dataset import *  # NOQA
        >>> y      = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        >>> labels = [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 6, 0, 7, 7, 7, 7]
        >>> rng = np.random.RandomState(0)
        >>> index_sets = stratified_kfold_label_split(y, labels, rng=rng)
    """
    rng = ut.ensure_rng(rng)
    #orig_y = y
    unique_labels, groupxs = ut.group_indices(labels)
    grouped_ys = ut.apply_grouping(y, groupxs)
    # Assign each group a probabilistic class
    unique_ys = [ys[rng.randint(0, len(ys))] for ys in grouped_ys]
    # TODO: should weight the following selection based on size of group
    #class_weights = [ut.dict_hist(ys) for ys in grouped_ys]

    # FIX: sklearn.cross_validation was removed in scikit-learn 0.20.
    # Prefer sklearn.model_selection (its StratifiedKFold takes the data
    # at split() time), falling back to the legacy API when necessary.
    try:
        from sklearn.model_selection import StratifiedKFold
        skf = StratifiedKFold(n_splits=n_folds, shuffle=True,
                              random_state=rng)
        _iter = skf.split(X=np.empty((len(unique_ys), 0)), y=unique_ys)
    except ImportError:
        # Legacy scikit-learn (< 0.18)
        import sklearn.cross_validation
        xvalkw = dict(n_folds=n_folds, shuffle=True, random_state=rng)
        skf = sklearn.cross_validation.StratifiedKFold(unique_ys, **xvalkw)
        _iter = skf

    folded_index_sets = []
    for label_idx_set in _iter:
        # Expand group-level indexes back into item-level indexes
        index_sets = [np.array(ut.flatten(ut.take(groupxs, idxs)))
                      for idxs in label_idx_set]
        folded_index_sets.append(index_sets)

    # Sanity check: no label may appear in both train and test
    for train_idx, test_idx in folded_index_sets:
        train_labels = set(ut.take(labels, train_idx))
        test_labels = set(ut.take(labels, test_idx))
        assert len(test_labels.intersection(train_labels)) == 0, (
            'same labels appeared in both train and test')

    if y_idx is not None:
        # These indicies subindex into parent set of indicies
        folded_index_sets2 = []
        for index_sets in folded_index_sets:
            index_sets = [np.take(y_idx, idxs, axis=0)
                          for idxs in index_sets]
            folded_index_sets2.append(index_sets)
        folded_index_sets = folded_index_sets2
    return folded_index_sets
def merge_level_order(level_orders, topsort):
    """
    Merge orders of individual subtrees into a total ordering for
    computation.

    >>> level_orders = {
    >>>     'multi_chip_multitest': [['dummy_annot'], ['chip'], ['multitest'],
    >>>         ['multitest_score'], ],
    >>>     'multi_fgweight_multitest': [ ['dummy_annot'], ['chip', 'probchip'],
    >>>         ['keypoint'], ['fgweight'], ['multitest'], ['multitest_score'], ],
    >>>     'multi_keypoint_nnindexer': [ ['dummy_annot'], ['chip'], ['keypoint'],
    >>>         ['nnindexer'], ['multitest'], ['multitest_score'], ],
    >>>     'normal': [ ['dummy_annot'], ['chip', 'probchip'], ['keypoint'],
    >>>         ['fgweight'], ['spam'], ['multitest'], ['multitest_score'], ],
    >>>     'nwise_notch_multitest_1': [ ['dummy_annot'], ['notch'], ['multitest'],
    >>>         ['multitest_score'], ],
    >>>     'nwise_notch_multitest_2': [ ['dummy_annot'], ['notch'], ['multitest'],
    >>>         ['multitest_score'], ],
    >>>     'nwise_notch_notchpair_1': [ ['dummy_annot'], ['notch'], ['notchpair'],
    >>>         ['multitest'], ['multitest_score'], ],
    >>>     'nwise_notch_notchpair_2': [ ['dummy_annot'], ['notch'], ['notchpair'],
    >>>         ['multitest'], ['multitest_score'], ],
    >>> }
    >>> topsort = [u'dummy_annot', u'notch', u'probchip', u'chip', u'keypoint',
    >>>     u'fgweight', u'nnindexer', u'spam', u'notchpair', u'multitest',
    >>>     u'multitest_score']
    >>> print(ut.repr3(ut.merge_level_order(level_orders, topsort)))

    EG2:
        level_orders = {u'normal': [[u'dummy_annot'], [u'chip', u'probchip'],
            [u'keypoint'], [u'fgweight'], [u'spam']]}
        topsort = [u'dummy_annot', u'probchip', u'chip', u'keypoint',
            u'fgweight', u'spam']
    """
    import utool as ut

    if False:
        # Simple (disabled) strategy: for each table in topological order,
        # record which subtrees mention it anywhere in their level order.
        compute_order = []
        level_orders = ut.map_dict_vals(ut.total_flatten, level_orders)
        level_sets = ut.map_dict_vals(set, level_orders)
        for tablekey in topsort:
            compute_order.append((tablekey, [groupkey for groupkey, set_ in level_sets.items() if tablekey in set_]))
        return compute_order
    else:
        # Do on common subgraph
        import itertools
        # Pointer to current level.: Start at the end and
        # then work your way up.
        main_ptr = len(topsort) - 1
        stack = []
        #from six.moves import zip_longest
        keys = list(level_orders.keys())
        # Each subtree walks its level list from the tail using negative
        # indexes (-1 is the last level).
        type_to_ptr = {key: -1 for key in keys}
        print('level_orders = %s' % (ut.repr3(level_orders),))
        for count in itertools.count(0):
            print('----')
            print('count = %r' % (count,))
            # Snapshot the level each subtree currently points at
            # (None once a subtree's levels are exhausted).
            ptred_levels = []
            for key in keys:
                levels = level_orders[key]
                ptr = type_to_ptr[key]
                try:
                    level = tuple(levels[ptr])
                except IndexError:
                    level = None
                ptred_levels.append(level)
            print('ptred_levels = %r' % (ptred_levels,))
            print('main_ptr = %r' % (main_ptr,))
            # groupkeys, groupxs = ut.group_indices(ptred_levels)
            # Group keys are tablenames
            # They point to the (type) of the input
            # num_levelkeys = len(ut.total_flatten(ptred_levels))
            groupkeys, groupxs = ut.group_indices(ptred_levels)
            # Scan backwards through topsort for a table that exactly one
            # level-group currently points at.
            main_idx = None
            while main_idx is None and main_ptr >= 0:
                target = topsort[main_ptr]
                print('main_ptr = %r' % (main_ptr,))
                print('target = %r' % (target,))
                # main_idx = ut.listfind(groupkeys, (target,))
                # if main_idx is None:
                possible_idxs = [idx for idx, keytup in enumerate(groupkeys) if keytup is not None and target in keytup]
                if len(possible_idxs) == 1:
                    main_idx = possible_idxs[0]
                else:
                    main_idx = None
                if main_idx is None:
                    main_ptr -= 1
            if main_idx is None:
                print('break I')
                break
            # The subtrees whose current level matched the target all
            # advance one level (toward the root).
            found_groups = ut.apply_grouping(keys, groupxs)[main_idx]
            print('found_groups = %r' % (found_groups,))
            stack.append((target, found_groups))
            for k in found_groups:
                type_to_ptr[k] -= 1

            if len(found_groups) == len(keys):
                main_ptr -= 1
                if main_ptr < 0:
                    print('break E')
                    break
        print('stack = %s' % (ut.repr3(stack),))
        print('have = %r' % (sorted(ut.take_column(stack, 0)),))
        print('need = %s' % (sorted(ut.total_flatten(level_orders.values())),))
        # Stack was built root-last; reverse for execution order
        compute_order = stack[::-1]
        return compute_order
def get_injured_sharks():
    """
    Scrape whaleshark.org keyword indexes for injury-related images,
    download them, de-duplicate by file content, and ingest them into a
    local 'WS_Injury' wbia database tagged with injury categories.

    NOTE(review): this is an interactive research script (network I/O,
    downloads, GUI loops); it is not intended as a library function.

    >>> from wbia.scripts.getshark import *  # NOQA
    """
    import requests

    url = 'http://www.whaleshark.org/getKeywordImages.jsp'
    resp = requests.get(url)
    assert resp.status_code == 200
    keywords = resp.json()['keywords']
    key_list = ut.take_column(keywords, 'indexName')
    key_to_nice = {k['indexName']: k['readableName'] for k in keywords}

    # Keep only keywords whose name suggests an injury
    injury_patterns = [
        'injury', 'net', 'hook', 'trunc', 'damage', 'scar', 'nicks', 'bite',
    ]

    injury_keys = [
        key for key in key_list if any([pat in key for pat in injury_patterns])
    ]
    noninjury_keys = ut.setdiff(key_list, injury_keys)
    injury_nice = ut.lmap(lambda k: key_to_nice[k], injury_keys)  # NOQA
    noninjury_nice = ut.lmap(lambda k: key_to_nice[k], noninjury_keys)  # NOQA
    key_list = injury_keys

    # Fetch the image list for each injury keyword
    keyed_images = {}
    for key in ut.ProgIter(key_list, lbl='reading index', bs=True):
        key_url = url + '?indexName={indexName}'.format(indexName=key)
        key_resp = requests.get(key_url)
        assert key_resp.status_code == 200
        key_imgs = key_resp.json()['images']
        keyed_images[key] = key_imgs

    key_hist = {key: len(imgs) for key, imgs in keyed_images.items()}
    key_hist = ut.sort_dict(key_hist, 'vals')
    logger.info(ut.repr3(key_hist))
    nice_key_hist = ut.map_dict_keys(lambda k: key_to_nice[k], key_hist)
    nice_key_hist = ut.sort_dict(nice_key_hist, 'vals')
    logger.info(ut.repr3(nice_key_hist))

    # Measure how much the keyword image sets overlap
    key_to_urls = {key: ut.take_column(vals, 'url')
                   for key, vals in keyed_images.items()}
    overlaps = {}
    import itertools

    overlap_img_list = []
    for k1, k2 in itertools.combinations(key_to_urls.keys(), 2):
        overlap_imgs = ut.isect(key_to_urls[k1], key_to_urls[k2])
        num_overlap = len(overlap_imgs)
        overlaps[(k1, k2)] = num_overlap
        overlaps[(k1, k1)] = len(key_to_urls[k1])
        if num_overlap > 0:
            # logger.info('[%s][%s], overlap=%r' % (k1, k2, num_overlap))
            overlap_img_list.extend(overlap_imgs)

    all_img_urls = list(set(ut.flatten(key_to_urls.values())))
    num_all = len(all_img_urls)  # NOQA
    logger.info('num_all = %r' % (num_all,))

    # Determine super-categories
    categories = ['nicks', 'scar', 'trunc']

    # Force these keys into these categories
    key_to_cat = {'scarbite': 'other_injury'}

    cat_to_keys = ut.ddict(list)

    for key in key_to_urls.keys():
        flag = 1
        if key in key_to_cat:
            cat = key_to_cat[key]
            cat_to_keys[cat].append(key)
            continue
        for cat in categories:
            if cat in key:
                cat_to_keys[cat].append(key)
                flag = 0
        if flag:
            # Fell through all substring checks: bucket as other_injury
            cat = 'other_injury'
            cat_to_keys[cat].append(key)

    cat_urls = ut.ddict(list)
    for cat, keys in cat_to_keys.items():
        for key in keys:
            cat_urls[cat].extend(key_to_urls[key])

    cat_hist = {}
    for cat in list(cat_urls.keys()):
        cat_urls[cat] = list(set(cat_urls[cat]))
        cat_hist[cat] = len(cat_urls[cat])

    logger.info(ut.repr3(cat_to_keys))
    logger.info(ut.repr3(cat_hist))

    # Invert cat_to_keys into a key -> category lookup
    key_to_cat = dict([(val, key) for key, vals in cat_to_keys.items()
                       for val in vals])

    # ingestset = {
    #     '__class__': 'ImageSet',
    #     'images': ut.ddict(dict)
    # }
    # for key, key_imgs in keyed_images.items():
    #     for imgdict in key_imgs:
    #         url = imgdict['url']
    #         encid = imgdict['correspondingEncounterNumber']
    #         # Make structure
    #         encdict = encounters[encid]
    #         encdict['__class__'] = 'Encounter'
    #         imgdict = ut.delete_keys(imgdict.copy(), ['correspondingEncounterNumber'])
    #         imgdict['__class__'] = 'Image'
    #         cat = key_to_cat[key]
    #         annotdict = {'relative_bbox': [.01, .01, .98, .98], 'tags': [cat, key]}
    #         annotdict['__class__'] = 'Annotation'
    #         # Ensure structures exist
    #         encdict['images'] = encdict.get('images', [])
    #         imgdict['annots'] = imgdict.get('annots', [])
    #         # Add an image to this encounter
    #         encdict['images'].append(imgdict)
    #         # Add an annotation to this image
    #         imgdict['annots'].append(annotdict)
    # # http://springbreak.wildbook.org/rest/org.ecocean.Encounter/1111
    # get_enc_url = 'http://www.whaleshark.org/rest/org.ecocean.Encounter/%s' % (encid,)
    # resp = requests.get(get_enc_url)
    # logger.info(ut.repr3(encdict))
    # logger.info(ut.repr3(encounters))

    # Download the files to the local disk
    # fpath_list =
    all_urls = ut.unique(
        ut.take_column(
            ut.flatten(
                ut.dict_subset(
                    keyed_images,
                    ut.flatten(cat_to_keys.values())).values()),
            'url',
        ))
    dldir = ut.truepath('~/tmpsharks')
    from os.path import commonprefix, basename  # NOQA

    # Derive a flat local filename from the unique part of each url
    prefix = commonprefix(all_urls)
    suffix_list = [url_[len(prefix):] for url_ in all_urls]
    fname_list = [suffix.replace('/', '--') for suffix in suffix_list]

    fpath_list = []
    for url, fname in ut.ProgIter(zip(all_urls, fname_list),
                                  lbl='downloading imgs', freq=1):
        fpath = ut.grab_file_url(url, download_dir=dldir, fname=fname,
                                 verbose=False)
        fpath_list.append(fpath)

    # Make sure we keep orig info
    # url_to_keys = ut.ddict(list)
    # Accumulate every metadata value seen for a url into lists
    url_to_info = ut.ddict(dict)
    for key, imgdict_list in keyed_images.items():
        for imgdict in imgdict_list:
            url = imgdict['url']
            info = url_to_info[url]
            for k, v in imgdict.items():
                info[k] = info.get(k, [])
                info[k].append(v)
            info['keys'] = info.get('keys', [])
            info['keys'].append(key)
            # url_to_keys[url].append(key)

    info_list = ut.take(url_to_info, all_urls)
    for info in info_list:
        if len(set(info['correspondingEncounterNumber'])) > 1:
            assert False, 'url with two different encounter nums'
    # Combine duplicate tags

    # De-duplicate by file content (uuid of sampled bytes)
    hashid_list = [
        ut.get_file_uuid(fpath_, stride=8)
        for fpath_ in ut.ProgIter(fpath_list, bs=True)
    ]
    groupxs = ut.group_indices(hashid_list)[1]

    # Group properties by duplicate images
    # groupxs = [g for g in groupxs if len(g) > 1]
    fpath_list_ = ut.take_column(ut.apply_grouping(fpath_list, groupxs), 0)
    url_list_ = ut.take_column(ut.apply_grouping(all_urls, groupxs), 0)

    info_list_ = [
        ut.map_dict_vals(ut.flatten, ut.dict_accum(*info_))
        for info_ in ut.apply_grouping(info_list, groupxs)
    ]

    encid_list_ = [
        ut.unique(info_['correspondingEncounterNumber'])[0]
        for info_ in info_list_
    ]
    keys_list_ = [ut.unique(info_['keys']) for info_ in info_list_]
    cats_list_ = [ut.unique(ut.take(key_to_cat, keys)) for keys in keys_list_]

    clist = ut.ColumnLists({
        'gpath': fpath_list_,
        'url': url_list_,
        'encid': encid_list_,
        'key': keys_list_,
        'cat': cats_list_,
    })

    # for info_ in ut.apply_grouping(info_list, groupxs):
    #     info = ut.dict_accum(*info_)
    #     info = ut.map_dict_vals(ut.flatten, info)
    #     x = ut.unique(ut.flatten(ut.dict_accum(*info_)['correspondingEncounterNumber']))
    #     if len(x) > 1:
    #         info = info.copy()
    #         del info['keys']
    #         logger.info(ut.repr3(info))

    # Drop files that are not images by extension
    flags = ut.lmap(ut.fpath_has_imgext, clist['gpath'])
    clist = clist.compress(flags)

    import wbia

    # Ingest into a (possibly new) local database
    ibs = wbia.opendb('WS_Injury', allow_newdir=True)
    gid_list = ibs.add_images(clist['gpath'])
    clist['gid'] = gid_list
    failed_flags = ut.flag_None_items(clist['gid'])
    logger.info('# failed %s' % (sum(failed_flags),))
    passed_flags = ut.not_list(failed_flags)
    clist = clist.compress(passed_flags)
    ut.assert_all_not_None(clist['gid'])
    # ibs.get_image_uris_original(clist['gid'])
    ibs.set_image_uris_original(clist['gid'], clist['url'], overwrite=True)

    # ut.zipflat(clist['cat'], clist['key'])
    if False:
        # Can run detection instead
        clist['tags'] = ut.zipflat(clist['cat'])
        aid_list = ibs.use_images_as_annotations(clist['gid'],
                                                 adjust_percent=0.01,
                                                 tags_list=clist['tags'])
        aid_list

    import wbia.plottool as pt
    from wbia import core_annots

    pt.qt4ensure()
    # annots = ibs.annots()
    # aids = [1, 2]
    # ibs.depc_annot.get('hog', aids , 'hog')
    # ibs.depc_annot.get('chip', aids, 'img')
    # Interactive HOG / chip visualization loop
    for aid in ut.InteractiveIter(ibs.get_valid_aids()):
        hogs = ibs.depc_annot.d.get_hog_hog([aid])
        chips = ibs.depc_annot.d.get_chips_img([aid])
        chip = chips[0]
        hogimg = core_annots.make_hog_block_image(hogs[0])
        pt.clf()
        pt.imshow(hogimg, pnum=(1, 2, 1))
        pt.imshow(chip, pnum=(1, 2, 2))
        fig = pt.gcf()
        fig.show()
        fig.canvas.draw()

    # logger.info(len(groupxs))
    # if False:
    #     groupxs = ut.find_duplicate_items(ut.lmap(basename, suffix_list)).values()
    #     logger.info(ut.repr3(ut.apply_grouping(all_urls, groupxs)))
    #     # FIX
    #     for fpath, fname in zip(fpath_list, fname_list):
    #         if ut.checkpath(fpath):
    #             ut.move(fpath, join(dirname(fpath), fname))
    #             logger.info('fpath = %r' % (fpath,))
    # import wbia
    # from wbia.dbio import ingest_dataset
    # dbdir = wbia.sysres.lookup_dbdir('WS_ALL')
    # self = ingest_dataset.Ingestable2(dbdir)

    if False:
        # Show overlap matrix
        import wbia.plottool as pt
        import pandas as pd
        import numpy as np

        dict_ = overlaps
        s = pd.Series(dict_, index=pd.MultiIndex.from_tuples(overlaps))
        df = s.unstack()
        lhs, rhs = df.align(df.T)
        df = lhs.add(rhs, fill_value=0).fillna(0)

        label_texts = df.columns.values

        def label_ticks(label_texts):
            # Label both axes with (truncated) keyword names
            import wbia.plottool as pt

            truncated_labels = [repr(lbl[0:100]) for lbl in label_texts]
            ax = pt.gca()
            ax.set_xticks(list(range(len(label_texts))))
            ax.set_xticklabels(truncated_labels)
            [lbl.set_rotation(-55) for lbl in ax.get_xticklabels()]
            [
                lbl.set_horizontalalignment('left')
                for lbl in ax.get_xticklabels()
            ]

            # xgrid, ygrid = np.meshgrid(range(len(label_texts)), range(len(label_texts)))
            # pt.plot_surface3d(xgrid, ygrid, disjoint_mat)
            ax.set_yticks(list(range(len(label_texts))))
            ax.set_yticklabels(truncated_labels)
            [
                lbl.set_horizontalalignment('right')
                for lbl in ax.get_yticklabels()
            ]
            [
                lbl.set_verticalalignment('center')
                for lbl in ax.get_yticklabels()
            ]
            # [lbl.set_rotation(20) for lbl in ax.get_yticklabels()]

        # df = df.sort(axis=0)
        # df = df.sort(axis=1)

        sortx = np.argsort(df.sum(axis=1).values)[::-1]
        df = df.take(sortx, axis=0)
        df = df.take(sortx, axis=1)

        fig = pt.figure(fnum=1)
        fig.clf()
        mat = df.values.astype(np.int32)
        mat[np.diag_indices(len(mat))] = 0
        # NOTE(review): np.bool is removed in modern numpy -- would need
        # plain `bool` if this disabled branch is ever revived.
        vmax = mat[(1 - np.eye(len(mat))).astype(np.bool)].max()
        import matplotlib.colors

        norm = matplotlib.colors.Normalize(vmin=0, vmax=vmax, clip=True)
        pt.plt.imshow(mat, cmap='hot', norm=norm, interpolation='none')
        pt.plt.colorbar()
        pt.plt.grid('off')
        label_ticks(label_texts)
        fig.tight_layout()

    # overlap_df = pd.DataFrame.from_dict(overlap_img_list)
    class TmpImage(ut.NiceRepr):
        pass

    from skimage.feature import hog
    from skimage import data, color, exposure
    import wbia.plottool as pt

    image2 = color.rgb2gray(data.astronaut())  # NOQA

    fpath = './GOPR1120.JPG'

    import vtool as vt

    for fpath in [fpath]:
        """
        http://scikit-image.org/docs/dev/auto_examples/plot_hog.html
        """
        image = vt.imread(fpath, grayscale=True)
        image = pt.color_funcs.to_base01(image)

        fig = pt.figure(fnum=2)
        fd, hog_image = hog(
            image,
            orientations=8,
            pixels_per_cell=(16, 16),
            cells_per_block=(1, 1),
            visualise=True,
        )

        fig, (ax1, ax2) = pt.plt.subplots(1, 2, figsize=(8, 4),
                                          sharex=True, sharey=True)

        ax1.axis('off')
        ax1.imshow(image, cmap=pt.plt.cm.gray)
        ax1.set_title('Input image')
        ax1.set_adjustable('box-forced')

        # Rescale histogram for better display
        hog_image_rescaled = exposure.rescale_intensity(hog_image,
                                                        in_range=(0, 0.02))

        ax2.axis('off')
        ax2.imshow(hog_image_rescaled, cmap=pt.plt.cm.gray)
        ax2.set_title('Histogram of Oriented Gradients')
        ax1.set_adjustable('box-forced')
        pt.plt.show()
def get_query_result_info(qreq_):
    """
    Helper function.

    Runs queries of a specific configuration returns the best rank of each query

    Args:
        qaids (list) : query annotation ids
        daids (list) : database annotation ids

    Returns:
        qx2_bestranks

    CommandLine:
        python -m ibeis.expt.harness --test-get_query_result_info
        python -m ibeis.expt.harness --test-get_query_result_info:0
        python -m ibeis.expt.harness --test-get_query_result_info:1
        python -m ibeis.expt.harness --test-get_query_result_info:0 --db lynx -a default:qsame_imageset=True,been_adjusted=True,excluderef=True -t default:K=1
        python -m ibeis.expt.harness --test-get_query_result_info:0 --db lynx -a default:qsame_imageset=True,been_adjusted=True,excluderef=True -t default:K=1 --cmd

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.expt.harness import *  # NOQA
        >>> import ibeis
        >>> qreq_ = ibeis.main_helpers.testdata_qreq_(a=['default:qindex=0:3,dindex=0:5'])
        >>> #ibs = ibeis.opendb('PZ_MTEST')
        >>> #qaids = ibs.get_valid_aids()[0:3]
        >>> #daids = ibs.get_valid_aids()[0:5]
        >>> #qreq_ = ibs.new_query_request(qaids, daids, verbose=True, cfgdict={})
        >>> cfgres_info = get_query_result_info(qreq_)
        >>> print(ut.dict_str(cfgres_info))

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.expt.harness import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> #cfgdict = dict(codename='vsone')
        >>> # ibs.cfg.query_cfg.codename = 'vsone'
        >>> qaids = ibs.get_valid_aids()[0:3]
        >>> daids = ibs.get_valid_aids()[0:5]
        >>> qreq_ = ibs.new_query_request(qaids, daids, verbose=True, cfgdict={})
        >>> cfgres_info = get_query_result_info(qreq_)
        >>> print(ut.dict_str(cfgres_info))

    Ignore:
        ibeis -e rank_cdf --db humpbacks -a default:has_any=hasnotch,mingt=2 -t default:proot=BC_DTW --show --nocache-big
        ibeis -e rank_cdf --db humpbacks -a default:is_known=True,mingt=2 -t default:pipeline_root=BC_DTW --show --debug-depc
        ibeis -e rank_cdf --db humpbacks -a default:is_known=True -t default:pipeline_root=BC_DTW --qaid=1,9,15,16,18 --daid-override=1,9,15,16,18,21,22 --show --debug-depc --clear-all-depcache
    """
    # qreq_ may be a classic query request (has .ibs) or a depcache
    # request (controller hangs off .depc)
    try:
        ibs = qreq_.ibs
    except AttributeError:
        ibs = qreq_.depc.controller
    import vtool as vt
    cm_list = qreq_.execute()
    #qreq_.ibs.query_chips(qreq_=qreq_, use_bigcache=False)
    qx2_cm = cm_list
    qaids = qreq_.qaids
    #qaids2 = [cm.qaid for cm in cm_list]
    qnids = ibs.get_annot_name_rowids(qaids)

    import utool
    with utool.embed_on_exception_context:
        unique_dnids = np.unique(ibs.get_annot_name_rowids(qreq_.daids))

        unique_qnids, groupxs = vt.group_indices(qnids)
        cm_group_list = ut.apply_grouping(cm_list, groupxs)
        qnid2_aggnamescores = {}

        qnx2_nameres_info = []

        #import utool
        #utool.embed()

        # Ranked list aggregation-ish
        nameres_info_list = []
        for qnid, cm_group in zip(unique_qnids, cm_group_list):
            # Per chip-match: map each dnid to its name score
            nid2_name_score_group = [
                dict([(nid, cm.name_score_list[nidx])
                      for nid, nidx in cm.nid2_nidx.items()])
                for cm in cm_group
            ]
            # Align scores over all database names; missing names get -inf
            aligned_name_scores = np.array([
                ut.dict_take(nid2_name_score, unique_dnids.tolist(), -np.inf)
                for nid2_name_score in nid2_name_score_group
            ]).T
            # Aggregate over the query group by taking the max per name
            name_score_list = np.nanmax(aligned_name_scores, axis=1)
            qnid2_aggnamescores[qnid] = name_score_list
            # sort
            sortx = name_score_list.argsort()[::-1]
            sorted_namescores = name_score_list[sortx]
            sorted_dnids = unique_dnids[sortx]

            ## infer agg name results
            is_positive = sorted_dnids == qnid
            is_negative = np.logical_and(~is_positive, sorted_dnids > 0)
            gt_name_rank = None if not np.any(is_positive) else np.where(is_positive)[0][0]
            gf_name_rank = None if not np.any(is_negative) else np.nonzero(is_negative)[0][0]
            # NOTE(review): if either rank is None (no positive/negative
            # match at all) the indexing below raises; presumably the
            # data always contains both -- confirm with callers.
            gt_nid = sorted_dnids[gt_name_rank]
            gf_nid = sorted_dnids[gf_name_rank]
            gt_name_score = sorted_namescores[gt_name_rank]
            gf_name_score = sorted_namescores[gf_name_rank]
            qnx2_nameres_info = {}
            qnx2_nameres_info['qnid'] = qnid
            qnx2_nameres_info['gt_nid'] = gt_nid
            qnx2_nameres_info['gf_nid'] = gf_nid
            qnx2_nameres_info['gt_name_rank'] = gt_name_rank
            qnx2_nameres_info['gf_name_rank'] = gf_name_rank
            qnx2_nameres_info['gt_name_score'] = gt_name_score
            qnx2_nameres_info['gf_name_score'] = gf_name_score
            nameres_info_list.append(qnx2_nameres_info)
        # Stack per-name dicts into columns keyed 'qnx2_*'
        nameres_info = ut.dict_stack(nameres_info_list, 'qnx2_')

    qaids = qreq_.qaids
    daids = qreq_.daids
    qx2_gtaids = ibs.get_annot_groundtruth(qaids, daid_list=daids)
    # Get the groundtruth ranks and accuracy measures
    qx2_qresinfo = [get_qres_name_result_info(ibs, cm, qreq_) for cm in qx2_cm]

    cfgres_info = ut.dict_stack(qx2_qresinfo, 'qx2_')
    #for key in qx2_qresinfo[0].keys():
    #    'qx2_' + key
    #    ut.get_list_column(qx2_qresinfo, key)

    if False:
        qx2_avepercision = np.array([
            cm.get_average_percision(ibs=ibs, gt_aids=gt_aids)
            for (cm, gt_aids) in zip(qx2_cm, qx2_gtaids)
        ])
        cfgres_info['qx2_avepercision'] = qx2_avepercision
        # Compute mAP score  # TODO: use mAP score
        # (Actually map score doesn't make much sense if using name scoring
        #mAP = qx2_avepercision[~np.isnan(qx2_avepercision)].mean()  # NOQA
    cfgres_info['qx2_bestranks'] = ut.replace_nones(
        cfgres_info['qx2_bestranks'], -1)
    cfgres_info.update(nameres_info)
    return cfgres_info
def get_query_result_info(qreq_):
    """ Helper function.

    Runs the queries in ``qreq_`` and summarizes, per query and per query
    name, the best rank / score of the groundtruth (same name) and
    groundfalse (different known name) results.

    Args:
        qreq_: query request object; provides ``qaids``, ``daids``,
            ``execute()`` and (directly or via ``depc``) the controller.

    Returns:
        dict: cfgres_info with ``qx2_*`` keys (per-annotation results from
            ``get_qres_name_result_info``) and ``qnx2_*`` keys (per-name
            aggregated results). ``qx2_bestranks`` has Nones replaced by -1.

    CommandLine:
        python -m ibeis.expt.harness --test-get_query_result_info
        python -m ibeis.expt.harness --test-get_query_result_info:0 --db lynx -a default:qsame_imageset=True,been_adjusted=True,excluderef=True -t default:K=1

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.expt.harness import *  # NOQA
        >>> import ibeis
        >>> qreq_ = ibeis.main_helpers.testdata_qreq_(a=['default:qindex=0:3,dindex=0:5'])
        >>> cfgres_info = get_query_result_info(qreq_)
        >>> print(ut.dict_str(cfgres_info))

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.expt.harness import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qaids = ibs.get_valid_aids()[0:3]
        >>> daids = ibs.get_valid_aids()[0:5]
        >>> qreq_ = ibs.new_query_request(qaids, daids, verbose=True, cfgdict={})
        >>> cfgres_info = get_query_result_info(qreq_)
        >>> print(ut.dict_str(cfgres_info))
    """
    # The controller lives on qreq_ directly, or on its dependency cache
    # for depcache-backed pipelines (e.g. BC_DTW).
    try:
        ibs = qreq_.ibs
    except AttributeError:
        ibs = qreq_.depc.controller
    import vtool as vt
    cm_list = qreq_.execute()
    qx2_cm = cm_list
    qaids = qreq_.qaids
    qnids = ibs.get_annot_name_rowids(qaids)
    import utool
    # Debug aid: drops into IPython if anything below raises.
    with utool.embed_on_exception_context:
        unique_dnids = np.unique(ibs.get_annot_name_rowids(qreq_.daids))
        unique_qnids, groupxs = vt.group_indices(qnids)
        # Group the chipmatches by query name.
        cm_group_list = ut.apply_grouping(cm_list, groupxs)
        qnid2_aggnamescores = {}
    # Ranked list aggregation-ish: combine per-annot name scores into one
    # score vector per query NAME by taking the nan-aware max across the
    # group's chipmatches.
    nameres_info_list = []
    for qnid, cm_group in zip(unique_qnids, cm_group_list):
        nid2_name_score_group = [
            dict([(nid, cm.name_score_list[nidx])
                  for nid, nidx in cm.nid2_nidx.items()])
            for cm in cm_group
        ]
        # Align every cm's name scores to the same dnid order; missing
        # names score -inf so they never win the max.
        aligned_name_scores = np.array([
            ut.dict_take(nid2_name_score, unique_dnids.tolist(), -np.inf)
            for nid2_name_score in nid2_name_score_group
        ]).T
        name_score_list = np.nanmax(aligned_name_scores, axis=1)
        qnid2_aggnamescores[qnid] = name_score_list
        # sort names by descending aggregated score
        sortx = name_score_list.argsort()[::-1]
        sorted_namescores = name_score_list[sortx]
        sorted_dnids = unique_dnids[sortx]
        # infer agg name results
        is_positive = sorted_dnids == qnid
        # nid <= 0 means unknown name; exclude from groundfalse
        is_negative = np.logical_and(~is_positive, sorted_dnids > 0)
        gt_name_rank = (None if not np.any(is_positive) else
                        np.where(is_positive)[0][0])
        gf_name_rank = (None if not np.any(is_negative) else
                        np.nonzero(is_negative)[0][0])
        # BUGFIX: indexing a numpy array with None does not raise -- it
        # inserts a new axis and returns the whole array. Guard the
        # missing-groundtruth / missing-groundfalse cases explicitly.
        if gt_name_rank is None:
            gt_nid = None
            gt_name_score = None
        else:
            gt_nid = sorted_dnids[gt_name_rank]
            gt_name_score = sorted_namescores[gt_name_rank]
        if gf_name_rank is None:
            gf_nid = None
            gf_name_score = None
        else:
            gf_nid = sorted_dnids[gf_name_rank]
            gf_name_score = sorted_namescores[gf_name_rank]
        qnx2_nameres_info = {}
        qnx2_nameres_info['qnid'] = qnid
        qnx2_nameres_info['gt_nid'] = gt_nid
        qnx2_nameres_info['gf_nid'] = gf_nid
        qnx2_nameres_info['gt_name_rank'] = gt_name_rank
        qnx2_nameres_info['gf_name_rank'] = gf_name_rank
        qnx2_nameres_info['gt_name_score'] = gt_name_score
        qnx2_nameres_info['gf_name_score'] = gf_name_score
        nameres_info_list.append(qnx2_nameres_info)
    nameres_info = ut.dict_stack(nameres_info_list, 'qnx2_')

    qaids = qreq_.qaids
    daids = qreq_.daids
    qx2_gtaids = ibs.get_annot_groundtruth(qaids, daid_list=daids)
    # Get the groundtruth ranks and accuracy measures
    qx2_qresinfo = [get_qres_name_result_info(ibs, cm, qreq_)
                    for cm in qx2_cm]
    cfgres_info = ut.dict_stack(qx2_qresinfo, 'qx2_')
    if False:
        # (sic: "percision" matches the chipmatch method name)
        qx2_avepercision = np.array([
            cm.get_average_percision(ibs=ibs, gt_aids=gt_aids)
            for (cm, gt_aids) in zip(qx2_cm, qx2_gtaids)
        ])
        cfgres_info['qx2_avepercision'] = qx2_avepercision
        # Compute mAP score
        # TODO: use mAP score
        # (Actually map score doesn't make much sense if using name scoring
        # mAP = qx2_avepercision[~np.isnan(qx2_avepercision)].mean()  # NOQA
    cfgres_info['qx2_bestranks'] = ut.replace_nones(
        cfgres_info['qx2_bestranks'], -1)
    cfgres_info.update(nameres_info)
    return cfgres_info
def draw_feat_scoresep(testres, f=None, disttype=None):
    r"""
    Plots the separation between true-positive and true-negative feature
    scores for each (grouped) pipeline configuration, fitting a
    ScoreNormalizer per group and visualizing it.

    Args:
        testres: test result object holding ``cfgx2_qreq_`` and case info.
        f (list): case filter config passed to ``case_sample2``.
        disttype: optional feature distance type(s) for score extraction.

    Returns:
        encoder: the last fitted ``vt.ScoreNormalizer``.

    SeeAlso:
        ibeis.algo.hots.scorenorm.train_featscore_normalizer

    CommandLine:
        python -m ibeis --tf TestResult.draw_feat_scoresep --show
        python -m ibeis --tf TestResult.draw_feat_scoresep --show -t default:sv_on=[True,False]
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 --disttype=L2_sift,fg
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:lnbnn_on=True --namemode=True
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST --disttype=L2_sift -t best:SV=False
        utprof.py -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 --fsvx=0:1
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db Oxford -a default:qhas_any=\(query,\),dpername=1,exclude_reference=True,minqual=ok
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST --disttype=ratio

    Example:
        >>> # SCRIPT
        >>> from ibeis.expt.test_result import *  # NOQA
        >>> from ibeis.init import main_helpers
        >>> disttype = ut.get_argval('--disttype', type_=list, default=None)
        >>> ibs, testres = main_helpers.testdata_expts(
        >>>     defaultdb='PZ_MTEST', a=['timectrl'], t=['best'])
        >>> f = ut.get_argval(('--filt', '-f'), type_=list, default=[''])
        >>> testres.draw_feat_scoresep(f=f)
        >>> ut.show_if_requested()
    """
    print('[testres] draw_feat_scoresep')
    import plottool as pt

    def load_feat_scores(qreq_, qaids):
        """ Loads (tp_scores, tn_scores, scorecfg) for one qreq_,
        memoized on disk under TMP_FEATSCORE_CACHE. """
        import ibeis  # NOQA
        from os.path import dirname, join  # NOQA
        # HACKY CACHE: key on the full query cfgstr plus every CLI knob
        # that affects score extraction.
        cfgstr = qreq_.get_cfgstr(with_input=True)
        cache_dir = join(dirname(dirname(ibeis.__file__)),
                         'TMP_FEATSCORE_CACHE')
        namemode = ut.get_argval('--namemode', default=True)
        fsvx = ut.get_argval('--fsvx', type_='fuzzy_subset',
                             default=slice(None, None, None))
        threshx = ut.get_argval('--threshx', type_=int, default=None)
        thresh = ut.get_argval('--thresh', type_=float, default=.9)
        num = ut.get_argval('--num', type_=int, default=1)
        cfg_components = [cfgstr, disttype, namemode, fsvx, threshx,
                          thresh, f, num]
        cache_cfgstr = ','.join(ut.lmap(six.text_type, cfg_components))
        cache_hashid = ut.hashstr27(cache_cfgstr + '_v1')
        cache_name = ('get_cfgx_feat_scores_' + cache_hashid)

        @ut.cached_func(cache_name, cache_dir=cache_dir, key_argx=[],
                        use_cache=True)
        def get_cfgx_feat_scores(qreq_, qaids):
            from ibeis.algo.hots import scorenorm
            cm_list = qreq_.execute(qaids)
            # print('Done loading cached chipmatches')
            tup = scorenorm.get_training_featscores(
                qreq_, cm_list, disttype, namemode, fsvx, threshx,
                thresh, num=num)
            # print(ut.depth_profile(tup))
            tp_scores, tn_scores, scorecfg = tup
            return tp_scores, tn_scores, scorecfg

        tp_scores, tn_scores, scorecfg = get_cfgx_feat_scores(qreq_, qaids)
        return tp_scores, tn_scores, scorecfg

    valid_case_pos = testres.case_sample2(filt_cfg=f, return_mask=False)
    cfgx2_valid_qxs = ut.group_items(valid_case_pos.T[0],
                                     valid_case_pos.T[1])
    test_qaids = testres.get_test_qaids()
    cfgx2_valid_qaids = ut.map_dict_vals(ut.partial(ut.take, test_qaids),
                                         cfgx2_valid_qxs)
    join_acfgs = True

    # TODO: option to average over pipeline configurations
    if join_acfgs:
        groupxs = testres.get_cfgx_groupxs()
    else:
        # singleton groups: one cfgx per group
        groupxs = list(zip(range(len(testres.cfgx2_qreq_))))
    grouped_qreqs = ut.apply_grouping(testres.cfgx2_qreq_, groupxs)

    grouped_scores = []
    for cfgxs, qreq_group in zip(groupxs, grouped_qreqs):
        # testres.print_pcfg_info()
        score_group = []
        # BUGFIX: iterate the qreqs belonging to THIS group (qreq_group),
        # not the full cfgx2_qreq_ list -- zipping cfgxs against the full
        # list paired every group after the first with the wrong qreq_.
        for cfgx, qreq_ in zip(cfgxs, qreq_group):
            print('Loading cached chipmatches')
            qaids = cfgx2_valid_qaids[cfgx]
            tp_scores, tn_scores, scorecfg = load_feat_scores(qreq_, qaids)
            score_group.append((tp_scores, tn_scores, scorecfg))
        grouped_scores.append(score_group)

    cfgx2_shortlbl = testres.get_short_cfglbls(join_acfgs=join_acfgs)
    for score_group, lbl in zip(grouped_scores, cfgx2_shortlbl):
        tp_scores = np.hstack(ut.take_column(score_group, 0))
        tn_scores = np.hstack(ut.take_column(score_group, 1))
        scorecfg = '+++'.join(ut.unique(ut.take_column(score_group, 2)))
        # TODO: learn this score normalizer as a model
        # encoder = vt.ScoreNormalizer(adjust=4, monotonize=False)
        encoder = vt.ScoreNormalizer(adjust=2, monotonize=True)
        encoder.fit_partitioned(tp_scores, tn_scores, verbose=False)
        figtitle = 'Feature Scores: %s, %s' % (scorecfg, lbl)
        fnum = None
        vizkw = {}
        sephack = ut.get_argflag('--sephack')
        if not sephack:
            vizkw['target_tpr'] = .95
            vizkw['score_range'] = (0, 1.0)
        encoder.visualize(
            figtitle=figtitle, fnum=fnum,
            with_scores=False,
            # with_prebayes=True,
            with_prebayes=False,
            with_roc=True,
            with_postbayes=False,
            # with_postbayes=True,
            **vizkw)
        icon = testres.ibs.get_database_icon()
        if icon is not None:
            pt.overlay_icon(icon, coords=(1, 0), bbox_alignment=(1, 0))
        if ut.get_argflag('--contextadjust'):
            pt.adjust_subplots(left=.1, bottom=.25, wspace=.2, hspace=.2)
            pt.adjust_subplots(use_argv=True)
    # NOTE(review): returns the encoder fit for the LAST group only --
    # matches original behavior.
    return encoder