Example 1
    def _make_anygroup_hashes(annots, nids):
        """ helper function

            import wbia
            qreq_ = wbia.testdata_qreq_(
                defaultdb='PZ_MTEST',
                qaid_override=[1, 2, 3, 4, 5, 6, 10, 11],
                daid_override=[2, 3, 5, 6, 20, 21, 22, 23, 24],
                )

            import wbia
            qreq_ = wbia.testdata_qreq_(defaultdb='PZ_Master1')
            %timeit qreq_._make_namegroup_data_hashes()
            %timeit qreq_._make_namegroup_data_uuids()

        """
        # make sure items are sorted to ensure same assignment
        # gives same uuids
        # annots = qreq_.ibs.annots(sorted(qreq_.daids))
        unique_nids, groupxs = vt.group_indices(nids)
        grouped_visual_uuids = ut.apply_grouping(annots.visual_uuids, groupxs)
        group_hashes = [
            ut.combine_hashes(sorted(u.bytes for u in uuids), hasher=hashlib.sha1())
            for uuids in grouped_visual_uuids
        ]
        nid_to_grouphash = dict(zip(unique_nids, group_hashes))
        return nid_to_grouphash
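A minimal, self-contained sketch of the same idiom using only the standard library: group indices by name id, then hash each group's sorted UUID bytes. The data here is hypothetical, and the inline grouping stands in for vt.group_indices / ut.apply_grouping.

import hashlib
import uuid

# Hypothetical stand-in data: three annotations belonging to two names.
nids = [7, 7, 9]
visual_uuids = [uuid.uuid4() for _ in nids]

# Group annotation indices by name id (what vt.group_indices computes).
nid_to_idxs = {}
for idx, nid in enumerate(nids):
    nid_to_idxs.setdefault(nid, []).append(idx)

# Hash each group's sorted UUID bytes (mirrors ut.combine_hashes);
# sorting makes the hash independent of annotation order.
nid_to_grouphash = {}
for nid, idxs in nid_to_idxs.items():
    hasher = hashlib.sha1()
    for bytes_ in sorted(visual_uuids[i].bytes for i in idxs):
        hasher.update(bytes_)
    nid_to_grouphash[nid] = hasher.digest()

print(nid_to_grouphash)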
Example 2
def get_match_results(depc, qaid_list, daid_list, score_list, config):
    """ converts table results into format for ipython notebook """
    #qaid_list, daid_list = request.get_parent_rowids()
    #score_list = request.score_list
    #config = request.config

    unique_qaids, groupxs = ut.group_indices(qaid_list)
    #grouped_qaids_list = ut.apply_grouping(qaid_list, groupxs)
    grouped_daids = ut.apply_grouping(daid_list, groupxs)
    grouped_scores = ut.apply_grouping(score_list, groupxs)

    ibs = depc.controller
    unique_qnids = ibs.get_annot_nids(unique_qaids)
    # FIXME: decision should not be part of the config for the one-vs-one
    # scores
    decision_func = getattr(np, config['decision'])
    _iter = zip(unique_qaids, unique_qnids, grouped_daids, grouped_scores)
    for qaid, qnid, daids, scores in _iter:
        dnids = ibs.get_annot_nids(daids)

        # Remove distance to self
        annot_scores = np.array(scores)
        daid_list_ = np.array(daids)
        dnid_list_ = np.array(dnids)

        is_valid = (daid_list_ != qaid)
        daid_list_ = daid_list_.compress(is_valid)
        dnid_list_ = dnid_list_.compress(is_valid)
        annot_scores = annot_scores.compress(is_valid)

        # Hacked in version of creating an annot match object
        match_result = ibeis.AnnotMatch()
        match_result.qaid = qaid
        match_result.qnid = qnid
        match_result.daid_list = daid_list_
        match_result.dnid_list = dnid_list_
        match_result._update_daid_index()
        match_result._update_unique_nid_index()

        grouped_annot_scores = vt.apply_grouping(annot_scores,
                                                 match_result.name_groupxs)
        name_scores = np.array(
            [decision_func(dists) for dists in grouped_annot_scores])
        match_result.set_cannonical_name_score(annot_scores, name_scores)
        yield match_result
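The self-filtering step above is the part most worth isolating: build one boolean mask and compress every parallel array with it. A minimal sketch with hypothetical ids and scores:

import numpy as np

# Hypothetical parallel arrays for a single query annotation (qaid = 3).
qaid = 3
daid_list_ = np.array([3, 5, 8])
dnid_list_ = np.array([1, 2, 2])
annot_scores = np.array([0.99, 0.40, 0.35])

# Remove the query's own annotation from its database matches.
is_valid = (daid_list_ != qaid)
daid_list_ = daid_list_.compress(is_valid)
dnid_list_ = dnid_list_.compress(is_valid)
annot_scores = annot_scores.compress(is_valid)

print(daid_list_, dnid_list_, annot_scores)  # [5 8] [2 2] [0.4  0.35]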
Example 3
    def consolodate_duplicates(self):
        fnames = map(basename, self.rel_fpath_list)
        duplicate_map = ut.find_duplicate_items(fnames)
        groups = []
        for dupname, idxs in duplicate_map.items():
            uuids = self.get_prop('uuids', idxs)
            unique_uuids, groupxs = ut.group_indices(uuids)
            groups.extend(ut.apply_grouping(idxs, groupxs))
        multitons = [g for g in groups if len(g) > 1]
        # singletons = [g for g in groups if len(g) <= 1]

        ut.unflat_take(list(self.fpaths()), multitons)
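A self-contained sketch of the same consolidation idea: group indices of duplicated basenames, then split each duplicate set further by a content id so only true duplicates stay together. find_duplicate_items and get_prop are mimicked inline with hypothetical data.

from collections import defaultdict

# Hypothetical files: the same basename can hide different contents (uuids).
fnames = ['a.jpg', 'b.jpg', 'a.jpg', 'a.jpg']
uuids  = ['u1',    'u2',    'u1',    'u3']

# Map each basename to its indices (what ut.find_duplicate_items returns).
name_to_idxs = defaultdict(list)
for idx, fname in enumerate(fnames):
    name_to_idxs[fname].append(idx)

groups = []
for dupname, idxs in name_to_idxs.items():
    if len(idxs) < 2:
        continue
    # Subgroup by uuid so identical names with different contents separate.
    uuid_to_idxs = defaultdict(list)
    for idx in idxs:
        uuid_to_idxs[uuids[idx]].append(idx)
    groups.extend(uuid_to_idxs.values())

multitons = [g for g in groups if len(g) > 1]
print(multitons)  # [[0, 2]]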
Example 4
    def as_parts(self):
        if self.parts is not None:
            return self.parts
        text = self.as_text()
        top, header, mid, bot = split_tabular(text)
        colfmt = self._rectify_colfmt()
        if colfmt is not None:
            top = '\\begin{tabular}{%s}' % (colfmt, )

        if self.theadify:
            import textwrap

            width = self.theadify
            wrapper = textwrap.TextWrapper(width=width, break_long_words=False)

            header_lines = header.split('\n')
            new_lines = []
            for line in header_lines:
                line = line.rstrip('\\')
                headers = [h.strip() for h in line.split('&')]
                headers = ['\\\\'.join(wrapper.wrap(h)) for h in headers]
                headers = [
                    h if h == '{}' else '\\thead{' + h + '}' for h in headers
                ]
                line = ' & '.join(headers) + '\\\\'
                new_lines.append(line)
            new_header = '\n'.join(new_lines)
            header = new_header
        if True:
            groupxs = self.groupxs
            # Put midlines between multi index levels
            if groupxs is None and isinstance(self._data, pd.DataFrame):
                index = self._data.index
                if len(index.names) == 2 and len(mid) == 1:
                    groupxs = ut.group_indices(index.labels[0])[1]
                    # part = '\n\multirow{%d}{*}{%s}\n' % (len(chunk), key,)
                    # part += '\n'.join(['& ' + c for c in chunk])
            if groupxs is not None:
                bodylines = mid[0].split('\n')
                mid = ut.apply_grouping(bodylines, groupxs)
        parts = (top, header, mid, bot)
        return parts
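The theadify branch is easiest to see on one header line: wrap each column header to a fixed width, join the wrapped pieces with LaTeX line breaks, and leave empty '{}' cells alone. A standalone sketch with a hypothetical header row:

import textwrap

width = 12
wrapper = textwrap.TextWrapper(width=width, break_long_words=False)

line = 'metric one & a very long column header & {}\\\\'
line = line.rstrip('\\')
headers = [h.strip() for h in line.split('&')]
headers = ['\\\\'.join(wrapper.wrap(h)) for h in headers]
headers = [h if h == '{}' else '\\thead{' + h + '}' for h in headers]
print(' & '.join(headers) + '\\\\')
# \thead{metric one} & \thead{a very long\\column\\header} & {}\\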
Example 5
def stratified_label_shuffle_split(y, labels, fractions, y_idx=None, rng=None):
    """
    modified from sklearn to make n splits instead of 2.
    Also enforces that labels are not broken into separate groups.

    Args:
        y (ndarray):  labels
        labels (?):
        fractions (?):
        rng (RandomState):  random number generator(default = None)

    Returns:
        ?: index_sets

    CommandLine:
        python -m ibeis_cnn.dataset stratified_label_shuffle_split --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis_cnn.dataset import *  # NOQA
        >>> y      = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        >>> labels = [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 6, 0, 7, 7, 7, 7]
        >>> fractions = [.7, .3]
        >>> rng = np.random.RandomState(0)
        >>> index_sets = stratified_label_shuffle_split(y, labels, fractions, rng=rng)
    """
    rng = ut.ensure_rng(rng)
    #orig_y = y
    unique_labels, groupxs = ut.group_indices(labels)
    grouped_ys = ut.apply_grouping(y, groupxs)
    # Assign each group a probabilistic class
    unique_ys = [ys[rng.randint(0, len(ys))] for ys in grouped_ys]
    # TODO: should weight the following selection based on size of group
    #class_weights = [ut.dict_hist(ys) for ys in grouped_ys]

    unique_idxs = stratified_shuffle_split(unique_ys, fractions, rng)
    index_sets = [np.array(ut.flatten(ut.take(groupxs, idxs))) for idxs in unique_idxs]
    if y_idx is not None:
        # These indices subindex into the parent set of indices
        index_sets = [np.take(y_idx, idxs, axis=0) for idxs in index_sets]
    return index_sets
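The invariant this function promises (no label is split across the returned index sets) is easy to verify after the fact. A minimal check, using hypothetical output for the doctest data above:

import numpy as np

# Hypothetical output of stratified_label_shuffle_split on the doctest data.
labels = [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 6, 0, 7, 7, 7, 7]
index_sets = [np.array([0, 1, 2, 3, 8, 9, 10, 11, 15, 16, 17, 18, 19]),
              np.array([4, 5, 6, 7, 12, 13, 14])]

# Each label must land in exactly one split.
label_sets = [set(labels[i] for i in idxs) for idxs in index_sets]
assert not (label_sets[0] & label_sets[1]), 'a label was split across sets'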
Example 6
    def _make_test_folds(self, X, y=None, groups=None):
        """
        Args:
            self (?):
            X (ndarray):  data
            y (ndarray):  labels(default = None)
            groups (None): (default = None)

        Returns:
            ?: test_folds

        CommandLine:
            python -m ibeis.algo.verif.sklearn_utils _make_test_folds

        Example:
            >>> # DISABLE_DOCTEST
            >>> from ibeis.algo.verif.sklearn_utils import *  # NOQA
            >>> import utool as ut
            >>> rng = ut.ensure_rng(0)
            >>> groups = [1, 1, 3, 4, 2, 2, 7, 8, 8]
            >>> y      = [1, 1, 1, 1, 2, 2, 2, 3, 3]
            >>> X = np.empty((len(y), 0))
            >>> self = StratifiedGroupKFold(random_state=rng)
            >>> skf_list = list(self.split(X=X, y=y, groups=groups))
        """
        # if self.shuffle:
        #     rng = check_random_state(self.random_state)
        # else:
        #     rng = self.random_state
        n_splits = self.n_splits
        y = np.asarray(y)
        n_samples = y.shape[0]

        import utool as ut

        # y_counts = bincount(y_inversed)
        # min_classes_ = np.min(y_counts)
        # if np.all(self.n_splits > y_counts):
        #     raise ValueError("All the n_groups for individual classes"
        #                      " are less than n_splits=%d."
        #                      % (self.n_splits))
        # if self.n_splits > min_classes_:
        #     warnings.warn(("The least populated class in y has only %d"
        #                    " members, which is too few. The minimum"
        #                    " number of groups for any class cannot"
        #                    " be less than n_splits=%d."
        #                    % (min_classes_, self.n_splits)), Warning)

        unique_y, y_inversed = np.unique(y, return_inverse=True)
        n_classes = max(unique_y) + 1
        unique_groups, group_idxs = ut.group_indices(groups)
        # grouped_ids = list(grouping.keys())
        grouped_y = ut.apply_grouping(y, group_idxs)
        grouped_y_counts = np.array([
            bincount(y_, minlength=n_classes) for y_ in grouped_y])

        target_freq = grouped_y_counts.sum(axis=0)
        target_ratio = target_freq / target_freq.sum()

        # Greedily choose the split assignment that minimizes the local
        # * squared differences in target from actual frequencies
        # * and best equalizes the number of items per fold
        # Distribute groups with most members first
        split_freq = np.zeros((n_splits, n_classes))
        # split_ratios = split_freq / split_freq.sum(axis=1)
        split_ratios = np.ones(split_freq.shape) / split_freq.shape[1]
        split_diffs = ((split_freq - target_ratio) ** 2).sum(axis=1)
        sortx = np.argsort(grouped_y_counts.sum(axis=1))[::-1]
        grouped_splitx = []
        for count, group_idx in enumerate(sortx):
            # print('---------\n')
            group_freq = grouped_y_counts[group_idx]
            cand_freq = split_freq + group_freq
            cand_ratio = cand_freq / cand_freq.sum(axis=1)[:, None]
            cand_diffs = ((cand_ratio - target_ratio) ** 2).sum(axis=1)
            # Compute loss
            losses = []
            # others = np.nan_to_num(split_diffs)
            other_diffs = np.array([
                sum(split_diffs[x + 1:]) + sum(split_diffs[:x])
                for x in range(n_splits)
            ])
            # penalize unbalanced splits
            ratio_loss = other_diffs + cand_diffs
            # penalize heavy splits
            freq_loss = split_freq.sum(axis=1)
            freq_loss = freq_loss / freq_loss.sum()
            losses = ratio_loss + freq_loss
            # print('group_freq = %r' % (group_freq,))
            # print('freq_loss = %s' % (ut.repr2(freq_loss, precision=2),))
            # print('ratio_loss = %s' % (ut.repr2(ratio_loss, precision=2),))
            #-------
            splitx = np.argmin(losses)
            # print('losses = %r, splitx=%r' % (losses, splitx))
            split_freq[splitx] = cand_freq[splitx]
            split_ratios[splitx] = cand_ratio[splitx]
            split_diffs[splitx] = cand_diffs[splitx]
            grouped_splitx.append(splitx)

            # if count > 4:
            #     break
            # else:
            #     print('split_freq = \n' +
            #           ut.repr2(split_freq, precision=2, suppress_small=True))
            #     print('target_ratio = \n' +
            #           ut.repr2(target_ratio, precision=2, suppress_small=True))
            #     print('split_ratios = \n' +
            #           ut.repr2(split_ratios, precision=2, suppress_small=True))
            #     print(ut.dict_hist(grouped_splitx))

        # final_ratio_loss = ((split_ratios - target_ratio) ** 2).sum(axis=1)
        # print('split_freq = \n' +
        #       ut.repr2(split_freq, precision=3, suppress_small=True))
        # print('target_ratio = \n' +
        #       ut.repr2(target_ratio, precision=3, suppress_small=True))
        # print('split_ratios = \n' +
        #       ut.repr2(split_ratios, precision=3, suppress_small=True))
        # print(ut.dict_hist(grouped_splitx))

        test_folds = np.empty(n_samples, dtype=int)  # np.int is removed in modern numpy
        for group_idx, splitx in zip(sortx, grouped_splitx):
            idxs = group_idxs[group_idx]
            test_folds[idxs] = splitx

        return test_folds
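A simplified standalone sketch of the greedy assignment loop, with hypothetical per-group class counts and dropping the cross-split other_diffs term: each group goes to the split whose candidate class ratio stays closest to the target ratio, with a small penalty for already-heavy splits.

import numpy as np

# Hypothetical per-group class counts: 3 groups, 2 classes.
n_splits, n_classes = 2, 2
grouped_y_counts = np.array([[4, 0], [0, 4], [2, 2]])

target_freq = grouped_y_counts.sum(axis=0)
target_ratio = target_freq / target_freq.sum()

split_freq = np.zeros((n_splits, n_classes))
# Distribute groups with the most members first.
for group_freq in grouped_y_counts[np.argsort(grouped_y_counts.sum(axis=1))[::-1]]:
    cand_freq = split_freq + group_freq
    cand_ratio = cand_freq / cand_freq.sum(axis=1)[:, None]
    cand_diffs = ((cand_ratio - target_ratio) ** 2).sum(axis=1)
    # Penalize splits that are already heavy.
    freq_loss = split_freq.sum(axis=1)
    freq_loss = freq_loss / max(freq_loss.sum(), 1)
    splitx = np.argmin(cand_diffs + freq_loss)
    split_freq[splitx] = cand_freq[splitx]

print(split_freq)  # roughly balanced class frequencies per split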
Example 7
def stratified_kfold_label_split(y, labels, n_folds=2, y_idx=None, rng=None):
    """
    Stratified k-fold split that also enforces that labels are not broken into separate groups.

    Args:
        y (ndarray):  labels
        labels (?):
        n_folds (int): number of folds (default = 2)
        y_idx (array): indexes associated with y if it was already presampled
        rng (RandomState):  random number generator(default = None)

    Returns:
        ?: folded_index_sets

    CommandLine:
        python -m ibeis_cnn.dataset stratified_kfold_label_split --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis_cnn.dataset import *  # NOQA
        >>> y      = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        >>> labels = [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 6, 0, 7, 7, 7, 7]
        >>> rng = np.random.RandomState(0)
        >>> folded_index_sets = stratified_kfold_label_split(y, labels, n_folds=2, rng=rng)
    """

    rng = ut.ensure_rng(rng)
    #orig_y = y
    unique_labels, groupxs = ut.group_indices(labels)
    grouped_ys = ut.apply_grouping(y, groupxs)
    # Assign each group a probabilistic class
    unique_ys = [ys[rng.randint(0, len(ys))] for ys in grouped_ys]
    # TODO: should weight the following selection based on size of group
    #class_weights = [ut.dict_hist(ys) for ys in grouped_ys]

    import sklearn.cross_validation
    xvalkw = dict(n_folds=n_folds, shuffle=True, random_state=rng)
    skf = sklearn.cross_validation.StratifiedKFold(unique_ys, **xvalkw)
    _iter = skf

    folded_index_sets = []

    for label_idx_set in _iter:
        index_sets = [np.array(ut.flatten(ut.take(groupxs, idxs)))
                      for idxs in label_idx_set]
        folded_index_sets.append(index_sets)

    for train_idx, test_idx in folded_index_sets:
        train_labels = set(ut.take(labels, train_idx))
        test_labels = set(ut.take(labels, test_idx))
        assert len(test_labels.intersection(train_labels)) == 0, 'same labels appeared in both train and test'
        pass

    if y_idx is not None:
        # These indices subindex into the parent set of indices
        folded_index_sets2 = []
        for index_sets in folded_index_sets:
            index_sets = [np.take(y_idx, idxs, axis=0) for idxs in index_sets]
            folded_index_sets2.append(index_sets)
        folded_index_sets = folded_index_sets2
    #import sklearn.model_selection
    #skf = sklearn.model_selection.StratifiedKFold(**xvalkw)
    #_iter = skf.split(X=np.empty(len(target)), y=target)

    #unique_idxs = stratified_shuffle_split(unique_ys, fractions, rng)
    #index_sets = [np.array(ut.flatten(ut.take(groupxs, idxs))) for idxs in unique_idxs]
    #if idx is not None:
    #    # These indicies subindex into parent set of indicies
    #    index_sets = [np.take(idx, idxs, axis=0) for idxs in index_sets]
    return folded_index_sets
Example 8
def merge_level_order(level_orders, topsort):
    """
    Merge orders of individual subtrees into a total ordering for
    computation.

    >>> level_orders = {
    >>>     'multi_chip_multitest': [['dummy_annot'], ['chip'], ['multitest'],
    >>>         ['multitest_score'], ],
    >>>     'multi_fgweight_multitest': [ ['dummy_annot'], ['chip', 'probchip'],
    >>>         ['keypoint'], ['fgweight'], ['multitest'], ['multitest_score'], ],
    >>>     'multi_keypoint_nnindexer': [ ['dummy_annot'], ['chip'], ['keypoint'],
    >>>         ['nnindexer'], ['multitest'], ['multitest_score'], ],
    >>>     'normal': [ ['dummy_annot'], ['chip', 'probchip'], ['keypoint'],
    >>>         ['fgweight'], ['spam'], ['multitest'], ['multitest_score'], ],
    >>>     'nwise_notch_multitest_1': [ ['dummy_annot'], ['notch'], ['multitest'],
    >>>         ['multitest_score'], ],
    >>>     'nwise_notch_multitest_2': [ ['dummy_annot'], ['notch'], ['multitest'],
    >>>         ['multitest_score'], ],
    >>>     'nwise_notch_notchpair_1': [ ['dummy_annot'], ['notch'], ['notchpair'],
    >>>         ['multitest'], ['multitest_score'], ],
    >>>     'nwise_notch_notchpair_2': [ ['dummy_annot'], ['notch'], ['notchpair'],
    >>>         ['multitest'], ['multitest_score'], ],
    >>> }
    >>> topsort = [u'dummy_annot', u'notch', u'probchip', u'chip', u'keypoint',
    >>>            u'fgweight', u'nnindexer', u'spam', u'notchpair', u'multitest',
    >>>            u'multitest_score']
    >>> print(ut.repr3(ut.merge_level_order(level_orders, topsort)))

    EG2:
        level_orders = {u'normal': [[u'dummy_annot'], [u'chip', u'probchip'], [u'keypoint'], [u'fgweight'], [u'spam']]}
        topsort = [u'dummy_annot', u'probchip', u'chip', u'keypoint', u'fgweight', u'spam']
    """

    import utool as ut
    if False:
        compute_order = []
        level_orders = ut.map_dict_vals(ut.total_flatten, level_orders)
        level_sets = ut.map_dict_vals(set, level_orders)
        for tablekey in topsort:
            compute_order.append((tablekey, [groupkey for groupkey, set_ in level_sets.items() if tablekey in set_]))
        return compute_order
    else:
        # Do on common subgraph
        import itertools
        # Pointer to the current level: start at the end and
        # then work your way up.
        main_ptr = len(topsort) - 1
        stack = []
        #from six.moves import zip_longest
        keys = list(level_orders.keys())
        type_to_ptr = {key: -1 for key in keys}
        print('level_orders = %s' % (ut.repr3(level_orders),))
        for count in itertools.count(0):
            print('----')
            print('count = %r' % (count,))
            ptred_levels = []
            for key in keys:
                levels = level_orders[key]
                ptr = type_to_ptr[key]
                try:
                    level = tuple(levels[ptr])
                except IndexError:
                    level = None
                ptred_levels.append(level)
            print('ptred_levels = %r' % (ptred_levels,))
            print('main_ptr = %r' % (main_ptr,))
            # groupkeys, groupxs = ut.group_indices(ptred_levels)
            # Group keys are tablenames
            # They point to the (type) of the input
            # num_levelkeys = len(ut.total_flatten(ptred_levels))
            groupkeys, groupxs = ut.group_indices(ptred_levels)
            main_idx = None
            while main_idx is None and main_ptr >= 0:
                target = topsort[main_ptr]
                print('main_ptr = %r' % (main_ptr,))
                print('target = %r' % (target,))
                # main_idx = ut.listfind(groupkeys, (target,))
                # if main_idx is None:
                possible_idxs = [idx for idx, keytup in enumerate(groupkeys) if keytup is not None and target in keytup]
                if len(possible_idxs) == 1:
                    main_idx = possible_idxs[0]
                else:
                    main_idx = None
                if main_idx is None:
                    main_ptr -= 1
            if main_idx is None:
                print('break I')
                break
            found_groups = ut.apply_grouping(keys, groupxs)[main_idx]
            print('found_groups = %r' % (found_groups,))
            stack.append((target, found_groups))
            for k in found_groups:
                type_to_ptr[k] -= 1

            if len(found_groups) == len(keys):
                main_ptr -= 1
                if main_ptr < 0:
                    print('break E')
                    break
        print('stack = %s' % (ut.repr3(stack),))
        print('have = %r' % (sorted(ut.take_column(stack, 0)),))
        print('need = %s' % (sorted(ut.total_flatten(level_orders.values())),))
        compute_order = stack[::-1]

    return compute_order
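The disabled `if False:` branch states the intent of merge_level_order compactly: flatten each subtree's levels into a set and, walking topsort, record which subtrees need each table. A standalone sketch of that simple mapping, with hypothetical inputs:

level_orders = {
    'normal': [['dummy_annot'], ['chip', 'probchip'], ['keypoint']],
    'short': [['dummy_annot'], ['chip']],
}
topsort = ['dummy_annot', 'probchip', 'chip', 'keypoint']

# Flatten each subtree's level ordering into a membership set.
level_sets = {key: {t for level in levels for t in level}
              for key, levels in level_orders.items()}
# For each table in topological order, list the subtrees that contain it.
compute_order = [
    (tablekey, [key for key, set_ in level_sets.items() if tablekey in set_])
    for tablekey in topsort
]
print(compute_order)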
Example 9
def get_injured_sharks():
    """
    >>> from wbia.scripts.getshark import *  # NOQA
    """
    import requests

    url = 'http://www.whaleshark.org/getKeywordImages.jsp'
    resp = requests.get(url)
    assert resp.status_code == 200
    keywords = resp.json()['keywords']
    key_list = ut.take_column(keywords, 'indexName')
    key_to_nice = {k['indexName']: k['readableName'] for k in keywords}

    injury_patterns = [
        'injury',
        'net',
        'hook',
        'trunc',
        'damage',
        'scar',
        'nicks',
        'bite',
    ]

    injury_keys = [
        key for key in key_list if any([pat in key for pat in injury_patterns])
    ]
    noninjury_keys = ut.setdiff(key_list, injury_keys)
    injury_nice = ut.lmap(lambda k: key_to_nice[k], injury_keys)  # NOQA
    noninjury_nice = ut.lmap(lambda k: key_to_nice[k], noninjury_keys)  # NOQA
    key_list = injury_keys

    keyed_images = {}
    for key in ut.ProgIter(key_list, lbl='reading index', bs=True):
        key_url = url + '?indexName={indexName}'.format(indexName=key)
        key_resp = requests.get(key_url)
        assert key_resp.status_code == 200
        key_imgs = key_resp.json()['images']
        keyed_images[key] = key_imgs

    key_hist = {key: len(imgs) for key, imgs in keyed_images.items()}
    key_hist = ut.sort_dict(key_hist, 'vals')
    logger.info(ut.repr3(key_hist))
    nice_key_hist = ut.map_dict_keys(lambda k: key_to_nice[k], key_hist)
    nice_key_hist = ut.sort_dict(nice_key_hist, 'vals')
    logger.info(ut.repr3(nice_key_hist))

    key_to_urls = {
        key: ut.take_column(vals, 'url')
        for key, vals in keyed_images.items()
    }
    overlaps = {}
    import itertools

    overlap_img_list = []
    for k1, k2 in itertools.combinations(key_to_urls.keys(), 2):
        overlap_imgs = ut.isect(key_to_urls[k1], key_to_urls[k2])
        num_overlap = len(overlap_imgs)
        overlaps[(k1, k2)] = num_overlap
        overlaps[(k1, k1)] = len(key_to_urls[k1])
        if num_overlap > 0:
            # logger.info('[%s][%s], overlap=%r' % (k1, k2, num_overlap))
            overlap_img_list.extend(overlap_imgs)

    all_img_urls = list(set(ut.flatten(key_to_urls.values())))
    num_all = len(all_img_urls)  # NOQA
    logger.info('num_all = %r' % (num_all, ))

    # Determine super-categories
    categories = ['nicks', 'scar', 'trunc']

    # Force these keys into these categories
    key_to_cat = {'scarbite': 'other_injury'}

    cat_to_keys = ut.ddict(list)

    for key in key_to_urls.keys():
        flag = 1
        if key in key_to_cat:
            cat = key_to_cat[key]
            cat_to_keys[cat].append(key)
            continue
        for cat in categories:
            if cat in key:
                cat_to_keys[cat].append(key)
                flag = 0
        if flag:
            cat = 'other_injury'
            cat_to_keys[cat].append(key)

    cat_urls = ut.ddict(list)
    for cat, keys in cat_to_keys.items():
        for key in keys:
            cat_urls[cat].extend(key_to_urls[key])

    cat_hist = {}
    for cat in list(cat_urls.keys()):
        cat_urls[cat] = list(set(cat_urls[cat]))
        cat_hist[cat] = len(cat_urls[cat])

    logger.info(ut.repr3(cat_to_keys))
    logger.info(ut.repr3(cat_hist))

    key_to_cat = dict([(val, key) for key, vals in cat_to_keys.items()
                       for val in vals])

    # ingestset = {
    #    '__class__': 'ImageSet',
    #    'images': ut.ddict(dict)
    # }
    # for key, key_imgs in keyed_images.items():
    #    for imgdict in key_imgs:
    #        url = imgdict['url']
    #        encid = imgdict['correspondingEncounterNumber']
    #        # Make structure
    #        encdict = encounters[encid]
    #        encdict['__class__'] = 'Encounter'
    #        imgdict = ut.delete_keys(imgdict.copy(), ['correspondingEncounterNumber'])
    #        imgdict['__class__'] = 'Image'
    #        cat = key_to_cat[key]
    #        annotdict = {'relative_bbox': [.01, .01, .98, .98], 'tags': [cat, key]}
    #        annotdict['__class__'] = 'Annotation'

    #        # Ensure structures exist
    #        encdict['images'] = encdict.get('images', [])
    #        imgdict['annots'] = imgdict.get('annots', [])

    #        # Add an image to this encounter
    #        encdict['images'].append(imgdict)
    #        # Add an annotation to this image
    #        imgdict['annots'].append(annotdict)

    # # http://springbreak.wildbook.org/rest/org.ecocean.Encounter/1111
    # get_enc_url = 'http://www.whaleshark.org/rest/org.ecocean.Encounter/%s' % (encid,)
    # resp = requests.get(get_enc_url)
    # logger.info(ut.repr3(encdict))
    # logger.info(ut.repr3(encounters))

    # Download the files to the local disk
    # fpath_list =

    all_urls = ut.unique(
        ut.take_column(
            ut.flatten(
                ut.dict_subset(keyed_images,
                               ut.flatten(cat_to_keys.values())).values()),
            'url',
        ))

    dldir = ut.truepath('~/tmpsharks')
    from os.path import commonprefix, basename  # NOQA

    prefix = commonprefix(all_urls)
    suffix_list = [url_[len(prefix):] for url_ in all_urls]
    fname_list = [suffix.replace('/', '--') for suffix in suffix_list]

    fpath_list = []
    for url, fname in ut.ProgIter(zip(all_urls, fname_list),
                                  lbl='downloading imgs',
                                  freq=1):
        fpath = ut.grab_file_url(url,
                                 download_dir=dldir,
                                 fname=fname,
                                 verbose=False)
        fpath_list.append(fpath)

    # Make sure we keep orig info
    # url_to_keys = ut.ddict(list)
    url_to_info = ut.ddict(dict)
    for key, imgdict_list in keyed_images.items():
        for imgdict in imgdict_list:
            url = imgdict['url']
            info = url_to_info[url]
            for k, v in imgdict.items():
                info[k] = info.get(k, [])
                info[k].append(v)
            info['keys'] = info.get('keys', [])
            info['keys'].append(key)
            # url_to_keys[url].append(key)

    info_list = ut.take(url_to_info, all_urls)
    for info in info_list:
        if len(set(info['correspondingEncounterNumber'])) > 1:
            assert False, 'url with two different encounter nums'
    # Combine duplicate tags

    hashid_list = [
        ut.get_file_uuid(fpath_, stride=8)
        for fpath_ in ut.ProgIter(fpath_list, bs=True)
    ]
    groupxs = ut.group_indices(hashid_list)[1]

    # Group properties by duplicate images
    # groupxs = [g for g in groupxs if len(g) > 1]
    fpath_list_ = ut.take_column(ut.apply_grouping(fpath_list, groupxs), 0)
    url_list_ = ut.take_column(ut.apply_grouping(all_urls, groupxs), 0)
    info_list_ = [
        ut.map_dict_vals(ut.flatten, ut.dict_accum(*info_))
        for info_ in ut.apply_grouping(info_list, groupxs)
    ]

    encid_list_ = [
        ut.unique(info_['correspondingEncounterNumber'])[0]
        for info_ in info_list_
    ]
    keys_list_ = [ut.unique(info_['keys']) for info_ in info_list_]
    cats_list_ = [ut.unique(ut.take(key_to_cat, keys)) for keys in keys_list_]

    clist = ut.ColumnLists({
        'gpath': fpath_list_,
        'url': url_list_,
        'encid': encid_list_,
        'key': keys_list_,
        'cat': cats_list_,
    })

    # for info_ in ut.apply_grouping(info_list, groupxs):
    #    info = ut.dict_accum(*info_)
    #    info = ut.map_dict_vals(ut.flatten, info)
    #    x = ut.unique(ut.flatten(ut.dict_accum(*info_)['correspondingEncounterNumber']))
    #    if len(x) > 1:
    #        info = info.copy()
    #        del info['keys']
    #        logger.info(ut.repr3(info))

    flags = ut.lmap(ut.fpath_has_imgext, clist['gpath'])
    clist = clist.compress(flags)

    import wbia

    ibs = wbia.opendb('WS_Injury', allow_newdir=True)

    gid_list = ibs.add_images(clist['gpath'])
    clist['gid'] = gid_list

    failed_flags = ut.flag_None_items(clist['gid'])
    logger.info('# failed %s' % (sum(failed_flags), ))
    passed_flags = ut.not_list(failed_flags)
    clist = clist.compress(passed_flags)
    ut.assert_all_not_None(clist['gid'])
    # ibs.get_image_uris_original(clist['gid'])
    ibs.set_image_uris_original(clist['gid'], clist['url'], overwrite=True)

    # ut.zipflat(clist['cat'], clist['key'])
    if False:
        # Can run detection instead
        clist['tags'] = ut.zipflat(clist['cat'])
        aid_list = ibs.use_images_as_annotations(clist['gid'],
                                                 adjust_percent=0.01,
                                                 tags_list=clist['tags'])
        aid_list

    import wbia.plottool as pt
    from wbia import core_annots

    pt.qt4ensure()
    # annots = ibs.annots()
    # aids = [1, 2]
    # ibs.depc_annot.get('hog', aids , 'hog')
    # ibs.depc_annot.get('chip', aids, 'img')
    for aid in ut.InteractiveIter(ibs.get_valid_aids()):
        hogs = ibs.depc_annot.d.get_hog_hog([aid])
        chips = ibs.depc_annot.d.get_chips_img([aid])
        chip = chips[0]
        hogimg = core_annots.make_hog_block_image(hogs[0])
        pt.clf()
        pt.imshow(hogimg, pnum=(1, 2, 1))
        pt.imshow(chip, pnum=(1, 2, 2))
        fig = pt.gcf()
        fig.show()
        fig.canvas.draw()

    # logger.info(len(groupxs))

    # if False:
    # groupxs = ut.find_duplicate_items(ut.lmap(basename, suffix_list)).values()
    # logger.info(ut.repr3(ut.apply_grouping(all_urls, groupxs)))
    #    # FIX
    #    for fpath, fname in zip(fpath_list, fname_list):
    #        if ut.checkpath(fpath):
    #            ut.move(fpath, join(dirname(fpath), fname))
    #            logger.info('fpath = %r' % (fpath,))

    # import wbia
    # from wbia.dbio import ingest_dataset
    # dbdir = wbia.sysres.lookup_dbdir('WS_ALL')
    # self = ingest_dataset.Ingestable2(dbdir)

    if False:
        # Show overlap matrix
        import wbia.plottool as pt
        import pandas as pd
        import numpy as np

        dict_ = overlaps
        s = pd.Series(dict_, index=pd.MultiIndex.from_tuples(overlaps))
        df = s.unstack()
        lhs, rhs = df.align(df.T)
        df = lhs.add(rhs, fill_value=0).fillna(0)

        label_texts = df.columns.values

        def label_ticks(label_texts):
            import wbia.plottool as pt

            truncated_labels = [repr(lbl[0:100]) for lbl in label_texts]
            ax = pt.gca()
            ax.set_xticks(list(range(len(label_texts))))
            ax.set_xticklabels(truncated_labels)
            [lbl.set_rotation(-55) for lbl in ax.get_xticklabels()]
            [
                lbl.set_horizontalalignment('left')
                for lbl in ax.get_xticklabels()
            ]

            # xgrid, ygrid = np.meshgrid(range(len(label_texts)), range(len(label_texts)))
            # pt.plot_surface3d(xgrid, ygrid, disjoint_mat)
            ax.set_yticks(list(range(len(label_texts))))
            ax.set_yticklabels(truncated_labels)
            [
                lbl.set_horizontalalignment('right')
                for lbl in ax.get_yticklabels()
            ]
            [
                lbl.set_verticalalignment('center')
                for lbl in ax.get_yticklabels()
            ]
            # [lbl.set_rotation(20) for lbl in ax.get_yticklabels()]

        # df = df.sort(axis=0)
        # df = df.sort(axis=1)

        sortx = np.argsort(df.sum(axis=1).values)[::-1]
        df = df.take(sortx, axis=0)
        df = df.take(sortx, axis=1)

        fig = pt.figure(fnum=1)
        fig.clf()
        mat = df.values.astype(np.int32)
        mat[np.diag_indices(len(mat))] = 0
        vmax = mat[(1 - np.eye(len(mat))).astype(bool)].max()
        import matplotlib.colors

        norm = matplotlib.colors.Normalize(vmin=0, vmax=vmax, clip=True)
        pt.plt.imshow(mat, cmap='hot', norm=norm, interpolation='none')
        pt.plt.colorbar()
        pt.plt.grid('off')
        label_ticks(label_texts)
        fig.tight_layout()

    # overlap_df = pd.DataFrame.from_dict(overlap_img_list)

    class TmpImage(ut.NiceRepr):
        pass

    from skimage.feature import hog
    from skimage import data, color, exposure
    import wbia.plottool as pt

    image2 = color.rgb2gray(data.astronaut())  # NOQA

    fpath = './GOPR1120.JPG'

    import vtool as vt

    for fpath in [fpath]:
        """
        http://scikit-image.org/docs/dev/auto_examples/plot_hog.html
        """

        image = vt.imread(fpath, grayscale=True)
        image = pt.color_funcs.to_base01(image)

        fig = pt.figure(fnum=2)
        fd, hog_image = hog(
            image,
            orientations=8,
            pixels_per_cell=(16, 16),
            cells_per_block=(1, 1),
            visualise=True,
        )

        fig, (ax1, ax2) = pt.plt.subplots(1,
                                          2,
                                          figsize=(8, 4),
                                          sharex=True,
                                          sharey=True)

        ax1.axis('off')
        ax1.imshow(image, cmap=pt.plt.cm.gray)
        ax1.set_title('Input image')
        ax1.set_adjustable('box-forced')

        # Rescale histogram for better display
        hog_image_rescaled = exposure.rescale_intensity(hog_image,
                                                        in_range=(0, 0.02))

        ax2.axis('off')
        ax2.imshow(hog_image_rescaled, cmap=pt.plt.cm.gray)
        ax2.set_title('Histogram of Oriented Gradients')
        ax2.set_adjustable('box-forced')
        pt.plt.show()
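The deduplication step buried in this script (hash each downloaded file, group by hash, keep one representative) can be reproduced with hashlib standing in for ut.get_file_uuid. A minimal sketch that fabricates temporary files so it runs end to end:

import hashlib
import tempfile
from collections import defaultdict
from os.path import join

def file_hash(fpath, blocksize=1 << 20):
    hasher = hashlib.sha1()
    with open(fpath, 'rb') as f:
        for chunk in iter(lambda: f.read(blocksize), b''):
            hasher.update(chunk)
    return hasher.hexdigest()

# Hypothetical downloads: two of the three files share content.
dldir = tempfile.mkdtemp()
fpath_list = [join(dldir, name) for name in ['a.jpg', 'b.jpg', 'c.jpg']]
for fpath, data in zip(fpath_list, [b'shark1', b'shark2', b'shark1']):
    with open(fpath, 'wb') as f:
        f.write(data)

# Group file indices by content hash.
hash_to_idxs = defaultdict(list)
for idx, fpath in enumerate(fpath_list):
    hash_to_idxs[file_hash(fpath)].append(idx)

# Keep one representative path per content hash.
fpath_list_ = [fpath_list[idxs[0]] for idxs in hash_to_idxs.values()]
print(fpath_list_)  # a.jpg and b.jpg survive; c.jpg duplicates a.jpg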
Example 10
def get_query_result_info(qreq_):
    """
    Helper function.

    Runs queries of a specific configuration and returns the best rank of each query.

    Args:
        qaids (list) : query annotation ids
        daids (list) : database annotation ids

    Returns:
        qx2_bestranks

    CommandLine:
        python -m ibeis.expt.harness --test-get_query_result_info
        python -m ibeis.expt.harness --test-get_query_result_info:0
        python -m ibeis.expt.harness --test-get_query_result_info:1
        python -m ibeis.expt.harness --test-get_query_result_info:0 --db lynx -a default:qsame_imageset=True,been_adjusted=True,excluderef=True -t default:K=1
        python -m ibeis.expt.harness --test-get_query_result_info:0 --db lynx -a default:qsame_imageset=True,been_adjusted=True,excluderef=True -t default:K=1 --cmd

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.expt.harness import *  # NOQA
        >>> import ibeis
        >>> qreq_ = ibeis.main_helpers.testdata_qreq_(a=['default:qindex=0:3,dindex=0:5'])
        >>> #ibs = ibeis.opendb('PZ_MTEST')
        >>> #qaids = ibs.get_valid_aids()[0:3]
        >>> #daids = ibs.get_valid_aids()[0:5]
        >>> #qreq_ = ibs.new_query_request(qaids, daids, verbose=True, cfgdict={})
        >>> cfgres_info = get_query_result_info(qreq_)
        >>> print(ut.dict_str(cfgres_info))

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.expt.harness import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> #cfgdict = dict(codename='vsone')
        >>> # ibs.cfg.query_cfg.codename = 'vsone'
        >>> qaids = ibs.get_valid_aids()[0:3]
        >>> daids = ibs.get_valid_aids()[0:5]
        >>> qreq_ = ibs.new_query_request(qaids, daids, verbose=True, cfgdict={})
        >>> cfgres_info = get_query_result_info(qreq_)
        >>> print(ut.dict_str(cfgres_info))

    Ignore:

        ibeis -e rank_cdf --db humpbacks -a default:has_any=hasnotch,mingt=2 -t default:proot=BC_DTW --show --nocache-big

        ibeis -e rank_cdf --db humpbacks -a default:is_known=True,mingt=2 -t default:pipeline_root=BC_DTW
        --show --debug-depc
        ibeis -e rank_cdf --db humpbacks -a default:is_known=True -t default:pipeline_root=BC_DTW --qaid=1,9,15,16,18 --daid-override=1,9,15,16,18,21,22 --show --debug-depc
        --clear-all-depcache
    """
    try:
        ibs = qreq_.ibs
    except AttributeError:
        ibs = qreq_.depc.controller
    import vtool as vt
    cm_list = qreq_.execute()
    #qreq_.ibs.query_chips(qreq_=qreq_, use_bigcache=False)
    qx2_cm = cm_list
    qaids = qreq_.qaids
    #qaids2 = [cm.qaid for cm in cm_list]
    qnids = ibs.get_annot_name_rowids(qaids)

    import utool
    with utool.embed_on_exception_context:
        unique_dnids = np.unique(ibs.get_annot_name_rowids(qreq_.daids))

        unique_qnids, groupxs = vt.group_indices(qnids)
        cm_group_list = ut.apply_grouping(cm_list, groupxs)
        qnid2_aggnamescores = {}

    qnx2_nameres_info = []

    #import utool
    #utool.embed()

    # Ranked list aggregation-ish
    nameres_info_list = []
    for qnid, cm_group in zip(unique_qnids, cm_group_list):
        nid2_name_score_group = [
            dict([(nid, cm.name_score_list[nidx])
                  for nid, nidx in cm.nid2_nidx.items()]) for cm in cm_group
        ]
        aligned_name_scores = np.array([
            ut.dict_take(nid2_name_score, unique_dnids.tolist(), -np.inf)
            for nid2_name_score in nid2_name_score_group
        ]).T
        name_score_list = np.nanmax(aligned_name_scores, axis=1)
        qnid2_aggnamescores[qnid] = name_score_list
        # sort
        sortx = name_score_list.argsort()[::-1]
        sorted_namescores = name_score_list[sortx]
        sorted_dnids = unique_dnids[sortx]

        ## infer agg name results
        is_positive = sorted_dnids == qnid
        is_negative = np.logical_and(~is_positive, sorted_dnids > 0)
        gt_name_rank = None if not np.any(is_positive) else np.where(
            is_positive)[0][0]
        gf_name_rank = None if not np.any(is_negative) else np.nonzero(
            is_negative)[0][0]
        gt_nid = sorted_dnids[gt_name_rank]
        gf_nid = sorted_dnids[gf_name_rank]
        gt_name_score = sorted_namescores[gt_name_rank]
        gf_name_score = sorted_namescores[gf_name_rank]
        qnx2_nameres_info = {}
        qnx2_nameres_info['qnid'] = qnid
        qnx2_nameres_info['gt_nid'] = gt_nid
        qnx2_nameres_info['gf_nid'] = gf_nid
        qnx2_nameres_info['gt_name_rank'] = gt_name_rank
        qnx2_nameres_info['gf_name_rank'] = gf_name_rank
        qnx2_nameres_info['gt_name_score'] = gt_name_score
        qnx2_nameres_info['gf_name_score'] = gf_name_score

        nameres_info_list.append(qnx2_nameres_info)
        nameres_info = ut.dict_stack(nameres_info_list, 'qnx2_')

    qaids = qreq_.qaids
    daids = qreq_.daids
    qx2_gtaids = ibs.get_annot_groundtruth(qaids, daid_list=daids)
    # Get the groundtruth ranks and accuracy measures
    qx2_qresinfo = [get_qres_name_result_info(ibs, cm, qreq_) for cm in qx2_cm]

    cfgres_info = ut.dict_stack(qx2_qresinfo, 'qx2_')
    #for key in qx2_qresinfo[0].keys():
    #    'qx2_' + key
    #    ut.get_list_column(qx2_qresinfo, key)

    if False:
        qx2_avepercision = np.array([
            cm.get_average_percision(ibs=ibs, gt_aids=gt_aids)
            for (cm, gt_aids) in zip(qx2_cm, qx2_gtaids)
        ])
        cfgres_info['qx2_avepercision'] = qx2_avepercision
    # Compute mAP score  # TODO: use mAP score
    # (Actually mAP score doesn't make much sense when using name scoring.)
    #mAP = qx2_avepercision[~np.isnan(qx2_avepercision)].mean()  # NOQA
    cfgres_info['qx2_bestranks'] = ut.replace_nones(
        cfgres_info['qx2_bestranks'], -1)
    cfgres_info.update(nameres_info)
    return cfgres_info
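The per-name aggregation in the loop above can be isolated: align every chip match's name scores against the unique database name ids (missing names get -inf), stack, and take a nanmax across matches. A small numpy sketch with hypothetical score dictionaries:

import numpy as np

unique_dnids = np.array([11, 12, 13])

# Hypothetical name scores from two chip matches of the same query name.
nid2_name_score_group = [{11: 0.9, 13: 0.2}, {12: 0.5, 13: 0.6}]

# Rows are database names, columns are chip matches.
aligned_name_scores = np.array([
    [d.get(nid, -np.inf) for nid in unique_dnids.tolist()]
    for d in nid2_name_score_group
]).T
name_score_list = np.nanmax(aligned_name_scores, axis=1)

sortx = name_score_list.argsort()[::-1]
print(unique_dnids[sortx], name_score_list[sortx])  # [11 13 12] [0.9 0.6 0.5]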
Example 11
def draw_feat_scoresep(testres, f=None, disttype=None):
    r"""
    SeeAlso:
        ibeis.algo.hots.scorenorm.train_featscore_normalizer

    CommandLine:
        python -m ibeis --tf TestResult.draw_feat_scoresep --show
        python -m ibeis --tf TestResult.draw_feat_scoresep --show -t default:sv_on=[True,False]
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 --disttype=L2_sift,fg
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 --disttype=L2_sift
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:lnbnn_on=True --namemode=True
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:lnbnn_on=True --namemode=False

        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST --disttype=L2_sift
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST --disttype=L2_sift -t best:SV=False

        utprof.py -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1
        utprof.py -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 --fsvx=1:2
        utprof.py -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 --fsvx=0:1

        utprof.py -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 -t best:lnbnn_on=False,bar_l2_on=True  --fsvx=0:1

        # We want to query the oxford annots tagged 'query'
        # and we want the database to contain
        # K correct images per query, as well as the distractors

        python -m ibeis --tf TestResult.draw_feat_scoresep  --show --db Oxford -a default:qhas_any=\(query,\),dpername=1,exclude_reference=True,minqual=ok
        python -m ibeis --tf TestResult.draw_feat_scoresep  --show --db Oxford -a default:qhas_any=\(query,\),dpername=1,exclude_reference=True,minqual=good

        python -m ibeis --tf get_annotcfg_list  --db PZ_Master1 -a timectrl --acfginfo --verbtd  --veryverbtd --nocache-aid

        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST --disttype=ratio

    Example:
        >>> # SCRIPT
        >>> from ibeis.expt.test_result import *  # NOQA
        >>> from ibeis.init import main_helpers
        >>> disttype = ut.get_argval('--disttype', type_=list, default=None)
        >>> ibs, testres = main_helpers.testdata_expts(
        >>>     defaultdb='PZ_MTEST', a=['timectrl'], t=['best'])
        >>> f = ut.get_argval(('--filt', '-f'), type_=list, default=[''])
        >>> testres.draw_feat_scoresep(f=f)
        >>> ut.show_if_requested()
    """
    print('[testres] draw_feat_scoresep')
    import plottool as pt

    def load_feat_scores(qreq_, qaids):
        import ibeis  # NOQA
        from os.path import dirname, join  # NOQA
        # HACKY CACHE
        cfgstr = qreq_.get_cfgstr(with_input=True)
        cache_dir = join(dirname(dirname(ibeis.__file__)),
                         'TMP_FEATSCORE_CACHE')
        namemode = ut.get_argval('--namemode', default=True)
        fsvx = ut.get_argval('--fsvx',
                             type_='fuzzy_subset',
                             default=slice(None, None, None))
        threshx = ut.get_argval('--threshx', type_=int, default=None)
        thresh = ut.get_argval('--thresh', type_=float, default=.9)
        num = ut.get_argval('--num', type_=int, default=1)
        cfg_components = [
            cfgstr, disttype, namemode, fsvx, threshx, thresh, f, num
        ]
        cache_cfgstr = ','.join(ut.lmap(six.text_type, cfg_components))
        cache_hashid = ut.hashstr27(cache_cfgstr + '_v1')
        cache_name = ('get_cfgx_feat_scores_' + cache_hashid)

        @ut.cached_func(cache_name,
                        cache_dir=cache_dir,
                        key_argx=[],
                        use_cache=True)
        def get_cfgx_feat_scores(qreq_, qaids):
            from ibeis.algo.hots import scorenorm
            cm_list = qreq_.execute(qaids)
            # print('Done loading cached chipmatches')
            tup = scorenorm.get_training_featscores(qreq_,
                                                    cm_list,
                                                    disttype,
                                                    namemode,
                                                    fsvx,
                                                    threshx,
                                                    thresh,
                                                    num=num)
            # print(ut.depth_profile(tup))
            tp_scores, tn_scores, scorecfg = tup
            return tp_scores, tn_scores, scorecfg

        tp_scores, tn_scores, scorecfg = get_cfgx_feat_scores(qreq_, qaids)
        return tp_scores, tn_scores, scorecfg

    valid_case_pos = testres.case_sample2(filt_cfg=f, return_mask=False)
    cfgx2_valid_qxs = ut.group_items(valid_case_pos.T[0], valid_case_pos.T[1])
    test_qaids = testres.get_test_qaids()
    cfgx2_valid_qaids = ut.map_dict_vals(ut.partial(ut.take, test_qaids),
                                         cfgx2_valid_qxs)

    join_acfgs = True

    # TODO: option to average over pipeline configurations
    if join_acfgs:
        groupxs = testres.get_cfgx_groupxs()
    else:
        groupxs = list(zip(range(len(testres.cfgx2_qreq_))))
    grouped_qreqs = ut.apply_grouping(testres.cfgx2_qreq_, groupxs)

    grouped_scores = []
    for cfgxs, qreq_group in zip(groupxs, grouped_qreqs):
        # testres.print_pcfg_info()
        score_group = []
        for cfgx, qreq_ in zip(cfgxs, qreq_group):
            print('Loading cached chipmatches')
            qaids = cfgx2_valid_qaids[cfgx]
            tp_scores, tn_scores, scorecfg = load_feat_scores(qreq_, qaids)
            score_group.append((tp_scores, tn_scores, scorecfg))
        grouped_scores.append(score_group)

    cfgx2_shortlbl = testres.get_short_cfglbls(join_acfgs=join_acfgs)
    for score_group, lbl in zip(grouped_scores, cfgx2_shortlbl):
        tp_scores = np.hstack(ut.take_column(score_group, 0))
        tn_scores = np.hstack(ut.take_column(score_group, 1))
        scorecfg = '+++'.join(ut.unique(ut.take_column(score_group, 2)))
        score_group
        # TODO: learn this score normalizer as a model
        # encoder = vt.ScoreNormalizer(adjust=4, monotonize=False)
        encoder = vt.ScoreNormalizer(adjust=2, monotonize=True)
        encoder.fit_partitioned(tp_scores, tn_scores, verbose=False)
        figtitle = 'Feature Scores: %s, %s' % (scorecfg, lbl)
        fnum = None

        vizkw = {}
        sephack = ut.get_argflag('--sephack')
        if not sephack:
            vizkw['target_tpr'] = .95
            vizkw['score_range'] = (0, 1.0)

        encoder.visualize(
            figtitle=figtitle,
            fnum=fnum,
            with_scores=False,
            #with_prebayes=True,
            with_prebayes=False,
            with_roc=True,
            with_postbayes=False,
            #with_postbayes=True,
            **vizkw)
        icon = testres.ibs.get_database_icon()
        if icon is not None:
            pt.overlay_icon(icon, coords=(1, 0), bbox_alignment=(1, 0))

        if ut.get_argflag('--contextadjust'):
            pt.adjust_subplots(left=.1, bottom=.25, wspace=.2, hspace=.2)
            pt.adjust_subplots(use_argv=True)
    return encoder