Example #1
    def get_patches(inva, wx, ibs, verbose=True):
        """
        Loads the patches assigned to a particular word in this stack

        >>> inva.wx_to_aids = inva.compute_inverted_list()
        >>> verbose=True
        """
        config = inva.config
        aid_list = inva.wx_to_aids[wx]
        X_list = [inva.get_annot(aid) for aid in aid_list]
        fxs_groups = [X.fxs(wx) for X in X_list]
        all_kpts_list = ibs.depc.d.get_feat_kpts(aid_list, config=config)
        sub_kpts_list = vt.ziptake(all_kpts_list, fxs_groups, axis=0)
        total_patches = sum(ut.lmap(len, fxs_groups))

        chip_list = ibs.depc_annot.d.get_chips_img(aid_list, config=config)
        # convert to appropriate colorspace
        #if colorspace is not None:
        #    chip_list = vt.convert_image_list_colorspace(chip_list, colorspace)
        # ut.print_object_size(chip_list, 'chip_list')

        patch_size = 64
        shape = (total_patches, patch_size, patch_size, 3)
        _prog = ut.ProgPartial(enabled=verbose, lbl='warping patches', bs=True)
        _patchiter = ut.iflatten([
            vt.get_warped_patches(chip, kpts, patch_size=patch_size)[0]
            #vt.get_warped_patches(chip, kpts, patch_size=patch_size, use_cpp=True)[0]
            for chip, kpts in _prog(zip(chip_list, sub_kpts_list),
                                    length=len(aid_list))
        ])
        word_patches = vt.fromiter_nd(_patchiter, shape, dtype=np.uint8)
        return word_patches
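
The final vt.fromiter_nd call collects the patch iterator into one preallocated uint8 array of shape (total_patches, 64, 64, 3). As a rough, hedged illustration of what that helper presumably does (an assumption; only its call site is shown above), a minimal stand-in could look like:

import numpy as np

def fromiter_nd_sketch(item_iter, shape, dtype):
    # Assumed behavior of vt.fromiter_nd: fill a preallocated array of the
    # given shape from an iterator of equally shaped items (warped patches here).
    out = np.empty(shape, dtype=dtype)
    for i, item in enumerate(item_iter):
        out[i] = item
    return out

# Toy usage with three blank 64x64 patches
patches = (np.zeros((64, 64, 3), dtype=np.uint8) for _ in range(3))
word_patches = fromiter_nd_sketch(patches, (3, 64, 64, 3), np.uint8)
print(word_patches.shape)  # (3, 64, 64, 3)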
Example #2
def ensure_names_are_connected(graph, aids_list):
    aug_graph = graph.copy().to_undirected()
    orig_edges = aug_graph.edges()
    unflat_edges = [list(itertools.product(aids, aids)) for aids in aids_list]
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    new_edges = ut.setdiff_ordered(aid_pairs, aug_graph.edges())

    preweighted_edges = nx.get_edge_attributes(aug_graph, 'weight')
    if preweighted_edges:
        orig_edges = ut.setdiff(orig_edges, list(preweighted_edges.keys()))

    aug_graph.add_edges_from(new_edges)
    # Ensure the largest possible set of original edges is in the MST
    nx.set_edge_attributes(aug_graph,
                           name='weight',
                           values=dict([(edge, 1.0) for edge in new_edges]))
    nx.set_edge_attributes(aug_graph,
                           name='weight',
                           values=dict([(edge, 0.1) for edge in orig_edges]))
    for cc_sub_graph in nx.connected_component_subgraphs(aug_graph):
        mst_sub_graph = nx.minimum_spanning_tree(cc_sub_graph)
        for edge in mst_sub_graph.edges():
            redge = edge[::-1]
            if not (graph.has_edge(*edge) or graph.has_edge(*redge)):
                graph.add_edge(*redge, attr_dict={})
Example #3
def get_name_rowid_edges_from_nids(ibs, nids):
    aids_list = ibs.get_name_aids(nids)
    import itertools
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)
    return aids1, aids2
Example #5
def get_name_rowid_edges_from_aids2(ibs, aids_list):
    # grouped version
    import itertools
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    #if full:
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)
    return aids1, aids2
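
The preceding examples all rely on the same product-then-flatten idiom to turn grouped annotation ids into directed, non-self pairs. A stdlib-only sketch of that idiom, assuming only that ut.iflatten behaves like itertools.chain.from_iterable:

import itertools

def directed_pairs_from_groups(aids_list):
    # Build every directed (a, b) pair within each group, excluding self pairs.
    # This mirrors the product/iflatten idiom above using only the stdlib.
    unflat_edges = (itertools.product(aids, aids) for aids in aids_list)
    return [tup for tup in itertools.chain.from_iterable(unflat_edges)
            if tup[0] != tup[1]]

# Two name groups produce within-group edges only.
print(directed_pairs_from_groups([[1, 2], [3, 4, 5]]))
# [(1, 2), (2, 1), (3, 4), (3, 5), (4, 3), (4, 5), (5, 3), (5, 4)]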
Example #6
    def compute_word_weights(inva, method='idf'):
        """
        Compute a per-word weight like idf

        Example:
            >>> # xdoctest: +REQUIRES(--slow)
            >>> # ENABLE_DOCTEST
            >>> from wbia.algo.smk.inverted_index import *  # NOQA
            >>> qreq_, inva = testdata_inva()
            >>> wx_to_weight = inva.compute_word_weights()
            >>> print('wx_to_weight = %r' % (wx_to_weight,))
        """
        wx_list = sorted(inva.wx_to_aids.keys())
        with ut.Timer('Computing %s weights' % (method, )):
            if method == 'idf':
                ndocs_total = len(inva.aids)
                # Unweighted documents
                ndocs_per_word = np.array(
                    [len(set(inva.wx_to_aids[wx])) for wx in wx_list])
                weight_per_word = smk_funcs.inv_doc_freq(
                    ndocs_total, ndocs_per_word)
            elif method == 'idf-maw':
                # idf denom (the num of docs containing a word for each word)
                # The max(maws) denote the prob that this word indexes an annot
                ndocs_total = len(inva.aids)
                # Weighted documents
                wx_to_ndocs = {wx: 0.0 for wx in wx_list}
                for wx, maws in zip(ut.iflatten(inva.wx_lists),
                                    ut.iflatten(inva.maws_lists)):
                    wx_to_ndocs[wx] += min(1.0, max(maws))
                ndocs_per_word = ut.take(wx_to_ndocs, wx_list)
                weight_per_word = smk_funcs.inv_doc_freq(
                    ndocs_total, ndocs_per_word)
            elif method == 'uniform':
                weight_per_word = np.ones(len(wx_list))
            wx_to_weight = dict(zip(wx_list, weight_per_word))
            wx_to_weight = ut.DefaultValueDict(0, wx_to_weight)
        return wx_to_weight
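
The 'idf' branch hands ndocs_total and ndocs_per_word to smk_funcs.inv_doc_freq. Assuming that helper implements the standard inverse document frequency (an assumption; only its call site appears above), a minimal stand-alone sketch would be:

import numpy as np

def inv_doc_freq_sketch(ndocs_total, ndocs_per_word):
    # Hypothetical stand-in for smk_funcs.inv_doc_freq: standard idf weighting.
    # Words appearing in every document get weight ~0; rare words get larger weights.
    ndocs_per_word = np.asarray(ndocs_per_word, dtype=np.float64)
    out = np.zeros_like(ndocs_per_word)
    mask = ndocs_per_word > 0
    out[mask] = np.log(ndocs_total / ndocs_per_word[mask])
    return out

print(inv_doc_freq_sketch(100, [100, 10, 1]))  # [0.0, ~2.30, ~4.61]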
Example #7
def translate_all():
    """ Translates a all python paths in directory """
    dpaths = utool.ls_moduledirs('.')
    #print('[cyth] translate_all: %r' % (dpaths,))

    globkw = {'recursive': True, 'with_dirs': False, 'with_files': True}
    # Find all unique python files in directory
    fpaths_iter = [
        utool.glob(utool.unixpath(dpath), '*.py', **globkw) for dpath in dpaths
    ]
    fpath_iter = utool.iflatten(fpaths_iter)
    abspath_iter = map(utool.unixpath, fpath_iter)
    fpath_list = list(set(list(abspath_iter)))
    #print('[cyth] translate_all: %s' % ('\n'.join(fpath_list),))
    # Try to translate each
    translate(*fpath_list)
Example #8
def translate_all():
    """ Translates a all python paths in directory """
    dpaths = utool.ls_moduledirs('.')
    #print('[cyth] translate_all: %r' % (dpaths,))

    globkw = {
        'recursive': True,
        'with_dirs': False,
        'with_files': True
    }
    # Find all unique python files in directory
    fpaths_iter = [utool.glob(utool.unixpath(dpath), '*.py', **globkw)
                   for dpath in dpaths]
    fpath_iter = utool.iflatten(fpaths_iter)
    abspath_iter = map(utool.unixpath, fpath_iter)
    fpath_list = list(set(list(abspath_iter)))
    #print('[cyth] translate_all: %s' % ('\n'.join(fpath_list),))
    # Try to translate each
    translate(*fpath_list)
Example #9
def ensure_names_are_connected(graph, aids_list):
    aug_graph = graph.copy().to_undirected()
    orig_edges = aug_graph.edges()
    unflat_edges = [list(itertools.product(aids, aids)) for aids in aids_list]
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    new_edges = ut.setdiff_ordered(aid_pairs, aug_graph.edges())

    preweighted_edges = nx.get_edge_attributes(aug_graph, 'weight')
    if preweighted_edges:
        orig_edges = ut.setdiff(orig_edges, list(preweighted_edges.keys()))

    aug_graph.add_edges_from(new_edges)
    # Ensure the largest possible set of original edges is in the MST
    nx.set_edge_attributes(aug_graph, 'weight', dict([(edge, 1.0) for edge in new_edges]))
    nx.set_edge_attributes(aug_graph, 'weight', dict([(edge, 0.1) for edge in orig_edges]))
    for cc_sub_graph in nx.connected_component_subgraphs(aug_graph):
        mst_sub_graph = nx.minimum_spanning_tree(cc_sub_graph)
        for edge in mst_sub_graph.edges():
            redge = edge[::-1]
            if not (graph.has_edge(*edge) or graph.has_edge(*redge)):
                graph.add_edge(*redge, attr_dict={})
Example #10
def setup_pzmtest_subgraph():
    import ibeis
    ibs = ibeis.opendb(db='PZ_MTEST')
    nids = ibs.get_valid_nids()
    aids_list = ibs.get_name_aids(nids)

    import itertools
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)

    rng = np.random.RandomState(0)
    flags = rng.rand(len(aids1)) > .878
    aids1 = ut.compress(aids1, flags)
    aids2 = ut.compress(aids2, flags)

    for aid1, aid2 in zip(aids1, aids2):
        ibs.set_annot_pair_as_positive_match(aid1, aid2)
        ibs.set_annot_pair_as_positive_match(aid2, aid1)

    rowids = ibs._get_all_annotmatch_rowids()
    aids1 = ibs.get_annotmatch_aid1(rowids)
    aids2 = ibs.get_annotmatch_aid2(rowids)
Example #12
def build_correspondences(sparse_list, qfxs_list, dfxs_list, daids_list):
    """ helper
    these list comprehensions replace the previous for loop
    they still need to be optimized a little bit (and made clearer)
    can probably unnest the list comprehensions as well
    """
    """
    IGNORE
    Legacy::
        def old_build_correspondences(sparse_list, qfxs_list, dfxs_list, daids_list):
            fm_nestlist_ = []
            fs_nestlist_ = []
            daid_nestlist_ = []
            for scores, qfxs, dfxs, daids in zip(sparse_list, qfxs_list, dfxs_list, daids_list):
                for rx, cx, score in zip(scores.row, scores.col, scores.data):
                    _fm = tuple(product(qfxs[rx], dfxs[cx]))
                    _fs = [score / len(_fm)] * len(_fm)
                    _daid = [daids[cx]] * len(_fm)
                    fm_nestlist_.append(_fm)
                    fs_nestlist_.append(_fs)
                    daid_nestlist_.append(_daid)
            return fm_nestlist_, fs_nestlist_, daid_nestlist_

        oldtup_ = old_build_correspondences(sparse_list, qfxs_list, dfxs_list, daids_list)
        fm_nestlist_, fs_nestlist_, daid_nestlist_ = oldtup_
        newtup_ = build_correspondences(sparse_list, qfxs_list, dfxs_list, daids_list)
        fm_nestlist, fs_nestlist, daid_nestlist = newtup_

        assert fm_nestlist == fm_nestlist_
        assert fs_nestlist == fs_nestlist_
        assert daid_nestlist == daid_nestlist_

        47ms
        %timeit build_correspondences(sparse_list, qfxs_list, dfxs_list, daids_list)

        59ms
        %timeit old_build_correspondences(sparse_list, qfxs_list, dfxs_list, daids_list)
    IGNORE
    """
    # FIXME: rewrite double comprehension as a flat comprehension

    # Build nested feature matches (a single match might have many members)
    fm_nestlist = [
        tuple(product(qfxs[rx], dfxs[cx]))
        for scores, qfxs, dfxs in zip(sparse_list, qfxs_list, dfxs_list)
        for rx, cx in zip(scores.row, scores.col)
    ]
    nFm_list = [len(fm) for fm in fm_nestlist]
    #fs_unsplit = (score
    #              for scores in sparse_list
    #              for score in scores.data)
    #daid_unsplit = (daids[cx]
    #                for scores, daids in zip(sparse_list, daids_list)
    #                for cx in scores.col)
    # Build nested feature scores
    fs_unsplit = utool.iflatten((scores.data for scores in sparse_list))
    # Build nested feature matches (a single match might have many members)
    daid_unsplit = utool.iflatten(
        (daids.take(scores.col)
         for scores, daids in zip(sparse_list, daids_list)))
    # Expand feature scores and daids splitting scores amongst match members
    fs_nestlist = [[score / nFm] * nFm
                   for score, nFm in zip(fs_unsplit, nFm_list)]
    daid_nestlist = [[daid] * nFm for daid, nFm in zip(daid_unsplit, nFm_list)]

    if DEBUG_SMK:
        assert len(fm_nestlist) == len(fs_nestlist), 'inconsistent len'
        assert len(fm_nestlist) == len(nFm_list), 'inconsistent len'
        assert len(daid_nestlist) == len(fs_nestlist), 'inconsistent len'
        min_ = min(2, len(nFm_list))
        max_ = min(15, len(nFm_list))
        print('nFm_list[_min:_max]      = ' +
              utool.list_str(nFm_list[min_:max_]))
        print('fm_nestlist[_min:_max]   = ' +
              utool.list_str(fm_nestlist[min_:max_]))
        print('fs_nestlist[_min:_max]   = ' +
              utool.list_str(fs_nestlist[min_:max_]))
        print('daid_nestlist[_min:_max] = ' +
              utool.list_str(daid_nestlist[min_:max_]))
        for fm_, fs_, daid_ in zip(fm_nestlist, fs_nestlist, daid_nestlist):
            assert len(fm_) == len(fs_), 'inconsistent len'
            assert len(fm_) == len(daid_), 'inconsistent len'
        print('[smk_core] checked build_chipmatch correspondence ...ok')
    return fm_nestlist, fs_nestlist, daid_nestlist
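
To make the expand-and-split step above concrete, here is a toy illustration with made-up query/database feature indexes: one word-level score expands into len(fm) feature pairs and is divided evenly among them, which is exactly what fs_nestlist does.

from itertools import product

qfxs_rx = [10, 11]          # query feature indexes assigned to a word
dfxs_cx = [3, 7, 9]         # database feature indexes assigned to the same word
score = 0.6                 # word-level score from the sparse matrix

fm = tuple(product(qfxs_rx, dfxs_cx))
fs = [score / len(fm)] * len(fm)
print(len(fm), fs[0])       # 6 pairs, each carrying 0.1 of the score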
Example #13
def invertible_stack(vecs_list, label_list):
    """
    Stacks descriptors into a flat structure and returns inverse mapping from
    flat database descriptor indexes (dx) to annotation ids (label) and feature
    indexes (fx). Feature indexes are w.r.t. annotation indexes.

    Output:
        idx2_desc - flat descriptor stack
        idx2_label  - inverted index into annotations
        idx2_fx   - inverted index into features

    # Example with 2D Descriptors

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.nearest_neighbors import *  # NOQA
        >>> DESC_TYPE = np.uint8
        >>> label_list  = [1, 2, 3, 4, 5]
        >>> vecs_list = [
        ...     np.array([[0, 0], [0, 1]], dtype=DESC_TYPE),
        ...     np.array([[5, 3], [2, 30], [1, 1]], dtype=DESC_TYPE),
        ...     np.empty((0, 2), dtype=DESC_TYPE),
        ...     np.array([[5, 3], [2, 30], [1, 1]], dtype=DESC_TYPE),
        ...     np.array([[3, 3], [42, 42], [2, 6]], dtype=DESC_TYPE),
        ...     ]
        >>> idx2_vec, idx2_label, idx2_fx = invertible_stack(vecs_list, label_list)
        >>> print(repr(idx2_vec.T))
        array([[ 0,  0,  5,  2,  1,  5,  2,  1,  3, 42,  2],
               [ 0,  1,  3, 30,  1,  3, 30,  1,  3, 42,  6]], dtype=uint8)
        >>> print(repr(idx2_label))
        array([1, 1, 2, 2, 2, 4, 4, 4, 5, 5, 5])
        >>> print(repr(idx2_fx))
        array([0, 1, 0, 1, 2, 0, 1, 2, 0, 1, 2])
    """
    # INFER DTYPE? dtype = vecs_list[0].dtype
    # Build inverted index of (label, fx) pairs
    nFeats = sum(list(map(len, vecs_list)))
    nFeat_iter = map(len, vecs_list)
    label_nFeat_iter = zip(label_list, map(len, vecs_list))
    # generate featx inverted index for each feature in each annotation
    _ax2_fx = [list(range(nFeat)) for nFeat in nFeat_iter]
    # generate label inverted index for each feature in each annotation
    '''
    # this is not a real test the code just happened to be here. syntax is good though
    #-ifdef CYTH_TEST_SWAP
    _ax2_label = [[label] * nFeat for (label, nFeat) in label_nFeat_iter]
    #-else
    '''
    _ax2_label = [[label] * nFeat for (label, nFeat) in label_nFeat_iter]
    # endif is optional. the end of the function scope counts as an #endif
    '#-endif'
    # Flatten generators into the inverted index
    _flatlabels = utool.iflatten(_ax2_label)
    _flatfeatxs = utool.iflatten(_ax2_fx)

    idx2_label = np.fromiter(_flatlabels, np.int32, nFeats)
    idx2_fx = np.fromiter(_flatfeatxs, np.int32, nFeats)
    # Stack descriptors into a numpy array corresponding to the inverted index
    # This might throw a MemoryError
    idx2_vec = np.vstack(vecs_list)
    '#pragma cyth_returntup'
    return idx2_vec, idx2_label, idx2_fx
Example #14
def get_annotmatch_rowids_between(ibs, aids1, aids2, method=None):
    """

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.annotmatch_funcs import *  # NOQA
        >>> import wbia
        >>> ibs = wbia.opendb('PZ_MTEST')
        >>> aids1 = aids2 = [1, 2, 3, 4, 5, 6]
        >>> rowids_between = ibs.get_annotmatch_rowids_between
        >>> ams1 = sorted(rowids_between(aids1, aids2, method=1))
        >>> ams2 = sorted(rowids_between(aids1, aids2, method=2))
        >>> assert len(ub.find_duplicates(ams1)) == 0
        >>> assert len(ub.find_duplicates(ams2)) == 0
        >>> assert sorted(ams2) == sorted(ams1)
    """
    if method is None:
        if len(aids1) * len(aids2) > 5000:
            method = 1
        else:
            method = 2
    if method == 1:
        # Strategy 1: get all existing rows and see what intersects
        # This is better when the enumerated set of rows would be larger than
        # the database size
        unflat_rowids1L = ibs.get_annotmatch_rowids_from_aid1(aids1)
        unflat_rowids1R = ibs.get_annotmatch_rowids_from_aid2(aids1)
        unflat_rowids2L = ibs.get_annotmatch_rowids_from_aid1(aids2)
        unflat_rowids2R = ibs.get_annotmatch_rowids_from_aid2(aids2)

        am_rowids1L = {
            r
            for r in ut.iflatten(unflat_rowids1L) if r is not None
        }
        am_rowids1R = {
            r
            for r in ut.iflatten(unflat_rowids1R) if r is not None
        }
        am_rowids2L = {
            r
            for r in ut.iflatten(unflat_rowids2L) if r is not None
        }
        am_rowids2R = {
            r
            for r in ut.iflatten(unflat_rowids2R) if r is not None
        }

        ams12 = am_rowids1L.intersection(am_rowids2R)
        ams21 = am_rowids2L.intersection(am_rowids1R)
        ams = sorted(ams12.union(ams21))
        # ams = sorted(am_rowids1.intersection(am_rowids2))
        # rowids2 = ibs.get_annotmatch_rowids_from_aid2(aid_list)
        # unflat_rowids1 = ibs.get_annotmatch_rowids_from_aid(aids1)
        # unflat_rowids2 = ibs.get_annotmatch_rowids_from_aid(aids2)
        # am_rowids1 = {r for r in ut.iflatten(unflat_rowids1) if r is not None}
        # am_rowids2 = {r for r in ut.iflatten(unflat_rowids2) if r is not None}
        # ams = sorted(am_rowids1.intersection(am_rowids2))
        # ams = ut.isect(am_rowids1, am_rowids2)
    elif method == 2:
        # Strategy 2: enumerate what rows could exist and see what does exist
        # This is better when the enumerated set of rows would be smaller than
        # the database size
        edges = list(ut.product_nonsame(aids1, aids2))
        if len(edges) == 0:
            ams = []
        else:
            aids1_, aids2_ = ut.listT(edges)
            # ams = ibs.get_annotmatch_rowid_from_undirected_superkey(aids1_, aids2_)
            ams = ibs.get_annotmatch_rowid_from_superkey(aids1_, aids2_)
            if ams is None:
                ams = []
            ams = ut.filter_Nones(ams)
    return ams
Example #15
def build_daid2_chipmatch2(invindex, common_wxs, wx2_qaids, wx2_qfxs,
                           scores_list, daids_list, query_sccw):
    """
    Builds explicit chipmatches that the rest of the pipeline plays nice with

    Notation:
        An explicit cmtup_old is a tuple (fm, fs, fk) feature_matches,
        feature_scores, and feature_ranks.

        Let N be the number of matches

        A feature match, fm{shape=(N, 2), dtype=int32}, is an array where the first
        column corresponds to query_feature_indexes (qfx) and the second column
        corresponds to database_feature_indexes (dfx).

        A feature score, fs{shape=(N,), dtype=float64} is an array of scores

        A feature rank, fk{shape=(N,), dtype=int16} is an array of ranks

    Returns:
        daid2_chipmatch (dict) : (daid2_fm, daid2_fs, daid2_fk)
        Return Format::
            daid2_fm (dict): {daid: fm, ...}
            daid2_fs (dict): {daid: fs, ...}
            daid2_fk (dict): {daid: fk, ...}

    Example:
        >>> from ibeis.algo.hots.smk.smk_core import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, invindex, qindex, qparams = smk_debug.testdata_match_kernel_L2()
        >>> wx2_qrvecs, wx2_qmaws, wx2_qaids, wx2_qfxs, query_sccw = qindex
        >>> smk_alpha = ibs.cfg.query_cfg.smk_cfg.smk_alpha
        >>> smk_thresh = ibs.cfg.query_cfg.smk_cfg.smk_thresh
        >>> withinfo = True  # takes an 11s vs 2s
        >>> args = (wx2_qrvecs, wx2_qmaws, wx2_qaids, wx2_qfxs, query_sccw, invindex, withinfo, smk_alpha, smk_thresh)
        >>> retL1 =  match_kernel_L1(*args)
        >>> (daid2_totalscore, common_wxs, scores_list, daids_list, idf_list, daid_agg_keys,)  = retL1
        >>> daid2_chipmatch_old = build_daid2_chipmatch2(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
        >>> daid2_chipmatch_new = build_daid2_chipmatch3(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
        >>> print(utool.is_dicteq(daid2_chipmatch_old[0], daid2_chipmatch_new[0]))
        >>> print(utool.is_dicteq(daid2_chipmatch_old[2], daid2_chipmatch_new[2]))
        >>> print(utool.is_dicteq(daid2_chipmatch_old[1],  daid2_chipmatch_new[1]))

    %timeit build_daid2_chipmatch2(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
    %timeit build_daid2_chipmatch3(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
    """
    # FIXME: move groupby to vtool
    if utool.VERBOSE:
        print('[smk_core] build cmtup_old')

    wx2_dfxs  = invindex.wx2_fxs
    daid2_sccw = invindex.daid2_sccw

    qfxs_list = [wx2_qfxs[wx] for wx in common_wxs]
    dfxs_list = [wx2_dfxs[wx] for wx in common_wxs]

    shapes_list  = [scores.shape for scores in scores_list]  # 51us
    shape_ranges = [(mem_arange(w), mem_arange(h)) for (w, h) in shapes_list]  # 230us
    ijs_list = [mem_meshgrid(wrange, hrange) for (wrange, hrange) in shape_ranges]  # 278us
    # Normalize scores for words, nMatches, and query sccw (still need daid sccw)
    nscores_iter = (scores * query_sccw for scores in scores_list)

    # FIXME: Preflatten all of these lists
    out_ijs = [
        list(zip(_is.flat, _js.flat))
        for (_is, _js) in ijs_list
    ]
    out_qfxs = [
        [qfxs[ix] for (ix, jx) in ijs]
        for (qfxs, ijs) in zip(qfxs_list, out_ijs)
    ]
    out_dfxs = [
        [dfxs[jx] for (ix, jx) in ijs]
        for (dfxs, ijs) in zip(dfxs_list, out_ijs)
    ]
    out_daids = (
        [daids[jx] for (ix, jx) in ijs]
        for (daids, ijs) in zip(daids_list, out_ijs)
    )
    out_scores = (
        [nscores[ijx] for ijx in ijs]
        for (nscores, ijs) in zip(nscores_iter, out_ijs)
    )
    nested_fm_iter = [
        [
            tuple(product(qfxs_, dfxs_))
            for qfxs_, dfxs_ in zip(qfxs, dfxs)
        ]
        for qfxs, dfxs in zip(out_qfxs, out_dfxs)
    ]
    all_fms = np.array(list(utool.iflatten(utool.iflatten(nested_fm_iter))), dtype=hstypes.FM_DTYPE)
    nested_nmatch_list = [[len(fm) for fm in fms] for fms in nested_fm_iter]
    nested_daid_iter = (
        [
            [daid] * nMatch
            for nMatch, daid in zip(nMatch_list, daids)
        ]
        for nMatch_list, daids in zip(nested_nmatch_list, out_daids)
    )
    nested_score_iter = (
        [
            [score / nMatch] * nMatch
            for nMatch, score in zip(nMatch_list, scores)
        ]
        for nMatch_list, scores in zip(nested_nmatch_list, out_scores)
    )
    all_daids_ = np.array(list(utool.iflatten(utool.iflatten(nested_daid_iter))), dtype=hstypes.INDEX_TYPE)
    all_fss = np.array(list(utool.iflatten(utool.iflatten(nested_score_iter))), dtype=hstypes.FS_DTYPE)

    # Filter out 0 scores
    keep_xs = np.where(all_fss > 0)[0]
    all_fss = all_fss.take(keep_xs)
    all_fms = all_fms.take(keep_xs, axis=0)
    all_daids_ = all_daids_.take(keep_xs)

    daid_keys, groupxs = clustertool.group_indices(all_daids_)
    fs_list = clustertool.apply_grouping(all_fss, groupxs)
    fm_list = clustertool.apply_grouping(all_fms, groupxs)
    daid2_fm = {daid: fm for daid, fm in zip(daid_keys, fm_list)}
    daid2_fs = {daid: fs * daid2_sccw[daid] for daid, fs in zip(daid_keys, fs_list)}
    # FIXME: generalize to when nAssign > 1
    daid2_fk = {daid: np.ones(fs.size, dtype=hstypes.FK_DTYPE) for daid, fs in zip(daid_keys, fs_list)}
    daid2_chipmatch = (daid2_fm, daid2_fs, daid2_fk)

    return daid2_chipmatch
Example #16
 def stagger_group(list_):
     return ut.filter_Nones(ut.iflatten(zip_longest(*list_)))
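
A stdlib-only sketch of what stagger_group produces, assuming ut.iflatten acts like itertools.chain.from_iterable and ut.filter_Nones simply drops None entries: the sublists are interleaved round-robin and the zip_longest padding is discarded.

from itertools import chain, zip_longest

def stagger_group_sketch(list_):
    # Interleave the sublists round-robin and drop the None padding.
    return [x for x in chain.from_iterable(zip_longest(*list_)) if x is not None]

print(stagger_group_sketch([[1, 2, 3], ['a', 'b']]))
# [1, 'a', 2, 'b', 3]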
Example #17
aug_graph = graph.copy()

# remove cut edges from augmented graph
edge_to_iscut = nx.get_edge_attributes(aug_graph, 'is_cut')
cut_edges = [
    (u, v)
    for (u, v, d) in aug_graph.edges(data=True)
    if not (d.get('is_cut') or d.get('decision', 'unreviewed') in ['nomatch'])
]
cut_edges = [edge for edge, flag in edge_to_iscut.items() if flag]
aug_graph.remove_edges_from(cut_edges)


# Enumerate cliques inside labels
unflat_edges = [list(ut.itertwo(nodes)) for nodes in label_to_nodes.values()]
node_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]

# Remove candidate MST edges that exist in the original graph
orig_edges = list(aug_graph.edges())
candidate_mst_edges = [edge for edge in node_pairs if not aug_graph.has_edge(*edge)]
# randomness prevents chains and visually looks better
rng = np.random.RandomState(42)


def _randint():
    return 0
    return rng.randint(0, 100)


aug_graph.add_edges_from(candidate_mst_edges)
# Weight edges in aug_graph such that existing edges are chosen
Example #18
def build_correspondences(sparse_list, qfxs_list, dfxs_list, daids_list):
    """ helper
    these list comprehensions replace the previous for loop
    they still need to be optimized a little bit (and made clearer)
    can probably unnest the list comprehensions as well
    """

    """
    IGNORE
    Legacy::
        def old_build_correspondences(sparse_list, qfxs_list, dfxs_list, daids_list):
            fm_nestlist_ = []
            fs_nestlist_ = []
            daid_nestlist_ = []
            for scores, qfxs, dfxs, daids in zip(sparse_list, qfxs_list, dfxs_list, daids_list):
                for rx, cx, score in zip(scores.row, scores.col, scores.data):
                    _fm = tuple(product(qfxs[rx], dfxs[cx]))
                    _fs = [score / len(_fm)] * len(_fm)
                    _daid = [daids[cx]] * len(_fm)
                    fm_nestlist_.append(_fm)
                    fs_nestlist_.append(_fs)
                    daid_nestlist_.append(_daid)
            return fm_nestlist_, fs_nestlist_, daid_nestlist_

        oldtup_ = old_build_correspondences(sparse_list, qfxs_list, dfxs_list, daids_list)
        fm_nestlist_, fs_nestlist_, daid_nestlist_ = oldtup_
        newtup_ = build_correspondences(sparse_list, qfxs_list, dfxs_list, daids_list)
        fm_nestlist, fs_nestlist, daid_nestlist = newtup_

        assert fm_nestlist == fm_nestlist_
        assert fs_nestlist == fs_nestlist_
        assert daid_nestlist == daid_nestlist_

        47ms
        %timeit build_correspondences(sparse_list, qfxs_list, dfxs_list, daids_list)

        59ms
        %timeit old_build_correspondences(sparse_list, qfxs_list, dfxs_list, daids_list)
    IGNORE
    """
    # FIXME: rewrite double comprehension as a flat comprehension

    # Build nested feature matches (a single match might have many members)
    fm_nestlist = [
        tuple(product(qfxs[rx], dfxs[cx]))
        for scores, qfxs, dfxs in zip(sparse_list, qfxs_list, dfxs_list)
        for rx, cx in zip(scores.row, scores.col)
    ]
    nFm_list = [len(fm) for fm in fm_nestlist]
    #fs_unsplit = (score
    #              for scores in sparse_list
    #              for score in scores.data)
    #daid_unsplit = (daids[cx]
    #                for scores, daids in zip(sparse_list, daids_list)
    #                for cx in scores.col)
    # Build nested feature scores
    fs_unsplit = utool.iflatten(
        (scores.data for scores in sparse_list))
    # Build nested feature matches (a single match might have many members)
    daid_unsplit = utool.iflatten(
        (daids.take(scores.col)
         for scores, daids in zip(sparse_list, daids_list)))
    # Expand feature scores and daids splitting scores amongst match members
    fs_nestlist = [
        [score / nFm] * nFm
        for score, nFm in zip(fs_unsplit, nFm_list)
    ]
    daid_nestlist = [
        [daid] * nFm
        for daid, nFm in zip(daid_unsplit, nFm_list)
    ]

    if DEBUG_SMK:
        assert len(fm_nestlist) == len(fs_nestlist), 'inconsistent len'
        assert len(fm_nestlist) == len(nFm_list), 'inconsistent len'
        assert len(daid_nestlist) == len(fs_nestlist), 'inconsistent len'
        min_ = min(2, len(nFm_list))
        max_ = min(15, len(nFm_list))
        print('nFm_list[_min:_max]      = ' + utool.list_str(nFm_list[min_:max_]))
        print('fm_nestlist[_min:_max]   = ' + utool.list_str(fm_nestlist[min_:max_]))
        print('fs_nestlist[_min:_max]   = ' + utool.list_str(fs_nestlist[min_:max_]))
        print('daid_nestlist[_min:_max] = ' + utool.list_str(daid_nestlist[min_:max_]))
        for fm_, fs_, daid_ in zip(fm_nestlist, fs_nestlist, daid_nestlist):
            assert len(fm_) == len(fs_), 'inconsistent len'
            assert len(fm_) == len(daid_), 'inconsistent len'
        print('[smk_core] checked build_chipmatch correspondence ...ok')
    return fm_nestlist, fs_nestlist, daid_nestlist
Example #19
def compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf,
                       wx2_dmaws, smk_alpha, smk_thresh, verbose=False):
    """
    Computes sccw normalization scalar for the database annotations.
    This is gamma from the SMK paper.
    sccw is a self consistency criterion weight --- a scalar which ensures
    the score of K(X, X) = 1

    Args:
        idx2_daid ():
        wx2_drvecs ():
        wx2_aids ():
        wx2_idf ():
        wx2_dmaws ():
        smk_alpha ():
        smk_thresh ():

    Returns:
        daid2_sccw

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_index
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> #tup = smk_debug.testdata_compute_data_sccw(db='testdb1')
        >>> tup = smk_debug.testdata_compute_data_sccw(db='PZ_MTEST')
        >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, qparams = tup
        >>> wx2_dflags = invindex.wx2_dflags
        >>> ws2_idxs = invindex.wx2_idxs
        >>> wx2_dmaws  = invindex.wx2_dmaws
        >>> idx2_daid  = invindex.idx2_daid
        >>> daids      = invindex.daids
        >>> smk_alpha  = qparams.smk_alpha
        >>> smk_thresh = qparams.smk_thresh
        >>> wx2_idf    = wx2_idf
        >>> verbose = True
        >>> invindex.invindex_dbgstr()
        >>> invindex.report_memory()
        >>> invindex.report_memsize()
        >>> daid2_sccw = smk_index.compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws, smk_alpha, smk_thresh, verbose)
    """

    #for wx in wx_sublist:
    #    print(len(wx2_dmaws

    verbose_ = ut.VERBOSE or verbose

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)
    if not ut.QUIET:
        print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
    if verbose_:
        print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))
        mark1, end1_ = ut.log_progress(
            '[smk_index.sccw] SCCW group (by present words): ', len(wx2_drvecs),
            freq=100, with_time=WITH_TOTALTIME)

    # Group by daids first and then by word index
    # Get list of aids and rvecs w.r.t. words (ie one item per word)
    wx_sublist = np.array(list(wx2_drvecs.keys()))
    aids_perword  = [wx2_aids[wx] for wx in wx_sublist]

    # wx_list1: Lays out word indexes for each annotation
    # tx_list1: Temporary within annotation subindex + wx uniquely identifies
    # item in wx2_drvecs, wx2_dflags, and wx2_dmaws

    # Flatten out indexes to perform grouping
    flat_aids = np.hstack(aids_perword)
    count = len(flat_aids)
    txs_perword = [np.arange(aids.size) for aids in aids_perword]
    flat_txs  = np.hstack(txs_perword)
    # fromiter is faster for flat_wxs because it is not a list of numpy arrays
    wxs_perword = ([wx] * len(aids) for wx, aids in zip(wx_sublist, aids_perword))
    flat_wxs  = np.fromiter(ut.iflatten(wxs_perword), hstypes.INDEX_TYPE, count)

    # Group flat indexes by annotation id
    unique_aids, annot_groupxs = clustertool.group_indices(flat_aids)

    # Wxs and Txs grouped by annotation id
    wxs_perannot = clustertool.apply_grouping_iter(flat_wxs, annot_groupxs)
    txs_perannot = clustertool.apply_grouping_iter(flat_txs, annot_groupxs)

    # Group by word inside each annotation group
    wxsubgrouping_perannot = [clustertool.group_indices(wxs)
                              for wxs in wxs_perannot]
    word_groupxs_perannot = (groupxs for wxs, groupxs in wxsubgrouping_perannot)
    txs_perword_perannot = [clustertool.apply_grouping(txs, groupxs)
                            for txs, groupxs in
                            zip(txs_perannot, word_groupxs_perannot)]
    wxs_perword_perannot = [wxs for wxs, groupxs in wxsubgrouping_perannot]

    # Group relevant data for sccw measure by word for each annotation grouping

    def _vector_subgroup_by_wx(wx2_arr, wxs_perword_perannot, txs_perword_perannot):
        return [[wx2_arr[wx].take(txs, axis=0)
                 for wx, txs in zip(wx_perword_, txs_perword_)]
                for wx_perword_, txs_perword_ in
                zip(wxs_perword_perannot, txs_perword_perannot)]

    def _scalar_subgroup_by_wx(wx2_scalar, wxs_perword_perannot):
        return [[wx2_scalar[wx] for wx in wxs] for wxs in wxs_perword_perannot]

    subgrouped_drvecs = _vector_subgroup_by_wx(wx2_drvecs, wxs_perword_perannot, txs_perword_perannot)
    subgrouped_dmaws  = _vector_subgroup_by_wx(wx2_dmaws,  wxs_perword_perannot, txs_perword_perannot)
    # If we aren't using dmaws replace it with an infinite None iterator
    #subgrouped_dmaws  = iter(lambda: None, 1)
    subgrouped_dflags = _vector_subgroup_by_wx(wx2_dflags, wxs_perword_perannot, txs_perword_perannot)
    #subgrouped_dflags  = iter(lambda: None, 1)
    subgrouped_idfs   = _scalar_subgroup_by_wx(wx2_idf, wxs_perword_perannot)

    if verbose_:
        end1_()
        mark2, end2_ = ut.log_progress(lbl='[smk_index.sccw] SCCW Sum (over daid): ',
                                        total=len(unique_aids), freq=100, with_time=WITH_TOTALTIME)
        progiter = ut.ProgressIter(lbl='[smk_index.sccw] SCCW Sum (over daid): ',
                                   total=len(unique_aids), freq=10, with_time=WITH_TOTALTIME)
    else:
        progiter = ut.identity

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_data_smksumm(subgrouped_idfs, subgrouped_drvecs)

    sccw_list = [
        smk_scoring.sccw_summation(rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh)
        for rvecs_list, flags_list, maws_list, idf_list in
        progiter(zip(subgrouped_drvecs, subgrouped_dflags, subgrouped_dmaws, subgrouped_idfs))
    ]
    daid2_sccw = dict(zip(unique_aids, sccw_list))

    if verbose_:
        end2_()
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')

    return daid2_sccw
Example #20
def make_netx_graph_from_aid_groups(ibs, aids_list, only_reviewed_matches=True,
                                    invis_edges=None, ensure_edges=None,
                                    temp_nids=None, allow_directed=False):
    r"""
    Args:
        ibs (ibeis.IBEISController): image analysis api
        aids_list (list):

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.viz.viz_graph import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> aids_list = [[1, 2, 3, 4], [5, 6, 7]]
        >>> invis_edges = [(1, 5)]
        >>> only_reviewed_matches = True
        >>> graph = make_netx_graph_from_aid_groups(ibs, aids_list,
        >>>                                         only_reviewed_matches,
        >>>                                         invis_edges)
        >>> list(nx.connected_components(graph.to_undirected()))
    """
    #aids_list, nid_list = ibs.group_annots_by_name(aid_list)
    unique_aids = list(ut.flatten(aids_list))

    # grouped version
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)

    if only_reviewed_matches:
        annotmatch_rowids = ibs.get_annotmatch_rowid_from_superkey(aids1, aids2)
        annotmatch_rowids = ut.filter_Nones(annotmatch_rowids)
        aids1 = ibs.get_annotmatch_aid1(annotmatch_rowids)
        aids2 = ibs.get_annotmatch_aid2(annotmatch_rowids)

    graph = make_netx_graph_from_aidpairs(ibs, aids1, aids2, unique_aids=unique_aids)

    if ensure_edges is not None:
        if ensure_edges == 'all':
            ensure_edges = list(ut.upper_diag_self_prodx(list(graph.nodes())))
        ensure_edges_ = []
        for edge in ensure_edges:
            edge = tuple(edge)
            redge = tuple(edge[::-1])  # HACK
            if graph.has_edge(*edge):
                ensure_edges_.append(edge)
                pass
                #nx.set_edge_attributes(graph, 'weight', {edge: .001})
            elif (not allow_directed) and graph.has_edge(*redge):
                ensure_edges_.append(redge)
                #nx.set_edge_attributes(graph, 'weight', {redge: .001})
                pass
            else:
                ensure_edges_.append(edge)
                #graph.add_edge(*edge, weight=.001)
                graph.add_edge(*edge)

    if temp_nids is None:
        unique_nids = ibs.get_annot_nids(list(graph.nodes()))
    else:
        # HACK
        unique_nids = [1] * len(list(graph.nodes()))
        #unique_nids = temp_nids

    nx.set_node_attributes(graph, 'nid', dict(zip(graph.nodes(), unique_nids)))

    import plottool as pt
    ensure_names_are_connected(graph, aids_list)

    # Color edges by nid
    color_by_nids(graph, unique_nids=unique_nids)
    if invis_edges:
        for edge in invis_edges:
            if graph.has_edge(*edge):
                nx.set_edge_attributes(graph, 'style', {edge: 'invis'})
                nx.set_edge_attributes(graph, 'invisible', {edge: True})
            else:
                graph.add_edge(*edge, style='invis', invisible=True)

    # Hack color images orange
    if ensure_edges:
        nx.set_edge_attributes(graph, 'color',
                               {tuple(edge): pt.ORANGE for edge in ensure_edges_})

    return graph
Example #21
def compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf,
                       wx2_dmaws, smk_alpha, smk_thresh, verbose=False):
    """
    Computes sccw normalization scalar for the database annotations.
    This is gamma from the SMK paper.
    sccw is a self consistency criterion weight --- a scalar which ensures
    the score of K(X, X) = 1

    Args:
        idx2_daid ():
        wx2_drvecs ():
        wx2_aids ():
        wx2_idf ():
        wx2_dmaws ():
        smk_alpha ():
        smk_thresh ():

    Returns:
        daid2_sccw

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_index
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> #tup = smk_debug.testdata_compute_data_sccw(db='testdb1')
        >>> tup = smk_debug.testdata_compute_data_sccw(db='PZ_MTEST')
        >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, qparams = tup
        >>> wx2_dflags = invindex.wx2_dflags
        >>> ws2_idxs = invindex.wx2_idxs
        >>> wx2_dmaws  = invindex.wx2_dmaws
        >>> idx2_daid  = invindex.idx2_daid
        >>> daids      = invindex.daids
        >>> smk_alpha  = qparams.smk_alpha
        >>> smk_thresh = qparams.smk_thresh
        >>> wx2_idf    = wx2_idf
        >>> verbose = True
        >>> invindex.invindex_dbgstr()
        >>> invindex.report_memory()
        >>> invindex.report_memsize()
        >>> daid2_sccw = smk_index.compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws, smk_alpha, smk_thresh, verbose)
    """

    #for wx in wx_sublist:
    #    print(len(wx2_dmaws

    verbose_ = ut.VERBOSE or verbose

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)
    if not ut.QUIET:
        print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
    if verbose_:
        print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))

    # Group by daids first and then by word index
    # Get list of aids and rvecs w.r.t. words (ie one item per word)
    wx_sublist = np.array(list(wx2_drvecs.keys()))
    aids_perword  = [wx2_aids[wx] for wx in wx_sublist]

    # wx_list1: Lays out word indexes for each annotation
    # tx_list1: Temporary within annotation subindex + wx uniquely identifies
    # item in wx2_drvecs, wx2_dflags, and wx2_dmaws

    # Flatten out indexes to perform grouping
    flat_aids = np.hstack(aids_perword)
    count = len(flat_aids)
    txs_perword = [np.arange(aids.size) for aids in aids_perword]
    flat_txs  = np.hstack(txs_perword)
    # fromiter is faster for flat_wxs because it is not a list of numpy arrays
    wxs_perword = ([wx] * len(aids) for wx, aids in zip(wx_sublist, aids_perword))
    flat_wxs  = np.fromiter(ut.iflatten(wxs_perword), hstypes.INDEX_TYPE, count)

    # Group flat indexes by annotation id
    unique_aids, annot_groupxs = clustertool.group_indices(flat_aids)

    # Wxs and Txs grouped by annotation id
    wxs_perannot = clustertool.apply_grouping_iter(flat_wxs, annot_groupxs)
    txs_perannot = clustertool.apply_grouping_iter(flat_txs, annot_groupxs)

    # Group by word inside each annotation group
    wxsubgrouping_perannot = [clustertool.group_indices(wxs)
                              for wxs in wxs_perannot]
    word_groupxs_perannot = (groupxs for wxs, groupxs in wxsubgrouping_perannot)
    txs_perword_perannot = [clustertool.apply_grouping(txs, groupxs)
                            for txs, groupxs in
                            zip(txs_perannot, word_groupxs_perannot)]
    wxs_perword_perannot = [wxs for wxs, groupxs in wxsubgrouping_perannot]

    # Group relevant data for sccw measure by word for each annotation grouping

    def _vector_subgroup_by_wx(wx2_arr, wxs_perword_perannot, txs_perword_perannot):
        return [[wx2_arr[wx].take(txs, axis=0)
                 for wx, txs in zip(wx_perword_, txs_perword_)]
                for wx_perword_, txs_perword_ in
                zip(wxs_perword_perannot, txs_perword_perannot)]

    def _scalar_subgroup_by_wx(wx2_scalar, wxs_perword_perannot):
        return [[wx2_scalar[wx] for wx in wxs] for wxs in wxs_perword_perannot]

    subgrouped_drvecs = _vector_subgroup_by_wx(wx2_drvecs, wxs_perword_perannot, txs_perword_perannot)
    subgrouped_dmaws  = _vector_subgroup_by_wx(wx2_dmaws,  wxs_perword_perannot, txs_perword_perannot)
    # If we aren't using dmaws replace it with an infinite None iterator
    #subgrouped_dmaws  = iter(lambda: None, 1)
    subgrouped_dflags = _vector_subgroup_by_wx(wx2_dflags, wxs_perword_perannot, txs_perword_perannot)
    #subgrouped_dflags  = iter(lambda: None, 1)
    subgrouped_idfs   = _scalar_subgroup_by_wx(wx2_idf, wxs_perword_perannot)

    if verbose_:
        progiter = ut.ProgressIter(lbl='[smk_index.sccw] SCCW Sum (over daid): ',
                                   total=len(unique_aids), freq=10, with_time=WITH_TOTALTIME)
    else:
        progiter = ut.identity

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_data_smksumm(subgrouped_idfs, subgrouped_drvecs)

    sccw_list = [
        smk_scoring.sccw_summation(rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh)
        for rvecs_list, flags_list, maws_list, idf_list in
        progiter(zip(subgrouped_drvecs, subgrouped_dflags, subgrouped_dmaws, subgrouped_idfs))
    ]
    daid2_sccw = dict(zip(unique_aids, sccw_list))

    if verbose_:
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')

    return daid2_sccw
Example #22
def sccw_summation(rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh):
    r"""
    Computes gamma from "To Aggregate or not to aggregate". Every component in
    each list is with respect to a different word.

    scc = self consistency criterion
    It is a scalar which ensures K(X, X) = 1

    Args:
        rvecs_list (list of ndarrays): residual vectors for every word
        idf_list (list of floats): idf weight for each word
        maws_list (list of ndarrays): multi-assign weights for each word for each residual vector
        smk_alpha (float): selectivity power
        smk_thresh (float): selectivity threshold

    Returns:
        float: sccw self-consistency-criterion weight

    Math:
        \begin{equation}
        \gamma(X) = (\sum_{c \in \mathcal{C}} w_c M(X_c, X_c))^{-0.5}
        \end{equation}

    Example:
        >>> from ibeis.algo.hots.smk.smk_scoring import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_scoring
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> #idf_list, rvecs_list, maws_list, smk_alpha, smk_thresh, wx2_flags = smk_debug.testdata_sccw_sum(db='testdb1')
        >>> tup = smk_debug.testdata_sccw_sum(db='PZ_MTEST', nWords=128000)
        >>> idf_list, rvecs_list, flags_list, maws_list, smk_alpha, smk_thresh = tup
        >>> sccw = smk_scoring.sccw_summation(rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh)
        >>> print(sccw)
        0.0201041835751

    CommandLine:
        python smk_match.py --db PZ_MOTHERS --nWords 128

    Ignore:
        0.0384477314197
        qmaws_list = dmaws_list = maws_list
        drvecs_list = qrvecs_list = rvecs_list
        dflags_list = qflags_list = flags_list

        flags_list = flags_list[7:10]
        maws_list  = maws_list[7:10]
        idf_list   = idf_list[7:10]
        rvecs_list = rvecs_list[7:10]

    """
    num_rvecs = len(rvecs_list)
    if DEBUG_SMK:
        assert maws_list is None or len(maws_list) == num_rvecs, 'inconsistent lengths'
        assert num_rvecs == len(idf_list), 'inconsistent lengths'
        assert maws_list is None or list(map(len, maws_list)) == list(map(len, rvecs_list)), 'inconsistent per word lengths'
        assert flags_list is None or list(map(len, maws_list)) == list(map(len, flags_list)), 'inconsistent per word lengths'
        assert flags_list is None or len(flags_list) == num_rvecs, 'inconsistent lengths'
    # Indexing with asymmetric multi-assignment might get you a non-1 self score?
    # List of scores for every word.
    scores_list = score_matches(rvecs_list, rvecs_list, flags_list, flags_list,
                                maws_list, maws_list, smk_alpha, smk_thresh,
                                idf_list)
    if DEBUG_SMK:
        assert len(scores_list) == num_rvecs, 'bad rvec and score'
        assert len(idf_list) == len(scores_list), 'bad weight and score'
    # Summation over all residual vector scores
    _count = sum((scores.size for scores in  scores_list))
    _iter  = utool.iflatten(scores.ravel() for scores in scores_list)
    self_rawscore = np.fromiter(_iter, np.float64, _count).sum()
    # Square root inverse to enforce normalized self-score is 1.0
    sccw = np.reciprocal(np.sqrt(self_rawscore))
    try:
        assert not np.isinf(sccw), 'sccw cannot be infinite'
        assert not np.isnan(sccw), 'sccw cannot be nan'
    except AssertionError as ex:
        utool.printex(ex, 'problem computing self consistency criterion weight',
                      keys=['num_rvecs'], iswarning=True)
        if num_rvecs > 0:
            raise
        else:
            sccw = 1
    return sccw
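
A toy numeric check of the normalization at the end of sccw_summation: with a raw self-score S summed over all words, sccw = S ** -0.5, so rescaling by sccw squared brings the self-score back to 1.0 (the self-consistency criterion the docstring describes). The per-word scores below are made up for illustration.

import numpy as np

scores_list = [np.array([0.4, 0.1]), np.array([0.25])]  # made-up per-word self scores
_count = sum(scores.size for scores in scores_list)
_iter = (s for scores in scores_list for s in scores.ravel())
self_rawscore = np.fromiter(_iter, np.float64, _count).sum()
sccw = np.reciprocal(np.sqrt(self_rawscore))
print(sccw, sccw ** 2 * self_rawscore)  # ~1.1547, 1.0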
Example #24
def build_daid2_chipmatch2(invindex, common_wxs, wx2_qaids, wx2_qfxs,
                           scores_list, daids_list, query_sccw):
    """
    Builds explicit chipmatches that the rest of the pipeline plays nice with

    Notation:
        An explicit cmtup_old is a tuple (fm, fs, fk) feature_matches,
        feature_scores, and feature_ranks.

        Let N be the number of matches

        A feature match, fm{shape=(N, 2), dtype=int32}, is an array where the first
        column corresponds to query_feature_indexes (qfx) and the second column
        corresponds to database_feature_indexes (dfx).

        A feature score, fs{shape=(N,), dtype=float64} is an array of scores

        A feature rank, fk{shape=(N,), dtype=int16} is an array of ranks

    Returns:
        daid2_chipmatch (dict) : (daid2_fm, daid2_fs, daid2_fk)
        Return Format::
            daid2_fm (dict): {daid: fm, ...}
            daid2_fs (dict): {daid: fs, ...}
            daid2_fk (dict): {daid: fk, ...}

    Example:
        >>> from ibeis.algo.hots.smk.smk_core import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, invindex, qindex, qparams = smk_debug.testdata_match_kernel_L2()
        >>> wx2_qrvecs, wx2_qmaws, wx2_qaids, wx2_qfxs, query_sccw = qindex
        >>> smk_alpha = ibs.cfg.query_cfg.smk_cfg.smk_alpha
        >>> smk_thresh = ibs.cfg.query_cfg.smk_cfg.smk_thresh
        >>> withinfo = True  # takes an 11s vs 2s
        >>> args = (wx2_qrvecs, wx2_qmaws, wx2_qaids, wx2_qfxs, query_sccw, invindex, withinfo, smk_alpha, smk_thresh)
        >>> retL1 =  match_kernel_L1(*args)
        >>> (daid2_totalscore, common_wxs, scores_list, daids_list, idf_list, daid_agg_keys,)  = retL1
        >>> daid2_chipmatch_old = build_daid2_chipmatch2(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
        >>> daid2_chipmatch_new = build_daid2_chipmatch3(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
        >>> print(utool.is_dicteq(daid2_chipmatch_old[0], daid2_chipmatch_new[0]))
        >>> print(utool.is_dicteq(daid2_chipmatch_old[2], daid2_chipmatch_new[2]))
        >>> print(utool.is_dicteq(daid2_chipmatch_old[1],  daid2_chipmatch_new[1]))

    %timeit build_daid2_chipmatch2(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
    %timeit build_daid2_chipmatch3(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
    """
    # FIXME: move groupby to vtool
    if utool.VERBOSE:
        print('[smk_core] build cmtup_old')

    wx2_dfxs = invindex.wx2_fxs
    daid2_sccw = invindex.daid2_sccw

    qfxs_list = [wx2_qfxs[wx] for wx in common_wxs]
    dfxs_list = [wx2_dfxs[wx] for wx in common_wxs]

    shapes_list = [scores.shape for scores in scores_list]  # 51us
    shape_ranges = [(mem_arange(w), mem_arange(h))
                    for (w, h) in shapes_list]  # 230us
    ijs_list = [
        mem_meshgrid(wrange, hrange) for (wrange, hrange) in shape_ranges
    ]  # 278us
    # Normalize scores for words, nMatches, and query sccw (still need daid sccw)
    nscores_iter = (scores * query_sccw for scores in scores_list)

    # FIXME: Preflatten all of these lists
    out_ijs = [list(zip(_is.flat, _js.flat)) for (_is, _js) in ijs_list]
    out_qfxs = [[qfxs[ix] for (ix, jx) in ijs]
                for (qfxs, ijs) in zip(qfxs_list, out_ijs)]
    out_dfxs = [[dfxs[jx] for (ix, jx) in ijs]
                for (dfxs, ijs) in zip(dfxs_list, out_ijs)]
    out_daids = ([daids[jx] for (ix, jx) in ijs]
                 for (daids, ijs) in zip(daids_list, out_ijs))
    out_scores = ([nscores[ijx] for ijx in ijs]
                  for (nscores, ijs) in zip(nscores_iter, out_ijs))
    nested_fm_iter = [[
        tuple(product(qfxs_, dfxs_)) for qfxs_, dfxs_ in zip(qfxs, dfxs)
    ] for qfxs, dfxs in zip(out_qfxs, out_dfxs)]
    all_fms = np.array(list(utool.iflatten(utool.iflatten(nested_fm_iter))),
                       dtype=hstypes.FM_DTYPE)
    nested_nmatch_list = [[len(fm) for fm in fms] for fms in nested_fm_iter]
    nested_daid_iter = ([
        [daid] * nMatch for nMatch, daid in zip(nMatch_list, daids)
    ] for nMatch_list, daids in zip(nested_nmatch_list, out_daids))
    nested_score_iter = ([
        [score / nMatch] * nMatch
        for nMatch, score in zip(nMatch_list, scores)
    ] for nMatch_list, scores in zip(nested_nmatch_list, out_scores))
    all_daids_ = np.array(list(utool.iflatten(
        utool.iflatten(nested_daid_iter))),
                          dtype=hstypes.INDEX_TYPE)
    all_fss = np.array(list(utool.iflatten(utool.iflatten(nested_score_iter))),
                       dtype=hstypes.FS_DTYPE)

    # Filter out 0 scores
    keep_xs = np.where(all_fss > 0)[0]
    all_fss = all_fss.take(keep_xs)
    all_fms = all_fms.take(keep_xs, axis=0)
    all_daids_ = all_daids_.take(keep_xs)

    daid_keys, groupxs = clustertool.group_indices(all_daids_)
    fs_list = clustertool.apply_grouping(all_fss, groupxs)
    fm_list = clustertool.apply_grouping(all_fms, groupxs)
    daid2_fm = {daid: fm for daid, fm in zip(daid_keys, fm_list)}
    daid2_fs = {
        daid: fs * daid2_sccw[daid]
        for daid, fs in zip(daid_keys, fs_list)
    }
    # FIXME: generalize to when nAssign > 1
    daid2_fk = {
        daid: np.ones(fs.size, dtype=hstypes.FK_DTYPE)
        for daid, fs in zip(daid_keys, fs_list)
    }
    daid2_chipmatch = (daid2_fm, daid2_fs, daid2_fk)

    return daid2_chipmatch
Example #25
def get_annotmatch_subgraph(ibs):
    r"""
    http://bokeh.pydata.org/en/latest/
    https://github.com/jsexauer/networkx_viewer

    TODO: Need a special visualization
        In the web I need:
            * graph of annotations matches.
            * can move them around.
            * edit lines between them.
            * http://stackoverflow.com/questions/15373530/web-graph-visualization-tool

            This should share functionality with a name view.

    Args:
        ibs (IBEISController):  ibeis controller object

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_subgraph --show

        # Networkx example
        python -m ibeis.viz.viz_graph --test-show_chipmatch_graph:0 --show

    Ignore:

        from ibeis import viz

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> result = get_annotmatch_subgraph(ibs)
        >>> ut.show_if_requested()
    """
    #import ibeis
    #ibs = ibeis.opendb(db='PZ_MTEST')
    #rowids = ibs._get_all_annotmatch_rowids()
    #aids1 = ibs.get_annotmatch_aid1(rowids)
    #aids2 = ibs.get_annotmatch_aid2(rowids)
    #
    #
    nids = ibs.get_valid_nids()
    nids = nids[0:5]
    aids_list = ibs.get_name_aids(nids)
    import itertools
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)

    # Enumerate annotmatch properties
    rng = np.random.RandomState(0)
    edge_props = {
        'weight': rng.rand(len(aids1)),
        'reviewer_confidence': rng.rand(len(aids1)),
        'algo_confidence': rng.rand(len(aids1)),
    }

    # Remove data that does not need to be visualized
    # (don't show all the aids if you don't have to)
    thresh = .5
    flags = edge_props['weight'] > thresh
    aids1 = ut.compress(aids1, flags)
    aids2 = ut.compress(aids2, flags)
    edge_props = {key: ut.compress(val, flags) for key, val in edge_props.items()}

    edge_keys = list(edge_props.keys())
    edge_vals = ut.dict_take(edge_props, edge_keys)

    unique_aids = list(set(aids1 + aids2))

    # Make a graph between the chips
    nodes = list(zip(unique_aids))
    edges = list(zip(aids1, aids2, *edge_vals))
    node_lbls = [('aid', 'int')]
    edge_lbls = [('weight', 'float')]
    from ibeis.viz import viz_graph
    netx_graph = viz_graph.make_netx_graph(nodes, edges, node_lbls, edge_lbls)
    fnum = None
    #zoom = kwargs.get('zoom', .4)
    zoom = .4
    viz_graph.viz_netx_chipgraph(ibs, netx_graph, fnum=fnum, with_images=True, zoom=zoom)
Example #26
0
def get_annotmatch_subgraph(ibs):
    r"""
    http://bokeh.pydata.org/en/latest/
    https://github.com/jsexauer/networkx_viewer

    TODO: Need a special visualization
        In the web I need:
            * graph of annotation matches.
            * can move them around.
            * edit lines between them.
            * http://stackoverflow.com/questions/15373530/web-graph-visualization-tool

            This should share functionality with a name view.

    Args:
        ibs (IBEISController):  ibeis controller object

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_subgraph --show

        # Networkx example
        python -m ibeis.viz.viz_graph --test-show_chipmatch_graph:0 --show

    Ignore:

        from ibeis import viz

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> result = get_annotmatch_subgraph(ibs)
        >>> ut.show_if_requested()
    """
    #import ibeis
    #ibs = ibeis.opendb(db='PZ_MTEST')
    #rowids = ibs._get_all_annotmatch_rowids()
    #aids1 = ibs.get_annotmatch_aid1(rowids)
    #aids2 = ibs.get_annotmatch_aid2(rowids)
    #
    #
    nids = ibs.get_valid_nids()
    nids = nids[0:5]
    aids_list = ibs.get_name_aids(nids)
    import itertools
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)

    # Enumerate annotmatch properties
    rng = np.random.RandomState(0)
    edge_props = {
        'weight': rng.rand(len(aids1)),
        'reviewer_confidence': rng.rand(len(aids1)),
        'algo_confidence': rng.rand(len(aids1)),
    }

    # Remove data that does not need to be visualized
    # (don't show all the aids if you don't have to)
    thresh = .5
    flags = edge_props['weight'] > thresh
    aids1_ = ut.compress(aids1, flags)
    aids2_ = ut.compress(aids2, flags)
    chosen_props = ut.dict_subset(edge_props, ['weight'])
    edge_props = ut.map_dict_vals(ut.partial(ut.compress, flag_list=flags),
                                  chosen_props)

    edge_keys = list(edge_props.keys())
    edge_vals = ut.dict_take(edge_props, edge_keys)
    edge_attr_list = [dict(zip(edge_keys, vals_)) for vals_ in zip(*edge_vals)]

    unique_aids = list(set(aids1_ + aids2_))
    # Make a graph between the chips
    nodes = unique_aids
    edges = list(zip(aids1_, aids2_, edge_attr_list))
    import networkx as nx
    graph = nx.DiGraph()
    graph.add_nodes_from(nodes)
    graph.add_edges_from(edges)
    from ibeis.viz import viz_graph
    fnum = None
    #zoom = kwargs.get('zoom', .4)
    viz_graph.viz_netx_chipgraph(ibs,
                                 graph,
                                 fnum=fnum,
                                 with_images=True,
                                 augment_graph=False)
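The core of the function above is: draw edge weights, keep only pairs above a threshold, and feed the survivors into a networkx DiGraph with per-edge attribute dicts. A small self-contained sketch of that step, with hypothetical aids and weights; only the networkx calls are meant literally:

import numpy as np
import networkx as nx

# Hypothetical annotation pairs and weights standing in for aids1/aids2/edge_props
rng = np.random.RandomState(0)
aids1 = [1, 1, 2, 3, 4]
aids2 = [2, 3, 3, 4, 5]
weights = rng.rand(len(aids1))

# Keep only edges above the weight threshold, as in the function above
thresh = 0.5
flags = weights > thresh
edges = [(u, v, {'weight': w})
         for u, v, w, keep in zip(aids1, aids2, weights, flags) if keep]

graph = nx.DiGraph()
graph.add_nodes_from(set(aids1 + aids2))
graph.add_edges_from(edges)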
Example #27
0
def invert_index(vecs_list, fgws_list, ax_list, fxs_list, verbose=ut.NOT_QUIET):
    r"""
    Aggregates descriptors of input annotations and returns inverted information

    Args:
        vecs_list (list):
        fgws_list (list):
        ax_list (list):
        fxs_list (list):
        verbose (bool): verbosity flag (default = ut.NOT_QUIET)

    Returns:
        tuple: (idx2_vec, idx2_fgw, idx2_ax, idx2_fx)

    CommandLine:
        python -m ibeis.algo.hots.neighbor_index invert_index

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.neighbor_index import *  # NOQA
        >>> rng = np.random.RandomState(42)
        >>> DIM_SIZE = 16
        >>> nFeat_list = [3, 0, 4, 1]
        >>> vecs_list = [rng.randn(nFeat, DIM_SIZE) for nFeat in nFeat_list]
        >>> fgws_list = [rng.randn(nFeat) for nFeat in nFeat_list]
        >>> fxs_list = [np.arange(nFeat) for nFeat in nFeat_list]
        >>> ax_list = np.arange(len(vecs_list))
        >>> fgws_list = None
        >>> verbose = True
        >>> tup = invert_index(vecs_list, fgws_list, ax_list, fxs_list)
        >>> (idx2_vec, idx2_fgw, idx2_ax, idx2_fx) = tup
        >>> result = 'output depth_profile = %s' % (ut.depth_profile(tup),)
        >>> print(result)
        output depth_profile = [(8, 16), 1, 8, 8]

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.neighbor_index import *  # NOQA
        >>> import ibeis
        >>> qreq_ = ibeis.testdata_qreq_(defaultdb='testdb1', a='default:species=zebra_plains', p='default:fgw_thresh=.999')
        >>> vecs_list, fgws_list, fxs_list = get_support_data(qreq_, qreq_.daids)
        >>> ax_list = np.arange(len(vecs_list))
        >>> input_ = vecs_list, fgws_list, ax_list, fxs_list
        >>> print('input depth_profile = %s' % (ut.depth_profile(input_),))
        >>> tup = invert_index(*input_)
        >>> (idx2_vec, idx2_fgw, idx2_ax, idx2_fx) = tup
        >>> result = 'output depth_profile = %s' % (ut.depth_profile(tup),)
        >>> print(result)
        output depth_profile = [(1912, 128), 1912, 1912, 1912]
    """
    if ut.VERYVERBOSE:
        print('[nnindex] stacking descriptors from %d annotations' % len(ax_list))
    try:
        nFeat_list = np.array(list(map(len, vecs_list)))
        # Remove input without any features
        is_valid = nFeat_list > 0
        nFeat_list = nFeat_list.compress(is_valid)
        vecs_list = ut.compress(vecs_list, is_valid)
        if fgws_list is not None:
            fgws_list = ut.compress(fgws_list, is_valid)
        ax_list = ut.compress(ax_list, is_valid)
        fxs_list = ut.compress(fxs_list, is_valid)

        # Flatten into inverted index
        axs_list = [[ax] * nFeat for (ax, nFeat) in zip(ax_list, nFeat_list)]
        nFeats = sum(nFeat_list)
        idx2_ax = np.fromiter(ut.iflatten(axs_list), np.int32, nFeats)
        idx2_fx = np.fromiter(ut.iflatten(fxs_list), np.int32, nFeats)
        idx2_vec = np.vstack(vecs_list)
        if fgws_list is None:
            idx2_fgw = None
        else:
            idx2_fgw = np.hstack(fgws_list)
            try:
                assert len(idx2_fgw) == len(idx2_vec), 'error. weights and vecs do not correspond'
            except Exception as ex:
                ut.printex(ex, keys=[(len, 'idx2_fgw'), (len, 'idx2_vec')])
                raise
        assert idx2_vec.shape[0] == idx2_ax.shape[0]
        assert idx2_vec.shape[0] == idx2_fx.shape[0]
    except MemoryError as ex:
        ut.printex(ex, 'cannot build inverted index', '[!memerror]')
        raise
    if ut.VERYVERBOSE or verbose:
        print('[nnindex] stacked nVecs={nVecs} from nAnnots={nAnnots}'.format(
            nVecs=len(idx2_vec), nAnnots=len(ax_list)))
        print('[nnindex] idx2_vecs dtype={}, memory={}'.format(
            idx2_vec.dtype,
            ut.byte_str2(idx2_vec.size * idx2_vec.dtype.itemsize)))
    return idx2_vec, idx2_fgw, idx2_ax, idx2_fx
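The heart of invert_index is the flattening at the end of the try block: repeat each annotation index once per feature, flatten the feature indexes, and vstack the descriptors so every flat row knows where it came from. A stripped-down sketch with toy arrays, where itertools.chain.from_iterable plays the role of ut.iflatten:

import itertools
import numpy as np

# Toy per-annotation descriptor stacks (assumed shapes only)
vecs_list = [np.zeros((3, 4), dtype=np.uint8), np.ones((2, 4), dtype=np.uint8)]
ax_list = [0, 1]
nFeat_list = [len(vecs) for vecs in vecs_list]
nFeats = sum(nFeat_list)

# Each flat row remembers which annotation (ax) and which feature (fx) it came from
idx2_ax = np.fromiter(itertools.chain.from_iterable(
    [ax] * n for ax, n in zip(ax_list, nFeat_list)), np.int32, nFeats)
idx2_fx = np.fromiter(itertools.chain.from_iterable(
    range(n) for n in nFeat_list), np.int32, nFeats)
idx2_vec = np.vstack(vecs_list)
assert idx2_vec.shape[0] == idx2_ax.shape[0] == idx2_fx.shape[0]
# idx2_ax -> [0, 0, 0, 1, 1]; idx2_fx -> [0, 1, 2, 0, 1]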
Example #28
0
def make_netx_graph_from_aid_groups(ibs,
                                    aids_list,
                                    only_reviewed_matches=True,
                                    invis_edges=None,
                                    ensure_edges=None,
                                    temp_nids=None,
                                    allow_directed=False):
    r"""
    Args:
        ibs (ibeis.IBEISController): image analysis api
        aids_list (list):

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.viz.viz_graph import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> aids_list = [[1, 2, 3, 4], [5, 6, 7]]
        >>> invis_edges = [(1, 5)]
        >>> only_reviewed_matches = True
        >>> graph = make_netx_graph_from_aid_groups(ibs, aids_list,
        >>>                                         only_reviewed_matches,
        >>>                                         invis_edges)
        >>> list(nx.connected_components(graph.to_undirected()))
    """
    #aids_list, nid_list = ibs.group_annots_by_name(aid_list)
    unique_aids = list(ut.flatten(aids_list))

    # grouped version
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)

    if only_reviewed_matches:
        annotmatch_rowids = ibs.get_annotmatch_rowid_from_superkey(
            aids1, aids2)
        annotmatch_rowids = ut.filter_Nones(annotmatch_rowids)
        aids1 = ibs.get_annotmatch_aid1(annotmatch_rowids)
        aids2 = ibs.get_annotmatch_aid2(annotmatch_rowids)

    graph = make_netx_graph_from_aidpairs(ibs,
                                          aids1,
                                          aids2,
                                          unique_aids=unique_aids)

    if ensure_edges is not None:
        if ensure_edges == 'all':
            ensure_edges = list(ut.upper_diag_self_prodx(list(graph.nodes())))
        ensure_edges_ = []
        for edge in ensure_edges:
            edge = tuple(edge)
            redge = tuple(edge[::-1])  # HACK
            if graph.has_edge(*edge):
                ensure_edges_.append(edge)
                pass
                #nx.set_edge_attributes(graph, 'weight', {edge: .001})
            elif (not allow_directed) and graph.has_edge(*redge):
                ensure_edges_.append(redge)
                #nx.set_edge_attributes(graph, 'weight', {redge: .001})
                pass
            else:
                ensure_edges_.append(edge)
                #graph.add_edge(*edge, weight=.001)
                graph.add_edge(*edge)

    if temp_nids is None:
        unique_nids = ibs.get_annot_nids(list(graph.nodes()))
    else:
        # HACK
        unique_nids = [1] * len(list(graph.nodes()))
        #unique_nids = temp_nids

    nx.set_node_attributes(graph, 'nid', dict(zip(graph.nodes(), unique_nids)))

    import plottool as pt
    ensure_names_are_connected(graph, aids_list)

    # Color edges by nid
    color_by_nids(graph, unique_nids=unique_nids)
    if invis_edges:
        for edge in invis_edges:
            if graph.has_edge(*edge):
                nx.set_edge_attributes(graph, 'style', {edge: 'invis'})
                nx.set_edge_attributes(graph, 'invisible', {edge: True})
            else:
                graph.add_edge(*edge, style='invis', invisible=True)

    # Hack color images orange
    if ensure_edges:
        nx.set_edge_attributes(
            graph, 'color', {tuple(edge): pt.ORANGE
                             for edge in ensure_edges_})

    return graph
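The within-name pair construction at the top of this function (itertools.product over each group, dropping self-pairs) is the same idiom several of the examples above rely on; a standalone sketch with hypothetical aid groups:

import itertools

aids_list = [[1, 2, 3], [5, 6]]  # hypothetical annotations grouped by name

# All ordered within-name pairs, excluding self-pairs
unflat_edges = (itertools.product(aids, aids) for aids in aids_list)
aid_pairs = [(a1, a2) for a1, a2 in itertools.chain.from_iterable(unflat_edges)
             if a1 != a2]
aids1 = [a1 for a1, _ in aid_pairs]
aids2 = [a2 for _, a2 in aid_pairs]
# aid_pairs -> [(1, 2), (1, 3), (2, 1), (2, 3), (3, 1), (3, 2), (5, 6), (6, 5)]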
Example #29
0
def invertible_stack(vecs_list, label_list):
    """
    Stacks descriptors into a flat structure and returns inverse mapping from
    flat database descriptor indexes (dx) to annotation ids (label) and feature
    indexes (fx). Feature indexes are w.r.t. annotation indexes.

    Output:
        idx2_desc - flat descriptor stack
        idx2_label  - inverted index into annotations
        idx2_fx   - inverted index into features

    # Example with 2D Descriptors

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.nearest_neighbors import *  # NOQA
        >>> DESC_TYPE = np.uint8
        >>> label_list  = [1, 2, 3, 4, 5]
        >>> vecs_list = [
        ...     np.array([[0, 0], [0, 1]], dtype=DESC_TYPE),
        ...     np.array([[5, 3], [2, 30], [1, 1]], dtype=DESC_TYPE),
        ...     np.empty((0, 2), dtype=DESC_TYPE),
        ...     np.array([[5, 3], [2, 30], [1, 1]], dtype=DESC_TYPE),
        ...     np.array([[3, 3], [42, 42], [2, 6]], dtype=DESC_TYPE),
        ...     ]
        >>> idx2_vec, idx2_label, idx2_fx = invertible_stack(vecs_list, label_list)
        >>> print(repr(idx2_vec.T))
        array([[ 0,  0,  5,  2,  1,  5,  2,  1,  3, 42,  2],
               [ 0,  1,  3, 30,  1,  3, 30,  1,  3, 42,  6]], dtype=uint8)
        >>> print(repr(idx2_label))
        array([1, 1, 2, 2, 2, 4, 4, 4, 5, 5, 5])
        >>> print(repr(idx2_fx))
        array([0, 1, 0, 1, 2, 0, 1, 2, 0, 1, 2])
    """
    # INFER DTYPE? dtype = vecs_list[0].dtype
    # Build inverted index of (label, fx) pairs
    nFeats = sum(list(map(len, vecs_list)))
    nFeat_iter = map(len, vecs_list)
    label_nFeat_iter = zip(label_list, map(len, vecs_list))
    # generate featx inverted index for each feature in each annotation
    _ax2_fx = [list(range(nFeat)) for nFeat in nFeat_iter]
    # generate label inverted index for each feature in each annotation
    '''
    # this is not a real test the code just happened to be here. syntax is good though
    #-ifdef CYTH_TEST_SWAP
    _ax2_label = [[label] * nFeat for (label, nFeat) in label_nFeat_iter]
    #-else
    '''
    _ax2_label = [[label] * nFeat for (label, nFeat) in label_nFeat_iter]
    # endif is optional. the end of the functionscope counts as an #endif
    '#-endif'
    # Flatten generators into the inverted index
    _flatlabels = utool.iflatten(_ax2_label)
    _flatfeatxs = utool.iflatten(_ax2_fx)

    idx2_label = np.fromiter(_flatlabels, np.int32, nFeats)
    idx2_fx = np.fromiter(_flatfeatxs, np.int32, nFeats)
    # Stack descriptors into a numpy array corresponding to the inverted index
    # This might throw a MemoryError
    idx2_vec = np.vstack(vecs_list)
    '#pragma cyth_returntup'
    return idx2_vec, idx2_label, idx2_fx
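Because idx2_label and idx2_fx run parallel to the flat descriptor stack, the per-annotation grouping can be recovered from them, which is what makes the stack "invertible". A small sketch of that inverse mapping, using placeholder vectors:

import numpy as np

# Recover per-label descriptor groups from the flat stack (inverse of the stacking)
idx2_label = np.array([1, 1, 2, 2, 2, 4, 4, 4, 5, 5, 5])
idx2_fx = np.array([0, 1, 0, 1, 2, 0, 1, 2, 0, 1, 2])
idx2_vec = np.arange(11 * 2, dtype=np.uint8).reshape(11, 2)  # placeholder rows

label_to_vecs = {
    label: idx2_vec[idx2_label == label]
    for label in np.unique(idx2_label)
}
label_to_fxs = {
    label: idx2_fx[idx2_label == label]
    for label in np.unique(idx2_label)
}
# each recovered fx group counts 0..nFeat-1 within its annotation
assert all(np.all(fxs == np.arange(len(fxs))) for fxs in label_to_fxs.values())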
Example #30
0
    def match_single(smk, qaid, daids, qreq_, verbose=True):
        """
        CommandLine:
            python -m wbia.algo.smk.smk_pipeline SMK.match_single --profile
            python -m wbia.algo.smk.smk_pipeline SMK.match_single --show

            python -m wbia SMK.match_single -a ctrl:qmingt=2 --profile --db PZ_Master1
            python -m wbia SMK.match_single -a ctrl --profile --db GZ_ALL

        Example:
            >>> # FUTURE_ENABLE
            >>> from wbia.algo.smk.smk_pipeline import *  # NOQA
            >>> import wbia
            >>> qreq_ = wbia.testdata_qreq_(defaultdb='PZ_MTEST')
            >>> ibs = qreq_.ibs
            >>> daids = qreq_.daids
            >>> #ibs, daids = wbia.testdata_aids(defaultdb='PZ_MTEST', default_set='dcfg')
            >>> qreq_ = SMKRequest(ibs, daids[0:1], daids, {'agg': True,
            >>>                                             'num_words': 1000,
            >>>                                             'sv_on': True})
            >>> qreq_.ensure_data()
            >>> qaid = qreq_.qaids[0]
            >>> daids = qreq_.daids
            >>> daid = daids[1]
            >>> verbose = True
            >>> cm = qreq_.smk.match_single(qaid, daids, qreq_)
            >>> ut.quit_if_noshow()
            >>> ut.qtensure()
            >>> cm.ishow_analysis(qreq_)
            >>> ut.show_if_requested()
        """
        from wbia.algo.hots import chip_match
        from wbia.algo.hots import pipeline

        alpha = qreq_.qparams['smk_alpha']
        thresh = qreq_.qparams['smk_thresh']
        agg = qreq_.qparams['agg']
        # nAnnotPerName   = qreq_.qparams.nAnnotPerNameSVER

        sv_on = qreq_.qparams.sv_on
        if sv_on:
            nNameShortList = qreq_.qparams.nNameShortlistSVER
            shortsize = nNameShortList
        else:
            shortsize = None

        X = qreq_.qinva.get_annot(qaid)

        # Determine which database annotations need to be checked
        # with ut.Timer('searching qaid=%r' % (qaid,), verbose=verbose):
        hit_inva_wxs = ut.take(qreq_.dinva.wx_to_aids, X.wx_list)
        hit_daids = np.array(list(set(ut.iflatten(hit_inva_wxs))))

        # Mark impossible daids
        # with ut.Timer('checking impossible daids=%r' % (qaid,), verbose=verbose):
        valid_flags = check_can_match(qaid, hit_daids, qreq_)
        valid_daids = hit_daids.compress(valid_flags)

        shortlist = ut.Shortlist(shortsize)
        # gammaX = smk.gamma(X, wx_to_weight, agg, alpha, thresh)
        _prog = ut.ProgPartial(lbl='smk scoring qaid=%r' % (qaid, ),
                               enabled=verbose,
                               bs=True,
                               adjust=True)

        wx_to_weight = qreq_.dinva.wx_to_weight

        debug = False
        if debug:
            qnid = qreq_.get_qreq_annot_nids([qaid])[0]
            daids = np.array(qreq_.daids)
            dnids = qreq_.get_qreq_annot_nids(daids)
            correct_aids = daids[np.where(dnids == qnid)[0]]
            daid = correct_aids[0]

        if agg:
            for daid in _prog(valid_daids):
                Y = qreq_.dinva.get_annot(daid)
                item = match_kernel_agg(X, Y, wx_to_weight, alpha, thresh)
                shortlist.insert(item)
        else:
            for daid in _prog(valid_daids):
                Y = qreq_.dinva.get_annot(daid)
                item = match_kernel_sep(X, Y, wx_to_weight, alpha, thresh)
                shortlist.insert(item)

        # Build chipmatches for the shortlist results

        # with ut.Timer('build cms', verbose=verbose):
        cm = chip_match.ChipMatch(qaid=qaid, fsv_col_lbls=['smk'])
        cm.daid_list = []
        cm.fm_list = []
        cm.fsv_list = []
        _prog = ut.ProgPartial(lbl='smk build cm qaid=%r' % (qaid, ),
                               enabled=verbose,
                               bs=True,
                               adjust=True)
        for item in _prog(shortlist):
            (score, score_list, Y, X_idx, Y_idx) = item
            X_fxs = ut.take(X.fxs_list, X_idx)
            Y_fxs = ut.take(Y.fxs_list, Y_idx)
            # Only build matches for those that sver will use
            if agg:
                X_maws = ut.take(X.maws_list, X_idx)
                Y_maws = ut.take(Y.maws_list, Y_idx)
                fm, fs = smk_funcs.build_matches_agg(X_fxs, Y_fxs, X_maws,
                                                     Y_maws, score_list)
            else:
                fm, fs = smk_funcs.build_matches_sep(X_fxs, Y_fxs, score_list)
            if len(fm) > 0:
                # assert not np.any(np.isnan(fs))
                daid = Y.aid
                fsv = fs[:, None]
                cm.daid_list.append(daid)
                cm.fm_list.append(fm)
                cm.fsv_list.append(fsv)
        cm._update_daid_index()
        cm.arraycast_self()
        cm.score_name_maxcsum(qreq_)

        # if False:
        #    cm.assert_self(qreq_=qreq_, verbose=True)

        if sv_on:
            cm = pipeline.sver_single_chipmatch(qreq_, cm, verbose=verbose)
            cm.score_name_maxcsum(qreq_)

        return cm
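ut.Shortlist is assumed here to keep only the `shortsize` best-scoring items as they are inserted; a rough heapq-based stand-in (purely illustrative, not the utool implementation):

import heapq

class Shortlist(object):
    """Rough stand-in for ut.Shortlist: keep only the best-scoring items."""
    def __init__(self, maxsize=None):
        self.maxsize = maxsize
        self._heap = []   # min-heap of (score, tiebreak, item)
        self._count = 0

    def insert(self, item):
        # item is assumed to be a tuple whose first element is the score
        score = item[0]
        self._count += 1
        entry = (score, self._count, item)
        if self.maxsize is None or len(self._heap) < self.maxsize:
            heapq.heappush(self._heap, entry)
        elif score > self._heap[0][0]:
            # evict the current worst item
            heapq.heapreplace(self._heap, entry)

    def __iter__(self):
        # yield surviving items, best first
        best_first = sorted(self._heap, key=lambda e: e[0], reverse=True)
        return iter([entry[2] for entry in best_first])

shortlist = Shortlist(2)
for item in [(0.3, 'daid=7'), (0.9, 'daid=3'), (0.5, 'daid=9')]:
    shortlist.insert(item)
print(list(shortlist))  # [(0.9, 'daid=3'), (0.5, 'daid=9')]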
Example #31
0
def sccw_summation(rvecs_list, flags_list, idf_list, maws_list, smk_alpha,
                   smk_thresh):
    r"""
    Computes gamma from "To Aggregate or not to aggregate". Every component in
    each list is with respect to a different word.

    scc = self consistency criterion
    It is a scalar which ensures K(X, X) = 1

    Args:
        rvecs_list (list of ndarrays): residual vectors for every word
        flags_list (list of ndarrays): per-word flags aligned with rvecs_list
        idf_list (list of floats): idf weight for each word
        maws_list (list of ndarrays): multi-assign weights for each word for each residual vector
        smk_alpha (float): selectivity power
        smk_thresh (float): selectivity threshold

    Returns:
        float: sccw self-consistency-criterion weight

    Math:
        \begin{equation}
        \gamma(X) = \left(\sum_{c \in \mathcal{C}} w_c M(X_c, X_c)\right)^{-1/2}
        \end{equation}

    Example:
        >>> from ibeis.algo.hots.smk.smk_scoring import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_scoring
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> #idf_list, rvecs_list, maws_list, smk_alpha, smk_thresh, wx2_flags = smk_debug.testdata_sccw_sum(db='testdb1')
        >>> tup = smk_debug.testdata_sccw_sum(db='PZ_MTEST', nWords=128000)
        >>> idf_list, rvecs_list, flags_list, maws_list, smk_alpha, smk_thresh = tup
        >>> sccw = smk_scoring.sccw_summation(rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh)
        >>> print(sccw)
        0.0201041835751

    CommandLine:
        python smk_match.py --db PZ_MOTHERS --nWords 128

    Ignore:
        0.0384477314197
        qmaws_list = dmaws_list = maws_list
        drvecs_list = qrvecs_list = rvecs_list
        dflags_list = qflags_list = flags_list

        flags_list = flags_list[7:10]
        maws_list  = maws_list[7:10]
        idf_list   = idf_list[7:10]
        rvecs_list = rvecs_list[7:10]

    """
    num_rvecs = len(rvecs_list)
    if DEBUG_SMK:
        assert maws_list is None or len(
            maws_list) == num_rvecs, 'inconsistent lengths'
        assert num_rvecs == len(idf_list), 'inconsistent lengths'
        assert maws_list is None or list(map(len, maws_list)) == list(
            map(len, rvecs_list)), 'inconsistent per word lengths'
        assert flags_list is None or list(map(len, rvecs_list)) == list(
            map(len, flags_list)), 'inconsistent per word lengths'
        assert flags_list is None or len(
            flags_list) == num_rvecs, 'inconsistent lengths'
    # Indexing with asymmetric multi-assignment might yield a self-score that is not 1
    # List of scores for every word.
    scores_list = score_matches(rvecs_list, rvecs_list, flags_list, flags_list,
                                maws_list, maws_list, smk_alpha, smk_thresh,
                                idf_list)
    if DEBUG_SMK:
        assert len(scores_list) == num_rvecs, 'bad rvec and score'
        assert len(idf_list) == len(scores_list), 'bad weight and score'
    # Summation over all residual vector scores
    _count = sum((scores.size for scores in scores_list))
    _iter = utool.iflatten(scores.ravel() for scores in scores_list)
    self_rawscore = np.fromiter(_iter, np.float64, _count).sum()
    # Square root inverse to enforce normalized self-score is 1.0
    sccw = np.reciprocal(np.sqrt(self_rawscore))
    try:
        assert not np.isinf(sccw), 'sccw cannot be infinite'
        assert not np.isnan(sccw), 'sccw cannot be nan'
    except AssertionError as ex:
        utool.printex(ex,
                      'problem computing self consistency criterion weight',
                      keys=['num_rvecs'],
                      iswarning=True)
        if num_rvecs > 0:
            raise
        else:
            sccw = 1
    return sccw
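A tiny numeric sketch of the normalization above, with hypothetical per-word self-scores in place of score_matches output: sccw is the reciprocal square root of the summed self-score, so weighting both sides of the self-match by sccw brings K(X, X) back to 1.

import numpy as np

# Hypothetical per-word self-match scores (what score_matches would return)
scores_list = [np.array([[4.0]]), np.array([[9.0, 3.0]]), np.array([[5.0]])]

self_rawscore = sum(scores.sum() for scores in scores_list)   # 21.0
sccw = np.reciprocal(np.sqrt(self_rawscore))                  # 1 / sqrt(21)

# Applying sccw to both sides of the self-match normalizes K(X, X) to 1
assert np.isclose(sccw * self_rawscore * sccw, 1.0)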