コード例 #1
0
ファイル: smk_index.py プロジェクト: heroinlin/ibeis
def OLD_compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_aids, wx2_idf, wx2_dmaws,
                           smk_alpha, smk_thresh, verbose=False):
    """
    Legacy routine that computes one sccw value per database annotation.

    The per-word residual vectors are regrouped by annotation id and then by
    word index (``clustertool.double_group``), and
    ``smk_scoring.sccw_summation`` is evaluated once per annotation.

    Args:
        idx2_daid: not used by this function
        wx2_drvecs (dict): keyed by word index (wx); the values are passed on
            as the residual vectors of that word
        wx2_aids (dict): keyed by word index; the values are used as the
            annotation-id grouping key for the entries of ``wx2_drvecs``
        wx2_idf (dict): keyed by word index; one idf weight per word
        wx2_dmaws (dict): keyed by word index; looked up for every word but
            only inspected when ``ut.DEBUG2`` is set
        smk_alpha: forwarded unchanged to ``smk_scoring.sccw_summation``
        smk_thresh: forwarded unchanged to ``smk_scoring.sccw_summation``
        verbose (bool): print progress even when ``ut.VERBOSE`` is off

    Returns:
        dict: daid2_sccw, mapping annotation id -> sccw value
    """
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.rrr()
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)

    with ut.Timer('timer_orig1'):
        # Materialize the keys before handing them to numpy.  On Python 3
        # ``dict.keys()`` is a view object and ``np.array(view)`` produces a
        # 0-d object array that cannot be iterated over.
        wx_sublist = np.array(list(wx2_drvecs.keys()))
        if not ut.QUIET:
            print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
        if ut.VERBOSE or verbose:
            print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))
            mark1, end1_ = ut.log_progress(
                '[smk_index.sccw] SCCW group (by present words): ', len(wx_sublist),
                freq=100, with_time=WITH_TOTALTIME)
        # Get list of aids and rvecs w.r.t. words
        aids_list   = [wx2_aids[wx] for wx in wx_sublist]
        rvecs_list1 = [wx2_drvecs[wx] for wx in wx_sublist]
        # Only consumed by the DEBUG2 check below; the lookup itself is kept
        # unconditional so a missing word still raises KeyError as before.
        maws_list   = [wx2_dmaws[wx] for wx in wx_sublist]
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.assert_single_assigned_maws(maws_list)
        # Group by daids first and then by word index
        daid2_wx2_drvecs = clustertool.double_group(wx_sublist, aids_list, rvecs_list1)

        if ut.VERBOSE or verbose:
            end1_()

        # For every daid, compute its sccw using pregrouped rvecs
        # Summation over words for each aid
        if ut.VERBOSE or verbose:
            mark2, end2_ = ut.log_progress(
                '[smk_index.sccw] SCCW Sum (over daid): ', len(daid2_wx2_drvecs),
                freq=25, with_time=WITH_TOTALTIME)
        # Get lists w.r.t daids
        aid_list = list(daid2_wx2_drvecs.keys())
        # list of mappings from words to rvecs foreach daid
        # [wx2_aidrvecs_1, ..., wx2_aidrvecs_nDaids,]
        _wx2_aidrvecs_list = list(daid2_wx2_drvecs.values())
        _aidwxs_iter   = (list(wx2_aidrvecs.keys()) for wx2_aidrvecs in _wx2_aidrvecs_list)
        aidrvecs_list  = [list(wx2_aidrvecs.values()) for wx2_aidrvecs in _wx2_aidrvecs_list]
        aididf_list = [[wx2_idf[wx] for wx in aidwxs] for aidwxs in _aidwxs_iter]

    with ut.Timer('timer_orig2'):
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.check_data_smksumm(aididf_list, aidrvecs_list)
        # TODO: implement database side soft-assign
        # flags and maws are passed as None in this legacy path
        sccw_list = [smk_scoring.sccw_summation(rvecs_list, None, idf_list, None, smk_alpha, smk_thresh)
                     for idf_list, rvecs_list in zip(aididf_list, aidrvecs_list)]

        daid2_sccw = dict(zip(aid_list, sccw_list))
    if ut.VERBOSE or verbose:
        end2_()
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')
    return daid2_sccw
コード例 #2
0
ファイル: smk_index.py プロジェクト: whaozl/ibeis
def OLD_compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_aids, wx2_idf, wx2_dmaws,
                           smk_alpha, smk_thresh, verbose=False):
    """
    Legacy routine that computes one sccw value per database annotation.

    The per-word residual vectors are regrouped by annotation id and then by
    word index (``clustertool.double_group``), and
    ``smk_scoring.sccw_summation`` is evaluated once per annotation.

    Args:
        idx2_daid: not used by this function
        wx2_drvecs (dict): keyed by word index (wx); the values are passed on
            as the residual vectors of that word
        wx2_aids (dict): keyed by word index; the values are used as the
            annotation-id grouping key for the entries of ``wx2_drvecs``
        wx2_idf (dict): keyed by word index; one idf weight per word
        wx2_dmaws (dict): keyed by word index; looked up for every word but
            only inspected when ``ut.DEBUG2`` is set
        smk_alpha: forwarded unchanged to ``smk_scoring.sccw_summation``
        smk_thresh: forwarded unchanged to ``smk_scoring.sccw_summation``
        verbose (bool): print progress even when ``ut.VERBOSE`` is off

    Returns:
        dict: daid2_sccw, mapping annotation id -> sccw value
    """
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.rrr()
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)

    with ut.Timer('timer_orig1'):
        # Materialize the keys before handing them to numpy.  On Python 3
        # ``dict.keys()`` is a view object and ``np.array(view)`` produces a
        # 0-d object array that cannot be iterated over.
        wx_sublist = np.array(list(wx2_drvecs.keys()))
        if not ut.QUIET:
            print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
        if ut.VERBOSE or verbose:
            print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))
        # Get list of aids and rvecs w.r.t. words
        aids_list   = [wx2_aids[wx] for wx in wx_sublist]
        rvecs_list1 = [wx2_drvecs[wx] for wx in wx_sublist]
        # Only consumed by the DEBUG2 check below; the lookup itself is kept
        # unconditional so a missing word still raises KeyError as before.
        maws_list   = [wx2_dmaws[wx] for wx in wx_sublist]
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.assert_single_assigned_maws(maws_list)
        # Group by daids first and then by word index
        daid2_wx2_drvecs = clustertool.double_group(wx_sublist, aids_list, rvecs_list1)

        # For every daid, compute its sccw using pregrouped rvecs
        # Summation over words for each aid
        if ut.VERBOSE or verbose:
            print('[smk_index.sccw] SCCW Sum (over daid): ')
        # Get lists w.r.t daids
        aid_list = list(daid2_wx2_drvecs.keys())
        # list of mappings from words to rvecs foreach daid
        # [wx2_aidrvecs_1, ..., wx2_aidrvecs_nDaids,]
        _wx2_aidrvecs_list = list(daid2_wx2_drvecs.values())
        _aidwxs_iter   = (list(wx2_aidrvecs.keys()) for wx2_aidrvecs in _wx2_aidrvecs_list)
        aidrvecs_list  = [list(wx2_aidrvecs.values()) for wx2_aidrvecs in _wx2_aidrvecs_list]
        aididf_list = [[wx2_idf[wx] for wx in aidwxs] for aidwxs in _aidwxs_iter]

    with ut.Timer('timer_orig2'):
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.check_data_smksumm(aididf_list, aidrvecs_list)
        # TODO: implement database side soft-assign
        # flags and maws are passed as None in this legacy path
        sccw_list = [smk_scoring.sccw_summation(rvecs_list, None, idf_list, None, smk_alpha, smk_thresh)
                     for idf_list, rvecs_list in zip(aididf_list, aidrvecs_list)]

        daid2_sccw = dict(zip(aid_list, sccw_list))
    if ut.VERBOSE or verbose:
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')
    return daid2_sccw
コード例 #3
0
ファイル: smk_index.py プロジェクト: heroinlin/ibeis
def compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf,
                       wx2_dmaws, smk_alpha, smk_thresh, verbose=False):
    """
    Computes the sccw normalization scalar of every database annotation.

    This is gamma from the SMK paper: sccw is the self consistency
    criterion weight, a scalar which ensures the score of K(X, X) = 1.

    The per-word inputs are regrouped first by annotation id and then by
    word index, and ``smk_scoring.sccw_summation`` is evaluated once per
    annotation on the regrouped data.

    Args:
        idx2_daid (): not referenced by this function
        wx2_drvecs (): keyed by word index; values are indexed row-wise
        wx2_dflags (): keyed by word index; values are indexed row-wise
        wx2_aids (): keyed by word index; annotation ids used for grouping
        wx2_idf (): keyed by word index; one value per word
        wx2_dmaws (): keyed by word index; values are indexed row-wise
        smk_alpha (): forwarded to ``smk_scoring.sccw_summation``
        smk_thresh (): forwarded to ``smk_scoring.sccw_summation``
        verbose (bool): report progress even when ``ut.VERBOSE`` is off

    Returns:
        daid2_sccw (dict): annotation id -> sccw value

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_index
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> #tup = smk_debug.testdata_compute_data_sccw(db='testdb1')
        >>> tup = smk_debug.testdata_compute_data_sccw(db='PZ_MTEST')
        >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, qparams = tup
        >>> wx2_dflags = invindex.wx2_dflags
        >>> ws2_idxs = invindex.wx2_idxs
        >>> wx2_dmaws  = invindex.wx2_dmaws
        >>> idx2_daid  = invindex.idx2_daid
        >>> daids      = invindex.daids
        >>> smk_alpha  = qparams.smk_alpha
        >>> smk_thresh = qparams.smk_thresh
        >>> wx2_idf    = wx2_idf
        >>> verbose = True
        >>> invindex.invindex_dbgstr()
        >>> invindex.report_memory()
        >>> invindex.report_memsize()
        >>> daid2_sccw = smk_index.compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws, smk_alpha, smk_thresh, verbose)
    """
    be_verbose = ut.VERBOSE or verbose

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)
    if not ut.QUIET:
        print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
    if be_verbose:
        print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))
        _mark_group, finish_group = ut.log_progress(
            '[smk_index.sccw] SCCW group (by present words): ', len(wx2_drvecs),
            freq=100, with_time=WITH_TOTALTIME)

    # --- One entry per present word -------------------------------------
    present_wxs = np.array(list(wx2_drvecs.keys()))
    word_aids = []
    for wx in present_wxs:
        word_aids.append(wx2_aids[wx])

    # --- Flatten to one entry per (word, row) pair ----------------------
    # A row is identified by its word index plus its position (tx) inside
    # that word's arrays in wx2_drvecs / wx2_dflags / wx2_dmaws.
    flat_aids = np.hstack(word_aids)
    num_rows = len(flat_aids)
    flat_txs = np.hstack([np.arange(aids.size) for aids in word_aids])
    # The word indexes are plain python lists here, so fromiter is used
    # instead of hstack.
    repeated_wxs = ([wx] * len(aids) for wx, aids in zip(present_wxs, word_aids))
    flat_wxs = np.fromiter(ut.iflatten(repeated_wxs), hstypes.INDEX_TYPE, num_rows)

    # --- Outer grouping: by annotation id -------------------------------
    unique_aids, annot_groupxs = clustertool.group_indices(flat_aids)
    annot_wxs_iter = clustertool.apply_grouping_iter(flat_wxs, annot_groupxs)
    annot_txs_iter = clustertool.apply_grouping_iter(flat_txs, annot_groupxs)

    # --- Inner grouping: by word inside each annotation -----------------
    word_groupings = [clustertool.group_indices(wxs) for wxs in annot_wxs_iter]
    txs_perword_perannot = [
        clustertool.apply_grouping(txs, groupxs)
        for txs, (_, groupxs) in zip(annot_txs_iter, word_groupings)
    ]
    wxs_perword_perannot = [uniq_wxs for uniq_wxs, _ in word_groupings]

    def _rows_per_annot_word(wx2_arr):
        # For each annotation and each of its words, pick that word's rows
        # belonging to the annotation.
        return [
            [wx2_arr[wx].take(txs, axis=0) for wx, txs in zip(annot_wxs, annot_txs)]
            for annot_wxs, annot_txs in zip(wxs_perword_perannot, txs_perword_perannot)
        ]

    subgrouped_drvecs = _rows_per_annot_word(wx2_drvecs)
    subgrouped_dmaws = _rows_per_annot_word(wx2_dmaws)
    subgrouped_dflags = _rows_per_annot_word(wx2_dflags)
    subgrouped_idfs = [[wx2_idf[wx] for wx in annot_wxs]
                       for annot_wxs in wxs_perword_perannot]

    if be_verbose:
        finish_group()
        _mark_sum, finish_sum = ut.log_progress(
            lbl='[smk_index.sccw] SCCW Sum (over daid): ',
            total=len(unique_aids), freq=100, with_time=WITH_TOTALTIME)
        progiter = ut.ProgressIter(
            lbl='[smk_index.sccw] SCCW Sum (over daid): ',
            total=len(unique_aids), freq=10, with_time=WITH_TOTALTIME)
    else:
        progiter = ut.identity

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_data_smksumm(subgrouped_idfs, subgrouped_drvecs)

    # --- One summation per annotation -----------------------------------
    per_annot_inputs = zip(subgrouped_drvecs, subgrouped_dflags,
                           subgrouped_dmaws, subgrouped_idfs)
    sccw_list = []
    for rvecs_list, flags_list, maws_list, idf_list in progiter(per_annot_inputs):
        sccw = smk_scoring.sccw_summation(
            rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh)
        sccw_list.append(sccw)
    daid2_sccw = dict(zip(unique_aids, sccw_list))

    if be_verbose:
        finish_sum()
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')

    return daid2_sccw
コード例 #4
0
ファイル: smk_index.py プロジェクト: heroinlin/ibeis
def compute_residuals_(words, wx2_idxs, wx2_maws, idx2_vec, idx2_aid,
                       idx2_fx, aggregate, verbose=False):
    """
    Computes residual vectors based on word assignments and
    returns mappings from word index to the per-word results.

    Args:
        words (ndarray): forwarded to ``smk_residuals.compute_nonagg_rvecs``
        wx2_idxs (dict): word index -> array of indexes into the ``idx2_*``
            arrays
        wx2_maws (dict): word index -> multi-assign weights; only read when
            ``aggregate`` is True, otherwise returned unchanged
        idx2_vec (ndarray): forwarded to
            ``smk_residuals.compute_nonagg_rvecs``
        idx2_aid (ndarray): indexed with ``take`` to get the annotation id of
            every assigned index
        idx2_fx (ndarray): indexed to get the feature index of every assigned
            index
        aggregate (bool): if True the residuals are aggregated with
            ``smk_residuals.compute_agg_rvecs``
        verbose (bool): report progress even when ``ut.VERBOSE`` is off

    Returns:
        tuple : (wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags) where::
            * wx2_rvecs - [ ... [ rvec_i1, ...,  rvec_Mi ]_i ... ]
            * wx2_aids  - [ ... [  aid_i1, ...,   aid_Mi ]_i ... ]
            * wx2_fxs   - [ ... [[fxs]_i1, ..., [fxs]_Mi ]_i ... ]
            * wx2_maws  - aggregated weights if aggregate is True, else the
              input ``wx2_maws``
            * wx2_flags - flags returned by smk_residuals for each rvec

        For every word::

            * list of (agg)vecs
            * For every (agg)vec:
                * one parent aid
                * list of parent fxs; if aggregate is False each list has
                  exactly one element

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams = smk_debug.testdata_raw_internals1()
        >>> words     = invindex.words
        >>> idx2_aid  = invindex.idx2_daid
        >>> idx2_fx   = invindex.idx2_dfx
        >>> idx2_vec  = invindex.idx2_dvec
        >>> aggregate = ibs.cfg.query_cfg.smk_cfg.aggregate
        >>> wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags = compute_residuals_(words, wx2_idxs, wx2_maws, idx2_vec, idx2_aid, idx2_fx, aggregate)
    """
    if not ut.QUIET:
        print('[smk_index.rvec] +--- Start Compute Residuals')

    # Materialize the keys before handing them to numpy.  On Python 3
    # ``dict.keys()`` is a view object and ``np.array(view)`` produces a 0-d
    # object array that cannot be iterated over.
    wx_sublist = np.array(list(wx2_idxs.keys()))
    # Build lists w.r.t. words

    idxs_list = [wx2_idxs[wx].astype(hstypes.INDEX_TYPE) for wx in wx_sublist]
    aids_list = [idx2_aid.take(idxs) for idxs in idxs_list]
    if ut.DEBUG2:
        #assert np.all(np.diff(wx_sublist) == 1), 'not dense'
        assert all([len(a) == len(b) for a, b in zip(idxs_list, aids_list)]), 'bad alignment'
        assert idx2_vec.shape[0] == idx2_fx.shape[0]
        assert idx2_vec.shape[0] == idx2_aid.shape[0]
    # Prealloc output
    if ut.VERBOSE or verbose:
        #print('[smk_index.rvec] Residual Vectors for %d words. aggregate=%r' %
        #      (len(wx2_idxs), aggregate,))
        lbl = '[smk_index.rvec] agg rvecs' if aggregate else '[smk_index.rvec] nonagg rvecs'
        mark, end_ = ut.log_progress(lbl, len(wx2_idxs), freq=50, with_time=True)
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2_idxs(wx2_idxs, len(words))
    # Compute Residuals (always the non-aggregated ones first)
    rvecs_list, flags_list = smk_residuals.compute_nonagg_rvecs(words, idx2_vec, wx_sublist, idxs_list)

    if ut.VERBOSE:
        print('Computed size(rvecs_list) = %r' % ut.get_object_size_str(rvecs_list))
        print('Computed size(flags_list) = %r' % ut.get_object_size_str(flags_list))
    if aggregate:
        maws_list = [wx2_maws[wx] for wx in wx_sublist]
        # Aggregate Residuals
        tup = smk_residuals.compute_agg_rvecs(rvecs_list, idxs_list, aids_list, maws_list)
        (aggvecs_list, aggaids_list, aggidxs_list, aggmaws_list, aggflags_list) = tup
        # Pack into common query structure
        aggfxs_list = [[idx2_fx.take(idxs) for idxs in aggidxs] for aggidxs in aggidxs_list]
        wx2_rvecs = dict(zip(wx_sublist, aggvecs_list))
        wx2_aids  = dict(zip(wx_sublist, aggaids_list))
        wx2_fxs   = dict(zip(wx_sublist, aggfxs_list))
        # The returned maws are the aggregated ones, not the input mapping
        wx2_maws  = dict(zip(wx_sublist, aggmaws_list))
        wx2_flags = dict(zip(wx_sublist, aggflags_list))
    else:
        # Hack non-aggregate residuals to have the same structure as aggregate
        # residuals for compatibility: i.e. each rvec gets a list of fxs that
        # contributed to it, and for SMK this is a list of size 1
        fxs_list  = [[idx2_fx[idx:idx + 1] for idx in idxs]  for idxs in idxs_list]
        wx2_rvecs = dict(zip(wx_sublist, rvecs_list))
        wx2_aids  = dict(zip(wx_sublist, aids_list))
        wx2_fxs   = dict(zip(wx_sublist, fxs_list))
        wx2_flags = dict(zip(wx_sublist, flags_list))
        # wx2_maws is passed through untouched in this branch
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(words, wx2_rvecs, wx2_aids, wx2_fxs)
    if ut.VERBOSE or verbose:
        end_()
        print('[smk_index.rvec] L___ End Compute Residuals')
    return wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags
コード例 #5
0
ファイル: smk_index.py プロジェクト: simplesoftMX/ibeis
def compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf,
                       wx2_dmaws, smk_alpha, smk_thresh, verbose=False):
    """
    Computes the sccw normalization scalar of every database annotation.

    This is gamma from the SMK paper: sccw is the self consistency
    criterion weight, a scalar which ensures the score of K(X, X) = 1.

    The per-word inputs are regrouped first by annotation id and then by
    word index, and ``smk_scoring.sccw_summation`` is evaluated once per
    annotation on the regrouped data.

    Args:
        idx2_daid (): not referenced by this function
        wx2_drvecs (): keyed by word index; values are indexed row-wise
        wx2_dflags (): keyed by word index; values are indexed row-wise
        wx2_aids (): keyed by word index; annotation ids used for grouping
        wx2_idf (): keyed by word index; one value per word
        wx2_dmaws (): keyed by word index; values are indexed row-wise
        smk_alpha (): forwarded to ``smk_scoring.sccw_summation``
        smk_thresh (): forwarded to ``smk_scoring.sccw_summation``
        verbose (bool): report progress even when ``ut.VERBOSE`` is off

    Returns:
        daid2_sccw (dict): annotation id -> sccw value

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_index
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> #tup = smk_debug.testdata_compute_data_sccw(db='testdb1')
        >>> tup = smk_debug.testdata_compute_data_sccw(db='PZ_MTEST')
        >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, qparams = tup
        >>> wx2_dflags = invindex.wx2_dflags
        >>> ws2_idxs = invindex.wx2_idxs
        >>> wx2_dmaws  = invindex.wx2_dmaws
        >>> idx2_daid  = invindex.idx2_daid
        >>> daids      = invindex.daids
        >>> smk_alpha  = qparams.smk_alpha
        >>> smk_thresh = qparams.smk_thresh
        >>> wx2_idf    = wx2_idf
        >>> verbose = True
        >>> invindex.invindex_dbgstr()
        >>> invindex.report_memory()
        >>> invindex.report_memsize()
        >>> daid2_sccw = smk_index.compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws, smk_alpha, smk_thresh, verbose)
    """
    be_verbose = ut.VERBOSE or verbose

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)
    if not ut.QUIET:
        print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
    if be_verbose:
        print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))

    # --- One entry per present word -------------------------------------
    present_wxs = np.array(list(wx2_drvecs.keys()))
    word_aids = []
    for wx in present_wxs:
        word_aids.append(wx2_aids[wx])

    # --- Flatten to one entry per (word, row) pair ----------------------
    # A row is identified by its word index plus its position (tx) inside
    # that word's arrays in wx2_drvecs / wx2_dflags / wx2_dmaws.
    flat_aids = np.hstack(word_aids)
    num_rows = len(flat_aids)
    flat_txs = np.hstack([np.arange(aids.size) for aids in word_aids])
    # The word indexes are plain python lists here, so fromiter is used
    # instead of hstack.
    repeated_wxs = ([wx] * len(aids) for wx, aids in zip(present_wxs, word_aids))
    flat_wxs = np.fromiter(ut.iflatten(repeated_wxs), hstypes.INDEX_TYPE, num_rows)

    # --- Outer grouping: by annotation id -------------------------------
    unique_aids, annot_groupxs = clustertool.group_indices(flat_aids)
    annot_wxs_iter = clustertool.apply_grouping_iter(flat_wxs, annot_groupxs)
    annot_txs_iter = clustertool.apply_grouping_iter(flat_txs, annot_groupxs)

    # --- Inner grouping: by word inside each annotation -----------------
    word_groupings = [clustertool.group_indices(wxs) for wxs in annot_wxs_iter]
    txs_perword_perannot = [
        clustertool.apply_grouping(txs, groupxs)
        for txs, (_, groupxs) in zip(annot_txs_iter, word_groupings)
    ]
    wxs_perword_perannot = [uniq_wxs for uniq_wxs, _ in word_groupings]

    def _rows_per_annot_word(wx2_arr):
        # For each annotation and each of its words, pick that word's rows
        # belonging to the annotation.
        return [
            [wx2_arr[wx].take(txs, axis=0) for wx, txs in zip(annot_wxs, annot_txs)]
            for annot_wxs, annot_txs in zip(wxs_perword_perannot, txs_perword_perannot)
        ]

    subgrouped_drvecs = _rows_per_annot_word(wx2_drvecs)
    subgrouped_dmaws = _rows_per_annot_word(wx2_dmaws)
    subgrouped_dflags = _rows_per_annot_word(wx2_dflags)
    subgrouped_idfs = [[wx2_idf[wx] for wx in annot_wxs]
                       for annot_wxs in wxs_perword_perannot]

    if be_verbose:
        progiter = ut.ProgressIter(
            lbl='[smk_index.sccw] SCCW Sum (over daid): ',
            total=len(unique_aids), freq=10, with_time=WITH_TOTALTIME)
    else:
        progiter = ut.identity

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_data_smksumm(subgrouped_idfs, subgrouped_drvecs)

    # --- One summation per annotation -----------------------------------
    per_annot_inputs = zip(subgrouped_drvecs, subgrouped_dflags,
                           subgrouped_dmaws, subgrouped_idfs)
    sccw_list = []
    for rvecs_list, flags_list, maws_list, idf_list in progiter(per_annot_inputs):
        sccw = smk_scoring.sccw_summation(
            rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh)
        sccw_list.append(sccw)
    daid2_sccw = dict(zip(unique_aids, sccw_list))

    if be_verbose:
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')

    return daid2_sccw
コード例 #6
0
ファイル: smk_index.py プロジェクト: simplesoftMX/ibeis
def compute_residuals_(words, wx2_idxs, wx2_maws, idx2_vec, idx2_aid,
                       idx2_fx, aggregate, verbose=False):
    """
    Computes residual vectors based on word assignments and
    returns mappings from word index to the per-word results.

    Args:
        words (ndarray): forwarded to ``smk_residuals.compute_nonagg_rvecs``
        wx2_idxs (dict): word index -> array of indexes into the ``idx2_*``
            arrays
        wx2_maws (dict): word index -> multi-assign weights; only read when
            ``aggregate`` is True, otherwise returned unchanged
        idx2_vec (ndarray): forwarded to
            ``smk_residuals.compute_nonagg_rvecs``
        idx2_aid (ndarray): indexed with ``take`` to get the annotation id of
            every assigned index
        idx2_fx (ndarray): indexed to get the feature index of every assigned
            index
        aggregate (bool): if True the residuals are aggregated with
            ``smk_residuals.compute_agg_rvecs``
        verbose (bool): print status lines even when ``ut.VERBOSE`` is off

    Returns:
        tuple : (wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags) where::
            * wx2_rvecs - [ ... [ rvec_i1, ...,  rvec_Mi ]_i ... ]
            * wx2_aids  - [ ... [  aid_i1, ...,   aid_Mi ]_i ... ]
            * wx2_fxs   - [ ... [[fxs]_i1, ..., [fxs]_Mi ]_i ... ]
            * wx2_maws  - aggregated weights if aggregate is True, else the
              input ``wx2_maws``
            * wx2_flags - flags returned by smk_residuals for each rvec

        For every word::

            * list of (agg)vecs
            * For every (agg)vec:
                * one parent aid
                * list of parent fxs; if aggregate is False each list has
                  exactly one element

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams = smk_debug.testdata_raw_internals1()
        >>> words     = invindex.words
        >>> idx2_aid  = invindex.idx2_daid
        >>> idx2_fx   = invindex.idx2_dfx
        >>> idx2_vec  = invindex.idx2_dvec
        >>> aggregate = ibs.cfg.query_cfg.smk_cfg.aggregate
        >>> wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags = compute_residuals_(words, wx2_idxs, wx2_maws, idx2_vec, idx2_aid, idx2_fx, aggregate)
    """
    if not ut.QUIET:
        print('[smk_index.rvec] +--- Start Compute Residuals')

    # Materialize the keys before handing them to numpy.  On Python 3
    # ``dict.keys()`` is a view object and ``np.array(view)`` produces a 0-d
    # object array that cannot be iterated over.
    wx_sublist = np.array(list(wx2_idxs.keys()))
    # Build lists w.r.t. words

    idxs_list = [wx2_idxs[wx].astype(hstypes.INDEX_TYPE) for wx in wx_sublist]
    aids_list = [idx2_aid.take(idxs) for idxs in idxs_list]
    if ut.DEBUG2:
        #assert np.all(np.diff(wx_sublist) == 1), 'not dense'
        assert all([len(a) == len(b) for a, b in zip(idxs_list, aids_list)]), 'bad alignment'
        assert idx2_vec.shape[0] == idx2_fx.shape[0]
        assert idx2_vec.shape[0] == idx2_aid.shape[0]
    # Prealloc output
    if ut.VERBOSE or verbose:
        lbl = '[smk_index.rvec] agg rvecs' if aggregate else '[smk_index.rvec] nonagg rvecs'
        print(lbl)
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2_idxs(wx2_idxs, len(words))
    # Compute Residuals (always the non-aggregated ones first)
    rvecs_list, flags_list = smk_residuals.compute_nonagg_rvecs(words, idx2_vec, wx_sublist, idxs_list)

    if ut.VERBOSE:
        print('Computed size(rvecs_list) = %r' % ut.get_object_size_str(rvecs_list))
        print('Computed size(flags_list) = %r' % ut.get_object_size_str(flags_list))
    if aggregate:
        maws_list = [wx2_maws[wx] for wx in wx_sublist]
        # Aggregate Residuals
        tup = smk_residuals.compute_agg_rvecs(rvecs_list, idxs_list, aids_list, maws_list)
        (aggvecs_list, aggaids_list, aggidxs_list, aggmaws_list, aggflags_list) = tup
        # Pack into common query structure
        aggfxs_list = [[idx2_fx.take(idxs) for idxs in aggidxs] for aggidxs in aggidxs_list]
        wx2_rvecs = dict(zip(wx_sublist, aggvecs_list))
        wx2_aids  = dict(zip(wx_sublist, aggaids_list))
        wx2_fxs   = dict(zip(wx_sublist, aggfxs_list))
        # The returned maws are the aggregated ones, not the input mapping
        wx2_maws  = dict(zip(wx_sublist, aggmaws_list))
        wx2_flags = dict(zip(wx_sublist, aggflags_list))
    else:
        # Hack non-aggregate residuals to have the same structure as aggregate
        # residuals for compatibility: i.e. each rvec gets a list of fxs that
        # contributed to it, and for SMK this is a list of size 1
        fxs_list  = [[idx2_fx[idx:idx + 1] for idx in idxs]  for idxs in idxs_list]
        wx2_rvecs = dict(zip(wx_sublist, rvecs_list))
        wx2_aids  = dict(zip(wx_sublist, aids_list))
        wx2_fxs   = dict(zip(wx_sublist, fxs_list))
        wx2_flags = dict(zip(wx_sublist, flags_list))
        # wx2_maws is passed through untouched in this branch
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(words, wx2_rvecs, wx2_aids, wx2_fxs)
    if ut.VERBOSE or verbose:
        print('[smk_index.rvec] L___ End Compute Residuals')
    return wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags