Example #1
0
def OLD_compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_aids, wx2_idf, wx2_dmaws,
                           smk_alpha, smk_thresh, verbose=False):
    """
    """
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.rrr()
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)

    with ut.Timer('timer_orig1'):
        wx_sublist = np.array(wx2_drvecs.keys())
        if not ut.QUIET:
            print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
        if ut.VERBOSE or verbose:
            print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))
            mark1, end1_ = ut.log_progress(
                '[smk_index.sccw] SCCW group (by present words): ', len(wx_sublist),
                freq=100, with_time=WITH_TOTALTIME)
        # Get list of aids and rvecs w.r.t. words
        aids_list   = [wx2_aids[wx] for wx in wx_sublist]
        rvecs_list1 = [wx2_drvecs[wx] for wx in wx_sublist]
        maws_list   = [wx2_dmaws[wx] for wx in wx_sublist]
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.assert_single_assigned_maws(maws_list)
        # Group by daids first and then by word index
        daid2_wx2_drvecs = clustertool.double_group(wx_sublist, aids_list, rvecs_list1)

        if ut.VERBOSE or verbose:
            end1_()

        # For every daid, compute its sccw using pregrouped rvecs
        # Summation over words for each aid
        if ut.VERBOSE or verbose:
            mark2, end2_ = ut.log_progress(
                '[smk_index.sccw] SCCW Sum (over daid): ', len(daid2_wx2_drvecs),
                freq=25, with_time=WITH_TOTALTIME)
        # Get lists w.r.t daids
        aid_list = list(daid2_wx2_drvecs.keys())
        # list of mappings from words to rvecs foreach daid
        # [wx2_aidrvecs_1, ..., wx2_aidrvecs_nDaids,]
        _wx2_aidrvecs_list = list(daid2_wx2_drvecs.values())
        _aidwxs_iter   = (list(wx2_aidrvecs.keys()) for wx2_aidrvecs in _wx2_aidrvecs_list)
        aidrvecs_list  = [list(wx2_aidrvecs.values()) for wx2_aidrvecs in _wx2_aidrvecs_list]
        aididf_list = [[wx2_idf[wx] for wx in aidwxs] for aidwxs in _aidwxs_iter]

    with ut.Timer('timer_orig2'):
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.check_data_smksumm(aididf_list, aidrvecs_list)
        # TODO: implement database side soft-assign
        sccw_list = [smk_scoring.sccw_summation(rvecs_list, None, idf_list, None, smk_alpha, smk_thresh)
                     for idf_list, rvecs_list in zip(aididf_list, aidrvecs_list)]

        daid2_sccw = dict(zip(aid_list, sccw_list))
    if ut.VERBOSE or verbose:
        end2_()
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')
    return daid2_sccw
Example #2
0
def OLD_compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_aids, wx2_idf, wx2_dmaws,
                           smk_alpha, smk_thresh, verbose=False):
    """
    """
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.rrr()
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)

    with ut.Timer('timer_orig1'):
        wx_sublist = np.array(wx2_drvecs.keys())
        if not ut.QUIET:
            print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
        if ut.VERBOSE or verbose:
            print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))
        # Get list of aids and rvecs w.r.t. words
        aids_list   = [wx2_aids[wx] for wx in wx_sublist]
        rvecs_list1 = [wx2_drvecs[wx] for wx in wx_sublist]
        maws_list   = [wx2_dmaws[wx] for wx in wx_sublist]
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.assert_single_assigned_maws(maws_list)
        # Group by daids first and then by word index
        daid2_wx2_drvecs = clustertool.double_group(wx_sublist, aids_list, rvecs_list1)

        # For every daid, compute its sccw using pregrouped rvecs
        # Summation over words for each aid
        if ut.VERBOSE or verbose:
            print('[smk_index.sccw] SCCW Sum (over daid): ')
        # Get lists w.r.t daids
        aid_list = list(daid2_wx2_drvecs.keys())
        # list of mappings from words to rvecs foreach daid
        # [wx2_aidrvecs_1, ..., wx2_aidrvecs_nDaids,]
        _wx2_aidrvecs_list = list(daid2_wx2_drvecs.values())
        _aidwxs_iter   = (list(wx2_aidrvecs.keys()) for wx2_aidrvecs in _wx2_aidrvecs_list)
        aidrvecs_list  = [list(wx2_aidrvecs.values()) for wx2_aidrvecs in _wx2_aidrvecs_list]
        aididf_list = [[wx2_idf[wx] for wx in aidwxs] for aidwxs in _aidwxs_iter]

    with ut.Timer('timer_orig2'):
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.check_data_smksumm(aididf_list, aidrvecs_list)
        # TODO: implement database side soft-assign
        sccw_list = [smk_scoring.sccw_summation(rvecs_list, None, idf_list, None, smk_alpha, smk_thresh)
                     for idf_list, rvecs_list in zip(aididf_list, aidrvecs_list)]

        daid2_sccw = dict(zip(aid_list, sccw_list))
    if ut.VERBOSE or verbose:
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')
    return daid2_sccw
Example #3
0
def compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf,
                       wx2_dmaws, smk_alpha, smk_thresh, verbose=False):
    """
    Computes sccw normalization scalar for the database annotations.
    This is gamma from the SMK paper.
    sccw is a self consistency critiron weight --- a scalar which ensures
    the score of K(X, X) = 1

    Args:
        idx2_daid ():
        wx2_drvecs ():
        wx2_aids ():
        wx2_idf ():
        wx2_dmaws ():
        smk_alpha ():
        smk_thresh ():

    Returns:
        daid2_sccw

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_index
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> #tup = smk_debug.testdata_compute_data_sccw(db='testdb1')
        >>> tup = smk_debug.testdata_compute_data_sccw(db='PZ_MTEST')
        >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, qparams = tup
        >>> wx2_dflags = invindex.wx2_dflags
        >>> ws2_idxs = invindex.wx2_idxs
        >>> wx2_dmaws  = invindex.wx2_dmaws
        >>> idx2_daid  = invindex.idx2_daid
        >>> daids      = invindex.daids
        >>> smk_alpha  = qparams.smk_alpha
        >>> smk_thresh = qparams.smk_thresh
        >>> wx2_idf    = wx2_idf
        >>> verbose = True
        >>> invindex.invindex_dbgstr()
        >>> invindex.report_memory()
        >>> invindex.report_memsize()
        >>> daid2_sccw = smk_index.compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws, smk_alpha, smk_thresh, verbose)
    """

    #for wx in wx_sublist:
    #    print(len(wx2_dmaws

    verbose_ = ut.VERBOSE or verbose

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)
    if not ut.QUIET:
        print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
    if verbose_:
        print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))
        mark1, end1_ = ut.log_progress(
            '[smk_index.sccw] SCCW group (by present words): ', len(wx2_drvecs),
            freq=100, with_time=WITH_TOTALTIME)

    # Group by daids first and then by word index
    # Get list of aids and rvecs w.r.t. words (ie one item per word)
    wx_sublist = np.array(list(wx2_drvecs.keys()))
    aids_perword  = [wx2_aids[wx] for wx in wx_sublist]

    # wx_list1: Lays out word indexes for each annotation
    # tx_list1: Temporary within annotation subindex + wx uniquely identifies
    # item in wx2_drvecs, wx2_dflags, and wx2_dmaws

    # Flatten out indexes to perform grouping
    flat_aids = np.hstack(aids_perword)
    count = len(flat_aids)
    txs_perword = [np.arange(aids.size) for aids in aids_perword]
    flat_txs  = np.hstack(txs_perword)
    # fromiter is faster for flat_wxs because is not a list of numpy arrays
    wxs_perword = ([wx] * len(aids) for wx, aids in zip(wx_sublist, aids_perword))
    flat_wxs  = np.fromiter(ut.iflatten(wxs_perword), hstypes.INDEX_TYPE, count)

    # Group flat indexes by annotation id
    unique_aids, annot_groupxs = clustertool.group_indices(flat_aids)

    # Wxs and Txs grouped by annotation id
    wxs_perannot = clustertool.apply_grouping_iter(flat_wxs, annot_groupxs)
    txs_perannot = clustertool.apply_grouping_iter(flat_txs, annot_groupxs)

    # Group by word inside each annotation group
    wxsubgrouping_perannot = [clustertool.group_indices(wxs)
                              for wxs in wxs_perannot]
    word_groupxs_perannot = (groupxs for wxs, groupxs in wxsubgrouping_perannot)
    txs_perword_perannot = [clustertool.apply_grouping(txs, groupxs)
                            for txs, groupxs in
                            zip(txs_perannot, word_groupxs_perannot)]
    wxs_perword_perannot = [wxs for wxs, groupxs in wxsubgrouping_perannot]

    # Group relavent data for sccw measure by word for each annotation grouping

    def _vector_subgroup_by_wx(wx2_arr, wxs_perword_perannot, txs_perword_perannot):
        return [[wx2_arr[wx].take(txs, axis=0)
                 for wx, txs in zip(wx_perword_, txs_perword_)]
                for wx_perword_, txs_perword_ in
                zip(wxs_perword_perannot, txs_perword_perannot)]

    def _scalar_subgroup_by_wx(wx2_scalar, wxs_perword_perannot):
        return [[wx2_scalar[wx] for wx in wxs] for wxs in wxs_perword_perannot]

    subgrouped_drvecs = _vector_subgroup_by_wx(wx2_drvecs, wxs_perword_perannot, txs_perword_perannot)
    subgrouped_dmaws  = _vector_subgroup_by_wx(wx2_dmaws,  wxs_perword_perannot, txs_perword_perannot)
    # If we aren't using dmaws replace it with an infinite None iterator
    #subgrouped_dmaws  = iter(lambda: None, 1)
    subgrouped_dflags = _vector_subgroup_by_wx(wx2_dflags, wxs_perword_perannot, txs_perword_perannot)
    #subgrouped_dflags  = iter(lambda: None, 1)
    subgrouped_idfs   = _scalar_subgroup_by_wx(wx2_idf, wxs_perword_perannot)

    if verbose_:
        end1_()
        mark2, end2_ = ut.log_progress(lbl='[smk_index.sccw] SCCW Sum (over daid): ',
                                        total=len(unique_aids), freq=100, with_time=WITH_TOTALTIME)
        progiter = ut.ProgressIter(lbl='[smk_index.sccw] SCCW Sum (over daid): ',
                                   total=len(unique_aids), freq=10, with_time=WITH_TOTALTIME)
    else:
        progiter = ut.identity

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_data_smksumm(subgrouped_idfs, subgrouped_drvecs)

    sccw_list = [
        smk_scoring.sccw_summation(rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh)
        for rvecs_list, flags_list, maws_list, idf_list in
        progiter(zip(subgrouped_drvecs, subgrouped_dflags, subgrouped_dmaws, subgrouped_idfs))
    ]
    daid2_sccw = dict(zip(unique_aids, sccw_list))

    if verbose_:
        end2_()
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')

    return daid2_sccw
Example #4
0
def compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf,
                       wx2_dmaws, smk_alpha, smk_thresh, verbose=False):
    """
    Computes sccw normalization scalar for the database annotations.
    This is gamma from the SMK paper.
    sccw is a self consistency critiron weight --- a scalar which ensures
    the score of K(X, X) = 1

    Args:
        idx2_daid ():
        wx2_drvecs ():
        wx2_aids ():
        wx2_idf ():
        wx2_dmaws ():
        smk_alpha ():
        smk_thresh ():

    Returns:
        daid2_sccw

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_index
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> #tup = smk_debug.testdata_compute_data_sccw(db='testdb1')
        >>> tup = smk_debug.testdata_compute_data_sccw(db='PZ_MTEST')
        >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, qparams = tup
        >>> wx2_dflags = invindex.wx2_dflags
        >>> ws2_idxs = invindex.wx2_idxs
        >>> wx2_dmaws  = invindex.wx2_dmaws
        >>> idx2_daid  = invindex.idx2_daid
        >>> daids      = invindex.daids
        >>> smk_alpha  = qparams.smk_alpha
        >>> smk_thresh = qparams.smk_thresh
        >>> wx2_idf    = wx2_idf
        >>> verbose = True
        >>> invindex.invindex_dbgstr()
        >>> invindex.report_memory()
        >>> invindex.report_memsize()
        >>> daid2_sccw = smk_index.compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws, smk_alpha, smk_thresh, verbose)
    """

    #for wx in wx_sublist:
    #    print(len(wx2_dmaws

    verbose_ = ut.VERBOSE or verbose

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)
    if not ut.QUIET:
        print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
    if verbose_:
        print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))

    # Group by daids first and then by word index
    # Get list of aids and rvecs w.r.t. words (ie one item per word)
    wx_sublist = np.array(list(wx2_drvecs.keys()))
    aids_perword  = [wx2_aids[wx] for wx in wx_sublist]

    # wx_list1: Lays out word indexes for each annotation
    # tx_list1: Temporary within annotation subindex + wx uniquely identifies
    # item in wx2_drvecs, wx2_dflags, and wx2_dmaws

    # Flatten out indexes to perform grouping
    flat_aids = np.hstack(aids_perword)
    count = len(flat_aids)
    txs_perword = [np.arange(aids.size) for aids in aids_perword]
    flat_txs  = np.hstack(txs_perword)
    # fromiter is faster for flat_wxs because is not a list of numpy arrays
    wxs_perword = ([wx] * len(aids) for wx, aids in zip(wx_sublist, aids_perword))
    flat_wxs  = np.fromiter(ut.iflatten(wxs_perword), hstypes.INDEX_TYPE, count)

    # Group flat indexes by annotation id
    unique_aids, annot_groupxs = clustertool.group_indices(flat_aids)

    # Wxs and Txs grouped by annotation id
    wxs_perannot = clustertool.apply_grouping_iter(flat_wxs, annot_groupxs)
    txs_perannot = clustertool.apply_grouping_iter(flat_txs, annot_groupxs)

    # Group by word inside each annotation group
    wxsubgrouping_perannot = [clustertool.group_indices(wxs)
                              for wxs in wxs_perannot]
    word_groupxs_perannot = (groupxs for wxs, groupxs in wxsubgrouping_perannot)
    txs_perword_perannot = [clustertool.apply_grouping(txs, groupxs)
                            for txs, groupxs in
                            zip(txs_perannot, word_groupxs_perannot)]
    wxs_perword_perannot = [wxs for wxs, groupxs in wxsubgrouping_perannot]

    # Group relavent data for sccw measure by word for each annotation grouping

    def _vector_subgroup_by_wx(wx2_arr, wxs_perword_perannot, txs_perword_perannot):
        return [[wx2_arr[wx].take(txs, axis=0)
                 for wx, txs in zip(wx_perword_, txs_perword_)]
                for wx_perword_, txs_perword_ in
                zip(wxs_perword_perannot, txs_perword_perannot)]

    def _scalar_subgroup_by_wx(wx2_scalar, wxs_perword_perannot):
        return [[wx2_scalar[wx] for wx in wxs] for wxs in wxs_perword_perannot]

    subgrouped_drvecs = _vector_subgroup_by_wx(wx2_drvecs, wxs_perword_perannot, txs_perword_perannot)
    subgrouped_dmaws  = _vector_subgroup_by_wx(wx2_dmaws,  wxs_perword_perannot, txs_perword_perannot)
    # If we aren't using dmaws replace it with an infinite None iterator
    #subgrouped_dmaws  = iter(lambda: None, 1)
    subgrouped_dflags = _vector_subgroup_by_wx(wx2_dflags, wxs_perword_perannot, txs_perword_perannot)
    #subgrouped_dflags  = iter(lambda: None, 1)
    subgrouped_idfs   = _scalar_subgroup_by_wx(wx2_idf, wxs_perword_perannot)

    if verbose_:
        progiter = ut.ProgressIter(lbl='[smk_index.sccw] SCCW Sum (over daid): ',
                                   total=len(unique_aids), freq=10, with_time=WITH_TOTALTIME)
    else:
        progiter = ut.identity

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_data_smksumm(subgrouped_idfs, subgrouped_drvecs)

    sccw_list = [
        smk_scoring.sccw_summation(rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh)
        for rvecs_list, flags_list, maws_list, idf_list in
        progiter(zip(subgrouped_drvecs, subgrouped_dflags, subgrouped_dmaws, subgrouped_idfs))
    ]
    daid2_sccw = dict(zip(unique_aids, sccw_list))

    if verbose_:
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')

    return daid2_sccw