コード例 #1
0
def compute_query_repr(annots_df, qaid, invindex, aggregate=False, alpha=3, thresh=0):
    """
    Gets query read for computations

    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, qaid, invindex = smk_debug.testdata_query_repr()
    >>> aggregate = ibs.cfg.query_cfg.smk_cfg.aggregate
    >>> alpha     = ibs.cfg.query_cfg.smk_cfg.alpha
    >>> thresh    = ibs.cfg.query_cfg.smk_cfg.thresh
    >>> query_repr = compute_query_repr(annots_df, qaid, invindex, aggregate, alpha, thresh)
    >>> (wx2_qrvecs, wx2_qaids, wx2_qfxs, query_gamma) = query_repr
    >>> assert smk_debug.check_wx2_rvecs(wx2_qrvecs), 'has nan'
    >>> invindex_dbgstr.invindex_dbgstr(invindex)


    idx2_vec = qfx2_vec
    idx2_aid = qfx2_aid
    idx2_fx = qfx2_qfx
    wx2_idxs = wx2_qfxs1
    """
    if utool.VERBOSE:
        print('[smk_index] Query Repr qaid=%r' % (qaid,))
    wx2_weight = invindex.wx2_weight
    words = invindex.words
    wordflann = invindex.wordflann
    if WITH_PANDAS:
        qfx2_vec = annots_df['vecs'][qaid]
    else:
        qfx2_vec = pdh.ensure_values(annots_df['vecs'][qaid])
    # Assign query to words
    wx2_qfxs1, qfx2_wx = assign_to_words_(wordflann, words, qfx2_vec,
                                          idx_name='fx', dense=False)  # 71.9 %
    # Hack to make implementing asmk easier, very redundant
    #qfx2_aid = pdh.IntSeries([qaid] * len(qfx2_wx), index=qfx2_wx.index, name='qfx2_aid')
    #qfx2_aid = pdh.IntSeries([qaid] * len(qfx2_wx), name='qfx2_aid')
    qfx2_aid = np.array([qaid] * len(qfx2_wx), dtype=INTEGER_TYPE)
    if WITH_PANDAS:
        qfx2_qfx = qfx2_vec.index
    else:
        qfx2_qfx = np.arange(len(qfx2_vec))
    # Compute query residuals
    wx2_qrvecs, wx2_qaids, wx2_qfxs = compute_residuals_(
        words, wx2_qfxs1, qfx2_vec, qfx2_aid, qfx2_qfx, aggregate)  # 24.8
    # Compute query gamma
    if utool.VERBOSE:
        print('[smk_index] Query Gamma alpha=%r, thresh=%r' % (alpha, thresh))
    wx_sublist = pdh.ensure_index(wx2_qrvecs).astype(np.int32)
    weight_list = pdh.ensure_values_subset(wx2_weight, wx_sublist)
    rvecs_list  = pdh.ensure_values_subset(wx2_qrvecs, wx_sublist)
    query_gamma = smk_core.gamma_summation2(rvecs_list, weight_list, alpha, thresh)
    assert query_gamma > 0, 'query gamma is not positive!'
    return wx2_qrvecs, wx2_qaids, wx2_qfxs, query_gamma
コード例 #2
0
def compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_weight,
                        alpha=3, thresh=0):
    """
    Internals step4

    Computes gamma normalization scalar for the database annotations
    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_rvecs, wx2_aids = smk_debug.testdata_raw_internals2()
    >>> alpha = ibs.cfg.query_cfg.smk_cfg.alpha
    >>> thresh = ibs.cfg.query_cfg.smk_cfg.thresh
    >>> idx2_daid  = invindex.idx2_daid
    >>> wx2_weight = wx2_idf
    >>> daids      = invindex.daids
    >>> use_cache  = USE_CACHE_GAMMA and False
    >>> daid2_gamma = compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_weight, daids, use_cache=use_cache)
    """
    # Gropuing by aid and words
    wx_sublist = pdh.ensure_values(pdh.ensure_index(wx2_rvecs))
    if utool.VERBOSE:
        print('[smk_index] Compute Gamma alpha=%r, thresh=%r: ' % (alpha, thresh))
        mark1, end1_ = utool.log_progress(
            '[smk_index] Gamma Group: ', len(wx_sublist), flushfreq=100, writefreq=50)
    rvecs_list1 = pdh.ensure_values_subset(wx2_rvecs, wx_sublist)
    aids_list  = pdh.ensure_values_subset(wx2_aids, wx_sublist)
    daid2_wx2_drvecs = utool.ddict(lambda: utool.ddict(list))
    # Group by daids first and then by word index
    for wx, aids, rvecs in zip(wx_sublist, aids_list, rvecs_list1):
        group_aids, groupxs = smk_speed.group_indicies(aids)
        rvecs_group = smk_speed.apply_grouping(rvecs, groupxs)  # 2.9 ms
        for aid, rvecs_ in zip(group_aids, rvecs_group):
            daid2_wx2_drvecs[aid][wx] = rvecs_

    if utool.VERBOSE:
        end1_()

    # For every daid, compute its gamma using pregrouped rvecs
    # Summation over words for each aid
    if utool.VERBOSE:
        mark2, end2_ = utool.log_progress(
            '[smk_index] Gamma Sum: ', len(daid2_wx2_drvecs), flushfreq=100, writefreq=25)

    aid_list          = list(daid2_wx2_drvecs.keys())
    wx2_aidrvecs_list = list(daid2_wx2_drvecs.values())
    aidwxs_list    = [list(wx2_aidrvecs.keys()) for wx2_aidrvecs in wx2_aidrvecs_list]
    aidrvecs_list  = [list(wx2_aidrvecs.values()) for wx2_aidrvecs in wx2_aidrvecs_list]
    aidweight_list = [[wx2_weight[wx] for wx in aidwxs] for aidwxs in aidwxs_list]

    #gamma_list = []
    #for weight_list, rvecs_list in zip(aidweight_list, aidrvecs_list):
    #    assert len(weight_list) == len(rvecs_list), 'one list for each word'
    #    gamma = smk_core.gamma_summation2(rvecs_list, weight_list, alpha, thresh)  # 66.8 %
    #    #weight_list = np.ones(weight_list.size)
    #    gamma_list.append(gamma)
    gamma_list = [smk_core.gamma_summation2(rvecs_list, weight_list, alpha, thresh)
                  for weight_list, rvecs_list in zip(aidweight_list, aidrvecs_list)]

    daid2_gamma = pdh.IntSeries(gamma_list, index=aid_list, name='gamma')
    if utool.VERBOSE:
        end2_()
    return daid2_gamma
コード例 #3
0
ファイル: _smk_index_withpandas.py プロジェクト: whaozl/ibeis
def compute_query_repr(annots_df,
                       qaid,
                       invindex,
                       aggregate=False,
                       alpha=3,
                       thresh=0):
    """
    Gets query read for computations

    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, qaid, invindex = smk_debug.testdata_query_repr()
    >>> aggregate = ibs.cfg.query_cfg.smk_cfg.aggregate
    >>> alpha     = ibs.cfg.query_cfg.smk_cfg.alpha
    >>> thresh    = ibs.cfg.query_cfg.smk_cfg.thresh
    >>> query_repr = compute_query_repr(annots_df, qaid, invindex, aggregate, alpha, thresh)
    >>> (wx2_qrvecs, wx2_qaids, wx2_qfxs, query_gamma) = query_repr
    >>> assert smk_debug.check_wx2_rvecs(wx2_qrvecs), 'has nan'
    >>> invindex_dbgstr.invindex_dbgstr(invindex)


    idx2_vec = qfx2_vec
    idx2_aid = qfx2_aid
    idx2_fx = qfx2_qfx
    wx2_idxs = wx2_qfxs1
    """
    if utool.VERBOSE:
        print('[smk_index] Query Repr qaid=%r' % (qaid, ))
    wx2_weight = invindex.wx2_weight
    words = invindex.words
    wordflann = invindex.wordflann
    if WITH_PANDAS:
        qfx2_vec = annots_df['vecs'][qaid]
    else:
        qfx2_vec = pdh.ensure_values(annots_df['vecs'][qaid])
    # Assign query to words
    wx2_qfxs1, qfx2_wx = assign_to_words_(wordflann,
                                          words,
                                          qfx2_vec,
                                          idx_name='fx',
                                          dense=False)  # 71.9 %
    # Hack to make implementing asmk easier, very redundant
    #qfx2_aid = pdh.IntSeries([qaid] * len(qfx2_wx), index=qfx2_wx.index, name='qfx2_aid')
    #qfx2_aid = pdh.IntSeries([qaid] * len(qfx2_wx), name='qfx2_aid')
    qfx2_aid = np.array([qaid] * len(qfx2_wx), dtype=INTEGER_TYPE)
    if WITH_PANDAS:
        qfx2_qfx = qfx2_vec.index
    else:
        qfx2_qfx = np.arange(len(qfx2_vec))
    # Compute query residuals
    wx2_qrvecs, wx2_qaids, wx2_qfxs = compute_residuals_(
        words, wx2_qfxs1, qfx2_vec, qfx2_aid, qfx2_qfx, aggregate)  # 24.8
    # Compute query gamma
    if utool.VERBOSE:
        print('[smk_index] Query Gamma alpha=%r, thresh=%r' % (alpha, thresh))
    wx_sublist = pdh.ensure_index(wx2_qrvecs).astype(np.int32)
    weight_list = pdh.ensure_values_subset(wx2_weight, wx_sublist)
    rvecs_list = pdh.ensure_values_subset(wx2_qrvecs, wx_sublist)
    query_gamma = smk_core.gamma_summation2(rvecs_list, weight_list, alpha,
                                            thresh)
    assert query_gamma > 0, 'query gamma is not positive!'
    return wx2_qrvecs, wx2_qaids, wx2_qfxs, query_gamma
コード例 #4
0
ファイル: _smk_index_withpandas.py プロジェクト: whaozl/ibeis
def compute_data_gamma_(idx2_daid,
                        wx2_rvecs,
                        wx2_aids,
                        wx2_weight,
                        alpha=3,
                        thresh=0):
    """
    Internals step4

    Computes gamma normalization scalar for the database annotations
    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_rvecs, wx2_aids = smk_debug.testdata_raw_internals2()
    >>> alpha = ibs.cfg.query_cfg.smk_cfg.alpha
    >>> thresh = ibs.cfg.query_cfg.smk_cfg.thresh
    >>> idx2_daid  = invindex.idx2_daid
    >>> wx2_weight = wx2_idf
    >>> daids      = invindex.daids
    >>> use_cache  = USE_CACHE_GAMMA and False
    >>> daid2_gamma = compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_weight, daids, use_cache=use_cache)
    """
    # Gropuing by aid and words
    wx_sublist = pdh.ensure_values(pdh.ensure_index(wx2_rvecs))
    if utool.VERBOSE:
        print('[smk_index] Compute Gamma alpha=%r, thresh=%r: ' %
              (alpha, thresh))
        mark1, end1_ = utool.log_progress('[smk_index] Gamma Group: ',
                                          len(wx_sublist),
                                          flushfreq=100,
                                          writefreq=50)
    rvecs_list1 = pdh.ensure_values_subset(wx2_rvecs, wx_sublist)
    aids_list = pdh.ensure_values_subset(wx2_aids, wx_sublist)
    daid2_wx2_drvecs = utool.ddict(lambda: utool.ddict(list))
    # Group by daids first and then by word index
    for wx, aids, rvecs in zip(wx_sublist, aids_list, rvecs_list1):
        group_aids, groupxs = smk_speed.group_indicies(aids)
        rvecs_group = smk_speed.apply_grouping(rvecs, groupxs)  # 2.9 ms
        for aid, rvecs_ in zip(group_aids, rvecs_group):
            daid2_wx2_drvecs[aid][wx] = rvecs_

    if utool.VERBOSE:
        end1_()

    # For every daid, compute its gamma using pregrouped rvecs
    # Summation over words for each aid
    if utool.VERBOSE:
        mark2, end2_ = utool.log_progress('[smk_index] Gamma Sum: ',
                                          len(daid2_wx2_drvecs),
                                          flushfreq=100,
                                          writefreq=25)

    aid_list = list(daid2_wx2_drvecs.keys())
    wx2_aidrvecs_list = list(daid2_wx2_drvecs.values())
    aidwxs_list = [
        list(wx2_aidrvecs.keys()) for wx2_aidrvecs in wx2_aidrvecs_list
    ]
    aidrvecs_list = [
        list(wx2_aidrvecs.values()) for wx2_aidrvecs in wx2_aidrvecs_list
    ]
    aidweight_list = [[wx2_weight[wx] for wx in aidwxs]
                      for aidwxs in aidwxs_list]

    #gamma_list = []
    #for weight_list, rvecs_list in zip(aidweight_list, aidrvecs_list):
    #    assert len(weight_list) == len(rvecs_list), 'one list for each word'
    #    gamma = smk_core.gamma_summation2(rvecs_list, weight_list, alpha, thresh)  # 66.8 %
    #    #weight_list = np.ones(weight_list.size)
    #    gamma_list.append(gamma)
    gamma_list = [
        smk_core.gamma_summation2(rvecs_list, weight_list, alpha, thresh)
        for weight_list, rvecs_list in zip(aidweight_list, aidrvecs_list)
    ]

    daid2_gamma = pdh.IntSeries(gamma_list, index=aid_list, name='gamma')
    if utool.VERBOSE:
        end2_()
    return daid2_gamma
コード例 #5
0
ファイル: _smkindexpandas2.py プロジェクト: Erotemic/ibeis
def compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_idf,
                        alpha=3, thresh=0):
    """
    Computes gamma normalization scalar for the database annotations
    Internals step4
    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_rvecs, wx2_aids = smk_debug.testdata_raw_internals2()
    >>> alpha = ibs.cfg.query_cfg.smk_cfg.alpha
    >>> thresh = ibs.cfg.query_cfg.smk_cfg.thresh
    >>> idx2_daid  = invindex.idx2_daid
    >>> wx2_idf = wx2_idf
    >>> daids      = invindex.daids
    >>> use_cache  = USE_CACHE_GAMMA and False
    >>> daid2_gamma = compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_idf, daids, use_cache=use_cache)
    """
    if utool.DEBUG2:
        from ibeis.model.hots.smk import smk_debug
        smk_debug.rrr()
        smk_debug.check_wx2(wx2_rvecs=wx2_rvecs, wx2_aids=wx2_aids)
    wx_sublist = pdh.ensure_values(pdh.ensure_index(wx2_rvecs))
    if utool.VERBOSE:
        print('[smk_index] Compute Gamma alpha=%r, thresh=%r: ' % (alpha, thresh))
        mark1, end1_ = utool.log_progress(
            '[smk_index] Gamma group (by word): ', len(wx_sublist),
            flushfreq=100, writefreq=50, with_totaltime=True)
    # Get list of aids and rvecs w.r.t. words
    aids_list = pdh.ensure_values_subset(wx2_aids, wx_sublist)
    rvecs_list1 = pdh.ensure_values_subset(wx2_rvecs, wx_sublist)
    # Group by daids first and then by word index
    daid2_wx2_drvecs = utool.ddict(lambda: utool.ddict(list))
    for wx, aids, rvecs in zip(wx_sublist, aids_list, rvecs_list1):
        group_aids, groupxs = clustertool.group_indicies(aids)
        rvecs_group = clustertool.apply_grouping(rvecs, groupxs)  # 2.9 ms
        for aid, rvecs_ in zip(group_aids, rvecs_group):
            daid2_wx2_drvecs[aid][wx] = rvecs_

    if utool.VERBOSE:
        end1_()

    # For every daid, compute its gamma using pregrouped rvecs
    # Summation over words for each aid
    if utool.VERBOSE:
        mark2, end2_ = utool.log_progress(
            '[smk_index] Gamma Sum (over daid): ', len(daid2_wx2_drvecs),
            flushfreq=100, writefreq=25, with_totaltime=True)
    # Get lists w.r.t daids
    aid_list          = list(daid2_wx2_drvecs.keys())
    # list of mappings from words to rvecs foreach daid
    # [wx2_aidrvecs_1, ..., wx2_aidrvecs_nDaids,]
    _wx2_aidrvecs_list = list(daid2_wx2_drvecs.values())
    _aidwxs_iter    = (list(wx2_aidrvecs.keys()) for wx2_aidrvecs in _wx2_aidrvecs_list)
    aidrvecs_list  = [list(wx2_aidrvecs.values()) for wx2_aidrvecs in _wx2_aidrvecs_list]
    aididf_list = [[wx2_idf[wx] for wx in aidwxs] for aidwxs in _aidwxs_iter]

    #gamma_list = []
    if utool.DEBUG2:
        try:
            for count, (idf_list, rvecs_list) in enumerate(zip(aididf_list, aidrvecs_list)):
                assert len(idf_list) == len(rvecs_list), 'one list for each word'
                #gamma = smk_core.gamma_summation2(rvecs_list, idf_list, alpha, thresh)
        except Exception as ex:
            utool.printex(ex)
            utool.embed()
            raise
    gamma_list = [smk_core.gamma_summation2(rvecs_list, idf_list, alpha, thresh)
                  for idf_list, rvecs_list in zip(aididf_list, aidrvecs_list)]

    if WITH_PANDAS:
        daid2_gamma = pdh.IntSeries(gamma_list, index=aid_list, name='gamma')
    else:
        daid2_gamma = dict(zip(aid_list, gamma_list))
    if utool.VERBOSE:
        end2_()

    return daid2_gamma
コード例 #6
0
def compute_data_gamma_(idx2_daid,
                        wx2_rvecs,
                        wx2_aids,
                        wx2_idf,
                        alpha=3,
                        thresh=0):
    """
    Computes gamma normalization scalar for the database annotations
    Internals step4
    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_rvecs, wx2_aids = smk_debug.testdata_raw_internals2()
    >>> alpha = ibs.cfg.query_cfg.smk_cfg.alpha
    >>> thresh = ibs.cfg.query_cfg.smk_cfg.thresh
    >>> idx2_daid  = invindex.idx2_daid
    >>> wx2_idf = wx2_idf
    >>> daids      = invindex.daids
    >>> use_cache  = USE_CACHE_GAMMA and False
    >>> daid2_gamma = compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_idf, daids, use_cache=use_cache)
    """
    if utool.DEBUG2:
        from ibeis.model.hots.smk import smk_debug
        smk_debug.rrr()
        smk_debug.check_wx2(wx2_rvecs=wx2_rvecs, wx2_aids=wx2_aids)
    wx_sublist = pdh.ensure_values(pdh.ensure_index(wx2_rvecs))
    if utool.VERBOSE:
        print('[smk_index] Compute Gamma alpha=%r, thresh=%r: ' %
              (alpha, thresh))
        mark1, end1_ = utool.log_progress(
            '[smk_index] Gamma group (by word): ',
            len(wx_sublist),
            flushfreq=100,
            writefreq=50,
            with_totaltime=True)
    # Get list of aids and rvecs w.r.t. words
    aids_list = pdh.ensure_values_subset(wx2_aids, wx_sublist)
    rvecs_list1 = pdh.ensure_values_subset(wx2_rvecs, wx_sublist)
    # Group by daids first and then by word index
    daid2_wx2_drvecs = utool.ddict(lambda: utool.ddict(list))
    for wx, aids, rvecs in zip(wx_sublist, aids_list, rvecs_list1):
        group_aids, groupxs = clustertool.group_indicies(aids)
        rvecs_group = clustertool.apply_grouping(rvecs, groupxs)  # 2.9 ms
        for aid, rvecs_ in zip(group_aids, rvecs_group):
            daid2_wx2_drvecs[aid][wx] = rvecs_

    if utool.VERBOSE:
        end1_()

    # For every daid, compute its gamma using pregrouped rvecs
    # Summation over words for each aid
    if utool.VERBOSE:
        mark2, end2_ = utool.log_progress(
            '[smk_index] Gamma Sum (over daid): ',
            len(daid2_wx2_drvecs),
            flushfreq=100,
            writefreq=25,
            with_totaltime=True)
    # Get lists w.r.t daids
    aid_list = list(daid2_wx2_drvecs.keys())
    # list of mappings from words to rvecs foreach daid
    # [wx2_aidrvecs_1, ..., wx2_aidrvecs_nDaids,]
    _wx2_aidrvecs_list = list(daid2_wx2_drvecs.values())
    _aidwxs_iter = (list(wx2_aidrvecs.keys())
                    for wx2_aidrvecs in _wx2_aidrvecs_list)
    aidrvecs_list = [
        list(wx2_aidrvecs.values()) for wx2_aidrvecs in _wx2_aidrvecs_list
    ]
    aididf_list = [[wx2_idf[wx] for wx in aidwxs] for aidwxs in _aidwxs_iter]

    #gamma_list = []
    if utool.DEBUG2:
        try:
            for count, (idf_list, rvecs_list) in enumerate(
                    zip(aididf_list, aidrvecs_list)):
                assert len(idf_list) == len(
                    rvecs_list), 'one list for each word'
                #gamma = smk_core.gamma_summation2(rvecs_list, idf_list, alpha, thresh)
        except Exception as ex:
            utool.printex(ex)
            utool.embed()
            raise
    gamma_list = [
        smk_core.gamma_summation2(rvecs_list, idf_list, alpha, thresh)
        for idf_list, rvecs_list in zip(aididf_list, aidrvecs_list)
    ]

    if WITH_PANDAS:
        daid2_gamma = pdh.IntSeries(gamma_list, index=aid_list, name='gamma')
    else:
        daid2_gamma = dict(zip(aid_list, gamma_list))
    if utool.VERBOSE:
        end2_()

    return daid2_gamma