Example #1
def testdata_raw_internals1_5(**kwargs):
    """
    Test helper; builds the raw inverted-index internals up to the idf weights.

    Example:
        >>> from ibeis.algo.hots.smk.smk_debug import *  # NOQA
    """
    # imports needed to make this snippet self-contained
    # (np and smk_index are presumably module-level imports in the original file)
    import numpy as np
    from ibeis.algo.hots.smk import smk_debug
    from ibeis.algo.hots.smk import smk_index
    ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams = smk_debug.testdata_raw_internals1(**kwargs)
    print('[smk_debug] testdata_raw_internals1_5')
    words     = invindex.words
    wx_series = np.arange(len(words))
    idx2_aid  = invindex.idx2_daid
    wx2_idf = smk_index.compute_word_idf_(wx_series, wx2_idxs, idx2_aid, daids)
    invindex.wx2_idf = wx2_idf
    return ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams
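
For context, the idf weights built here follow the standard inverse-document-frequency weighting used in the SMK literature. The sketch below is a hypothetical stand-in for smk_index.compute_word_idf_, not the ibeis implementation; the function name and the ln(N / n_w) formula are assumptions.

import numpy as np

def sketch_word_idf(wx2_idxs, idx2_aid, daids):
    """Hypothetical sketch: idf[wx] = ln(N / n_wx), where N is the number of
    database annotations and n_wx is how many annotations have at least one
    descriptor assigned to word wx."""
    nTotalDocs = len(daids)
    wx2_idf = {}
    for wx, idxs in wx2_idxs.items():
        # annotations (documents) that contain this visual word
        nDocsWithWord = len({idx2_aid[idx] for idx in idxs})
        wx2_idf[wx] = np.log(nTotalDocs / max(nDocsWithWord, 1))
    return wx2_idf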
Example #2
def testdata_raw_internals1_5(**kwargs):
    """
    Test helper; builds the raw inverted-index internals up to the idf weights.

    Example:
        >>> from ibeis.algo.hots.smk.smk_debug import *  # NOQA
    """
    # imports needed to make this snippet self-contained
    # (np and smk_index are presumably module-level imports in the original file)
    import numpy as np
    from ibeis.algo.hots.smk import smk_debug
    from ibeis.algo.hots.smk import smk_index
    ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams = smk_debug.testdata_raw_internals1(
        **kwargs)
    print('[smk_debug] testdata_raw_internals1_5')
    words = invindex.words
    wx_series = np.arange(len(words))
    idx2_aid = invindex.idx2_daid
    wx2_idf = smk_index.compute_word_idf_(wx_series, wx2_idxs, idx2_aid, daids)
    invindex.wx2_idf = wx2_idf
    return ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams
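
A hypothetical usage sketch of the helper above; the db='testdb1' keyword is an assumption about what the underlying test-data loader accepts and is shown only for illustration.

from ibeis.algo.hots.smk import smk_debug

# db='testdb1' is assumed to be forwarded to the underlying test-data loader
tup = smk_debug.testdata_raw_internals1_5(db='testdb1')
ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams = tup
# the inverted index now carries one idf weight per visual word
print('%d words have idf weights' % len(invindex.wx2_idf))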
Example #3
def compute_data_internals_(invindex, qparams, memtrack=None,
                            delete_rawvecs=True):
    """
    Builds each of the inverted index internals.

    Args:
        invindex (InvertedIndex): object for fast vocab lookup
        qparams (QueryParams): hyper-parameters
        memtrack (None): optional utool MemoryTracker for memory debugging (disabled by default)
        delete_rawvecs (bool): if True, free the raw descriptor vectors once residuals are computed

    Returns:
        None

    Example:
        >>> from ibeis.algo.hots.smk.smk_repr import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex, qreq_ = smk_debug.testdata_raw_internals0()
        >>> compute_data_internals_(invindex, qreq_.qparams)

    Ignore:
        idx2_vec = idx2_dvec
        wx2_maws = _wx2_maws  # NOQA
    """
    # Get information
    # np, ut (utool), and smk_index are used below; imported here so the
    # snippet stands alone (presumably module-level imports in the original file)
    import numpy as np
    import utool as ut
    from ibeis.algo.hots.smk import smk_index
    #if memtrack is None:
    #    memtrack = ut.MemoryTracker('[DATA INTERNALS ENTRY]')

    #memtrack.report('[DATA INTERNALS1]')

    #
    aggregate             = qparams.aggregate
    smk_alpha             = qparams.smk_alpha
    smk_thresh            = qparams.smk_thresh
    #
    massign_alpha         = qparams.massign_alpha
    massign_sigma         = qparams.massign_sigma
    massign_equal_weights = qparams.massign_equal_weights
    #
    vocab_weighting       = qparams.vocab_weighting
    #
    nAssign = 1  # single assignment for database side

    idx2_vec  = invindex.idx2_dvec
    idx2_dfx  = invindex.idx2_dfx
    idx2_daid = invindex.idx2_daid
    daids     = invindex.daids
    wordflann = invindex.wordflann
    words     = invindex.words
    daid2_label = invindex.daid2_label
    wx_series = np.arange(len(words))
    #memtrack.track_obj(idx2_vec, 'idx2_vec')
    if not ut.QUIET:
        print('[smk_repr] compute_data_internals_')
    if ut.VERBOSE:
        print('[smk_repr] * len(daids) = %r' % (len(daids),))
        print('[smk_repr] * len(words) = %r' % (len(words),))
        print('[smk_repr] * len(idx2_vec) = %r' % (len(idx2_vec),))
        print('[smk_repr] * aggregate = %r' % (aggregate,))
        print('[smk_repr] * smk_alpha = %r' % (smk_alpha,))
        print('[smk_repr] * smk_thresh = %r' % (smk_thresh,))

    # Try to use the cache
    #cfgstr = ut.hashstr_arr(words, 'words') + qparams.feat_cfgstr
    #cachekw = dict(
        #cfgstr=cfgstr,
        #appname='smk_test'
    #)
    #invindex_cache = ut.Cacher('inverted_index', **cachekw)
    #try:
    #    raise IOError('cache is off')
    #    #cachetup = invindex_cache.load()
    #    #(idx2_wxs, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, wx2_fxs, wx2_maws, daid2_sccw) = cachetup
    #    invindex.idx2_dvec = None
    #except IOError as ex:
    # Database word assignments (perform single assignment on database side)
    wx2_idxs, _wx2_maws, idx2_wxs = smk_index.assign_to_words_(
        wordflann, words, idx2_vec, nAssign, massign_alpha, massign_sigma,
        massign_equal_weights)
    if ut.DEBUG2:
        assert len(idx2_wxs) == len(idx2_vec)
        assert len(wx2_idxs.keys()) == len(_wx2_maws.keys())
        assert len(wx2_idxs.keys()) <= len(words)
        try:
            assert len(wx2_idxs.keys()) == len(words)
        except AssertionError as ex:
            ut.printex(ex, iswarning=True)
    # Database word inverse-document-frequency (idf weights)
    wx2_idf = smk_index.compute_word_idf_(
        wx_series, wx2_idxs, idx2_daid, daids, daid2_label, vocab_weighting,
        verbose=True)
    if ut.DEBUG2:
        assert len(wx2_idf) == len(wx2_idf.keys())
    # Compute (normalized) residual vectors and inverse mappings
    wx2_drvecs, wx2_aids, wx2_fxs, wx2_dmaws, wx2_dflags = smk_index.compute_residuals_(
        words, wx2_idxs, _wx2_maws, idx2_vec, idx2_daid, idx2_dfx,
        aggregate, verbose=True)
    if not ut.QUIET:
        print('[smk_repr] unloading idx2_vec')
    if delete_rawvecs:
        # Try to save some memory
        del _wx2_maws
        invindex.idx2_dvec = None
        del idx2_vec
    # Compute annotation normalization factor
    daid2_sccw = smk_index.compute_data_sccw_(
        idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws, smk_alpha,
        smk_thresh, verbose=True)
    # Cache save
    #cachetup = (idx2_wxs, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, wx2_fxs, wx2_dmaws, daid2_sccw)
    #invindex_cache.save(cachetup)

    # Store information
    invindex.idx2_wxs    = idx2_wxs   # stacked index -> word indexes (might not be needed)
    invindex.wx2_idxs    = wx2_idxs
    invindex.wx2_idf     = wx2_idf
    invindex.wx2_drvecs  = wx2_drvecs
    invindex.wx2_dflags  = wx2_dflags  # flag nan rvecs
    invindex.wx2_aids    = wx2_aids    # needed for asmk
    invindex.wx2_fxs     = wx2_fxs     # needed for asmk
    invindex.wx2_dmaws   = wx2_dmaws   # needed for asmk
    invindex.daid2_sccw  = daid2_sccw
    #memtrack.report('[DATA INTERNALS3]')

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_invindex_wx2(invindex)
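
The compute_residuals_ step above is the core of SMK: each descriptor is replaced by its L2-normalized residual to the assigned word center, and with aggregate=True the residuals belonging to one annotation are summed per word and renormalized. The following is a minimal sketch of that idea under those assumptions, not the ibeis implementation; the function name and the exact sign convention of the residual are assumptions.

import numpy as np

def sketch_residuals(words, wx2_idxs, idx2_vec, idx2_daid, aggregate=True):
    """Hypothetical sketch of per-word residual vectors (aggregated SMK)."""
    wx2_rvecs = {}
    idx2_daid = np.asarray(idx2_daid)
    for wx, idxs in wx2_idxs.items():
        idxs = np.asarray(idxs, dtype=np.int64)
        # residual of each descriptor to its assigned word center
        rvecs = words[wx][None, :].astype(np.float64) - idx2_vec[idxs].astype(np.float64)
        norms = np.linalg.norm(rvecs, axis=1, keepdims=True)
        rvecs = np.divide(rvecs, norms, out=np.zeros_like(rvecs), where=norms > 0)
        if aggregate:
            # sum residuals of the same annotation, then renormalize
            aids = idx2_daid[idxs]
            agg = [rvecs[aids == aid].sum(axis=0) for aid in np.unique(aids)]
            rvecs = np.vstack([v / (np.linalg.norm(v) or 1.0) for v in agg])
        wx2_rvecs[wx] = rvecs
    return wx2_rvecs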
Example #4
def compute_data_internals_(invindex,
                            qparams,
                            memtrack=None,
                            delete_rawvecs=True):
    """
    Builds each of the inverted index internals.

    Args:
        invindex (InvertedIndex): object for fast vocab lookup
        qparams (QueryParams): hyper-parameters
        memtrack (None): optional utool MemoryTracker for memory debugging (disabled by default)
        delete_rawvecs (bool): if True, free the raw descriptor vectors once residuals are computed

    Returns:
        None

    Example:
        >>> from ibeis.algo.hots.smk.smk_repr import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex, qreq_ = smk_debug.testdata_raw_internals0()
        >>> compute_data_internals_(invindex, qreq_.qparams)

    Ignore:
        idx2_vec = idx2_dvec
        wx2_maws = _wx2_maws  # NOQA
    """
    # Get information
    # np, ut (utool), and smk_index are used below; imported here so the
    # snippet stands alone (presumably module-level imports in the original file)
    import numpy as np
    import utool as ut
    from ibeis.algo.hots.smk import smk_index
    #if memtrack is None:
    #    memtrack = ut.MemoryTracker('[DATA INTERNALS ENTRY]')

    #memtrack.report('[DATA INTERNALS1]')

    #
    aggregate = qparams.aggregate
    smk_alpha = qparams.smk_alpha
    smk_thresh = qparams.smk_thresh
    #
    massign_alpha = qparams.massign_alpha
    massign_sigma = qparams.massign_sigma
    massign_equal_weights = qparams.massign_equal_weights
    #
    vocab_weighting = qparams.vocab_weighting
    #
    nAssign = 1  # single assignment for database side

    idx2_vec = invindex.idx2_dvec
    idx2_dfx = invindex.idx2_dfx
    idx2_daid = invindex.idx2_daid
    daids = invindex.daids
    wordflann = invindex.wordflann
    words = invindex.words
    daid2_label = invindex.daid2_label
    wx_series = np.arange(len(words))
    #memtrack.track_obj(idx2_vec, 'idx2_vec')
    if not ut.QUIET:
        print('[smk_repr] compute_data_internals_')
    if ut.VERBOSE:
        print('[smk_repr] * len(daids) = %r' % (len(daids), ))
        print('[smk_repr] * len(words) = %r' % (len(words), ))
        print('[smk_repr] * len(idx2_vec) = %r' % (len(idx2_vec), ))
        print('[smk_repr] * aggregate = %r' % (aggregate, ))
        print('[smk_repr] * smk_alpha = %r' % (smk_alpha, ))
        print('[smk_repr] * smk_thresh = %r' % (smk_thresh, ))

    # Try to use the cache
    #cfgstr = ut.hashstr_arr(words, 'words') + qparams.feat_cfgstr
    #cachekw = dict(
        #cfgstr=cfgstr,
        #appname='smk_test'
    #)
    #invindex_cache = ut.Cacher('inverted_index', **cachekw)
    #try:
    #    raise IOError('cache is off')
    #    #cachetup = invindex_cache.load()
    #    #(idx2_wxs, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, wx2_fxs, wx2_maws, daid2_sccw) = cachetup
    #    invindex.idx2_dvec = None
    #except IOError as ex:
    # Database word assignments (perform single assignment on database side)
    wx2_idxs, _wx2_maws, idx2_wxs = smk_index.assign_to_words_(
        wordflann, words, idx2_vec, nAssign, massign_alpha, massign_sigma,
        massign_equal_weights)
    if ut.DEBUG2:
        assert len(idx2_wxs) == len(idx2_vec)
        assert len(wx2_idxs.keys()) == len(_wx2_maws.keys())
        assert len(wx2_idxs.keys()) <= len(words)
        try:
            assert len(wx2_idxs.keys()) == len(words)
        except AssertionError as ex:
            ut.printex(ex, iswarning=True)
    # Database word inverse-document-frequency (idf weights)
    wx2_idf = smk_index.compute_word_idf_(wx_series,
                                          wx2_idxs,
                                          idx2_daid,
                                          daids,
                                          daid2_label,
                                          vocab_weighting,
                                          verbose=True)
    if ut.DEBUG2:
        assert len(wx2_idf) == len(wx2_idf.keys())
    # Compute (normalized) residual vectors and inverse mappings
    wx2_drvecs, wx2_aids, wx2_fxs, wx2_dmaws, wx2_dflags = smk_index.compute_residuals_(
        words,
        wx2_idxs,
        _wx2_maws,
        idx2_vec,
        idx2_daid,
        idx2_dfx,
        aggregate,
        verbose=True)
    if not ut.QUIET:
        print('[smk_repr] unloading idx2_vec')
    if delete_rawvecs:
        # Try to save some memory
        del _wx2_maws
        invindex.idx2_dvec = None
        del idx2_vec
    # Compute annotation normalization factor
    daid2_sccw = smk_index.compute_data_sccw_(idx2_daid,
                                              wx2_drvecs,
                                              wx2_dflags,
                                              wx2_aids,
                                              wx2_idf,
                                              wx2_dmaws,
                                              smk_alpha,
                                              smk_thresh,
                                              verbose=True)
    # Cache save
    #cachetup = (idx2_wxs, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, wx2_fxs, wx2_dmaws, daid2_sccw)
    #invindex_cache.save(cachetup)

    # Store information
    invindex.idx2_wxs = idx2_wxs  # stacked index -> word indexes (might not be needed)
    invindex.wx2_idxs = wx2_idxs
    invindex.wx2_idf = wx2_idf
    invindex.wx2_drvecs = wx2_drvecs
    invindex.wx2_dflags = wx2_dflags  # flag nan rvecs
    invindex.wx2_aids = wx2_aids  # needed for asmk
    invindex.wx2_fxs = wx2_fxs  # needed for asmk
    invindex.wx2_dmaws = wx2_dmaws  # needed for asmk
    invindex.daid2_sccw = daid2_sccw
    #memtrack.report('[DATA INTERNALS3]')

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_invindex_wx2(invindex)
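
Taken together with the docstring Example, a hypothetical end-to-end invocation would look like the sketch below; it assumes an ibeis test database is available, and the final print is only for inspection.

from ibeis.algo.hots.smk import smk_debug
from ibeis.algo.hots.smk.smk_repr import compute_data_internals_

# build the raw internals, then populate the inverted index in place
ibs, annots_df, daids, qaids, invindex, qreq_ = smk_debug.testdata_raw_internals0()
compute_data_internals_(invindex, qreq_.qparams)
# per-annotation normalization factors are now available for (a)smk scoring
print('%d annotations have sccw normalizers' % len(invindex.daid2_sccw))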