Example #1
def new_word_index(aid_list=[],
                   vecs_list=[],
                   flann_params={},
                   flann_cachedir=None,
                   indexer_cfgstr='',
                   hash_rowids=True,
                   use_cache=not NOCACHE_WORD,
                   use_params_hash=True):
    print('[windex] building WordIndex object')
    _check_input(aid_list, vecs_list)
    # Create indexes into the input aids
    ax_list = np.arange(len(aid_list))
    idx2_vec, idx2_ax, idx2_fx = invert_index(vecs_list, ax_list)
    if hash_rowids:
        # Fingerprint
        aids_hashstr = utool.hashstr_arr(aid_list, '_AIDS')
        cfgstr = aids_hashstr + indexer_cfgstr
    else:
        # Don't hash rowids when given enough info in indexer_cfgstr
        cfgstr = indexer_cfgstr
    # Build/Load the flann index
    flann = nntool.flann_cache(
        idx2_vec, **{
            'cache_dir': flann_cachedir,
            'cfgstr': cfgstr,
            'flann_params': flann_params,
            'use_cache': use_cache,
            'use_params_hash': use_params_hash
        })
    ax2_aid = np.array(aid_list)
    windex = WordIndex(ax2_aid, idx2_vec, idx2_ax, idx2_fx, flann)
    return windex
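
What invert_index produces is the core data structure in all of these builders: every per-annotation descriptor array stacked into one searchable matrix, plus parallel arrays mapping each stacked row back to its source. A minimal pure-NumPy sketch of that idea (toy data; invert_index itself is the project's helper and may differ in detail):

import numpy as np

# Two hypothetical annotations with 3 and 2 SIFT-like descriptors each
vecs_list = [np.ones((3, 128), dtype=np.uint8),
             np.ones((2, 128), dtype=np.uint8)]
ax_list = np.arange(len(vecs_list))
# Stack every descriptor into one array the index can search over
idx2_vec = np.vstack(vecs_list)
# For each stacked row, record its source annotation (ax) and feature (fx)
idx2_ax = np.repeat(ax_list, [len(v) for v in vecs_list])
idx2_fx = np.concatenate([np.arange(len(v)) for v in vecs_list])
assert idx2_vec.shape[0] == len(idx2_ax) == len(idx2_fx)  # 5 rows total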
Example #2
def build_flann_inverted_index(ibs, aid_list):
    """
    Build an inverted index (using FLANN)
    """
    try:
        if len(aid_list) == 0:
            msg = ('len(aid_list) == 0\n'
                   'Cannot build inverted index without features!')
            raise AssertionError(msg)
        dx2_desc, dx2_aid, dx2_fx = aggregate_descriptors(ibs, aid_list)
    except Exception as ex:
        infostr = ibs.get_infostr()  # NOQA
        dbname = ibs.get_dbname()  # NOQA
        num_images = ibs.get_num_images()  # NOQA
        num_annotations = ibs.get_num_annotations()  # NOQA
        num_names = ibs.get_num_names()  # NOQA
        utool.printex(ex, '', 'cannot build inverted index', locals().keys())
        raise
    # Build/Load the flann index
    flann_cfgstr = get_flann_cfgstr(ibs, aid_list)
    flann_params = {'algorithm': 'kdtree', 'trees': 4}
    precomp_kwargs = {'cache_dir': ibs.get_flann_cachedir(),
                      'cfgstr': flann_cfgstr,
                      'flann_params': flann_params,
                      'force_recompute': NOCACHE_FLANN}
    flann = nntool.flann_cache(dx2_desc, **precomp_kwargs)
    return dx2_desc, dx2_aid, dx2_fx, flann
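
A hedged usage sketch for this builder (assumes an ibeis test database opens this way; the database name is illustrative, and nn_index is the standard pyflann query method):

import ibeis
ibs = ibeis.opendb('testdb1')    # hypothetical test database
aid_list = ibs.get_valid_aids()
dx2_desc, dx2_aid, dx2_fx, flann = build_flann_inverted_index(ibs, aid_list)
# Look up the 5 nearest database descriptors for a few query rows
qfx2_dx, qfx2_dist = flann.nn_index(dx2_desc[:10], 5)
# Map the nearest hits back to their owning annotations
print(dx2_aid[qfx2_dx[:, 0]])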
Example #3
def index_data_annots(annots_df,
                      daids,
                      words,
                      with_internals=True,
                      aggregate=False,
                      alpha=3,
                      thresh=0,
                      with_pandas=WITH_PANDAS):
    """
    Builds the initial inverted index from a dataframe, daids, and words.
    Optionally builds the internals of the inverted structure.
    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, daids, qaids, words = smk_debug.testdata_words()
    >>> with_internals = False
    >>> invindex = index_data_annots(annots_df, daids, words, with_internals)

    #>>> print(utool.hashstr(repr(list(invindex.__dict__.values()))))
    #v8+i5i8+55j0swio
    """
    if utool.VERBOSE:
        print('[smk_index] index_data_annots')
    flann_params = {}
    _words = pdh.ensure_values(words)
    wordflann = nntool.flann_cache(_words,
                                   flann_params=flann_params,
                                   appname='smk')
    _daids = pdh.ensure_values(daids)
    _vecs_list = pdh.ensure_2d_values(annots_df['vecs'][_daids])
    _idx2_dvec, _idx2_daid, _idx2_dfx = nntool.invertable_stack(
        _vecs_list, _daids)

    # Pandasify
    if with_pandas:
        idx_series = pdh.IntIndex(np.arange(len(_idx2_daid)), name='idx')
        idx2_dfx = pdh.IntSeries(_idx2_dfx, index=idx_series, name='fx')
        idx2_daid = pdh.IntSeries(_idx2_daid, index=idx_series, name='aid')
        idx2_dvec = pd.DataFrame(_idx2_dvec,
                                 index=idx_series,
                                 columns=VEC_COLUMNS)
    else:
        idx2_dfx = _idx2_dfx
        idx2_daid = _idx2_daid
        idx2_dvec = _idx2_dvec

    invindex = InvertedIndex(words, wordflann, idx2_dvec, idx2_daid, idx2_dfx,
                             daids)
    if with_internals:
        compute_data_internals_(invindex, aggregate, alpha, thresh)  # 99%
    return invindex
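
The with_pandas branch changes only the container types: the same flat arrays are wrapped so rows can be selected and aligned by label. A rough stand-in using stock pandas (pdh.IntIndex and pdh.IntSeries are project wrappers; this sketch assumes they behave like the plain types):

import numpy as np
import pandas as pd

_idx2_daid = np.array([7, 7, 9])   # toy: rows 0-1 from annot 7, row 2 from annot 9
_idx2_dfx = np.array([0, 1, 0])
idx_series = pd.Index(np.arange(len(_idx2_daid)), name='idx')
idx2_daid = pd.Series(_idx2_daid, index=idx_series, name='aid')
idx2_dfx = pd.Series(_idx2_dfx, index=idx_series, name='fx')
# Label-based selection: which stacked rows belong to annot 7?
print(idx2_daid[idx2_daid == 7].index.values)  # -> [0 1]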
Example #4
def build_flann_inverted_index(ibs, aid_list, **kwargs):
    """
    Build an inverted index (using FLANN)
    """
    # Aggregate descriptors
    dx2_desc, dx2_aid, dx2_fx = build_ibs_inverted_descriptor_index(ibs, aid_list)
    # hash which annotations are input
    indexed_cfgstr = get_indexed_cfgstr(ibs, aid_list)
    flann_params = {'algorithm': 'kdtree', 'trees': 4}
    flann_cachedir = ibs.get_flann_cachedir()
    precomp_kwargs = {'cache_dir': flann_cachedir,
                      'cfgstr': indexed_cfgstr,
                      'flann_params': flann_params,
                      'use_cache': kwargs.get('use_cache', not NOCACHE_FLANN)}
    # Build/Load the flann index
    flann = nntool.flann_cache(dx2_desc, **precomp_kwargs)
    return dx2_desc, dx2_aid, dx2_fx, flann
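
All of these variants rely on the same cache contract: the cfgstr must fingerprint both the indexed annotations and the FLANN parameters, so identical inputs hit the same cache file and changed inputs miss. A toy sketch of that fingerprinting idea (hashlib stands in for the utool hash helpers these examples actually use):

import hashlib
import numpy as np

def toy_indexed_cfgstr(aid_list, flann_params):
    # Hash the annotation rowids and the (sorted) parameter dict together
    aid_bytes = np.asarray(aid_list).tobytes()
    param_bytes = repr(sorted(flann_params.items())).encode()
    digest = hashlib.sha1(aid_bytes + param_bytes).hexdigest()[:16]
    return '_AIDS(%d,%s)' % (len(aid_list), digest)

print(toy_indexed_cfgstr([1, 2, 3], {'algorithm': 'kdtree', 'trees': 4}))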
Example #5
def index_data_annots(annots_df, daids, words, with_internals=True):
    """
    Create inverted index for database annotations
    >>> from ibeis.model.hots.smk.smk import *  # NOQA
    >>> ibs, annots_df, taids, daids, qaids, nWords = testdata()
    >>> words = learn_visual_words(annots_df, taids, nWords)
    >>> with_internals = True
    >>> invindex = index_data_annots(annots_df, daids, words, with_internals)
    """
    vecs_list = ensure_values(annots_df['vecs'][daids])
    flann_params = {}
    cache_dir = utool.get_app_resource_dir('smk')
    wordflann = nntool.flann_cache(words,
                                   flann_params=flann_params,
                                   cache_dir=cache_dir)
    _daids = ensure_values(daids)
    idx2_dvec, idx2_daid, idx2_dfx = nntool.invertable_stack(vecs_list, _daids)
    invindex = InvertedIndex(words, wordflann, idx2_dvec, idx2_daid, idx2_dfx,
                             _daids)
    if with_internals:
        invindex.compute_internals()
    return invindex
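
Once cached, wordflann's job is nearest-word assignment: mapping raw descriptors onto the vocabulary. A hedged sketch of that lookup (nn_index is the standard pyflann query method; wordflann is the index built above, and the descriptors are toy data):

import numpy as np

vecs = np.random.rand(5, 128).astype(np.float32)  # hypothetical descriptors
wx_arr, dist_arr = wordflann.nn_index(vecs, 1)    # nearest word per descriptor
print(wx_arr)  # the visual word assigned to each of the 5 descriptors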
Example #6
def index_data_annots(annots_df, daids, words, qparams, with_internals=True,
                      memtrack=None, delete_rawvecs=False):
    """
    Builds the initial inverted index from a dataframe, daids, and words.
    Optionally builds the internals of the inverted structure.

    Args:
        annots_df (DataFrame): annotation data with 'vecs' and 'labels' columns
        daids (list): database annotation rowids to index
        words (ndarray): visual word centroids
        qparams: query parameters (taken from qreq_.qparams)
        with_internals (bool): also precompute the inverted-index internals
        memtrack: memory debugging object
        delete_rawvecs (bool): forwarded to compute_data_internals_

    Returns:
        invindex (InvertedIndex): inverted index over the database annotations

    Example:
        >>> from ibeis.algo.hots.smk.smk_repr import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, qreq_, words = smk_debug.testdata_words()
        >>> qparams = qreq_.qparams
        >>> with_internals = False
        >>> invindex = index_data_annots(annots_df, daids, words, qparams, with_internals)

    Ignore:
        #>>> print(ut.hashstr(repr(list(invindex.__dict__.values()))))
        #v8+i5i8+55j0swio

    Auto:
        from ibeis.algo.hots.smk import smk_repr
        import utool as ut
        ut.rrrr()
        print(ut.make_default_docstr(smk_repr.index_data_annots))
    """
    if not ut.QUIET:
        print('[smk_repr] index_data_annots')
    flann_params = {}
    # Compute fast lookup index for the words
    wordflann = nntool.flann_cache(words, flann_params=flann_params, appname='smk')
    _vecs_list = annots_df['vecs'][daids]
    _label_list = annots_df['labels'][daids]
    idx2_dvec, idx2_daid, idx2_dfx = nntool.invertible_stack(_vecs_list, daids)

    # TODO:
    # Need to individually cache residual vectors.
    # rvecs_list = annots_df['rvecs'][daids]
    #
    # Residual vectors depend on
    # * nearest word (word assignment)
    # * original vectors
    # * multiassignment

    daid2_label = dict(zip(daids, _label_list))

    invindex = InvertedIndex(words, wordflann, idx2_dvec, idx2_daid, idx2_dfx,
                             daids, daid2_label)
    # Decrement reference count so memory can be cleared in the next function
    del words, idx2_dvec, idx2_daid, idx2_dfx, daids, daid2_label
    del _vecs_list, _label_list
    if with_internals:
        compute_data_internals_(invindex, qparams, memtrack=memtrack,
                                delete_rawvecs=delete_rawvecs)  # 99%
    return invindex
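
The del block above is a deliberate memory tactic: dropping this frame's references lets CPython reclaim the large arrays as soon as the last remaining reference disappears. A minimal illustration of that refcounting behavior:

import sys
import numpy as np

big = np.zeros((10000, 128))
alias = big
print(sys.getrefcount(big))  # >= 3: big, alias, and getrefcount's argument
del alias                    # one reference gone; buffer still alive via big
del big                      # last reference gone; the NumPy buffer is freed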