Beispiel #1
0
def compute_nonagg_residuals_pandas(words, wx_sublist, wx2_idxs, idx2_vec):
    """
    Reference implementation of the non-aggregated residual computation that
    works on ``.values``-bearing containers. VERY SLOW. DEBUG USE ONLY

    For every word index ``wx`` in ``wx_sublist`` the rows of ``idx2_vec``
    selected by ``wx2_idxs[wx]`` are handed, together with the one-row slice
    ``words.values[wx:wx + 1]``, to ``smk_core.get_norm_rvecs``.

    Args:
        words: container exposing ``.values`` (presumably a pandas object
            holding the word vectors - TODO confirm)
        wx_sublist: iterable of word indexes that also exposes ``.size``
        wx2_idxs: lookup from a word index to an object exposing ``.values``
        idx2_vec: container whose ``take(idxs)`` result exposes ``.values``

    Returns:
        ndarray: object array with ``wx_sublist.size`` slots; slot ``i``
        stores whatever ``smk_core.get_norm_rvecs`` returned for the i-th
        word index

    Ignore:
        words = words.values
        wxlist = [wx]
        ### index test
        %timeit words[wx:wx + 1]      # 0.334 us
        %timeit words[wx, np.newaxis] # 1.05 us
        %timeit words[np.newaxis, wx] # 1.05 us
        %timeit words.take(wxlist, axis=0) # 1.6 us
        ### pandas test
        %timeit words.values[wx:wx + 1]      # 7.6 us
        %timeit words[wx:wx + 1].values      # 84.9 us
    """
    # Object array: each slot stores one result, filled by position below
    residuals = np.empty(wx_sublist.size, dtype=np.ndarray)
    for slot, wx in enumerate(wx_sublist):
        member_idxs = wx2_idxs[wx].values
        member_vecs = idx2_vec.take(member_idxs).values
        # slice (not a scalar index) so the word stays a one-row selection
        word_row = words.values[wx:wx + 1]
        residuals[slot] = smk_core.get_norm_rvecs(member_vecs, word_row)
    return residuals
Beispiel #2
0
def compute_nonagg_residuals_pandas(words, wx_sublist, wx2_idxs, idx2_vec):
    """
    Debug-only variant of the non-aggregated residual computation that reads
    its inputs through ``.values``. VERY SLOW. DEBUG USE ONLY

    Each word index ``wx`` from ``wx_sublist`` is processed independently:
    ``wx2_idxs[wx].values`` selects rows of ``idx2_vec`` via ``take``, and
    those rows plus ``words.values[wx:wx + 1]`` are passed to
    ``smk_core.get_norm_rvecs``.

    Args:
        words: object with a ``.values`` attribute that supports slicing
            (presumably the word vectors - TODO confirm)
        wx_sublist: iterable of word indexes; must provide ``.size``
        wx2_idxs: indexable by word index; each entry provides ``.values``
        idx2_vec: object providing ``take(idxs)`` whose result has ``.values``

    Returns:
        ndarray: object-dtype array of length ``wx_sublist.size`` whose i-th
        entry is the ``smk_core.get_norm_rvecs`` result for the i-th word

    Ignore:
        words = words.values
        wxlist = [wx]
        ### index test
        %timeit words[wx:wx + 1]      # 0.334 us
        %timeit words[wx, np.newaxis] # 1.05 us
        %timeit words[np.newaxis, wx] # 1.05 us
        %timeit words.take(wxlist, axis=0) # 1.6 us
        ### pandas test
        %timeit words.values[wx:wx + 1]      # 7.6 us
        %timeit words[wx:wx + 1].values      # 84.9 us
    """
    num_words = wx_sublist.size
    # Preallocate one object slot per word; results are stored by position
    out_arr = np.empty(num_words, dtype=np.ndarray)
    for pos, wx in enumerate(wx_sublist):
        assigned = wx2_idxs[wx].values
        assigned_vecs = idx2_vec.take(assigned).values
        out_arr[pos] = smk_core.get_norm_rvecs(assigned_vecs,
                                               words.values[wx:wx + 1])
    return out_arr
Beispiel #3
0
def compute_nonagg_rvec_listcomp(words, wx_sublist, idxs_list, idx2_vec):
    """
    Computes one set of residual vectors per word using list building.
    PREFERRED METHOD - 110ms

    Args:
        words: sliceable container of words; ``words[wx:wx + 1]`` is taken
            for every ``wx``
        wx_sublist: iterable of word indexes
        idxs_list: iterable of index collections, paired positionally with
            ``wx_sublist``
        idx2_vec: object providing ``take(idxs, axis=0)``

    Returns:
        list: results of ``smk_core.get_norm_rvecs(vecs, word)``, one per
        (vecs, word) pair; pairing stops at the shorter of the two inputs

    Example:
        >>> from ibeis.model.hots.smk import smk_debug
        >>> words, wx_sublist, aids_list, idxs_list, idx2_vec, maws_list = smk_debug.testdata_nonagg_rvec()

    Timeit:
        %timeit words_list = [words[np.newaxis, wx] for wx in wx_sublist]  # 5 ms
        %timeit words_list = [words[wx:wx + 1] for wx in wx_sublist]  # 1.6 ms
        %timeit vecs_list  = [idx2_vec[idxs] for idxs in idxs_list]  # 23 ms
    """
    # Phase 1: one-row slice of every requested word  # 1 ms
    word_rows = [words[wx:wx + 1] for wx in wx_sublist]
    # Phase 2: gather the rows assigned to every word  # 5.3 ms
    gathered = [idx2_vec.take(idxs, axis=0) for idxs in idxs_list]
    # Phase 3: residual computation dominates the runtime  # 103 ms  # 90%
    rvecs_list = []
    for vecs, word in zip(gathered, word_rows):
        rvecs_list.append(smk_core.get_norm_rvecs(vecs, word))
    return rvecs_list
Beispiel #4
0
def compute_nonagg_residuals_forloop(words, wx_sublist, idxs_list, idx2_vec):
    """
    Explicit-loop version of the non-aggregated residual computation.
    OK, but slower than listcomp method - 140ms

    Args:
        words: sliceable container of words
        wx_sublist: iterable of word indexes that also exposes ``.size``
        idxs_list: positionally indexable; ``idxs_list[i]`` holds the indexes
            for the i-th entry of ``wx_sublist``
        idx2_vec: container indexed as ``idx2_vec[idxs]``

    Returns:
        ndarray: object array of length ``wx_sublist.size``; slot ``i`` holds
        the ``smk_core.get_norm_rvecs`` result for the i-th word index

    Timeit:
        idxs = idxs.astype(np.int32)
        %timeit idx2_vec.take(idxs, axis=0)  # 1.27
        %timeit idx2_vec.take(idxs.astype(np.int32), axis=0)  # 1.94
        %timeit idx2_vec[idxs]  # 7.8
    """
    # Object array so that every slot can store one per-word result
    residuals = np.empty(wx_sublist.size, dtype=np.ndarray)
    for slot, wx in enumerate(wx_sublist):
        member_vecs = idx2_vec[idxs_list[slot]]
        # slice (not a scalar index) so the word stays a one-row selection
        word_row = words[wx:wx + 1]
        residuals[slot] = smk_core.get_norm_rvecs(member_vecs, word_row)
    return residuals
Beispiel #5
0
def compute_nonagg_residuals_forloop(words, wx_sublist, idxs_list, idx2_vec):
    """
    Loop-based computation of per-word residual vectors.
    OK, but slower than listcomp method - 140ms

    The i-th word index of ``wx_sublist`` is paired with ``idxs_list[i]``;
    ``idx2_vec[idxs]`` and ``words[wx:wx + 1]`` are then passed to
    ``smk_core.get_norm_rvecs``.

    Args:
        words: sliceable container of words
        wx_sublist: iterable of word indexes; must provide ``.size``
        idxs_list: sequence indexable by position
        idx2_vec: container supporting ``idx2_vec[idxs]``

    Returns:
        ndarray: object-dtype array with one result per word index

    Timeit:
        idxs = idxs.astype(np.int32)
        %timeit idx2_vec.take(idxs, axis=0)  # 1.27
        %timeit idx2_vec.take(idxs.astype(np.int32), axis=0)  # 1.94
        %timeit idx2_vec[idxs]  # 7.8
    """
    num_words = wx_sublist.size
    out_arr = np.empty(num_words, dtype=np.ndarray)
    for pos, wx in enumerate(wx_sublist):
        assigned = idxs_list[pos]
        assigned_vecs = idx2_vec[assigned]
        out_arr[pos] = smk_core.get_norm_rvecs(assigned_vecs,
                                               words[wx:wx + 1])
    return out_arr
Beispiel #6
0
def compute_nonagg_rvec_listcomp(words, wx_sublist, idxs_list, idx2_vec):
    """
    List-comprehension implementation of the non-aggregated residual
    computation. PREFERRED METHOD - 110ms

    Args:
        words: sliceable container of words
        wx_sublist: iterable of word indexes
        idxs_list: iterable of index collections, matched by position with
            ``wx_sublist``
        idx2_vec: object providing ``take(idxs, axis=0)``

    Returns:
        list: one ``smk_core.get_norm_rvecs`` result per (vecs, word) pair

    Example:
        >>> from ibeis.model.hots.smk import smk_debug
        >>> words, wx_sublist, aids_list, idxs_list, idx2_vec, maws_list = smk_debug.testdata_nonagg_rvec()

    Timeit:
        %timeit words_list = [words[np.newaxis, wx] for wx in wx_sublist]  # 5 ms
        %timeit words_list = [words[wx:wx + 1] for wx in wx_sublist]  # 1.6 ms
        %timeit vecs_list  = [idx2_vec[idxs] for idxs in idxs_list]  # 23 ms
    """
    word_rows = [words[wx:wx + 1] for wx in wx_sublist]  # 1 ms
    vec_groups = [idx2_vec.take(idxs, axis=0) for idxs in idxs_list]  # 5.3 ms
    # zip pairs every gathered group with its word row
    pairs = zip(vec_groups, word_rows)
    # the residual call accounts for most of the runtime  # 103 ms  # 90%
    return [smk_core.get_norm_rvecs(vecs, word) for vecs, word in pairs]
def compute_residuals_(words, wx2_idxs, idx2_vec, idx2_aid, idx2_fx, aggregate,
                       with_pandas=WITH_PANDAS):
    """
    Computes residual vectors based on word assignments and returns, for
    every word index, its residual vectors, aids and fxs.

    Args:
        words: words; converted with ``pdh.ensure_values``
        wx2_idxs: maps a word index to the indexes assigned to that word
        idx2_vec: vectors, selected by index with ``take(idxs, axis=0)``
        idx2_aid: aids, selected by index with ``take(idxs)``
        idx2_fx: fxs, selected by index
        aggregate: if truthy, the per-word residuals are passed through
            ``smk_speed.compute_agg_rvecs`` before being returned
        with_pandas: if truthy the outputs are built by the
            ``pdh.pandasify_*`` helpers, otherwise they are plain dicts
            keyed by word index

    Returns:
        tuple: (wx2_rvecs, wx2_aids, wx2_fxs)

    Output:
        wx2_rvecs - [ ... [ rvec_i1, ...,  rvec_Mi ]_i ... ]
        wx2_aids  - [ ... [  aid_i1, ...,   aid_Mi ]_i ... ]
        wx2_fxs   - [ ... [[fxs]_i1, ..., [fxs]_Mi ]_i ... ]

    For every word:
        * list of (agg)vecs
        For every (agg)vec:
            * one parent aid (presumably unique within the word when
              aggregate is True - TODO confirm)
            * list of parent fxs; if aggregate is False every fxs entry is
              a length-one slice, i.e. len(fxs) == 1

    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, daids, qaids, invindex, wx2_idxs = smk_debug.testdata_raw_internals1()
    >>> words     = invindex.words
    >>> idx2_aid  = invindex.idx2_daid
    >>> idx2_fx   = invindex.idx2_dfx
    >>> idx2_vec  = invindex.idx2_dvec
    >>> aggregate = ibs.cfg.query_cfg.smk_cfg.aggregate
    >>> wx2_rvecs, wx2_aids, wx2_fxs = compute_residuals_(words, wx2_idxs, idx2_vec, idx2_aid, idx2_fx, aggregate)
    """
    word_arr = pdh.ensure_values(words)
    aid_arr = pdh.ensure_values(idx2_aid)
    vec_arr = pdh.ensure_values(idx2_vec)
    fx_arr = pdh.ensure_values(idx2_fx)
    wx_sublist = pdh.ensure_index(wx2_idxs)
    # Build lists w.r.t. words
    idxs_list = pdh.ensure_values_subset(wx2_idxs, wx_sublist)
    aids_list = [aid_arr.take(idxs) for idxs in idxs_list]
    if utool.DEBUG2:
        assert all([len(idxs) == len(aids)
                    for idxs, aids in zip(idxs_list, aids_list)]), 'bad alignment'
        assert vec_arr.shape[0] == fx_arr.shape[0]
        assert vec_arr.shape[0] == aid_arr.shape[0]
    if utool.VERBOSE:
        msg = '[smk_index] Residual Vectors for %d words. aggregate=%r' % (
            len(wx2_idxs), aggregate)
        print(msg)
    # Nonaggregated residuals (always computed; aggregation builds on them)
    word_rows = [word_arr[wx:wx + 1] for wx in wx_sublist]  # 1 ms
    vecs_list = [vec_arr.take(idxs, axis=0) for idxs in idxs_list]  # 5.3 ms
    rvecs_list = [smk_core.get_norm_rvecs(vecs, word)
                  for vecs, word in zip(vecs_list, word_rows)]  # 103 ms  # 90%
    if not aggregate:
        # compatibility hack: wrap every fx in a length-one slice so the
        # output nesting matches the aggregated form
        fxs_list = [[fx_arr[idx:idx + 1] for idx in idxs]
                    for idxs in idxs_list]
        if with_pandas:
            # Make residuals dataframes
            wx2_rvecs, wx2_aids, wx2_fxs = pdh.pandasify_rvecs_list(
                wx_sublist, idxs_list, rvecs_list, aids_list, fxs_list)  # 405 ms
        else:
            wx2_rvecs = dict(zip(wx_sublist, rvecs_list))
            wx2_aids = dict(zip(wx_sublist, aids_list))
            wx2_fxs = dict(zip(wx_sublist, fxs_list))
        if utool.DEBUG2:
            from ibeis.model.hots.smk import smk_debug
            smk_debug.check_wx2(words, wx2_rvecs, wx2_aids, wx2_fxs)
        return wx2_rvecs, wx2_aids, wx2_fxs
    # Aggregate over words of the same aid
    aggvecs_list, aggaids_list, aggidxs_list = smk_speed.compute_agg_rvecs(
        rvecs_list, idxs_list, aids_list)  # 38%
    aggfxs_list = [[fx_arr.take(idxs) for idxs in aggidxs]
                   for aggidxs in aggidxs_list]
    if with_pandas:
        # Make aggregate dataframes
        wx2_aggvecs, wx2_aggaids, wx2_aggfxs = pdh.pandasify_agg_list(
            wx_sublist, aggvecs_list, aggaids_list, aggfxs_list)  # 617 ms  47%
    else:
        wx2_aggvecs = dict(zip(wx_sublist, aggvecs_list))
        wx2_aggaids = dict(zip(wx_sublist, aggaids_list))
        wx2_aggfxs = dict(zip(wx_sublist, aggfxs_list))
        # NOTE: in the aggregate case the debug check only runs for the
        # dict-based outputs
        if utool.DEBUG2:
            from ibeis.model.hots.smk import smk_debug
            smk_debug.check_wx2(words, wx2_aggvecs, wx2_aggaids, wx2_aggfxs)
    return wx2_aggvecs, wx2_aggaids, wx2_aggfxs
Beispiel #8
0
def compute_residuals_(words,
                       wx2_idxs,
                       idx2_vec,
                       idx2_aid,
                       idx2_fx,
                       aggregate,
                       with_pandas=WITH_PANDAS):
    """
    Computes residual vectors based on word assignments.

    Returns three mappings from word index: to the residual vectors of that
    word, to their aids, and to their fxs.

    Args:
        words: words; converted with ``pdh.ensure_values``
        wx2_idxs: maps a word index to the indexes assigned to that word
        idx2_vec: vectors, gathered per word with ``take(idxs, axis=0)``
        idx2_aid: aids, gathered per word with ``take(idxs)``
        idx2_fx: fxs, gathered per index
        aggregate: truthy to post-process the per-word residuals with
            ``smk_speed.compute_agg_rvecs``
        with_pandas: truthy to build the outputs with the
            ``pdh.pandasify_*`` helpers; otherwise plain dicts keyed by
            word index are returned

    Returns:
        tuple: (wx2_rvecs, wx2_aids, wx2_fxs)

    Output:
        wx2_rvecs - [ ... [ rvec_i1, ...,  rvec_Mi ]_i ... ]
        wx2_aids  - [ ... [  aid_i1, ...,   aid_Mi ]_i ... ]
        wx2_fxs   - [ ... [[fxs]_i1, ..., [fxs]_Mi ]_i ... ]

    For every word:
        * list of (agg)vecs
        For every (agg)vec:
            * one parent aid (presumably unique within the word when
              aggregate is True - TODO confirm)
            * list of parent fxs; if aggregate is False every fxs entry is
              a length-one slice, i.e. len(fxs) == 1

    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, daids, qaids, invindex, wx2_idxs = smk_debug.testdata_raw_internals1()
    >>> words     = invindex.words
    >>> idx2_aid  = invindex.idx2_daid
    >>> idx2_fx   = invindex.idx2_dfx
    >>> idx2_vec  = invindex.idx2_dvec
    >>> aggregate = ibs.cfg.query_cfg.smk_cfg.aggregate
    >>> wx2_rvecs, wx2_aids, wx2_fxs = compute_residuals_(words, wx2_idxs, idx2_vec, idx2_aid, idx2_fx, aggregate)
    """
    raw_words = pdh.ensure_values(words)
    raw_aids = pdh.ensure_values(idx2_aid)
    raw_vecs = pdh.ensure_values(idx2_vec)
    raw_fxs = pdh.ensure_values(idx2_fx)
    wx_sublist = pdh.ensure_index(wx2_idxs)
    # Build lists w.r.t. words
    idxs_list = pdh.ensure_values_subset(wx2_idxs, wx_sublist)
    aids_list = [raw_aids.take(idxs) for idxs in idxs_list]
    if utool.DEBUG2:
        assert all([
            len(idxs) == len(aids)
            for idxs, aids in zip(idxs_list, aids_list)
        ]), 'bad alignment'
        assert raw_vecs.shape[0] == raw_fxs.shape[0]
        assert raw_vecs.shape[0] == raw_aids.shape[0]
    if utool.VERBOSE:
        report = '[smk_index] Residual Vectors for %d words. aggregate=%r' % (
            len(wx2_idxs), aggregate)
        print(report)
    # Nonaggregated residuals (always computed; aggregation builds on them)
    word_rows = [raw_words[wx:wx + 1] for wx in wx_sublist]  # 1 ms
    vec_groups = [raw_vecs.take(idxs, axis=0)
                  for idxs in idxs_list]  # 5.3 ms
    rvecs_list = [
        smk_core.get_norm_rvecs(vecs, word)
        for vecs, word in zip(vec_groups, word_rows)
    ]  # 103 ms  # 90%
    if not aggregate:
        # compatibility hack: wrap every fx in a length-one slice so the
        # output nesting matches the aggregated form
        fxs_list = [[raw_fxs[idx:idx + 1] for idx in idxs]
                    for idxs in idxs_list]
        if with_pandas:
            # Make residuals dataframes
            wx2_rvecs, wx2_aids, wx2_fxs = pdh.pandasify_rvecs_list(
                wx_sublist, idxs_list, rvecs_list, aids_list,
                fxs_list)  # 405 ms
        else:
            # one dict per output, all keyed by word index
            wx2_rvecs, wx2_aids, wx2_fxs = (
                dict(zip(wx_sublist, values))
                for values in (rvecs_list, aids_list, fxs_list)
            )
        if utool.DEBUG2:
            from ibeis.model.hots.smk import smk_debug
            smk_debug.check_wx2(words, wx2_rvecs, wx2_aids, wx2_fxs)
        return wx2_rvecs, wx2_aids, wx2_fxs
    # Aggregate over words of the same aid
    aggvecs_list, aggaids_list, aggidxs_list = smk_speed.compute_agg_rvecs(
        rvecs_list, idxs_list, aids_list)  # 38%
    aggfxs_list = [[raw_fxs.take(idxs) for idxs in aggidxs]
                   for aggidxs in aggidxs_list]
    if with_pandas:
        # Make aggregate dataframes
        wx2_aggvecs, wx2_aggaids, wx2_aggfxs = pdh.pandasify_agg_list(
            wx_sublist, aggvecs_list, aggaids_list,
            aggfxs_list)  # 617 ms  47%
    else:
        wx2_aggvecs, wx2_aggaids, wx2_aggfxs = (
            dict(zip(wx_sublist, values))
            for values in (aggvecs_list, aggaids_list, aggfxs_list)
        )
        # NOTE: in the aggregate case the debug check only runs for the
        # dict-based outputs
        if utool.DEBUG2:
            from ibeis.model.hots.smk import smk_debug
            smk_debug.check_wx2(words, wx2_aggvecs, wx2_aggaids,
                                wx2_aggfxs)
    return wx2_aggvecs, wx2_aggaids, wx2_aggfxs