Exemple #1
0
def testdata_siam_desc(num_data=128, desc_dim=8):
    """
    Builds synthetic descriptor pairs together with match / imposter labels.

    Rows of a seeded random matrix (passed through vt.normalize_rows) are
    grouped into consecutive pairs.  The second row of each pair is the first
    row rolled by one position along the descriptor axis.  Every fourth row
    is then overwritten with new random data, which turns every other pair
    into an imposter pair.

    Args:
        num_data (int): total number of descriptor rows, two per pair.
            Should be even so that every pair has both members.
            (default = 128)
        desc_dim (int): number of columns of each descriptor (default = 8)

    Returns:
        tuple: (vecs1, vecs2, label) as torch tensors.  vecs1 and vecs2 are
            the float32 first / second members of each pair.  label is
            float32 and is 1.0 wherever the distance between the rolled
            vecs1 and vecs2 is greater than zero, else 0.0.
    """
    rng = np.random.RandomState(0)
    network_output = vt.normalize_rows(rng.rand(num_data, desc_dim))
    # The second member of each pair is the first member rolled by one
    # position, so it is essentially a translated copy
    network_output[1::2] = np.roll(network_output[0::2], 1, axis=1)
    # Every other pair is an imposter match.  The number of replaced rows is
    # taken from the slice itself; it used to be hard-coded to 32, which
    # only fits the default num_data=128.
    num_imposters = network_output[::4].shape[0]
    network_output[::4, :] = vt.normalize_rows(
        rng.rand(num_imposters, desc_dim))

    vecs1 = network_output[0::2].astype(np.float32)
    vecs2 = network_output[1::2].astype(np.float32)

    # Undo the known translation before measuring the distance
    true_dist = vt.L2(np.roll(vecs1, 1, axis=1), vecs2)
    label = (true_dist > 0).astype(np.float32)
    vecs1 = torch.from_numpy(vecs1)
    vecs2 = torch.from_numpy(vecs2)
    label = torch.from_numpy(label)
    return vecs1, vecs2, label
Exemple #2
0
def testdata_dummy_sift(nPts=10, rng=np.random):
    r"""
    Makes a dummy sift descriptor that has the uint8 * 512 hack
    like hesaff returns

    The descriptor is built by normalizing random rows, clipping every bin
    to at most 0.2, renormalizing the clipped rows, scaling by 512 and
    truncating to uint8.

    Args:
        nPts (int): number of descriptors (rows) to generate (default = 10)
        rng: object providing ``rand(rows, cols)``, e.g. the ``np.random``
            module or a ``np.random.RandomState`` (default = np.random)

    Returns:
        ndarray: sift - uint8 array with shape (nPts, 128)

    CommandLine:
        python -m vtool.tests.dummy --test-testdata_dummy_sift

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.tests.dummy import *  # NOQA
        >>> import vtool as vt
        >>> nPts = 10
        >>> rng = np.random.RandomState(0)
        >>> sift = testdata_dummy_sift(nPts, rng)
        >>> assert vt.check_sift_validity(sift), 'bad SIFT properties'
        >>> #assert np.allclose(((sift / 512) ** 2).sum(axis=1), 1, rtol=.01), 'bad SIFT property'
        >>> #assert np.all(sift / 512 < .2), 'bad SIFT property'
    """
    import vtool as vt
    # normalize a single random draw (previously the draw was thrown away
    # and replaced by a second one)
    sift_ = vt.normalize_rows(rng.rand(nPts, 128))
    # clip bin values
    sift_[sift_ > .2] = .2
    # renormalize the clipped rows (previously a brand new random draw was
    # normalized here, which silently discarded the clipping step)
    sift_ = vt.normalize_rows(sift_)
    # compress into uint8 (truncates, does not round)
    sift = (sift_ * 512).astype(np.uint8)
    return sift
Exemple #3
0
    def compute_nonagg_rvecs(invassign, wx, compress=False):
        """
        Driver function for nonagg residual computation

        Looks up the vectors and the word associated with ``wx``, subtracts
        the vectors from the word and passes the result through
        vt.normalize_rows.

        Args:
            invassign: object providing ``get_vecs(wx)`` and
                ``vocab.wx2_word`` (acts as ``self``)
            wx: word index used for both lookups
            compress (bool): if True the residuals are scaled by 255,
                rounded, clipped to [-127, 127] and cast to int8
                (default = False)

        Returns:
            tuple : (rvecs_list, error_flags) - the residuals (int8 if
                compress else float64) and a boolean array that is True for
                every residual row that contains no nonzero entry
        """
        # Pick out corresponding vectors and word
        vecs = invassign.get_vecs(wx)
        word = invassign.vocab.wx2_word[wx]
        # Compute nonaggregated normalized residuals.
        # np.float64 replaces the np.float alias, which no longer exists in
        # current NumPy releases (it was an alias of the builtin float).
        arr_float = np.subtract(word.astype(np.float64),
                                vecs.astype(np.float64))
        vt.normalize_rows(arr_float, out=arr_float)
        if compress:
            rvecs_list = np.clip(np.round(arr_float * 255.0), -127, 127).astype(np.int8)
        else:
            rvecs_list = arr_float
        # Flag the rows of rvecs_list which are all zeros
        error_flags = ~np.any(rvecs_list, axis=1)
        return rvecs_list, error_flags
Exemple #4
0
    def compute_nonagg_rvecs(invassign, wx, compress=False):
        """
        Driver function for nonagg residual computation

        Fetches the vectors and the word that belong to ``wx``, computes
        ``word - vecs`` in float64 and runs the difference through
        vt.normalize_rows.

        Args:
            invassign: object providing ``get_vecs(wx)`` and
                ``vocab.wx2_word`` (acts as ``self``)
            wx: word index used for both lookups
            compress (bool): if True the residuals are multiplied by 255,
                rounded, clipped to [-127, 127] and stored as int8
                (default = False)

        Returns:
            tuple : (rvecs_list, error_flags) - the residuals (int8 if
                compress else float64) and a boolean array marking the
                residual rows that have no nonzero entry
        """
        # Pick out corresponding vectors and word
        vecs = invassign.get_vecs(wx)
        word = invassign.vocab.wx2_word[wx]
        # Compute nonaggregated normalized residuals.
        # The np.float alias has been removed from NumPy; np.float64 is the
        # dtype it used to resolve to.
        float_word = word.astype(np.float64)
        float_vecs = vecs.astype(np.float64)
        arr_float = np.subtract(float_word, float_vecs)
        vt.normalize_rows(arr_float, out=arr_float)
        if compress:
            rvecs_list = np.clip(np.round(arr_float * 255.0), -127,
                                 127).astype(np.int8)
        else:
            rvecs_list = arr_float
        # Flag the rows of rvecs_list which are all zeros
        error_flags = ~np.any(rvecs_list, axis=1)
        return rvecs_list, error_flags
Exemple #5
0
def testdata_dummy_sift(nPts=10, rng=np.random):
    r"""
    Makes a dummy sift descriptor that has the uint8 * 512 hack
    like hesaff returns

    Random rows are normalized, every bin is clipped to at most 0.2, the
    clipped rows are renormalized, and the result is scaled by 512 and
    truncated to uint8.

    Args:
        nPts (int): number of descriptors (rows) to generate (default = 10)
        rng: object providing ``rand(rows, cols)``, e.g. the ``np.random``
            module or a ``np.random.RandomState`` (default = np.random)

    Returns:
        ndarray: sift - uint8 array with shape (nPts, 128)

    CommandLine:
        python -m vtool.tests.dummy --test-testdata_dummy_sift

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.tests.dummy import *  # NOQA
        >>> import vtool as vt
        >>> nPts = 10
        >>> rng = np.random.RandomState(0)
        >>> sift = testdata_dummy_sift(nPts, rng)
        >>> assert vt.check_sift_validity(sift), 'bad SIFT properties'
        >>> #assert np.allclose(((sift / 512) ** 2).sum(axis=1), 1, rtol=.01), 'bad SIFT property'
        >>> #assert np.all(sift / 512 < .2), 'bad SIFT property'
    """
    import vtool as vt
    raw = rng.rand(nPts, 128)
    # normalize the draw itself instead of a second, unrelated draw
    sift_ = vt.normalize_rows(raw)
    # clip bin values
    sift_[sift_ > .2] = .2
    # renormalize what was just clipped; normalizing a new random draw here
    # would throw the clipping away
    sift_ = vt.normalize_rows(sift_)
    # compress into uint8 (truncates, does not round)
    sift = (sift_ * 512).astype(np.uint8)
    return sift
Exemple #6
0
def aggregate_rvecs(rvecs, maws):
    r"""
    Helper for compute_agg_rvecs that merges several residual vectors into
    a single row.

    A lone residual vector is handed back untouched.  Otherwise each row is
    scaled by its weight, the rows are summed, and the sum is passed through
    vt.normalize_rows and compress_normvec.

    Args:
        rvecs (ndarray): residual vectors
        maws (ndarray): multi assign weights

    Returns:
        rvecs_agg : aggregated residual vectors

    CommandLine:
        python -m ibeis.algo.hots.smk.smk_residuals --test-aggregate_rvecs
        ./run_tests.py --exclude-doctest-patterns pipeline neighbor score coverage automated_helpers name automatch chip_match multi_index automated special_query scoring automated nn_weights distinctive match_chips4 query_request devcases hstypes params ibsfuncs smk_core, smk_debug control

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_residuals import *  # NOQA
        >>> rng = np.random.RandomState(0)
        >>> rvecs = (hstypes.RVEC_MAX * rng.rand(4, 128)).astype(hstypes.RVEC_TYPE)
        >>> maws  = (rng.rand(rvecs.shape[0])).astype(hstypes.FLOAT_TYPE)
        >>> rvecs_agg = aggregate_rvecs(rvecs, maws)
        >>> result = ut.numpy_str2(rvecs_agg, linewidth=70)
        >>> print(result)
        np.array([[28, 27, 32, 16, 16, 16, 12, 31, 27, 29, 19, 27, 21, 24, 15,
                   21, 17, 37, 13, 40, 38, 33, 17, 30, 13, 23,  9, 25, 19, 15,
                   20, 17, 19, 18, 13, 25, 37, 29, 21, 16, 20, 21, 34, 11, 28,
                   19, 17, 12, 14, 24, 21, 11, 27, 11, 24, 10, 23, 20, 28, 12,
                   16, 14, 30, 22, 18, 26, 21, 20, 18,  9, 29, 20, 25, 19, 23,
                   20,  7, 13, 22, 22, 15, 20, 22, 16, 27, 10, 16, 20, 25, 25,
                   26, 28, 22, 38, 24, 16, 14, 19, 24, 14, 22, 19, 19, 33, 21,
                   22, 18, 22, 25, 25, 22, 23, 32, 16, 25, 15, 29, 21, 25, 20,
                   22, 31, 29, 24, 24, 25, 20, 14]], dtype=np.int8)

    """
    if rvecs.shape[0] != 1:
        # Single-row buffer that receives the weighted sum in place
        agg_float = np.empty((1, rvecs.shape[1]), dtype=hstypes.FLOAT_TYPE)
        weighted = maws[:, np.newaxis] * rvecs.astype(hstypes.FLOAT_TYPE)
        # A plain sum (rather than a mean) is enough here because the
        # result is normalized right afterwards
        weighted.sum(axis=0, out=agg_float[0])
        vt.normalize_rows(agg_float, out=agg_float)
        return compress_normvec(agg_float)
    # Nothing to aggregate when there is only one residual vector
    return rvecs
Exemple #7
0
def aggregate_rvecs(rvecs, maws):
    r"""
    Helper for compute_agg_rvecs that collapses a stack of residual vectors
    into one aggregated row.

    When only one residual vector is given it is returned as is.  Otherwise
    the rows are multiplied by their weights and summed, and the sum goes
    through vt.normalize_rows followed by compress_normvec.

    Args:
        rvecs (ndarray): residual vectors
        maws (ndarray): multi assign weights

    Returns:
        rvecs_agg : aggregated residual vectors

    CommandLine:
        python -m ibeis.algo.hots.smk.smk_residuals --test-aggregate_rvecs
        ./run_tests.py --exclude-doctest-patterns pipeline neighbor score coverage automated_helpers name automatch chip_match multi_index automated special_query scoring automated nn_weights distinctive match_chips4 query_request devcases hstypes params ibsfuncs smk_core, smk_debug control

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_residuals import *  # NOQA
        >>> rng = np.random.RandomState(0)
        >>> rvecs = (hstypes.RVEC_MAX * rng.rand(4, 128)).astype(hstypes.RVEC_TYPE)
        >>> maws  = (rng.rand(rvecs.shape[0])).astype(hstypes.FLOAT_TYPE)
        >>> rvecs_agg = aggregate_rvecs(rvecs, maws)
        >>> result = ut.numpy_str2(rvecs_agg, linewidth=70)
        >>> print(result)
        np.array([[28, 27, 32, 16, 16, 16, 12, 31, 27, 29, 19, 27, 21, 24, 15,
                   21, 17, 37, 13, 40, 38, 33, 17, 30, 13, 23,  9, 25, 19, 15,
                   20, 17, 19, 18, 13, 25, 37, 29, 21, 16, 20, 21, 34, 11, 28,
                   19, 17, 12, 14, 24, 21, 11, 27, 11, 24, 10, 23, 20, 28, 12,
                   16, 14, 30, 22, 18, 26, 21, 20, 18,  9, 29, 20, 25, 19, 23,
                   20,  7, 13, 22, 22, 15, 20, 22, 16, 27, 10, 16, 20, 25, 25,
                   26, 28, 22, 38, 24, 16, 14, 19, 24, 14, 22, 19, 19, 33, 21,
                   22, 18, 22, 25, 25, 22, 23, 32, 16, 25, 15, 29, 21, 25, 20,
                   22, 31, 29, 24, 24, 25, 20, 14]], dtype=np.int8)

    """
    only_one = rvecs.shape[0] == 1
    if only_one:
        # A single residual vector is already its own aggregate
        return rvecs
    # Preallocated one-row output; the sum below is written into its row
    summed = np.empty((1, rvecs.shape[1]), dtype=hstypes.FLOAT_TYPE)
    column_weights = maws[:, np.newaxis]
    float_rvecs = rvecs.astype(hstypes.FLOAT_TYPE)
    # Summing instead of averaging is fine since normalization follows
    (column_weights * float_rvecs).sum(axis=0, out=summed[0])
    vt.normalize_rows(summed, out=summed)
    return compress_normvec(summed)
Exemple #8
0
def aggregate_rvecs(rvecs, maws, compress=False):
    r"""
    helper for compute_agg_rvecs

    Collapses the rows of ``rvecs`` into one aggregated residual vector
    using ``maws`` as per-row weights.

    Args:
        rvecs (ndarray): 2D array holding one residual vector per row
        maws (ndarray): one weight per row of rvecs
        compress (bool): if True the aggregated vector is scaled by 255,
            rounded, clipped to [-127, 127] and cast to int8.  Only applies
            when rvecs has more than one row. (default = False)

    Returns:
        ndarray: rvecs_agg -
            an empty float64 array of shape (0, dim) if rvecs has no rows,
            rvecs itself if it has exactly one row,
            otherwise a (1, dim) weighted average of the rows that has been
            passed through vt.normalize_rows (and compressed on request).
    """
    num_rvecs = rvecs.shape[0]
    if num_rvecs == 0:
        # np.float64 replaces the removed np.float alias
        rvecs_agg = np.empty((0, rvecs.shape[1]), dtype=np.float64)
    elif num_rvecs == 1:
        # This must be an elif: with a separate ``if`` the empty case fell
        # through to the averaging branch and its result was overwritten
        rvecs_agg = rvecs
    else:
        # Prealloc sum output (do not assign the result of sum)
        arr_float = np.empty((1, rvecs.shape[1]), dtype=np.float64)
        out = arr_float[0]
        # Take weighted average of multi-assigned vectors
        total_weight = maws.sum()
        weighted_sum = (maws[:, np.newaxis] * rvecs.astype(np.float64)).sum(axis=0, out=out)
        np.divide(weighted_sum, total_weight, out=out)
        vt.normalize_rows(arr_float, out=arr_float)
        if compress:
            rvecs_agg = np.clip(np.round(arr_float * 255.0), -127, 127).astype(np.int8)
        else:
            rvecs_agg = arr_float
    return rvecs_agg
Exemple #9
0
def aggregate_rvecs(rvecs, maws, compress=False):
    r"""
    helper for compute_agg_rvecs

    Reduces the rows of ``rvecs`` to a single aggregated residual vector,
    weighting each row by the matching entry of ``maws``.

    Args:
        rvecs (ndarray): 2D array holding one residual vector per row
        maws (ndarray): one weight per row of rvecs
        compress (bool): if True the aggregated vector is multiplied by 255,
            rounded, clipped to [-127, 127] and stored as int8.  Only applies
            when rvecs has more than one row. (default = False)

    Returns:
        ndarray: rvecs_agg -
            an empty float64 array of shape (0, dim) if rvecs has no rows,
            rvecs itself if it has exactly one row,
            otherwise a (1, dim) weighted average of the rows that has been
            passed through vt.normalize_rows (and compressed on request).
    """
    if rvecs.shape[0] == 0:
        # np.float64 is what the removed np.float alias used to resolve to
        rvecs_agg = np.empty((0, rvecs.shape[1]), dtype=np.float64)
    elif rvecs.shape[0] == 1:
        # Chained with elif so that empty input keeps the empty result
        # instead of dropping into the averaging branch below
        rvecs_agg = rvecs
    else:
        # Prealloc sum output (do not assign the result of sum)
        arr_float = np.empty((1, rvecs.shape[1]), dtype=np.float64)
        out = arr_float[0]
        # Take weighted average of multi-assigned vectors
        total_weight = maws.sum()
        weighted_sum = (maws[:, np.newaxis] * rvecs.astype(np.float64)).sum(
            axis=0, out=out)
        np.divide(weighted_sum, total_weight, out=out)
        vt.normalize_rows(arr_float, out=arr_float)
        if compress:
            rvecs_agg = np.clip(np.round(arr_float * 255.0), -127,
                                127).astype(np.int8)
        else:
            rvecs_agg = arr_float
    return rvecs_agg
Exemple #10
0
def understanding_pseudomax_props(mode=2):
    """
    Prints and asserts how L2 distances between normalized vectors relate to
    the distances between the same vectors scaled by the pseudo-max (512).

    Args:
        mode (int): selects the vectors that are compared (default = 2)
            0 - ten random normalized 2D vectors per side
            1 - ten dummy SIFT descriptors per side, divided by 512
            2 - one pair of 128D vectors, one nonzero only on the odd
                dimensions and the other only on the even dimensions
            3 - one pair of 128D vectors, one nonzero only in the first
                dimension and the other in all remaining dimensions

    CommandLine:
        python -m vtool.distance --test-understanding_pseudomax_props

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.distance import *  # NOQA
        >>> for mode in [0, 1, 2, 3]:
        ...     print('+---')
        ...     print('mode = %r' % (mode,))
        ...     result = understanding_pseudomax_props(mode)
        ...     print('L___')
        >>> print(result)
    """
    import vtool as vt
    pseudo_max = 512
    rng = np.random.RandomState(0)
    num = 10
    if mode == 0:
        unit1 = vt.normalize_rows(rng.rand(num, 2))
        unit2 = vt.normalize_rows(rng.rand(num, 2))
    elif mode == 1:
        unit1 = vt.dummy.testdata_dummy_sift(num, rng) / pseudo_max
        unit2 = vt.dummy.testdata_dummy_sift(num, rng) / pseudo_max
    elif mode == 2 or mode == 3:
        # Build theoretically maximally distant vectors
        unit1 = np.zeros((1, 128))
        unit2 = np.zeros((1, 128))
        if mode == 2:
            # (type 1) the two vectors occupy alternating dimensions
            unit2[:, 0::2] = 1
            unit1[:, 1::2] = 1
        else:
            # (type 2) one dimension against all the others.
            # This mode will clip if cast to uint8, thus failing the test
            unit2[:, 0] = 1
            unit1[:, 1:] = 1
        unit1 = vt.normalize_rows(unit1)
        unit2 = vt.normalize_rows(unit2)
    print('ndims = %r' % (unit1.shape[1]))

    unit1 = unit1.astype(TEMP_VEC_DTYPE)
    unit2 = unit2.astype(TEMP_VEC_DTYPE)

    scaled1 = unit1 * pseudo_max
    scaled2 = unit2 * pseudo_max

    sqrd_unit = vt.L2_sqrd(unit1, unit2)
    sqrd_scaled = vt.L2_sqrd(scaled1, scaled2)

    dist_unit = np.sqrt(sqrd_unit)
    dist_scaled = np.sqrt(sqrd_scaled)

    report = [
        ('dist_sqrd_01  = %s', sqrd_unit),
        ('dist_sqrd_256 = %s', sqrd_scaled),
        ('dist_01       = %s', dist_unit),
        ('dist_256      = %s', dist_scaled),
    ]
    for template, values in report:
        print(template % (ut.numpy_str(values, precision=2),))

    print('--')
    print('sqrt(2)       = %f' % (np.sqrt(2)))
    print('--')

    # The square root of the squared distance must agree with vt.L2
    assert np.all(dist_unit == vt.L2(unit1, unit2))
    assert np.all(dist_scaled == vt.L2(scaled1, scaled2))

    # Ratio between the scaled and the unscaled distances
    ratio_sqrd = sqrd_scaled / sqrd_unit
    ratio = dist_scaled / dist_unit

    print('const = %r' % (ratio[0],))
    print('const_sqrd = %r' % (ratio_sqrd[0],))
    print('1 / const = %r' % (1 / ratio[0],))
    print('1 / const_sqrd = %r' % (1 / ratio_sqrd[0],))

    # Every pair must share the same ratio
    assert ut.allsame(ratio)
    assert ut.allsame(ratio_sqrd)

    assert np.all(ratio == np.sqrt(ratio_sqrd))

    # Assert that distance conversions work
    assert np.all(dist_scaled / ratio == dist_unit)
    assert np.all(sqrd_scaled / ratio_sqrd == sqrd_unit)
    print('Conversions work')

    print('Maximal L2 distance between any two NON-NEGATIVE L2-NORMALIZED'
          ' vectors should always be sqrt(2)')
Exemple #11
0
def get_norm_residuals(vecs, word):
    """
    Computes the normalized residuals of vectors with respect to a word.

    The residual is ``word - vecs`` evaluated in hstypes.FLOAT_TYPE.  It is
    passed through vt.normalize_rows in place and then through
    compress_normvec.

    Args:
        vecs (ndarray): vectors assigned to the word
        word (ndarray): the word the residuals are taken against

    Returns:
        rvecs_n : the output of compress_normvec for the normalized
            residuals (a single value, no flag array is returned)

    CommandLine:
        python -m ibeis.algo.hots.smk.smk_residuals --test-get_norm_residuals

    Example:
        >>> # ENABLE_DOCTEST
        >>> # The case where vecs != words
        >>> from ibeis.algo.hots.smk.smk_residuals import *  # NOQA
        >>> rng = np.random.RandomState(0)
        >>> vecs = (hstypes.VEC_MAX * rng.rand(4, 128)).astype(hstypes.VEC_TYPE)
        >>> word = (hstypes.VEC_MAX * rng.rand(1, 128)).astype(hstypes.VEC_TYPE)
        >>> rvecs_n = get_norm_residuals(vecs, word)
        >>> result = ut.numpy_str2(rvecs_n)
        >>> print(result)

    Example:
        >>> # ENABLE_DOCTEST
        >>> # The case where vecs == words
        >>> from ibeis.algo.hots.smk.smk_residuals import *  # NOQA
        >>> rng = np.random.RandomState(0)
        >>> vecs = (hstypes.VEC_MAX * rng.rand(4, 128)).astype(hstypes.VEC_TYPE)
        >>> word = vecs[1]
        >>> rvecs_n = get_norm_residuals(vecs, word)
        >>> result = ut.numpy_str2(rvecs_n)
        >>> print(result)
    """
    # Cast both operands up front so the subtraction happens in float
    float_word = word.astype(hstypes.FLOAT_TYPE)
    float_vecs = vecs.astype(hstypes.FLOAT_TYPE)
    residuals = float_word - float_vecs
    # Normalization overwrites the residual buffer
    vt.normalize_rows(residuals, out=residuals)
    # NOTE(review): the original author notes that compress_normvec converts
    # to a smaller type like float16 or int8, and that NaNs would have to be
    # filled explicitly for float16 - confirm against compress_normvec
    return compress_normvec(residuals)
Exemple #12
0
def understanding_pseudomax_props(mode=2):
    """
    Shows, through prints and assertions, how the L2 distance between
    normalized vectors compares with the distance between the same vectors
    after scaling by the pseudo-max (512).

    Args:
        mode (int): selects the vectors that are compared (default = 2)
            0 - ten random normalized 2D vectors per side
            1 - ten dummy SIFT descriptors per side, divided by 512
            2 - one pair of 128D vectors, one nonzero only on the odd
                dimensions and the other only on the even dimensions
            3 - one pair of 128D vectors, one nonzero only in the first
                dimension and the other in all remaining dimensions

    CommandLine:
        python -m vtool.distance --test-understanding_pseudomax_props

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.distance import *  # NOQA
        >>> for mode in [0, 1, 2, 3]:
        ...     print('+---')
        ...     print('mode = %r' % (mode,))
        ...     result = understanding_pseudomax_props(mode)
        ...     print('L___')
        >>> print(result)
    """
    import vtool as vt
    pseudo_max = 512
    rng = np.random.RandomState(0)
    num = 10
    if mode == 0:
        vecs_a = vt.normalize_rows(rng.rand(num, 2))
        vecs_b = vt.normalize_rows(rng.rand(num, 2))
    elif mode == 1:
        vecs_a = vt.dummy.testdata_dummy_sift(num, rng) / pseudo_max
        vecs_b = vt.dummy.testdata_dummy_sift(num, rng) / pseudo_max
    elif mode == 2 or mode == 3:
        # Build theoretically maximally distant vectors
        vecs_a = np.zeros((1, 128))
        vecs_b = np.zeros((1, 128))
        if mode == 2:
            # (type 1) the two vectors occupy alternating dimensions
            vecs_b[:, 0::2] = 1
            vecs_a[:, 1::2] = 1
        else:
            # (type 2) one dimension against all the others.
            # This mode will clip if cast to uint8, thus failing the test
            vecs_b[:, 0] = 1
            vecs_a[:, 1:] = 1
        vecs_a = vt.normalize_rows(vecs_a)
        vecs_b = vt.normalize_rows(vecs_b)
    print('ndims = %r' % (vecs_a.shape[1]))

    vecs_a = vecs_a.astype(TEMP_VEC_DTYPE)
    vecs_b = vecs_b.astype(TEMP_VEC_DTYPE)

    big_a = vecs_a * pseudo_max
    big_b = vecs_b * pseudo_max

    sqrd_small = vt.L2_sqrd(vecs_a, vecs_b)
    sqrd_big = vt.L2_sqrd(big_a, big_b)

    dist_small = np.sqrt(sqrd_small)
    dist_big = np.sqrt(sqrd_big)

    summary = (
        ('dist_sqrd_01  = %s', sqrd_small),
        ('dist_sqrd_256 = %s', sqrd_big),
        ('dist_01       = %s', dist_small),
        ('dist_256      = %s', dist_big),
    )
    for template, values in summary:
        print(template % (ut.numpy_str(values, precision=2),))

    print('--')
    print('sqrt(2)       = %f' % (np.sqrt(2)))
    print('--')

    # The square root of the squared distance must agree with vt.L2
    assert np.all(dist_small == vt.L2(vecs_a, vecs_b))
    assert np.all(dist_big == vt.L2(big_a, big_b))

    # Ratio between the scaled and the unscaled distances
    factor_sqrd = sqrd_big / sqrd_small
    factor = dist_big / dist_small

    print('const = %r' % (factor[0],))
    print('const_sqrd = %r' % (factor_sqrd[0],))
    print('1 / const = %r' % (1 / factor[0],))
    print('1 / const_sqrd = %r' % (1 / factor_sqrd[0],))

    # Every pair must share the same ratio
    assert ut.allsame(factor)
    assert ut.allsame(factor_sqrd)

    assert np.all(factor == np.sqrt(factor_sqrd))

    # Assert that distance conversions work
    assert np.all(dist_big / factor == dist_small)
    assert np.all(sqrd_big / factor_sqrd == sqrd_small)
    print('Conversions work')

    print('Maximal L2 distance between any two NON-NEGATIVE L2-NORMALIZED'
          ' vectors should always be sqrt(2)')
Exemple #13
0
def get_norm_residuals(vecs, word):
    """
    Computes the normalized residuals of vectors with respect to a word.

    Both inputs are cast to hstypes.FLOAT_TYPE, ``vecs`` is subtracted from
    ``word``, and the difference is passed through vt.normalize_rows in
    place before being handed to compress_normvec.

    Args:
        vecs (ndarray): vectors assigned to the word
        word (ndarray): the word the residuals are taken against

    Returns:
        rvecs_n : the output of compress_normvec for the normalized
            residuals (a single value, no flag array is returned)

    CommandLine:
        python -m ibeis.algo.hots.smk.smk_residuals --test-get_norm_residuals

    Example:
        >>> # ENABLE_DOCTEST
        >>> # The case where vecs != words
        >>> from ibeis.algo.hots.smk.smk_residuals import *  # NOQA
        >>> rng = np.random.RandomState(0)
        >>> vecs = (hstypes.VEC_MAX * rng.rand(4, 128)).astype(hstypes.VEC_TYPE)
        >>> word = (hstypes.VEC_MAX * rng.rand(1, 128)).astype(hstypes.VEC_TYPE)
        >>> rvecs_n = get_norm_residuals(vecs, word)
        >>> result = ut.numpy_str2(rvecs_n)
        >>> print(result)

    Example:
        >>> # ENABLE_DOCTEST
        >>> # The case where vecs == words
        >>> from ibeis.algo.hots.smk.smk_residuals import *  # NOQA
        >>> rng = np.random.RandomState(0)
        >>> vecs = (hstypes.VEC_MAX * rng.rand(4, 128)).astype(hstypes.VEC_TYPE)
        >>> word = vecs[1]
        >>> rvecs_n = get_norm_residuals(vecs, word)
        >>> result = ut.numpy_str2(rvecs_n)
        >>> print(result)
    """
    # Residuals of the assigned vectors, computed on float copies
    word_f = word.astype(hstypes.FLOAT_TYPE)
    vecs_f = vecs.astype(hstypes.FLOAT_TYPE)
    diff = word_f - vecs_f
    # The normalized rows are written back into the same buffer
    vt.normalize_rows(diff, out=diff)
    # NOTE(review): per the original author's notes compress_normvec yields
    # a smaller type like float16 or int8; with float16 the NaNs would need
    # to be filled explicitly - confirm against compress_normvec
    compressed = compress_normvec(diff)
    return compressed