def testdata_siam_desc(num_data=128, desc_dim=8):
    """
    Build paired dummy descriptors with labels for siamese-style tests.

    Rows of a seeded random matrix are grouped into consecutive pairs
    (row 2k, row 2k + 1).  The second row of each pair is the first row
    rolled by one position along the descriptor axis.  The first row of
    every other pair is then overwritten with fresh random data, so those
    pairs no longer satisfy the roll relationship.

    Args:
        num_data (int): total number of descriptor rows (default = 128).
            NOTE(review): presumably expected to be even so that both
            halves of the pairing have the same number of rows -- confirm.
        desc_dim (int): descriptor dimensionality (default = 8)

    Returns:
        tuple: (vecs1, vecs2, label) as torch tensors created with
            ``torch.from_numpy``.  ``vecs1`` / ``vecs2`` are float32 and hold
            the even / odd rows.  ``label`` is float32 and is 1.0 where the
            distance between the rolled ``vecs1`` and ``vecs2`` is greater
            than zero, else 0.0.
    """
    rng = np.random.RandomState(0)
    network_output = vt.normalize_rows(rng.rand(num_data, desc_dim))
    vecs1 = network_output[0::2]
    # roll vecs1 so that vecs2 is essentially a translated copy of it
    vecs2 = np.roll(vecs1, 1, axis=1)
    network_output[1::2] = vecs2
    # Every other pair is an imposter match.  The number of replaced rows is
    # derived from the slice instead of being hard-coded to 32, so values of
    # num_data other than 128 work too (identical draws when num_data=128).
    num_imposters = network_output[::4].shape[0]
    network_output[::4, :] = vt.normalize_rows(
        rng.rand(num_imposters, desc_dim))
    vecs1 = network_output[0::2].astype(np.float32)
    vecs2 = network_output[1::2].astype(np.float32)

    def true_dist_metric(vecs1, vecs2):
        # Undo the translation before measuring the distance
        g1_ = np.roll(vecs1, 1, axis=1)
        dist = vt.L2(g1_, vecs2)
        return dist

    true_dist = true_dist_metric(vecs1, vecs2)
    label = (true_dist > 0).astype(np.float32)
    vecs1 = torch.from_numpy(vecs1)
    vecs2 = torch.from_numpy(vecs2)
    label = torch.from_numpy(label)
    return vecs1, vecs2, label
def testdata_dummy_sift(nPts=10, rng=np.random):
    r"""
    Makes a dummy sift descriptor that has the uint8 * 512 hack like hesaff
    returns

    The descriptor is built from a single random draw that is row
    normalized, clipped at .2, row normalized again and finally scaled by
    512 and truncated to uint8.  (Previously each normalization step drew a
    brand new random array, which silently discarded the clipping step.)

    Args:
        nPts (int): number of descriptors to generate (default = 10)
        rng: object providing ``rand(rows, cols)``; defaults to the global
            ``np.random`` module.

    Returns:
        ndarray: uint8 array of shape (nPts, 128)

    CommandLine:
        python -m vtool.tests.dummy --test-testdata_dummy_sift

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.tests.dummy import *  # NOQA
        >>> import vtool as vt
        >>> nPts = 10
        >>> rng = np.random.RandomState(0)
        >>> sift = testdata_dummy_sift(nPts, rng)
        >>> assert vt.check_sift_validity(sift), 'bad SIFT properties'
        >>> #assert np.allclose(((sift / 512) ** 2).sum(axis=1), 1, rtol=.01), 'bad SIFT property'
        >>> #assert np.all(sift / 512 < .2), 'bad SIFT property'
    """
    import vtool as vt
    sift_ = rng.rand(nPts, 128)
    # normalize
    sift_ = vt.normalize_rows(sift_)
    # clip bin values
    sift_[sift_ > .2] = .2
    # renormalize the clipped values
    sift_ = vt.normalize_rows(sift_)
    # compress into uint8 (truncating cast, no rounding)
    sift = (sift_ * 512).astype(np.uint8)
    return sift
def compute_nonagg_rvecs(invassign, wx, compress=False):
    """
    Driver function for nonagg residual computation

    Args:
        invassign: object providing ``get_vecs(wx)`` and
            ``vocab.wx2_word`` (project type defined elsewhere)
        wx: word index used to look up both the assigned vectors and the
            word itself
        compress (bool): if True the normalized residuals are scaled by
            255, rounded, clipped to [-127, 127] and cast to int8
            (default = False)

    Returns:
        tuple : (rvecs_list, error_flags) where ``error_flags`` is a boolean
            array with one entry per row that is True when every entry of
            that row of ``rvecs_list`` is zero
    """
    # Pick out corresponding lists of residuals and words
    vecs = invassign.get_vecs(wx)
    word = invassign.vocab.wx2_word[wx]
    # Compute nonaggregated normalized residuals.
    # np.float64 replaces the ``np.float`` alias, which was removed from
    # NumPy (it was only ever an alias of the builtin float).
    arr_float = np.subtract(word.astype(np.float64), vecs.astype(np.float64))
    vt.normalize_rows(arr_float, out=arr_float)
    if compress:
        rvecs_list = np.clip(np.round(arr_float * 255.0), -127, 127).astype(np.int8)
    else:
        rvecs_list = arr_float
    # Extract flags (rvecs_list which are all zeros) and rvecs_list
    error_flags = ~np.any(rvecs_list, axis=1)
    return rvecs_list, error_flags
def aggregate_rvecs(rvecs, maws):
    r"""
    Combine several residual vectors into one compressed, normalized row.

    helper for compute_agg_rvecs

    Args:
        rvecs (ndarray): residual vectors, one per row
        maws (ndarray): multi assign weights, one per row of ``rvecs``

    Returns:
        rvecs_agg : aggregated residual vectors.  When ``rvecs`` has exactly
            one row it is returned unchanged.

    CommandLine:
        python -m ibeis.algo.hots.smk.smk_residuals --test-aggregate_rvecs
        ./run_tests.py --exclude-doctest-patterns pipeline neighbor score coverage automated_helpers name automatch chip_match multi_index automated special_query scoring automated nn_weights distinctive match_chips4 query_request devcases hstypes params ibsfuncs smk_core, smk_debug control

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_residuals import *  # NOQA
        >>> rng = np.random.RandomState(0)
        >>> rvecs = (hstypes.RVEC_MAX * rng.rand(4, 128)).astype(hstypes.RVEC_TYPE)
        >>> maws = (rng.rand(rvecs.shape[0])).astype(hstypes.FLOAT_TYPE)
        >>> rvecs_agg = aggregate_rvecs(rvecs, maws)
        >>> result = ut.numpy_str2(rvecs_agg, linewidth=70)
        >>> print(result)
        np.array([[28, 27, 32, 16, 16, 16, 12, 31, 27, 29, 19, 27, 21, 24, 15,
                   21, 17, 37, 13, 40, 38, 33, 17, 30, 13, 23,  9, 25, 19, 15,
                   20, 17, 19, 18, 13, 25, 37, 29, 21, 16, 20, 21, 34, 11, 28,
                   19, 17, 12, 14, 24, 21, 11, 27, 11, 24, 10, 23, 20, 28, 12,
                   16, 14, 30, 22, 18, 26, 21, 20, 18,  9, 29, 20, 25, 19, 23,
                   20,  7, 13, 22, 22, 15, 20, 22, 16, 27, 10, 16, 20, 25, 25,
                   26, 28, 22, 38, 24, 16, 14, 19, 24, 14, 22, 19, 19, 33, 21,
                   22, 18, 22, 25, 25, 22, 23, 32, 16, 25, 15, 29, 21, 25, 20,
                   22, 31, 29, 24, 24, 25, 20, 14]], dtype=np.int8)
    """
    if rvecs.shape[0] == 1:
        # Nothing to combine: hand the lone row back untouched
        return rvecs
    # Scale every residual by its multi-assign weight
    weighted = maws[:, np.newaxis] * rvecs.astype(hstypes.FLOAT_TYPE)
    # Sum into a preallocated single-row buffer (the return value of sum is
    # deliberately not assigned)
    agg_float = np.empty((1, rvecs.shape[1]), dtype=hstypes.FLOAT_TYPE)
    weighted.sum(axis=0, out=agg_float[0])
    # Jegou uses mean instead. Sum should be fine because we normalize
    vt.normalize_rows(agg_float, out=agg_float)
    return compress_normvec(agg_float)
def aggregate_rvecs(rvecs, maws):
    r"""
    Weighted aggregation of residual vectors into a single compressed row.

    helper for compute_agg_rvecs

    Args:
        rvecs (ndarray): residual vectors, one per row
        maws (ndarray): multi assign weights, one per row of ``rvecs``

    Returns:
        rvecs_agg : aggregated residual vectors.  An input with exactly one
            row is passed through as-is.

    CommandLine:
        python -m ibeis.algo.hots.smk.smk_residuals --test-aggregate_rvecs
        ./run_tests.py --exclude-doctest-patterns pipeline neighbor score coverage automated_helpers name automatch chip_match multi_index automated special_query scoring automated nn_weights distinctive match_chips4 query_request devcases hstypes params ibsfuncs smk_core, smk_debug control

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_residuals import *  # NOQA
        >>> rng = np.random.RandomState(0)
        >>> rvecs = (hstypes.RVEC_MAX * rng.rand(4, 128)).astype(hstypes.RVEC_TYPE)
        >>> maws = (rng.rand(rvecs.shape[0])).astype(hstypes.FLOAT_TYPE)
        >>> rvecs_agg = aggregate_rvecs(rvecs, maws)
        >>> result = ut.numpy_str2(rvecs_agg, linewidth=70)
        >>> print(result)
        np.array([[28, 27, 32, 16, 16, 16, 12, 31, 27, 29, 19, 27, 21, 24, 15,
                   21, 17, 37, 13, 40, 38, 33, 17, 30, 13, 23,  9, 25, 19, 15,
                   20, 17, 19, 18, 13, 25, 37, 29, 21, 16, 20, 21, 34, 11, 28,
                   19, 17, 12, 14, 24, 21, 11, 27, 11, 24, 10, 23, 20, 28, 12,
                   16, 14, 30, 22, 18, 26, 21, 20, 18,  9, 29, 20, 25, 19, 23,
                   20,  7, 13, 22, 22, 15, 20, 22, 16, 27, 10, 16, 20, 25, 25,
                   26, 28, 22, 38, 24, 16, 14, 19, 24, 14, 22, 19, 19, 33, 21,
                   22, 18, 22, 25, 25, 22, 23, 32, 16, 25, 15, 29, 21, 25, 20,
                   22, 31, 29, 24, 24, 25, 20, 14]], dtype=np.int8)
    """
    num_rows = rvecs.shape[0]
    if num_rows != 1:
        float_type = hstypes.FLOAT_TYPE
        # Single-row output buffer; the weighted sum is written into row 0
        summed = np.empty((1, rvecs.shape[1]), dtype=float_type)
        column_weights = maws[:, np.newaxis]
        (column_weights * rvecs.astype(float_type)).sum(axis=0, out=summed[0])
        # Jegou uses mean instead. Sum should be fine because we normalize
        vt.normalize_rows(summed, out=summed)
        rvecs_agg = compress_normvec(summed)
        return rvecs_agg
    # Only one vector was given, so it is returned without modification
    return rvecs
def aggregate_rvecs(rvecs, maws, compress=False):
    r"""
    helper for compute_agg_rvecs

    Aggregates the rows of ``rvecs`` into a single row using a weighted
    average followed by row normalization.

    Args:
        rvecs (ndarray): residual vectors, one per row
        maws (ndarray): multi assign weights, one per row of ``rvecs``
        compress (bool): if True the aggregated row is scaled by 255,
            rounded, clipped to [-127, 127] and cast to int8
            (default = False)

    Returns:
        ndarray: aggregated residual vectors.
            * no rows   -> empty float64 array of shape (0, rvecs.shape[1])
            * one row   -> ``rvecs`` itself, unchanged
            * otherwise -> array of shape (1, rvecs.shape[1])
    """
    if rvecs.shape[0] == 0:
        # Previously this result was overwritten by the general branch
        # below (``if`` instead of ``elif``), producing a bogus row from a
        # 0 / 0 division.  Return the empty result directly.
        return np.empty((0, rvecs.shape[1]), dtype=np.float64)
    if rvecs.shape[0] == 1:
        return rvecs
    # Prealloc sum output (do not assign the result of sum).
    # np.float64 replaces the ``np.float`` alias removed from NumPy.
    arr_float = np.empty((1, rvecs.shape[1]), dtype=np.float64)
    out = arr_float[0]
    # Take weighted average of multi-assigned vectors
    total_weight = maws.sum()
    weighted_sum = (maws[:, np.newaxis] * rvecs.astype(np.float64)).sum(axis=0, out=out)
    np.divide(weighted_sum, total_weight, out=out)
    vt.normalize_rows(arr_float, out=arr_float)
    if compress:
        return np.clip(np.round(arr_float * 255.0), -127, 127).astype(np.int8)
    return arr_float
def aggregate_rvecs(rvecs, maws, compress=False):
    r"""
    helper for compute_agg_rvecs

    Collapses the rows of ``rvecs`` into one row: a weighted average using
    ``maws`` followed by row normalization.

    Args:
        rvecs (ndarray): residual vectors, one per row
        maws (ndarray): multi assign weights, one per row of ``rvecs``
        compress (bool): if True the aggregated row is scaled by 255,
            rounded, clipped to [-127, 127] and cast to int8
            (default = False)

    Returns:
        ndarray: aggregated residual vectors.
            * no rows   -> empty float64 array of shape (0, rvecs.shape[1])
            * one row   -> ``rvecs`` itself, unchanged
            * otherwise -> array of shape (1, rvecs.shape[1])
    """
    if rvecs.shape[0] == 0:
        rvecs_agg = np.empty((0, rvecs.shape[1]), dtype=np.float64)
    elif rvecs.shape[0] == 1:
        # ``elif`` (not ``if``): otherwise the empty case above falls into
        # the general branch and its result is overwritten.
        rvecs_agg = rvecs
    else:
        # Prealloc sum output (do not assign the result of sum).
        # np.float64 replaces the ``np.float`` alias removed from NumPy.
        arr_float = np.empty((1, rvecs.shape[1]), dtype=np.float64)
        out = arr_float[0]
        # Take weighted average of multi-assigned vectors
        total_weight = maws.sum()
        weighted_sum = (maws[:, np.newaxis] * rvecs.astype(np.float64)).sum(
            axis=0, out=out)
        np.divide(weighted_sum, total_weight, out=out)
        vt.normalize_rows(arr_float, out=arr_float)
        if compress:
            rvecs_agg = np.clip(np.round(arr_float * 255.0), -127, 127).astype(np.int8)
        else:
            rvecs_agg = arr_float
    return rvecs_agg
def understanding_pseudomax_props(mode=2):
    """
    Demonstrates properties of distances between normalized vectors and the
    same vectors scaled by a pseudo-max constant.

    Two sets of row-normalized vectors are built according to ``mode``, both
    are multiplied by the pseudo max (512), and the L2 / squared-L2
    distances in the two scales are printed and checked to differ by a
    single constant factor.

    Args:
        mode (int): which test vectors to build (default = 2)
            0 - random 2-dimensional vectors
            1 - dummy SIFT descriptors divided by the pseudo max
            2 - vectors with interleaved non-zero entries (type 1)
            3 - one-hot vector versus its complement (type 2)

    CommandLine:
        python -m vtool.distance --test-understanding_pseudomax_props

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.distance import *  # NOQA
        >>> for mode in [0, 1, 2, 3]:
        ...     print('+---')
        ...     print('mode = %r' % (mode,))
        ...     result = understanding_pseudomax_props(mode)
        ...     print('L___')
        >>> print(result)
    """
    import vtool as vt
    pseudo_max = 512
    rng = np.random.RandomState(0)
    num = 10
    if mode == 0:
        dim = 2
        p1_01 = vt.normalize_rows(rng.rand(num, dim))
        p2_01 = vt.normalize_rows(rng.rand(num, dim))
    elif mode == 1:
        p1_01 = vt.dummy.testdata_dummy_sift(num, rng) / pseudo_max
        p2_01 = vt.dummy.testdata_dummy_sift(num, rng) / pseudo_max
    elif mode == 2:
        # Build theoretically maximally distant normalized vectors (type 1)
        dim = 128
        p1_01 = np.zeros((1, dim))
        p1_01[:, 1::2] = 1
        p2_01 = np.zeros((1, dim))
        p2_01[:, 0::2] = 1
        p1_01 = vt.normalize_rows(p1_01)
        p2_01 = vt.normalize_rows(p2_01)
    elif mode == 3:
        # Build theoretically maximally distant vectors (type 2)
        # This mode will clip if cast to uint8, thus failing the test
        dim = 128
        p1_01 = np.zeros((1, dim))
        p1_01[:, 1:] = 1
        p2_01 = np.zeros((1, dim))
        p2_01[:, 0] = 1
        p1_01 = vt.normalize_rows(p1_01)
        p2_01 = vt.normalize_rows(p2_01)
    print('ndims = %r' % (p1_01.shape[1]))

    p1_01 = p1_01.astype(TEMP_VEC_DTYPE)
    p2_01 = p2_01.astype(TEMP_VEC_DTYPE)
    p1_256 = p1_01 * pseudo_max
    p2_256 = p2_01 * pseudo_max

    # Distances in the unit scale and in the pseudo-max scale
    dist_sqrd_01 = vt.L2_sqrd(p1_01, p2_01)
    dist_sqrd_256 = vt.L2_sqrd(p1_256, p2_256)
    dist_01 = np.sqrt(dist_sqrd_01)
    dist_256 = np.sqrt(dist_sqrd_256)

    report = [
        ('dist_sqrd_01 = %s', dist_sqrd_01),
        ('dist_sqrd_256 = %s', dist_sqrd_256),
        ('dist_01 = %s', dist_01),
        ('dist_256 = %s', dist_256),
    ]
    for fmtstr, dist_arr in report:
        print(fmtstr % (ut.numpy_str(dist_arr, precision=2), ))

    print('--')
    print('sqrt(2) = %f' % (np.sqrt(2)))
    print('--')

    # The sqrt of the squared distance must agree with the direct L2 call
    assert np.all(dist_01 == vt.L2(p1_01, p2_01))
    assert np.all(dist_256 == vt.L2(p1_256, p2_256))

    const_sqrd = dist_sqrd_256 / dist_sqrd_01
    const = dist_256 / dist_01

    print('const = %r' % (const[0], ))
    print('const_sqrd = %r' % (const_sqrd[0], ))
    print('1 / const = %r' % (1 / const[0], ))
    print('1 / const_sqrd = %r' % (1 / const_sqrd[0], ))

    # Every pair must share the same conversion factor
    assert ut.allsame(const)
    assert ut.allsame(const_sqrd)

    assert np.all(const == np.sqrt(const_sqrd))

    # Assert that distance conversions work
    assert np.all(dist_256 / const == dist_01)
    assert np.all(dist_sqrd_256 / const_sqrd == dist_sqrd_01)
    print('Conversions work')

    print('Maximal L2 distance between any two NON-NEGATIVE L2-NORMALIZED'
          ' vectors should always be sqrt(2)')
def get_norm_residuals(vecs, word):
    """
    Computes row-normalized residuals (word - vecs) and compresses them.

    Args:
        vecs (ndarray): vectors assigned to the word
        word (ndarray): the word the residuals are taken against

    Returns:
        rvecs_n : the result of ``compress_normvec`` applied to the
            normalized residuals (a single value, no flag array)

    CommandLine:
        python -m ibeis.algo.hots.smk.smk_residuals --test-get_norm_residuals

    Example:
        >>> # ENABLE_DOCTEST
        >>> # The case where vecs != words
        >>> from ibeis.algo.hots.smk.smk_residuals import *  # NOQA
        >>> rng = np.random.RandomState(0)
        >>> vecs = (hstypes.VEC_MAX * rng.rand(4, 128)).astype(hstypes.VEC_TYPE)
        >>> word = (hstypes.VEC_MAX * rng.rand(1, 128)).astype(hstypes.VEC_TYPE)
        >>> rvecs_n = get_norm_residuals(vecs, word)
        >>> result = ut.numpy_str2(rvecs_n)
        >>> print(result)

    Example:
        >>> # ENABLE_DOCTEST
        >>> # The case where vecs == words
        >>> from ibeis.algo.hots.smk.smk_residuals import *  # NOQA
        >>> rng = np.random.RandomState(0)
        >>> vecs = (hstypes.VEC_MAX * rng.rand(4, 128)).astype(hstypes.VEC_TYPE)
        >>> word = vecs[1]
        >>> rvecs_n = get_norm_residuals(vecs, word)
        >>> result = ut.numpy_str2(rvecs_n)
        >>> print(result)

    IGNORE
        rvecs_agg8 = compress_normvec_uint8(arr_float)
        rvecs_agg16 = compress_normvec_float16(arr_float)
        ut.print_object_size(rvecs_agg16, 'rvecs_agg16: ')
        ut.print_object_size(rvecs_agg8, 'rvecs_agg8: ')
        ut.print_object_size(rvec_flag, 'rvec_flag: ')

        %timeit np.isnan(_rvec_sums)
        %timeit _rvec_sums == 0
        %timeit np.equal(rvec_sums, 0)
        %timeit rvec_sums == 0
        %timeit np.logical_or(np.isnan(_rvec_sums), _rvec_sums == 0)
    """
    # Promote both operands to the working float type before differencing
    word_float = word.astype(hstypes.FLOAT_TYPE)
    vecs_float = vecs.astype(hstypes.FLOAT_TYPE)
    residuals = word_float - vecs_float
    # Normalize each residual row in place
    vt.normalize_rows(residuals, out=residuals)
    # Converts normvec to a smaller type like float16 or int8.
    # IF FLOAT16 WE NEED TO FILL NANS
    # (but we should use int8, and in that case it is implicit)
    return compress_normvec(residuals)
def understanding_pseudomax_props(mode=2):
    """
    Shows how distances between normalized vectors relate to distances
    between the same vectors after scaling by the pseudo max (512).

    The vectors are chosen by ``mode``; the function prints the distances
    in both scales along with the ratio between them and asserts that the
    ratio is the same for every pair.

    Args:
        mode (int): which test vectors to build (default = 2)
            0 - random 2-dimensional vectors
            1 - dummy SIFT descriptors divided by the pseudo max
            2 - vectors with interleaved non-zero entries (type 1)
            3 - one-hot vector versus its complement (type 2)

    CommandLine:
        python -m vtool.distance --test-understanding_pseudomax_props

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.distance import *  # NOQA
        >>> for mode in [0, 1, 2, 3]:
        ...     print('+---')
        ...     print('mode = %r' % (mode,))
        ...     result = understanding_pseudomax_props(mode)
        ...     print('L___')
        >>> print(result)
    """
    import vtool as vt
    pseudo_max = 512
    rng = np.random.RandomState(0)
    num = 10

    if mode == 0:
        dim = 2
        p1_01 = vt.normalize_rows(rng.rand(num, dim))
        p2_01 = vt.normalize_rows(rng.rand(num, dim))
    elif mode == 1:
        p1_01 = vt.dummy.testdata_dummy_sift(num, rng) / pseudo_max
        p2_01 = vt.dummy.testdata_dummy_sift(num, rng) / pseudo_max
    elif mode == 2:
        # Build theoretically maximally distant normalized vectors (type 1)
        dim = 128
        odd_bins = np.zeros((1, dim))
        even_bins = np.zeros((1, dim))
        even_bins[:, 0::2] = 1
        odd_bins[:, 1::2] = 1
        p1_01 = vt.normalize_rows(odd_bins)
        p2_01 = vt.normalize_rows(even_bins)
    elif mode == 3:
        # Build theoretically maximally distant vectors (type 2)
        # This mode will clip if cast to uint8, thus failing the test
        dim = 128
        all_but_first = np.zeros((1, dim))
        only_first = np.zeros((1, dim))
        only_first[:, 0] = 1
        all_but_first[:, 1:] = 1
        p1_01 = vt.normalize_rows(all_but_first)
        p2_01 = vt.normalize_rows(only_first)

    print('ndims = %r' % (p1_01.shape[1]))

    p1_01 = p1_01.astype(TEMP_VEC_DTYPE)
    p2_01 = p2_01.astype(TEMP_VEC_DTYPE)
    p1_256, p2_256 = p1_01 * pseudo_max, p2_01 * pseudo_max

    dist_sqrd_01 = vt.L2_sqrd(p1_01, p2_01)
    dist_sqrd_256 = vt.L2_sqrd(p1_256, p2_256)
    dist_01 = np.sqrt(dist_sqrd_01)
    dist_256 = np.sqrt(dist_sqrd_256)

    dist_sqrd_01_str = ut.numpy_str(dist_sqrd_01, precision=2)
    print('dist_sqrd_01 = %s' % (dist_sqrd_01_str,))
    dist_sqrd_256_str = ut.numpy_str(dist_sqrd_256, precision=2)
    print('dist_sqrd_256 = %s' % (dist_sqrd_256_str,))
    dist_01_str = ut.numpy_str(dist_01, precision=2)
    print('dist_01 = %s' % (dist_01_str,))
    dist_256_str = ut.numpy_str(dist_256, precision=2)
    print('dist_256 = %s' % (dist_256_str,))

    print('--')
    print('sqrt(2) = %f' % (np.sqrt(2)))
    print('--')

    # sqrt of the squared distance has to match the direct L2 computation
    assert np.all(dist_01 == vt.L2(p1_01, p2_01))
    assert np.all(dist_256 == vt.L2(p1_256, p2_256))

    # Ratios between the scaled and unscaled distances
    const_sqrd = dist_sqrd_256 / dist_sqrd_01
    const = dist_256 / dist_01

    first_const = const[0]
    first_const_sqrd = const_sqrd[0]
    print('const = %r' % (first_const,))
    print('const_sqrd = %r' % (first_const_sqrd,))
    print('1 / const = %r' % (1 / first_const,))
    print('1 / const_sqrd = %r' % (1 / first_const_sqrd,))

    assert ut.allsame(const)
    assert ut.allsame(const_sqrd)

    assert np.all(const == np.sqrt(const_sqrd))

    # Assert that distance conversions work
    assert np.all(dist_256 / const == dist_01)
    assert np.all(dist_sqrd_256 / const_sqrd == dist_sqrd_01)
    print('Conversions work')

    print('Maximal L2 distance between any two NON-NEGATIVE L2-NORMALIZED'
          ' vectors should always be sqrt(2)')