def test_output_immutability(self):
    """
    Mutating an array returned by ``vector()`` must not alter the vector
    held by the descriptor element.
    """
    elem = DescriptorMemoryElement('test', 0)
    ntools.assert_false(elem.has_vector())

    source = numpy.ones(16)
    elem.set_vector(source)

    # Zero out the extracted array in place ...
    extracted = elem.vector()
    extracted[:] = 0
    ntools.assert_equal(extracted.sum(), 0)

    # ... a fresh extraction must still sum to the 16 ones that were set.
    ntools.assert_equal(elem.vector().sum(), 16)
def test_none_set(self):
    """
    Setting the vector to ``None`` clears a previously stored vector.
    """
    elem = DescriptorMemoryElement('test', 0)
    # A fresh element starts out without a vector.
    ntools.assert_false(elem.has_vector())

    # Store a vector and confirm it reads back equal.
    elem.set_vector(numpy.ones(16))
    ntools.assert_true(elem.has_vector())
    numpy.testing.assert_equal(elem.vector(), numpy.ones(16))

    # Overwriting with None should leave the element vector-less again.
    elem.set_vector(None)
    ntools.assert_false(elem.has_vector())
    ntools.assert_is(elem.vector(), None)
def test_output_immutability(self):
    """
    Mutating an array returned by ``vector()`` must not alter the vector
    held by the descriptor element.
    """
    elem = DescriptorMemoryElement('test', 0)
    ntools.assert_false(elem.has_vector())

    source = numpy.ones(16)
    elem.set_vector(source)

    # Zero out the extracted array in place ...
    extracted = elem.vector()
    extracted[:] = 0
    ntools.assert_equal(extracted.sum(), 0)

    # ... a fresh extraction must still sum to the 16 ones that were set.
    ntools.assert_equal(elem.vector().sum(), 16)
def test_none_set(self):
    """
    Setting the vector to ``None`` clears a previously stored vector.
    """
    elem = DescriptorMemoryElement('test', 0)
    # A fresh element starts out without a vector.
    ntools.assert_false(elem.has_vector())

    # Store a vector and confirm it reads back equal.
    elem.set_vector(numpy.ones(16))
    ntools.assert_true(elem.has_vector())
    numpy.testing.assert_equal(elem.vector(), numpy.ones(16))

    # Overwriting with None should leave the element vector-less again.
    elem.set_vector(None)
    ntools.assert_false(elem.has_vector())
    ntools.assert_is(elem.vector(), None)
def test_none_set(self):
    """
    Setting the vector to ``None`` clears a previously stored vector.
    """
    elem = DescriptorMemoryElement('test', 0)
    # A fresh element starts out without a vector.
    self.assertFalse(elem.has_vector())

    # Store a vector and confirm it reads back equal.
    elem.set_vector(numpy.ones(16))
    self.assertTrue(elem.has_vector())
    numpy.testing.assert_equal(elem.vector(), numpy.ones(16))

    # Overwriting with None should leave the element vector-less again.
    elem.set_vector(None)
    self.assertFalse(elem.has_vector())
    self.assertIs(elem.vector(), None)
def test_none_set(self):
    """
    Setting the vector to ``None`` clears a previously stored vector.
    """
    elem = DescriptorMemoryElement('test', 0)
    # A fresh element starts out without a vector.
    self.assertFalse(elem.has_vector())

    # Store a vector and confirm it reads back equal.
    elem.set_vector(numpy.ones(16))
    self.assertTrue(elem.has_vector())
    numpy.testing.assert_equal(elem.vector(), numpy.ones(16))

    # Overwriting with None should leave the element vector-less again.
    elem.set_vector(None)
    self.assertFalse(elem.has_vector())
    self.assertIs(elem.vector(), None)
def test_nn_known_descriptors_euclidean_ordered(self):
    """
    Build an index from descriptors at known, increasing euclidean distance
    from the origin and check that a query at the origin returns them in
    UUID order.
    """
    index = self._make_inst()

    # Descriptor ``k`` sits at (k, 2k), so its distance from (0, 0) grows
    # with ``k``.
    count = 100
    descriptors = []
    for k in range(count):
        elem = DescriptorMemoryElement('ordered', k)
        elem.set_vector(np.array([k, k * 2], float))
        descriptors.append(elem)
    # Shuffle so build order carries no information about distance order.
    random.shuffle(descriptors)
    index.build_index(descriptors)

    # Query from the origin, asking for as many neighbors as were indexed.
    query = DescriptorMemoryElement('query', 99)
    query.set_vector(np.array([0, 0], float))
    neighbors, dists = index.nn(query, n=count)

    # Because the data is one-dimensional, all of the cells will have the
    # same points (any division will just correspond to a point on the
    # line), and a cell can't have more than half of the points.
    self.assertEqual(len(dists), count // 2)

    # Returned neighbors should be in increasing-UUID (i.e. distance) order.
    for expected_uid, neighbor, _dist in zip(range(count), neighbors, dists):
        self.assertEqual(neighbor.uuid(), expected_uid)
        np.testing.assert_equal(neighbor.vector(),
                                [expected_uid, expected_uid * 2])
def _random_euclidean(self, hash_ftor, hash_idx,
                      ftor_train_hook=lambda d: None):
    """
    Shared euclidean-distance checks for an LSH nearest-neighbor index built
    over seeded random descriptors.

    :param hash_ftor: Hash function class for generating hash codes for
        descriptors.
    :param hash_idx: Hash index instance to use in local LSH algo instance.
    :param ftor_train_hook: Function for training functor if necessary.
        Called once with the list of generated descriptor elements.
    """
    def check_strictly_increasing(distances):
        # Each distance must be strictly greater than its predecessor.
        for pos in range(1, len(distances)):
            ntools.assert_greater(distances[pos], distances[pos - 1])

    n_descr = 1000
    dim = 256

    # Seeded so the generated descriptors are reproducible across runs.
    numpy.random.seed(self.RANDOM_SEED)
    descriptors = []
    for uid in range(n_descr):
        elem = DescriptorMemoryElement('random', uid)
        elem.set_vector(numpy.random.rand(dim))
        descriptors.append(elem)

    ftor_train_hook(descriptors)

    descr_index = MemoryDescriptorIndex()
    kvstore = MemoryKeyValueStore()
    index = LSHNearestNeighborIndex(hash_ftor, descr_index, kvstore,
                                    hash_index=hash_idx,
                                    distance_method='euclidean')
    index.build_index(descriptors)

    # A query taken from the build set should return itself when k=1.
    query = descriptors[255]
    neighbors, dists = index.nn(query, 1)
    ntools.assert_equal(neighbors[0], query)

    # A query slightly perturbed from a build vector should return that
    # build vector as its nearest neighbor.
    base = descriptors[0]
    query = DescriptorMemoryElement('query', n_descr)
    perturbed = base.vector().copy()
    delta = max(perturbed.min(), 0.1)
    perturbed[0] += delta
    perturbed[dim - 1] -= delta
    query.set_vector(perturbed)
    neighbors, dists = index.nn(query, 1)
    ntools.assert_false(numpy.array_equal(query.vector(), base.vector()))
    ntools.assert_equal(neighbors[0], base)

    # For a random query, any k should yield results in distance order.
    query = DescriptorMemoryElement('query', n_descr + 1)
    query.set_vector(numpy.random.rand(dim))
    neighbors, dists = index.nn(query, 10)
    check_strictly_increasing(dists)
    neighbors, dists = index.nn(query, n_descr)
    check_strictly_increasing(dists)
def _random_euclidean(self, hash_ftor, hash_idx,
                      ftor_train_hook=lambda d: None):
    """
    Shared euclidean-distance checks for an LSH nearest-neighbor index built
    over seeded random descriptors.

    :param hash_ftor: Hash function class for generating hash codes for
        descriptors.
    :param hash_idx: Hash index instance to use in local LSH algo instance.
    :param ftor_train_hook: Function for training functor if necessary.
        Called once with the list of generated descriptor elements.
    """
    def check_strictly_increasing(distances):
        # Each distance must be strictly greater than its predecessor.
        for pos in range(1, len(distances)):
            self.assertGreater(distances[pos], distances[pos - 1])

    n_descr = 1000
    dim = 256

    # Seeded so the generated descriptors are reproducible across runs.
    np.random.seed(self.RANDOM_SEED)
    descriptors = []
    for uid in range(n_descr):
        elem = DescriptorMemoryElement('random', uid)
        elem.set_vector(np.random.rand(dim))
        descriptors.append(elem)

    ftor_train_hook(descriptors)

    descr_index = MemoryDescriptorIndex()
    kvstore = MemoryKeyValueStore()
    index = LSHNearestNeighborIndex(hash_ftor, descr_index, kvstore,
                                    hash_index=hash_idx,
                                    distance_method='euclidean')
    index.build_index(descriptors)

    # A query taken from the build set should return itself when k=1.
    query = descriptors[255]
    neighbors, dists = index.nn(query, 1)
    self.assertEqual(neighbors[0], query)

    # A query slightly perturbed from a build vector should return that
    # build vector as its nearest neighbor.
    base = descriptors[0]
    query = DescriptorMemoryElement('query', n_descr)
    perturbed = base.vector().copy()
    delta = max(perturbed.min(), 0.1)
    perturbed[0] += delta
    perturbed[dim - 1] -= delta
    query.set_vector(perturbed)
    neighbors, dists = index.nn(query, 1)
    self.assertFalse(np.array_equal(query.vector(), base.vector()))
    self.assertEqual(neighbors[0], base)

    # For a random query, any k should yield results in distance order.
    query = DescriptorMemoryElement('query', n_descr + 1)
    query.set_vector(np.random.rand(dim))
    neighbors, dists = index.nn(query, 10)
    check_strictly_increasing(dists)
    neighbors, dists = index.nn(query, n_descr)
    check_strictly_increasing(dists)
def test_input_immutability(self):
    """
    Vectors stored in an element must not change when the array they were
    set from is later modified in place.
    """
    #
    # Single source vector
    #
    src = numpy.random.rand(16)
    truth = tuple(src.copy())
    elem = DescriptorMemoryElement('test', 0)
    elem.set_vector(src)
    # Clobber the source after it has been handed to the element.
    src[:] = 0
    ntools.assert_true((src == 0).all())
    ntools.assert_false(sum(truth) == 0.)
    numpy.testing.assert_equal(elem.vector(), truth)

    #
    # Rows taken from a shared matrix
    #
    mat = numpy.random.rand(20, 16)
    rows = [mat[idx] for idx in (3, 15, 19)]
    # Save truth values of the rows as immutable tuples (copies).
    truths = [tuple(row.copy()) for row in rows]

    elems = []
    for uid, row in enumerate(rows, start=1):
        stored = DescriptorMemoryElement('test', uid)
        stored.set_vector(row)
        elems.append(stored)
    for row, stored in zip(rows, elems):
        numpy.testing.assert_equal(row, stored.vector())

    # Changing the source matrix should not change stored vectors.
    mat[:, :] = 0.
    for row in rows:
        ntools.assert_true((row == 0).all())
    for expected in truths:
        ntools.assert_false(sum(expected) == 0.)
    for stored, expected in zip(elems, truths):
        numpy.testing.assert_equal(stored.vector(), expected)
def test_input_immutability(self):
    """
    Vectors stored in an element must not change when the array they were
    set from is later modified in place.
    """
    #
    # Single source vector
    #
    src = numpy.random.rand(16)
    truth = tuple(src.copy())
    elem = DescriptorMemoryElement('test', 0)
    elem.set_vector(src)
    # Clobber the source after it has been handed to the element.
    src[:] = 0
    ntools.assert_true((src == 0).all())
    ntools.assert_false(sum(truth) == 0.)
    numpy.testing.assert_equal(elem.vector(), truth)

    #
    # Rows taken from a shared matrix
    #
    mat = numpy.random.rand(20, 16)
    rows = [mat[idx] for idx in (3, 15, 19)]
    # Save truth values of the rows as immutable tuples (copies).
    truths = [tuple(row.copy()) for row in rows]

    elems = []
    for uid, row in enumerate(rows, start=1):
        stored = DescriptorMemoryElement('test', uid)
        stored.set_vector(row)
        elems.append(stored)
    for row, stored in zip(rows, elems):
        numpy.testing.assert_equal(row, stored.vector())

    # Changing the source matrix should not change stored vectors.
    mat[:, :] = 0.
    for row in rows:
        ntools.assert_true((row == 0).all())
    for expected in truths:
        ntools.assert_false(sum(expected) == 0.)
    for stored, expected in zip(elems, truths):
        numpy.testing.assert_equal(stored.vector(), expected)
def test_set_state_version_1(self):
    """
    ``__setstate__`` should accept the older state layout: a 3-tuple of
    (type string, uid, bytes produced by ``numpy.save`` of the vector).
    """
    expected_type = 'test-type'
    expected_uid = 'test-uid'
    expected_v = numpy.array([1, 2, 3])

    # Serialize the expected vector the way the old state format stored it.
    buf = BytesIO()
    # noinspection PyTypeChecker
    numpy.save(buf, expected_v)
    old_state = (expected_type, expected_uid, buf.getvalue())

    elem = DescriptorMemoryElement(None, None)
    elem.__setstate__(old_state)

    self.assertEqual(elem.type(), expected_type)
    self.assertEqual(elem.uuid(), expected_uid)
    numpy.testing.assert_array_equal(elem.vector(), expected_v)
def test_set_state_version_1(self):
    """
    ``__setstate__`` should accept the older state layout: a 3-tuple of
    (type string, uid, bytes produced by ``numpy.save`` of the vector).
    """
    expected_type = 'test-type'
    expected_uid = 'test-uid'
    expected_v = numpy.array([1, 2, 3])

    # Serialize the expected vector the way the old state format stored it.
    buf = BytesIO()
    # noinspection PyTypeChecker
    numpy.save(buf, expected_v)
    old_state = (expected_type, expected_uid, buf.getvalue())

    elem = DescriptorMemoryElement(None, None)
    elem.__setstate__(old_state)

    self.assertEqual(elem.type(), expected_type)
    self.assertEqual(elem.uuid(), expected_uid)
    numpy.testing.assert_array_equal(elem.vector(), expected_v)
def _random_euclidean(self, hash_ftor, hash_idx,
                      ftor_train_hook=lambda d: None):
    """
    Shared euclidean-distance checks for an LSH nearest-neighbor index built
    over seeded random descriptors.

    :param hash_ftor: Hash functor passed to ``LSHNearestNeighborIndex``.
    :param hash_idx: Hash index passed to ``LSHNearestNeighborIndex``.
    :param ftor_train_hook: Callable invoked once with the list of generated
        descriptor elements before the index is constructed. Defaults to a
        no-op.
    """
    try:
        # make random descriptors
        i = 1000
        dim = 256
        td = []
        # Seeded so the generated descriptors are reproducible across runs.
        numpy.random.seed(self.RANDOM_SEED)
        # ``range`` instead of ``xrange``: works on both Python 2 and 3.
        for j in range(i):
            d = DescriptorMemoryElement("random", j)
            d.set_vector(numpy.random.rand(dim))
            td.append(d)

        ftor_train_hook(td)

        di = MemoryDescriptorIndex()
        index = LSHNearestNeighborIndex(hash_ftor, di, hash_idx,
                                        distance_method="euclidean")
        index.build_index(td)

        # test query from build set -- should return same descriptor when
        # k=1
        q = td[255]
        r, dists = index.nn(q, 1)
        ntools.assert_equal(r[0], q)

        # test query very near a build vector
        td_q = td[0]
        q = DescriptorMemoryElement("query", i)
        v = td_q.vector().copy()
        v_min = max(v.min(), 0.1)
        v[0] += v_min
        v[dim - 1] -= v_min
        q.set_vector(v)
        r, dists = index.nn(q, 1)
        ntools.assert_false(numpy.array_equal(q.vector(), td_q.vector()))
        ntools.assert_equal(r[0], td_q)

        # random query
        q = DescriptorMemoryElement("query", i + 1)
        q.set_vector(numpy.random.rand(dim))

        # for any query of size k, results should at least be in distance
        # order
        r, dists = index.nn(q, 10)
        for j in range(1, len(dists)):
            ntools.assert_greater(dists[j], dists[j - 1])
        r, dists = index.nn(q, i)
        for j in range(1, len(dists)):
            ntools.assert_greater(dists[j], dists[j - 1])
    finally:
        # Reset the element cache even when an assertion above fails, so a
        # failure here does not leave stale state behind.
        # NOTE(review): presumably MEMORY_CACHE is shared class-level state
        # read by other tests -- confirm.
        DescriptorMemoryElement.MEMORY_CACHE = {}
def test_random_descriptors_euclidean(self):
    """
    Build a euclidean index over random descriptors and check self-query,
    near-neighbor query, and distance ordering of results.
    """
    # make random descriptors
    i = 1000
    dim = 256
    bits = 32
    td = []
    # ``range`` instead of ``xrange``: works on both Python 2 and 3.
    for j in range(i):
        d = DescriptorMemoryElement('random', j)
        d.set_vector(numpy.random.rand(dim))
        td.append(d)

    index = self._make_inst('euclidean', bits)
    index.build_index(td)

    # test query from build set -- should return same descriptor when k=1
    q = td[255]
    r, dists = index.nn(q, 1)
    ntools.assert_equal(r[0], q)

    # test query very near a build vector
    td_q = td[0]
    q = DescriptorMemoryElement('query', i)
    v = numpy.array(td_q.vector())  # copy
    # Shift the first and last components by the same amount in opposite
    # directions so the query differs from, but stays near, the build
    # vector.
    v_min = max(v.min(), 0.1)
    v[0] += v_min
    v[dim - 1] -= v_min
    q.set_vector(v)
    r, dists = index.nn(q, 1)
    ntools.assert_false(numpy.array_equal(q.vector(), td_q.vector()))
    ntools.assert_equal(r[0], td_q)

    # random query
    q = DescriptorMemoryElement('query', i + 1)
    q.set_vector(numpy.random.rand(dim))

    # for any query of size k, results should at least be in distance order
    r, dists = index.nn(q, 10)
    for j in range(1, len(dists)):
        ntools.assert_greater(dists[j], dists[j - 1])
    r, dists = index.nn(q, i)
    for j in range(1, len(dists)):
        ntools.assert_greater(dists[j], dists[j - 1])
def test_random_descriptors_euclidean(self):
    """
    Build a euclidean index over random descriptors and check self-query,
    near-neighbor query, and distance ordering of results.
    """
    # make random descriptors
    i = 1000
    dim = 256
    bits = 32
    td = []
    # ``range`` instead of ``xrange``: works on both Python 2 and 3.
    for j in range(i):
        d = DescriptorMemoryElement('random', j)
        d.set_vector(numpy.random.rand(dim))
        td.append(d)

    index = self._make_inst('euclidean', bits)
    index.build_index(td)

    # test query from build set -- should return same descriptor when k=1
    q = td[255]
    r, dists = index.nn(q, 1)
    ntools.assert_equal(r[0], q)

    # test query very near a build vector
    td_q = td[0]
    q = DescriptorMemoryElement('query', i)
    v = numpy.array(td_q.vector())  # copy
    # Shift the first and last components by the same amount in opposite
    # directions so the query differs from, but stays near, the build
    # vector.
    v_min = max(v.min(), 0.1)
    v[0] += v_min
    v[dim - 1] -= v_min
    q.set_vector(v)
    r, dists = index.nn(q, 1)
    ntools.assert_false(numpy.array_equal(q.vector(), td_q.vector()))
    ntools.assert_equal(r[0], td_q)

    # random query
    q = DescriptorMemoryElement('query', i + 1)
    q.set_vector(numpy.random.rand(dim))

    # for any query of size k, results should at least be in distance order
    r, dists = index.nn(q, 10)
    for j in range(1, len(dists)):
        ntools.assert_greater(dists[j], dists[j - 1])
    r, dists = index.nn(q, i)
    for j in range(1, len(dists)):
        ntools.assert_greater(dists[j], dists[j - 1])
def test_known_descriptors_euclidean_ordered(self):
    """
    Build a euclidean index from descriptors at known, increasing distance
    from the origin and check that a query at the origin returns them in
    UUID order.
    """
    index = self._make_inst('euclidean')

    # make vectors to return in a known euclidean distance order
    i = 10
    test_descriptors = []
    # ``range`` instead of ``xrange``: works on both Python 2 and 3.
    for j in range(i):
        d = DescriptorMemoryElement('ordered', j)
        d.set_vector(numpy.array([j, j * 2], float))
        test_descriptors.append(d)
    # Shuffle so build order carries no information about distance order.
    random.shuffle(test_descriptors)
    index.build_index(test_descriptors)

    # Since descriptors were build in increasing distance from (0,0),
    # returned descriptors for a query of [0,0] should be in index order.
    q = DescriptorMemoryElement('query', 99)
    q.set_vector(numpy.array([0, 0], float))
    r, dists = index.nn(q, i)
    # NOTE: zip stops at the shortest input, so only the results actually
    # returned are checked here.
    for j, d, _dist in zip(range(i), r, dists):
        ntools.assert_equal(d.uuid(), j)
        numpy.testing.assert_equal(d.vector(), [j, j * 2])
def test_known_descriptors_euclidean_ordered(self):
    """
    Build a euclidean index from descriptors at known, increasing distance
    from the origin and check that a query at the origin returns them in
    UUID order.
    """
    index = self._make_inst('euclidean')

    # make vectors to return in a known euclidean distance order
    i = 10
    test_descriptors = []
    # ``range`` instead of ``xrange``: works on both Python 2 and 3.
    for j in range(i):
        d = DescriptorMemoryElement('ordered', j)
        d.set_vector(numpy.array([j, j * 2], float))
        test_descriptors.append(d)
    # Shuffle so build order carries no information about distance order.
    random.shuffle(test_descriptors)
    index.build_index(test_descriptors)

    # Since descriptors were build in increasing distance from (0,0),
    # returned descriptors for a query of [0,0] should be in index order.
    q = DescriptorMemoryElement('query', 99)
    q.set_vector(numpy.array([0, 0], float))
    r, dists = index.nn(q, i)
    # NOTE: zip stops at the shortest input, so only the results actually
    # returned are checked here.
    for j, d, _dist in zip(range(i), r, dists):
        ntools.assert_equal(d.uuid(), j)
        numpy.testing.assert_equal(d.vector(), [j, j * 2])
def test_classify_async(self):
    """
    ``classify_async`` should return a mapping containing every input
    element, each mapped to the classification expected for that element.
    """
    # Twenty elements, each with a single-value vector equal to its UUID.
    elements = []
    for uid in range(20):
        elem = DescriptorMemoryElement('test', uid)
        elem.set_vector([uid])
        elements.append(elem)

    classifier = DummyClassifier()
    result_map = classifier.classify_async(elements)

    # Every element should be present, and its classification should be a
    # dict mapping the element's UUID to its vector as a list.
    for elem in elements:
        nose.tools.assert_in(elem, result_map)
        actual = result_map[elem].get_classification()
        expected = {elem.uuid(): elem.vector().tolist()}
        nose.tools.assert_equal(actual, expected)
def test_refine_no_prev_results(self):
    """
    Check that ``refine`` fills ``results`` straight from the ranker's
    returned probabilities when no results existed beforehand, including
    for elements that were also adjudicated.

    This matters because a previous state of the IQR Session structure
    would force return probabilities for some descriptor elements to
    certain values if they were also present in the positive or negative
    adjudicated (internal or external) sets.
    """
    in_pos = DescriptorMemoryElement('t', 0).set_vector([0])
    in_neg = DescriptorMemoryElement('t', 1).set_vector([1])
    ex_pos = DescriptorMemoryElement('t', 2).set_vector([2])
    ex_neg = DescriptorMemoryElement('t', 3).set_vector([3])
    other = DescriptorMemoryElement('t', 4).set_vector([4])

    # Populate the working set with the internal elements plus one
    # un-adjudicated element.
    desc_list = [in_pos, in_neg, other]
    self.iqrs.working_set.add_many_descriptors(desc_list)

    # Mocked ranker output: the probabilities only need to be something
    # other than 1.0 or 0.0.
    pool_ids = [elem.uuid() for elem in desc_list]
    ranker = self.iqrs.rank_relevancy_with_feedback.rank_with_feedback
    ranker.return_value = ([0.5, 0.5, 0.5], pool_ids)

    # Pre-condition: nothing has been computed yet.
    assert self.iqrs.results is None
    assert self.iqrs.feedback_list is None

    # Register external and internal positive/negative examples.
    self.iqrs.external_descriptors(positive=[ex_pos], negative=[ex_neg])
    self.iqrs.adjudicate(new_positives=[in_pos], new_negatives=[in_neg])

    self.iqrs.refine()

    # ``rank_with_feedback`` must have been called exactly once with the
    # combined positive vectors, the combined negative vectors, and the
    # working-set vectors with their UIDs.
    pool_uids, pool_de = zip(*self.iqrs.working_set.items())
    pool = [elem.vector() for elem in pool_de]
    ranker.assert_called_once_with(
        [in_pos.vector(), ex_pos.vector()],
        [in_neg.vector(), ex_neg.vector()],
        pool, pool_uids)

    # ``results`` should now map each working-set element to the mocked
    # probability.
    assert self.iqrs.results is not None
    assert len(self.iqrs.results) == 3
    ranked = (other, in_pos, in_neg)
    for elem in ranked:
        assert elem in self.iqrs.results
    for elem in ranked:
        assert self.iqrs.results[elem] == 0.5

    assert self.iqrs.feedback_list == desc_list
def test_refine_with_prev_results(self):
    """
    Check that ``refine`` replaces an existing ``results`` dictionary with
    the ranker's returned probabilities.
    """
    in_pos = DescriptorMemoryElement('t', 0).set_vector([0])
    in_neg = DescriptorMemoryElement('t', 1).set_vector([1])
    ex_pos = DescriptorMemoryElement('t', 2).set_vector([2])
    ex_neg = DescriptorMemoryElement('t', 3).set_vector([3])
    other = DescriptorMemoryElement('t', 4).set_vector([4])

    # Populate the working set with the internal elements plus one
    # un-adjudicated element.
    desc_list = [in_pos, in_neg, other]
    self.iqrs.working_set.add_many_descriptors(desc_list)

    # Mocked ranker output: the probabilities only need to be something
    # other than 1.0 or 0.0.
    pool_ids = list(self.iqrs.working_set.iterkeys())
    ranker = self.iqrs.rank_relevancy_with_feedback.rank_with_feedback
    ranker.return_value = ([0.5, 0.5, 0.5], pool_ids)

    # Seed a "previous" results dict covering the working-set elements,
    # plus one disjoint key that ``refine`` is expected to drop since it
    # replaces the previous dict.
    previous_results = {elem: 0.2 for elem in desc_list}
    previous_results['something else'] = 0.3
    self.iqrs.results = previous_results

    # Seed a "previous" feedback list as well.
    self.iqrs.feedback_list = [ex_pos, ex_neg, other]

    # Register external and internal positive/negative examples.
    self.iqrs.external_descriptors(positive=[ex_pos], negative=[ex_neg])
    self.iqrs.adjudicate(new_positives=[in_pos], new_negatives=[in_neg])

    self.iqrs.refine()

    # ``rank_with_feedback`` must have been called exactly once with the
    # combined positive vectors, the combined negative vectors, and the
    # working-set vectors with their UIDs.
    pool_uids, pool_de = zip(*self.iqrs.working_set.items())
    pool = [elem.vector() for elem in pool_de]
    ranker.assert_called_once_with(
        [in_pos.vector(), ex_pos.vector()],
        [in_neg.vector(), ex_neg.vector()],
        pool, pool_uids)

    # ``results`` should hold exactly the three working-set elements, each
    # mapped to the mocked probability, with the disjoint key gone.
    assert self.iqrs.results is not None
    assert len(self.iqrs.results) == 3
    ranked = (other, in_pos, in_neg)
    for elem in ranked:
        assert elem in self.iqrs.results
    assert 'something else' not in self.iqrs.results
    for elem in ranked:
        assert self.iqrs.results[elem] == 0.5

    assert self.iqrs.feedback_list == desc_list