def test_nn_known_descriptors_euclidean_ordered(self):
    # Check that nn() returns neighbors in increasing euclidean distance
    # when the indexed vectors have a known distance ordering.
    index = self._make_inst()

    def make_element(k):
        # Element ``k`` is placed at (k, 2k), so its distance from the
        # origin grows with ``k``.
        elem = DescriptorMemoryElement('ordered', k)
        elem.set_vector(np.array([k, k * 2], float))
        return elem

    count = 100
    test_descriptors = [make_element(k) for k in range(count)]
    # Shuffle so the build order carries no information about distance.
    random.shuffle(test_descriptors)
    index.build_index(test_descriptors)

    # Descriptors were built in increasing distance from (0,0), so a query
    # at [0,0] should return them in UUID order.
    query = DescriptorMemoryElement('query', 99)
    query.set_vector(np.array([0, 0], float))
    neighbors, dists = index.nn(query, n=count)

    # Because the data is one-dimensional, all of the cells will have the
    # same points (any division will just correspond to a point on the
    # line), and a cell can't have more than half of the points.
    self.assertEqual(len(dists), count // 2)

    # zip() stops at the shortest input, so only the returned results are
    # checked here.
    for expected_uuid, elem, _ in zip(range(count), neighbors, dists):
        self.assertEqual(elem.uuid(), expected_uuid)
        np.testing.assert_equal(elem.vector(),
                                [expected_uuid, expected_uuid * 2])
def _known_ordered_euclidean(self, hash_ftor, hash_idx,
                             ftor_train_hook=lambda d: None):
    """
    Build an LSH index over descriptors with a known euclidean ordering
    and assert that nearest-neighbor queries return them in that order.

    :param hash_ftor: Hash functor handed to ``LSHNearestNeighborIndex``
        as its first argument.
    :param hash_idx: Value passed as the ``hash_index`` keyword argument.
    :param ftor_train_hook: Callable invoked with the shuffled descriptor
        list before the index is constructed. Defaults to a no-op.
    """
    def make_element(k):
        # Element ``k`` is placed at (k, 2k), so its distance from the
        # origin grows with ``k``.
        elem = DescriptorMemoryElement('ordered', k)
        elem.set_vector(np.array([k, k*2], float))
        return elem

    count = 1000
    test_descriptors = [make_element(k) for k in range(count)]
    # Shuffle so the build order carries no information about distance.
    random.shuffle(test_descriptors)

    ftor_train_hook(test_descriptors)

    descr_index = MemoryDescriptorIndex()
    kv_store = MemoryKeyValueStore()
    index = LSHNearestNeighborIndex(hash_ftor, descr_index, kv_store,
                                    hash_index=hash_idx,
                                    distance_method='euclidean')
    index.build_index(test_descriptors)

    # Descriptors were built in increasing distance from (0,0), so a query
    # at [0,0] should return them in UUID order.
    query = DescriptorMemoryElement('query', count)
    query.set_vector(np.array([0, 0], float))

    # The five nearest results should have UUIDs 0..4, nearest first.
    neighbors, dists = index.nn(query, 5)
    for rank in range(5):
        self.assertEqual(neighbors[rank].uuid(), rank)

    # A global search should come back in complete order.
    neighbors, dists = index.nn(query, count)
    for expected_uuid, elem, _ in zip(range(count), neighbors, dists):
        self.assertEqual(elem.uuid(), expected_uuid)
def test_known_descriptors_euclidean_ordered(self):
    # Check that a euclidean-distance index returns neighbors in
    # increasing distance order for vectors with a known ordering.
    index = self._make_inst('euclidean')

    # Make vectors with a known euclidean distance order: element ``j`` is
    # placed at (j, 2j), so its distance from the origin grows with ``j``.
    i = 1000
    test_descriptors = []
    # ``range`` rather than ``xrange``: the latter does not exist on
    # Python 3, while ``range`` works on both major versions.
    for j in range(i):
        d = DescriptorMemoryElement('ordered', j)
        d.set_vector(numpy.array([j, j * 2], float))
        test_descriptors.append(d)
    # Shuffle so the build order carries no information about distance.
    random.shuffle(test_descriptors)
    index.build_index(test_descriptors)

    # Since descriptors were built in increasing distance from (0,0),
    # returned descriptors for a query of [0,0] should be in index order.
    q = DescriptorMemoryElement('query', i)
    q.set_vector(numpy.array([0, 0], float))

    # The five nearest results should have UUIDs 0..4, nearest first.
    r, _ = index.nn(q, 5)
    for rank in range(5):
        ntools.assert_equal(r[rank].uuid(), rank)

    # A global search should come back in complete order.
    r, _ = index.nn(q, i)
    for j, d in enumerate(r):
        ntools.assert_equal(d.uuid(), j)
def test_known_descriptors_euclidean_ordered(self):
    # Check that a euclidean-distance index returns neighbors in
    # increasing distance order for vectors with a known ordering.
    index = self._make_inst('euclidean')

    # Make vectors with a known euclidean distance order: element ``j`` is
    # placed at (j, 2j), so its distance from the origin grows with ``j``.
    i = 1000
    test_descriptors = []
    # ``range`` rather than ``xrange``: the latter does not exist on
    # Python 3, while ``range`` works on both major versions.
    for j in range(i):
        d = DescriptorMemoryElement('ordered', j)
        d.set_vector(numpy.array([j, j*2], float))
        test_descriptors.append(d)
    # Shuffle so the build order carries no information about distance.
    random.shuffle(test_descriptors)
    index.build_index(test_descriptors)

    # Since descriptors were built in increasing distance from (0,0),
    # returned descriptors for a query of [0,0] should be in index order.
    q = DescriptorMemoryElement('query', i)
    q.set_vector(numpy.array([0, 0], float))

    # The five nearest results should have UUIDs 0..4, nearest first.
    r, _ = index.nn(q, 5)
    for rank in range(5):
        ntools.assert_equal(r[rank].uuid(), rank)

    # A global search should come back in complete order.
    r, _ = index.nn(q, i)
    for j, d in enumerate(r):
        ntools.assert_equal(d.uuid(), j)
def _known_ordered_euclidean(self, hash_ftor, hash_idx,
                             ftor_train_hook=lambda d: None):
    """
    Build an LSH index over descriptors with a known euclidean ordering
    and assert that nearest-neighbor queries return them in that order.

    :param hash_ftor: Hash functor handed to ``LSHNearestNeighborIndex``
        as its first argument.
    :param hash_idx: Value passed as the ``hash_index`` keyword argument.
    :param ftor_train_hook: Callable invoked with the shuffled descriptor
        list before the index is constructed. Defaults to a no-op.
    """
    def make_element(k):
        # Element ``k`` is placed at (k, 2k), so its distance from the
        # origin grows with ``k``.
        elem = DescriptorMemoryElement('ordered', k)
        elem.set_vector(np.array([k, k*2], float))
        return elem

    count = 1000
    test_descriptors = [make_element(k) for k in range(count)]
    # Shuffle so the build order carries no information about distance.
    random.shuffle(test_descriptors)

    ftor_train_hook(test_descriptors)

    descr_index = MemoryDescriptorIndex()
    kv_store = MemoryKeyValueStore()
    index = LSHNearestNeighborIndex(hash_ftor, descr_index, kv_store,
                                    hash_index=hash_idx,
                                    distance_method='euclidean')
    index.build_index(test_descriptors)

    # Descriptors were built in increasing distance from (0,0), so a query
    # at [0,0] should return them in UUID order.
    query = DescriptorMemoryElement('query', count)
    query.set_vector(np.array([0, 0], float))

    # The five nearest results should have UUIDs 0..4, nearest first.
    neighbors, dists = index.nn(query, 5)
    for rank in range(5):
        self.assertEqual(neighbors[rank].uuid(), rank)

    # A global search should come back in complete order.
    neighbors, dists = index.nn(query, count)
    for expected_uuid, elem, _ in zip(range(count), neighbors, dists):
        self.assertEqual(elem.uuid(), expected_uuid)
def test_classify(self):
    # Classify one descriptor with the dummy classifier and check that the
    # returned element carries the expected classification map and UUID.
    descriptor = DescriptorMemoryElement('test', 0)
    descriptor.set_vector([1, 2, 3])

    classifier = DummyClassifier()
    result = classifier.classify(descriptor)

    # The expected map is keyed by the descriptor UUID (0) and holds the
    # vector that was set above.
    expected_map = {0: [1, 2, 3]}
    nose.tools.assert_equal(result.get_classification(), expected_map)
    # The result exposes ``uuid`` as an attribute, while the descriptor
    # exposes it through a method call.
    nose.tools.assert_equal(result.uuid, descriptor.uuid())
def test_set_state_version_1(self):
    # Test support of the older state version: a 3-tuple of
    # (type string, uuid, vector bytes as written by ``numpy.save``).
    expected_type = 'test-type'
    expected_uid = 'test-uid'
    expected_v = numpy.array([1, 2, 3])

    # Serialize the vector into an in-memory buffer to obtain the raw
    # bytes used in the old state tuple.
    buf = BytesIO()
    # noinspection PyTypeChecker
    numpy.save(buf, expected_v)
    legacy_state = (expected_type, expected_uid, buf.getvalue())

    # Start from an element with no type/uuid so every value checked
    # below must have come from ``__setstate__``.
    element = DescriptorMemoryElement(None, None)
    element.__setstate__(legacy_state)

    self.assertEqual(element.type(), expected_type)
    self.assertEqual(element.uuid(), expected_uid)
    numpy.testing.assert_array_equal(element.vector(), expected_v)
def test_set_state_version_1(self):
    # Test support of the older state version: a 3-tuple of
    # (type string, uuid, vector bytes as written by ``numpy.save``).
    expected_type = 'test-type'
    expected_uid = 'test-uid'
    expected_v = numpy.array([1, 2, 3])

    # Serialize the vector into an in-memory buffer to obtain the raw
    # bytes used in the old state tuple.
    buf = BytesIO()
    # noinspection PyTypeChecker
    numpy.save(buf, expected_v)
    legacy_state = (expected_type, expected_uid, buf.getvalue())

    # Start from an element with no type/uuid so every value checked
    # below must have come from ``__setstate__``.
    element = DescriptorMemoryElement(None, None)
    element.__setstate__(legacy_state)

    self.assertEqual(element.type(), expected_type)
    self.assertEqual(element.uuid(), expected_uid)
    numpy.testing.assert_array_equal(element.vector(), expected_v)
def test_known_descriptors_euclidean_ordered(self):
    # Check that a euclidean-distance index returns neighbors in
    # increasing distance order for vectors with a known ordering.
    index = self._make_inst('euclidean')

    # Make vectors with a known euclidean distance order: element ``j`` is
    # placed at (j, 2j), so its distance from the origin grows with ``j``.
    i = 10
    test_descriptors = []
    # ``range`` rather than ``xrange``: the latter does not exist on
    # Python 3, while ``range`` works on both major versions.
    for j in range(i):
        d = DescriptorMemoryElement('ordered', j)
        d.set_vector(numpy.array([j, j*2], float))
        test_descriptors.append(d)
    # Shuffle so the build order carries no information about distance.
    random.shuffle(test_descriptors)
    index.build_index(test_descriptors)

    # Since descriptors were built in increasing distance from (0,0),
    # returned descriptors for a query of [0,0] should be in index order.
    q = DescriptorMemoryElement('query', 99)
    q.set_vector(numpy.array([0, 0], float))
    r, _ = index.nn(q, i)
    for j, d in enumerate(r):
        ntools.assert_equal(d.uuid(), j)
def test_known_descriptors_euclidean_ordered(self):
    # Check that a euclidean-distance index returns neighbors in
    # increasing distance order for vectors with a known ordering.
    index = self._make_inst('euclidean')

    # Make vectors with a known euclidean distance order: element ``j`` is
    # placed at (j, 2j), so its distance from the origin grows with ``j``.
    i = 10
    test_descriptors = []
    # ``range`` rather than ``xrange``: the latter does not exist on
    # Python 3, while ``range`` works on both major versions.
    for j in range(i):
        d = DescriptorMemoryElement('ordered', j)
        d.set_vector(numpy.array([j, j * 2], float))
        test_descriptors.append(d)
    # Shuffle so the build order carries no information about distance.
    random.shuffle(test_descriptors)
    index.build_index(test_descriptors)

    # Since descriptors were built in increasing distance from (0,0),
    # returned descriptors for a query of [0,0] should be in index order.
    q = DescriptorMemoryElement('query', 99)
    q.set_vector(numpy.array([0, 0], float))
    r, _ = index.nn(q, i)
    for j, d in enumerate(r):
        ntools.assert_equal(d.uuid(), j)
def test_classify_async(self):
    # Check that async classify calls classify on all input elements
    # correctly.

    def make_element(idx):
        # One-dimensional vector holding the element's own index.
        elem = DescriptorMemoryElement('test', idx)
        elem.set_vector([idx])
        return elem

    # Make some descriptor elements.
    d_elems = [make_element(idx) for idx in range(20)]

    classifier = DummyClassifier()
    result_map = classifier.classify_async(d_elems)

    # Due to mocking, we expect a map from each descriptor element to its
    # classification result, which here maps the element's UUID to its
    # vector as a list.
    for elem in d_elems:
        nose.tools.assert_in(elem, result_map)
        # Check for the expected classification.
        actual = result_map[elem].get_classification()
        expected = {elem.uuid(): elem.vector().tolist()}
        nose.tools.assert_equal(actual, expected)