def setUp(self):
    """Create the sample list ``self.V`` and one instance of each filter.

    ``self.V`` holds eight tuples of the form
    ``(one-element numpy array, label string, float)``.  The array of the
    i-th tuple contains just ``i``.  The float is presumably the distance
    value the filters operate on -- confirm against the filter classes.
    """
    # (label, value) pairs; the position in this table becomes the vector.
    samples = (
        ('data1', 0.4),
        ('data2', 0.9),
        ('data3', 1.4),
        ('data4', 2.1),
        ('data5', 0.1),
        ('data6', 8.7),
        ('data7', 3.4),
        ('data8', 2.8),
    )
    self.V = [
        (numpy.array([position]), label, value)
        for position, (label, value) in enumerate(samples)
    ]

    # Filters exercised by the tests.
    self.threshold_filter = DistanceThresholdFilter(1.0)
    self.nearest_filter = NearestFilter(5)
    self.unique = UniqueFilter()
def __init__(self, data_points, sim_threshold=0.5, num_vectors=3):
    """Store every row of ``data_points`` in a freshly built LSH engine.

    :param data_points: 2-D array indexed as ``data_points[i, 1:]``.
        Column 0 is left out of the stored vectors (presumably an id or
        label column -- confirm against the caller).
    :param sim_threshold: the engine's distance filter is created with
        ``1 - sim_threshold``.
    :param num_vectors: second argument passed to
        ``RandomBinaryProjections`` (presumably the number of projections,
        i.e. hash bits -- confirm against the library).
    """
    self.data_points = data_points
    shape = self.data_points.shape
    self.point_num = shape[0]
    # One column is excluded from the indexed vectors, hence the "- 1".
    self.dimension = shape[1] - 1

    # Hash with a fixed seed so the projections are reproducible.
    self.rbp = RandomBinaryProjections('rbp', num_vectors, rand_seed=42)

    distance_filter = DistanceThresholdFilter(1 - sim_threshold)
    self.engine = Engine(
        self.dimension,
        lshashes=[self.rbp],
        vector_filters=[distance_filter],
    )

    # Each row is stored under its row index formatted as a decimal string.
    for row_index in range(self.point_num):
        vector = self.data_points[row_index, 1:]
        self.engine.store_vector(vector, '%d' % row_index)
def __init__(self, level, dim, proj, bico):
    """Set up the per-level state: hashes, ANN engine and an empty feature.

    :param level: level handled by this object; also passed to
        ``bico._getR`` to obtain the engine's distance threshold.
    :param dim: dimension given to the engine and to the zero vectors of
        the initial clustering feature.
    :param proj: second argument for both random-projection hashes
        (presumably the projection count -- confirm against the library).
    :param bico: owning object; must provide ``_getR(level)``.
    """
    self.level = level
    self.dim = dim
    self.proj = proj
    self.point_to_biconode = {}

    # Hash objects. Only ``rbp`` is handed to the engine below.
    self.rbpt = RandomBinaryProjectionTree('rbpt', proj, 1)
    self.rbp = RandomBinaryProjections('rbp', proj)
    self.sqdist = SquaredEuclideanDistance()

    # The distance threshold for this level comes from the owner.
    radius = bico._getR(self.level)
    self.ann_engine = Engine(
        dim,
        lshashes=[self.rbp],
        distance=self.sqdist,
        vector_filters=[DistanceThresholdFilter(radius)],
    )

    self.num_cfs = 0
    self.bico = bico

    # Two independent zero vectors: the points must not share one array.
    zero_a = Point(np.zeros(dim))
    zero_b = Point(np.zeros(dim))
    self.cf = ClusteringFeature(zero_a, zero_b, 0, 0)
def __init__(self, dimension: int, number_projections: int, threshold: float):
    """
    Build the approximate-nearest-neighbour engine used by this object.

    :param dimension: Number of dimensions of the input points
    :param number_projections: Number of random projections used when
        searching for nearest neighbours. Trade-off: more projections give
        fewer false positives in the candidate set
    :param threshold: Distance threshold that defines "nearest": all
        points within this distance
    """
    self.rbp = RandomBinaryProjections('rbp', number_projections)
    self.sqdist = SquaredEuclideanDistance()

    # The engine drops candidates according to this threshold filter.
    within_threshold = DistanceThresholdFilter(threshold)
    self.ann_engine = Engine(
        dimension,
        lshashes=[self.rbp],
        distance=self.sqdist,
        vector_filters=[within_threshold],
    )
class TestVectorFilters(unittest.TestCase):
    """Checks for the threshold, nearest and unique vector filters."""

    def setUp(self):
        """Create eight ``(array, label, float)`` samples and the filters."""
        # (label, value) pairs; the table position becomes the vector.
        samples = (
            ('data1', 0.4),
            ('data2', 0.9),
            ('data3', 1.4),
            ('data4', 2.1),
            ('data5', 0.1),
            ('data6', 8.7),
            ('data7', 3.4),
            ('data8', 2.8),
        )
        self.V = [
            (numpy.array([position]), label, value)
            for position, (label, value) in enumerate(samples)
        ]
        self.threshold_filter = DistanceThresholdFilter(1.0)
        self.nearest_filter = NearestFilter(5)
        self.unique = UniqueFilter()

    def test_thresholding(self):
        """Exactly the samples with values 0.4, 0.9 and 0.1 pass the 1.0 filter."""
        kept = self.threshold_filter.filter_vectors(self.V)
        self.assertEqual(len(kept), 3)
        for position in (0, 1, 4):
            self.assertIn(self.V[position], kept)

    def test_nearest(self):
        """``NearestFilter(5)`` keeps the five samples with the smallest values."""
        kept = self.nearest_filter.filter_vectors(self.V)
        self.assertEqual(len(kept), 5)
        for position in (0, 1, 4, 2, 3):
            self.assertIn(self.V[position], kept)

    def test_unique(self):
        """Four entries repeating existing vectors and labels are filtered out."""
        # Same list object as self.V: the extra entries are added in place.
        candidates = self.V
        repeats = ((7, 'data8'), (0, 'data1'), (1, 'data2'), (6, 'data7'))
        candidates.extend(
            (numpy.array([number]), label, 2.8) for number, label in repeats
        )
        kept = self.unique.filter_vectors(candidates)
        self.assertEqual(len(kept), 8)
class TestVectorFilters(unittest.TestCase):
    """Tests for ``DistanceThresholdFilter``, ``NearestFilter`` and ``UniqueFilter``."""

    def setUp(self):
        """Build eight ``(one-element array, label, float)`` samples and the filters.

        The float is presumably the distance the filters act on -- confirm
        against the filter implementations.
        """
        self.V = []
        self.V.append((numpy.array([0]), 'data1', 0.4))
        self.V.append((numpy.array([1]), 'data2', 0.9))
        self.V.append((numpy.array([2]), 'data3', 1.4))
        self.V.append((numpy.array([3]), 'data4', 2.1))
        self.V.append((numpy.array([4]), 'data5', 0.1))
        self.V.append((numpy.array([5]), 'data6', 8.7))
        self.V.append((numpy.array([6]), 'data7', 3.4))
        self.V.append((numpy.array([7]), 'data8', 2.8))

        self.threshold_filter = DistanceThresholdFilter(1.0)
        self.nearest_filter = NearestFilter(5)
        self.unique = UniqueFilter()

    def test_thresholding(self):
        """Only the samples with values 0.4, 0.9 and 0.1 pass the 1.0 threshold."""
        result = self.threshold_filter.filter_vectors(self.V)
        self.assertEqual(len(result), 3)
        # assertIn reports the missing member and the container on failure,
        # unlike assertTrue(x in y), which only says "False is not true".
        self.assertIn(self.V[0], result)
        self.assertIn(self.V[1], result)
        self.assertIn(self.V[4], result)

    def test_nearest(self):
        """``NearestFilter(5)`` keeps the five samples with the smallest values."""
        result = self.nearest_filter.filter_vectors(self.V)
        self.assertEqual(len(result), 5)
        self.assertIn(self.V[0], result)
        self.assertIn(self.V[1], result)
        self.assertIn(self.V[4], result)
        self.assertIn(self.V[2], result)
        self.assertIn(self.V[3], result)

    def test_unique(self):
        """Entries repeating existing vectors and labels are removed again."""
        # Work on a copy so the shared fixture list is not mutated via an alias.
        W = list(self.V)
        W.append((numpy.array([7]), 'data8', 2.8))
        W.append((numpy.array([0]), 'data1', 2.8))
        W.append((numpy.array([1]), 'data2', 2.8))
        W.append((numpy.array([6]), 'data7', 2.8))
        result = self.unique.filter_vectors(W)
        self.assertEqual(len(result), 8)