def setUp(self):
        """Create the sample tuples and the three filter objects for the tests.

        ``self.V`` receives eight tuples of the form
        ``(one-element numpy array, label string, float)``. The float is
        presumably the distance the filters operate on -- confirm against
        the filter implementations.
        """
        values = (0.4, 0.9, 1.4, 2.1, 0.1, 8.7, 3.4, 2.8)
        # Entry k holds array([k]) and the label 'data<k+1>'.
        self.V = [
            (numpy.array([position]), 'data%d' % (position + 1), value)
            for position, value in enumerate(values)
        ]

        self.threshold_filter = DistanceThresholdFilter(1.0)
        self.nearest_filter = NearestFilter(5)
        self.unique = UniqueFilter()
Exemple #2
0
 def __init__(self, data_points, sim_threshold=0.5, num_vectors=3):
     """Index every row of ``data_points`` in a hashing engine.

     :param data_points:
         Two-dimensionally indexable array (``shape`` and ``[i, 1:]`` are
         used). Column 0 of each row is skipped when storing; what it
         holds is not visible here.
     :param sim_threshold:
         The engine's distance filter is built with ``1 - sim_threshold``.
     :param num_vectors:
         Second argument handed to ``RandomBinaryProjections``
         (presumably the number of projections / hash bits).
     """
     self.data_points = data_points
     shape = self.data_points.shape
     self.point_num = shape[0]
     # The first column is not part of the stored vector.
     self.dimension = shape[1] - 1

     # Random binary projection hash; the seed is fixed at 42.
     self.rbp = RandomBinaryProjections('rbp', num_vectors, rand_seed=42)
     distance_filter = DistanceThresholdFilter(1 - sim_threshold)
     self.engine = Engine(
         self.dimension,
         lshashes=[self.rbp],
         vector_filters=[distance_filter])

     # Each row is stored under its row index rendered as a decimal string.
     for row_index in range(self.point_num):
         row_vector = self.data_points[row_index, 1:]
         self.engine.store_vector(row_vector, '%d' % row_index)
Exemple #3
0
    def setUp(self):
        """Populate ``self.V`` with sample tuples and create the filters.

        Each of the eight entries is ``(numpy array holding one integer,
        'dataN' label, float)``. The float is presumably a distance value
        consumed by the filters -- confirm against their implementation.
        """
        third_fields = [0.4, 0.9, 1.4, 2.1, 0.1, 8.7, 3.4, 2.8]

        self.V = []
        for number, third in enumerate(third_fields):
            label = 'data' + str(number + 1)
            self.V.append((numpy.array([number]), label, third))

        self.threshold_filter = DistanceThresholdFilter(1.0)
        self.nearest_filter = NearestFilter(5)
        self.unique = UniqueFilter()
Exemple #4
0
 def __init__(self, level, dim, proj, bico):
     """Set up the per-level state, hashes and nearest-neighbour engine.

     :param level:
         Stored as ``self.level`` and passed to ``bico._getR`` to obtain
         the distance-filter threshold.
     :param dim:
         Dimension given to the engine and used for the zero vectors of
         the initial clustering feature.
     :param proj:
         Passed as the second argument to both hash constructors
         (presumably the projection count).
     :param bico:
         Owning object; only ``_getR`` is called on it here.
     """
     self.level = level
     self.dim = dim
     self.proj = proj
     self.bico = bico
     self.num_cfs = 0
     self.point_to_biconode = {}

     # Hash objects and the distance measure, created in the original order.
     self.rbpt = RandomBinaryProjectionTree('rbpt', proj, 1)
     self.rbp = RandomBinaryProjections('rbp', proj)
     self.sqdist = SquaredEuclideanDistance()

     # Only the flat projections hash is handed to the engine.
     level_filter = DistanceThresholdFilter(bico._getR(self.level))
     self.ann_engine = Engine(
         dim,
         lshashes=[self.rbp],
         distance=self.sqdist,
         vector_filters=[level_filter])

     # Initial clustering feature: two separate zero points and two zeros.
     first_point = Point(np.zeros(dim))
     second_point = Point(np.zeros(dim))
     self.cf = ClusteringFeature(first_point, second_point, 0, 0)
Exemple #5
0
 def __init__(self, dimension: int, number_projections: int,
              threshold: float):
     """
     Build the hash, distance measure and engine used for neighbor queries.

     :param dimension:
         Number of dimensions of the input points.
     :param number_projections:
         Number of random projections used for finding nearest neighbors.
         Trade-off: more projections give fewer false positives in the
         candidate set.
     :param threshold:
         Value given to ``DistanceThresholdFilter``; points within this
         distance count as "near".
         NOTE(review): the engine measures ``SquaredEuclideanDistance``,
         so this is presumably a squared distance -- confirm.
     """
     projections_hash = RandomBinaryProjections('rbp', number_projections)
     squared_distance = SquaredEuclideanDistance()
     self.rbp = projections_hash
     self.sqdist = squared_distance

     threshold_filter = DistanceThresholdFilter(threshold)
     self.ann_engine = Engine(
         dimension,
         lshashes=[projections_hash],
         distance=squared_distance,
         vector_filters=[threshold_filter])
Exemple #6
0
class TestVectorFilters(unittest.TestCase):
    """Checks for the threshold, nearest and unique vector filters."""

    def setUp(self):
        """Create eight ``(array, label, float)`` tuples and the filters.

        The assertions in the tests treat the float as the value the
        threshold and nearest filters select on.
        """
        floats = (0.4, 0.9, 1.4, 2.1, 0.1, 8.7, 3.4, 2.8)
        self.V = [
            (numpy.array([idx]), 'data%d' % (idx + 1), value)
            for idx, value in enumerate(floats)
        ]

        self.threshold_filter = DistanceThresholdFilter(1.0)
        self.nearest_filter = NearestFilter(5)
        self.unique = UniqueFilter()

    def test_thresholding(self):
        """Threshold 1.0 must keep exactly entries 0, 1 and 4."""
        result = self.threshold_filter.filter_vectors(self.V)
        self.assertEqual(len(result), 3)
        for kept in (0, 1, 4):
            self.assertIn(self.V[kept], result)

    def test_nearest(self):
        """NearestFilter(5) must keep entries 0, 1, 4, 2 and 3."""
        result = self.nearest_filter.filter_vectors(self.V)
        self.assertEqual(len(result), 5)
        for kept in (0, 1, 4, 2, 3):
            self.assertIn(self.V[kept], result)

    def test_unique(self):
        """Four extra tuples with repeated labels must leave eight results."""
        W = self.V
        repeated = ((7, 'data8'), (0, 'data1'), (1, 'data2'), (6, 'data7'))
        W.extend((numpy.array([number]), label, 2.8)
                 for number, label in repeated)

        result = self.unique.filter_vectors(W)
        self.assertEqual(len(result), 8)
Exemple #7
0
class TestVectorFilters(unittest.TestCase):
    """Tests for DistanceThresholdFilter, NearestFilter and UniqueFilter."""

    def setUp(self):
        """Build the sample tuples and one instance of each filter.

        Every entry of ``self.V`` is ``(one-element numpy array, label,
        float)``; the tests below select entries by that float.
        """
        self.V = []
        self.V.append((numpy.array([0]), 'data1', 0.4))
        self.V.append((numpy.array([1]), 'data2', 0.9))
        self.V.append((numpy.array([2]), 'data3', 1.4))
        self.V.append((numpy.array([3]), 'data4', 2.1))
        self.V.append((numpy.array([4]), 'data5', 0.1))
        self.V.append((numpy.array([5]), 'data6', 8.7))
        self.V.append((numpy.array([6]), 'data7', 3.4))
        self.V.append((numpy.array([7]), 'data8', 2.8))

        self.threshold_filter = DistanceThresholdFilter(1.0)
        self.nearest_filter = NearestFilter(5)
        self.unique = UniqueFilter()

    def test_thresholding(self):
        """Threshold 1.0 keeps exactly the entries with 0.4, 0.9 and 0.1."""
        result = self.threshold_filter.filter_vectors(self.V)
        self.assertEqual(len(result), 3)
        # assertIn reports the missing member on failure, unlike assertTrue.
        self.assertIn(self.V[0], result)
        self.assertIn(self.V[1], result)
        self.assertIn(self.V[4], result)

    def test_nearest(self):
        """NearestFilter(5) keeps the five entries with the smallest floats."""
        result = self.nearest_filter.filter_vectors(self.V)
        self.assertEqual(len(result), 5)
        self.assertIn(self.V[0], result)
        self.assertIn(self.V[1], result)
        self.assertIn(self.V[4], result)
        self.assertIn(self.V[2], result)
        self.assertIn(self.V[3], result)

    def test_unique(self):
        """Entries whose labels repeat existing ones are dropped (8 remain)."""
        # Work on a copy so the shared fixture list is not mutated.
        W = list(self.V)
        W.append((numpy.array([7]), 'data8', 2.8))
        W.append((numpy.array([0]), 'data1', 2.8))
        W.append((numpy.array([1]), 'data2', 2.8))
        W.append((numpy.array([6]), 'data7', 2.8))

        result = self.unique.filter_vectors(W)
        self.assertEqual(len(result), 8)