Ejemplo n.º 1
0
    def test_random_discretized_projections(self):
        # Runs the same recall/precision experiment against two engines that
        # differ only in their RandomDiscretizedProjections arguments, prints
        # both result triples and asserts that the second configuration
        # yields a strictly higher precision than the first.
        #
        # NOTE(review): the input vectors are unseeded random draws, so the
        # final comparison is statistical rather than guaranteed.
        dim = 4
        vector_count = 5000
        vectors = numpy.random.randn(dim, vector_count)

        # First configuration: hash built with (1, 0.01) -- presumably the
        # projection count and bin width; confirm against the hash class.
        rdp = RandomDiscretizedProjections("rdp", 1, 0.01)
        nearest = NearestFilter(10)
        engine = Engine(dim, lshashes=[rdp], vector_filters=[nearest])
        exp = RecallPrecisionExperiment(10, vectors)
        result = exp.perform_experiment([engine])

        # result[0] belongs to the only engine passed in; its first three
        # entries are read as recall, precision and search time.
        recall1 = result[0][0]
        precision1 = result[0][1]
        searchtime1 = result[0][2]

        print("\nRecall RDP: %f, Precision RDP: %f, SearchTime RDP: %f\n" % (recall1, precision1, searchtime1))

        # Second configuration: hash built with (2, 0.2). The experiment
        # object (and therefore the data set) and the filter are reused so
        # only the hash changes between the two runs.
        rdp = RandomDiscretizedProjections("rdp", 2, 0.2)
        engine = Engine(dim, lshashes=[rdp], vector_filters=[nearest])
        result = exp.perform_experiment([engine])

        recall2 = result[0][0]
        precision2 = result[0][1]
        searchtime2 = result[0][2]

        print("\nRecall RDP: %f, Precision RDP: %f, SearchTime RDP: %f\n" % (recall2, precision2, searchtime2))

        # Many things are random here, but the precision should increase
        # from the first to the second configuration. assertGreater reports
        # both values on failure, unlike assertTrue(a > b).
        self.assertGreater(precision2, precision1)
Ejemplo n.º 2
0
    def test_random_discretized_projections(self):
        # Compares two RandomDiscretizedProjections set-ups on one shared
        # random data set: each is wrapped in an Engine, evaluated with the
        # same RecallPrecisionExperiment, printed, and finally the second
        # precision is required to exceed the first.
        #
        # NOTE(review): nothing here is seeded, so the closing assertion is
        # expected to hold statistically, not deterministically.
        dim = 4
        vector_count = 5000
        vectors = numpy.random.randn(dim, vector_count)

        # Run 1: hash arguments (1, 0.01) -- presumably projection count and
        # bin width; verify against the RandomDiscretizedProjections class.
        rdp = RandomDiscretizedProjections('rdp', 1, 0.01)
        nearest = NearestFilter(10)
        engine = Engine(dim, lshashes=[rdp], vector_filters=[nearest])
        exp = RecallPrecisionExperiment(10, vectors)
        result = exp.perform_experiment([engine])

        # Entry 0 of the result corresponds to the single engine supplied.
        recall1 = result[0][0]
        precision1 = result[0][1]
        searchtime1 = result[0][2]

        # The expression already sits inside print(...), so no backslash
        # continuation is needed to wrap it.
        print('\nRecall RDP: %f, Precision RDP: %f, SearchTime RDP: %f\n' %
              (recall1, precision1, searchtime1))

        # Run 2: hash arguments (2, 0.2); the filter and the experiment are
        # reused so that only the hash differs from run 1.
        rdp = RandomDiscretizedProjections('rdp', 2, 0.2)
        engine = Engine(dim, lshashes=[rdp], vector_filters=[nearest])
        result = exp.perform_experiment([engine])

        recall2 = result[0][0]
        precision2 = result[0][1]
        searchtime2 = result[0][2]

        print('\nRecall RDP: %f, Precision RDP: %f, SearchTime RDP: %f\n' %
              (recall2, precision2, searchtime2))

        # Many things are random here, but the precision should increase
        # from run 1 to run 2. assertGreater shows both operands when the
        # check fails, which assertTrue(a > b) does not.
        self.assertGreater(precision2, precision1)
Ejemplo n.º 3
0
    def test_experiment_with_unibucket_1(self):
        # The engine filter and the experiment both use N=10 on a UniBucket
        # hash; the test expects recall and precision of exactly 1.0.
        n_dims, n_vectors = 50, 100
        data = numpy.random.randn(n_dims, n_vectors)

        engine = Engine(
            n_dims,
            lshashes=[UniBucket("testHash")],
            vector_filters=[NearestFilter(10)],
        )
        experiment = RecallPrecisionExperiment(10, data)

        # Only one engine is evaluated, so only entry 0 is of interest.
        outcome = experiment.perform_experiment([engine])[0]

        # Both recall (index 0) and precision (index 1) must be one here.
        self.assertEqual(outcome[0], 1.0)
        self.assertEqual(outcome[1], 1.0)
Ejemplo n.º 4
0
    def test_experiment_with_unibucket_1(self):
        # Matching N (10) for the engine's NearestFilter and for the
        # experiment: recall and precision are both required to be 1.0.
        dimension = 50
        sample_size = 100
        samples = numpy.random.randn(dimension, sample_size)

        bucket_hash = UniBucket('testHash')
        top_ten = NearestFilter(10)
        search_engine = Engine(
            dimension, lshashes=[bucket_hash], vector_filters=[top_ten])

        experiment = RecallPrecisionExperiment(10, samples)
        per_engine = experiment.perform_experiment([search_engine])
        engine_result = per_engine[0]

        # Position 0 holds recall, position 1 holds precision.
        for position in (0, 1):
            self.assertEqual(engine_result[position], 1.0)
Ejemplo n.º 5
0
    def test_experiment_with_unibucket_3(self):
        # The engine's filter keeps 5 nearest neighbours while the experiment
        # asks for 10, so recall is expected to be 0.5 and precision 1.0.
        n_dims, n_vectors = 50, 100
        data = numpy.random.randn(n_dims, n_vectors)

        engine = Engine(
            n_dims,
            lshashes=[UniBucket("testHash")],
            vector_filters=[NearestFilter(5)],
        )
        experiment = RecallPrecisionExperiment(10, data)
        outcome = experiment.perform_experiment([engine])[0]

        # Recall is halved because only 5 of the 10 wanted neighbours can
        # ever be returned; everything that is returned is still correct.
        self.assertEqual(outcome[0], 0.5)
        self.assertEqual(outcome[1], 1.0)
Ejemplo n.º 6
0
    def test_experiment_with_unibucket_3(self):
        # Engine returns at most 5 neighbours (NearestFilter(5)) but the
        # experiment evaluates against 10: expected recall 0.5, precision 1.0.
        dimension = 50
        sample_size = 100
        samples = numpy.random.randn(dimension, sample_size)

        bucket_hash = UniBucket('testHash')
        top_five = NearestFilter(5)
        search_engine = Engine(
            dimension, lshashes=[bucket_hash], vector_filters=[top_five])

        experiment = RecallPrecisionExperiment(10, samples)
        per_engine = experiment.perform_experiment([search_engine])
        engine_result = per_engine[0]

        # (index, expected value): recall first, then precision.
        for position, expected in ((0, 0.5), (1, 1.0)):
            self.assertEqual(engine_result[position], expected)
Ejemplo n.º 7
0
    def test_experiment_with_list_2(self):
        # Same experiment as the matrix-based tests, but the vectors are
        # supplied as a Python list of 1-D arrays. The engine keeps 10
        # neighbours while the experiment only looks for 5.
        dim = 50
        vector_count = 100
        # Build the list directly; the loop index is not needed.
        vectors = [numpy.random.randn(dim) for _ in range(vector_count)]
        unibucket = UniBucket("testHash")
        nearest = NearestFilter(10)
        engine = Engine(dim, lshashes=[unibucket], vector_filters=[nearest])
        exp = RecallPrecisionExperiment(5, vectors)
        result = exp.perform_experiment([engine])

        # In this case precision is only 0.5
        # because the engine returns 10 nearest, but
        # the experiment only looks for 5 nearest.
        self.assertEqual(result[0][0], 1.0)
        self.assertEqual(result[0][1], 0.5)
Ejemplo n.º 8
0
    def test_random_binary_projections(self):
        # Runs one recall/precision experiment with a RandomBinaryProjections
        # hash and prints the measured values. This test makes no
        # assertions; it only fails if something raises.
        n_dims, n_vectors = 4, 5000
        data = numpy.random.randn(n_dims, n_vectors)

        # Hash built with the argument 32 -- presumably the number of
        # projections; confirm against the RandomBinaryProjections class.
        engine = Engine(
            n_dims,
            lshashes=[RandomBinaryProjections("rbp", 32)],
            vector_filters=[NearestFilter(10)],
        )
        experiment = RecallPrecisionExperiment(10, data)
        stats = experiment.perform_experiment([engine])[0]

        # The first three entries are read as recall, precision, search time.
        recall, precision, search_time = stats[0], stats[1], stats[2]

        print("\nRecall RBP: %f, Precision RBP: %f, SearchTime RBP: %f\n" % (recall, precision, search_time))
Ejemplo n.º 9
0
    def test_experiment_with_list_2(self):
        # Feeds the experiment a plain list of 1-D random arrays instead of
        # a matrix. The engine's filter keeps 10 neighbours, the experiment
        # evaluates against 5.
        dim = 50
        vector_count = 100
        # A comprehension replaces the append loop and its unused index.
        vectors = [numpy.random.randn(dim) for _ in range(vector_count)]
        unibucket = UniBucket('testHash')
        nearest = NearestFilter(10)
        engine = Engine(dim, lshashes=[unibucket], vector_filters=[nearest])
        exp = RecallPrecisionExperiment(5, vectors)
        result = exp.perform_experiment([engine])

        # In this case precision is only 0.5
        # because the engine returns 10 nearest, but
        # the experiment only looks for 5 nearest.
        self.assertEqual(result[0][0], 1.0)
        self.assertEqual(result[0][1], 0.5)
Ejemplo n.º 10
0
    def test_random_binary_projections(self):
        # Smoke-style run: evaluates a RandomBinaryProjections engine with
        # the recall/precision experiment and prints the three measurements.
        # Nothing is asserted on the values.
        dimension = 4
        sample_size = 5000
        samples = numpy.random.randn(dimension, sample_size)

        # Second constructor argument is 32 -- presumably the projection
        # count; verify against the RandomBinaryProjections class.
        binary_hash = RandomBinaryProjections('rbp', 32)
        top_ten = NearestFilter(10)
        search_engine = Engine(
            dimension, lshashes=[binary_hash], vector_filters=[top_ten])

        experiment = RecallPrecisionExperiment(10, samples)
        per_engine = experiment.perform_experiment([search_engine])

        # Entry 0 belongs to the only engine evaluated.
        measured_recall = per_engine[0][0]
        measured_precision = per_engine[0][1]
        measured_time = per_engine[0][2]

        report = '\nRecall RBP: %f, Precision RBP: %f, SearchTime RBP: %f\n' % (
            measured_recall, measured_precision, measured_time)
        print(report)