Beispiel #1
0
    def __init__(self,
                 dim,
                 lshashes=None,
                 distance=None,
                 fetch_vector_filters=None,
                 vector_filters=None,
                 storage=None):
        """ Keeps the configuration. """
        if lshashes is None:
            lshashes = [RandomBinaryProjections('default', 10)]
        self.lshashes = lshashes
        if distance is None: distance = EuclideanDistance()
        self.distance = distance
        if vector_filters is None: vector_filters = [NearestFilter(10)]
        self.vector_filters = vector_filters
        if fetch_vector_filters is None:
            fetch_vector_filters = [UniqueFilter()]
        self.fetch_vector_filters = fetch_vector_filters
        if storage is None: storage = MemoryStorage()
        self.storage = storage

        # Initialize all hashes for the data space dimension.
        for lshash in self.lshashes:
            lshash.reset(dim)

        print('*** engine init done ***')
Beispiel #2
0
    def __init__(self,
                 dim,
                 lshashes=[RandomBinaryProjections('default', 10)],
                 distance=EuclideanDistance(),
                 vector_filters=[NearestFilter(10)],
                 storage=MemoryStorage()):
        """ Keeps the configuration. """
        self.lshashes = lshashes
        self.distance = distance
        self.vector_filters = vector_filters
        self.storage = storage

        # Initialize all hashes for the data space dimension.
        for lshash in self.lshashes:
            lshash.reset(dim)
Beispiel #3
0
class TestStorage(unittest.TestCase):

    def setUp(self):
        self.memory = MemoryStorage()
        self.redis_object = Redis(host='localhost',
                                  port=6379, db=0)
        self.redis_storage = RedisStorage(self.redis_object)

    def test_memory_storage(self):
        x = numpy.random.randn(100, 1)
        bucket_key = '23749283743928748'
        x_data = ['one', 'two', 'three']
        self.memory.store_vector('testHash', bucket_key, x, x_data)
        X = self.memory.get_bucket('testHash', bucket_key)
        self.assertEqual(len(X), 1)
        y = X[0][0]
        y_data = X[0][1]
        self.assertEqual(len(y), len(x))
        self.assertEqual(type(x), type(y))
        for k in range(100):
            self.assertEqual(y[k], x[k])
        self.assertEqual(type(y_data), type(x_data))
        self.assertEqual(len(y_data), len(x_data))
        for k in range(3):
            self.assertEqual(y_data[k], x_data[k])
        self.memory.clean_all_buckets()
        self.assertEqual(self.memory.get_bucket('testHash', bucket_key), [])

    def test_redis_storage(self):
        self.redis_storage.clean_all_buckets()
        x = numpy.random.randn(100, 1)
        bucket_key = '23749283743928748'
        x_data = ['one', 'two', 'three']
        self.redis_storage.store_vector('testHash', bucket_key, x, x_data)
        X = self.redis_storage.get_bucket('testHash', bucket_key)
        self.assertEqual(len(X), 1)
        y = X[0][0]
        y_data = X[0][1]
        self.assertEqual(len(y), len(x))
        self.assertEqual(type(x), type(y))
        for k in range(100):
            self.assertEqual(y[k], x[k])
        self.assertEqual(type(y_data), type(x_data))
        self.assertEqual(len(y_data), len(x_data))
        for k in range(3):
            self.assertEqual(y_data[k], x_data[k])
        self.redis_storage.clean_all_buckets()
        self.assertEqual(self.redis_storage.get_bucket('testHash',
                                                       bucket_key), [])
Beispiel #4
0
class TestStorage(unittest.TestCase):
    def setUp(self):
        self.memory = MemoryStorage()
        self.redis_object = Redis(host='localhost', port=6379, db=0)
        self.redis_storage = RedisStorage(self.redis_object)

    def test_memory_storage(self):
        x = numpy.random.randn(100, 1)
        bucket_key = '23749283743928748'
        x_data = ['one', 'two', 'three']
        self.memory.store_vector('testHash', bucket_key, x, x_data)
        X = self.memory.get_bucket('testHash', bucket_key)
        self.assertEqual(len(X), 1)
        y = X[0][0]
        y_data = X[0][1]
        self.assertEqual(len(y), len(x))
        self.assertEqual(type(x), type(y))
        for k in range(100):
            self.assertEqual(y[k], x[k])
        self.assertEqual(type(y_data), type(x_data))
        self.assertEqual(len(y_data), len(x_data))
        for k in range(3):
            self.assertEqual(y_data[k], x_data[k])
        self.memory.clean_all_buckets()
        self.assertEqual(self.memory.get_bucket('testHash', bucket_key), [])

    def test_redis_storage(self):
        self.redis_storage.clean_all_buckets()
        x = numpy.random.randn(100, 1)
        bucket_key = '23749283743928748'
        x_data = ['one', 'two', 'three']
        self.redis_storage.store_vector('testHash', bucket_key, x, x_data)
        X = self.redis_storage.get_bucket('testHash', bucket_key)
        self.assertEqual(len(X), 1)
        y = X[0][0]
        y_data = X[0][1]
        self.assertEqual(len(y), len(x))
        self.assertEqual(type(x), type(y))
        for k in range(100):
            self.assertEqual(y[k], x[k])
        self.assertEqual(type(y_data), type(x_data))
        self.assertEqual(len(y_data), len(x_data))
        for k in range(3):
            self.assertEqual(y_data[k], x_data[k])
        self.redis_storage.clean_all_buckets()
        self.assertEqual(self.redis_storage.get_bucket('testHash', bucket_key),
                         [])
    def __init__(self,
                 measure="EuclideanDistance",
                 data_path='data/classed_data/'):
        self.res = ResnetSimilarity()
        self.pbar = ProgressBar()
        # Dimension of our vector space
        self.dimension = 2048
        self.data_path = data_path

        # Create a random binary hash with 10 bits
        self.rbp = RandomBinaryProjections('rbp', 10)

        self.measure = measure
        self.msote = MemoryStorage()
        if measure == "EuclideanDistance":
            self.engine = Engine(self.dimension,
                                 lshashes=[self.rbp],
                                 storage=self.msote,
                                 distance=EuclideanDistance())
        else:
            self.engine = Engine(self.dimension,
                                 lshashes=[self.rbp],
                                 storage=self.msote,
                                 distance=CosineDistance())
Beispiel #6
0
 def setUp(self):
     self.memory = MemoryStorage()
     self.redis_object = Redis(host='localhost', port=6379, db=0)
     self.redis_storage = RedisStorage(self.redis_object)
dimension = 1000

# Create permutations meta-hash
permutations2 = HashPermutationMapper('permut2')

# Create binary hash as child hash
rbp_perm2 = RandomBinaryProjections('rbp_perm2', 14)

# Add rbp as child hash of permutations hash
permutations2.add_child_hash(rbp_perm2)

engine = Engine(dimension,
                lshashes=[permutations2],
                distance=CosineDistance(),
                vector_filters=[NearestFilter(5)],
                storage=MemoryStorage())

i = 0

query = numpy.zeros(dimension)

f = open('features2.txt', 'r')
# Opening, reading from the file::
for next_read_line in f:
    next_read_line = next_read_line.rstrip()

    split_arr = next_read_line.split(" ")
    split_arr = split_arr[1:]
    split_arr = list(map(float, split_arr))

    vector = numpy.asarray(split_arr)
 def setUp(self):
     self.memory = MemoryStorage()
     self.redis_object = Redis()
     self.redis_storage = RedisStorage(self.redis_object)
Beispiel #9
0
class TestHashStorage(unittest.TestCase):

    def setUp(self):
        self.memory = MemoryStorage()
        self.redis_object = Redis(host='localhost',
                                  port=6379, db=0)
        self.redis_storage = RedisStorage(self.redis_object)

    def test_hash_memory_storage_rbp(self):
        hash1 = RandomBinaryProjections('testRBPHash', 10)
        hash1.reset(100)

        self.memory.store_hash_configuration(hash1)

        hash2 = RandomBinaryProjections(None, None)
        hash2.apply_config(self.memory.load_hash_configuration('testRBPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.normals.shape[0]):
            for j in range(hash1.normals.shape[1]):
                self.assertEqual(hash1.normals[i, j], hash2.normals[i, j])

    def test_hash_memory_storage_rdp(self):
        hash1 = RandomDiscretizedProjections('testRDPHash', 10, 0.1)
        hash1.reset(100)

        self.memory.store_hash_configuration(hash1)

        hash2 = RandomDiscretizedProjections(None, None, None)
        hash2.apply_config(self.memory.load_hash_configuration('testRDPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.bin_width, hash2.bin_width)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.normals.shape[0]):
            for j in range(hash1.normals.shape[1]):
                self.assertEqual(hash1.normals[i, j], hash2.normals[i, j])

    def test_hash_memory_storage_pcabp(self):
        train_vectors = numpy.random.randn(10, 100)
        hash1 = PCABinaryProjections('testPCABPHash', 4, train_vectors)

        self.memory.store_hash_configuration(hash1)

        hash2 = PCABinaryProjections(None, None, None)
        hash2.apply_config(self.memory.load_hash_configuration('testPCABPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.components.shape[0]):
            for j in range(hash1.components.shape[1]):
                self.assertEqual(hash1.components[i, j], hash2.components[i, j])


    def test_hash_memory_storage_pcadp(self):
        train_vectors = numpy.random.randn(10, 100)
        hash1 = PCADiscretizedProjections('testPCADPHash', 4, train_vectors, 0.1)

        self.memory.store_hash_configuration(hash1)

        hash2 = PCADiscretizedProjections(None, None, None, None)
        hash2.apply_config(self.memory.load_hash_configuration('testPCADPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.bin_width, hash2.bin_width)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.components.shape[0]):
            for j in range(hash1.components.shape[1]):
                self.assertEqual(hash1.components[i, j], hash2.components[i, j])


    def test_hash_redis_storage_rbp(self):
        hash1 = RandomBinaryProjections('testRBPHash', 10)
        hash1.reset(100)

        self.redis_storage.store_hash_configuration(hash1)

        hash2 = RandomBinaryProjections(None, None)
        hash2.apply_config(self.redis_storage.load_hash_configuration('testRBPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.normals.shape[0]):
            for j in range(hash1.normals.shape[1]):
                self.assertEqual(hash1.normals[i, j], hash2.normals[i, j])

    def test_hash_redis_storage_rdp(self):
        hash1 = RandomDiscretizedProjections('testRDPHash', 10, 0.1)
        hash1.reset(100)

        self.redis_storage.store_hash_configuration(hash1)

        hash2 = RandomDiscretizedProjections(None, None, None)
        hash2.apply_config(self.redis_storage.load_hash_configuration('testRDPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.bin_width, hash2.bin_width)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.normals.shape[0]):
            for j in range(hash1.normals.shape[1]):
                self.assertEqual(hash1.normals[i, j], hash2.normals[i, j])

    def test_hash_redis_storage_pcabp(self):
        train_vectors = numpy.random.randn(10, 100)
        hash1 = PCABinaryProjections('testPCABPHash', 4, train_vectors)

        self.redis_storage.store_hash_configuration(hash1)

        hash2 = PCABinaryProjections(None, None, None)
        hash2.apply_config(self.redis_storage.load_hash_configuration('testPCABPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.components.shape[0]):
            for j in range(hash1.components.shape[1]):
                self.assertEqual(hash1.components[i, j], hash2.components[i, j])


    def test_hash_redis_storage_pcadp(self):
        train_vectors = numpy.random.randn(10, 100)
        hash1 = PCADiscretizedProjections('testPCADPHash', 4, train_vectors, 0.1)

        self.redis_storage.store_hash_configuration(hash1)

        hash2 = PCADiscretizedProjections(None, None, None, None)
        hash2.apply_config(self.redis_storage.load_hash_configuration('testPCADPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.bin_width, hash2.bin_width)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.components.shape[0]):
            for j in range(hash1.components.shape[1]):
                self.assertEqual(hash1.components[i, j], hash2.components[i, j])
Beispiel #10
0
 def setUp(self):
     self.storage = MemoryStorage()
     super(MemoryStorageTest, self).setUp()
class TestHashStorage(unittest.TestCase):

    def setUp(self):
        self.memory = MemoryStorage()
        self.redis_object = Redis()
        self.redis_storage = RedisStorage(self.redis_object)

    def test_hash_memory_storage_none_config(self):
        conf = self.memory.load_hash_configuration('nonexistentHash')

        self.assertIsNone(conf)

    def test_hash_memory_storage_rbp(self):
        hash1 = RandomBinaryProjections('testRBPHash', 10)
        hash1.reset(100)

        self.memory.store_hash_configuration(hash1)

        hash2 = RandomBinaryProjections(None, None)
        hash2.apply_config(self.memory.load_hash_configuration('testRBPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.normals.shape[0]):
            for j in range(hash1.normals.shape[1]):
                self.assertEqual(hash1.normals[i, j], hash2.normals[i, j])

    def test_hash_memory_storage_rdp(self):
        hash1 = RandomDiscretizedProjections('testRDPHash', 10, 0.1)
        hash1.reset(100)

        self.memory.store_hash_configuration(hash1)

        hash2 = RandomDiscretizedProjections(None, None, None)
        hash2.apply_config(self.memory.load_hash_configuration('testRDPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.bin_width, hash2.bin_width)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.normals.shape[0]):
            for j in range(hash1.normals.shape[1]):
                self.assertEqual(hash1.normals[i, j], hash2.normals[i, j])

    def test_hash_memory_storage_pcabp(self):
        train_vectors = numpy.random.randn(10, 100)
        hash1 = PCABinaryProjections('testPCABPHash', 4, train_vectors)

        self.memory.store_hash_configuration(hash1)

        hash2 = PCABinaryProjections(None, None, None)
        hash2.apply_config(self.memory.load_hash_configuration('testPCABPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.components.shape[0]):
            for j in range(hash1.components.shape[1]):
                self.assertEqual(hash1.components[i, j], hash2.components[i, j])


    def test_hash_memory_storage_pcadp(self):
        train_vectors = numpy.random.randn(10, 100)
        hash1 = PCADiscretizedProjections('testPCADPHash', 4, train_vectors, 0.1)

        self.memory.store_hash_configuration(hash1)

        hash2 = PCADiscretizedProjections(None, None, None, None)
        hash2.apply_config(self.memory.load_hash_configuration('testPCADPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.bin_width, hash2.bin_width)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.components.shape[0]):
            for j in range(hash1.components.shape[1]):
                self.assertEqual(hash1.components[i, j], hash2.components[i, j])


    def test_hash_redis_storage_none_config(self):
        conf = self.redis_storage.load_hash_configuration('nonexistentHash')

        self.assertIsNone(conf)

    def test_hash_redis_storage_rbp(self):
        hash1 = RandomBinaryProjections('testRBPHash', 10)
        hash1.reset(100)

        self.redis_storage.store_hash_configuration(hash1)

        hash2 = RandomBinaryProjections(None, None)
        hash2.apply_config(self.redis_storage.load_hash_configuration('testRBPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.normals.shape[0]):
            for j in range(hash1.normals.shape[1]):
                self.assertEqual(hash1.normals[i, j], hash2.normals[i, j])

    def test_hash_redis_storage_rdp(self):
        hash1 = RandomDiscretizedProjections('testRDPHash', 10, 0.1)
        hash1.reset(100)

        self.redis_storage.store_hash_configuration(hash1)

        hash2 = RandomDiscretizedProjections(None, None, None)
        hash2.apply_config(self.redis_storage.load_hash_configuration('testRDPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.bin_width, hash2.bin_width)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.normals.shape[0]):
            for j in range(hash1.normals.shape[1]):
                self.assertEqual(hash1.normals[i, j], hash2.normals[i, j])

    def test_hash_redis_storage_pcabp(self):
        train_vectors = numpy.random.randn(10, 100)
        hash1 = PCABinaryProjections('testPCABPHash', 4, train_vectors)

        self.redis_storage.store_hash_configuration(hash1)

        hash2 = PCABinaryProjections(None, None, None)
        hash2.apply_config(self.redis_storage.load_hash_configuration('testPCABPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.components.shape[0]):
            for j in range(hash1.components.shape[1]):
                self.assertEqual(hash1.components[i, j], hash2.components[i, j])


    def test_hash_redis_storage_pcadp(self):
        train_vectors = numpy.random.randn(10, 100)
        hash1 = PCADiscretizedProjections('testPCADPHash', 4, train_vectors, 0.1)

        self.redis_storage.store_hash_configuration(hash1)

        hash2 = PCADiscretizedProjections(None, None, None, None)
        hash2.apply_config(self.redis_storage.load_hash_configuration('testPCADPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.bin_width, hash2.bin_width)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.components.shape[0]):
            for j in range(hash1.components.shape[1]):
                self.assertEqual(hash1.components[i, j], hash2.components[i, j])
 def setUp(self):
     self.memory = MemoryStorage()
     self.redis_object = Redis()
     self.redis_storage = RedisStorage(self.redis_object)
     numpy.random.seed(16)
Beispiel #13
0
def memoryStorage():
    return MemoryStorage()
 def setUp(self):
     self.memory = MemoryStorage()
     self.redis_object = Redis()
     self.redis_storage = RedisStorage(self.redis_object)
     numpy.random.seed(16)
class TestRandomBinaryProjectionTree(unittest.TestCase):
    def setUp(self):
        self.memory = MemoryStorage()
        self.redis_object = Redis()
        self.redis_storage = RedisStorage(self.redis_object)
        numpy.random.seed(16)

    def test_retrieval(self):
        # We want 12 projections, 20 results at least
        rbpt = RandomBinaryProjectionTree('testHash', 12, 20)

        # Create engine for 100 dimensional feature space, do not forget to set
        # nearest filter to 20, because default is 10
        self.engine = Engine(100,
                             lshashes=[rbpt],
                             vector_filters=[NearestFilter(20)])

        # First insert 200000 random vectors
        for k in range(200000):
            x = numpy.random.randn(100)
            x_data = 'data {}'.format(k)
            self.engine.store_vector(x, x_data)

        # Now do random queries and check result set size
        for k in range(10):
            x = numpy.random.randn(100)
            n = self.engine.neighbours(x)
            self.assertEqual(len(n), 20)

    def test_storage_memory(self):
        # We want 10 projections, 20 results at least
        rbpt = RandomBinaryProjectionTree('testHash', 10, 20)

        # Create engine for 100 dimensional feature space
        self.engine = Engine(100,
                             lshashes=[rbpt],
                             vector_filters=[NearestFilter(20)])

        # First insert 2000 random vectors
        for k in range(2000):
            x = numpy.random.randn(100)
            x_data = 'data'
            self.engine.store_vector(x, x_data)

        self.memory.store_hash_configuration(rbpt)

        rbpt2 = RandomBinaryProjectionTree(None, None, None)
        rbpt2.apply_config(self.memory.load_hash_configuration('testHash'))

        self.assertEqual(rbpt.dim, rbpt2.dim)
        self.assertEqual(rbpt.hash_name, rbpt2.hash_name)
        self.assertEqual(rbpt.projection_count, rbpt2.projection_count)

        for i in range(rbpt.normals.shape[0]):
            for j in range(rbpt.normals.shape[1]):
                self.assertEqual(rbpt.normals[i, j], rbpt2.normals[i, j])

        # Now do random queries and check result set size
        for k in range(10):
            x = numpy.random.randn(100)
            keys1 = rbpt.hash_vector(x, querying=True)
            keys2 = rbpt2.hash_vector(x, querying=True)
            self.assertEqual(len(keys1), len(keys2))
            for k in range(len(keys1)):
                self.assertEqual(keys1[k], keys2[k])

    def test_storage_redis(self):
        # We want 10 projections, 20 results at least
        rbpt = RandomBinaryProjectionTree('testHash', 10, 20)

        # Create engine for 100 dimensional feature space
        self.engine = Engine(100,
                             lshashes=[rbpt],
                             vector_filters=[NearestFilter(20)])

        # First insert 2000 random vectors
        for k in range(2000):
            x = numpy.random.randn(100)
            x_data = 'data'
            self.engine.store_vector(x, x_data)

        self.redis_storage.store_hash_configuration(rbpt)

        rbpt2 = RandomBinaryProjectionTree(None, None, None)
        rbpt2.apply_config(
            self.redis_storage.load_hash_configuration('testHash'))

        self.assertEqual(rbpt.dim, rbpt2.dim)
        self.assertEqual(rbpt.hash_name, rbpt2.hash_name)
        self.assertEqual(rbpt.projection_count, rbpt2.projection_count)

        for i in range(rbpt.normals.shape[0]):
            for j in range(rbpt.normals.shape[1]):
                self.assertEqual(rbpt.normals[i, j], rbpt2.normals[i, j])

        # Now do random queries and check result set size
        for k in range(10):
            x = numpy.random.randn(100)
            keys1 = rbpt.hash_vector(x, querying=True)
            keys2 = rbpt2.hash_vector(x, querying=True)
            self.assertEqual(len(keys1), len(keys2))
            for k in range(len(keys1)):
                self.assertEqual(keys1[k], keys2[k])
 def setUp(self):
     self.memory = MemoryStorage()
     self.redis_object = Redis(host='localhost',
                               port=6379, db=0)
     self.redis_storage = RedisStorage(self.redis_object)
class TestRandomBinaryProjectionTree(unittest.TestCase):

    def setUp(self):
        self.memory = MemoryStorage()
        self.redis_object = Redis(host='localhost',
                                  port=6379, db=0)
        self.redis_storage = RedisStorage(self.redis_object)

    def test_retrieval(self):
        # We want 12 projections, 20 results at least
        rbpt = RandomBinaryProjectionTree('testHash', 12, 20)

        # Create engine for 100 dimensional feature space, do not forget to set
        # nearest filter to 20, because default is 10
        self.engine = Engine(100, lshashes=[rbpt], vector_filters=[NearestFilter(20)])

        # First insert 200000 random vectors
        #print 'Indexing...'
        for k in range(200000):
            x = numpy.random.randn(100)
            x_data = 'data'
            self.engine.store_vector(x, x_data)

        # Now do random queries and check result set size
        #print 'Querying...'
        for k in range(10):
            x = numpy.random.randn(100)
            n = self.engine.neighbours(x)
            #print "Candidate count = %d" % self.engine.candidate_count(x)
            #print "Result size = %d" % len(n)
            self.assertEqual(len(n), 20)

    def test_storage_memory(self):
        # We want 10 projections, 20 results at least
        rbpt = RandomBinaryProjectionTree('testHash', 10, 20)

        # Create engine for 100 dimensional feature space
        self.engine = Engine(100, lshashes=[rbpt], vector_filters=[NearestFilter(20)])

        # First insert 2000 random vectors
        for k in range(2000):
            x = numpy.random.randn(100)
            x_data = 'data'
            self.engine.store_vector(x, x_data)

        self.memory.store_hash_configuration(rbpt)

        rbpt2 = RandomBinaryProjectionTree(None, None, None)
        rbpt2.apply_config(self.memory.load_hash_configuration('testHash'))

        self.assertEqual(rbpt.dim, rbpt2.dim)
        self.assertEqual(rbpt.hash_name, rbpt2.hash_name)
        self.assertEqual(rbpt.projection_count, rbpt2.projection_count)

        for i in range(rbpt.normals.shape[0]):
            for j in range(rbpt.normals.shape[1]):
                self.assertEqual(rbpt.normals[i, j], rbpt2.normals[i, j])

        # Now do random queries and check result set size
        for k in range(10):
            x = numpy.random.randn(100)
            keys1 = rbpt.hash_vector(x, querying=True)
            keys2 = rbpt2.hash_vector(x, querying=True)
            self.assertEqual(len(keys1), len(keys2))
            for k in range(len(keys1)):
                self.assertEqual(keys1[k], keys2[k])

    def test_storage_redis(self):
        # We want 10 projections, 20 results at least
        rbpt = RandomBinaryProjectionTree('testHash', 10, 20)

        # Create engine for 100 dimensional feature space
        self.engine = Engine(100, lshashes=[rbpt], vector_filters=[NearestFilter(20)])

        # First insert 2000 random vectors
        for k in range(2000):
            x = numpy.random.randn(100)
            x_data = 'data'
            self.engine.store_vector(x, x_data)


        self.redis_storage.store_hash_configuration(rbpt)

        rbpt2 = RandomBinaryProjectionTree(None, None, None)
        rbpt2.apply_config(self.redis_storage.load_hash_configuration('testHash'))

        self.assertEqual(rbpt.dim, rbpt2.dim)
        self.assertEqual(rbpt.hash_name, rbpt2.hash_name)
        self.assertEqual(rbpt.projection_count, rbpt2.projection_count)

        for i in range(rbpt.normals.shape[0]):
            for j in range(rbpt.normals.shape[1]):
                self.assertEqual(rbpt.normals[i, j], rbpt2.normals[i, j])

        # Now do random queries and check result set size
        for k in range(10):
            x = numpy.random.randn(100)
            keys1 = rbpt.hash_vector(x, querying=True)
            keys2 = rbpt2.hash_vector(x, querying=True)
            self.assertEqual(len(keys1), len(keys2))
            for k in range(len(keys1)):
                self.assertEqual(keys1[k], keys2[k])
Beispiel #18
0
class TestStorage(unittest.TestCase):

    def setUp(self):
        self.memory = MemoryStorage()
        self.redis_object = Redis()
        self.redis_storage = RedisStorage(self.redis_object)

    def test_memory_storage(self):
        x = numpy.random.randn(100, 1)
        bucket_key = '23749283743928748'
        x_data = ['one', 'two', 'three']
        self.memory.store_vector('testHash', bucket_key, x, x_data)
        X = self.memory.get_bucket('testHash', bucket_key)
        self.assertEqual(len(X), 1)
        y = X[0][0]
        y_data = X[0][1]
        self.assertEqual(len(y), len(x))
        self.assertEqual(type(x), type(y))
        for k in range(100):
            self.assertEqual(y[k], x[k])
        self.assertEqual(type(y_data), type(x_data))
        self.assertEqual(len(y_data), len(x_data))
        for k in range(3):
            self.assertEqual(y_data[k], x_data[k])
        self.memory.clean_all_buckets()
        self.assertEqual(self.memory.get_bucket('testHash', bucket_key), [])

    def test_redis_storage(self):
        self.redis_storage.clean_all_buckets()
        x = numpy.random.randn(100, 1)
        bucket_key = '23749283743928748'
        x_data = ['one', 'two', 'three']
        self.redis_storage.store_vector('testHash', bucket_key, x, x_data)
        X = self.redis_storage.get_bucket('testHash', bucket_key)
        self.assertEqual(len(X), 1)
        y = X[0][0]
        y_data = X[0][1]
        self.assertEqual(len(y), len(x))
        self.assertEqual(type(x), type(y))
        for k in range(100):
            self.assertEqual(y[k], x[k])
        self.assertEqual(type(y_data), type(x_data))
        self.assertEqual(len(y_data), len(x_data))
        for k in range(3):
            self.assertEqual(y_data[k], x_data[k])
        self.redis_storage.clean_all_buckets()
        self.assertEqual(self.redis_storage.get_bucket('testHash',
                                                       bucket_key), [])

    def test_redis_storage_sparse(self):
        self.redis_storage.clean_all_buckets()
        x = scipy.sparse.rand(100, 1, density=0.1)
        bucket_key = '23749283743928748'
        x_data = ['one', 'two', 'three']
        self.redis_storage.store_vector('testHash', bucket_key, x, x_data)
        X = self.redis_storage.get_bucket('testHash', bucket_key)
        self.assertEqual(len(X), 1)
        y = X[0][0]
        y_data = X[0][1]
        self.assertEqual(type(x), type(y))
        self.assertEqual(x.shape[0], y.shape[0])
        self.assertEqual(x.shape[1], y.shape[1])
        self.assertTrue((y - x).sum() == 0.0)
        self.assertEqual(type(y_data), type(x_data))
        self.assertEqual(len(y_data), len(x_data))
        for k in range(3):
            self.assertEqual(y_data[k], x_data[k])
        self.redis_storage.clean_all_buckets()
        self.assertEqual(self.redis_storage.get_bucket('testHash',
                                                       bucket_key), [])
 def setUp(self):
     self.memory = MemoryStorage()
     self.redis_object = Redis()
     self.redis_storage = RedisStorage(self.redis_object)
class TestStorage(unittest.TestCase):
    def setUp(self):
        self.memory = MemoryStorage()
        self.redis_object = Redis()
        self.redis_storage = RedisStorage(self.redis_object)

    def test_memory_storage(self):
        x = numpy.random.randn(100, 1)
        bucket_key = '23749283743928748'
        x_data = ['one', 'two', 'three']
        self.memory.store_vector('testHash', bucket_key, x, x_data)
        X = self.memory.get_bucket('testHash', bucket_key)
        self.assertEqual(len(X), 1)
        y = X[0][0]
        y_data = X[0][1]
        self.assertEqual(len(y), len(x))
        self.assertEqual(type(x), type(y))
        for k in range(100):
            self.assertEqual(y[k], x[k])
        self.assertEqual(type(y_data), type(x_data))
        self.assertEqual(len(y_data), len(x_data))
        for k in range(3):
            self.assertEqual(y_data[k], x_data[k])
        self.memory.clean_all_buckets()
        self.assertEqual(self.memory.get_bucket('testHash', bucket_key), [])

    def test_redis_storage(self):
        self.redis_storage.clean_all_buckets()
        x = numpy.random.randn(100, 1)
        bucket_key = '23749283743928748'
        x_data = ['one', 'two', 'three']
        self.redis_storage.store_vector('testHash', bucket_key, x, x_data)
        X = self.redis_storage.get_bucket('testHash', bucket_key)
        self.assertEqual(len(X), 1)
        y = X[0][0]
        y_data = X[0][1]
        self.assertEqual(len(y), len(x))
        self.assertEqual(type(x), type(y))
        for k in range(100):
            self.assertEqual(y[k], x[k])
        self.assertEqual(type(y_data), type(x_data))
        self.assertEqual(len(y_data), len(x_data))
        for k in range(3):
            self.assertEqual(y_data[k], x_data[k])
        self.redis_storage.clean_all_buckets()
        self.assertEqual(self.redis_storage.get_bucket('testHash', bucket_key),
                         [])

    def test_redis_storage_sparse(self):
        self.redis_storage.clean_all_buckets()
        x = scipy.sparse.rand(100, 1, density=0.1)
        bucket_key = '23749283743928748'
        x_data = ['one', 'two', 'three']
        self.redis_storage.store_vector('testHash', bucket_key, x, x_data)
        X = self.redis_storage.get_bucket('testHash', bucket_key)
        self.assertEqual(len(X), 1)
        y = X[0][0]
        y_data = X[0][1]
        self.assertEqual(type(x), type(y))
        self.assertEqual(x.shape[0], y.shape[0])
        self.assertEqual(x.shape[1], y.shape[1])
        self.assertTrue((y - x).sum() == 0.0)
        self.assertEqual(type(y_data), type(x_data))
        self.assertEqual(len(y_data), len(x_data))
        for k in range(3):
            self.assertEqual(y_data[k], x_data[k])
        self.redis_storage.clean_all_buckets()
        self.assertEqual(self.redis_storage.get_bucket('testHash', bucket_key),
                         [])