def load_hashmap(self): # Create redis storage adapter # need to start redis service redis_object = Redis(host='localhost', port=6379, db=14) redis_storage = RedisStorage(redis_object) try: config = redis_storage.load_hash_configuration('test') lshash = RandomBinaryProjections(None, None) lshash.apply_config(config) except: # Config is not existing, create hash from scratch, with 10 projections lshash = RandomBinaryProjections('test', 10) nearest = NearestFilter(self.nn) # self.engine = Engine(feature_size, lshashes=[], vector_filters=[]) self.engine = Engine(self.feature_size, lshashes=[lshash], vector_filters=[nearest], storage=redis_storage, distance=CosineDistance()) # Do some stuff like indexing or querying with the engine... # Finally store hash configuration in redis for later use redis_storage.store_hash_configuration(lshash)
def loadHashmap(self, feature_size=129, result_n=1000): #这里参数没有用到 ''' feature_size: hash空间维数大小 result_n :返回多少个最近邻 ''' # Create redis storage adapter redis_object = Redis(host='localhost', port=6379, db=0) redis_storage = RedisStorage(redis_object) try: # Get hash config from redis config = redis_storage.load_hash_configuration('test') # Config is existing, create hash with None parameters lshash = RandomBinaryProjections(None, None) # Apply configuration loaded from redis lshash.apply_config(config) except: # Config is not existing, create hash from scratch, with 10 projections lshash = RandomBinaryProjections('test', 0) # Create engine for feature space of 100 dimensions and use our hash. # This will set the dimension of the lshash only the first time, not when # using the configuration loaded from redis. Use redis storage to store # buckets. nearest = NearestFilter(result_n) #self.engine = Engine(feature_size, lshashes=[], vector_filters=[]) self.engine = Engine(feature_size, lshashes=[lshash], vector_filters=[nearest], storage=redis_storage, distance=EuclideanDistance()) # Do some stuff like indexing or querying with the engine... # Finally store hash configuration in redis for later use redis_storage.store_hash_configuration(lshash)
def __init__(self): redis_object = redis.Redis(host='localhost', port=6379, db=0) redis_storage = RedisStorage(redis_object) # Get hash config from redis config = redis_storage.load_hash_configuration('MyHash') if config is None: # Config is not existing, create hash from scratch, with 5 projections self.lshash = RandomBinaryProjections('MyHash', 5) else: # Config is existing, create hash with None parameters self.lshash = RandomBinaryProjections(None, None) # Apply configuration loaded from redis self.lshash.apply_config(config) # print("HERE") # Create engine for feature space of 100 dimensions and use our hash. # This will set the dimension of the lshash only the first time, not when # using the configuration loaded from redis. Use redis storage to store # buckets. self.engine = Engine(4, lshashes=[self.lshash], storage=redis_storage) redis_storage.store_hash_configuration(self.lshash)
class TestHashStorage(unittest.TestCase): def setUp(self): self.memory = MemoryStorage() self.redis_object = Redis() self.redis_storage = RedisStorage(self.redis_object) def test_hash_memory_storage_none_config(self): conf = self.memory.load_hash_configuration('nonexistentHash') self.assertIsNone(conf) def test_hash_memory_storage_rbp(self): hash1 = RandomBinaryProjections('testRBPHash', 10) hash1.reset(100) self.memory.store_hash_configuration(hash1) hash2 = RandomBinaryProjections(None, None) hash2.apply_config(self.memory.load_hash_configuration('testRBPHash')) self.assertEqual(hash1.dim, hash2.dim) self.assertEqual(hash1.hash_name, hash2.hash_name) self.assertEqual(hash1.projection_count, hash2.projection_count) for i in range(hash1.normals.shape[0]): for j in range(hash1.normals.shape[1]): self.assertEqual(hash1.normals[i, j], hash2.normals[i, j]) def test_hash_memory_storage_rdp(self): hash1 = RandomDiscretizedProjections('testRDPHash', 10, 0.1) hash1.reset(100) self.memory.store_hash_configuration(hash1) hash2 = RandomDiscretizedProjections(None, None, None) hash2.apply_config(self.memory.load_hash_configuration('testRDPHash')) self.assertEqual(hash1.dim, hash2.dim) self.assertEqual(hash1.hash_name, hash2.hash_name) self.assertEqual(hash1.bin_width, hash2.bin_width) self.assertEqual(hash1.projection_count, hash2.projection_count) for i in range(hash1.normals.shape[0]): for j in range(hash1.normals.shape[1]): self.assertEqual(hash1.normals[i, j], hash2.normals[i, j]) def test_hash_memory_storage_pcabp(self): train_vectors = numpy.random.randn(10, 100) hash1 = PCABinaryProjections('testPCABPHash', 4, train_vectors) self.memory.store_hash_configuration(hash1) hash2 = PCABinaryProjections(None, None, None) hash2.apply_config(self.memory.load_hash_configuration('testPCABPHash')) self.assertEqual(hash1.dim, hash2.dim) self.assertEqual(hash1.hash_name, hash2.hash_name) self.assertEqual(hash1.projection_count, hash2.projection_count) for i in range(hash1.components.shape[0]): for j in range(hash1.components.shape[1]): self.assertEqual(hash1.components[i, j], hash2.components[i, j]) def test_hash_memory_storage_pcadp(self): train_vectors = numpy.random.randn(10, 100) hash1 = PCADiscretizedProjections('testPCADPHash', 4, train_vectors, 0.1) self.memory.store_hash_configuration(hash1) hash2 = PCADiscretizedProjections(None, None, None, None) hash2.apply_config(self.memory.load_hash_configuration('testPCADPHash')) self.assertEqual(hash1.dim, hash2.dim) self.assertEqual(hash1.hash_name, hash2.hash_name) self.assertEqual(hash1.bin_width, hash2.bin_width) self.assertEqual(hash1.projection_count, hash2.projection_count) for i in range(hash1.components.shape[0]): for j in range(hash1.components.shape[1]): self.assertEqual(hash1.components[i, j], hash2.components[i, j]) def test_hash_redis_storage_none_config(self): conf = self.redis_storage.load_hash_configuration('nonexistentHash') self.assertIsNone(conf) def test_hash_redis_storage_rbp(self): hash1 = RandomBinaryProjections('testRBPHash', 10) hash1.reset(100) self.redis_storage.store_hash_configuration(hash1) hash2 = RandomBinaryProjections(None, None) hash2.apply_config(self.redis_storage.load_hash_configuration('testRBPHash')) self.assertEqual(hash1.dim, hash2.dim) self.assertEqual(hash1.hash_name, hash2.hash_name) self.assertEqual(hash1.projection_count, hash2.projection_count) for i in range(hash1.normals.shape[0]): for j in range(hash1.normals.shape[1]): self.assertEqual(hash1.normals[i, j], hash2.normals[i, j]) def test_hash_redis_storage_rdp(self): hash1 = RandomDiscretizedProjections('testRDPHash', 10, 0.1) hash1.reset(100) self.redis_storage.store_hash_configuration(hash1) hash2 = RandomDiscretizedProjections(None, None, None) hash2.apply_config(self.redis_storage.load_hash_configuration('testRDPHash')) self.assertEqual(hash1.dim, hash2.dim) self.assertEqual(hash1.hash_name, hash2.hash_name) self.assertEqual(hash1.bin_width, hash2.bin_width) self.assertEqual(hash1.projection_count, hash2.projection_count) for i in range(hash1.normals.shape[0]): for j in range(hash1.normals.shape[1]): self.assertEqual(hash1.normals[i, j], hash2.normals[i, j]) def test_hash_redis_storage_pcabp(self): train_vectors = numpy.random.randn(10, 100) hash1 = PCABinaryProjections('testPCABPHash', 4, train_vectors) self.redis_storage.store_hash_configuration(hash1) hash2 = PCABinaryProjections(None, None, None) hash2.apply_config(self.redis_storage.load_hash_configuration('testPCABPHash')) self.assertEqual(hash1.dim, hash2.dim) self.assertEqual(hash1.hash_name, hash2.hash_name) self.assertEqual(hash1.projection_count, hash2.projection_count) for i in range(hash1.components.shape[0]): for j in range(hash1.components.shape[1]): self.assertEqual(hash1.components[i, j], hash2.components[i, j]) def test_hash_redis_storage_pcadp(self): train_vectors = numpy.random.randn(10, 100) hash1 = PCADiscretizedProjections('testPCADPHash', 4, train_vectors, 0.1) self.redis_storage.store_hash_configuration(hash1) hash2 = PCADiscretizedProjections(None, None, None, None) hash2.apply_config(self.redis_storage.load_hash_configuration('testPCADPHash')) self.assertEqual(hash1.dim, hash2.dim) self.assertEqual(hash1.hash_name, hash2.hash_name) self.assertEqual(hash1.bin_width, hash2.bin_width) self.assertEqual(hash1.projection_count, hash2.projection_count) for i in range(hash1.components.shape[0]): for j in range(hash1.components.shape[1]): self.assertEqual(hash1.components[i, j], hash2.components[i, j])
class TestRandomBinaryProjectionTree(unittest.TestCase): def setUp(self): self.memory = MemoryStorage() self.redis_object = Redis() self.redis_storage = RedisStorage(self.redis_object) numpy.random.seed(16) def test_retrieval(self): # We want 12 projections, 20 results at least rbpt = RandomBinaryProjectionTree('testHash', 12, 20) # Create engine for 100 dimensional feature space, do not forget to set # nearest filter to 20, because default is 10 self.engine = Engine(100, lshashes=[rbpt], vector_filters=[NearestFilter(20)]) # First insert 200000 random vectors for k in range(200000): x = numpy.random.randn(100) x_data = 'data {}'.format(k) self.engine.store_vector(x, x_data) # Now do random queries and check result set size for k in range(10): x = numpy.random.randn(100) n = self.engine.neighbours(x) self.assertEqual(len(n), 20) def test_storage_memory(self): # We want 10 projections, 20 results at least rbpt = RandomBinaryProjectionTree('testHash', 10, 20) # Create engine for 100 dimensional feature space self.engine = Engine(100, lshashes=[rbpt], vector_filters=[NearestFilter(20)]) # First insert 2000 random vectors for k in range(2000): x = numpy.random.randn(100) x_data = 'data' self.engine.store_vector(x, x_data) self.memory.store_hash_configuration(rbpt) rbpt2 = RandomBinaryProjectionTree(None, None, None) rbpt2.apply_config(self.memory.load_hash_configuration('testHash')) self.assertEqual(rbpt.dim, rbpt2.dim) self.assertEqual(rbpt.hash_name, rbpt2.hash_name) self.assertEqual(rbpt.projection_count, rbpt2.projection_count) for i in range(rbpt.normals.shape[0]): for j in range(rbpt.normals.shape[1]): self.assertEqual(rbpt.normals[i, j], rbpt2.normals[i, j]) # Now do random queries and check result set size for k in range(10): x = numpy.random.randn(100) keys1 = rbpt.hash_vector(x, querying=True) keys2 = rbpt2.hash_vector(x, querying=True) self.assertEqual(len(keys1), len(keys2)) for k in range(len(keys1)): self.assertEqual(keys1[k], keys2[k]) def test_storage_redis(self): # We want 10 projections, 20 results at least rbpt = RandomBinaryProjectionTree('testHash', 10, 20) # Create engine for 100 dimensional feature space self.engine = Engine(100, lshashes=[rbpt], vector_filters=[NearestFilter(20)]) # First insert 2000 random vectors for k in range(2000): x = numpy.random.randn(100) x_data = 'data' self.engine.store_vector(x, x_data) self.redis_storage.store_hash_configuration(rbpt) rbpt2 = RandomBinaryProjectionTree(None, None, None) rbpt2.apply_config( self.redis_storage.load_hash_configuration('testHash')) self.assertEqual(rbpt.dim, rbpt2.dim) self.assertEqual(rbpt.hash_name, rbpt2.hash_name) self.assertEqual(rbpt.projection_count, rbpt2.projection_count) for i in range(rbpt.normals.shape[0]): for j in range(rbpt.normals.shape[1]): self.assertEqual(rbpt.normals[i, j], rbpt2.normals[i, j]) # Now do random queries and check result set size for k in range(10): x = numpy.random.randn(100) keys1 = rbpt.hash_vector(x, querying=True) keys2 = rbpt2.hash_vector(x, querying=True) self.assertEqual(len(keys1), len(keys2)) for k in range(len(keys1)): self.assertEqual(keys1[k], keys2[k])
class TestRandomBinaryProjectionTree(unittest.TestCase): def setUp(self): self.memory = MemoryStorage() self.redis_object = Redis(host='localhost', port=6379, db=0) self.redis_storage = RedisStorage(self.redis_object) def test_retrieval(self): # We want 12 projections, 20 results at least rbpt = RandomBinaryProjectionTree('testHash', 12, 20) # Create engine for 100 dimensional feature space, do not forget to set # nearest filter to 20, because default is 10 self.engine = Engine(100, lshashes=[rbpt], vector_filters=[NearestFilter(20)]) # First insert 200000 random vectors #print 'Indexing...' for k in range(200000): x = numpy.random.randn(100) x_data = 'data' self.engine.store_vector(x, x_data) # Now do random queries and check result set size #print 'Querying...' for k in range(10): x = numpy.random.randn(100) n = self.engine.neighbours(x) #print "Candidate count = %d" % self.engine.candidate_count(x) #print "Result size = %d" % len(n) self.assertEqual(len(n), 20) def test_storage_memory(self): # We want 10 projections, 20 results at least rbpt = RandomBinaryProjectionTree('testHash', 10, 20) # Create engine for 100 dimensional feature space self.engine = Engine(100, lshashes=[rbpt], vector_filters=[NearestFilter(20)]) # First insert 2000 random vectors for k in range(2000): x = numpy.random.randn(100) x_data = 'data' self.engine.store_vector(x, x_data) self.memory.store_hash_configuration(rbpt) rbpt2 = RandomBinaryProjectionTree(None, None, None) rbpt2.apply_config(self.memory.load_hash_configuration('testHash')) self.assertEqual(rbpt.dim, rbpt2.dim) self.assertEqual(rbpt.hash_name, rbpt2.hash_name) self.assertEqual(rbpt.projection_count, rbpt2.projection_count) for i in range(rbpt.normals.shape[0]): for j in range(rbpt.normals.shape[1]): self.assertEqual(rbpt.normals[i, j], rbpt2.normals[i, j]) # Now do random queries and check result set size for k in range(10): x = numpy.random.randn(100) keys1 = rbpt.hash_vector(x, querying=True) keys2 = rbpt2.hash_vector(x, querying=True) self.assertEqual(len(keys1), len(keys2)) for k in range(len(keys1)): self.assertEqual(keys1[k], keys2[k]) def test_storage_redis(self): # We want 10 projections, 20 results at least rbpt = RandomBinaryProjectionTree('testHash', 10, 20) # Create engine for 100 dimensional feature space self.engine = Engine(100, lshashes=[rbpt], vector_filters=[NearestFilter(20)]) # First insert 2000 random vectors for k in range(2000): x = numpy.random.randn(100) x_data = 'data' self.engine.store_vector(x, x_data) self.redis_storage.store_hash_configuration(rbpt) rbpt2 = RandomBinaryProjectionTree(None, None, None) rbpt2.apply_config(self.redis_storage.load_hash_configuration('testHash')) self.assertEqual(rbpt.dim, rbpt2.dim) self.assertEqual(rbpt.hash_name, rbpt2.hash_name) self.assertEqual(rbpt.projection_count, rbpt2.projection_count) for i in range(rbpt.normals.shape[0]): for j in range(rbpt.normals.shape[1]): self.assertEqual(rbpt.normals[i, j], rbpt2.normals[i, j]) # Now do random queries and check result set size for k in range(10): x = numpy.random.randn(100) keys1 = rbpt.hash_vector(x, querying=True) keys2 = rbpt2.hash_vector(x, querying=True) self.assertEqual(len(keys1), len(keys2)) for k in range(len(keys1)): self.assertEqual(keys1[k], keys2[k])
config = redis_storage.load_hash_configuration('MyHash') if config is None: # Config is not existing, create hash from scratch, with 10 projections lshash = RandomBinaryProjections('MyHash', 50) else: # Config is existing, create hash with None parameters lshash = RandomBinaryProjections(None, None) # Apply configuration loaded from redis lshash.apply_config(config) # Create engine for feature space of 100 dimensions and use our hash. # This will set the dimension of the lshash only the first time, not when # using the configuration loaded from redis. Use redis storage to store # buckets. engine = Engine(dimension, lshashes=[lshash], storage=redis_storage) redis_storage.store_hash_configuration(lshash) app = Flask(__name__) @app.route('/') def index(): return "Hello, World!" @app.route('/api/identities') def get_person_list(): persons = r.smembers('identities') app.logger.info(persons) return {'identities': [name.decode("utf-8") for name in persons]}
import json import numpy as np import cPickle as pickle from nearpy import Engine from nearpy.hashes import RandomBinaryProjections from nearpy.storage import RedisStorage from redis import Redis from ne import CosineSim dimension = 100 with open("hndbow.index2word", 'r') as f: index2words = json.load(f) wordvecs = np.load("hndbow.syn0.npy") redis_storage = RedisStorage(Redis(host='localhost', port=6379, db=3)) lshash = RandomBinaryProjections('WordHash', 5, rand_seed=123) engine = Engine(dimension, distance=CosineSim(), lshashes=[lshash], storage=redis_storage) for i,w in enumerate(index2words): vec = wordvecs[i] # 1x100 nparray engine.store_vector(vec, w) redis_storage.store_hash_configuration(lshash)
class TestHashStorage(unittest.TestCase): def setUp(self): self.memory = MemoryStorage() self.redis_object = Redis(host='localhost', port=6379, db=0) self.redis_storage = RedisStorage(self.redis_object) def test_hash_memory_storage_rbp(self): hash1 = RandomBinaryProjections('testRBPHash', 10) hash1.reset(100) self.memory.store_hash_configuration(hash1) hash2 = RandomBinaryProjections(None, None) hash2.apply_config(self.memory.load_hash_configuration('testRBPHash')) self.assertEqual(hash1.dim, hash2.dim) self.assertEqual(hash1.hash_name, hash2.hash_name) self.assertEqual(hash1.projection_count, hash2.projection_count) for i in range(hash1.normals.shape[0]): for j in range(hash1.normals.shape[1]): self.assertEqual(hash1.normals[i, j], hash2.normals[i, j]) def test_hash_memory_storage_rdp(self): hash1 = RandomDiscretizedProjections('testRDPHash', 10, 0.1) hash1.reset(100) self.memory.store_hash_configuration(hash1) hash2 = RandomDiscretizedProjections(None, None, None) hash2.apply_config(self.memory.load_hash_configuration('testRDPHash')) self.assertEqual(hash1.dim, hash2.dim) self.assertEqual(hash1.hash_name, hash2.hash_name) self.assertEqual(hash1.bin_width, hash2.bin_width) self.assertEqual(hash1.projection_count, hash2.projection_count) for i in range(hash1.normals.shape[0]): for j in range(hash1.normals.shape[1]): self.assertEqual(hash1.normals[i, j], hash2.normals[i, j]) def test_hash_memory_storage_pcabp(self): train_vectors = numpy.random.randn(10, 100) hash1 = PCABinaryProjections('testPCABPHash', 4, train_vectors) self.memory.store_hash_configuration(hash1) hash2 = PCABinaryProjections(None, None, None) hash2.apply_config(self.memory.load_hash_configuration('testPCABPHash')) self.assertEqual(hash1.dim, hash2.dim) self.assertEqual(hash1.hash_name, hash2.hash_name) self.assertEqual(hash1.projection_count, hash2.projection_count) for i in range(hash1.components.shape[0]): for j in range(hash1.components.shape[1]): self.assertEqual(hash1.components[i, j], hash2.components[i, j]) def test_hash_memory_storage_pcadp(self): train_vectors = numpy.random.randn(10, 100) hash1 = PCADiscretizedProjections('testPCADPHash', 4, train_vectors, 0.1) self.memory.store_hash_configuration(hash1) hash2 = PCADiscretizedProjections(None, None, None, None) hash2.apply_config(self.memory.load_hash_configuration('testPCADPHash')) self.assertEqual(hash1.dim, hash2.dim) self.assertEqual(hash1.hash_name, hash2.hash_name) self.assertEqual(hash1.bin_width, hash2.bin_width) self.assertEqual(hash1.projection_count, hash2.projection_count) for i in range(hash1.components.shape[0]): for j in range(hash1.components.shape[1]): self.assertEqual(hash1.components[i, j], hash2.components[i, j]) def test_hash_redis_storage_rbp(self): hash1 = RandomBinaryProjections('testRBPHash', 10) hash1.reset(100) self.redis_storage.store_hash_configuration(hash1) hash2 = RandomBinaryProjections(None, None) hash2.apply_config(self.redis_storage.load_hash_configuration('testRBPHash')) self.assertEqual(hash1.dim, hash2.dim) self.assertEqual(hash1.hash_name, hash2.hash_name) self.assertEqual(hash1.projection_count, hash2.projection_count) for i in range(hash1.normals.shape[0]): for j in range(hash1.normals.shape[1]): self.assertEqual(hash1.normals[i, j], hash2.normals[i, j]) def test_hash_redis_storage_rdp(self): hash1 = RandomDiscretizedProjections('testRDPHash', 10, 0.1) hash1.reset(100) self.redis_storage.store_hash_configuration(hash1) hash2 = RandomDiscretizedProjections(None, None, None) hash2.apply_config(self.redis_storage.load_hash_configuration('testRDPHash')) self.assertEqual(hash1.dim, hash2.dim) self.assertEqual(hash1.hash_name, hash2.hash_name) self.assertEqual(hash1.bin_width, hash2.bin_width) self.assertEqual(hash1.projection_count, hash2.projection_count) for i in range(hash1.normals.shape[0]): for j in range(hash1.normals.shape[1]): self.assertEqual(hash1.normals[i, j], hash2.normals[i, j]) def test_hash_redis_storage_pcabp(self): train_vectors = numpy.random.randn(10, 100) hash1 = PCABinaryProjections('testPCABPHash', 4, train_vectors) self.redis_storage.store_hash_configuration(hash1) hash2 = PCABinaryProjections(None, None, None) hash2.apply_config(self.redis_storage.load_hash_configuration('testPCABPHash')) self.assertEqual(hash1.dim, hash2.dim) self.assertEqual(hash1.hash_name, hash2.hash_name) self.assertEqual(hash1.projection_count, hash2.projection_count) for i in range(hash1.components.shape[0]): for j in range(hash1.components.shape[1]): self.assertEqual(hash1.components[i, j], hash2.components[i, j]) def test_hash_redis_storage_pcadp(self): train_vectors = numpy.random.randn(10, 100) hash1 = PCADiscretizedProjections('testPCADPHash', 4, train_vectors, 0.1) self.redis_storage.store_hash_configuration(hash1) hash2 = PCADiscretizedProjections(None, None, None, None) hash2.apply_config(self.redis_storage.load_hash_configuration('testPCADPHash')) self.assertEqual(hash1.dim, hash2.dim) self.assertEqual(hash1.hash_name, hash2.hash_name) self.assertEqual(hash1.bin_width, hash2.bin_width) self.assertEqual(hash1.projection_count, hash2.projection_count) for i in range(hash1.components.shape[0]): for j in range(hash1.components.shape[1]): self.assertEqual(hash1.components[i, j], hash2.components[i, j])