Example #1
	def loadHashmap(self, feature_size=129, result_n=1000):
		'''
		feature_size: dimensionality of the hash/feature space
		result_n: number of nearest neighbours to return
		'''
		# Create redis storage adapter
		redis_object = Redis(host='localhost', port=6379, db=0)
		redis_storage = RedisStorage(redis_object)
		try:
			# Get hash config from redis
			config = redis_storage.load_hash_configuration('test')
			# Config exists; create hash with None parameters
			lshash = RandomBinaryProjections(None, None)
			# Apply configuration loaded from redis
			lshash.apply_config(config)
			
		except Exception:
			# Config does not exist; create hash from scratch, with 10 projections
			lshash = RandomBinaryProjections('test', 10)
			

		# Create engine for the feature space (feature_size dimensions) and use our hash.
		# This will set the dimension of the lshash only the first time, not when
		# using the configuration loaded from redis. Use redis storage to store
		# buckets.
		nearest = NearestFilter(result_n)
		#self.engine = Engine(feature_size, lshashes=[], vector_filters=[])
		self.engine = Engine(feature_size, lshashes=[lshash], vector_filters=[nearest], storage=redis_storage, distance=EuclideanDistance())

		# Do some stuff like indexing or querying with the engine...

		# Finally store hash configuration in redis for later use
		redis_storage.store_hash_configuration(lshash)
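The placeholder comment above ("Do some stuff like indexing or querying with the engine...") can be made concrete. Below is a minimal sketch, not part of the original source, that indexes random vectors into an already-built engine and runs one query; the helper name index_and_query is illustrative.

import numpy

def index_and_query(engine, feature_size=129):
    # Index a few random feature vectors with string payloads
    for i in range(100):
        engine.store_vector(numpy.random.randn(feature_size), 'item_%d' % i)
    # Query with a fresh random vector; each result is a
    # (vector, data, distance) tuple, at most result_n of them
    return engine.neighbours(numpy.random.randn(feature_size))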
Example #2
class RedisStorageTest(StorageTest):
    def setUp(self):
        self.storage = RedisStorage(Redis())
        super(RedisStorageTest, self).setUp()

    def test_store_vector(self):
        x = numpy.random.randn(100, 1).ravel()
        self.check_store_vector(x)

    def test_store_sparse_vector(self):
        x = scipy.sparse.rand(100, 1, density=0.1)
        self.check_store_vector(x)

    def test_get_all_bucket_keys(self):
        self.check_get_all_bucket_keys()

    def test_delete_vector(self):
        self.check_delete_vector(numpy.ones(100))

    def test_store_zero(self):
        x = numpy.ones(100)
        hash_name, bucket_name = "testHash", "testBucket"
        self.storage.store_vector(hash_name, bucket_name, x, 0)
        bucket = self.storage.get_bucket(hash_name, bucket_name)
        _, data = bucket[0]
        self.assertEqual(data, 0)
Example #3
class TestStorage(unittest.TestCase):

    def setUp(self):
        self.memory = MemoryStorage()
        self.redis_object = Redis(host='localhost',
                                  port=6379, db=0)
        self.redis_storage = RedisStorage(self.redis_object)

    def test_memory_storage(self):
        x = numpy.random.randn(100, 1)
        bucket_key = '23749283743928748'
        x_data = ['one', 'two', 'three']
        self.memory.store_vector('testHash', bucket_key, x, x_data)
        X = self.memory.get_bucket('testHash', bucket_key)
        self.assertEqual(len(X), 1)
        y = X[0][0]
        y_data = X[0][1]
        self.assertEqual(len(y), len(x))
        self.assertEqual(type(x), type(y))
        for k in range(100):
            self.assertEqual(y[k], x[k])
        self.assertEqual(type(y_data), type(x_data))
        self.assertEqual(len(y_data), len(x_data))
        for k in range(3):
            self.assertEqual(y_data[k], x_data[k])
        self.memory.clean_all_buckets()
        self.assertEqual(self.memory.get_bucket('testHash', bucket_key), [])

    def test_redis_storage(self):
        self.redis_storage.clean_all_buckets()
        x = numpy.random.randn(100, 1)
        bucket_key = '23749283743928748'
        x_data = ['one', 'two', 'three']
        self.redis_storage.store_vector('testHash', bucket_key, x, x_data)
        X = self.redis_storage.get_bucket('testHash', bucket_key)
        self.assertEqual(len(X), 1)
        y = X[0][0]
        y_data = X[0][1]
        self.assertEqual(len(y), len(x))
        self.assertEqual(type(x), type(y))
        for k in range(100):
            self.assertEqual(y[k], x[k])
        self.assertEqual(type(y_data), type(x_data))
        self.assertEqual(len(y_data), len(x_data))
        for k in range(3):
            self.assertEqual(y_data[k], x_data[k])
        self.redis_storage.clean_all_buckets()
        self.assertEqual(self.redis_storage.get_bucket('testHash',
                                                       bucket_key), [])
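Outside the unittest harness, the same bucket round trip can be written directly against RedisStorage. A minimal sketch assuming a local Redis server; the hash and bucket names are arbitrary.

import numpy
from redis import Redis
from nearpy.storage import RedisStorage

storage = RedisStorage(Redis(host='localhost', port=6379, db=0))

# Store one 100-dimensional vector together with a payload
storage.store_vector('demoHash', 'bucket_1', numpy.random.randn(100), 'payload')

# get_bucket() returns a list of (vector, data) pairs
for vector, data in storage.get_bucket('demoHash', 'bucket_1'):
    print("%s: %s" % (data, vector.shape))

storage.clean_all_buckets()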
Example #4
    def __init__(self):
        redis_object = redis.Redis(host='localhost', port=6379, db=0)
        redis_storage = RedisStorage(redis_object)

        # Get hash config from redis
        config = redis_storage.load_hash_configuration('MyHash')

        if config is None:
            # Config does not exist; create hash from scratch, with 5 projections
            self.lshash = RandomBinaryProjections('MyHash', 5)
        else:
            # Config exists; create hash with None parameters
            self.lshash = RandomBinaryProjections(None, None)
            # Apply configuration loaded from redis
            self.lshash.apply_config(config)
        # print("HERE")

        # Create engine for the 4-dimensional feature space and use our hash.
        # This will set the dimension of the lshash only the first time, not when
        # using the configuration loaded from redis. Use redis storage to store
        # buckets.
        self.engine = Engine(4, lshashes=[self.lshash], storage=redis_storage)
        redis_storage.store_hash_configuration(self.lshash)
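The constructor above only builds the 4-dimensional engine and persists the hash configuration. A hedged sketch of a companion method (not present in the original class) that stores a vector and returns its neighbours:

    def add_and_query(self, vector, data=None):
        # Optionally store the 4-dimensional vector with a payload, then
        # return the current nearest neighbours as (vector, data, distance)
        if data is not None:
            self.engine.store_vector(vector, data)
        return self.engine.neighbours(vector)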
Example #5
class NearestNeighbourFinder(object):
    """
    Using an already-projected corpus in the form of a VectorCorpus, allow easy queries to find nearest
    neighbour event chains in the corpus, given a new event chain.

    """
    def __init__(self, model_type, model_name, hash, corpus_path, with_events=False):
        self.hash = hash
        self.corpus_path = corpus_path
        self.model_type = model_type
        self.model_name = model_name
        self.with_events = with_events

        self.model = None
        self.search_engine = None

    def init_engine(self, redis_port=6379):
        # Need to load the model to get information about it
        self.model = NarrativeChainModel.load_by_type(self.model_type, self.model_name)
        vector_size = self.model.vector_size

        # Point the Redis server to the model's database
        db_filename = "vectors.rdb"
        model_dir = self.model.get_model_directory(self.model_name)

        # Prepare an engine for reading vectors from
        try:
            redis = Redis(host='localhost', port=redis_port, db=0)
        except ConnectionError as e:
            raise RuntimeError("could not connect to redis server on port %s. Is it running? (%s)" % (redis_port, e))
        # Set the storage location to be in the model's directory/file
        redis.config_set("dbfilename", "vectors.rdb")
        redis.config_set("dir", model_dir)

        redis_storage = RedisStorage(redis)
        self.search_engine = Engine(vector_size, lshashes=[self.hash], storage=redis_storage,
                                    fetch_vector_filters=[UniqueVectorFilter()], vector_filters=[])
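Because the buckets live in Redis, they only survive a restart once Redis writes its RDB snapshot into the configured directory. A small standalone sketch of that persistence step; the directory path is illustrative, not from the original project.

from redis import Redis

r = Redis(host='localhost', port=6379, db=0)
# Point Redis at the model's snapshot file and directory, as above
r.config_set("dbfilename", "vectors.rdb")
r.config_set("dir", "/tmp/model_dir")

# ... store vectors through an Engine backed by RedisStorage(r) ...

r.save()  # force a synchronous snapshot (bgsave() for a background one)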
Example #6
File: api.py Project: Kommiu/FaceID
    transforms.ToPILImage(),
    transforms.Resize(64),
    transforms.CenterCrop(64),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

dimension = 512

r = redis.Redis(
    host='redis',
    port=6379,
    # charset='utf-8',
    # decode_responses=True,
)
redis_storage = RedisStorage(r)
# Get hash config from redis
config = redis_storage.load_hash_configuration('MyHash')
if config is None:
    # Config does not exist; create hash from scratch, with 50 projections
    lshash = RandomBinaryProjections('MyHash', 50)
else:
    # Config exists; create hash with None parameters
    lshash = RandomBinaryProjections(None, None)
    # Apply configuration loaded from redis
    lshash.apply_config(config)

# Create engine for the 512-dimensional feature space and use our hash.
# This will set the dimension of the lshash only the first time, not when
# using the configuration loaded from redis. Use redis storage to store
# buckets.
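The snippet ends before the engine is actually created. Based on the comments above and on the other examples on this page, a plausible continuation looks like the sketch below; it is an assumption, not the project's own code.

from nearpy import Engine

# Build the engine on the 512-dimensional feature space declared above and
# persist the hash configuration so later processes can reuse the same hash
engine = Engine(dimension, lshashes=[lshash], storage=redis_storage)
redis_storage.store_hash_configuration(lshash)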
Example #7
import web

from nearpy import Engine
from nearpy.hashes import RandomBinaryProjections

from redis import Redis
from nearpy.storage import RedisStorage

# Dimension of our vector space
dimension = 68

# Create a random binary hash with 10 bits
rbp = RandomBinaryProjections('rbp', 10)

# Create engine with pipeline configuration
redis_storage = RedisStorage(Redis(host='localhost', port=6379, db=0))
engine = Engine(dimension, lshashes=[rbp], storage=redis_storage)
db = web.database(dbn='sqlite', db='sorch.db')


def insertVector(id, vector):
    engine.store_vector(vector, id)


def getNN(vector):
    return engine.neighbours(vector)


def insertMetadata(id, url, artist, title, length):
    # web.py's db.insert() takes the table name as its first positional
    # argument; 'tracks' is a placeholder, not taken from the original code.
    db.insert('tracks', id=id, url=url, artist=artist, title=title, length=length)
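A brief usage sketch of the helpers above; the id, URL and metadata values are made up.

import numpy as np

vec = np.random.randn(dimension)          # 68-dimensional feature vector
insertVector('track-42', vec)
insertMetadata('track-42', 'http://example.com/track-42',
               'Some Artist', 'Some Title', 215)

# getNN() returns (vector, data, distance) tuples; data is the stored id
for _, track_id, dist in getNN(vec):
    print("%s %.4f" % (track_id, dist))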
Example #8
 def setUp(self):
     self.storage = RedisStorage(Redis())
     super(RedisStorageTest, self).setUp()
Example #9
class TestHashStorage(unittest.TestCase):

    def setUp(self):
        self.memory = MemoryStorage()
        self.redis_object = Redis()
        self.redis_storage = RedisStorage(self.redis_object)

    def test_hash_memory_storage_none_config(self):
        conf = self.memory.load_hash_configuration('nonexistentHash')

        self.assertIsNone(conf)

    def test_hash_memory_storage_rbp(self):
        hash1 = RandomBinaryProjections('testRBPHash', 10)
        hash1.reset(100)

        self.memory.store_hash_configuration(hash1)

        hash2 = RandomBinaryProjections(None, None)
        hash2.apply_config(self.memory.load_hash_configuration('testRBPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.normals.shape[0]):
            for j in range(hash1.normals.shape[1]):
                self.assertEqual(hash1.normals[i, j], hash2.normals[i, j])

    def test_hash_memory_storage_rdp(self):
        hash1 = RandomDiscretizedProjections('testRDPHash', 10, 0.1)
        hash1.reset(100)

        self.memory.store_hash_configuration(hash1)

        hash2 = RandomDiscretizedProjections(None, None, None)
        hash2.apply_config(self.memory.load_hash_configuration('testRDPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.bin_width, hash2.bin_width)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.normals.shape[0]):
            for j in range(hash1.normals.shape[1]):
                self.assertEqual(hash1.normals[i, j], hash2.normals[i, j])

    def test_hash_memory_storage_pcabp(self):
        train_vectors = numpy.random.randn(10, 100)
        hash1 = PCABinaryProjections('testPCABPHash', 4, train_vectors)

        self.memory.store_hash_configuration(hash1)

        hash2 = PCABinaryProjections(None, None, None)
        hash2.apply_config(self.memory.load_hash_configuration('testPCABPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.components.shape[0]):
            for j in range(hash1.components.shape[1]):
                self.assertEqual(hash1.components[i, j], hash2.components[i, j])


    def test_hash_memory_storage_pcadp(self):
        train_vectors = numpy.random.randn(10, 100)
        hash1 = PCADiscretizedProjections('testPCADPHash', 4, train_vectors, 0.1)

        self.memory.store_hash_configuration(hash1)

        hash2 = PCADiscretizedProjections(None, None, None, None)
        hash2.apply_config(self.memory.load_hash_configuration('testPCADPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.bin_width, hash2.bin_width)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.components.shape[0]):
            for j in range(hash1.components.shape[1]):
                self.assertEqual(hash1.components[i, j], hash2.components[i, j])


    def test_hash_redis_storage_none_config(self):
        conf = self.redis_storage.load_hash_configuration('nonexistentHash')

        self.assertIsNone(conf)

    def test_hash_redis_storage_rbp(self):
        hash1 = RandomBinaryProjections('testRBPHash', 10)
        hash1.reset(100)

        self.redis_storage.store_hash_configuration(hash1)

        hash2 = RandomBinaryProjections(None, None)
        hash2.apply_config(self.redis_storage.load_hash_configuration('testRBPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.normals.shape[0]):
            for j in range(hash1.normals.shape[1]):
                self.assertEqual(hash1.normals[i, j], hash2.normals[i, j])

    def test_hash_redis_storage_rdp(self):
        hash1 = RandomDiscretizedProjections('testRDPHash', 10, 0.1)
        hash1.reset(100)

        self.redis_storage.store_hash_configuration(hash1)

        hash2 = RandomDiscretizedProjections(None, None, None)
        hash2.apply_config(self.redis_storage.load_hash_configuration('testRDPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.bin_width, hash2.bin_width)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.normals.shape[0]):
            for j in range(hash1.normals.shape[1]):
                self.assertEqual(hash1.normals[i, j], hash2.normals[i, j])

    def test_hash_redis_storage_pcabp(self):
        train_vectors = numpy.random.randn(10, 100)
        hash1 = PCABinaryProjections('testPCABPHash', 4, train_vectors)

        self.redis_storage.store_hash_configuration(hash1)

        hash2 = PCABinaryProjections(None, None, None)
        hash2.apply_config(self.redis_storage.load_hash_configuration('testPCABPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.components.shape[0]):
            for j in range(hash1.components.shape[1]):
                self.assertEqual(hash1.components[i, j], hash2.components[i, j])


    def test_hash_redis_storage_pcadp(self):
        train_vectors = numpy.random.randn(10, 100)
        hash1 = PCADiscretizedProjections('testPCADPHash', 4, train_vectors, 0.1)

        self.redis_storage.store_hash_configuration(hash1)

        hash2 = PCADiscretizedProjections(None, None, None, None)
        hash2.apply_config(self.redis_storage.load_hash_configuration('testPCADPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.bin_width, hash2.bin_width)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.components.shape[0]):
            for j in range(hash1.components.shape[1]):
                self.assertEqual(hash1.components[i, j], hash2.components[i, j])
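Outside the tests, the same store/load round trip for a trained PCA hash can be written directly. A minimal sketch assuming a local Redis server; the hash name and dimensions are arbitrary.

import numpy
from redis import Redis
from nearpy.hashes import PCABinaryProjections
from nearpy.storage import RedisStorage

storage = RedisStorage(Redis(host='localhost', port=6379, db=0))

# Train a 4-bit PCA hash on sample vectors and persist its configuration
train_vectors = numpy.random.randn(10, 100)
pca_hash = PCABinaryProjections('myPCAHash', 4, train_vectors)
storage.store_hash_configuration(pca_hash)

# Later, or in another process: restore an identical hash from Redis
restored = PCABinaryProjections(None, None, None)
restored.apply_config(storage.load_hash_configuration('myPCAHash'))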
Example #10
 def setUp(self):
     self.memory = MemoryStorage()
     self.redis_object = Redis()
     self.redis_storage = RedisStorage(self.redis_object)
Example #11
class TestStorage(unittest.TestCase):
    def setUp(self):
        self.memory = MemoryStorage()
        self.redis_object = Redis()
        self.redis_storage = RedisStorage(self.redis_object)

    def test_memory_storage(self):
        x = numpy.random.randn(100, 1)
        bucket_key = '23749283743928748'
        x_data = ['one', 'two', 'three']
        self.memory.store_vector('testHash', bucket_key, x, x_data)
        X = self.memory.get_bucket('testHash', bucket_key)
        self.assertEqual(len(X), 1)
        y = X[0][0]
        y_data = X[0][1]
        self.assertEqual(len(y), len(x))
        self.assertEqual(type(x), type(y))
        for k in range(100):
            self.assertEqual(y[k], x[k])
        self.assertEqual(type(y_data), type(x_data))
        self.assertEqual(len(y_data), len(x_data))
        for k in range(3):
            self.assertEqual(y_data[k], x_data[k])
        self.memory.clean_all_buckets()
        self.assertEqual(self.memory.get_bucket('testHash', bucket_key), [])

    def test_redis_storage(self):
        self.redis_storage.clean_all_buckets()
        x = numpy.random.randn(100, 1)
        bucket_key = '23749283743928748'
        x_data = ['one', 'two', 'three']
        self.redis_storage.store_vector('testHash', bucket_key, x, x_data)
        X = self.redis_storage.get_bucket('testHash', bucket_key)
        self.assertEqual(len(X), 1)
        y = X[0][0]
        y_data = X[0][1]
        self.assertEqual(len(y), len(x))
        self.assertEqual(type(x), type(y))
        for k in range(100):
            self.assertEqual(y[k], x[k])
        self.assertEqual(type(y_data), type(x_data))
        self.assertEqual(len(y_data), len(x_data))
        for k in range(3):
            self.assertEqual(y_data[k], x_data[k])
        self.redis_storage.clean_all_buckets()
        self.assertEqual(self.redis_storage.get_bucket('testHash', bucket_key),
                         [])

    def test_redis_storage_sparse(self):
        self.redis_storage.clean_all_buckets()
        x = scipy.sparse.rand(100, 1, density=0.1)
        bucket_key = '23749283743928748'
        x_data = ['one', 'two', 'three']
        self.redis_storage.store_vector('testHash', bucket_key, x, x_data)
        X = self.redis_storage.get_bucket('testHash', bucket_key)
        self.assertEqual(len(X), 1)
        y = X[0][0]
        y_data = X[0][1]
        self.assertEqual(type(x), type(y))
        self.assertEqual(x.shape[0], y.shape[0])
        self.assertEqual(x.shape[1], y.shape[1])
        self.assertTrue((y - x).sum() == 0.0)
        self.assertEqual(type(y_data), type(x_data))
        self.assertEqual(len(y_data), len(x_data))
        for k in range(3):
            self.assertEqual(y_data[k], x_data[k])
        self.redis_storage.clean_all_buckets()
        self.assertEqual(self.redis_storage.get_bucket('testHash', bucket_key),
                         [])
Example #12
def redisStorage(host='localhost'):
    return RedisStorage(Redis(host=host, port=6379, db=0))
Example #13
from nearpy import Engine
from nearpy.hashes import RandomBinaryProjections
from nearpy.storage import RedisStorage
import deepranking.files as files
from keras.preprocessing.image import load_img, img_to_array
import pandas as pd
import numpy as np
from keras.applications.vgg16 import preprocess_input
from keras.models import load_model
from deepranking.fashion_utils import triplet_loss_adapted_from_tf
from keras_applications.resnext import preprocess_input
import keras
from redis import Redis

redis_object = Redis(host='localhost', port=6379, db=0)
redis_storage = RedisStorage(redis_object)

config = redis_storage.load_hash_configuration('MyHash')

lshash = RandomBinaryProjections(None, None)
lshash.apply_config(config)
engine = Engine(4096, lshashes=[lshash], storage=redis_storage)

redis_object.close()
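For reference, a hedged sketch of how the module-level engine above might be queried with a 4096-dimensional descriptor; the feature-extraction step is omitted, so the query vector here is random.

import numpy as np

query = np.random.randn(4096)
# Each result is a (vector, data, distance) tuple
for _, item, distance in engine.neighbours(query):
    print("%s %.4f" % (item, distance))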


class FashionSimilarity:
    def __init__(self):
        self.model = load_model((files.output_directory /
                                 'onlinemining_loss.h5').absolute().as_posix(),
                                custom_objects={
Example #14
class TestHashStorage(unittest.TestCase):

    def setUp(self):
        self.memory = MemoryStorage()
        self.redis_object = Redis(host='localhost',
                                  port=6379, db=0)
        self.redis_storage = RedisStorage(self.redis_object)

    def test_hash_memory_storage_rbp(self):
        hash1 = RandomBinaryProjections('testRBPHash', 10)
        hash1.reset(100)

        self.memory.store_hash_configuration(hash1)

        hash2 = RandomBinaryProjections(None, None)
        hash2.apply_config(self.memory.load_hash_configuration('testRBPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.normals.shape[0]):
            for j in range(hash1.normals.shape[1]):
                self.assertEqual(hash1.normals[i, j], hash2.normals[i, j])

    def test_hash_memory_storage_rdp(self):
        hash1 = RandomDiscretizedProjections('testRDPHash', 10, 0.1)
        hash1.reset(100)

        self.memory.store_hash_configuration(hash1)

        hash2 = RandomDiscretizedProjections(None, None, None)
        hash2.apply_config(self.memory.load_hash_configuration('testRDPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.bin_width, hash2.bin_width)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.normals.shape[0]):
            for j in range(hash1.normals.shape[1]):
                self.assertEqual(hash1.normals[i, j], hash2.normals[i, j])

    def test_hash_memory_storage_pcabp(self):
        train_vectors = numpy.random.randn(10, 100)
        hash1 = PCABinaryProjections('testPCABPHash', 4, train_vectors)

        self.memory.store_hash_configuration(hash1)

        hash2 = PCABinaryProjections(None, None, None)
        hash2.apply_config(self.memory.load_hash_configuration('testPCABPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.components.shape[0]):
            for j in range(hash1.components.shape[1]):
                self.assertEqual(hash1.components[i, j], hash2.components[i, j])


    def test_hash_memory_storage_pcadp(self):
        train_vectors = numpy.random.randn(10, 100)
        hash1 = PCADiscretizedProjections('testPCADPHash', 4, train_vectors, 0.1)

        self.memory.store_hash_configuration(hash1)

        hash2 = PCADiscretizedProjections(None, None, None, None)
        hash2.apply_config(self.memory.load_hash_configuration('testPCADPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.bin_width, hash2.bin_width)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.components.shape[0]):
            for j in range(hash1.components.shape[1]):
                self.assertEqual(hash1.components[i, j], hash2.components[i, j])


    def test_hash_redis_storage_rbp(self):
        hash1 = RandomBinaryProjections('testRBPHash', 10)
        hash1.reset(100)

        self.redis_storage.store_hash_configuration(hash1)

        hash2 = RandomBinaryProjections(None, None)
        hash2.apply_config(self.redis_storage.load_hash_configuration('testRBPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.normals.shape[0]):
            for j in range(hash1.normals.shape[1]):
                self.assertEqual(hash1.normals[i, j], hash2.normals[i, j])

    def test_hash_redis_storage_rdp(self):
        hash1 = RandomDiscretizedProjections('testRDPHash', 10, 0.1)
        hash1.reset(100)

        self.redis_storage.store_hash_configuration(hash1)

        hash2 = RandomDiscretizedProjections(None, None, None)
        hash2.apply_config(self.redis_storage.load_hash_configuration('testRDPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.bin_width, hash2.bin_width)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.normals.shape[0]):
            for j in range(hash1.normals.shape[1]):
                self.assertEqual(hash1.normals[i, j], hash2.normals[i, j])

    def test_hash_redis_storage_pcabp(self):
        train_vectors = numpy.random.randn(10, 100)
        hash1 = PCABinaryProjections('testPCABPHash', 4, train_vectors)

        self.redis_storage.store_hash_configuration(hash1)

        hash2 = PCABinaryProjections(None, None, None)
        hash2.apply_config(self.redis_storage.load_hash_configuration('testPCABPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.components.shape[0]):
            for j in range(hash1.components.shape[1]):
                self.assertEqual(hash1.components[i, j], hash2.components[i, j])


    def test_hash_redis_storage_pcadp(self):
        train_vectors = numpy.random.randn(10, 100)
        hash1 = PCADiscretizedProjections('testPCADPHash', 4, train_vectors, 0.1)

        self.redis_storage.store_hash_configuration(hash1)

        hash2 = PCADiscretizedProjections(None, None, None, None)
        hash2.apply_config(self.redis_storage.load_hash_configuration('testPCADPHash'))

        self.assertEqual(hash1.dim, hash2.dim)
        self.assertEqual(hash1.hash_name, hash2.hash_name)
        self.assertEqual(hash1.bin_width, hash2.bin_width)
        self.assertEqual(hash1.projection_count, hash2.projection_count)

        for i in range(hash1.components.shape[0]):
            for j in range(hash1.components.shape[1]):
                self.assertEqual(hash1.components[i, j], hash2.components[i, j])
Example #15
 def setUp(self):
     self.memory = MemoryStorage()
     self.redis_object = Redis()
     self.redis_storage = RedisStorage(self.redis_object)
     numpy.random.seed(16)
Example #16
class TestRandomBinaryProjectionTree(unittest.TestCase):

    def setUp(self):
        self.memory = MemoryStorage()
        self.redis_object = Redis(host='localhost',
                                  port=6379, db=0)
        self.redis_storage = RedisStorage(self.redis_object)

    def test_retrieval(self):
        # We want 12 projections, 20 results at least
        rbpt = RandomBinaryProjectionTree('testHash', 12, 20)

        # Create engine for 100 dimensional feature space, do not forget to set
        # nearest filter to 20, because default is 10
        self.engine = Engine(100, lshashes=[rbpt], vector_filters=[NearestFilter(20)])

        # First insert 200000 random vectors
        #print 'Indexing...'
        for k in range(200000):
            x = numpy.random.randn(100)
            x_data = 'data'
            self.engine.store_vector(x, x_data)

        # Now do random queries and check result set size
        #print 'Querying...'
        for k in range(10):
            x = numpy.random.randn(100)
            n = self.engine.neighbours(x)
            #print "Candidate count = %d" % self.engine.candidate_count(x)
            #print "Result size = %d" % len(n)
            self.assertEqual(len(n), 20)

    def test_storage_memory(self):
        # We want 10 projections, 20 results at least
        rbpt = RandomBinaryProjectionTree('testHash', 10, 20)

        # Create engine for 100 dimensional feature space
        self.engine = Engine(100, lshashes=[rbpt], vector_filters=[NearestFilter(20)])

        # First insert 2000 random vectors
        for k in range(2000):
            x = numpy.random.randn(100)
            x_data = 'data'
            self.engine.store_vector(x, x_data)

        self.memory.store_hash_configuration(rbpt)

        rbpt2 = RandomBinaryProjectionTree(None, None, None)
        rbpt2.apply_config(self.memory.load_hash_configuration('testHash'))

        self.assertEqual(rbpt.dim, rbpt2.dim)
        self.assertEqual(rbpt.hash_name, rbpt2.hash_name)
        self.assertEqual(rbpt.projection_count, rbpt2.projection_count)

        for i in range(rbpt.normals.shape[0]):
            for j in range(rbpt.normals.shape[1]):
                self.assertEqual(rbpt.normals[i, j], rbpt2.normals[i, j])

        # Now do random queries and check result set size
        for k in range(10):
            x = numpy.random.randn(100)
            keys1 = rbpt.hash_vector(x, querying=True)
            keys2 = rbpt2.hash_vector(x, querying=True)
            self.assertEqual(len(keys1), len(keys2))
            for k in range(len(keys1)):
                self.assertEqual(keys1[k], keys2[k])

    def test_storage_redis(self):
        # We want 10 projections, 20 results at least
        rbpt = RandomBinaryProjectionTree('testHash', 10, 20)

        # Create engine for 100 dimensional feature space
        self.engine = Engine(100, lshashes=[rbpt], vector_filters=[NearestFilter(20)])

        # First insert 2000 random vectors
        for k in range(2000):
            x = numpy.random.randn(100)
            x_data = 'data'
            self.engine.store_vector(x, x_data)


        self.redis_storage.store_hash_configuration(rbpt)

        rbpt2 = RandomBinaryProjectionTree(None, None, None)
        rbpt2.apply_config(self.redis_storage.load_hash_configuration('testHash'))

        self.assertEqual(rbpt.dim, rbpt2.dim)
        self.assertEqual(rbpt.hash_name, rbpt2.hash_name)
        self.assertEqual(rbpt.projection_count, rbpt2.projection_count)

        for i in range(rbpt.normals.shape[0]):
            for j in range(rbpt.normals.shape[1]):
                self.assertEqual(rbpt.normals[i, j], rbpt2.normals[i, j])

        # Now do random queries and check result set size
        for k in range(10):
            x = numpy.random.randn(100)
            keys1 = rbpt.hash_vector(x, querying=True)
            keys2 = rbpt2.hash_vector(x, querying=True)
            self.assertEqual(len(keys1), len(keys2))
            for k in range(len(keys1)):
                self.assertEqual(keys1[k], keys2[k])
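The commented-out prints in test_retrieval hint at a useful diagnostic: comparing the number of raw candidates fetched by the hash with the size of the filtered result set. A standalone sketch; the hash name and sizes are arbitrary.

import numpy
from nearpy import Engine
from nearpy.filters import NearestFilter
from nearpy.hashes import RandomBinaryProjectionTree

# Tree hash with 12 projections that guarantees at least 20 candidates
rbpt = RandomBinaryProjectionTree('diagHash', 12, 20)
engine = Engine(100, lshashes=[rbpt], vector_filters=[NearestFilter(20)])

for k in range(1000):
    engine.store_vector(numpy.random.randn(100), 'data_%d' % k)

query = numpy.random.randn(100)
print("Candidate count = %d" % engine.candidate_count(query))
print("Result size = %d" % len(engine.neighbours(query)))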
Example #17
 def setUp(self):
     self.memory = MemoryStorage()
     self.redis_object = Redis(host='localhost',
                               port=6379, db=0)
     self.redis_storage = RedisStorage(self.redis_object)
Example #18
class TestStorage(unittest.TestCase):

    def setUp(self):
        self.memory = MemoryStorage()
        self.redis_object = Redis()
        self.redis_storage = RedisStorage(self.redis_object)

    def test_memory_storage(self):
        x = numpy.random.randn(100, 1)
        bucket_key = '23749283743928748'
        x_data = ['one', 'two', 'three']
        self.memory.store_vector('testHash', bucket_key, x, x_data)
        X = self.memory.get_bucket('testHash', bucket_key)
        self.assertEqual(len(X), 1)
        y = X[0][0]
        y_data = X[0][1]
        self.assertEqual(len(y), len(x))
        self.assertEqual(type(x), type(y))
        for k in range(100):
            self.assertEqual(y[k], x[k])
        self.assertEqual(type(y_data), type(x_data))
        self.assertEqual(len(y_data), len(x_data))
        for k in range(3):
            self.assertEqual(y_data[k], x_data[k])
        self.memory.clean_all_buckets()
        self.assertEqual(self.memory.get_bucket('testHash', bucket_key), [])

    def test_redis_storage(self):
        self.redis_storage.clean_all_buckets()
        x = numpy.random.randn(100, 1)
        bucket_key = '23749283743928748'
        x_data = ['one', 'two', 'three']
        self.redis_storage.store_vector('testHash', bucket_key, x, x_data)
        X = self.redis_storage.get_bucket('testHash', bucket_key)
        self.assertEqual(len(X), 1)
        y = X[0][0]
        y_data = X[0][1]
        self.assertEqual(len(y), len(x))
        self.assertEqual(type(x), type(y))
        for k in range(100):
            self.assertEqual(y[k], x[k])
        self.assertEqual(type(y_data), type(x_data))
        self.assertEqual(len(y_data), len(x_data))
        for k in range(3):
            self.assertEqual(y_data[k], x_data[k])
        self.redis_storage.clean_all_buckets()
        self.assertEqual(self.redis_storage.get_bucket('testHash',
                                                       bucket_key), [])

    def test_redis_storage_sparse(self):
        self.redis_storage.clean_all_buckets()
        x = scipy.sparse.rand(100, 1, density=0.1)
        bucket_key = '23749283743928748'
        x_data = ['one', 'two', 'three']
        self.redis_storage.store_vector('testHash', bucket_key, x, x_data)
        X = self.redis_storage.get_bucket('testHash', bucket_key)
        self.assertEqual(len(X), 1)
        y = X[0][0]
        y_data = X[0][1]
        self.assertEqual(type(x), type(y))
        self.assertEqual(x.shape[0], y.shape[0])
        self.assertEqual(x.shape[1], y.shape[1])
        self.assertTrue((y - x).sum() == 0.0)
        self.assertEqual(type(y_data), type(x_data))
        self.assertEqual(len(y_data), len(x_data))
        for k in range(3):
            self.assertEqual(y_data[k], x_data[k])
        self.redis_storage.clean_all_buckets()
        self.assertEqual(self.redis_storage.get_bucket('testHash',
                                                       bucket_key), [])
Example #19
class TestRandomBinaryProjectionTree(unittest.TestCase):
    def setUp(self):
        self.memory = MemoryStorage()
        self.redis_object = Redis()
        self.redis_storage = RedisStorage(self.redis_object)
        numpy.random.seed(16)

    def test_retrieval(self):
        # We want 12 projections, 20 results at least
        rbpt = RandomBinaryProjectionTree('testHash', 12, 20)

        # Create engine for 100 dimensional feature space, do not forget to set
        # nearest filter to 20, because default is 10
        self.engine = Engine(100,
                             lshashes=[rbpt],
                             vector_filters=[NearestFilter(20)])

        # First insert 200000 random vectors
        for k in range(200000):
            x = numpy.random.randn(100)
            x_data = 'data {}'.format(k)
            self.engine.store_vector(x, x_data)

        # Now do random queries and check result set size
        for k in range(10):
            x = numpy.random.randn(100)
            n = self.engine.neighbours(x)
            self.assertEqual(len(n), 20)

    def test_storage_memory(self):
        # We want 10 projections, 20 results at least
        rbpt = RandomBinaryProjectionTree('testHash', 10, 20)

        # Create engine for 100 dimensional feature space
        self.engine = Engine(100,
                             lshashes=[rbpt],
                             vector_filters=[NearestFilter(20)])

        # First insert 2000 random vectors
        for k in range(2000):
            x = numpy.random.randn(100)
            x_data = 'data'
            self.engine.store_vector(x, x_data)

        self.memory.store_hash_configuration(rbpt)

        rbpt2 = RandomBinaryProjectionTree(None, None, None)
        rbpt2.apply_config(self.memory.load_hash_configuration('testHash'))

        self.assertEqual(rbpt.dim, rbpt2.dim)
        self.assertEqual(rbpt.hash_name, rbpt2.hash_name)
        self.assertEqual(rbpt.projection_count, rbpt2.projection_count)

        for i in range(rbpt.normals.shape[0]):
            for j in range(rbpt.normals.shape[1]):
                self.assertEqual(rbpt.normals[i, j], rbpt2.normals[i, j])

        # Now do random queries and check result set size
        for k in range(10):
            x = numpy.random.randn(100)
            keys1 = rbpt.hash_vector(x, querying=True)
            keys2 = rbpt2.hash_vector(x, querying=True)
            self.assertEqual(len(keys1), len(keys2))
            for k in range(len(keys1)):
                self.assertEqual(keys1[k], keys2[k])

    def test_storage_redis(self):
        # We want 10 projections, 20 results at least
        rbpt = RandomBinaryProjectionTree('testHash', 10, 20)

        # Create engine for 100 dimensional feature space
        self.engine = Engine(100,
                             lshashes=[rbpt],
                             vector_filters=[NearestFilter(20)])

        # First insert 2000 random vectors
        for k in range(2000):
            x = numpy.random.randn(100)
            x_data = 'data'
            self.engine.store_vector(x, x_data)

        self.redis_storage.store_hash_configuration(rbpt)

        rbpt2 = RandomBinaryProjectionTree(None, None, None)
        rbpt2.apply_config(
            self.redis_storage.load_hash_configuration('testHash'))

        self.assertEqual(rbpt.dim, rbpt2.dim)
        self.assertEqual(rbpt.hash_name, rbpt2.hash_name)
        self.assertEqual(rbpt.projection_count, rbpt2.projection_count)

        for i in range(rbpt.normals.shape[0]):
            for j in range(rbpt.normals.shape[1]):
                self.assertEqual(rbpt.normals[i, j], rbpt2.normals[i, j])

        # Now do random queries and check result set size
        for k in range(10):
            x = numpy.random.randn(100)
            keys1 = rbpt.hash_vector(x, querying=True)
            keys2 = rbpt2.hash_vector(x, querying=True)
            self.assertEqual(len(keys1), len(keys2))
            for k in range(len(keys1)):
                self.assertEqual(keys1[k], keys2[k])
Example #20
import json
import numpy as np
import cPickle as pickle

from nearpy import Engine
from nearpy.hashes import RandomBinaryProjections
from nearpy.storage import RedisStorage
from redis import Redis

from ne import CosineSim

dimension = 100

with open("hndbow.index2word", 'r') as f:
    index2words = json.load(f)

wordvecs = np.load("hndbow.syn0.npy")

redis_storage = RedisStorage(Redis(host='localhost', port=6379, db=3))

lshash = RandomBinaryProjections('WordHash', 5, rand_seed=123)

engine = Engine(dimension, distance=CosineSim(), lshashes=[lshash], storage=redis_storage)

for i,w in enumerate(index2words):
    vec = wordvecs[i] # 1x100 nparray
    engine.store_vector(vec, w)

redis_storage.store_hash_configuration(lshash)
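Once the word vectors are indexed, nearest words can be retrieved per query vector. A brief sketch using the first indexed word; the choice of word is purely illustrative.

# Each result is a (vector, word, distance) tuple, ranked by CosineSim
query_word, query_vec = index2words[0], wordvecs[0]
for _, word, dist in engine.neighbours(query_vec):
    print("%s -> %s (%.4f)" % (query_word, word, dist))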
Example #21
 def setUp(self):
     self.memory = MemoryStorage()
     self.redis_object = Redis()
     self.redis_storage = RedisStorage(self.redis_object)
Example #22
        log.info("Preparing neighbour search hash")
        # Create binary hash
        binary_hash = RandomBinaryProjections("%s:%s_binary_hash" % (model_type, model_name), hash_size)

        log.info("Connecting to Redis server on port %d" % redis_port)
        # Prepare an engine for storing the vectors in
        try:
            redis = Redis(host='localhost', port=redis_port, db=0)
        except ConnectionError as e:
            raise RuntimeError("could not connect to redis server on port %s. Is it running? (%s)" % (redis_port, e))
        # Set the storage location to be in the model's directory
        redis.config_set("dbfilename", "vectors.rdb")
        redis.config_set("dir", model_dir)
        # Use this as the storage engine for the nearest-neighbour index
        redis_storage = RedisStorage(redis)
        search_engine = Engine(vector_size, lshashes=[binary_hash], storage=redis_storage)

        for vector, source, chain in VectorCorpus.project_from_docs(corpus,
                                                                    model_type,
                                                                    model_name,
                                                                    progress=progress,
                                                                    buffer_size=10000,
                                                                    project_events=project_events,
                                                                    filter_chains=filter_chains):
            data = (source, chain) if include_events else source
            search_engine.store_vector(vector, data)

        finder = NearestNeighbourFinder(model_type, model_name, binary_hash, corpus.directory,
                                        with_events=include_events)
        log.info("Storing finder in %s" % os.path.join(model_dir, "neighbour_finder"))