Ejemplo n.º 1
0
    def setUp(self):
        # Fixture: one engine that uses the HashPermutations meta-hash and one
        # that uses the bare binary projection hash, both over 200-dimensional
        # vectors with cosine distance. numpy's global RNG is seeded first.
        logging.basicConfig(level=logging.WARNING)
        numpy.random.seed(11)

        # Meta-hash; kept on the instance so the fixture's users can reach it
        meta_hash = HashPermutations('permut')
        self.permutations = meta_hash

        # Seeded binary projection hash, registered as child of the meta-hash
        child_hash = RandomBinaryProjections('rbp1', 4, rand_seed=19)
        child_settings = {'num_permutation': 50,
                          'beam_size': 10,
                          'num_neighbour': 100}
        meta_hash.add_child_hash(child_hash, child_settings)

        # Engine that goes through the meta-hash
        self.engine_perm = Engine(
            200, lshashes=[meta_hash], distance=CosineDistance())

        # Engine that uses the child hash directly
        self.engine = Engine(
            200, lshashes=[child_hash], distance=CosineDistance())
Ejemplo n.º 2
0
def index_user_vectors():
    """Index every entry of ``user_vector`` in a new HashPermutations engine.

    Reads the module-level names ``k_dimen``, ``d_dimen`` and ``user_vector``
    and rebinds the module-level ``engine_perm`` to the engine built here.
    Each vector is stored with its key from ``user_vector`` as the attached
    data. Progress and the elapsed wall-clock time are printed.
    """
    global engine_perm

    print('Performing indexing with HashPermutations...')

    t0 = time.time()

    # Same output as the former ``print k_dimen, d_dimen`` statement
    print('%s %s' % (k_dimen, d_dimen))

    rbp_perm = RandomBinaryProjections('rbp_perm', d_dimen)
    rbp_perm.reset(k_dimen)

    # Create permutations meta-hash
    permutations = HashPermutations('permut')

    rbp_conf = {'num_permutation': 50, 'beam_size': 10, 'num_neighbour': 250}

    # Add rbp as child hash of permutations hash
    permutations.add_child_hash(rbp_perm, rbp_conf)

    # Create engine
    engine_perm = Engine(k_dimen,
                         lshashes=[permutations],
                         distance=CosineDistance())

    for u in user_vector:
        engine_perm.store_vector(user_vector[u], data=u)

    # Then update permuted index (only after all vectors have been stored)
    permutations.build_permuted_index()

    t1 = time.time()

    # Interpolate the duration with ``%``; a comma here would print the raw
    # '%f' placeholder followed by the number.
    print('Indexing took %f seconds' % (t1 - t0))
Ejemplo n.º 3
0
class TestPermutation(unittest.TestCase):
    """Smoke test: query an engine with the HashPermutations meta-hash and one without."""

    def setUp(self):
        # Fixture: the meta-hash, an engine using it and an engine using only
        # the child hash; both engines take 200-dimensional vectors.
        logging.basicConfig(level=logging.WARNING)

        # Meta-hash under test
        self.permutations = HashPermutations('permut')

        # Binary projection hash shared by both engines
        child_hash = RandomBinaryProjections('rbp1', 4)
        child_settings = {'num_permutation': 50,
                          'beam_size': 10,
                          'num_neighbour': 100}
        self.permutations.add_child_hash(child_hash, child_settings)

        # Engine that goes through the meta-hash
        self.engine_perm = Engine(
            200, lshashes=[self.permutations], distance=CosineDistance())

        # Engine that uses the child hash directly
        self.engine = Engine(
            200, lshashes=[child_hash], distance=CosineDistance())

    def test_runnable(self):
        # Runs the whole index/query cycle and prints the distances; there are
        # no assertions, so the test only fails if something raises.
        n_vectors, dim = 1000, 200

        # Store the same random vectors in both engines and keep a copy of
        # every vector for the exhaustive comparison at the end
        matrix = numpy.zeros((n_vectors, dim))
        for row in xrange(n_vectors):
            vec = numpy.random.randn(dim)
            matrix[row] = vec
            self.engine.store_vector(vec)
            self.engine_perm.store_vector(vec)

        # The permuted index is built once, after all vectors are stored
        self.permutations.build_permuted_index()

        # Query through the meta-hash
        print('\nNeighbour distances with permuted index:')
        query = numpy.random.randn(dim)
        print([hit[2] for hit in self.engine_perm.neighbours(query)])

        # Same query through the plain engine
        print('\nNeighbour distances without permuted index (distances should be larger):')
        print([hit[2] for hit in self.engine.neighbours(query)])

        # Distances to every stored vector, ten smallest shown
        print('\nReal neighbour distances:')
        query_row = query.reshape((1, dim))
        all_dists = CosineDistance().distance_matrix(matrix, query_row)
        print(sorted(all_dists.reshape((-1, )))[:10])
Ejemplo n.º 4
0
class TestPermutation(unittest.TestCase):
    """Runs an engine with the HashPermutations meta-hash next to a plain engine."""

    def setUp(self):
        # Prepares self.permutations, self.engine_perm and self.engine.
        logging.basicConfig(level=logging.WARNING)

        # The meta-hash
        self.permutations = HashPermutations('permut')

        # Its child: a binary projection hash, with the permutation settings
        binary_hash = RandomBinaryProjections('rbp1', 4)
        settings = {
            'num_permutation': 50,
            'beam_size': 10,
            'num_neighbour': 100,
        }
        self.permutations.add_child_hash(binary_hash, settings)

        # Engine with the meta-hash and cosine distance
        self.engine_perm = Engine(200,
                                  lshashes=[self.permutations],
                                  distance=CosineDistance())

        # Engine with only the binary hash
        self.engine = Engine(200,
                             lshashes=[binary_hash],
                             distance=CosineDistance())

    def test_runnable(self):
        # Indexes random vectors, queries both engines and prints distances.
        # Nothing is asserted; the test passes when no exception is raised.
        count = 1000
        width = 200

        # Every vector goes into both engines and into the reference matrix
        matrix = numpy.zeros((count, width))
        for idx in xrange(count):
            sample = numpy.random.randn(width)
            matrix[idx] = sample
            self.engine.store_vector(sample)
            self.engine_perm.store_vector(sample)

        # Build the permuted index now that all vectors are stored
        self.permutations.build_permuted_index()

        # Engine with meta-hash
        print('\nNeighbour distances with permuted index:')
        query = numpy.random.randn(width)
        found = self.engine_perm.neighbours(query)
        print([entry[2] for entry in found])

        # Engine without meta-hash
        print('\nNeighbour distances without permuted index (distances should be larger):')
        found = self.engine.neighbours(query)
        print([entry[2] for entry in found])

        # Reference: distance from the query to every stored vector
        print('\nReal neighbour distances:')
        reference = CosineDistance().distance_matrix(
            matrix, query.reshape((1, width)))
        ordered = sorted(reference.reshape((-1,)))
        print(ordered[:10])
Ejemplo n.º 5
0
class TestPermutation(unittest.TestCase):
    """Checks that the HashPermutations engine returns a closer first neighbour."""

    def setUp(self):
        # Seeds numpy's global RNG, then builds the meta-hash and both engines.
        logging.basicConfig(level=logging.WARNING)
        numpy.random.seed(11)

        # Meta-hash under test
        self.permutations = HashPermutations('permut')

        # Seeded binary projection hash used as child hash
        child_hash = RandomBinaryProjections('rbp1', 4, rand_seed=19)
        child_settings = {'num_permutation': 50,
                          'beam_size': 10,
                          'num_neighbour': 100}
        self.permutations.add_child_hash(child_hash, child_settings)

        # Engine that goes through the meta-hash
        self.engine_perm = Engine(
            200, lshashes=[self.permutations], distance=CosineDistance())

        # Engine that uses the child hash directly
        self.engine = Engine(
            200, lshashes=[child_hash], distance=CosineDistance())

    def test_runnable(self):
        # Index the same random vectors in both engines, query both with one
        # random vector and compare the first returned distance of each.
        n_vectors, dim = 1000, 200

        matrix = numpy.zeros((n_vectors, dim))
        for row in xrange(n_vectors):
            vec = numpy.random.randn(dim)
            matrix[row] = vec
            self.engine.store_vector(vec)
            self.engine_perm.store_vector(vec)

        # The permuted index is built once, after all vectors are stored
        self.permutations.build_permuted_index()

        query = numpy.random.randn(dim)

        # Distances reported by the engine with the meta-hash
        with_perm = [hit[2] for hit in self.engine_perm.neighbours(query)]

        # Distances reported by the plain engine (expected to be larger)
        without_perm = [hit[2] for hit in self.engine.neighbours(query)]

        self.assertLess(with_perm[0], without_perm[0])
Ejemplo n.º 6
0
class lshsearcher:
    """Thin wrapper around an engine that uses a HashPermutations meta-hash.

    Until ``conf`` has been called no engine exists; ``getData`` and
    ``commitData`` then do nothing and ``find`` returns ``None``.
    """

    def __init__(self):
        """Start without a dimension, an engine or a meta-hash."""
        self.__dimension = None
        self.__engine_perm = None
        self.__permutations = None

    def _set_confval(self, dimension=None):
        """Remember *dimension*; a value of ``None`` leaves the current one."""
        if dimension is not None:
            self.__dimension = dimension

    def _engine_on(self):
        """Build the meta-hash with its child hash and the engine on top."""
        # Meta-hash, kept so commitData can rebuild its permuted index
        meta_hash = HashPermutations('permut')
        self.__permutations = meta_hash

        # Binary projection hash registered as child, with its settings
        child_hash = RandomBinaryProjections('rbp_perm', 14)
        child_settings = {
            'num_permutation': 50,
            'beam_size': 10,
            'num_neighbour': 100,
        }
        meta_hash.add_child_hash(child_hash, child_settings)

        # Engine over the stored dimension, using cosine distance
        self.__engine_perm = Engine(
            self.__dimension,
            lshashes=[meta_hash],
            distance=CosineDistance(),
        )

    def conf(self, dimension):
        """Store *dimension* and (re)create the engine."""
        self._set_confval(dimension)
        self._engine_on()

    def getData(self, v):
        """Store vector *v* in the engine, if one has been created."""
        if self.__engine_perm is None:
            return
        self.__engine_perm.store_vector(v)

    def commitData(self):
        """Build the permuted index of the meta-hash, if one has been created."""
        if self.__permutations is None:
            return
        self.__permutations.build_permuted_index()

    def find(self, v):
        """Return the engine's neighbours of *v*, or ``None`` without an engine."""
        if self.__engine_perm is None:
            return None
        return self.__engine_perm.neighbours(v)
Ejemplo n.º 7
0
def index_user_vectors():
    """Store every entry of ``user_vector`` in a new HashPermutations engine.

    Reads the module-level ``k_dimen``, ``d_dimen`` and ``user_vector`` and
    rebinds the module-level ``engine_perm``. Each vector is stored with its
    ``user_vector`` key as the attached data.
    """
    global engine_perm

    # Start/end timestamps are taken but not used within this function
    t0 = time.time()

    # Binary projection hash, reset with k_dimen before use
    child_hash = RandomBinaryProjections('rbp_perm', d_dimen)
    child_hash.reset(k_dimen)

    # Meta-hash with the binary hash registered as its child
    meta_hash = HashPermutations('permut')
    child_settings = {
        'num_permutation': 50,
        'beam_size': 10,
        'num_neighbour': 250,
    }
    meta_hash.add_child_hash(child_hash, child_settings)

    # Engine published through the module-level name
    engine_perm = Engine(
        k_dimen, lshashes=[meta_hash], distance=CosineDistance())

    for user_key in user_vector:
        engine_perm.store_vector(user_vector[user_key], data=user_key)

    # Build the permuted index once all vectors are stored
    meta_hash.build_permuted_index()

    t1 = time.time()
Ejemplo n.º 8
0
    def _engine_on(self):
        """Build the meta-hash with its child hash and the engine on top."""
        # Meta-hash, kept on the instance
        meta_hash = HashPermutations('permut')
        self.__permutations = meta_hash

        # Binary projection hash registered as child, with its settings
        child_hash = RandomBinaryProjections('rbp_perm', 14)
        child_settings = {
            'num_permutation': 50,
            'beam_size': 10,
            'num_neighbour': 100,
        }
        meta_hash.add_child_hash(child_hash, child_settings)

        # Engine over the stored dimension, using cosine distance
        self.__engine_perm = Engine(
            self.__dimension,
            lshashes=[meta_hash],
            distance=CosineDistance(),
        )
Ejemplo n.º 9
0
    def setUp(self):
        # Prepares self.permutations, self.engine_perm and self.engine.
        logging.basicConfig(level=logging.WARNING)

        # The meta-hash
        self.permutations = HashPermutations('permut')

        # Its child: a binary projection hash, with the permutation settings
        binary_hash = RandomBinaryProjections('rbp1', 4)
        settings = {
            'num_permutation': 50,
            'beam_size': 10,
            'num_neighbour': 100,
        }
        self.permutations.add_child_hash(binary_hash, settings)

        # Engine with the meta-hash and cosine distance
        self.engine_perm = Engine(200,
                                  lshashes=[self.permutations],
                                  distance=CosineDistance())

        # Engine with only the binary hash
        self.engine = Engine(200,
                             lshashes=[binary_hash],
                             distance=CosineDistance())
Ejemplo n.º 10
0
def example2():
    """Index random vectors with three hash setups and print query results.

    The three setups are a HashPermutations meta-hash, a
    HashPermutationMapper meta-hash and 20 separate RandomBinaryProjections
    hashes. For each one the function indexes POINTS random vectors of
    dimension DIM, prints the elapsed indexing time, queries the engine with
    a new random vector and prints the returned neighbour distances followed
    by the ten smallest values of ``CosineDistance().distance_matrix`` over
    all indexed vectors.
    """

    # Dimension of feature space
    DIM = 100

    # Number of data points (don't use too many because of the exact search)
    POINTS = 20000

    def _index_random_vectors(engine):
        """Store POINTS random vectors in *engine* and return them as a matrix."""
        matrix = numpy.zeros((POINTS, DIM))
        for i in range(POINTS):
            v = numpy.random.randn(DIM)
            matrix[i] = v
            engine.store_vector(v)
        return matrix

    def _query_and_report(engine, matrix, label):
        """Query *engine* with a random vector; print its and the exact distances."""
        # Get random query vector
        query = numpy.random.randn(DIM)

        print('\nNeighbour distances with %s:' % label)
        print('  -> Candidate count is %d' % engine.candidate_count(query))
        results = engine.neighbours(query)
        print([x[2] for x in results])

        # Real neighbours: distance from the query to every indexed vector
        print('\nReal neighbour distances:')
        query = query.reshape((1, DIM))
        dists = CosineDistance().distance_matrix(matrix, query)
        print(sorted(dists.reshape((-1,)))[:10])

    ##########################################################

    print('Performing indexing with HashPermutations...')
    t0 = time.time()

    # Create permutations meta-hash
    permutations = HashPermutations('permut')

    # Create binary hash as child hash
    rbp_perm = RandomBinaryProjections('rbp_perm', 14)
    rbp_conf = {'num_permutation': 50, 'beam_size': 10, 'num_neighbour': 100}

    # Add rbp as child hash of permutations hash
    permutations.add_child_hash(rbp_perm, rbp_conf)

    # Create engine
    engine_perm = Engine(DIM,
                         lshashes=[permutations],
                         distance=CosineDistance())

    matrix = _index_random_vectors(engine_perm)

    # Then update permuted index
    permutations.build_permuted_index()

    t1 = time.time()
    print('Indexing took %f seconds' % (t1 - t0))

    _query_and_report(engine_perm, matrix, 'HashPermutations')

    ##########################################################

    print('\nPerforming indexing with HashPermutationMapper...')
    t0 = time.time()

    # Create permutations meta-hash
    permutations2 = HashPermutationMapper('permut2')

    # Create binary hash as child hash
    rbp_perm2 = RandomBinaryProjections('rbp_perm2', 14)

    # Add rbp as child hash of permutations hash
    permutations2.add_child_hash(rbp_perm2)

    # Create engine
    engine_perm2 = Engine(DIM,
                          lshashes=[permutations2],
                          distance=CosineDistance())

    matrix = _index_random_vectors(engine_perm2)

    t1 = time.time()
    print('Indexing took %f seconds' % (t1 - t0))

    _query_and_report(engine_perm2, matrix, 'HashPermutationMapper')

    ##########################################################

    print('\nPerforming indexing with multiple binary hashes...')
    t0 = time.time()

    hashes = [RandomBinaryProjections('rbp_%d' % k, 10) for k in range(20)]

    # Create engine
    engine_rbps = Engine(DIM, lshashes=hashes, distance=CosineDistance())

    matrix = _index_random_vectors(engine_rbps)

    t1 = time.time()
    print('Indexing took %f seconds' % (t1 - t0))

    _query_and_report(engine_rbps, matrix, 'multiple binary hashes')
Ejemplo n.º 11
0
def example1():
    """Query four differently hashed engines with one random vector.

    Builds engines around a RandomBinaryProjectionTree, a plain
    RandomBinaryProjections hash, a HashPermutations meta-hash and a
    HashPermutationMapper meta-hash, stores the same random vectors in all
    of them, and prints each engine's candidate count and neighbours for a
    random query. Finally prints the ten entries with the smallest value of
    ``CosineDistance().distance`` against the unit-normalised vectors.
    """
    # Dimension of feature space
    dim = 100

    # Number of data points (don't use too many because of the exact search)
    n_points = 10000

    print('Creating engines')

    # Engine 1: projection tree hash
    tree_hash = RandomBinaryProjectionTree('rbpt', 20, 20)
    engine_rbpt = Engine(dim, lshashes=[tree_hash], distance=CosineDistance())

    # Engine 2: plain binary projection hash
    plain_hash = RandomBinaryProjections('rbp1', 20)
    engine = Engine(dim, lshashes=[plain_hash], distance=CosineDistance())

    # Engine 3: HashPermutations meta-hash with a binary hash as child
    permutations = HashPermutations('permut')
    perm_child = RandomBinaryProjections('rbp_perm', 20)
    perm_settings = {
        'num_permutation': 50,
        'beam_size': 10,
        'num_neighbour': 100,
    }
    permutations.add_child_hash(perm_child, perm_settings)
    engine_perm = Engine(
        dim, lshashes=[permutations], distance=CosineDistance())

    # Engine 4: HashPermutationMapper meta-hash with a binary hash as child
    mapper = HashPermutationMapper('permut2')
    mapper_child = RandomBinaryProjections('rbp_perm2', 12)
    mapper.add_child_hash(mapper_child)
    engine_perm2 = Engine(dim, lshashes=[mapper], distance=CosineDistance())

    print('Indexing %d random vectors of dimension %d' % (n_points, dim))

    # Each raw vector is stored in every engine with its row number as data;
    # the matrix keeps the unit-normalised copy for the exhaustive search
    store_order = (engine, engine_rbpt, engine_perm, engine_perm2)
    matrix = numpy.zeros((n_points, dim))
    for row in xrange(n_points):
        vec = numpy.random.randn(dim)
        matrix[row, :] = nearpy.utils.utils.unitvec(vec)
        for target in store_order:
            target.store_vector(vec, row)

    print('Buckets 1 = %d' % len(engine.storage.buckets['rbp1'].keys()))
    print('Buckets 2 = %d' % len(engine_rbpt.storage.buckets['rbpt'].keys()))

    print('Building permuted index for HashPermutations')

    # Build the permuted index once all vectors are stored
    permutations.build_permuted_index()

    print('Generate random data')

    # Get random query vector
    query = numpy.random.randn(dim)

    # Same query against engines 1 to 4, in that order
    reports = (
        ('\nNeighbour distances with RandomBinaryProjectionTree:', engine_rbpt),
        ('\nNeighbour distances with RandomBinaryProjections:', engine),
        ('\nNeighbour distances with HashPermutations:', engine_perm),
        ('\nNeighbour distances with HashPermutations2:', engine_perm2),
    )
    for heading, target in reports:
        print(heading)
        print('  -> Candidate count is %d' % target.candidate_count(query))
        print_results(target.neighbours(query))

    # Exhaustive comparison against every stored (normalised) vector
    print('\nReal neighbour distances:')
    unit_query = nearpy.utils.utils.unitvec(query)
    column = unit_query.reshape((dim, 1))
    all_dists = CosineDistance().distance(matrix, column).reshape((-1, ))

    # Rows ordered by ascending distance; report the first ten
    order = numpy.argsort(all_dists)
    print_results([(None, idx, all_dists[idx]) for idx in order[:10]])
Ejemplo n.º 12
0
def example2():
    """Index random vectors with three hash setups and print query results.

    The three setups are a HashPermutations meta-hash, a
    HashPermutationMapper meta-hash and 20 separate RandomBinaryProjections
    hashes. For each one the function indexes POINTS random vectors of
    dimension DIM, prints the elapsed indexing time, queries the engine with
    a new random vector and prints the returned neighbour distances followed
    by the ten smallest values of ``CosineDistance().distance_matrix`` over
    all indexed vectors.
    """

    # Dimension of feature space
    DIM = 100

    # Number of data points (don't use too many because of the exact search)
    POINTS = 20000

    def _index_random_vectors(engine):
        """Store POINTS random vectors in *engine* and return them as a matrix."""
        matrix = numpy.zeros((POINTS, DIM))
        for i in range(POINTS):
            v = numpy.random.randn(DIM)
            matrix[i] = v
            engine.store_vector(v)
        return matrix

    def _query_and_report(engine, matrix, label):
        """Query *engine* with a random vector; print its and the exact distances."""
        # Get random query vector
        query = numpy.random.randn(DIM)

        print('\nNeighbour distances with %s:' % label)
        print('  -> Candidate count is %d' % engine.candidate_count(query))
        results = engine.neighbours(query)
        print([x[2] for x in results])

        # Real neighbours: distance from the query to every indexed vector
        print('\nReal neighbour distances:')
        query = query.reshape((1, DIM))
        dists = CosineDistance().distance_matrix(matrix, query)
        print(sorted(dists.reshape((-1, )))[:10])

    ##########################################################

    print('Performing indexing with HashPermutations...')
    t0 = time.time()

    # Create permutations meta-hash
    permutations = HashPermutations('permut')

    # Create binary hash as child hash
    rbp_perm = RandomBinaryProjections('rbp_perm', 14)
    rbp_conf = {'num_permutation': 50, 'beam_size': 10, 'num_neighbour': 100}

    # Add rbp as child hash of permutations hash
    permutations.add_child_hash(rbp_perm, rbp_conf)

    # Create engine
    engine_perm = Engine(DIM,
                         lshashes=[permutations],
                         distance=CosineDistance())

    matrix = _index_random_vectors(engine_perm)

    # Then update permuted index
    permutations.build_permuted_index()

    t1 = time.time()
    print('Indexing took %f seconds' % (t1 - t0))

    _query_and_report(engine_perm, matrix, 'HashPermutations')

    ##########################################################

    print('\nPerforming indexing with HashPermutationMapper...')
    t0 = time.time()

    # Create permutations meta-hash
    permutations2 = HashPermutationMapper('permut2')

    # Create binary hash as child hash
    rbp_perm2 = RandomBinaryProjections('rbp_perm2', 14)

    # Add rbp as child hash of permutations hash
    permutations2.add_child_hash(rbp_perm2)

    # Create engine
    engine_perm2 = Engine(DIM,
                          lshashes=[permutations2],
                          distance=CosineDistance())

    matrix = _index_random_vectors(engine_perm2)

    t1 = time.time()
    print('Indexing took %f seconds' % (t1 - t0))

    _query_and_report(engine_perm2, matrix, 'HashPermutationMapper')

    ##########################################################

    print('\nPerforming indexing with multiple binary hashes...')
    t0 = time.time()

    hashes = [RandomBinaryProjections('rbp_%d' % k, 10) for k in range(20)]

    # Create engine
    engine_rbps = Engine(DIM, lshashes=hashes, distance=CosineDistance())

    matrix = _index_random_vectors(engine_rbps)

    t1 = time.time()
    print('Indexing took %f seconds' % (t1 - t0))

    _query_and_report(engine_rbps, matrix, 'multiple binary hashes')
Ejemplo n.º 13
0
def example1():
    """Query four differently hashed engines with one random vector.

    Builds engines around a RandomBinaryProjectionTree, a plain
    RandomBinaryProjections hash, a HashPermutations meta-hash and a
    HashPermutationMapper meta-hash, stores the same random vectors in all
    of them, and prints each engine's candidate count and neighbour
    distances for a random query. Finally prints the ten smallest values of
    ``CosineDistance().distance_matrix`` over all stored vectors.
    """
    # Dimension of feature space
    dim = 100

    # Number of data points (don't use too many because of the exact search)
    n_points = 10000

    print('Creating engines')

    # Engine 1: projection tree hash
    tree_hash = RandomBinaryProjectionTree('rbpt', 20, 20)
    engine_rbpt = Engine(dim, lshashes=[tree_hash], distance=CosineDistance())

    # Engine 2: plain binary projection hash
    plain_hash = RandomBinaryProjections('rbp1', 20)
    engine = Engine(dim, lshashes=[plain_hash], distance=CosineDistance())

    # Engine 3: HashPermutations meta-hash with a binary hash as child
    permutations = HashPermutations('permut')
    perm_child = RandomBinaryProjections('rbp_perm', 20)
    perm_settings = {
        'num_permutation': 50,
        'beam_size': 10,
        'num_neighbour': 100,
    }
    permutations.add_child_hash(perm_child, perm_settings)
    engine_perm = Engine(
        dim, lshashes=[permutations], distance=CosineDistance())

    # Engine 4: HashPermutationMapper meta-hash with a binary hash as child
    mapper = HashPermutationMapper('permut2')
    mapper_child = RandomBinaryProjections('rbp_perm2', 12)
    mapper.add_child_hash(mapper_child)
    engine_perm2 = Engine(dim, lshashes=[mapper], distance=CosineDistance())

    print('Indexing %d random vectors of dimension %d' % (n_points, dim))

    # Each vector is stored in every engine and copied into the matrix that
    # the exhaustive search uses at the end
    store_order = (engine, engine_rbpt, engine_perm, engine_perm2)
    matrix = numpy.zeros((n_points, dim))
    for row in xrange(n_points):
        vec = numpy.random.randn(dim)
        matrix[row] = vec
        for target in store_order:
            target.store_vector(vec)

    print('Buckets 1 = %d' % len(engine.storage.buckets['rbp1'].keys()))
    print('Buckets 2 = %d' % len(engine_rbpt.storage.buckets['rbpt'].keys()))

    print('Building permuted index for HashPermutations')

    # Build the permuted index once all vectors are stored
    permutations.build_permuted_index()

    print('Generate random data')

    # Get random query vector
    query = numpy.random.randn(dim)

    # Same query against engines 1 to 4, in that order
    reports = (
        ('\nNeighbour distances with RandomBinaryProjectionTree:', engine_rbpt),
        ('\nNeighbour distances with RandomBinaryProjections:', engine),
        ('\nNeighbour distances with HashPermutations:', engine_perm),
        ('\nNeighbour distances with HashPermutations2:', engine_perm2),
    )
    for heading, target in reports:
        print(heading)
        print('  -> Candidate count is %d' % target.candidate_count(query))
        print([hit[2] for hit in target.neighbours(query)])

    # Exhaustive comparison against every stored vector, ten smallest shown
    print('\nReal neighbour distances:')
    query_row = query.reshape((1, dim))
    all_dists = CosineDistance().distance_matrix(matrix, query_row)
    print(sorted(all_dists.reshape((-1,)))[:10])
Ejemplo n.º 14
0
from nearpy import Engine
from nearpy.filters import NearestFilter
from nearpy.distances import CosineDistance
from nearpy.hashes import RandomBinaryProjections
from nearpy.hashes import HashPermutations
from nearpy.hashes import HashPermutationMapper
from nearpy.storage import MemoryStorage
import numpy

# Value passed as first argument to Engine and used as the length of the
# query vector below
dimension = 1000

# Permutations meta-hash that wraps the binary projection hash
permutations = HashPermutations('permut')

# Create binary hash as child hash
rbp_perm = RandomBinaryProjections('rbp_perm', 14)
rbp_conf = {'num_permutation': 50, 'beam_size': 10, 'num_neighbour': 100}

# Add rbp as child hash of permutations hash
permutations.add_child_hash(rbp_perm, rbp_conf)

# Engine over the meta-hash with cosine distance, a NearestFilter(5) vector
# filter and MemoryStorage as storage
engine = Engine(dimension,
                lshashes=[permutations],
                distance=CosineDistance(),
                vector_filters=[NearestFilter(5)],
                storage=MemoryStorage())

# Counter starting at zero; its use is not visible in this excerpt
i = 0

# All-zero vector of length `dimension`
query = numpy.zeros(dimension)

# Open the feature file for reading.
# NOTE(review): no close() is visible in this excerpt - confirm the handle is
# closed further down, or switch to a `with` block.
f = open('features2.txt', 'r')