def test_runnable(self): # First index some random vectors matrix = numpy.zeros((1000,200)) for i in xrange(1000): v = numpy.random.randn(200) matrix[i] = v self.engine.store_vector(v) self.engine_perm.store_vector(v) # Then update permuted index self.permutations.build_permuted_index() # Do random query on engine with permutations meta-hash print '\nNeighbour distances with permuted index:' query = numpy.random.randn(200) results = self.engine_perm.neighbours(query) dists = [x[2] for x in results] print dists # Do random query on engine without permutations meta-hash print '\nNeighbour distances without permuted index (distances should be larger):' results = self.engine.neighbours(query) dists = [x[2] for x in results] print dists # Real neighbours print '\nReal neighbour distances:' query = query.reshape((1,200)) dists = CosineDistance().distance_matrix(matrix,query) dists = dists.reshape((-1,)) dists = sorted(dists) print dists[:10]
def test_runnable(self): # First index some random vectors matrix = numpy.zeros((1000, 200)) for i in xrange(1000): v = numpy.random.randn(200) matrix[i] = v self.engine.store_vector(v) self.engine_perm.store_vector(v) # Then update permuted index self.permutations.build_permuted_index() # Do random query on engine with permutations meta-hash print '\nNeighbour distances with permuted index:' query = numpy.random.randn(200) results = self.engine_perm.neighbours(query) dists = [x[2] for x in results] print dists # Do random query on engine without permutations meta-hash print '\nNeighbour distances without permuted index (distances should be larger):' results = self.engine.neighbours(query) dists = [x[2] for x in results] print dists # Real neighbours print '\nReal neighbour distances:' query = query.reshape((1, 200)) dists = CosineDistance().distance_matrix(matrix, query) dists = dists.reshape((-1, )) dists = sorted(dists) print dists[:10]
def example2(): # Dimension of feature space DIM = 100 # Number of data points (dont do too much because of exact search) POINTS = 20000 ########################################################## print 'Performing indexing with HashPermutations...' t0 = time.time() # Create permutations meta-hash permutations = HashPermutations('permut') # Create binary hash as child hash rbp_perm = RandomBinaryProjections('rbp_perm', 14) rbp_conf = {'num_permutation':50,'beam_size':10,'num_neighbour':100} # Add rbp as child hash of permutations hash permutations.add_child_hash(rbp_perm, rbp_conf) # Create engine engine_perm = Engine(DIM, lshashes=[permutations], distance=CosineDistance()) # First index some random vectors matrix = numpy.zeros((POINTS,DIM)) for i in xrange(POINTS): v = numpy.random.randn(DIM) matrix[i] = v engine_perm.store_vector(v) # Then update permuted index permutations.build_permuted_index() t1 = time.time() print 'Indexing took %f seconds' % (t1-t0) # Get random query vector query = numpy.random.randn(DIM) # Do random query on engine 3 print '\nNeighbour distances with HashPermutations:' print ' -> Candidate count is %d' % engine_perm.candidate_count(query) results = engine_perm.neighbours(query) dists = [x[2] for x in results] print dists # Real neighbours print '\nReal neighbour distances:' query = query.reshape((1,DIM)) dists = CosineDistance().distance_matrix(matrix,query) dists = dists.reshape((-1,)) dists = sorted(dists) print dists[:10] ########################################################## print '\nPerforming indexing with HashPermutationMapper...' t0 = time.time() # Create permutations meta-hash permutations2 = HashPermutationMapper('permut2') # Create binary hash as child hash rbp_perm2 = RandomBinaryProjections('rbp_perm2', 14) # Add rbp as child hash of permutations hash permutations2.add_child_hash(rbp_perm2) # Create engine engine_perm2 = Engine(DIM, lshashes=[permutations2], distance=CosineDistance()) # First index some random vectors matrix = numpy.zeros((POINTS,DIM)) for i in xrange(POINTS): v = numpy.random.randn(DIM) matrix[i] = v engine_perm2.store_vector(v) t1 = time.time() print 'Indexing took %f seconds' % (t1-t0) # Get random query vector query = numpy.random.randn(DIM) # Do random query on engine 4 print '\nNeighbour distances with HashPermutationMapper:' print ' -> Candidate count is %d' % engine_perm2.candidate_count(query) results = engine_perm2.neighbours(query) dists = [x[2] for x in results] print dists # Real neighbours print '\nReal neighbour distances:' query = query.reshape((1,DIM)) dists = CosineDistance().distance_matrix(matrix,query) dists = dists.reshape((-1,)) dists = sorted(dists) print dists[:10] ########################################################## print '\nPerforming indexing with mutliple binary hashes...' t0 = time.time() hashes = [] for k in range(20): hashes.append(RandomBinaryProjections('rbp_%d' % k, 10)) # Create engine engine_rbps = Engine(DIM, lshashes=hashes, distance=CosineDistance()) # First index some random vectors matrix = numpy.zeros((POINTS,DIM)) for i in xrange(POINTS): v = numpy.random.randn(DIM) matrix[i] = v engine_rbps.store_vector(v) t1 = time.time() print 'Indexing took %f seconds' % (t1-t0) # Get random query vector query = numpy.random.randn(DIM) # Do random query on engine 4 print '\nNeighbour distances with mutliple binary hashes:' print ' -> Candidate count is %d' % engine_rbps.candidate_count(query) results = engine_rbps.neighbours(query) dists = [x[2] for x in results] print dists # Real neighbours print '\nReal neighbour distances:' query = query.reshape((1,DIM)) dists = CosineDistance().distance_matrix(matrix,query) dists = dists.reshape((-1,)) dists = sorted(dists) print dists[:10]
def example1(): # Dimension of feature space DIM = 100 # Number of data points (dont do too much because of exact search) POINTS = 10000 print('Creating engines') # We want 12 projections, 20 results at least rbpt = RandomBinaryProjectionTree('rbpt', 20, 20) # Create engine 1 engine_rbpt = Engine(DIM, lshashes=[rbpt], distance=CosineDistance()) # Create binary hash as child hash rbp = RandomBinaryProjections('rbp1', 20) # Create engine 2 engine = Engine(DIM, lshashes=[rbp], distance=CosineDistance()) # Create permutations meta-hash permutations = HashPermutations('permut') # Create binary hash as child hash rbp_perm = RandomBinaryProjections('rbp_perm', 20) rbp_conf = {'num_permutation': 50, 'beam_size': 10, 'num_neighbour': 100} # Add rbp as child hash of permutations hash permutations.add_child_hash(rbp_perm, rbp_conf) # Create engine 3 engine_perm = Engine(DIM, lshashes=[permutations], distance=CosineDistance()) # Create permutations meta-hash permutations2 = HashPermutationMapper('permut2') # Create binary hash as child hash rbp_perm2 = RandomBinaryProjections('rbp_perm2', 12) # Add rbp as child hash of permutations hash permutations2.add_child_hash(rbp_perm2) # Create engine 3 engine_perm2 = Engine(DIM, lshashes=[permutations2], distance=CosineDistance()) print('Indexing %d random vectors of dimension %d' % (POINTS, DIM)) # First index some random vectors matrix = numpy.zeros((POINTS, DIM)) for i in xrange(POINTS): v = numpy.random.randn(DIM) matrix[i, :] = nearpy.utils.utils.unitvec(v) engine.store_vector(v, i) engine_rbpt.store_vector(v, i) engine_perm.store_vector(v, i) engine_perm2.store_vector(v, i) print('Buckets 1 = %d' % len(engine.storage.buckets['rbp1'].keys())) print('Buckets 2 = %d' % len(engine_rbpt.storage.buckets['rbpt'].keys())) print('Building permuted index for HashPermutations') # Then update permuted index permutations.build_permuted_index() print('Generate random data') # Get random query vector query = numpy.random.randn(DIM) # Do random query on engine 1 print('\nNeighbour distances with RandomBinaryProjectionTree:') print(' -> Candidate count is %d' % engine_rbpt.candidate_count(query)) results = engine_rbpt.neighbours(query) print_results(results) # Do random query on engine 2 print('\nNeighbour distances with RandomBinaryProjections:') print(' -> Candidate count is %d' % engine.candidate_count(query)) results = engine.neighbours(query) print_results(results) # Do random query on engine 3 print('\nNeighbour distances with HashPermutations:') print(' -> Candidate count is %d' % engine_perm.candidate_count(query)) results = engine_perm.neighbours(query) print_results(results) # Do random query on engine 4 print('\nNeighbour distances with HashPermutations2:') print(' -> Candidate count is %d' % engine_perm2.candidate_count(query)) results = engine_perm2.neighbours(query) print_results(results) # Real neighbours print('\nReal neighbour distances:') query = nearpy.utils.utils.unitvec(query) query = query.reshape((DIM, 1)) dists = CosineDistance().distance(matrix, query) dists = dists.reshape((-1, )) # dists = sorted(dists) dists_argsort = numpy.argsort(dists) results = [(None, d, dists[d]) for d in dists_argsort[:10]] print_results(results)
def example2(): # Dimension of feature space DIM = 100 # Number of data points (dont do too much because of exact search) POINTS = 20000 ########################################################## print 'Performing indexing with HashPermutations...' t0 = time.time() # Create permutations meta-hash permutations = HashPermutations('permut') # Create binary hash as child hash rbp_perm = RandomBinaryProjections('rbp_perm', 14) rbp_conf = {'num_permutation': 50, 'beam_size': 10, 'num_neighbour': 100} # Add rbp as child hash of permutations hash permutations.add_child_hash(rbp_perm, rbp_conf) # Create engine engine_perm = Engine(DIM, lshashes=[permutations], distance=CosineDistance()) # First index some random vectors matrix = numpy.zeros((POINTS, DIM)) for i in xrange(POINTS): v = numpy.random.randn(DIM) matrix[i] = v engine_perm.store_vector(v) # Then update permuted index permutations.build_permuted_index() t1 = time.time() print 'Indexing took %f seconds' % (t1 - t0) # Get random query vector query = numpy.random.randn(DIM) # Do random query on engine 3 print '\nNeighbour distances with HashPermutations:' print ' -> Candidate count is %d' % engine_perm.candidate_count(query) results = engine_perm.neighbours(query) dists = [x[2] for x in results] print dists # Real neighbours print '\nReal neighbour distances:' query = query.reshape((1, DIM)) dists = CosineDistance().distance_matrix(matrix, query) dists = dists.reshape((-1, )) dists = sorted(dists) print dists[:10] ########################################################## print '\nPerforming indexing with HashPermutationMapper...' t0 = time.time() # Create permutations meta-hash permutations2 = HashPermutationMapper('permut2') # Create binary hash as child hash rbp_perm2 = RandomBinaryProjections('rbp_perm2', 14) # Add rbp as child hash of permutations hash permutations2.add_child_hash(rbp_perm2) # Create engine engine_perm2 = Engine(DIM, lshashes=[permutations2], distance=CosineDistance()) # First index some random vectors matrix = numpy.zeros((POINTS, DIM)) for i in xrange(POINTS): v = numpy.random.randn(DIM) matrix[i] = v engine_perm2.store_vector(v) t1 = time.time() print 'Indexing took %f seconds' % (t1 - t0) # Get random query vector query = numpy.random.randn(DIM) # Do random query on engine 4 print '\nNeighbour distances with HashPermutationMapper:' print ' -> Candidate count is %d' % engine_perm2.candidate_count(query) results = engine_perm2.neighbours(query) dists = [x[2] for x in results] print dists # Real neighbours print '\nReal neighbour distances:' query = query.reshape((1, DIM)) dists = CosineDistance().distance_matrix(matrix, query) dists = dists.reshape((-1, )) dists = sorted(dists) print dists[:10] ########################################################## print '\nPerforming indexing with mutliple binary hashes...' t0 = time.time() hashes = [] for k in range(20): hashes.append(RandomBinaryProjections('rbp_%d' % k, 10)) # Create engine engine_rbps = Engine(DIM, lshashes=hashes, distance=CosineDistance()) # First index some random vectors matrix = numpy.zeros((POINTS, DIM)) for i in xrange(POINTS): v = numpy.random.randn(DIM) matrix[i] = v engine_rbps.store_vector(v) t1 = time.time() print 'Indexing took %f seconds' % (t1 - t0) # Get random query vector query = numpy.random.randn(DIM) # Do random query on engine 4 print '\nNeighbour distances with mutliple binary hashes:' print ' -> Candidate count is %d' % engine_rbps.candidate_count(query) results = engine_rbps.neighbours(query) dists = [x[2] for x in results] print dists # Real neighbours print '\nReal neighbour distances:' query = query.reshape((1, DIM)) dists = CosineDistance().distance_matrix(matrix, query) dists = dists.reshape((-1, )) dists = sorted(dists) print dists[:10]
def example1(): # Dimension of feature space DIM = 100 # Number of data points (dont do too much because of exact search) POINTS = 10000 print 'Creating engines' # We want 12 projections, 20 results at least rbpt = RandomBinaryProjectionTree('rbpt', 20, 20) # Create engine 1 engine_rbpt = Engine(DIM, lshashes=[rbpt], distance=CosineDistance()) # Create binary hash as child hash rbp = RandomBinaryProjections('rbp1', 20) # Create engine 2 engine = Engine(DIM, lshashes=[rbp], distance=CosineDistance()) # Create permutations meta-hash permutations = HashPermutations('permut') # Create binary hash as child hash rbp_perm = RandomBinaryProjections('rbp_perm', 20) rbp_conf = {'num_permutation':50,'beam_size':10,'num_neighbour':100} # Add rbp as child hash of permutations hash permutations.add_child_hash(rbp_perm, rbp_conf) # Create engine 3 engine_perm = Engine(DIM, lshashes=[permutations], distance=CosineDistance()) # Create permutations meta-hash permutations2 = HashPermutationMapper('permut2') # Create binary hash as child hash rbp_perm2 = RandomBinaryProjections('rbp_perm2', 12) # Add rbp as child hash of permutations hash permutations2.add_child_hash(rbp_perm2) # Create engine 3 engine_perm2 = Engine(DIM, lshashes=[permutations2], distance=CosineDistance()) print 'Indexing %d random vectors of dimension %d' % (POINTS, DIM) # First index some random vectors matrix = numpy.zeros((POINTS,DIM)) for i in xrange(POINTS): v = numpy.random.randn(DIM) matrix[i] = v engine.store_vector(v) engine_rbpt.store_vector(v) engine_perm.store_vector(v) engine_perm2.store_vector(v) print 'Buckets 1 = %d' % len(engine.storage.buckets['rbp1'].keys()) print 'Buckets 2 = %d' % len(engine_rbpt.storage.buckets['rbpt'].keys()) print 'Building permuted index for HashPermutations' # Then update permuted index permutations.build_permuted_index() print 'Generate random data' # Get random query vector query = numpy.random.randn(DIM) # Do random query on engine 1 print '\nNeighbour distances with RandomBinaryProjectionTree:' print ' -> Candidate count is %d' % engine_rbpt.candidate_count(query) results = engine_rbpt.neighbours(query) dists = [x[2] for x in results] print dists # Do random query on engine 2 print '\nNeighbour distances with RandomBinaryProjections:' print ' -> Candidate count is %d' % engine.candidate_count(query) results = engine.neighbours(query) dists = [x[2] for x in results] print dists # Do random query on engine 3 print '\nNeighbour distances with HashPermutations:' print ' -> Candidate count is %d' % engine_perm.candidate_count(query) results = engine_perm.neighbours(query) dists = [x[2] for x in results] print dists # Do random query on engine 4 print '\nNeighbour distances with HashPermutations2:' print ' -> Candidate count is %d' % engine_perm2.candidate_count(query) results = engine_perm2.neighbours(query) dists = [x[2] for x in results] print dists # Real neighbours print '\nReal neighbour distances:' query = query.reshape((1,DIM)) dists = CosineDistance().distance_matrix(matrix,query) dists = dists.reshape((-1,)) dists = sorted(dists) print dists[:10]