def example1():
    """Compare neighbour queries across four differently configured engines.

    Builds four engines (a RandomBinaryProjectionTree, a plain
    RandomBinaryProjections hash, a HashPermutations meta-hash and a
    HashPermutationMapper meta-hash), stores the same random vectors in
    each of them, then runs one random query against every engine and
    prints the distances it reports.  Finally prints the ten smallest
    distances obtained by running CosineDistance against the full data
    matrix as a brute-force reference.

    Takes no arguments and returns nothing; all results are printed.
    """
    # Dimension of feature space
    DIM = 100

    # Number of data points (don't use too many because of the exact search)
    POINTS = 10000

    print('Creating engines')

    # Engine 1: projection tree hash.  The arguments are 20 and 20 (the
    # earlier note mentioning 12 projections did not match the code).
    rbpt = RandomBinaryProjectionTree('rbpt', 20, 20)
    engine_rbpt = Engine(DIM, lshashes=[rbpt], distance=CosineDistance())

    # Engine 2: plain binary projection hash
    rbp = RandomBinaryProjections('rbp1', 20)
    engine = Engine(DIM, lshashes=[rbp], distance=CosineDistance())

    # Engine 3: permutations meta-hash with a binary hash as child hash
    permutations = HashPermutations('permut')
    rbp_perm = RandomBinaryProjections('rbp_perm', 20)
    rbp_conf = {'num_permutation': 50, 'beam_size': 10, 'num_neighbour': 100}
    permutations.add_child_hash(rbp_perm, rbp_conf)
    engine_perm = Engine(DIM, lshashes=[permutations], distance=CosineDistance())

    # Engine 4: permutation mapper meta-hash with a binary hash as child hash
    permutations2 = HashPermutationMapper('permut2')
    rbp_perm2 = RandomBinaryProjections('rbp_perm2', 12)
    permutations2.add_child_hash(rbp_perm2)
    engine_perm2 = Engine(DIM, lshashes=[permutations2], distance=CosineDistance())

    print('Indexing %d random vectors of dimension %d' % (POINTS, DIM))

    # Store every random vector in all four engines and keep a copy in
    # `matrix` for the brute-force comparison at the end.
    # `range` (not `xrange`) so this also runs on Python 3.
    matrix = numpy.zeros((POINTS, DIM))
    for i in range(POINTS):
        v = numpy.random.randn(DIM)
        matrix[i] = v
        engine.store_vector(v)
        engine_rbpt.store_vector(v)
        engine_perm.store_vector(v)
        engine_perm2.store_vector(v)

    print('Buckets 1 = %d' % len(engine.storage.buckets['rbp1'].keys()))
    print('Buckets 2 = %d' % len(engine_rbpt.storage.buckets['rbpt'].keys()))

    print('Building permuted index for HashPermutations')

    # The permuted index is built only after all vectors are stored
    permutations.build_permuted_index()

    print('Generate random data')

    # Get random query vector
    query = numpy.random.randn(DIM)

    # Run the same query against every engine, in engine order 1..4
    labelled_engines = (
        ('RandomBinaryProjectionTree', engine_rbpt),
        ('RandomBinaryProjections', engine),
        ('HashPermutations', engine_perm),
        ('HashPermutations2', engine_perm2),
    )
    for label, current_engine in labelled_engines:
        print('\nNeighbour distances with %s:' % label)
        print(' -> Candidate count is %d' % current_engine.candidate_count(query))
        results = current_engine.neighbours(query)
        # Third element of each result is reported as the distance
        dists = [x[2] for x in results]
        print(dists)

    # Real neighbours
    print('\nReal neighbour distances:')
    # Pass a 1-D query, the same form example2 hands to
    # CosineDistance().distance together with the full matrix.
    # NOTE(review): assumes distance() multiplies matrix by a 1-D vector - confirm.
    query = query.reshape((DIM,))
    dists = CosineDistance().distance(matrix, query)
    dists = dists.reshape((-1,))
    dists = sorted(dists)
    print(dists[:10])
def _index_random_vectors(engine, points, dim):
    """Store `points` random vectors of length `dim` in `engine` and return them as a matrix."""
    matrix = numpy.zeros((points, dim))
    for i in range(points):
        v = numpy.random.randn(dim)
        matrix[i] = v
        engine.store_vector(v)
    return matrix


def _report_random_query(engine, matrix, label, dim):
    """Query `engine` with one random vector and print its distances next to a brute-force reference."""
    # Get random query vector
    query = numpy.random.randn(dim)

    print('\nNeighbour distances with %s:' % label)
    print(' -> Candidate count is %d' % engine.candidate_count(query))
    results = engine.neighbours(query)
    # Third element of each result is reported as the distance
    dists = [x[2] for x in results]
    print(dists)

    # Real neighbours: distances against every stored vector, smallest first
    print('\nReal neighbour distances:')
    query = query.reshape((dim))
    dists = CosineDistance().distance(matrix, query)
    dists = dists.reshape((-1, ))
    dists = sorted(dists)
    print(dists[:10])


def example2():
    """Time indexing for three hash configurations and print query results.

    For each of HashPermutations, HashPermutationMapper and a set of 20
    independent RandomBinaryProjections hashes, this builds an engine,
    stores freshly generated random vectors in it while measuring the
    elapsed wall-clock time, and then prints the distances returned for
    one random query together with the ten smallest distances computed
    against the full data matrix.

    Takes no arguments and returns nothing; all results are printed.
    """
    # Dimension of feature space
    DIM = 100

    # Number of data points (don't use too many because of the exact search)
    POINTS = 20000

    ##########################################################

    print('Performing indexing with HashPermutations...')
    t0 = time.time()

    # Permutations meta-hash with a binary hash as child hash
    permutations = HashPermutations('permut')
    rbp_perm = RandomBinaryProjections('rbp_perm', 14)
    rbp_conf = {'num_permutation': 50, 'beam_size': 10, 'num_neighbour': 100}
    permutations.add_child_hash(rbp_perm, rbp_conf)

    engine_perm = Engine(DIM, lshashes=[permutations], distance=CosineDistance())

    matrix = _index_random_vectors(engine_perm, POINTS, DIM)

    # Building the permuted index is counted as part of the indexing time
    permutations.build_permuted_index()

    t1 = time.time()
    print('Indexing took %f seconds' % (t1 - t0))

    _report_random_query(engine_perm, matrix, 'HashPermutations', DIM)

    ##########################################################

    print('\nPerforming indexing with HashPermutationMapper...')
    t0 = time.time()

    # Permutation mapper meta-hash with a binary hash as child hash
    permutations2 = HashPermutationMapper('permut2')
    rbp_perm2 = RandomBinaryProjections('rbp_perm2', 14)
    permutations2.add_child_hash(rbp_perm2)

    engine_perm2 = Engine(DIM, lshashes=[permutations2], distance=CosineDistance())

    matrix = _index_random_vectors(engine_perm2, POINTS, DIM)

    t1 = time.time()
    print('Indexing took %f seconds' % (t1 - t0))

    _report_random_query(engine_perm2, matrix, 'HashPermutationMapper', DIM)

    ##########################################################

    print('\nPerforming indexing with multiple binary hashes...')
    t0 = time.time()

    # Twenty independent binary projection hashes in a single engine
    hashes = [RandomBinaryProjections('rbp_%d' % k, 10) for k in range(20)]

    engine_rbps = Engine(DIM, lshashes=hashes, distance=CosineDistance())

    matrix = _index_random_vectors(engine_rbps, POINTS, DIM)

    t1 = time.time()
    print('Indexing took %f seconds' % (t1 - t0))

    _report_random_query(engine_rbps, matrix, 'multiple binary hashes', DIM)