def build_forests(tree_count=250):
    path = '/home/developer/spaciotesting/'
    db.testSpacio.update_many({}, {'$unset': {"AnnoyIndex": 1}})
    dis_func = 'angular'

    """ forest for histogram """
    t = annoy.AnnoyIndex(696, dis_func)
    items = db.testSpacio.find({})
    for x, item in enumerate(items):
        v = item['fingerprint']
        t.add_item(x, v)
        db.testSpacio.update_one({'_id': item['_id']}, {'$set': {"AnnoyIndex.fp": x}})
    t.build(tree_count)
    t.save(path + 'histo250.ann')

    """ forest for spacio """
    t = annoy.AnnoyIndex(2304, dis_func)
    items = db.testSpacio.find({})
    for x, item in enumerate(items):
        v = item['sp']
        vector = []
        for i in range(len(v)):
            vector += v[i]
        t.add_item(x, vector)
        db.testSpacio.update_one({'_id': item['_id']}, {'$set': {"AnnoyIndex.sp": x}})
    t.build(tree_count)
    t.save(path + 'spacio250.ann')
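# Hedged usage sketch (not part of the original code above): assuming build_forests()
# has been run and the two .ann files exist, either forest can be reloaded and queried
# using the integer ids stored under "AnnoyIndex.fp" / "AnnoyIndex.sp" in MongoDB.
def query_histogram_forest(fp_vector, n=10, path='/home/developer/spaciotesting/'):
    # 696-dimensional angular index, matching the histogram forest built above
    u = annoy.AnnoyIndex(696, 'angular')
    u.load(path + 'histo250.ann')
    # returns ([item ids], [angular distances]); ids map back to "AnnoyIndex.fp"
    return u.get_nns_by_vector(fp_vector, n, include_distances=True)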
def prepare_nlp():
    nlp = spacy.load('en_core_web_md')  # or en_core_web_md
    qualified = [item for item in nlp.vocab if item.has_vector and item.is_alpha]
    lexmap = []
    t = annoy.AnnoyIndex(300)
    for i, item in enumerate(islice(sorted(qualified, key=lambda x: x.prob, reverse=True), 100000)):
        t.add_item(i, item.vector)
        lexmap.append(item)
    t.build(25)

    p = annoy.AnnoyIndex(50)
    phonmap = []
    phonlookup = {}
    for i, line in enumerate(open("./cmudict-0.7b-simvecs")):
        # the simvecs file separates the word from its vector with two spaces
        word, vec_raw = line.split("  ")
        word = word.lower().rstrip("(0123)")
        vec = [float(v) for v in vec_raw.split()]
        p.add_item(i, vec)
        phonmap.append(word)
        phonlookup[word] = vec
    p.build(25)

    return nlp, lexmap, phonmap, phonlookup, t, p
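# Hedged usage sketch (assumed helper, not from the original source): looking up the
# nearest words in the lexical index returned by prepare_nlp(). `word` is assumed to
# be a lowercase token present in the spaCy vocabulary.
def similar_words(nlp, lexmap, t, word, n=10):
    vec = nlp.vocab[word].vector
    ids = t.get_nns_by_vector(vec, n)
    return [lexmap[i].orth_ for i in ids]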
def fit(self, Ciu):
    # delay loading the annoy library in case it's not installed here
    import annoy

    # train the model
    super(AnnoyAlternatingLeastSquares, self).fit(Ciu)

    # build up an Annoy Index with all the item_factors (for calculating
    # similar items)
    if self.approximate_similar_items:
        log.debug("Building annoy similar items index")

        self.similar_items_index = annoy.AnnoyIndex(
            self.item_factors.shape[1], 'angular')
        for i, row in enumerate(self.item_factors):
            self.similar_items_index.add_item(i, row)
        self.similar_items_index.build(self.n_trees)

    # build up a separate index for the inner product (for recommend
    # methods)
    if self.approximate_recommend:
        log.debug("Building annoy recommendation index")
        self.max_norm, extra = augment_inner_product_matrix(
            self.item_factors)
        self.recommend_index = annoy.AnnoyIndex(extra.shape[1], 'angular')
        for i, row in enumerate(extra):
            self.recommend_index.add_item(i, row)
        self.recommend_index.build(self.n_trees)
def build_annoy_recommender(als_model: AlternatingLeastSquares,
                            user_labels: np.ndarray,
                            item_labels: np.ndarray,
                            approximate_similar_items=True,
                            approximate_recommend=True,
                            n_trees: int = 1000):
    # build up an Annoy Index with all the item_factors (for calculating similar items)
    if approximate_similar_items:
        log.info("Building annoy similar items index")
        similar_items_index = annoy.AnnoyIndex(
            als_model.item_factors.shape[1], 'angular')
        for i, row in enumerate(als_model.item_factors):
            similar_items_index.add_item(i, row)
        similar_items_index.build(n_trees)

    # build up a separate index for the inner product (for recommend methods)
    if approximate_recommend:
        log.info("Building annoy recommendation index")
        max_norm, extra = augment_inner_product_matrix(als_model.item_factors)
        recommend_index = annoy.AnnoyIndex(extra.shape[1], 'angular')
        for i, row in enumerate(extra):
            recommend_index.add_item(i, row)
        recommend_index.build(n_trees)

    return ImplicitAnnoyRecommender(als_model,
                                    recommend_index=recommend_index,
                                    max_norm=max_norm,
                                    user_labels=user_labels,
                                    item_labels=item_labels)
def match(descriptor1, descriptor2):
    f = 128
    matches = []

    # index the features of the first image (items must be added before build())
    t = annoy.AnnoyIndex(f, "euclidean")
    nFeatures = len(descriptor1)
    for j in range(nFeatures):
        t.add_item(j, descriptor1[j][1])
    t.build(1000)

    nFeatures = len(descriptor2)
    for n in range(nFeatures):
        fd = descriptor2[n][1]
        # search for the best match for a feature in other images
        ind, dist = t.get_nns_by_vector(fd, 2, search_k=-1, include_distances=True)
        # print("ind, dist", ind, dist)
        if dist[1] == 0 or dist[0] / dist[1] < 0.8:
            matches.append([descriptor1[ind[0]][0][0], descriptor1[ind[0]][0][1],
                            descriptor2[n][0][0], descriptor2[n][0][1]])

    # index the features of the second image and match in the opposite direction
    t2 = annoy.AnnoyIndex(f, "euclidean")
    nFeatures = len(descriptor2)
    for j in range(nFeatures):
        t2.add_item(j, descriptor2[j][1])
    t2.build(1000)

    nFeatures = len(descriptor1)
    for n in range(nFeatures):
        fd = descriptor1[n][1]
        # search for the best match for a feature in other images
        ind2, dist2 = t2.get_nns_by_vector(fd, 2, search_k=-1, include_distances=True)
        if dist2[1] == 0 or dist2[0] / dist2[1] < 0.8:
            matches.append([descriptor1[n][0][0], descriptor1[n][0][1],
                            descriptor2[ind2[0]][0][0], descriptor2[ind2[0]][0][1]])

    print("Feature matching complete...")
    return matches
def build_index(vecs, file_name):
    t = annoy.AnnoyIndex(512, "dot")
    for i in range(len(vecs)):
        t.add_item(i, vecs[i])
    t.build(n_trees=100)  # tested 100 on bdd, works well, could do more.
    t.save(file_name)
    u = annoy.AnnoyIndex(512, "dot")
    u.load(file_name)  # verify can load.
    return u
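# Hedged usage sketch (assumption, not from the original source): querying the index
# returned by build_index(). With the "dot" metric, the "distances" reported by Annoy
# are inner-product scores, so larger means more similar.
def query_index(index, query_vec, n=5):
    ids, scores = index.get_nns_by_vector(query_vec, n, include_distances=True)
    return list(zip(ids, scores))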
def fit(self, X, y=None) -> RandomProjectionTree:
    """ Build the annoy.Index and insert data from X.

    Parameters
    ----------
    X: np.array
        Data to be indexed
    y: any
        Ignored

    Returns
    -------
    self: RandomProjectionTree
        An instance of RandomProjectionTree with a built index
    """
    if y is None:
        X = check_array(X)
    else:
        X, y = check_X_y(X, y)
        self.y_train_ = y

    self.n_samples_fit_ = X.shape[0]
    self.n_features_ = X.shape[1]
    self.X_dtype_ = X.dtype
    if self.metric == 'minkowski':  # for compatibility
        self.metric = 'euclidean'
    metric = self.metric if self.metric != 'sqeuclidean' else 'euclidean'
    self.effective_metric_ = metric
    annoy_index = annoy.AnnoyIndex(X.shape[1], metric=metric)
    if self.mmap_dir == 'auto':
        self.annoy_ = create_tempfile_preferably_in_dir(
            prefix='skhubness_', suffix='.annoy', directory='/dev/shm')
        logging.warning(
            f'The index will be stored in {self.annoy_}. '
            f'It will NOT be deleted automatically, when this instance is destructed.')
    elif isinstance(self.mmap_dir, str):
        self.annoy_ = create_tempfile_preferably_in_dir(
            prefix='skhubness_', suffix='.annoy', directory=self.mmap_dir)
    else:  # e.g. None
        self.mmap_dir = None

    if self.verbose:
        enumerate_X = tqdm(
            enumerate(X),
            desc='Build RPtree',
            total=len(X),
        )
    else:
        enumerate_X = enumerate(X)
    for i, x in enumerate_X:
        annoy_index.add_item(i, x.tolist())
    annoy_index.build(self.n_trees)

    if self.mmap_dir is None:
        self.annoy_ = annoy_index
    else:
        annoy_index.save(self.annoy_, )

    return self
def create_index(fn, num_trees=30, verbose=False):
    fn_annoy = fn + '.annoy'
    fn_lmdb = fn + '.lmdb'  # stores word <-> id mapping
    word, vec = get_vectors(fn).next()
    size = len(vec)
    if verbose:
        print("Vector size: {}".format(size))
    env = lmdb.open(fn_lmdb, map_size=int(1e9))
    if not os.path.exists(fn_annoy) or not os.path.exists(fn_lmdb):
        i = 0
        a = annoy.AnnoyIndex(size)
        with env.begin(write=True) as txn:
            for word, vec in get_vectors(fn):
                a.add_item(i, vec)
                id = 'i%d' % i
                word = 'w' + word
                txn.put(id, word)
                txn.put(word, id)
                i += 1
                if verbose:
                    if i % 1000 == 0:
                        print(i, '...')
        if verbose:
            print("Starting to build")
        a.build(num_trees)
        if verbose:
            print("Finished building")
        a.save(fn_annoy)
        if verbose:
            print("Annoy index saved to: {}".format(fn_annoy))
            print("lmdb map saved to: {}".format(fn_lmdb))
    else:
        print("Annoy index and lmdb map already in path")
def fit(self, corpus):
    """
    Fit a document similarity model

    Parameters
    ----------
    corpus : object
        a corpus object that follows DefaultJsonCorpus

    Returns
    -------
    trained DocumentSimilarity object
    """
    if self.model_type == 'sklearn_nmf':
        model = self.create_sklearn_model(corpus)
    else:
        model = self.create_gensim_model(corpus)
    self.index = similarities.Similarity(self.work_folder + "/gensim_index", model, self.vec_size)
    self.index_annoy = annoy.AnnoyIndex(self.vec_size, metric='angular')
    for i, vec in enumerate(model):
        self.index_annoy.add_item(i, list(gensim.matutils.sparse2full(vec, self.vec_size).astype(float)))
    self.index_annoy.build(self.annoy_trees)
    self.seq2meta = {}
    self.id2meta = {}
    for j in corpus.get_meta():
        self.seq2meta[j['corpus_seq_id']] = j
        self.id2meta[j['id']] = j
    return self
def load(
    self,
    ann_path: str,
    annoy_data_dimensionality: Optional[int] = None,
    annoy_mertic: Optional[str] = None,
    annoy_prefault: bool = False,
) -> None:
    """
    Loads an approximate nearest neighbour (ANN) instance from disk.

    Parameters
    ----------
    ann_path : str
        Path of saved ANN instance (directory if ann_alg is "scann", filepath otherwise).
    annoy_data_dimensionality : int, optional
        Dimensionality of data (required if ann_alg is set to "annoy").
    annoy_mertic : str, optional
        Distance metric (required if ann_alg is set to "annoy").
    annoy_prefault : bool, optional
        Whether or not to enable the `prefault` option when loading Annoy index
        (defaults to False).
    """
    if self._ann_alg == "scann":
        self._ann_index = scann.scann_ops_pybind.load_searcher(ann_path)
    elif self._ann_alg == "annoy":
        self._ann_index = annoy.AnnoyIndex(f=annoy_data_dimensionality, metric=annoy_mertic)
        self._ann_index.load(fn=ann_path, prefault=annoy_prefault)
def __build_index(self, index_file):
    self.embedding_size = self.embeddings.shape[1]

    self.index = an.AnnoyIndex(self.embedding_size, metric='angular')

    for embedding_ind in range(self.embeddings.shape[0]):
        embedding = self.embeddings[embedding_ind, :]
        self.index.add_item(embedding_ind, embedding)

    self.index.build(self.n_trees)

    if self.id_map is None:
        self.id_map = dict([(i, i) for i in range(self.embeddings.shape[0])])

    self.inverse_id_map = dict([(v, k) for k, v in self.id_map.items()])

    if index_file:
        embeddings_file = index_file + '.embeddings'
        state = {
            'embedding_size': self.embedding_size,
            'id_map': self.id_map,
        }
        self.index.save(embeddings_file)
        with open(index_file, 'wb') as _index_file:
            pickle.dump(state, _index_file)
def _build_index(self, documents, encodings):
    self.annoy_index = annoy.AnnoyIndex(self.dimension, metric=self.metric)
    for i, (document, encoding) in enumerate(zip(documents, encodings)):
        self.annoy_index.add_item(i, encoding)
        self.document_ids.append(document["id"])
    self.annoy_index.build(self.num_trees)
    self._save_annoy_index()
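# Hedged companion sketch (assumed method, not in the original class): translates the
# Annoy item ids returned for a query encoding back into the document ids recorded in
# self.document_ids during _build_index().
def _nearest_documents(self, query_encoding, n=10):
    item_ids, distances = self.annoy_index.get_nns_by_vector(
        query_encoding, n, include_distances=True)
    return [(self.document_ids[i], d) for i, d in zip(item_ids, distances)]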
def read(self):
    with open(self.pathIds, 'rb') as f:
        data = cPickle.load(f)
    n = data[0]
    self.ids = data[1:]
    self.index = annoy.AnnoyIndex(n)
    self.index.load(self.pathAnnoy)
def __init__(self):
    self.tree = annoy.AnnoyIndex(Amadeus.ANNOY_WORD_EMBADDING_NUM)
    self.tree.load(Amadeus.ANNOY_TREE)
    self.dic_keys = np.load(Amadeus.ANNOY_DICT_KEYS)
    self.ws = Amadeus.brain.wordseg.base_wordseg.JiebaSeg()
    self.w2v = W2V()
    self.dic = np.load(Amadeus.ANNOY_DICT).item()
def __init__(self, gensim_emb, texts, trees_n=10):
    self.gensim_emb = gensim_emb
    self.morph = pymorphy2.MorphAnalyzer()
    self.tag_conv = converters.converter('opencorpora-int', 'ud20')
    self.tag_cache = {}

    self.id2text = list(sorted(set(texts)))
    textid2tokens = [[tok + '_' + self.get_tag(tok) for tok in txt.split(' ')]
                     for txt in self.id2text]

    tokenid2token = [tok for tok in sorted(set(tok for txt_toks in textid2tokens
                                               for tok in txt_toks))
                     if tok in self.gensim_emb.vocab]
    token2tokenid = {tok: i for i, tok in enumerate(tokenid2token)}
    self.tokenid2vec = [self.gensim_emb[tok] for tok in tokenid2token]

    self.tokenid2textid = collections.defaultdict(set)
    self.text2tokenid = collections.defaultdict(set)
    for txt_i, txt_toks in enumerate(textid2tokens):
        txt = self.id2text[txt_i]
        for tok in txt_toks:
            tok_id = token2tokenid.get(tok, None)
            if tok_id is not None:
                self.tokenid2textid[tok_id].add(txt_i)
                self.text2tokenid[txt].add(tok_id)

    self.vector_idx = annoy.AnnoyIndex(self.gensim_emb.vectors.shape[1], 'angular')
    for tok_i, tok_vec in enumerate(self.tokenid2vec):
        self.vector_idx.add_item(tok_i, tok_vec)
    self.vector_idx.build(trees_n)
def build_index(self):
    i = 0
    print "Building Index ...."
    ann = annoy.AnnoyIndex(self.size, self.metric)
    with self.env.begin(write=True) as txn:
        for word, vec in get_vectors_file(self.fname):
            # add the vector to annoy index
            ann.add_item(i, vec)
            # use the same id to point to word
            id = 'i%d' % i
            # make it a string
            word = 'w' + word
            # index by id
            txn.put(id, word)
            # index by word
            txn.put(word, id)
            i += 1
            # print the progress
            if i % 1000 == 0:
                print i, "..."
    # build the forest of trees. More trees give higher precision when querying
    ann.build(self.number_of_trees)
    # save the index to disk
    ann.save(self.anndb)
    # load the new index
    self.ann.load(self.anndb)
    return "Built ann index of size: {}, and loaded it in memory".format(i)
def create_index(self, data_paths: List[RichPath], metadata: Dict[str, Any]) -> None:
    def representation_iter():
        data_chunk_iterator = (r.read_by_file_suffix() for r in data_paths)
        with self.__model.sess.as_default():
            for raw_data_chunk in data_chunk_iterator:
                for raw_sample in raw_data_chunk:
                    loaded_sample = {}
                    use_example = self.__model._load_data_from_sample(
                        self.__model.hyperparameters,
                        self.__model.metadata,
                        raw_sample=raw_sample,
                        result_holder=loaded_sample,
                        is_train=False)
                    if not use_example:
                        continue

                    _, fetches = self.__model._run_epoch_in_batches(
                        loaded_sample, '(indexing)', is_train=False, quiet=True,
                        additional_fetch_dict={
                            'target_representations': self.__model.ops['target_representations']
                        })
                    target_representations = fetches['target_representations']

                    idx = 0
                    for node_idx, annotation_data in raw_sample['supernodes'].items():
                        node_idx = int(node_idx)
                        if 'ignored_supernodes' in loaded_sample and node_idx in loaded_sample['ignored_supernodes']:
                            continue

                        annotation = annotation_data['annotation']
                        if ignore_type_annotation(annotation):
                            idx += 1
                            continue

                        yield target_representations[idx], annotation
                        idx += 1

    index = annoy.AnnoyIndex(self.__type_representation_size, 'manhattan')
    indexed_element_types = []
    logging.info('Creating index...')
    for i, (representation, type) in enumerate(representation_iter()):
        index.add_item(i, representation)
        indexed_element_types.append(type)

    logging.info('Indexing...')
    index.build(20)
    logging.info('Index Created.')

    with tempfile.NamedTemporaryFile() as f:
        index.save(f.name)
        with open(f.name, 'rb') as fout:
            metadata['index'] = fout.read()

    metadata['indexed_element_types'] = indexed_element_types
def setup(self, data: torch.Tensor) -> None:
    """
    `data` denotes the "stored tensors". These are the tensors within which we want
    to find the nearest neighbors to a query tensor, via a call to the
    `get_nearest_neighbors` method. Before we can call `get_nearest_neighbors`, we
    need to first store the stored tensors, by doing processing that indexes the
    stored tensors in a form that enables nearest-neighbors computation. This method
    does that preprocessing, and is assumed to be called before any call to
    `get_nearest_neighbors`. In particular, it creates the trees used to index the
    stored tensors. This index is built to enable computation of vectors that have
    the largest dot-product with the query tensors.

    The tensors in the "stored tensors" can be of a common, but arbitrary shape,
    denoted *, so that `data` is of shape (N, *), where N is the number of tensors
    in the stored tensors. Therefore, the 0-th dimension indexes the tensors in the
    stored tensors.

    Args:
        data (tensor): A tensor of shape (N, *) representing the stored tensors.
                The 0-th dimension indexes the tensors in the stored tensors,
                so that `data[i]` is the tensor with index `i`. The nearest
                neighbors of a query will be referred to by their index.
    """
    import annoy

    data = data.view((len(data), -1))
    projection_dim = data.shape[1]
    self.knn_index = annoy.AnnoyIndex(projection_dim, "dot")
    for (i, projection) in enumerate(data):
        self.knn_index.add_item(i, projection)
    self.knn_index.build(self.num_trees)
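# Hedged companion sketch (assumed method; only setup() appears above): a minimal
# dot-product nearest-neighbour query against the index built in setup(). Returns the
# indices of the k stored tensors with the largest dot product for each query tensor.
def get_nearest_neighbors(self, query: torch.Tensor, k: int) -> torch.Tensor:
    query = query.view((len(query), -1))
    indices = [self.knn_index.get_nns_by_vector(q.tolist(), k) for q in query]
    return torch.tensor(indices)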
def match(descriptors):
    num_image = len(descriptors)
    f = 128
    trees = []
    for i in range(num_image):
        # one index per image; items must be added before build()
        t = annoy.AnnoyIndex(f, "euclidean")
        nFeatures = len(descriptors[i])
        for j in range(nFeatures):
            t.add_item(j, descriptors[i][j][1])
        t.build(500)
        trees.append(t)

    best_matches = []
    for i in range(num_image):
        best_match = []
        nFeatures = len(descriptors[i])
        for n in range(nFeatures):
            best = np.array([-1] * num_image)
            fd = descriptors[i][n][1]
            # search for the best match for a feature in other images
            for j in range(num_image):
                # skip the same image
                if i == j:
                    continue
                # query the index of the other image
                ind, dist = trees[j].get_nns_by_vector(fd, 2, search_k=-1, include_distances=True)
                # print("ind, dist", ind, dist)
                if dist[1] == 0 or dist[0] / dist[1] < 0.8:
                    best[j] = ind[0]
            best_match.append(best)
        best_matches.append(best_match)

    print("Feature matching complete...")
    return best_matches
def __init__(self, tree_path, database_path):
    self.model = annoy.AnnoyIndex(128, "angular")
    self.model.load(tree_path)
    with open(database_path, "r") as f:
        self.database = json.load(f)
def build(self, data, k, cp):
    n_items, vector_length = data.shape

    # initialize parameters
    self.method_param = init_method_param("annoy", data=data, cp=cp)
    ntrees = self.method_param["n_trees"]

    # build index
    self.index = annoy.AnnoyIndex(vector_length, metric=self.metric)
    for i in range(n_items):
        self.index.add_item(i, data[i])
    self.index.build(ntrees)

    # def query_train(self, data, k):
    # add search_k parameter: tradeoff between speed and accuracy?
    # neighbors_single, distances_single = np.asarray(self.index.get_nns_by_vector(data[i], n=k, search_k=-1, include_distances=True))

    # output array with points x neighbors:
    neighbors = np.empty((data.shape[0], k), dtype=int)
    distances = np.empty((data.shape[0], k))
    for i in range(len(data)):
        neighbors_single, distances_single = np.asarray(
            self.index.get_nns_by_item(i, n=k, search_k=-1, include_distances=True))
        neighbors[i] = neighbors_single
        distances[i] = distances_single

    # print("neighbors.shape: {}".format(neighbors.shape))
    # print("neighbors[0]: {}".format(neighbors[0]))
    # print(neighbors.shape)
    # print("distances.shape: {}".format(distances.shape))
    # print("distances[0]: {}".format(distances[0]))
    return neighbors, distances
def fit(self, X):
    self.n_samples_fit_ = X.shape[0]
    self.annoy_ = annoy.AnnoyIndex(X.shape[1], metric=self.metric)
    for i, x in enumerate(X):
        self.annoy_.add_item(i, x.tolist())
    self.annoy_.build(self.n_trees)
    return self
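# Hedged companion sketch (assumed method, not in the original snippet): a minimal
# k-nearest-neighbour query against the index built in fit() above.
def kneighbors(self, X, n_neighbors=5):
    results = []
    for x in X:
        ids, dists = self.annoy_.get_nns_by_vector(
            x.tolist(), n_neighbors, include_distances=True)
        results.append((ids, dists))
    return results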
def __init__(
    self,
    module_url,
    index_file_path,
    mapping_file_path,
    dimensions,
    random_projection_matrix_file,
):
    # Load the TF-Hub module
    print('Loading the TF-Hub module...')
    self.embed_fn = hub.load(module_url)
    print('TF-hub module is loaded.')
    dimensions = self.embed_fn(['']).shape[1]

    self.random_projection_matrix = None
    if tf.io.gfile.exists(random_projection_matrix_file):
        with open(random_projection_matrix_file, 'rb') as handle:
            self.random_projection_matrix = pickle.load(handle)
        dimensions = self.random_projection_matrix.shape[1]

    self.index = annoy.AnnoyIndex(dimensions, metric=_METRIC)
    self.index.load(index_file_path, prefault=True)
    print('Annoy index is loaded.')
    with open(mapping_file_path, 'rb') as handle:
        self.mapping = pickle.load(handle)
    print('Mapping file is loaded.')
def __create_annoy_index(self, data, space='angular', n_trees=30, load=True,
                         filepath=None, save=False):
    """
    Create or Load Approximate Nearest Neighbors index

    Args:
        data (array): Thread word vectors
        space (str): Distance (metric) function can be "angular", "euclidean",
            "manhattan", "hamming", or "dot"
        n_trees (int): Number of trees in a forest. More trees give higher
            precision when querying.
        load (boolean): Load model (True) -- Create model (False)
        filepath (str): Path to Nearest Neighbors
        save (boolean): Save model (True) -- Only used if load=False

    Returns:
        index (object): Annoy object
    """
    index = annoy.AnnoyIndex(self.dim, metric=space)
    if load:
        # only need to init if index is saved
        index.load(filepath)
    else:
        for i, vect in enumerate(data):
            # add data
            index.add_item(i, vect)
        # build model
        index.build(n_trees)
        # save indexes
        if save:
            index.save(filepath)
    return index
def musicnn_penultimate(args):
    import musicnn.extractor
    import numpy as np

    index = annoy.AnnoyIndex(200, 'euclidean')

    def slow_embed(z):
        i, wav = z
        try:
            x = musicnn.extractor.extractor(str(wav))
            with open(f'musicnn/{wav.stem}.pickle', 'wb') as f:
                pickle.dump(x, f)
            x = x[2]['penultimate'].mean(axis=0)
            x = x / np.linalg.norm(x)
            return i, x
        except UnboundLocalError:
            return i, None

    results = Parallel(n_jobs=6)(
        delayed(slow_embed)(z)
        for z in tqdm(enumerate(sorted(args.wav_dir.glob('*.wav')))))

    for i, x in results:
        if x is None:
            continue
        index.add_item(i, x)
    index.build(100)
    index.save('musicnn.annoy')
    return index
def rebuild(self, keyTransform=None):
    vecs = self.readVectors(keyTransform)
    ids = []
    n = None
    for k, v in vecs.items():
        ids.append(k)
        if len(v['vector']) == 0:
            pass
        elif n is None:
            n = len(v['vector'])
        else:
            assert (n == len(v['vector']))
    ids.sort()
    ai = annoy.AnnoyIndex(n)
    for i, (k, v) in enumerate(vecs.items()):
        if len(v['vector']) == 0:
            continue
        j = binary_search(ids, k)
        ai.add_item(j, v['vector'])
        if i % 10000 == 0:
            logger.info('loading vector %d into annoy index' % i)
    logger.info('building annoy datastructure')
    ai.build(10)
    logger.info('saving annoy datastructure')
    ai.save(self.pathAnnoy)
    with open(self.pathIds, 'wb') as f:
        cPickle.dump([n] + ids, f)
    self.ids = ids
    self.index = ai
def make_adj_mat(
    X,
    n_neighbors=15,
    metric="euclidean",
    n_trees=50,
    seed=None,
    use_dists=False,
    symmetrize=True,
    drop_first=True,
):
    t = annoy.AnnoyIndex(X.shape[1], metric)
    if seed is not None:
        t.set_seed(seed)

    [t.add_item(i, x) for i, x in enumerate(X)]
    t.build(n_trees)

    # construct the adjacency matrix for the graph
    adj = lil_matrix((X.shape[0], X.shape[0]))
    for i in range(X.shape[0]):
        neighs_, dists_ = t.get_nns_by_item(i, n_neighbors + 1, include_distances=True)
        if drop_first:
            neighs = neighs_[1:]
            dists = dists_[1:]
        else:
            neighs = neighs_[:n_neighbors]
            dists = dists_[:n_neighbors]

        adj[i, neighs] = dists if use_dists else 1
        if symmetrize:
            adj[neighs, i] = dists if use_dists else 1  # symmetrize on the fly

    return adj, t
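# Hedged usage sketch (assumptions: synthetic random data and ad-hoc argument values):
# building the k-NN adjacency matrix with make_adj_mat() and reusing the returned
# Annoy index for a direct query.
import numpy as np

X_demo = np.random.rand(100, 16).astype(np.float32)
adj, tree = make_adj_mat(X_demo, n_neighbors=10, n_trees=20, seed=42)
# adj is a (100, 100) scipy.sparse lil_matrix; tree can be queried directly
neighbors = tree.get_nns_by_item(0, 11)[1:]  # drop the query point itself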
def add_index(self, user_rooms):
    # user_rooms = [{user_id: '', room_id: '', topic: ''}]
    #print user_rooms
    self.ann = annoy.AnnoyIndex(self.size, self.metric)
    with self.env.begin(write=True) as txn:
        for user_room in user_rooms:
            phrase = user_room.get("topic")
            if phrase:
                # make it a string
                id = 'i%d' % self.index_size
                room_word = 'w' + str(user_room.get("user_id")) + ':' + str(
                    user_room.get("room_id")) + ':' + str(phrase)
                if isinstance(room_word, unicode):
                    room_word = room_word.encode('utf-8')
                # avoid duplicate user-rooms vector
                #print txn.get(room_word)
                #if not txn.get(room_word):
                #print user_room
                # get the vector
                if isinstance(phrase, str):
                    phrase = phrase.decode('utf-8')
                _phrase, vec = get_vectors_cloud(phrase)
                #print vec
                # add the vector to annoy index
                self.ann.add_item(self.index_size, vec)
                # use the same id to point to word
                # index by id
                txn.put(id, room_word)
                # index by user_room
                txn.put(room_word, id)
                self.index_size += 1
    return "Added user rooms to index. New size: {}".format(self.index_size)
def run(database_path, index_path, n_items):
    connection = sqlite3.connect(database_path)
    index = annoy.AnnoyIndex(128, 'euclidean')
    index.load(index_path)

    image_loader = photos_2_db.ImageLoader()
    face_detector = photos_2_db.FaceDetector()
    landmarks_predictor = photos_2_db.LandmarksPredictor()
    face_recognizer = photos_2_db.FaceRecognizer()

    while True:
        input_image = input('Image path >>> ')
        # check the image the user entered, not the already-loaded index path
        if not os.path.isfile(input_image):
            print('File %s does not exist.' % input_image)
            continue

        images = image_loader(file_path=input_image, width=200, height=200)
        faces = face_detector(images=images)
        landmarks = landmarks_predictor(images=images, faces=faces)
        features = face_recognizer(images=images, landmarks=landmarks)

        for feature in features[0]:
            f = list(feature)
            nearest = index.get_nns_by_vector(f, n_items, search_k=-1, include_distances=True)
            for idx, distance in zip(nearest[0], nearest[1]):
                cur = connection.cursor()
                cur.execute('SELECT * FROM features where id=%s' % idx)
                rows = cur.fetchall()
                for row in rows:
                    im = Image.open(row[1])
                    im.show()
def predict(self, annoytreepath):
    import annoy
    self._annoy = annoy.AnnoyIndex(f=200, metric=self._metric)
    self._annoy.load(annoytreepath)
    with open('word2id.pickle', 'rb') as f:
        self.word2id = cPickle.load(f)
    with open('id2word.pickle', 'rb') as f:
        self.id2word = cPickle.load(f)