def __init__(self, train_data=None, test_data=None, save_path="doc2vec.pt", log=utils.get_logging()):
    """Create a fresh Doc2Vec model and remember the data, output path and logger.

    Args:
        train_data: stored unchanged on ``self.train_data``.
        test_data: stored unchanged on ``self.test_data``.
        save_path: stored unchanged on ``self.save_path``
            (default ``"doc2vec.pt"``).
        log: stored unchanged on ``self.log``. The default is the object
            returned by ``utils.get_logging()``, evaluated once when this
            function is defined, so all default-constructed instances
            share it.
    """
    # NOTE(review): ``size`` and ``iter`` are the pre-4.0 gensim keyword
    # names -- confirm against the gensim version this project pins.
    doc2vec_options = {"size": 50, "min_count": 2, "iter": 55}
    self.model = gensim.models.doc2vec.Doc2Vec(**doc2vec_options)

    self.train_data, self.test_data = train_data, test_data
    self.save_path, self.log = save_path, log
#!/usr/bin/env python
# encoding: utf-8

from __future__ import print_function

import argparse
from functools import partial
import numpy as np
import multiprocessing as mp
import torch
import figet
from figet import utils

log = utils.get_logging()


def tune(baseline, dist, type_, num_types, init_threshold, processes=8):
    """Search a threshold for every type index using a pool of workers.

    Every index in ``range(num_types)`` starts out mapped to
    ``init_threshold``; each entry is then overwritten with the value that
    ``search_threshold`` reports for that index.

    Args:
        baseline: forwarded unchanged to ``search_threshold``.
        dist: forwarded unchanged to ``search_threshold``.
        type_: forwarded unchanged to ``search_threshold``.
        num_types: number of type indices to tune (``0 .. num_types - 1``).
        init_threshold: initial value for every index; also forwarded to
            ``search_threshold``.
        processes: number of worker processes in the pool. Defaults to 8,
            the value that used to be hard-coded.

    Returns:
        dict mapping each type index to its threshold.
    """
    # ``range`` instead of ``xrange`` so the function runs on Python 2 and 3.
    idx2threshold = {idx: init_threshold for idx in range(num_types)}
    func = partial(search_threshold, init_threshold=init_threshold,
                   num_types=num_types, dist=dist, type_=type_,
                   baseline=baseline)

    pool = mp.Pool(processes=processes)
    try:
        # search_threshold is expected to return (idx, best_threshold)
        # pairs, which dict.update consumes directly.
        idx2threshold.update(pool.map(func, range(num_types)))
    finally:
        # Always release the worker processes, even if a worker raised;
        # otherwise every call to tune() leaks a pool.
        pool.close()
        pool.join()
    return idx2threshold


# NOTE(review): the definition below is cut off in the visible source (header
# only, no body), so it is preserved verbatim here instead of being guessed at.
# def search_threshold(idx, init_threshold, num_types, dist, type_, baseline):
from pyflann import *
from pyflann.exceptions import FLANNException
import numpy as np
from figet.utils import get_logging
from figet.Constants import COARSE_FLAG, FINE_FLAG, UF_FLAG
from figet.hyperbolic import poincare_distance
import torch
from operator import itemgetter

log = get_logging()
# Cosine similarity reduced over dimension 0 of its two inputs.
cos_sim_func = torch.nn.CosineSimilarity(dim=0)


def cosine_distance(a, b):
    """Return ``1 - cos_sim_func(a, b)``.

    ``a`` and ``b`` are handed straight to ``torch.nn.CosineSimilarity``
    with ``dim=0``.
    """
    similarity = cos_sim_func(a, b)
    return 1 - similarity


class kNN(object):
    """Nearest-neighbour helper over type vectors.

    Only the constructor is visible in this part of the file.
    """

    def __init__(self, type2vec, type_vocab, metric):
        """Move the type vectors to the chosen device and set query parameters.

        Args:
            type2vec: object exposing ``.to(device)`` and ``.type(dtype)``
                (presumably a torch tensor of type embeddings -- confirm
                against the caller); stored as ``torch.float`` on the
                selected device.
            type_vocab: stored unchanged on ``self.type_vocab``.
            metric: the string ``"hyperbolic"`` enables hyperbolic mode;
                any other value disables it.
        """
        # Use the GPU whenever torch reports one.
        if torch.cuda.is_available():
            self.device = 'cuda'
        else:
            self.device = 'cpu'

        on_device = type2vec.to(self.device)
        self.type2vec = on_device.type(torch.float)
        self.type_vocab = type_vocab

        self.hyperbolic = metric == "hyperbolic"
        if self.hyperbolic:
            self.query_factor = 25
        else:
            self.query_factor = 1

        # Neighbour count per granularity flag.
        self.neighs_per_granularity = {COARSE_FLAG: 1, FINE_FLAG: 1, UF_FLAG: 3}