Beispiel #1
0
 def __init__(self,
              train_data=None,
              test_data=None,
              save_path="doc2vec.pt",
              log=utils.get_logging()):
     """Create an untrained Doc2Vec model and store the run configuration.

     Args:
         train_data: Stored as ``self.train_data``; not inspected here.
         test_data: Stored as ``self.test_data``; not inspected here.
         save_path: Stored as ``self.save_path`` (default ``"doc2vec.pt"``).
         log: Logger stored as ``self.log``. The default is evaluated once,
             when the method is defined, so all instances that rely on it
             share the same object.
     """
     self.train_data, self.test_data = train_data, test_data
     self.save_path, self.log = save_path, log

     # Hyper-parameters are fixed here rather than exposed to callers.
     # NOTE(review): newer gensim releases spell `size`/`iter` as
     # `vector_size`/`epochs` -- confirm against the pinned gensim version.
     doc2vec_settings = {"size": 50, "min_count": 2, "iter": 55}
     self.model = gensim.models.doc2vec.Doc2Vec(**doc2vec_settings)
Beispiel #2
0
#!/usr/bin/env python
# encoding: utf-8

from __future__ import print_function

import argparse
from functools import partial
import numpy as np
import multiprocessing as mp
import torch
import figet
from figet import utils

# Module-level logger, obtained from figet's logging helper.
log = utils.get_logging()


def tune(baseline, dist, type_, num_types, init_threshold, processes=8):
    """Search a threshold for every type index using a pool of workers.

    ``search_threshold`` is invoked once per index in ``range(num_types)``;
    each call must return an ``(idx, best_threshold)`` pair, which is used
    to fill the resulting mapping.

    Args:
        baseline: Forwarded unchanged to ``search_threshold``.
        dist: Forwarded unchanged to ``search_threshold``.
        type_: Forwarded unchanged to ``search_threshold``.
        num_types: Number of type indices to tune (indices ``0..num_types-1``).
        init_threshold: Initial value of every entry in the returned mapping;
            also forwarded to ``search_threshold``.
        processes: Number of worker processes. Defaults to 8, the value that
            used to be hard-coded.

    Returns:
        dict mapping each type index to the threshold reported for it.
    """
    # `range` instead of `xrange` keeps this runnable on Python 2 and 3.
    idx2threshold = {idx: init_threshold for idx in range(num_types)}
    func = partial(search_threshold,
                   init_threshold=init_threshold,
                   num_types=num_types,
                   dist=dist,
                   type_=type_,
                   baseline=baseline)

    pool = mp.Pool(processes=processes)
    try:
        results = pool.map(func, range(num_types))
    finally:
        # Always release the worker processes, even if a worker raised;
        # try/finally is used because Pool is not a context manager on
        # Python 2.
        pool.close()
        pool.join()

    for idx, best_t in results:
        idx2threshold[idx] = best_t
    return idx2threshold


def search_threshold(idx, init_threshold, num_types, dist, type_, baseline):
from pyflann import *
from pyflann.exceptions import FLANNException
import numpy as np
from figet.utils import get_logging
from figet.Constants import COARSE_FLAG, FINE_FLAG, UF_FLAG
from figet.hyperbolic import poincare_distance
import torch
from operator import itemgetter

# Module-level logger, obtained from figet's logging helper.
log = get_logging()
# Shared similarity module; dim=0 means the similarity is taken along the
# first axis of its inputs.
cos_sim_func = torch.nn.CosineSimilarity(dim=0)


def cosine_distance(a, b):
    """Return ``1 - cosine_similarity(a, b)`` computed along dimension 0.

    Tensors pointing the same way give 0, orthogonal ones give 1 and
    opposite ones give 2.
    """
    similarity = cos_sim_func(a, b)
    return 1 - similarity


class kNN(object):
    def __init__(self, type2vec, type_vocab, metric):
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.type2vec = type2vec.to(self.device).type(torch.float)
        self.type_vocab = type_vocab
        self.hyperbolic = metric == "hyperbolic"
        self.query_factor = 25 if self.hyperbolic else 1

        self.neighs_per_granularity = {
            COARSE_FLAG: 1,
            FINE_FLAG: 1,
            UF_FLAG: 3
        }