Ejemplo n.º 1
0
 def __init__(self, n_topics, name=''):
     """
     Initializes a model without training.

     n_topics: number of topics; stored as ``self.K``.
     name: optional model name (must be a ``str``). When it is the empty
         string, the name defaults to ``'lda_'`` followed by the topic
         count, e.g. ``'lda_10'``.

     Raises AssertionError if ``name`` is not a string.
     """
     super(MyLda, self).__init__()
     self.K = n_topics
     # isinstance (rather than an exact type comparison) also accepts
     # str subclasses.
     assert isinstance(name, str)
     # An empty name means "derive one from the topic count".
     self.name = name if name != '' else 'lda_' + str(self.K)
     self.tokenizer = Tokenizer()
     # All state flags start out False on a freshly constructed model.
     self.has_vocab = False
     self.has_corpus = False
     self.is_trained = False
     self.has_viz_data = False
Ejemplo n.º 2
0
import sys
from collections import defaultdict
from pprint import pprint

import numpy as np
from gensim import matutils
from gensim import models
from pymongo import MongoClient
from sklearn.externals import joblib

from alife.mockdb import get_mock
from alife.txtmine import stemmer
from alife.txtmine.tokenizer import Tokenizer
from alife.util import model_loader
from alife.util.general import cosine_dist, euclidean_dist, save_dict
from alife.visualize.w2v_vis import embedding_fig

# Module-wide handles: the `patents` database reached through a
# default-configured MongoClient, and one shared Tokenizer instance.
_db = MongoClient().patents
_tokenizer = Tokenizer()

# (name, number) pairs; the numbers are presumably patent numbers
# (see `_pnos` below) -- TODO confirm against the database.
_friendly_patents = [
    ('zeolites', 4061724),
    ('semiconductors', 4064521),
    ('nonwoven webs', 4340563),
    ('rsa', 4405829),
    ('stents', 4655771),
    ('pcr', 4683202),
    ('bubble jet', 4723129),
    ('cell phone', 5103459),
    ('microarrays', 5143854),
    ('browser', 5572643),
]

# Parallel tuples holding the first and second element of every pair.
_names = tuple(pair[0] for pair in _friendly_patents)
_pnos = tuple(pair[1] for pair in _friendly_patents)


def _dist(v1, v2):
    """Return the dot product of the unit-normalized ``v1`` and ``v2``.

    Each argument is passed through ``matutils.unitvec`` before the dot
    product is taken. NOTE(review): for non-zero dense vectors this is the
    cosine *similarity* (larger means more alike), not a distance, despite
    the function's name -- confirm callers expect that orientation.

    Relies on the module-level ``import numpy as np``.
    """
    unit_v1 = matutils.unitvec(v1)
    unit_v2 = matutils.unitvec(v2)
    return np.dot(unit_v1, unit_v2)


def load_w2v(filename):
    """Load and return the word2vec model stored at ``filename``."""
    load_model = models.word2vec.Word2Vec.load
    return load_model(filename)