def __init__(self,
                 n_topics,
                 corpus,
                 words2vec_ny,
                 words2vec,
                 vocab_ny,
                 alpha=None):
        """Store the constructor arguments and create empty model state.

        Args:
            n_topics: Number of topics; must be an ``int``.
            corpus: Stored unchanged on ``self.corpus``.
            words2vec_ny: Stored unchanged on ``self.words2vec_ny``.
            words2vec: Object exposing a ``vector_size`` attribute, which is
                copied to ``self.word_vec_size`` (presumably a word-vector
                model -- confirm against the caller).
            vocab_ny: Stored unchanged on ``self.vocab_ny``.
            alpha: Stored unchanged on ``self.alpha``; defaults to ``None``.

        Raises:
            AssertionError: If ``n_topics`` is not an ``int``.
        """
        # Validate before building any state or helper objects.  An explicit
        # raise is used because ``assert`` statements are removed when Python
        # runs with -O; AssertionError is kept so that callers which already
        # catch it keep working.
        if not isinstance(n_topics, int):
            raise AssertionError('n_topics should be an integer')

        self.n_dk = None
        self.corpus = corpus  # original author's note: corpus.index2doc
        self.priors = None
        self.n_topics = n_topics
        self.vocab_ny = vocab_ny  # original author's note: set([])
        self.topic_params = defaultdict(dict)
        self.topic_params2 = defaultdict(dict)
        self.word_vec_size = words2vec.vector_size
        self.alpha = alpha
        self.vocab2topic = {}
        self.words2vec = words2vec
        self.words2vec_ny = words2vec_ny
        # Helper objects from the project's `cholesky` and `util` modules.
        self.solver = cholesky.Helper()
        self.AVGLD = util.CalculateComplexity()
        self.N_sum = 0
        self.logDeterminant = []
        self.z_dw = []
Example no. 2
0
 def __init__(self,
              num_topics,
              corpus,
              word_vector_filepath=None,
              word_vector_model=None,
              alpha=0.2,
              outputfile=None,
              preprocess=False):
     """Record the constructor arguments and create empty bookkeeping state.

     Args:
         num_topics: Stored on ``self.numtopics``.
         corpus: Stored on ``self.corpus``.
         word_vector_filepath: Stored on ``self.wordvecFP``.
         word_vector_model: Stored on ``self.wvmodel``.
         alpha: Stored on ``self.alpha``; defaults to 0.2.
         outputfile: Stored on ``self.output_file_name``.
         preprocess: Stored on ``self.preprocess``; defaults to False.
     """
     # Constructor arguments, kept exactly as passed in.
     self.corpus = corpus
     self.numtopics = num_topics
     self.alpha = alpha
     self.wordvecFP = word_vector_filepath
     self.wvmodel = word_vector_model
     self.output_file_name = outputfile
     self.preprocess = preprocess

     # Attributes that start out as None; this method does not fill them in.
     self.doc_topic_CT = None
     self.priors = None
     self.word_vec_size = None

     # Containers that start out empty.
     self.vocab = set()
     self.word_vecs = dict()
     self.word_topics = dict()
     self.topic_params = defaultdict(dict)
     self.test_word_topics = defaultdict(list)
     self.test_word_topic_count = defaultdict(int)

     # Helper object from the project's `cholesky` module.
     self.solver = cholesky.Helper()
Example no. 3
0
 def __init__(self,
              num_topics,
              corpus,
              word_vector_filepath=None,
              word_vector_model=None,
              run_name=str(1)):
     """Record the constructor arguments and create empty bookkeeping state.

     Args:
         num_topics: Stored on ``self.numtopics``; also the divisor used to
             derive ``self.alpha`` (20 / num_topics).
         corpus: Stored on ``self.corpus``.
         word_vector_filepath: Stored on ``self.wordvecFP``.
         word_vector_model: Stored on ``self.wvmodel``.
         run_name: Stored on ``self.run_name``; defaults to ``str(1)``.
     """
     # Constructor arguments, kept exactly as passed in.
     self.corpus = corpus
     self.numtopics = num_topics
     self.wordvecFP = word_vector_filepath
     self.wvmodel = word_vector_model
     self.run_name = run_name

     # alpha is not a parameter here; it is derived from the topic count.
     self.alpha = 20.0 / self.numtopics

     # Attributes that start out as None; this method does not fill them in.
     self.doc_topic_CT = None
     self.priors = None
     self.word_vec_size = None

     # Containers that start out empty.
     self.vocab = set()
     self.word_topics = dict()
     self.word_vecs = dict()
     self.word_index = dict()
     self.doc_word_counts = dict()
     self.topic_params = defaultdict(dict)

     # Helper object from the project's `cholesky` module.
     self.solver = cholesky.Helper()