Beispiel #1
0
class LDA(object):
    """Convenience wrapper around a gensim ``LdaModel`` and its dictionary.

    The instance remembers which files the model and the vocabulary are
    saved to / loaded from, and offers training, loading and per-document
    topic inference on top of them.
    """

    def __init__(self, model, vocab, corpus=None, topics=200, passes=1):
        """Record file locations and training settings; nothing is loaded.

        Args:
            model: File name handed to ``LdaModel.save`` / ``LdaModel.load``.
            vocab: File name handed to ``Dictionary.save`` / ``Dictionary.load``.
            corpus: Argument passed to ``SentenceDocCorpus`` by ``train``.
                Only needed when training.
            topics: Number of topics requested from ``LdaModel``.
            passes: Number of training passes requested from ``LdaModel``.
        """
        self._model_file = model
        self._dict_file = vocab
        self._corpus_file = corpus
        self._topics = topics
        self._passes = passes

    def train(self):
        """Train a new model on the corpus and save model and dictionary."""
        self._corpus = SentenceDocCorpus(self._corpus_file)
        self._dictionary = self._corpus.dictionary
        self._lda = LdaModel(self._corpus,
                             num_topics=self._topics,
                             id2word=self._dictionary,
                             passes=self._passes)

        self._lda.save(self._model_file)
        self._dictionary.save(self._dict_file)

    def load(self):
        """Load a previously saved model and dictionary from disk."""
        self._lda = LdaModel.load(self._model_file)
        self._dictionary = Dictionary.load(self._dict_file)

    def _bow(self, words):
        """Convert *words* to the bag-of-words form the model expects.

        The words first go through ``common.filter`` (a project helper whose
        exact filtering rules are defined outside this class).
        """
        return self._dictionary.doc2bow(common.filter(words))

    def topics(self, words):
        """Return the model's topic assignment for *words*.

        This is whatever indexing the model with the bag-of-words yields,
        i.e. the model's default thresholding applies.
        """
        return self._lda[self._bow(words)]

    def topic_vector(self, words):
        """Return the topic weights of *words* as a dense numpy vector.

        The vector always has ``num_topics`` entries and entry ``i`` holds the
        weight of topic ``i``. Topics the model does not report stay at 0.
        """
        vector = np.zeros(self._lda.num_topics)
        # __getitem__ is called explicitly because that is the only way to
        # pass ``eps`` through the indexing API. Even with eps=0 the model
        # may omit topics, so every weight is placed by its topic id instead
        # of relying on the order/length of the returned list.
        for topic_id, weight in self._lda.__getitem__(self._bow(words), eps=0):
            vector[topic_id] = weight
        return vector
Beispiel #2
0
class LDA(object):
    """Wraps a gensim ``LdaModel`` together with the dictionary it was built on.

    Holds the file names used for persistence and exposes ``train``, ``load``
    and two inference helpers (``topics`` and ``topic_vector``).
    """

    def __init__(self, model, vocab, corpus=None, topics=200, passes=1):
        """Remember where things live and how to train; performs no I/O.

        Args:
            model: File name used by ``LdaModel.save`` / ``LdaModel.load``.
            vocab: File name used by ``Dictionary.save`` / ``Dictionary.load``.
            corpus: Value given to ``SentenceDocCorpus`` when ``train`` runs.
            topics: ``num_topics`` forwarded to ``LdaModel``.
            passes: ``passes`` forwarded to ``LdaModel``.
        """
        self._model_file = model
        self._dict_file = vocab
        self._corpus_file = corpus
        self._topics = topics
        self._passes = passes

    def train(self):
        """Fit a fresh model on the corpus, then save model and dictionary."""
        self._corpus = SentenceDocCorpus(self._corpus_file)
        self._dictionary = self._corpus.dictionary
        self._lda = LdaModel(
            self._corpus,
            num_topics=self._topics,
            id2word=self._dictionary,
            passes=self._passes,
        )

        self._lda.save(self._model_file)
        self._dictionary.save(self._dict_file)

    def load(self):
        """Restore the model and the dictionary from their files."""
        self._lda = LdaModel.load(self._model_file)
        self._dictionary = Dictionary.load(self._dict_file)

    def _bow(self, words):
        """Filter *words* with ``common.filter`` and convert them to bag-of-words."""
        return self._dictionary.doc2bow(common.filter(words))

    def topics(self, words):
        """Return the result of indexing the model with the bag-of-words of *words*."""
        return self._lda[self._bow(words)]

    def topic_vector(self, words):
        """Return a dense vector with one weight per topic for *words*.

        The result has exactly ``num_topics`` entries, indexed by topic id;
        topics missing from the model's answer are left at 0.
        """
        # eps can only be supplied by calling __getitem__ directly.
        pairs = self._lda.__getitem__(self._bow(words), eps=0)
        vector = np.zeros(self._lda.num_topics)
        # Place each weight at its topic id: the model is free to skip topics,
        # and dropping the ids would silently shift the remaining weights.
        for topic_id, weight in pairs:
            vector[topic_id] = weight
        return vector
class LDA(BaseEstimator, TransformerMixin):
    """scikit-learn style transformer backed by a gensim ``LdaModel``.

    All keyword arguments given to the constructor are kept in ``params`` and
    forwarded unchanged to ``LdaModel`` when ``fit`` is called.
    """

    def __init__(self, **params):
        self.params = params

    def get_params(self, deep=True):
        """Return a copy of the keyword arguments forwarded to ``LdaModel``.

        Overridden because the constructor only declares ``**params``, so the
        inherited signature-based lookup cannot see the actual settings and
        cloning the estimator would lose them. ``deep`` is accepted for API
        compatibility and otherwise ignored.
        """
        return dict(self.params)

    def set_params(self, **params):
        """Update the forwarded keyword arguments and return ``self``."""
        self.params.update(params)
        return self

    def fit(self, X, y=None):
        """Train the LDA model on ``X`` and return ``self``.

        ``X`` is wrapped in ``Sparse2Corpus`` with ``documents_columns=False``
        (each row of ``X`` is treated as one document). ``y`` is ignored.
        """
        corpus = Sparse2Corpus(X, documents_columns=False)
        self.lda = LdaModel(corpus, **self.params)
        return self

    def transform(self, X, y=None):
        """Return the topic weights of every document in ``X``.

        The result has one row per document and ``num_topics`` columns;
        column ``i`` holds the weight of topic ``i``. ``y`` is ignored.
        """
        corpus = Sparse2Corpus(X, documents_columns=False)
        n_topics = self.lda.num_topics
        rows = []
        for doc in corpus:
            row = np.zeros(n_topics)
            # Weights are placed by topic id so rows stay aligned and of equal
            # length even when the model leaves some topics out of its answer.
            for topic_id, weight in self.lda.__getitem__(doc, eps=0):
                row[topic_id] = weight
            rows.append(row)
        # reshape keeps the result 2-D even when X contains no documents.
        topics = np.array(rows).reshape(-1, n_topics)
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print(topics.shape)
        return topics
Beispiel #4
0
class LDA(BaseEstimator, TransformerMixin):
    """Transformer that turns a document-term matrix into LDA topic weights.

    Constructor keyword arguments are stored in ``params`` and passed as-is
    to gensim's ``LdaModel`` during ``fit``.
    """

    def __init__(self, **params):
        self.params = params

    def get_params(self, deep=True):
        """Return a copy of the stored ``LdaModel`` keyword arguments.

        The constructor takes only ``**params``, so the inherited
        signature-based implementation cannot report these settings; without
        this override a cloned estimator would lose its configuration.
        ``deep`` exists only for API compatibility.
        """
        return dict(self.params)

    def set_params(self, **params):
        """Merge ``params`` into the stored keyword arguments; return ``self``."""
        self.params.update(params)
        return self

    def fit(self, X, y=None):
        """Fit the LDA model on ``X`` (rows are documents); ``y`` is unused."""
        corpus = Sparse2Corpus(X, documents_columns=False)
        self.lda = LdaModel(corpus, **self.params)
        return self

    def _dense_row(self, doc):
        """Return the topic weights of one bag-of-words document as a dense row."""
        row = np.zeros(self.lda.num_topics)
        # __getitem__ is invoked directly so that eps can be passed. Topic ids
        # are honoured because the model may omit topics from its answer.
        for topic_id, weight in self.lda.__getitem__(doc, eps=0):
            row[topic_id] = weight
        return row

    def transform(self, X, y=None):
        """Return a ``(documents, num_topics)`` array of topic weights for ``X``.

        Column ``i`` is the weight of topic ``i``; ``y`` is unused.
        """
        corpus = Sparse2Corpus(X, documents_columns=False)
        rows = [self._dense_row(doc) for doc in corpus]
        # reshape keeps the result 2-D even when there are no documents.
        topics = np.array(rows).reshape(-1, self.lda.num_topics)
        # Function-call form works on both Python 2 and Python 3.
        print(topics.shape)
        return topics