Example #1
0
 def text_similarity(self, t1, t2):
     """Return the cosine similarity of two texts in the model's vector space.

     Each text is passed through ``self.text2model`` and the result is
     converted to gensim's sparse format with ``matutils.any2sparse``
     before ``matutils.cossim`` is applied.

     :param t1: first text, in whatever form ``self.text2model`` accepts.
     :param t2: second text, same form as ``t1``.
     :return: the value computed by ``matutils.cossim`` for the two vectors.
     """
     # The former ``if self._model == 'tfidf'`` / ``else`` branches were
     # identical, so a single code path covers every model type.
     t1_vec = matutils.any2sparse(self.text2model(t1))
     t2_vec = matutils.any2sparse(self.text2model(t2))
     return matutils.cossim(t1_vec, t2_vec)
Example #2
0
 def text_similarity(self, t1, t2):
     """Compute the cosine similarity between two texts.

     Both texts are mapped through ``self.text2model``, converted to
     gensim's sparse representation via ``matutils.any2sparse``, and then
     compared with ``matutils.cossim``.

     :param t1: first text, in whatever form ``self.text2model`` accepts.
     :param t2: second text, same form as ``t1``.
     :return: the result of ``matutils.cossim`` on the two sparse vectors.
     """
     # Both branches of the old ``self._model == 'tfidf'`` test ran the
     # same statements; the redundant conditional has been removed.
     t1_vec = matutils.any2sparse(self.text2model(t1))
     t2_vec = matutils.any2sparse(self.text2model(t2))
     return matutils.cossim(t1_vec, t2_vec)
Example #3
0
 def transformed_corpus():
     """Lazily yield the hidden representation of each item in ``input_data``.

     Every item is densified with ``matutils.corpus2dense`` and fed to
     ``self._get_hidden_representations``. The raw result is yielded when
     ``numpy_output`` is truthy; otherwise it is first converted with
     ``matutils.any2sparse``.
     """
     # NOTE(review): corpus2dense receives one item at a time here --
     # confirm that each item has the shape corpus2dense expects.
     for item in input_data:
         dense = matutils.corpus2dense(item, self.input_dimensionality)
         hidden = self._get_hidden_representations(dense)
         yield hidden if numpy_output else matutils.any2sparse(hidden)
Example #4
0
    def __getitem__(self, input_data, numpy_input=False, numpy_output=False, chunksize=10000):
        """Transform ``input_data`` into its hidden representation(s).

        :param input_data: the data to transform. With ``numpy_input`` set it
            is handed to ``self._get_hidden_representations`` unchanged;
            otherwise it is checked with ``utils.is_corpus`` and a
            non-corpus value is wrapped in a one-element list.
        :param numpy_input: when truthy, skip the corpus handling entirely.
        :param numpy_output: when truthy, return/yield the raw hidden
            representations; otherwise convert each with
            ``matutils.any2sparse``.
        :param chunksize: when truthy, items are grouped with
            ``utils.grouper`` and transformed one chunk at a time; when
            falsy, each item is transformed on its own.
        :return: for ``numpy_input``, the (optionally sparse-converted)
            hidden representation; for a corpus, a generator of results;
            for a single non-corpus input, the last generated result.
        """
        def _format(hidden):
            # Shared output policy: raw array or gensim sparse format.
            return hidden if numpy_output else matutils.any2sparse(hidden)

        if numpy_input:
            return _format(self._get_hidden_representations(input_data))

        is_corpus, input_data = utils.is_corpus(input_data)
        if not is_corpus:
            input_data = [input_data]

        def _chunked():
            for docs in utils.grouper(input_data, chunksize):
                dense = matutils.corpus2dense(docs, self.input_dimensionality)
                # Iterate the transposed result so one vector is emitted
                # per row of ``.T``.
                for vector in self._get_hidden_representations(dense).T:
                    yield _format(vector)

        def _one_by_one():
            # NOTE(review): corpus2dense receives a single item here --
            # confirm each item has the shape corpus2dense expects.
            for doc in input_data:
                dense = matutils.corpus2dense(doc, self.input_dimensionality)
                yield _format(self._get_hidden_representations(dense))

        transformed_corpus = _chunked if chunksize else _one_by_one

        if is_corpus:
            return transformed_corpus()
        # Single input: materialise the generator and hand back its last item.
        return list(transformed_corpus()).pop()
Example #5
0
 def transformed_corpus():
     """Lazily yield sparse hidden representations for the documents in ``bow``.

     ``bow`` is consumed in chunks of ``chunksize`` via ``utils.grouper``.
     Each chunk is densified with ``matutils.corpus2dense``, passed through
     ``self._get_hidden_representations``, and every row of the transposed
     result (presumably one per document -- confirm against
     ``_get_hidden_representations``) is converted with
     ``matutils.any2sparse`` and yielded.
     """
     # utils.grouper yields bare chunks of documents, not (index, chunk)
     # pairs, so iterate the chunks directly; the unused ``chunk_no``
     # unpacking would fail on any chunk that is not exactly two items long.
     for doc_chunk in utils.grouper(bow, chunksize):
         chunk = matutils.corpus2dense(doc_chunk,
                                       self.input_dimensionality)
         hidden = self._get_hidden_representations(chunk)
         for column in hidden.T:
             yield matutils.any2sparse(column)
Example #6
0
 def transformed_corpus():
     """Lazily yield hidden representations of ``input_data``, chunk by chunk.

     Chunks come from ``utils.grouper(input_data, chunksize)``. Each chunk is
     densified with ``matutils.corpus2dense`` and transformed by
     ``self._get_hidden_representations``; the rows of the transposed result
     are yielded as-is when ``numpy_output`` is truthy, otherwise after
     conversion with ``matutils.any2sparse``.
     """
     for batch in utils.grouper(input_data, chunksize):
         dense = matutils.corpus2dense(batch, self.input_dimensionality)
         for vector in self._get_hidden_representations(dense).T:
             yield vector if numpy_output else matutils.any2sparse(vector)
Example #7
0
def from_dense_vec(vec):
    """Convert ``vec`` to gensim's sparse (BoW-style) format.

    :param vec: the vector to convert; passed unchanged to
        ``matutils.any2sparse``.
    :return: whatever ``matutils.any2sparse`` produces for ``vec``.
    """
    sparse_vec = matutils.any2sparse(vec)
    return sparse_vec
Example #8
0
 def consine_similarity(self, v1, v2):
     """Return the cosine similarity of two vectors.

     Both vectors are converted with ``matutils.any2sparse`` and then
     compared with ``matutils.cossim``.

     :param v1: first vector, in any form ``matutils.any2sparse`` accepts.
     :param v2: second vector, same requirement as ``v1``.
     :return: the value returned by ``matutils.cossim``.
     """
     sparse_v1 = matutils.any2sparse(v1)
     sparse_v2 = matutils.any2sparse(v2)
     return matutils.cossim(sparse_v1, sparse_v2)
Example #9
0
 def transformed_corpus():
     """Lazily yield a sparse hidden representation for each item of ``bow``.

     Each item is densified with ``matutils.corpus2dense``, transformed by
     ``self._get_hidden_representations``, and converted with
     ``matutils.any2sparse`` before being yielded.
     """
     # NOTE(review): corpus2dense receives one item at a time here --
     # confirm that each item has the shape corpus2dense expects.
     for item in bow:
         dense = matutils.corpus2dense(item, self.input_dimensionality)
         hidden = self._get_hidden_representations(dense)
         yield matutils.any2sparse(hidden)