def text_similarity(self, t1, t2):
    """Return the cosine similarity between two texts.

    Each text is passed through ``self.text2model``, the result is converted
    with ``matutils.any2sparse``, and the two converted vectors are compared
    with ``matutils.cossim``.

    The computation is the same for every model type, so it does not branch
    on ``self._model``.

    :param t1: first text, in whatever form ``self.text2model`` accepts.
    :param t2: second text, in whatever form ``self.text2model`` accepts.
    :return: the value returned by ``matutils.cossim`` for the two vectors.
    """
    t1_vec = matutils.any2sparse(self.text2model(t1))
    t2_vec = matutils.any2sparse(self.text2model(t2))
    return matutils.cossim(t1_vec, t2_vec)
def transformed_corpus():
    """Lazily yield the hidden representation of every item in ``input_data``.

    ``self``, ``input_data`` and ``numpy_output`` are read from the enclosing
    scope. When ``numpy_output`` is truthy the hidden representation is
    yielded as-is; otherwise it is converted with ``matutils.any2sparse``.
    """
    for document in input_data:
        # NOTE(review): corpus2dense receives one item of input_data at a
        # time here, not a collection of documents — confirm this is intended.
        dense = matutils.corpus2dense(document, self.input_dimensionality)
        encoded = self._get_hidden_representations(dense)
        if not numpy_output:
            encoded = matutils.any2sparse(encoded)
        yield encoded
def __getitem__(self, input_data, numpy_input=False, numpy_output=False, chunksize=10000):
    """Transform ``input_data`` into its hidden representation.

    :param input_data: the data to transform. With ``numpy_input`` set it is
        handed directly to ``self._get_hidden_representations``; otherwise it
        is inspected with ``utils.is_corpus`` and, if it is not a corpus,
        wrapped in a one-element list.
    :param numpy_input: if truthy, skip corpus detection and dense conversion.
    :param numpy_output: if truthy, return/yield the hidden representation
        unchanged; otherwise convert each result with ``matutils.any2sparse``.
    :param chunksize: if truthy, documents are grouped with ``utils.grouper``
        and transformed one chunk at a time; if falsy, each document is
        transformed individually.
    :return: for ``numpy_input`` the transformed input; for a corpus a lazy
        generator of transformed documents; for a non-corpus input the last
        item produced for the one-element wrapper list.
    """
    def encode(dense):
        return self._get_hidden_representations(dense)

    def as_output(vector):
        # Output format is decided per result, at the time it is produced.
        return vector if numpy_output else matutils.any2sparse(vector)

    if numpy_input:
        return as_output(encode(input_data))

    is_corpus, input_data = utils.is_corpus(input_data)
    if not is_corpus:
        input_data = [input_data]

    def by_chunk():
        for doc_chunk in utils.grouper(input_data, chunksize):
            dense = matutils.corpus2dense(doc_chunk, self.input_dimensionality)
            # Iterating the transpose — presumably one entry per document in
            # the chunk; confirm against _get_hidden_representations.
            for column in encode(dense).T:
                yield as_output(column)

    def by_document():
        for doc in input_data:
            # NOTE(review): corpus2dense receives a single document here
            # rather than a collection — confirm this is intended.
            yield as_output(encode(matutils.corpus2dense(doc, self.input_dimensionality)))

    results = by_chunk() if chunksize else by_document()
    if is_corpus:
        return results
    # Non-corpus input: materialise the results and hand back the last one.
    return list(results).pop()
def transformed_corpus():
    """Lazily yield the sparse hidden representation of ``bow``, chunk by chunk.

    ``self``, ``bow`` and ``chunksize`` are read from the enclosing scope.
    Documents are grouped with ``utils.grouper``, each group is densified with
    ``matutils.corpus2dense`` and passed to ``self._get_hidden_representations``;
    every entry of the transposed result is converted with
    ``matutils.any2sparse`` and yielded.
    """
    # Each item from utils.grouper is taken as one chunk of documents; it is
    # not unpacked into an (index, chunk) pair, which would fail (or silently
    # mis-bind) for any chunk that is not exactly two documents long.
    for doc_chunk in utils.grouper(bow, chunksize):
        chunk = matutils.corpus2dense(doc_chunk, self.input_dimensionality)
        hidden = self._get_hidden_representations(chunk)
        # Presumably one entry of hidden.T per document in the chunk — confirm.
        for column in hidden.T:
            yield matutils.any2sparse(column)
def transformed_corpus():
    """Lazily yield hidden representations of ``input_data``, chunk by chunk.

    ``self``, ``input_data``, ``chunksize`` and ``numpy_output`` are read from
    the enclosing scope. Each entry of the transposed hidden representation is
    yielded unchanged when ``numpy_output`` is truthy, and converted with
    ``matutils.any2sparse`` otherwise.
    """
    for group in utils.grouper(input_data, chunksize):
        dense = matutils.corpus2dense(group, self.input_dimensionality)
        encoded = self._get_hidden_representations(dense)
        # Presumably one entry of encoded.T per document in the group — confirm.
        for vector in encoded.T:
            yield vector if numpy_output else matutils.any2sparse(vector)
def from_dense_vec(vec):
    """Convert a vector into Gensim's bag-of-words format.

    Thin wrapper that delegates to ``matutils.any2sparse``.

    :param vec: the vector to convert.
    :return: whatever ``matutils.any2sparse`` returns for ``vec``.
    """
    bow = matutils.any2sparse(vec)
    return bow
def consine_similarity(self, v1, v2):
    """Return the cosine similarity of two vectors.

    Both vectors are converted with ``matutils.any2sparse`` and then compared
    with ``matutils.cossim``.

    NOTE(review): the method name is spelled "consine"; it is kept as-is
    because callers reference it by this name.

    :param v1: first vector.
    :param v2: second vector.
    :return: the value returned by ``matutils.cossim``.
    """
    sparse_v1 = matutils.any2sparse(v1)
    sparse_v2 = matutils.any2sparse(v2)
    return matutils.cossim(sparse_v1, sparse_v2)
def transformed_corpus():
    """Lazily yield the sparse hidden representation of every item in ``bow``.

    ``self`` and ``bow`` are read from the enclosing scope. Each item is
    densified with ``matutils.corpus2dense``, passed to
    ``self._get_hidden_representations`` and converted with
    ``matutils.any2sparse`` before being yielded.
    """
    for document in bow:
        # NOTE(review): corpus2dense receives one item of bow at a time here,
        # not a collection of documents — confirm this is intended.
        dense = matutils.corpus2dense(document, self.input_dimensionality)
        encoded = self._get_hidden_representations(dense)
        yield matutils.any2sparse(encoded)