def topic_dimen_reduce(words, word2vec, pca_components=50, tsne_components=2):
    """Reduce the vectors of ``words`` to a low-dimensional embedding.

    The matrix built by ``terms_analysis.get_words_matrix`` is first reduced
    with PCA, and the PCA output is then embedded with t-SNE.

    Args:
        words: Words to look up; passed unchanged to
            ``terms_analysis.get_words_matrix``.
        word2vec: Word-vector model passed to the same helper.
        pca_components: ``n_components`` for the intermediate PCA step.
            Defaults to 50, the value that was previously hard-coded.
        tsne_components: ``n_components`` for the final t-SNE step.
            Defaults to 2, the value that was previously hard-coded.

    Returns:
        A ``(dictionary, t_matrix)`` tuple: ``dictionary`` exactly as returned
        by ``terms_analysis.get_words_matrix`` and ``t_matrix`` the result of
        ``TSNE.fit_transform`` on the PCA-reduced matrix.

    Note:
        Assuming ``PCA`` is scikit-learn's implementation, ``pca_components``
        must not exceed ``min(n_samples, n_features)`` of the word matrix;
        pass a smaller value for short word lists.
    """
    dictionary, matrix = terms_analysis.get_words_matrix(words, word2vec)

    # PCA first: cheap linear reduction before handing the data to t-SNE.
    pca_matrix = PCA(n_components=pca_components).fit_transform(matrix)
    t_matrix = TSNE(n_components=tsne_components).fit_transform(pca_matrix)
    return dictionary, t_matrix
def w2v_dimen_reduce(word2vec_fn, max_words=5000, pca_components=50,
                     tsne_components=2):
    """Load a saved Word2Vec model and embed part of its vocabulary.

    The model's vocabulary is turned into a matrix by
    ``terms_analysis.get_words_matrix``; the first ``max_words`` rows are
    reduced with PCA and the PCA output is then embedded with t-SNE.

    Args:
        word2vec_fn: Path handed to ``models.Word2Vec.load``.
        max_words: Number of leading rows of the word matrix (and leading
            entries of the dictionary) to keep. Defaults to 5000, the value
            that was previously hard-coded.
        pca_components: ``n_components`` for the intermediate PCA step.
            Defaults to 50, the value that was previously hard-coded.
        tsne_components: ``n_components`` for the final t-SNE step.
            Defaults to 2, the value that was previously hard-coded.

    Returns:
        A ``(dictionary, t_matrix)`` tuple: the first ``max_words`` entries
        of the dictionary returned by ``terms_analysis.get_words_matrix`` and
        the result of ``TSNE.fit_transform`` on the PCA-reduced rows.
    """
    word2vec = models.Word2Vec.load(word2vec_fn)
    # NOTE(review): newer gensim releases expose the vocabulary as
    # ``word2vec.wv.vocab`` / ``word2vec.wv.key_to_index`` instead of
    # ``word2vec.vocab`` -- confirm against the gensim version in use.
    vocab = word2vec.vocab.keys()
    dictionary, matrix = terms_analysis.get_words_matrix(vocab, word2vec)
    del vocab  # no longer needed; drop the reference before the heavy steps

    # Presumably dictionary entries and matrix rows share the same order, so
    # both are truncated with the same bound -- verify against the helper.
    pca_matrix = PCA(n_components=pca_components).fit_transform(
        matrix[:max_words])
    del matrix  # drop the full matrix reference before running t-SNE

    t_matrix = TSNE(n_components=tsne_components).fit_transform(pca_matrix)
    return dictionary[:max_words], t_matrix