def get_20news_model(self, vector_size, epochs, train_new_model=False):
    """Return what ``self.get_model`` yields for the news20 corpus.

    The training corpus is obtained from
    ``ProjectCorpus().get_news20_corpus()`` and the save path is derived
    from ``vector_size`` and ``epochs``.

    Args:
        vector_size: Forwarded to ``self.get_model``; also embedded in the
            model file name.
        epochs: Forwarded to ``self.get_model``; also embedded in the model
            file name.
        train_new_model: Forwarded unchanged to ``self.get_model``
            (presumably forces retraining instead of reusing a saved
            model -- confirm against ``get_model``).

    Returns:
        Whatever ``self.get_model`` returns for these arguments.
    """
    path_template = "20news/models/vs_{}_epochs_{}.model"
    news20_docs = ProjectCorpus().get_news20_corpus()
    model_path = path_template.format(vector_size, epochs)
    return self.get_model(
        news20_docs, model_path, vector_size, epochs, train_new_model
    )
def get_old_aggregate_model(self, vector_size, epochs, train_new_model=False):
    """Return what ``self.get_model`` yields for the old aggregate corpus.

    The training corpus is obtained from
    ``ProjectCorpus().get_old_aggregate_corpus()`` and the save path is
    derived from ``vector_size`` and ``epochs``.

    Args:
        vector_size: Forwarded to ``self.get_model``; also embedded in the
            model file name.
        epochs: Forwarded to ``self.get_model``; also embedded in the model
            file name.
        train_new_model: Forwarded unchanged to ``self.get_model``
            (presumably forces retraining instead of reusing a saved
            model -- confirm against ``get_model``).

    Returns:
        Whatever ``self.get_model`` returns for these arguments.
    """
    path_template = "aggregate_old/models/vs_{}_epochs_{}.model"
    old_aggregate_docs = ProjectCorpus().get_old_aggregate_corpus()
    model_path = path_template.format(vector_size, epochs)
    return self.get_model(
        old_aggregate_docs, model_path, vector_size, epochs, train_new_model
    )
import gensim
import numpy as np
from src.ProjectCorpus import ProjectCorpus
import matplotlib.pyplot as plt

# Script: train a Doc2Vec model on the small corpus, save it, then start
# assigning a label/colour to each document vector.

# Hyper-parameters; both are also embedded in the saved model's file name.
vector_size = 3
epochs = 20

corpus = ProjectCorpus()
train_corpus = corpus.get_small_corpus()

# Absolute, machine-specific output location.
save_dir = "/home/dj/PycharmProjects/cs475/src/doc2vec_models/summer_2020"
save_path = "{}/small_corpus/models/vs_{}_epochs_{}.model".format(
    save_dir, vector_size, epochs
)

# Build the vocabulary, train, and write the model to save_path.
model = gensim.models.doc2vec.Doc2Vec(
    vector_size=vector_size,
    min_count=2,
    epochs=epochs,
)
model.build_vocab(train_corpus)
model.train(
    train_corpus,
    total_examples=model.corpus_count,
    epochs=model.epochs,
)
model.save(save_path)

# NOTE(review): `docvecs.vectors_docs` looks like the gensim 3.x attribute
# name; gensim 4 exposes document vectors differently -- confirm the
# installed version before upgrading.
doc_vectors = np.array(model.docvecs.vectors_docs)

# One placeholder slot per document vector.
embedded_doc_vectors_with_words = [None] * len(doc_vectors)

for doc_id in range(len(doc_vectors)):
    # The first ten documents (ids 0-9) get label "m" and a translucent
    # blue colour spec.
    if doc_id <= 9:
        label = "m"
        color = {"facecolor": "blue", "alpha": 0.5}
    # NOTE(review): the visible part of this loop ends here; `label`,
    # `color` and `embedded_doc_vectors_with_words` are not used yet.
from src.ExperimentModel import ExperimentModel
from src.ProjectCorpus import ProjectCorpus
from src.TrainDoc2VecModel import TrainDoc2VecModel

# Script: run the SVM classification and t-SNE output steps of
# ExperimentModel against the aggregate corpus.

corpus_obj = ProjectCorpus()
corpus_path = corpus_obj.aggregate_corpus_path

model_trainer = TrainDoc2VecModel()
# NOTE(review): positional arguments are presumably (vector_size, epochs)
# -- confirm against TrainDoc2VecModel.get_aggregate_model.
model = model_trainer.get_aggregate_model(15, 30)

# Three contiguous ranges, one per class: 0-50, 50-88, 88-128. Whether the
# upper bound is inclusive depends on create_class_range -- verify there.
class_ranges = [
    ExperimentModel.create_class_range(range_start, range_stop)
    for range_start, range_stop in ((0, 50), (50, 88), (88, 128))
]

experiment_model = ExperimentModel(corpus_path, model, class_ranges)

# Print whatever the SVM classification step returns.
print(experiment_model.svm_classify())

# NOTE(review): 12.0 is presumably the t-SNE perplexity and the path the
# output location -- confirm against generate_tsne_representation.
experiment_model.generate_tsne_representation(
    12.0,
    "/home/dj/PycharmProjects/cs475/src/doc2vec_models/summer_2020/aggregate",
)