Example #1
0
    def get_20news_model(self, vector_size, epochs, train_new_model=False):
        """Obtain the model associated with the news20 corpus.

        Builds the training corpus through
        ``ProjectCorpus.get_news20_corpus()``, derives the model file path
        from ``vector_size`` and ``epochs``, and delegates the rest of the
        work to ``self.get_model``.

        Args:
            vector_size: Embedded in the save path and forwarded to
                ``get_model``.
            epochs: Embedded in the save path and forwarded to
                ``get_model``.
            train_new_model: Forwarded unchanged to ``get_model``.
                Presumably forces retraining instead of reusing a saved
                model -- confirm against ``get_model``.

        Returns:
            Whatever ``self.get_model`` returns for these arguments.
        """
        news20_documents = ProjectCorpus().get_news20_corpus()

        # The path is relative; how it is resolved is up to get_model.
        model_file = "20news/models/vs_{}_epochs_{}.model".format(
            vector_size, epochs)

        return self.get_model(news20_documents, model_file, vector_size,
                              epochs, train_new_model)
Example #2
0
    def get_old_aggregate_model(self,
                                vector_size,
                                epochs,
                                train_new_model=False):
        """Obtain the model associated with the old aggregate corpus.

        Builds the training corpus through
        ``ProjectCorpus.get_old_aggregate_corpus()``, derives the model file
        path from ``vector_size`` and ``epochs``, and delegates the rest of
        the work to ``self.get_model``.

        Args:
            vector_size: Embedded in the save path and forwarded to
                ``get_model``.
            epochs: Embedded in the save path and forwarded to
                ``get_model``.
            train_new_model: Forwarded unchanged to ``get_model``.
                Presumably forces retraining instead of reusing a saved
                model -- confirm against ``get_model``.

        Returns:
            Whatever ``self.get_model`` returns for these arguments.
        """
        aggregate_documents = ProjectCorpus().get_old_aggregate_corpus()

        # The path is relative; how it is resolved is up to get_model.
        model_file = "aggregate_old/models/vs_{}_epochs_{}.model".format(
            vector_size, epochs)

        return self.get_model(aggregate_documents, model_file, vector_size,
                              epochs, train_new_model)
Example #3
0
import gensim
import numpy as np
from src.ProjectCorpus import ProjectCorpus
import matplotlib.pyplot as plt

# Hyperparameters for this run; both values are also baked into the file
# name of the saved model below.
vector_size = 3
epochs = 20

# Training documents are supplied by the project's corpus helper.
corpus = ProjectCorpus()
train_corpus = corpus.get_small_corpus()

# NOTE(review): machine-specific absolute path -- the script only runs as-is
# where this directory tree already exists.
save_dir = "/home/dj/PycharmProjects/cs475/src/doc2vec_models/summer_2020"
save_path = "{}/small_corpus/models/vs_{}_epochs_{}.model".format(
    save_dir, vector_size, epochs)

model = gensim.models.doc2vec.Doc2Vec(
    vector_size=vector_size, min_count=2, epochs=epochs)

# Build the vocabulary from the corpus, train on the same corpus, then
# persist the trained model to save_path.
model.build_vocab(train_corpus)
model.train(
    train_corpus, total_examples=model.corpus_count, epochs=model.epochs)
model.save(save_path)

# Copy the learned document vectors into a standalone NumPy array.
# NOTE(review): `docvecs.vectors_docs` looks like the gensim 3.x accessor
# (gensim 4 exposes `model.dv.vectors`) -- confirm the pinned version.
doc_vectors = np.array(model.docvecs.vectors_docs)

# One placeholder slot per document vector.
embedded_doc_vectors_with_words = [None] * len(doc_vectors)
# Walk the document vectors by index and pick a label and style per document.
# NOTE(review): the loop body appears to be cut off after this first branch
# in the visible source -- the handling of doc_id > 9 and the use of
# `label`/`color` are not shown here.
for doc_id in range(len(doc_vectors)):
    # Documents with index 0 through 9 get the label "m".
    if doc_id <= 9:
        label = "m"
        # Presumably a matplotlib box/patch style (half-transparent blue);
        # confirm where `color` is consumed.
        color = dict(facecolor='blue', alpha=0.5)
Example #4
0
from src.ExperimentModel import ExperimentModel
from src.ProjectCorpus import ProjectCorpus
from src.TrainDoc2VecModel import TrainDoc2VecModel

# Location of the aggregate corpus, as exposed by the project corpus helper.
corpus_obj = ProjectCorpus()
corpus_path = corpus_obj.aggregate_corpus_path

# Fetch the aggregate model. Presumably the arguments are
# (vector_size=15, epochs=30) -- confirm against
# TrainDoc2VecModel.get_aggregate_model.
model_trainer = TrainDoc2VecModel()
model = model_trainer.get_aggregate_model(15, 30)

# Three back-to-back ranges: 0-50, 50-88 and 88-128. Presumably these are
# document-index boundaries of the three classes; whether the upper bound is
# inclusive depends on create_class_range.
class_ranges = [
    ExperimentModel.create_class_range(start, stop)
    for start, stop in ((0, 50), (50, 88), (88, 128))
]
experiment_model = ExperimentModel(corpus_path, model, class_ranges)

# Print whatever the SVM classification step returns.
print(experiment_model.svm_classify())

# NOTE(review): 12.0 is presumably the t-SNE perplexity and the string an
# output location -- confirm against generate_tsne_representation. The path
# is machine-specific.
experiment_model.generate_tsne_representation(
    12.0,
    "/home/dj/PycharmProjects/cs475/src/doc2vec_models/summer_2020/aggregate",
)