Code Example #1
def main(args):
    word2vec = KeyedVectors.load(args.word2vec, mmap='r')
    data = data_utils.load_data(args.file)

    if args.use_gensim:
        data_utils.load_gensim(args.word2vec)
    else:
        data_utils.load_embeddings(args.embeddings)
        data_utils.load_w2i(args.w2i)

    compute_recall(data, word2vec)
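
Examples #1, #2, and #6 all rely on a project-specific data_utils module that caches the embedding matrix and the word-to-index map as module-level state (example #6 reads data_utils.embeddings and data_utils.w2i directly). A minimal sketch of what such loaders could look like; the .npy/.pkl file formats and the global variables are assumptions based on how the functions are called, not the project's actual code:

# Hypothetical data_utils loaders; file formats and globals are assumed for illustration.
import pickle
import numpy as np

embeddings = None  # filled in by load_embeddings()
w2i = None         # filled in by load_w2i()

def load_embeddings(path):
    # Load a pre-computed embedding matrix whose rows are indexed by word id.
    global embeddings
    embeddings = np.load(path)

def load_w2i(path):
    # Load the word-to-index mapping that matches the embedding matrix rows.
    global w2i
    with open(path, "rb") as f:
        w2i = pickle.load(f)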
Code Example #2
File: main.py Project: Caiit/IR2
def main(args):
    global device

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    word2vec = KeyedVectors.load(args.word2vec, mmap='r')
    data = data_utils.load_data(args.file)

    if args.use_gensim:
        data_utils.load_gensim(args.word2vec)
    else:
        data_utils.load_embeddings(args.embeddings)
        data_utils.load_w2i(args.w2i)

    run(data, word2vec)
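
Both of the examples above memory-map the saved gensim vectors via KeyedVectors.load(..., mmap='r'), so the vector matrix is read lazily from disk instead of being copied into RAM. A short usage sketch with a placeholder file path:

from gensim.models import KeyedVectors

word2vec = KeyedVectors.load("vectors.kv", mmap='r')   # read-only, memory-mapped
vec = word2vec["king"]                                  # look up a single word vector
neighbours = word2vec.most_similar("king", topn=5)      # nearest neighbours by cosine similarity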
Code Example #3
def preprocess():
    # Data Preparation
    # ==================================================

    # Load data
    print("Loading data...")
    x_text, y = data_helpers.load_data_and_labels(FLAGS.positive_data_file, FLAGS.negative_data_file)

    # Build vocabulary
    max_document_length = max([len(x.split(" ")) for x in x_text])
    vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
    x = np.array(list(vocab_processor.fit_transform(x_text)))

    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]

    del x, y, x_shuffled, y_shuffled

    print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
    print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

    vocab_size = 75100
    embedding_path = './data/embeddings.npy'
    embedding = utils.load_embeddings(embedding_path, vocab_size, FLAGS.embedding_dim)

    return x_train, y_train, vocab_processor, x_dev, y_dev, embedding
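
utils.load_embeddings(embedding_path, vocab_size, FLAGS.embedding_dim) is pointed at a pre-built .npy file here; a plausible sketch of such a helper (the shape check is an assumption added for illustration, not the project's actual code):

import numpy as np

def load_embeddings(path, vocab_size, embedding_dim):
    # Load a pre-computed embedding matrix and sanity-check its shape.
    embedding = np.load(path)
    assert embedding.shape == (vocab_size, embedding_dim), "unexpected embedding matrix shape"
    return embedding.astype(np.float32)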
Code Example #4
def create_model(data_loaders, word_vocab, wordpiece_vocab, hierarchy, total_wordpieces):
    from model import E2EETModel, MentionLevelModel
    if cf.TASK == "end_to_end":
        model = E2EETModel(
            embedding_dim=cf.EMBEDDING_DIM,
            hidden_dim=cf.HIDDEN_DIM,
            vocab_size=len(wordpiece_vocab),
            label_size=len(hierarchy),
            model_options=cf.MODEL_OPTIONS,
            total_wordpieces=total_wordpieces,
            category_counts=hierarchy.get_train_category_counts(),
            hierarchy_matrix=hierarchy.hierarchy_matrix,
            embedding_model=cf.EMBEDDING_MODEL,
            vocab_size_word=len(word_vocab),
            pretrained_embeddings=None if cf.EMBEDDING_MODEL in ["random", "bert"]
            else load_embeddings(cf.EMBEDDING_MODEL, word_vocab, cf.EMBEDDING_DIM))

    elif cf.TASK == "mention_level":
        model = MentionLevelModel(
            embedding_dim=cf.EMBEDDING_DIM,
            hidden_dim=cf.HIDDEN_DIM,
            vocab_size=len(wordpiece_vocab),
            label_size=len(hierarchy),
            model_options=cf.MODEL_OPTIONS,
            total_wordpieces=total_wordpieces,
            category_counts=hierarchy.get_train_category_counts(),
            hierarchy_matrix=hierarchy.hierarchy_matrix,
            context_window=cf.MODEL_OPTIONS['context_window'],
            mention_window=cf.MODEL_OPTIONS['mention_window'],
            attention_type=cf.MODEL_OPTIONS['attention_type'],
            use_context_encoders=cf.MODEL_OPTIONS['use_context_encoders'])
    return model
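
When cf.EMBEDDING_MODEL names a pretrained model rather than "random" or "bert", load_embeddings(cf.EMBEDDING_MODEL, word_vocab, cf.EMBEDDING_DIM) has to return a matrix whose rows line up with word_vocab. A hedged sketch of one common way to build such a matrix with gensim; the assumption that word_vocab iterates over tokens in index order is not confirmed by the source:

import numpy as np
from gensim.models import KeyedVectors

def load_embeddings(model_path, word_vocab, embedding_dim):
    # Build a (len(vocab), dim) matrix; words missing from the model keep small random vectors.
    kv = KeyedVectors.load(model_path, mmap='r')
    matrix = np.random.uniform(-0.25, 0.25, (len(word_vocab), embedding_dim)).astype(np.float32)
    for idx, word in enumerate(word_vocab):
        if word in kv:
            matrix[idx] = kv[word]
    return matrix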
Code Example #5
def do_train(args):
    # Set up some parameters.
    config = Config()
    helper, train, dev, train_raw, dev_raw = load_and_preprocess_data(args)
    embeddings = load_embeddings(args, helper)
    config.embed_size = embeddings.shape[1]
    helper.save(config.output_path)

    handler = logging.FileHandler(config.log_output)
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(
        logging.Formatter('%(asctime)s:%(levelname)s: %(message)s'))
    logging.getLogger().addHandler(handler)

    report = None

    with tf.Graph().as_default():
        logger.info("Building model...", )
        start = time.time()
        model = NGramModel(helper, config, embeddings)
        logger.info("took %.2f seconds", time.time() - start)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as session:
            session.run(init)
            model.fit(session, saver, train_raw, dev_raw, train, dev)
Code Example #6
def get_all(args):
    """
    Gets the training and test data, and templates.
    """

    global embedding_size
    global num_words

    print("Load data...")
    data_train = data_utils.load_data(args.folder + "/train_data.json")
    data_test = data_utils.load_data(args.folder + "/dev_data.json")
    print(len(data_train))

    data_utils.load_embeddings(args.embeddings)
    embedding_size = len(data_utils.embeddings[0])
    data_utils.load_w2i(args.w2i)
    num_words = len(data_utils.w2i)
    w2emb = data_utils.load_w2emb(args.w2emb)
    templates_emb = data_utils.load_templates("../../data/templates.pkl")
    gensim_model = KeyedVectors.load(args.word2vec, mmap='r')

    print("Do the templates...")
    templates_emb = [y for x in templates_emb for y in x]
    cut_templates = [temp[-args.max_length:] for temp in templates_emb]
    templates_emb = [
        np.pad(temp1, ((0, args.max_length - len(temp1)), (0, 0)),
               "constant",
               constant_values=(num_words)) for temp1 in cut_templates
    ]
    templates_emb = torch.Tensor(templates_emb)

    print("Go through training data...")
    training_data = get_data(args.saved_train, data_train,
                             data_utils.embeddings, data_utils.w2i,
                             gensim_model, args)
    test_data = get_data(args.saved_test, data_test, data_utils.embeddings,
                         data_utils.w2i, gensim_model, args)
    return training_data, test_data, templates_emb, w2emb
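
The padding step above truncates each template to its last args.max_length token embeddings and then pads it back up to exactly args.max_length rows, filling the new rows with the constant num_words. A tiny illustration of that np.pad call with toy values standing in for args.max_length and num_words:

import numpy as np

max_length, pad_value = 5, 999                  # stand-ins for args.max_length and num_words
temp = np.ones((3, 4))                          # a template of 3 tokens with 4-dim embeddings
padded = np.pad(temp, ((0, max_length - len(temp)), (0, 0)),
                "constant", constant_values=pad_value)
print(padded.shape)                             # (5, 4): two rows of 999 appended at the end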
Code Example #7
def do_seq2seq_prediction():
    config = Config()

    # Load training data
    helper, data = load_and_preprocess_data('data/data.txt')
    inputs = data
    labels = inputs
    train_examples_raw = zip(
        inputs, labels)  # This is a list of (input, label) tuples.

    # Load pretrained embedding matrix
    # Embedding matrix has shape of (n_tokens, embed_size)
    embeddings = load_embeddings('data/vocab.txt', 'data/wordVectors.txt',
                                 helper)
    # config.n_tokens = embeddings.shape[0]
    # config.embed_size = embeddings.shape[1]
    helper.save(config.model_path)

    #Create and train a seq2seq autoencoder
    with tf.Graph().as_default():
        print "Building model..."
        cell_size = 100
        cell_type = "lstm"
        cell_init = "identity"
        clip_gradients = True
        activation_choice = "tanh"
        print "We are considering {:} of size N = {:} with activation being {:}.".format(
            cell_type, cell_size, activation_choice)
        if clip_gradients:
            print "Gradient clipping turned on."
        else:
            print "Gradient clipping turned off."
        model = Seq2seq_autoencoder(helper, config, embeddings, cell_size,
                                    cell_type, cell_init, clip_gradients,
                                    activation_choice)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(init)
            model.fit(sess, saver, train_examples_raw)
Code Example #8
def test_encoding(args):
    config = build_seq2seq_config(args)
    helper = ModelHelper.load(args.model_path)
    input_data = load_data(args.data)

    embeddings = load_embeddings(args, helper)
    config.n_tokens = embeddings.shape[0]
    config.embed_size = embeddings.shape[1]

    with tf.Graph().as_default():
        model = Seq2seq_autoencoder(helper, config, embeddings)
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(init)
            print model.config.model_output
            saver.restore(sess, model.config.model_output)
            encodings = model.encode(sess, input_data)

            print encodings
Code Example #9
def do_evaluate(args):
    config = build_seq2seq_config(args)
    helper = ModelHelper.load(args.model_path)
    inputs_raw = load_data(args.data)

    embeddings = load_embeddings(args, helper)
    config.n_tokens = embeddings.shape[0]
    config.embed_size = embeddings.shape[1]

    with tf.Graph().as_default():
        model = Seq2seq_autoencoder(helper, config, embeddings)
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
        vectorized = model.helper.vectorize(inputs_raw)
        inputs = model.preprocess_sequence_data(zip(vectorized, vectorized))

        with tf.Session() as sess:
            sess.run(init)
            saver.restore(sess, model.config.model_output)
            test_loss = model.evaluate(sess, inputs)

    result_path = "ae_results/%s.txt" % args.model_path.split('/')[1]
    with open(result_path, 'w') as result_file:
        result_file.write("%f" % test_loss)
Code Example #10
def do_train(args):
    config = Config(args)
    print "== Seq2Seq Config =="
    print "  Cell size:", config.cell_size
    print "  Cell type:", config.cell_type
    print "  Cell init:", config.cell_init
    print "  Activation:", config.activation_choice
    print "  Gradient clipping:", config.clip_gradients
    print "  Feed decoder:", config.feed_decoder

    # Load training data
    helper, train, dev = load_and_preprocess_data(args)
    inputs = train
    labels = train
    train_examples_raw = zip(
        inputs, labels)  # This is a list of (input, label) tuples.
    dev_set_raw = zip(dev, dev)

    # Load pretrained embedding matrix
    # Embedding matrix has shape of (n_tokens, embed_size)
    embeddings = load_embeddings(args, helper)
    config.n_tokens = embeddings.shape[0]
    config.embed_size = embeddings.shape[1]
    helper.save(config.model_path)

    #Create and train a seq2seq autoencoder
    with tf.Graph().as_default():
        print "Building model..."
        model = Seq2seq_autoencoder(helper, config, embeddings)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(init)
            model.fit(sess, saver, train_examples_raw, dev_set_raw)
Code Example #11
File: train.py Project: manueltonneau/glove-text-cnn
x_dev = pad_dataset(eval_df.text_tokenized.values.tolist(), 128)


def create_label(label):
    if label == 1:
        return [0, 1]
    elif label == 0:
        return [1, 0]


y_train = np.array((train_df['class'].apply(create_label)).values.tolist())
y_dev = np.array((eval_df['class'].apply(create_label)).values.tolist())

vocab_size = len(wdict)
embedding_path = FLAGS.embeddings_path
embedding = utils.load_embeddings(embedding_path, vocab_size,
                                  FLAGS.embedding_dim)
print("Embeddings loaded, Vocabulary Size: {:d}. Starting training ...".format(
    vocab_size))


def prepare_filepath_for_storing_model(output_dir: str) -> str:
    """Prepare the filepath where the trained model will be stored.

    :param output_dir: Directory where to store outputs (trained models).
    :return: path_to_store_model: Path where to store the trained model.
    """
    path_to_store_model = os.path.join(output_dir, 'models')
    if not os.path.exists(path_to_store_model):
        os.makedirs(path_to_store_model)
    return path_to_store_model
Code Example #12
        train_df.drop(columns=["text"], inplace=True)

        logger.info("Building dataset...")
        vocab2id = build_vocab(docs=X_train, min_count=config.min_count)
        pkl.dump(
            vocab2id,
            open(
                os.path.join(args.model_dir,
                             "vocab_{}.vocab".format(args.model)), "wb"))
        train_data = build_dataset(X_train,
                                   vocab2id,
                                   max_doc_len=config.max_doc_len)
        train_df.drop(columns=["text_words"], inplace=True)

        logger.info("Loading embeddings...")
        embeddings = load_embeddings(args.embedding_path, vocab2id)

        device = torch.device(
            "cuda") if torch.cuda.is_available() else torch.device("cpu")

        if args.nsplits > 1:
            SKF = StratifiedKFold(n_splits=args.nsplits, shuffle=True)
            for fold_idx, (train_idx,
                           val_idx) in enumerate(SKF.split(X_train, y_train)):
                logger.info("*" * 20 + "Training {}-fold...".format(fold_idx))

                model = load_model(config,
                                   args.model,
                                   embeddings,
                                   embeddings.shape[1],
                                   len(y_train.unique()),
Code Example #13
def main():

    l.write('# Loading and Setting Up Data')

    l.write('Loading Training Data')
    with s3_read('ml/data/news_classifier/train_data.json') as file:
        data = pd.read_json(file, orient="records")
        data = data[:1000]

    l.write('Loading embeddings')

    with s3_read('ml/glove_embeddings/glove.6B.100d.txt') as file:
        embeddings = data_utils.load_embeddings(file, embedding_dim=100)

    l.write('Preparing data')

    train_test_split = 0.95
    split_idx = math.floor(len(data) * train_test_split)

    train_data = data.iloc[0:split_idx]
    valid_data = data.iloc[split_idx:]

    encoding = WordEmbeddingEncoding(data, embeddings)
    encoding.prepare()

    train_dataset = WordTokenDataset(train_data, encoding)
    train_dataset.prepare()

    valid_dataset = WordTokenDataset(valid_data, encoding)
    valid_dataset.prepare()

    print('# Training the Model')

    hyperparams_list = [
        {
            'weighting': 'uniform',
            'lr': 0.001,
            'batch_size': 100
        },
        {
            'weighting': 'uniform',
            'lr': 0.01,
            'batch_size': 100
        },
        {
            'weighting': 'uniform',
            'lr': 0.001,
            'batch_size': 50
        },
        {
            'weighting': 'uniform',
            'lr': 0.01,
            'batch_size': 50
        },
    ]

    models = []
    train_losses_list = []
    valid_losses = []

    accepted_tokens = {t for t in embeddings.index}

    for i, hyperparams in enumerate(hyperparams_list):
        l.write(f'Model {i+1} / {len(hyperparams_list)}')

        start_time = time()

        batch_size = hyperparams['batch_size']
        lr = hyperparams['lr']
        weighting = hyperparams['weighting']

        # 1. Setup Data Loader

        data_loader = DataLoader(dataset=train_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 collate_fn=data_utils.collate_samples)

        # 2. Create the Model

        model = Model(embeddings=embeddings,
                      n_classes=encoding.n_classes(),
                      weighting=weighting)

        # 3. Setup Criterion and Optimizer

        criterion = torch.nn.CrossEntropyLoss()

        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        # 4. Train the Model

        train_losses = train(model,
                             criterion,
                             optimizer,
                             train_dataset,
                             data_loader,
                             epochs=EPOCHS)

        # 5. Calculate Validation Loss

        with torch.no_grad():
            valid_samples = valid_dataset[:]

            outputs = model(valid_samples)

            valid_loss = criterion(outputs, valid_samples.label)
            valid_losses.append(valid_loss)

        end_time = time()

        models.append(model)
        train_losses_list.append(train_losses)

        l.write(f'Model completed in {(end_time - start_time)/60:.02f}m.\n')

    l.write('# Results')

    uniform_mask = [hp['weighting'] == 'uniform' for hp in hyperparams_list]

    models = [m for i, m in enumerate(models) if uniform_mask[i]]
    train_losses_list = [
        losses for i, losses in enumerate(train_losses_list) if uniform_mask[i]
    ]
    valid_losses = [
        loss.item() for i, loss in enumerate(valid_losses) if uniform_mask[i]
    ]

    best_model_idx = valid_losses.index(min(valid_losses))
    best_model = models[best_model_idx]

    l.write(f'Best Model: {best_model_idx+1}')
    l.write('Computing Model Accuracy...')

    samples = valid_dataset[:]

    predictions = best_model.predict(samples)

    total = len(samples.label)
    correct = torch.sum(predictions == samples.label)

    l.write(f'Accuracy of Model: {(float(correct) / total)*100:.02f}%.')

    l.write('Persisting Models...')

    with s3_write('ml/models/news_classifier/glove_model.torch', 'b') as file:
        torch.save(best_model.state_dict(), file)

    l.write('Done!')
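
Here and in example #15, data_utils.load_embeddings parses a GloVe text file, and the returned object exposes both .index (the tokens, see accepted_tokens above) and .values (the matrix fed to torch.FloatTensor in example #15), which suggests a pandas DataFrame indexed by token. A hedged sketch of such a loader; the project's real implementation may differ:

import numpy as np
import pandas as pd

def load_embeddings(file, embedding_dim=100):
    # Parse a GloVe-style text file (token followed by its vector on each line)
    # into a DataFrame whose index is the token and whose values form the matrix.
    tokens, vectors = [], []
    for line in file:
        parts = line.rstrip().split(" ")
        tokens.append(parts[0])
        vectors.append(np.asarray(parts[1:], dtype=np.float32))
    matrix = np.vstack(vectors)
    assert matrix.shape[1] == embedding_dim
    return pd.DataFrame(matrix, index=tokens)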
Code Example #14
File: main.py Project: timt51/question_retrieval
import models
import train_utils
import helpers

##############################################################################
# Settings
##############################################################################
CUDA = False

##############################################################################
# Load the dataset
##############################################################################
Data = namedtuple("Data", \
        "corpus train dev test embeddings word_to_index")
data_utils.download_ask_ubuntu_dataset()
EMBEDDINGS, WORD_TO_INDEX = data_utils.load_embeddings()
CORPUS = data_utils.load_corpus(WORD_TO_INDEX)
TRAIN_DATA = data_utils.load_train_data()
DEV_DATA, TEST_DATA = data_utils.load_eval_data()
DATA = Data(CORPUS, TRAIN_DATA, DEV_DATA, TEST_DATA,\
            EMBEDDINGS, WORD_TO_INDEX)

##############################################################################
# Train and evaluate the models for Part 1
##############################################################################
RESULTS = []
MARGINS = [0.2]
MAX_EPOCHS = 50
BATCH_SIZE = 32
FILTER_WIDTHS = [3]
POOL_METHOD = "average"
Code Example #15
def main():

    print('Loading and Setting Up Data...')

    embeddings = data_utils.load_embeddings(
        './data/glove.6B/glove.6B.100d.txt', embedding_dim=100)

    data = pd.read_json('./data/train_data.json', orient='records')

    train_test_split = 0.95
    split_idx = math.floor(len(data) * train_test_split)

    train_data = data.iloc[0:split_idx]
    valid_data = data.iloc[split_idx:]

    encoding = WordEmbeddingEncoding(data, embeddings)
    encoding.prepare()

    train_dataset = WordTokenDataset(train_data, encoding)
    train_dataset.prepare()

    valid_dataset = WordTokenDataset(valid_data, encoding)
    valid_dataset.prepare()

    print('Creating Model...')

    hyperparams_list = [
        {
            'weighting': 'uniform',
            'lr': 0.001,
            'batch_size': 100
        },
        {
            'weighting': 'uniform',
            'lr': 0.01,
            'batch_size': 100
        },
        {
            'weighting': 'uniform',
            'lr': 0.001,
            'batch_size': 50
        },
        {
            'weighting': 'uniform',
            'lr': 0.01,
            'batch_size': 50
        },
        {
            'weighting': 'tf_idf',
            'lr': 0.001,
            'batch_size': 100
        },
        {
            'weighting': 'tf_idf',
            'lr': 0.01,
            'batch_size': 100
        },
        {
            'weighting': 'tf_idf',
            'lr': 0.001,
            'batch_size': 50
        },
        {
            'weighting': 'tf_idf',
            'lr': 0.01,
            'batch_size': 50
        },
    ]

    class Model(torch.nn.Module):
        def __init__(self, embeddings, n_classes, weighting):
            super(Model, self).__init__()

            self.weighting = weighting

            torch_embeddings = torch.FloatTensor(embeddings.values)
            self.embedding_bag = torch.nn.EmbeddingBag.from_pretrained(
                torch_embeddings, mode='sum')
            self.linear = torch.nn.Linear(self.embedding_bag.embedding_dim,
                                          n_classes)

        def forward(self, samples):
            if weighting == 'tf_idf':
                weights = samples.create_tf_idf_weights()
            else:
                weights = samples.create_uniform_weights()

            x = self.embedding_bag(samples.sequence,
                                   samples.offset,
                                   per_sample_weights=weights)
            output = self.linear(x)
            return output

        def predict(self, samples):
            with torch.no_grad():
                outputs = self(samples)
                predictions = torch.argmax(outputs, axis=1)

            return predictions

    print('Training the Model...')

    def train(model,
              criterion,
              optimizer,
              dataset,
              data_loader,
              epochs,
              log=True):
        train_losses = []

        for epoch in range(epochs):
            losses = []

            for i, samples in enumerate(data_loader):
                optimizer.zero_grad()
                output = model(samples)
                loss = criterion(output, samples.label)
                loss.backward()
                optimizer.step()

                losses.append(loss)

            train_loss = torch.mean(torch.stack(losses))
            train_losses.append(train_loss)

            if log and (epoch + 1) % 10 == 0:
                train_loss_estimator_size = 10000
                train_loss_estimator_start = max(
                    1,
                    len(dataset) - train_loss_estimator_size)
                random_start = torch.randint(high=train_loss_estimator_start,
                                             size=(1, )).item()

                samples = dataset[random_start:(random_start +
                                                train_loss_estimator_size)]
                predictions = model.predict(samples)
                labels = samples.label

                total = len(labels)
                correct = torch.sum(labels == predictions)

                print(f'Epoch {epoch + 1}')
                print(f'Accuracy: {float(correct)/total*100:.02f}%.')
                print(f'Training Loss: {train_loss.item()}')
                print()

        return train_losses

    models = []
    train_losses_list = []
    valid_losses = []

    accepted_tokens = {t for t in embeddings.index}

    for i, hyperparams in enumerate(hyperparams_list):
        print(f'Starting training Model {i+1} / {len(hyperparams_list)}...')

        start_time = time()

        batch_size = hyperparams['batch_size']
        lr = hyperparams['lr']
        weighting = hyperparams['weighting']

        # 1. Setup Data Loader

        data_loader = DataLoader(dataset=train_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 collate_fn=data_utils.collate_samples)

        # 2. Create the Model

        model = Model(embeddings=embeddings,
                      n_classes=encoding.n_classes(),
                      weighting=weighting)

        # 3. Setup Criterion and Optimizer

        criterion = torch.nn.CrossEntropyLoss()

        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        # 4. Train the Model

        train_losses = train(model,
                             criterion,
                             optimizer,
                             train_dataset,
                             data_loader,
                             epochs=EPOCHS)

        # 5. Calculate Validation Loss

        with torch.no_grad():
            valid_samples = valid_dataset[:]

            outputs = model(valid_samples)

            valid_loss = criterion(outputs, valid_samples.label)
            valid_losses.append(valid_loss)

        end_time = time()

        models.append(model)
        train_losses_list.append(train_losses)

        print(f'Model completed in {(end_time - start_time)/60:.02f}m.')
        print()

    print('Checking Results...')

    uniform_mask = [hp['weighting'] == 'uniform' for hp in hyperparams_list]

    uniform_models = [m for i, m in enumerate(models) if uniform_mask[i]]
    uniform_train_losses_list = [
        losses for i, losses in enumerate(train_losses_list) if uniform_mask[i]
    ]
    uniform_valid_losses = [
        loss.item() for i, loss in enumerate(valid_losses) if uniform_mask[i]
    ]

    tf_idf_models = [m for i, m in enumerate(models) if not uniform_mask[i]]
    tf_idf_train_losses_list = [
        losses for i, losses in enumerate(train_losses_list)
        if not uniform_mask[i]
    ]
    tf_idf_valid_losses = [
        loss.item() for i, loss in enumerate(valid_losses)
        if not uniform_mask[i]
    ]

    best_uniform_model_idx = uniform_valid_losses.index(
        min(uniform_valid_losses))
    best_uniform_model = uniform_models[best_uniform_model_idx]

    best_tf_idf_model_idx = tf_idf_valid_losses.index(min(tf_idf_valid_losses))
    best_tf_idf_model = tf_idf_models[best_tf_idf_model_idx]

    print(f'Best Uniform Model: {best_uniform_model_idx+1}')
    print(f'Best TF-IDF Model:  {best_tf_idf_model_idx+1}')

    print('Computing Uniform Model Accuracy...')

    samples = valid_dataset[:]

    uniform_predictions = best_uniform_model.predict(samples)

    total = len(samples.label)
    correct = torch.sum(uniform_predictions == samples.label)

    print(f'Accuracy of Uniform Model: {(float(correct) / total)*100:.02f}%.')

    print('Computing TF-IDF Model Accuracy...')

    tf_idf_predictions = best_tf_idf_model.predict(samples)

    total = len(samples.label)
    correct = torch.sum(tf_idf_predictions == samples.label)

    print(f'Accuracy of TF-IDF Model: {(float(correct) / total)*100:.02f}%.')

    print('Persisting Models...')

    torch.save(best_uniform_model.state_dict(),
               './models/uniform_glove_model.torch')
    torch.save(best_tf_idf_model.state_dict(), './models/tf_idf_model.torch')

    print('Done!')
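
Both saved files are plain state_dict checkpoints, so loading them later means rebuilding the same Model with matching constructor arguments and then restoring the weights. A short usage sketch reusing the names from this example:

import torch

# Rebuild the architecture first, then restore the trained weights.
model = Model(embeddings=embeddings, n_classes=encoding.n_classes(), weighting='uniform')
model.load_state_dict(torch.load('./models/uniform_glove_model.torch'))
model.eval()  # inference mode; there are no dropout/batch-norm layers here, but it is good practice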