Example #1
import pytest

import load_data
import train_rnn_model


def test_train_rnn_model():
    data_dir = './data/'
    data = load_data.load_rotten_tomatoes_sentiment_analysis_dataset(data_dir)
    acc, loss = train_rnn_model.train_rnn_model(data)
    assert acc == pytest.approx(0.68, 0.02)
    assert loss == pytest.approx(0.82, 0.02)
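# For reference: the second positional argument of pytest.approx is the
# *relative* tolerance, so the assertions above accept any value within 2%
# of the expected number. A minimal self-contained illustration:
import pytest


def test_approx_tolerance():
    # 0.68 +/- 2% covers roughly 0.6664..0.6936.
    assert 0.67 == pytest.approx(0.68, 0.02)
    assert not 0.65 == pytest.approx(0.68, 0.02)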
    def test_train_sequence_model(self):
        data = load_data.load_rotten_tomatoes_sentiment_analysis_dataset(
            data_dir)
        acc, loss = train_sequence_model.train_sequence_model(data)
        self.assertTrue(0.66 < acc < 0.70)
        self.assertTrue(0.80 < loss < 0.84)
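# The fragment above references an undefined `data_dir` and uses `self`, so
# it belongs inside a test case class. A self-contained sketch of one way to
# embed it, assuming a unittest.TestCase with the data directory set in
# setUp (the class name and path are assumptions, not from the original):
import unittest

import load_data
import train_sequence_model


class TrainSequenceModelTest(unittest.TestCase):

    def setUp(self):
        self.data_dir = './data/'  # assumed dataset location

    def test_train_sequence_model(self):
        data = load_data.load_rotten_tomatoes_sentiment_analysis_dataset(
            self.data_dir)
        acc, loss = train_sequence_model.train_sequence_model(data)
        self.assertTrue(0.66 < acc < 0.70)
        self.assertTrue(0.80 < loss < 0.84)


if __name__ == '__main__':
    unittest.main()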
    history = model.fit(
            x_train,
            train_labels,
            epochs=epochs,
            callbacks=callbacks,
            validation_data=(x_val, val_labels),
            verbose=2,  # Logs once per epoch.
            batch_size=batch_size)

    # Print results.
    history = history.history
    print('Validation accuracy: {acc}, loss: {loss}'.format(
            acc=history['val_acc'][-1], loss=history['val_loss'][-1]))

    # Save model.
    model.save('rotten_tomatoes_sepcnn_model.h5')
    return history['val_acc'][-1], history['val_loss'][-1]


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='./data',
                        help='input data directory')
    FLAGS, unparsed = parser.parse_known_args()

    # Using the Rotten Tomatoes movie reviews dataset to demonstrate
    # training a sequence model.
    data = load_data.load_rotten_tomatoes_sentiment_analysis_dataset(
            FLAGS.data_dir)
    train_sequence_model(data)
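# The `callbacks` argument passed to model.fit above is not defined in this
# excerpt. A minimal sketch of what it could be, assuming early stopping on
# validation loss (the patience value is an assumption, not taken from the
# original code):
import tensorflow as tf

callbacks = [tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=2)]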
Example #4
"""
@author: nikhil
"""
import numpy as np
import load_data
import explore_data

# ((train_texts, train_labels), (validation_texts, validation_labels)) = load_data.load_imdb_sentiment_analysis_dataset("./data/")
# explore_data.get_num_classes(train_labels)
# explore_data.get_num_words_per_sample(train_texts)
# explore_data.plot_frequency_distribution_of_ngrams(train_texts)
# explore_data.plot_sample_length_distribution(train_texts)
# explore_data.plot_class_distribution(train_labels)



((train_texts, train_labels), (validation_texts, validation_labels)) = load_data.load_rotten_tomatoes_sentiment_analysis_dataset("./data/")


# Count how many training samples carry label 1.
count = 0
for i in train_labels:
    if i == 1:
        count += 1

c2 = 0
c3 = 0

# Rebuild the training set, skipping samples labelled 0 or 4.
train_labels_final = []
train_texts_final = []
for i in range(len(train_labels)):
    if train_labels[i] == 0 or train_labels[i] == 4:
        continue
    elif train_labels[i] == 1:
        train_labels_final.append(1)
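# Since numpy is already imported above, the same filtering can be written
# with a boolean mask. The loop above only maps label 1 explicitly, so the
# binary mapping used here for the remaining labels (2 and 3) is an
# assumption; treat this purely as an illustrative alternative.
import numpy as np

labels = np.asarray(train_labels)
texts = np.asarray(train_texts, dtype=object)

# Keep everything except the classes skipped in the loop above (0 and 4).
keep = (labels != 0) & (labels != 4)
train_texts_final = texts[keep].tolist()
# Assumed mapping: label 1 -> 1, all other remaining labels -> 0.
train_labels_final = (labels[keep] == 1).astype(int).tolist()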