# Example #1
# Load the positive and negative splits (paths kept exactly as in the original run).
pos_data_train = read_data_from_csv("./training_training_data_pos.csv")
pos_data_test = read_data_from_csv("./testing_data_pos.csv")
neg_data_train = read_data_from_csv("./training_training_data_neg.csv")
neg_data_test = read_data_from_csv("./testing_data_neg.csv")
print(pos_data_test)  # sanity-check of the loaded positive test split

# Label convention: 1 = positive sample, 0 = negative sample.
pos_labels_train = np.ones(len(pos_data_train), dtype=int)
neg_labels_train = np.zeros(len(neg_data_train), dtype=int)
train_data = np.concatenate((pos_data_train, neg_data_train))
train_label = np.concatenate((pos_labels_train, neg_labels_train))

pos_labels_test = np.ones(len(pos_data_test), dtype=int)
neg_labels_test = np.zeros(len(neg_data_test), dtype=int)
test_data = np.concatenate((pos_data_test, neg_data_test))
test_label = np.concatenate((pos_labels_test, neg_labels_test))

# Train both models on identical splits and collect their training
# histories for a side-by-side accuracy comparison.
labels = ["CNN", "NN"]
cnn_history = model.train(train_data, train_label, test_data, test_label)
nn_history = nn.train(train_data, train_label, test_data, test_label)
histories = [cnn_history, nn_history]

plot_accuracy(histories, labels)
# model.plot_accuracy()
# model.eval(test_data, test_label)
# model.predict(test_data)
# Example #2
from preprocessor import Preprocessor
from text_cnn import TextCNN
# --- Hyper-parameters for the text-CNN run ---
# Other predefined sentiment-analysis names: imdb, yelp, amazon;
# QA model names: qa_1000 and qa_5500.
model_name = 'merged'
embedding_dim = 50
n_classes = 2  # change to 6 for question answering task
max_words = 100
kernel_sizes = [1, 2, 3]
n_filters = [40, 40, 40]

# Turn the corpus into fixed-length integer sequences (tokenizer built here).
pre = Preprocessor(
    source=model_name,
    max_words=max_words,
    embedding_dim=embedding_dim,
    n_classes=n_classes,
)
X_train, y_train, X_test, y_test = pre.get_sequences(create_tokenizer=True)

#embedding_matrix = pre.create_embedding_matrix()

text_cnn = TextCNN(
    embedding_dim=embedding_dim,
    text_length=max_words,
    n_class=n_classes,
    kernel_sizes=kernel_sizes,
    n_filters=n_filters,
    batch_size=32,
    epochs=15,
    hidden_layers=[10, 10, 10],
)
# Vocabulary size is the tokenizer's word count plus one
# (presumably to reserve index 0 — confirm against TextCNN's embedding layer).
vocab_size = len(pre.tokenizer.word_index) + 1
model = text_cnn.train(X_train, y_train, X_test, y_test, None, vocab_size)
model.summary()
text_cnn.evaluate(X_train, y_train, X_test, y_test)
text_cnn.save_model(model_name)