# Driver script: load the positive/negative CSV splits, label them, train
# both models on the same data and plot their accuracy histories together.

# --- Load raw samples (one CSV per class and per split) -------------------
pos_data_train = read_data_from_csv("./training_training_data_pos.csv")
pos_data_test = read_data_from_csv("./testing_data_pos.csv")
neg_data_train = read_data_from_csv("./training_training_data_neg.csv")
neg_data_test = read_data_from_csv("./testing_data_neg.csv")
print(pos_data_test)

# --- Training set: positives are labelled 1, negatives 0 ------------------
pos_labels_train = np.ones(len(pos_data_train), dtype=int)
neg_labels_train = np.zeros(len(neg_data_train), dtype=int)
# Positives come first in both arrays so data and labels stay aligned.
train_data = np.concatenate([pos_data_train, neg_data_train])
train_label = np.concatenate([pos_labels_train, neg_labels_train])

# --- Test set: same labelling and ordering as the training set ------------
pos_labels_test = np.ones(len(pos_data_test), dtype=int)
neg_labels_test = np.zeros(len(neg_data_test), dtype=int)
test_data = np.concatenate([pos_data_test, neg_data_test])
test_label = np.concatenate([pos_labels_test, neg_labels_test])

# --- Train both models on identical inputs --------------------------------
# NOTE(review): `model` and `nn` are defined outside this view; the "CNN" /
# "NN" legend entries below assume that is what they are -- confirm.
labels = ["CNN", "NN"]
cnn_history = model.train(train_data, train_label, test_data, test_label)
nn_history = nn.train(train_data, train_label, test_data, test_label)

# Histories are listed in the same order as `labels`.
histories = [cnn_history, nn_history]
plot_accuracy(histories, labels)

# Disabled follow-up calls, kept for reference:
# model.plot_accuracy()
# model.eval(test_data, test_label)
# model.predict(test_data)
from preprocessor import Preprocessor
from text_cnn import TextCNN

# Script: preprocess a predefined dataset, train a TextCNN on it, print the
# model summary, evaluate on both splits and save the model under its name.

# --- Define parameters ----------------------------------------------------
# Other predefined sentiment-analysis names include: imdb, yelp, amazon.
# QA models include: qa_1000 and qa_5500.
model_name = 'merged'
embedding_dim = 50
n_classes = 2  # change to 6 for the question answering task
max_words = 100
kernel_sizes = [1, 2, 3]
n_filters = [40, 40, 40]

# --- Preprocessing --------------------------------------------------------
pre = Preprocessor(
    source=model_name,
    max_words=max_words,
    embedding_dim=embedding_dim,
    n_classes=n_classes,
)
X_train, y_train, X_test, y_test = pre.get_sequences(create_tokenizer=True)
# embedding_matrix = pre.create_embedding_matrix()

# --- Model construction and training --------------------------------------
text_cnn = TextCNN(
    embedding_dim=embedding_dim,
    text_length=max_words,
    n_class=n_classes,
    kernel_sizes=kernel_sizes,
    n_filters=n_filters,
    batch_size=32,
    epochs=15,
    hidden_layers=[10, 10, 10],
)

# Number of entries in the tokenizer's word index, plus one.
# NOTE(review): presumably the vocabulary size with one reserved index, and
# the `None` below presumably stands in for the disabled embedding matrix --
# confirm against TextCNN.train's signature.
vocab_size = len(pre.tokenizer.word_index) + 1
model = text_cnn.train(X_train, y_train, X_test, y_test, None, vocab_size)
model.summary()

# --- Evaluation and persistence -------------------------------------------
text_cnn.evaluate(X_train, y_train, X_test, y_test)
text_cnn.save_model(model_name)