# Plot training vs. validation loss for this dataset and save the figure.
# Relies on `epochs`, `loss`, `val_loss`, `history`, `results_saver`,
# `datasets_helper` and `results` defined earlier in this script.
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss {}'.format(
    datasets_helper.get_dataset_name()))
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig(
    results_saver.get_plot_path(datasets_helper.get_dataset_name(), "loss"))
plt.clf()

# Plot training vs. validation accuracy on a fresh figure.
acc = history.history['acc']
val_acc = history.history['val_acc']
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy {}'.format(
    datasets_helper.get_dataset_name()))
plt.xlabel('Epochs')
# FIX: this is the accuracy plot; the y-axis was mislabelled 'Loss'.
plt.ylabel('Accuracy')
plt.legend()
plt.savefig(
    results_saver.get_plot_path(datasets_helper.get_dataset_name(), "acc"))
plt.clf()

# Persist the accumulated results table and close the log for this dataset.
results_saver.add_log("Finished testing dataset {}".format(
    datasets_helper.get_dataset_name()))
results_saver.write_2D_list("results", results)
results_saver.end_logging()
doc_term_matrix = [dictionary.doc2bow(doc) for doc in texts]"""
# NOTE(review): the line above is the tail of a triple-quoted (commented-out)
# block whose opening quotes are earlier in the file — left untouched.
# Build the LDA wrapper used either for real training or only for its
# dictionary (see `test_LDA` switch below).
model = Lda(num_of_topics, num_of_important_words, passes=5, iterations=5)
"""gensim.models.LdaModel(
    doc_term_matrix,
    num_topics=num_of_topics,
    id2word=dictionary,
    passes=2,
    iterations=2)"""
#LDA section
test_LDA = False
if test_LDA:
    # Train and evaluate a classic LDA model: log its most important topic
    # words, classification performance and coherence.
    model.train(documents)
    topic_words_lda = extract_important_words(model.get_topics(), True)
    print(topic_words_lda)
    log_writer.write_2D_list('topic_words_lda', topic_words_lda, 'w+')
    test_model(documents, labels, model, log_writer, 'standard_lda')
    #plot_clustering_chart(model,True,documents,log_writer,'lda',dataset_helper.get_dataset_name(),dataset_helper.get_num_of_topics())
    measureCoherence(topic_words_lda, log_writer, model.dictionary, documents,
                     'lda', dataset_helper.get_dataset_name())
else:
    # Skip LDA training; only build the dictionary the neural models need.
    # NOTE(review): if/else nesting is reconstructed from a whitespace-mangled
    # source — confirm against the original file.
    model.dictionary = corpora.Dictionary(
        [text.split() for text in documents])
# Evaluate the neural topic matrices derived from the embedding weights
# (input side and output side of the network).
neural_lda_in = NeuralTopicMatrix(weight_in, reverse_word_map, num_of_topics,
                                  tokenizer)
neural_lda_out = NeuralTopicMatrix(weight_out, reverse_word_map, num_of_topics,
                                   tokenizer)
#neural_lda_combined = NeuralTopicMatrix(combined_weight, reverse_word_map,num_of_topics,tokenizer)
test_model(documents, labels, neural_lda_in, log_writer, 'neural_lda_in')
test_model(documents, labels, neural_lda_out, log_writer, 'neural_lda_out')
#test_model(documents, labels, neural_lda_combined, log_writer,'neural_lda_combined')
# NOTE(review): the lines up to the first ')' finish a call started above
# this chunk (presumably a TrainingTextGenerator constructor for the test
# generator) — verify against the original file.
datasets_helper, preprocess=preprocess, preload_dataset=True,
    is_predicting=False, tokenizer_mode=tokenizer_mode)
# Evaluate the trained model on the test generator and record the metrics
# together with the dataset name.
result = model.evaluate(x=test)
print(result)
result.append(datasets_helper.get_dataset_name())
#result.append(model.summary())
results.append(result)
results_saver.add_log("Done. Finishing this dataset.")
# Re-create the test-set generator in prediction mode for the final
# per-dataset reporting done by finish_dataset().
gnr = TrainingTextGenerator(
    datasets_helper.get_test_file_path(), batch_size,
    datasets_helper.get_num_of_test_texts(), num_of_words, tokenizer, ";",
    datasets_helper, preprocess=preprocess, preload_dataset=True,
    is_predicting=True, tokenizer_mode=tokenizer_mode)
finish_dataset(model, gnr, datasets_helper, results_saver, history)
# Append results to the shared CSV and close logging for this dataset.
results_saver.add_log(
    "Finished testing dataset {}".format(
        datasets_helper.get_dataset_name()), True)
results_saver.write_2D_list("results", results, 'a+')
results_saver.end_logging()
# NOTE(review): the first line closes a log_writer.add_log("...".format(...))
# call started above this chunk.
datasets_helper.get_dataset_name()))
# Load and preprocess the train/test splits for this dataset.
texts_for_train = datasets_helper.get_dataset(DatasetType.TRAIN)
log_writer.add_log("Preprocessing finished")
log_writer.add_log(
    "Starting preprocessing texts of {} for testing".format(
        datasets_helper.get_dataset_name()))
texts_for_testing = datasets_helper.get_dataset(DatasetType.TEST)
log_writer.add_log("Preprocessing finished")
statistics = []
# Hand the fresh documents to the tester and run the configured tests for
# the current model.
tester.set_new_preprocess_docs(texts_for_train, texts_for_testing)
test_params = {
    "dataset_name": datasets_helper.get_dataset_name(),
    'dataset_helper': datasets_helper
}
tester.do_test(model, num_of_tests, statistics, models_params[model],
               test_params, is_stable[model])
# Separate this dataset's rows in the aggregated CSV output.
statistics.append([datasets_helper.get_dataset_name()])
statistics.append([])
output_csv.extend(statistics)
# NOTE(review): "stats".format(...) has no placeholders, so the dataset name
# and start_time arguments are silently ignored — likely meant
# "stats{}{}"; confirm intended file name before changing.
log_writer.write_2D_list(
    "stats".format(datasets_helper.get_dataset_name(), start_time),
    output_csv, 'a+')
log_writer.add_log(
    'Done testing {} dataset.'.format(
        datasets_helper.get_dataset_name()), True)
log_writer.end_logging()
# Fresh log writer for this test run / regularization setting.
log_writer = LogWriter(log_file_desc='{}{}'.format(test_name,
                                                   regularization))
# Plot training vs. validation loss from the Keras history and save it.
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)
plt.plot(epochs, loss, 'g', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss {}'.format(
    dataset_helper.get_dataset_name()))
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig(log_writer.get_plot_path(dataset_helper.get_dataset_name(),
                                     "loss"))
plt.clf()
"""topic_words_in = [sorted(topic_words,key=lambda x: x[1],reverse=True) for topic_words in topic_words_in]
topic_words_out = [sorted(topic_words,key=lambda x: x[1],reverse=True) for topic_words in topic_words_out]
log_writer = LogWriter(log_file_desc='LDATestsRegularize{}'.format(regularization))
log_writer.write_2D_list('topic_words_in', topic_words_in)
log_writer.write_2D_list('topic_words_out', topic_words_out)"""
# Extract the highest/lowest-weighted words per topic from the embedding
# matrices (input side first, then output side).
topic_words_in_max = get_extremes(weight_in, num_of_topics,
                                  num_of_important_words, reverse_word_map,
                                  True, 'topic_words_in_max', log_writer,
                                  dataset_helper.get_dataset_name())
topic_words_in_min = get_extremes(weight_in, num_of_topics,
                                  num_of_important_words, reverse_word_map,
                                  False, 'topic_words_in_min', log_writer,
                                  dataset_helper.get_dataset_name())
# NOTE(review): this call is cut off here — its remaining arguments
# continue past this chunk of the file.
topic_words_out_max = get_extremes(weight_out, num_of_topics,
                                   num_of_important_words, reverse_word_map,
# NOTE(review): the first line closes a results-path .format(...) call
# started above this chunk, inside nested test loops whose headers are not
# visible here — the indentation below is a best-effort reconstruction.
data_sets[i][3], start_time, i, model_settings_index, index, j))
# Record this run's accuracy in the last statistics row and log it.
statistics[len(statistics) - 1].append(accuracy)
log_writer.add_log(
    "Testing LSA model done with {}% accuracy".format(
        accuracy * 100))
log_writer.add_log("\n\n\n")
statistics.append([])
statistics_to_merge.append(statistics)
"""for model_settings_index, model_settings in enumerate(hdp_variations):
for j in range(num_of_test):
test_checker_hdp = TestChecker(texts_for_testing, data_sets[i][2], log_writer)
hdp = Hdp(4, 15)
hdp.train(texts_for_train)
log_writer.add_log("Starting testing HDP model")
accuracy = test_checker_hdp.test_model(hdp, "\\results\\hdp\\{}\\{}\\{}\\{}".format(i, model_settings_index, index, j))
log_writer.add_log("Testing HDP model done with {}% accuracy".format(accuracy * 100))
log_writer.add_log("\n\n\n")"""
# Flatten the per-run statistics lists into one CSV-ready table and dump it.
output_lda_csv = []
for item in statistics_to_merge:
    for statistic in item:
        output_lda_csv.append(statistic)
log_writer.write_2D_list(
    "\\results\\results-stats\\stats{}{}".format(data_sets[i][3],
                                                 start_time),
    output_lda_csv)
log_writer.end_logging()