        patience=param[2],
        verbose=0,
        mode='auto',
        baseline=None,
        restore_best_weights=False)
])
weight_in = autoencoder.get_weights()[0]
weight_out = autoencoder.get_weights()[2]
#tst = autoencoder.get_weights()
blob = np.array([])
weight_in = weight_in.transpose()
#combined_weight = np.dot(weight_in.transpose(), weight_out)
num_of_important_words = 20
log_writer = LogWriter(log_file_desc='{}{}'.format(test_name, ""),
                       result_desc="NeuralTopicModel")
log_writer.write_any('model', autoencoder.to_json(), 'w+', True)
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)
plt.plot(epochs, loss, 'g', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss {}'.format(
    dataset_helper.get_dataset_name()))
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig(log_writer.get_plot_path(dataset_helper.get_dataset_name(), "loss"))
plt.clf()
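# --- Hedged sketch, not part of the original script: one plausible way to use
# num_of_important_words above, mapping each row of the transposed encoder
# weight matrix to its strongest words. It assumes a fitted Keras Tokenizer
# named `tokenizer` is in scope and that weight_in has shape
# (hidden_units, vocabulary_size); both are assumptions, not confirmed here.
for topic_idx, topic_row in enumerate(weight_in):
    top_indices = np.argsort(topic_row)[::-1][:num_of_important_words]
    # Tokenizer word indices start at 1, so fall back to '?' for index 0.
    top_words = [tokenizer.index_word.get(i, '?') for i in top_indices]
    log_writer.add_log('Topic {}: {}'.format(topic_idx, ' '.join(top_words)))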
from dataset_loader.dataset_helper import Dataset_Helper
from results_saver import LogWriter
import os
import sys
from neural_networks.aliaser import *
import tkinter as tk
from tkinter import simpledialog

file_dir = os.path.dirname(__file__)
sys.path.append(file_dir)
root = tk.Tk()
root.withdraw()
preprocess = True
datasets_helper = Dataset_Helper(preprocess)
results_saver = LogWriter(log_file_desc=simpledialog.askstring(
    title="Test Name", prompt="Insert test name:", initialvalue='CONV_GRU_'))
results = []
num_of_words = 10000
while datasets_helper.next_dataset():
    results_saver.add_log("Starting testing dataset {}".format(
        datasets_helper.get_dataset_name()))
    validation_count = datasets_helper.get_num_of_train_texts() // 10
    tokenizer = Tokenizer(num_words=num_of_words,
                          filters='#$%&()*+-<=>@[\\]^_`{|}~\t\n',
                          lower=False,
                          split=' ')
    generator = datasets_helper.text_generator()
    results_saver.add_log("Starting preprocessing and tokenization.")
    tokenizer.fit_on_texts(generator)
    results_saver.add_log("Done. Building model now.")
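    # --- Hedged sketch (assumption, not the original training code): a common
    # next step after fit_on_texts is converting texts into fixed-length
    # integer sequences for a Conv/GRU model. The sample texts are
    # placeholders; pad_sequences is assumed to be re-exported by
    # neural_networks.aliaser (it lives in keras.preprocessing.sequence).
    sample_texts = ["first preprocessed document", "second preprocessed document"]
    sample_sequences = tokenizer.texts_to_sequences(sample_texts)
    sample_batch = pad_sequences(sample_sequences, maxlen=200)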
    ModelType.RF: {
        'n_estimators': 20,
        'max_features': max_feauters
    },
    ModelType.DT: {
        'max_features': max_feauters
    }
}
start_time = get_time_in_millis()
preprocess = True
models_for_test = test_model.keys()
for model in models_for_test:
    if not test_model[model]:
        continue
    log_writer = LogWriter(log_file_desc='_{}_{}'.format(
        'prep' if preprocess else 'no-prep', model.name),
        result_desc='Classic')
    tester = GeneralTester(log_writer, start_time)
    datasets_helper = Dataset_Helper(preprocess=preprocess)
    datasets_helper.set_wanted_datasets([0, 2, 3])
    while datasets_helper.next_dataset():
        if 'topic_count' in models_params[model]:
            models_params[model]['topic_count'] = datasets_helper.get_num_of_topics()
        topic_names = [(index, item) for index, item in
                       enumerate(datasets_helper.get_dataset_topic_names())]
        tester.set_new_dataset(datasets_helper.get_num_of_topics(), topic_names)
        output_csv = []
        """for key,value in test_model.items():
            if not value:
from training_text_generator_RNN import Training_Text_Generator_RNN
from dataset_loader.dataset_helper import Dataset_Helper
from results_saver import LogWriter
import os
import sys
from neural_networks.aliaser import *

file_dir = os.path.dirname(__file__)
sys.path.append(file_dir)
"""config = tf.ConfigProto(device_count={'GPU': 1, 'CPU': 4})
sess = tf.Session(config=config)
keras.backend.set_session(sess)"""
datasets_helper = Dataset_Helper(preprocess=True)
datasets_helper.set_wanted_datasets([0])
results_saver = LogWriter(log_file_desc="Bidirectional-no-relu")
results = []
num_of_words = 15000
while datasets_helper.next_dataset():
    results_saver.add_log("Starting testing dataset {}".format(
        datasets_helper.get_dataset_name()))
    validation_count = 200  #datasets_helper.get_num_of_train_texts() // 10
    tokenizer = Tokenizer(num_words=num_of_words)
    #filters='#$%&()*+-<=>@[\\]^_`{|}~\t\n',
    #lower=False, split=' ')
    generator = datasets_helper.text_generator()
    results_saver.add_log("Starting preprocessing and tokenization.")
    tokenizer.fit_on_texts(generator)
    results_saver.add_log("Done. Building model now.")
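    # --- Hedged sketch (assumption): a minimal model of the kind the
    # "Bidirectional-no-relu" description suggests. Layer sizes are
    # illustrative; Sequential, Embedding, Bidirectional, LSTM and Dense are
    # standard Keras names assumed to be re-exported by neural_networks.aliaser.
    model = Sequential()
    model.add(Embedding(num_of_words, 128))
    model.add(Bidirectional(LSTM(64)))
    model.add(Dense(datasets_helper.get_num_of_topics(), activation='softmax'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])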
from dataset_loader.dataset_helper import Dataset_Helper
from results_saver import LogWriter, finish_dataset
import os
import sys
import tkinter as tk
from tkinter import simpledialog
from neural_networks.aliaser import *  # needed: provides Tokenizer below

file_dir = os.path.dirname(__file__)
sys.path.append(file_dir)
root = tk.Tk()
root.withdraw()
preprocess = True
datasets_helper = Dataset_Helper(preprocess)
results_saver = LogWriter(log_file_desc=simpledialog.askstring(
    title="Test Name", prompt="Insert test name:",
    initialvalue='LSTM_Embedding_Learned_'))
results = []
num_of_words = 7500
max_seq_len = 250
datasets_helper.set_wanted_datasets([3])
while datasets_helper.next_dataset():
    results_saver.add_log("Starting testing dataset {}".format(
        datasets_helper.get_dataset_name()))
    tokenizer = Tokenizer(num_words=num_of_words)
    #filters='#$%&()*+-<=>@[\\]^_`{|}~\t\n',
    #lower=False, split=' ')
    generator = datasets_helper.text_generator()
    results_saver.add_log("Starting preprocessing and tokenization.")
    tokenizer.fit_on_texts(generator)
    results_saver.add_log("Done. Building model now.")
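    # --- Hedged sketch (assumption): an embedding layer trained from scratch,
    # as the default test name 'LSTM_Embedding_Learned_' suggests, sized to
    # the vocabulary and sequence-length settings above. The output dimension
    # of 100 is illustrative only.
    embedding_layer = Embedding(input_dim=num_of_words, output_dim=100,
                                input_length=max_seq_len, trainable=True)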
        min_delta=0,
        patience=1000,
        verbose=0,
        mode='auto',
        baseline=None,
        restore_best_weights=False)
])
weight_in = autoencoder.get_weights()[2]
weight_out = autoencoder.get_weights()[4]
blob = np.array([])
weight_in = weight_in.transpose()
#tst = autoencoder.get_weights()
num_of_important_words = 20
from results_saver import LogWriter
log_writer = LogWriter(log_file_desc='{}{}'.format(test_name, regularization))
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)
plt.plot(epochs, loss, 'g', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss {}'.format(
    dataset_helper.get_dataset_name()))
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig(log_writer.get_plot_path(dataset_helper.get_dataset_name(), "loss"))
plt.clf()
"""topic_words_in = [sorted(topic_words, key=lambda x: x[1], reverse=True)
                  for topic_words in topic_words_in]
topic_words_out = [sorted(topic_words, key=lambda x: x[1], reverse=True)
                   for topic_words in topic_words_out]
        'dropouts': hp.randint('dropouts', 3),
        'dropout_values': hp.uniform('dropout_values', 0.01, 0.2),
        'epochs': 20,  #hp.randint('epochs',20),
        'batch_size': batch_size,
        'learning_rate': hp.choice('learning_rate', [0.001, 0.01, 0.0005]),
        'optimizer': hp.choice('optimizer', ['adam', 'rmsprop']),
        'results_saver': results_saver
    }
    return space


file_dir = os.path.dirname(__file__)
sys.path.append(file_dir)
datasets_helper = Dataset_Helper(False)
results_saver = LogWriter(log_file_desc="hyperopt-best-param-search")
results = []
datasets_helper.set_wanted_datasets([1])
models_to_test = ['lstm', 'dense', 'embedding', 'bidi']
"""datasets_helper.next_dataset()
space = create_base_params('lstm', datasets_helper)
smpl = sample(space)
print(sample(space))"""
for model in models_to_test:
    # Re-create the helper per model so the dataset iterator starts from the
    # beginning for every tested architecture (otherwise next_dataset() is
    # already exhausted after the first model).
    datasets_helper = Dataset_Helper(False)
    datasets_helper.set_wanted_datasets([1])
    while datasets_helper.next_dataset():
        space = create_base_params(model, datasets_helper, results_saver)
        # fmin minimizes optimize_model over the search space using the
        # Tree-structured Parzen Estimator (TPE) algorithm.
        best = fmin(optimize_model,
                    space=space,
                    algo=tpe.suggest,
                    max_evals=30,
                    max_queue_len=1,
import numpy as np
import matplotlib.pyplot as plt
from training_text_generator_RNN_embedding import Training_Text_Generator_RNN_Embedding
from dataset_loader.dataset_helper import Dataset_Helper
from results_saver import LogWriter
import os
import sys
from neural_networks.aliaser import *

file_dir = os.path.dirname(__file__)
sys.path.append(file_dir)
datasets_helper = Dataset_Helper()
results_saver = LogWriter(
    log_file_desc="Bidirectional-recurrent-dropout-Embed-preprocessing")
results = []
num_of_words = 10000
while datasets_helper.next_dataset():
    results_saver.add_log("Starting testing dataset {}".format(
        datasets_helper.get_dataset_name()))
    validation_count = datasets_helper.get_num_of_train_texts() // 10
    tokenizer = Tokenizer(num_words=num_of_words,
                          filters='#$%&()*+-<=>@[\\]^_`{|}~\t\n',
                          lower=False,
                          split=' ')
    generator = datasets_helper.text_generator()
    results_saver.add_log("Starting preprocessing and tokenization.")
    tokenizer.fit_on_texts(generator)
    results_saver.add_log("Done. Building model now.")
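    # --- Hedged sketch (assumption): the recurrent-dropout layer named in the
    # test description. `dropout` masks the layer inputs, `recurrent_dropout`
    # masks the recurrent state; the rates and width shown are illustrative.
    recurrent_layer = Bidirectional(LSTM(64, dropout=0.2, recurrent_dropout=0.2))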
        return
    for item in all_vars[depth]:
        f = [a for a in field]
        f.append(item)
        create_variations(depth + 1, f, all_vars, possibilities)


def get_time_in_millis():
    """
    :return: system time in milliseconds
    """
    # multiply before rounding so sub-second precision is kept
    return int(round(time.time() * 1000))


log_writer = LogWriter("log.txt")
base_path = os.getcwd()
csv_folder = base_path + "\\csv_folder\\"
data_sets = [
    (csv_folder + "4" + "\\train.csv", csv_folder + "4" + "\\test.csv", 20,
     "-20newsgroups-"),
    (csv_folder + "1" + "\\train.csv", csv_folder + "1" + "\\test.csv", 10,
     "-reuters-")
]
#,(csv_folder+"2"+"\\train.csv",csv_folder+"2"+"\\test.csv",14)]
#data_sets = [(csv_folder+"2"+"\\train.csv",csv_folder+"2"+"\\test.csv",14)]
strip_nums_params = use_stemmer_params = use_lemmatizer_params = strip_short_params = [
    True, False
]
preproces_all_vals = [
    strip_nums_params, use_stemmer_params, use_lemmatizer_params,