patience=param[2],
                                                verbose=0,
                                                mode='auto',
                                                baseline=None,
                                                restore_best_weights=False)
                              ])
    weight_in = autoencoder.get_weights()[0]
    weight_out = autoencoder.get_weights()[2]
    #tst = autoencoder.get_weights()
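    # Note (assumption): for a two-Dense-layer autoencoder, get_weights() returns
    # [encoder_kernel, encoder_bias, decoder_kernel, decoder_bias], so indices 0
    # and 2 pick out the encoder and decoder kernel matrices here.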
    blob = np.array([])

    weight_in = weight_in.transpose()
    #combined_weight = np.dot(weight_in.transpose(), weight_out)
    num_of_important_words = 20
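    # A minimal sketch (assumption: after the transpose, each row of weight_in
    # corresponds to one latent topic) of how the indices of the
    # highest-weighted words per topic could be collected; mapping the column
    # indices back to tokens depends on how the input matrix was built:
    # topic_word_indices = [np.argsort(row)[-num_of_important_words:][::-1]
    #                       for row in weight_in]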

    log_writer = LogWriter(log_file_desc=test_name,
                           result_desc="NeuralTopicModel")

    log_writer.write_any('model', autoencoder.to_json(), 'w+', True)
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(1, len(loss) + 1)
    plt.plot(epochs, loss, 'g', label='Training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and validation loss {}'.format(
        dataset_helper.get_dataset_name()))
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig(
        log_writer.get_plot_path(dataset_helper.get_dataset_name(), "loss"))
    plt.clf()
from dataset_loader.dataset_helper import Dataset_Helper
from results_saver import LogWriter
import os
import sys
from neural_networks.aliaser import *
import tkinter as tk
from tkinter import simpledialog

file_dir = os.path.dirname(__file__)
sys.path.append(file_dir)
root = tk.Tk()
root.withdraw()

preprocess = True
datasets_helper = Dataset_Helper(preprocess)
results_saver = LogWriter(log_file_desc=simpledialog.askstring(
    title="Test Name", prompt="Insert test name:", initialvalue='CONV_GRU_'))
results = []
num_of_words = 10000

while datasets_helper.next_dataset():
    results_saver.add_log("Starting testing dataset {}".format(
        datasets_helper.get_dataset_name()))
    validation_count = datasets_helper.get_num_of_train_texts() // 10
    tokenizer = Tokenizer(num_words=num_of_words,
                          filters='#$%&()*+-<=>@[\\]^_`{|}~\t\n',
                          lower=False,
                          split=' ')
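    # The custom `filters` string deliberately leaves characters such as
    # . , ! ? : ; / out of the filter list (so they are kept in the text), and
    # lower=False preserves case; presumably the Dataset_Helper preprocessing
    # step has already normalised the texts as needed.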
    generator = datasets_helper.text_generator()
    results_saver.add_log("Starting preprocessing and tokenization.")
    tokenizer.fit_on_texts(generator)
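    # fit_on_texts accepts any iterable of texts, so passing the generator lets
    # the vocabulary be built one document at a time instead of loading the
    # whole corpus into memory.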
    results_saver.add_log("Done. Building model now.")
Example #3
    ModelType.RF: {
        'n_estimators': 20,
        'max_features': max_feauters
    },
    ModelType.DT: {
        'max_features': max_feauters
    }
}
start_time = get_time_in_millis()
preprocess = True
models_for_test = test_model.keys()
for model in models_for_test:
    if not test_model[model]:
        continue
    log_writer = LogWriter(log_file_desc='_{}_{}'.format(
        'prep' if preprocess else 'no-prep', model.name),
                           result_desc='Classic')
    tester = GeneralTester(log_writer, start_time)
    datasets_helper = Dataset_Helper(preprocess=preprocess)
    datasets_helper.set_wanted_datasets([0, 2, 3])
    while datasets_helper.next_dataset():
        if 'topic_count' in models_params[model]:
            models_params[model][
                'topic_count'] = datasets_helper.get_num_of_topics()
        topic_names = [(index, item) for index, item in enumerate(
            datasets_helper.get_dataset_topic_names())]
        tester.set_new_dataset(datasets_helper.get_num_of_topics(),
                               topic_names)
        output_csv = []
        """for key,value in test_model.items():
            if not value:
from training_text_generator_RNN import Training_Text_Generator_RNN
from dataset_loader.dataset_helper import Dataset_Helper
from results_saver import LogWriter
import os
import sys
from neural_networks.aliaser import *

file_dir = os.path.dirname(__file__)
sys.path.append(file_dir)
"""config = tf.ConfigProto( device_count = {'GPU': 1 , 'CPU': 4} )
sess = tf.Session(config=config)
keras.backend.set_session(sess)"""

datasets_helper = Dataset_Helper(preprocess=True)
datasets_helper.set_wanted_datasets([0])
results_saver = LogWriter(log_file_desc="Bidirectional-no-relu")
results = []
num_of_words = 15000

while datasets_helper.next_dataset():
    results_saver.add_log("Starting testing dataset {}".format(
        datasets_helper.get_dataset_name()))
    validation_count = 200  #datasets_helper.get_num_of_train_texts() // 10
    tokenizer = Tokenizer(num_words=num_of_words)  #,
    #filters='#$%&()*+-<=>@[\\]^_`{|}~\t\n',
    #lower=False, split=' ')
    generator = datasets_helper.text_generator()
    results_saver.add_log("Starting preprocessing and tokenization.")
    tokenizer.fit_on_texts(generator)
    results_saver.add_log("Done. Building model now.")
from dataset_loader.dataset_helper import Dataset_Helper
from results_saver import LogWriter, finish_dataset
import os
import sys
import tkinter as tk
from tkinter import simpledialog

file_dir = os.path.dirname(__file__)
sys.path.append(file_dir)
root = tk.Tk()
root.withdraw()

preprocess = True
datasets_helper = Dataset_Helper(preprocess)
results_saver = LogWriter(log_file_desc=simpledialog.askstring(
    title="Test Name",
    prompt="Insert test name:",
    initialvalue='LSTM_Embedding_Learned_'))
results = []
num_of_words = 7500
max_seq_len = 250
datasets_helper.set_wanted_datasets([3])
while datasets_helper.next_dataset():
    results_saver.add_log("Starting testing dataset {}".format(
        datasets_helper.get_dataset_name()))
    tokenizer = Tokenizer(num_words=num_of_words)
    #filters='#$%&()*+-<=>@[\\]^_`{|}~\t\n',
    #lower=False, split=' ')
    generator = datasets_helper.text_generator()
    results_saver.add_log("Starting preprocessing and tokenization.")
    tokenizer.fit_on_texts(generator)
    results_saver.add_log("Done. Building model now.")
Example #6
                                            min_delta=0,
                                            patience=1000,
                                            verbose=0,
                                            mode='auto',
                                            baseline=None,
                                            restore_best_weights=False)
                          ])
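# With patience=1000 and restore_best_weights=False this EarlyStopping callback
# will effectively never interrupt training for typical epoch counts; it is
# presumably kept as a placeholder in case the patience is tightened later.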
weight_in = autoencoder.get_weights()[2]
weight_out = autoencoder.get_weights()[4]
blob = np.array([])
weight_in = weight_in.transpose()
#tst = autoencoder.get_weights()
num_of_important_words = 20
from results_saver import LogWriter

log_writer = LogWriter(log_file_desc='{}{}'.format(test_name, regularization))
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)
plt.plot(epochs, loss, 'g', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss {}'.format(
    dataset_helper.get_dataset_name()))
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig(log_writer.get_plot_path(dataset_helper.get_dataset_name(),
                                     "loss"))
plt.clf()
"""topic_words_in = [sorted(topic_words,key=lambda x: x[1],reverse=True) for topic_words in topic_words_in]
topic_words_out = [sorted(topic_words,key=lambda x: x[1],reverse=True) for topic_words in topic_words_out]
Example #7
        'dropouts': hp.randint('dropouts', 3),
        'dropout_values': hp.uniform('dropout_values', 0.01, 0.2),
        'epochs': 20,  #hp.randint('epochs',20),
        'batch_size': batch_size,
        'learning_rate': hp.choice('learning_rate', [0.001, 0.01, 0.0005]),
        'optimizer': hp.choice('optimizer', ['adam', 'rmsprop']),
        'results_saver': results_saver
    }
    return space
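
# For reference, the hyperopt primitives used in the search space behave as
# follows: hp.randint('dropouts', 3) samples an integer in [0, 3),
# hp.uniform draws a float from the given range, and hp.choice picks one of the
# listed values; sample(space) (see the commented block below) draws a single
# concrete configuration for a quick sanity check.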


file_dir = os.path.dirname(__file__)
sys.path.append(file_dir)

datasets_helper = Dataset_Helper(False)
results_saver = LogWriter(log_file_desc="hyperopt-best-param-search")
results = []
datasets_helper.set_wanted_datasets([1])
models_to_test = ['lstm', 'dense', 'embedding', 'bidi']
"""datasets_helper.next_dataset()
space = create_base_params('lstm',datasets_helper)
smpl = sample(space)
print(sample(space))"""
for model in models_to_test:
    while datasets_helper.next_dataset():
        space = create_base_params(model, datasets_helper, results_saver)
        best = fmin(optimize_model,
                    space=space,
                    algo=tpe.suggest,
                    max_evals=30,
                    max_queue_len=1,
Example #8
import numpy as np
import matplotlib.pyplot as plt
from training_text_generator_RNN_embedding import Training_Text_Generator_RNN_Embedding
from dataset_loader.dataset_helper import Dataset_Helper
from results_saver import LogWriter
import os
import sys
from neural_networks.aliaser import *

file_dir = os.path.dirname(__file__)
sys.path.append(file_dir)

datasets_helper = Dataset_Helper()
results_saver = LogWriter(
    log_file_desc="Bidirectional-recurrent-dropout-Embed-preprocessing")
results = []
num_of_words = 10000

while datasets_helper.next_dataset():
    results_saver.add_log("Starting testing dataset {}".format(
        datasets_helper.get_dataset_name()))
    validation_count = datasets_helper.get_num_of_train_texts() // 10
    tokenizer = Tokenizer(num_words=num_of_words,
                          filters='#$%&()*+-<=>@[\\]^_`{|}~\t\n',
                          lower=False,
                          split=' ')
    generator = datasets_helper.text_generator()
    results_saver.add_log("Starting preprocessing and tokenization.")
    tokenizer.fit_on_texts(generator)
    results_saver.add_log("Done. Building model now.")
Example #9
        return

    for item in all_vars[depth]:
        f = [a for a in field]
        f.append(item)
        create_variations(depth + 1, f, all_vars, possibilities)
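
# Note (assumption): create_variations walks all_vars depth-first, extending
# `field` with one item per level; once `depth` reaches len(all_vars) the
# completed combination is presumably appended to `possibilities`, so the
# overall effect is building the Cartesian product of the parameter lists.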


def get_time_in_millis():
    """
    :return: system time in milliseconds
    """
    # time.time() is in seconds, so scale to milliseconds before rounding
    return int(round(time.time() * 1000))


log_writer = LogWriter("log.txt")
base_path = os.getcwd()
csv_folder = base_path + "\\csv_folder\\"
data_sets = [
    (csv_folder + "4" + "\\train.csv", csv_folder + "4" + "\\test.csv", 20,
     "-20newsgroups-"),
    (csv_folder + "1" + "\\train.csv", csv_folder + "1" + "\\test.csv", 10,
     "-reuters-")
]  #,(csv_folder+"2"+"\\train.csv",csv_folder+"2"+"\\test.csv",14)]
#data_sets = [(csv_folder+"2"+"\\train.csv",csv_folder+"2"+"\\test.csv",14)]
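# The dataset paths above hard-code Windows-style "\\" separators; a portable
# alternative would build them with os.path.join, e.g.
# os.path.join(csv_folder, "4", "train.csv").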

strip_nums_params = use_stemmer_params = use_lemmatizer_params = strip_short_params = [
    True, False
]
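# All four *_params names above are bound to the same two-element list; that is
# safe here as long as the lists are only read when enumerating variations and
# never mutated, which appears to be the case.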
preproces_all_vals = [
    strip_nums_params, use_stemmer_params, use_lemmatizer_params,