Example #1
def optimize_model(args):
    # Hyper-parameter search objective: train one model configuration described by
    # `args` and return the negative of its best evaluation score.
    print(args)
    datasets_helper = Dataset_Helper(False)
    datasets_helper.set_wanted_datasets([args['dataset_num']])
    datasets_helper.next_dataset()
    tokenizer = Tokenizer(num_words=args['num_of_words'])
    generator = datasets_helper.text_generator()
    tokenizer.fit_on_texts(generator)
    optimizer = create_optimizer(args['optimizer'], args['learning_rate'])
    model = resolve_network_type(args['network_type'])
    model.set_params(args)
    model.optimizer = optimizer
    if args['network_type'] == 'embedding':
        model.tokenizer = tokenizer
    model.compile_model()
    model.fit(datasets_helper=datasets_helper,
              tokenizer=tokenizer,
              validation_count=500)
    results = model.evaluate(datasets_helper=datasets_helper,
                             tokenizer=tokenizer)
    print(results)
    args['results_saver'].write_any(
        'logs', [get_important_params_from_args(results[1], args)], 'a')
    # Release the model and data between trials and clear the Keras session so
    # repeated runs do not keep accumulating graph state and memory.
    del model
    del tokenizer
    del generator
    del datasets_helper
    tf.compat.v2.keras.backend.clear_session()
    return -np.amax(results[1])
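
The function above reads like a hyper-parameter search objective: it trains one configuration and returns the negative of its best score so a minimizer can maximize accuracy. A hypothetical driver, assuming the hyperopt library (fmin/tpe/hp) and a search space whose keys simply mirror the args[...] lookups above, could look like this:

# Sketch only; not part of the original source. The keys mirror optimize_model's
# args[...] lookups, the values are purely illustrative.
from hyperopt import fmin, tpe, hp

space = {
    'dataset_num': 0,
    'num_of_words': hp.choice('num_of_words', [10000, 15000]),
    'optimizer': hp.choice('optimizer', ['adam', 'rmsprop']),
    'learning_rate': hp.uniform('learning_rate', 1e-4, 1e-2),
    'network_type': hp.choice('network_type', ['dense', 'embedding']),
    'results_saver': LogWriter(log_file_desc='hyperopt-search'),
}
best = fmin(fn=optimize_model, space=space, algo=tpe.suggest, max_evals=20)
print(best)
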
Example #2

    # Fix random seeds for reproducibility; `param` and `i` come from an enclosing
    # test loop that is not shown in this snippet.
    seed(42)
    tf.random.set_seed(42)
    test_name = param[0]
    i += 1
    #config = tf.compat.v1.ConfigProto( device_count = {'GPU': 1 , 'CPU': 4} )
    #sess = tf.compat.v1.Session(config=config)
    #tf.keras.backend.set_session(sess)
    #results_saver = LogWriter(log_file_desc="Autoencoder")
    results = []

    #mycolors = np.array([color for name, color in mcolors.XKCD_COLORS.items()])
    from sys import getsizeof
    num_of_words = 10000

    dataset_helper = Dataset_Helper(True)
    dataset_helper.set_wanted_datasets([param[1]])
    dataset_helper.next_dataset()
    num_of_topics = dataset_helper.get_num_of_topics()
    documents = dataset_helper.get_texts_as_list()
    labels = dataset_helper.get_labels(dataset_helper.get_train_file_path())
    tokenizer = Tokenizer(num_words=num_of_words)
    tokenizer.fit_on_texts(documents)
    #items= tokenizer.word_index
    reverse_word_map = dict(map(reversed, tokenizer.word_index.items()))
    matrix = tokenizer.texts_to_matrix(documents, mode='binary')
    # Report the (shallow) in-memory sizes of the raw texts, the tokenizer and the matrix.
    print(getsizeof(documents))
    print(getsizeof(tokenizer))
    print(getsizeof(matrix))
    #mydict = corpora.Dictionary([line.split() for line in documents],prune_at=num_of_words)
    #corpus = [mydict.doc2bow(line.split()) for line in documents]
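
    # Sketch only (not in the original source): reverse_word_map above inverts the
    # tokenizer's word index, so a small helper can decode an encoded sequence
    # back into readable text, e.g.:
    def sequence_to_text(sequence, reverse_word_map):
        return ' '.join(reverse_word_map.get(index, '?') for index in sequence)

    # print(sequence_to_text(tokenizer.texts_to_sequences(documents[:1])[0], reverse_word_map))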
Example #3

import matplotlib.pyplot as plt
from training_text_generator_RNN import Training_Text_Generator_RNN
from dataset_loader.dataset_helper import Dataset_Helper
from results_saver import LogWriter
import os
import sys
from neural_networks.aliaser import *

file_dir = os.path.dirname(__file__)
sys.path.append(file_dir)
"""config = tf.ConfigProto( device_count = {'GPU': 1 , 'CPU': 4} )
sess = tf.Session(config=config)
keras.backend.set_session(sess)"""

datasets_helper = Dataset_Helper(preprocess=True)
datasets_helper.set_wanted_datasets([0])
results_saver = LogWriter(log_file_desc="Bidirectional-no-relu")
results = []
num_of_words = 15000

while datasets_helper.next_dataset():
    results_saver.add_log("Starting testing dataset {}".format(
        datasets_helper.get_dataset_name()))
    validation_count = 200  #datasets_helper.get_num_of_train_texts() // 10
    tokenizer = Tokenizer(num_words=num_of_words)  #,
    #filters='#$%&()*+-<=>@[\\]^_`{|}~\t\n',
    #lower=False, split=' ')
    generator = datasets_helper.text_generator()
    results_saver.add_log("Starting preprocessing and tokenization.")
    tokenizer.fit_on_texts(generator)
    results_saver.add_log("Done. Building model now.")
Example #4
    ModelType.DT: {
        'max_features': max_feauters
    }
}
start_time = get_time_in_millis()
preprocess = True
models_for_test = test_model.keys()
for model in models_for_test:
    if not test_model[model]:
        continue
    log_writer = LogWriter(log_file_desc='_{}_{}'.format(
        'prep' if preprocess else 'no-prep', model.name),
                           result_desc='Classic')
    tester = GeneralTester(log_writer, start_time)
    datasets_helper = Dataset_Helper(preprocess=preprocess)
    datasets_helper.set_wanted_datasets([0, 2, 3])
    while datasets_helper.next_dataset():
        if 'topic_count' in models_params[model]:
            models_params[model][
                'topic_count'] = datasets_helper.get_num_of_topics()
        topic_names = [(index, item) for index, item in enumerate(
            datasets_helper.get_dataset_topic_names())]
        tester.set_new_dataset(datasets_helper.get_num_of_topics(),
                               topic_names)
        output_csv = []
        """for key,value in test_model.items():
            if not value:
                models_params.pop(key)"""
        log_writer.write_any("model-settings",
                             json.dumps(models_params[model]), 'w+', True)
        seed = 5
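
Example #4 begins mid-dict, so the configuration it iterates over is only partly visible. Under that assumption, the two structures it relies on presumably look roughly like this, defined before the loop (values purely illustrative; ModelType and max_feauters are the repository's own names):

# Sketch only: assumed shape of the configuration used in Example #4.
max_feauters = 10000                                             # spelling kept from the snippet above
test_model = {ModelType.DT: True}                                # which models should actually run
models_params = {ModelType.DT: {'max_features': max_feauters}}   # per-model settings
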
Example #5
sys.path.append(file_dir)
root = tk.Tk()
root.withdraw()
test_name = simpledialog.askstring(title="Test Name",
                                   prompt="Insert test name:",
                                   initialvalue='LDATests')
#config = tf.compat.v1.ConfigProto( device_count = {'GPU': 1 , 'CPU': 4} )
#sess = tf.compat.v1.Session(config=config)
#tf.keras.backend.set_session(sess)
#results_saver = LogWriter(log_file_desc="Autoencoder")
results = []
#mycolors = np.array([color for name, color in mcolors.XKCD_COLORS.items()])

num_of_words = 10000
dataset_helper = Dataset_Helper(True)
dataset_helper.set_wanted_datasets([2])
dataset_helper.next_dataset()
num_of_topics = dataset_helper.get_num_of_topics()
documents = dataset_helper.get_texts_as_list()
labels = dataset_helper.get_labels(dataset_helper.get_train_file_path())
tokenizer = Tokenizer(num_words=num_of_words)
tokenizer.fit_on_texts(documents)
#items= tokenizer.word_index
reverse_word_map = dict(map(reversed, tokenizer.word_index.items()))
matrix = tokenizer.texts_to_matrix(documents, mode='binary')

#mydict = corpora.Dictionary([line.split() for line in documents],prune_at=num_of_words)
#corpus = [mydict.doc2bow(line.split()) for line in documents]

#tfidf = TfidfModel(corpus)
#print(tfidf)
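
Example #5 ends with gensim's corpus/TfidfModel steps commented out and the dialog default set to 'LDATests', so a topic model is presumably fit next. A minimal sketch of that step (an assumption, using scikit-learn's LatentDirichletAllocation on the binary matrix built above rather than the gensim path) could be:

# Sketch only: not part of the original source.
from sklearn.decomposition import LatentDirichletAllocation

lda = LatentDirichletAllocation(n_components=num_of_topics, random_state=42)
doc_topic = lda.fit_transform(matrix)   # document-topic distribution, shape (len(documents), num_of_topics)
print(doc_topic.shape)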