Example 1
import numpy as np
import tensorflow as tf
from tkinter import simpledialog
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, GRU, Dense

# Dataset_Helper, LogWriter, create_optimizer, resolve_network_type and
# get_important_params_from_args are project-specific helpers; import them
# from the surrounding project (their module paths are not shown here).


def optimize_model(args):
    """Train and evaluate one configuration; returns the negated best score
    so that a minimizing optimizer effectively maximizes it."""
    print(args)
    # Restrict the helper to the single dataset selected for this trial.
    datasets_helper = Dataset_Helper(False)
    datasets_helper.set_wanted_datasets([args['dataset_num']])
    datasets_helper.next_dataset()
    # Fit the tokenizer on the dataset's texts.
    tokenizer = Tokenizer(num_words=args['num_of_words'])
    generator = datasets_helper.text_generator()
    tokenizer.fit_on_texts(generator)
    # Build the requested network type and attach this trial's optimizer.
    optimizer = create_optimizer(args['optimizer'], args['learning_rate'])
    model = resolve_network_type(args['network_type'])
    model.set_params(args)
    model.optimizer = optimizer
    if args['network_type'] == 'embedding':
        model.tokenizer = tokenizer
    model.compile_model()
    # Train, then evaluate on the held-out texts and log the key parameters.
    model.fit(datasets_helper=datasets_helper,
              tokenizer=tokenizer,
              validation_count=500)
    results = model.evaluate(datasets_helper=datasets_helper,
                             tokenizer=tokenizer)
    print(results)
    args['results_saver'].write_any(
        'logs', [get_important_params_from_args(results[1], args)], 'a')
    # Free memory between trials so consecutive runs do not accumulate state.
    del model
    del tokenizer
    del generator
    del datasets_helper
    tf.compat.v2.keras.backend.clear_session()
    return -np.amax(results[1])
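
The objective returns the negated best evaluation score, so it is shaped for a minimizing optimizer such as hyperopt's fmin. Below is a minimal usage sketch under that assumption; the search-space values are illustrative, not taken from the original.

# Hedged sketch: driving optimize_model with hyperopt (assumed, not shown in the source).
from hyperopt import fmin, tpe, hp, Trials

search_space = {
    'dataset_num': hp.choice('dataset_num', [0]),                   # illustrative dataset index
    'num_of_words': hp.choice('num_of_words', [5000, 10000]),
    'optimizer': hp.choice('optimizer', ['adam', 'rmsprop']),
    'learning_rate': hp.loguniform('learning_rate', np.log(1e-4), np.log(1e-2)),
    'network_type': hp.choice('network_type', ['embedding']),       # 'embedding' is the only type named above
    'results_saver': LogWriter(log_file_desc='hyperopt_run'),       # project logger, passed through unchanged
}

best = fmin(fn=optimize_model,
            space=search_space,
            algo=tpe.suggest,
            max_evals=20,
            trials=Trials())
print(best)
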
# Interactive run: build the helpers and log results under a user-supplied test name.
preprocess = True
datasets_helper = Dataset_Helper(preprocess)
results_saver = LogWriter(log_file_desc=simpledialog.askstring(
    title="Test Name", prompt="Insert test name:", initialvalue='CONV_GRU_'))
results = []
num_of_words = 10000

while datasets_helper.next_dataset():
    results_saver.add_log("Starting testing dataset {}".format(
        datasets_helper.get_dataset_name()))
    # Hold out 10 % of the training texts for validation.
    validation_count = datasets_helper.get_num_of_train_texts() // 10
    # Tokenizer for this dataset: case is preserved and sentence punctuation
    # is deliberately left out of the filter list.
    tokenizer = Tokenizer(num_words=num_of_words,
                          filters='#$%&()*+-<=>@[\\]^_`{|}~\t\n',
                          lower=False,
                          split=' ')
    generator = datasets_helper.text_generator()
    results_saver.add_log("Starting preprocessing and tokenization.")
    tokenizer.fit_on_texts(generator)
    results_saver.add_log("Done. Building model now.")

    # Training hyperparameters for this dataset.
    batch_size = 256
    gauss_noise = 0.5
    epochs = 1
    val_split = 0.2
    val_data_count = int(datasets_helper.get_num_of_train_texts() * val_split)
    # Conv1D feature extraction followed by a GRU and a softmax classifier over
    # the dataset's topics; the input is a single "timestep" holding a
    # bag-of-words vector of length num_of_words.
    model = Sequential()
    model.add(Conv1D(40, 1, activation='relu', input_shape=(1, num_of_words)))
    model.add(MaxPooling1D(1))
    model.add(Conv1D(40, 1, activation='relu'))
    model.add(GRU(40, dropout=0.1, recurrent_dropout=0.5))
    model.add(Dense(datasets_helper.get_num_of_topics(), activation='softmax'))
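
    # --- Hedged continuation sketch, not part of the original snippet ---
    # One way the stack above could be compiled and trained on the tokenizer's
    # bag-of-words output; 'train_texts' and 'train_labels' are hypothetical
    # placeholders and the optimizer choice is illustrative.
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    x_train = tokenizer.texts_to_matrix(train_texts, mode='binary')
    x_train = x_train.reshape(-1, 1, num_of_words)   # one "timestep" per text
    y_train = tf.keras.utils.to_categorical(
        train_labels, datasets_helper.get_num_of_topics())
    model.fit(x_train, y_train,
              epochs=epochs,
              batch_size=batch_size,
              validation_split=val_split)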