def optimize_model(args):
    print(args)
    # Load only the dataset selected for this optimization run.
    datasets_helper = Dataset_Helper(False)
    datasets_helper.set_wanted_datasets([args['dataset_num']])
    datasets_helper.next_dataset()
    # Build the vocabulary from the training texts.
    tokenizer = Tokenizer(num_words=args['num_of_words'])
    generator = datasets_helper.text_generator()
    tokenizer.fit_on_texts(generator)
    # Instantiate the network described by the current hyperparameters.
    optimizer = create_optimizer(args['optimizer'], args['learning_rate'])
    model = resolve_network_type(args['network_type'])
    model.set_params(args)
    model.optimizer = optimizer
    if args['network_type'] == 'embedding':
        model.tokenizer = tokenizer
    model.compile_model()
    # Train, evaluate and log the run.
    model.fit(datasets_helper=datasets_helper, tokenizer=tokenizer, validation_count=500)
    results = model.evaluate(datasets_helper=datasets_helper, tokenizer=tokenizer)
    print(results)
    args['results_saver'].write_any(
        'logs', [get_important_params_from_args(results[1], args)], 'a')
    # Free memory between trials so repeated runs do not accumulate graph state.
    del model
    del tokenizer
    del generator
    del datasets_helper
    tf.compat.v2.keras.backend.clear_session()
    # Negate the best accuracy so that a minimizing optimizer maximizes accuracy.
    return -np.amax(results[1])
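# Hypothetical usage sketch (an assumption, not taken from the scripts above):
# because optimize_model() returns -np.amax(results[1]), the negated best accuracy,
# it can serve directly as the objective of a minimizing hyperparameter search.
# The hyperopt library and all search-space values below are illustrative choices.
from hyperopt import fmin, tpe, hp

results_saver = LogWriter(log_file_desc='OPT_')  # description string is illustrative

search_space = {
    'dataset_num': 0,                                          # fixed dataset index (example value)
    'num_of_words': hp.choice('num_of_words', [5000, 10000]),  # vocabulary size candidates
    'optimizer': hp.choice('optimizer', ['adam', 'rmsprop']),  # names handed to create_optimizer (assumed valid)
    'learning_rate': hp.uniform('learning_rate', 1e-4, 1e-2),
    'network_type': 'embedding',                               # a type resolve_network_type is shown to handle
    'results_saver': results_saver,                            # constant objects pass through the space unchanged
}

# TPE minimizes the objective, i.e. maximizes the evaluated accuracy.
best = fmin(fn=optimize_model, space=search_space, algo=tpe.suggest, max_evals=50)
print(best)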
preprocess = True
datasets_helper = Dataset_Helper(preprocess)
# Ask for a test name used to label the log files of this experiment.
results_saver = LogWriter(log_file_desc=simpledialog.askstring(
    title="Test Name", prompt="Insert test name:", initialvalue='CONV_GRU_'))
results = []
num_of_words = 10000

# Run the experiment once for every available dataset.
while datasets_helper.next_dataset():
    results_saver.add_log("Starting testing dataset {}".format(
        datasets_helper.get_dataset_name()))
    validation_count = datasets_helper.get_num_of_train_texts() // 10
    # Strip the listed special characters during tokenization; keep case (lower=False).
    tokenizer = Tokenizer(num_words=num_of_words,
                          filters='#$%&()*+-<=>@[\\]^_`{|}~\t\n',
                          lower=False, split=' ')
    generator = datasets_helper.text_generator()
    results_saver.add_log("Starting preprocessing and tokenization.")
    tokenizer.fit_on_texts(generator)
    results_saver.add_log("Done. Building model now.")

    # Training hyperparameters.
    batch_size = 256
    gauss_noise = 0.5
    epochs = 1
    val_split = 0.2
    val_data_count = int(datasets_helper.get_num_of_train_texts() * val_split)

    # Convolutional feature extractor followed by a GRU and a softmax classifier
    # over the topics of the current dataset.
    model = Sequential()
    model.add(Conv1D(40, 1, activation='relu', input_shape=(1, num_of_words)))
    model.add(MaxPooling1D(1))
    model.add(Conv1D(40, 1, activation='relu'))
    model.add(GRU(40, dropout=0.1, recurrent_dropout=0.5))
    model.add(Dense(datasets_helper.get_num_of_topics(), activation='softmax'))