def optimize_model(args):
    """Train and evaluate one hyperparameter configuration; returns a value to minimize."""
    print(args)
    # Load the requested dataset.
    datasets_helper = Dataset_Helper(False)
    datasets_helper.set_wanted_datasets([args['dataset_num']])
    datasets_helper.next_dataset()
    # Fit the tokenizer on the training texts.
    tokenizer = Tokenizer(num_words=args['num_of_words'])
    generator = datasets_helper.text_generator()
    tokenizer.fit_on_texts(generator)
    # Build the network for the requested architecture and hyperparameters.
    optimizer = create_optimizer(args['optimizer'], args['learning_rate'])
    model = resolve_network_type(args['network_type'])
    model.set_params(args)
    model.optimizer = optimizer
    if args['network_type'] == 'embedding':
        model.tokenizer = tokenizer
    model.compile_model()
    # Train, evaluate and log the results.
    model.fit(datasets_helper=datasets_helper, tokenizer=tokenizer, validation_count=500)
    results = model.evaluate(datasets_helper=datasets_helper, tokenizer=tokenizer)
    print(results)
    args['results_saver'].write_any('logs', [get_important_params_from_args(results[1], args)], 'a')
    # Release memory before the next trial.
    del model
    del tokenizer
    del generator
    del datasets_helper
    tf.compat.v2.keras.backend.clear_session()
    # Negate the best metric so a minimizer effectively maximizes it.
    return -np.amax(results[1])
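# --- Illustrative sketch, not part of the original file ----------------------
# optimize_model negates its score, so it can serve directly as an objective
# for a minimizing hyperparameter search. The snippet below shows one possible
# wiring with hyperopt; the library choice, search-space keys, value ranges,
# and the LogWriter instance are assumptions that merely mirror the args
# accessed above.
from hyperopt import fmin, tpe, hp, Trials
import numpy as np

search_space = {
    'dataset_num': 0,  # fixed for a single study
    'num_of_words': hp.choice('num_of_words', [5000, 10000, 15000]),
    'optimizer': hp.choice('optimizer', ['adam', 'rmsprop', 'sgd']),
    'learning_rate': hp.loguniform('learning_rate', np.log(1e-4), np.log(1e-2)),
    'network_type': hp.choice('network_type', ['embedding', 'lstm']),
    'results_saver': LogWriter(log_file_desc='hyperopt-example'),  # hypothetical log target
}
trials = Trials()
best = fmin(fn=optimize_model, space=search_space, algo=tpe.suggest,
            max_evals=20, trials=trials)
print(best)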
# Fixed seeds for reproducibility across runs.
seed(42)
tf.random.set_seed(42)
test_name = param[0]
i += 1
#config = tf.compat.v1.ConfigProto( device_count = {'GPU': 1 , 'CPU': 4} )
#sess = tf.compat.v1.Session(config=config)
#tf.keras.backend.set_session(sess)
#results_saver = LogWriter(log_file_desc="Autoencoder")
results = []
#mycolors = np.array([color for name, color in mcolors.XKCD_COLORS.items()])
from sys import getsizeof

# Load the dataset selected by the current parameter set.
num_of_words = 10000
dataset_helper = Dataset_Helper(True)
dataset_helper.set_wanted_datasets([param[1]])
dataset_helper.next_dataset()
num_of_topics = dataset_helper.get_num_of_topics()
documents = dataset_helper.get_texts_as_list()
labels = dataset_helper.get_labels(dataset_helper.get_train_file_path())

# Tokenize the documents and build a binary bag-of-words matrix.
tokenizer = Tokenizer(num_words=num_of_words)
tokenizer.fit_on_texts(documents)
#items= tokenizer.word_index
reverse_word_map = dict(map(reversed, tokenizer.word_index.items()))
matrix = tokenizer.texts_to_matrix(documents, mode='binary')

# Rough memory footprint of the intermediate objects.
print(getsizeof(documents))
print(getsizeof(tokenizer))
print(getsizeof(matrix))
#mydict = corpora.Dictionary([line.split() for line in documents],prune_at=num_of_words)
#corpus = [mydict.doc2bow(line.split()) for line in documents]
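# --- Illustrative sketch, not from the original file -------------------------
# texts_to_matrix returns a dense NumPy array of shape
# (len(documents), num_of_words), so its memory grows with both the corpus size
# and the vocabulary cap. The lines below are an assumed way to inspect that
# cost more precisely than sys.getsizeof.
print(matrix.shape)                                  # (number of documents, num_of_words)
print('{:.1f} MiB'.format(matrix.nbytes / 2**20))    # exact buffer size of the dense matrix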
import os
import sys

import matplotlib.pyplot as plt

from training_text_generator_RNN import Training_Text_Generator_RNN
from dataset_loader.dataset_helper import Dataset_Helper
from results_saver import LogWriter
from neural_networks.aliaser import *

file_dir = os.path.dirname(__file__)
sys.path.append(file_dir)

"""config = tf.ConfigProto( device_count = {'GPU': 1 , 'CPU': 4} )
sess = tf.Session(config=config)
keras.backend.set_session(sess)"""

# Run the bidirectional-RNN experiment on the selected datasets.
datasets_helper = Dataset_Helper(preprocess=True)
datasets_helper.set_wanted_datasets([0])
results_saver = LogWriter(log_file_desc="Bidirectional-no-relu")
results = []
num_of_words = 15000

while datasets_helper.next_dataset():
    results_saver.add_log("Starting testing dataset {}".format(datasets_helper.get_dataset_name()))
    validation_count = 200  #datasets_helper.get_num_of_train_texts() // 10
    tokenizer = Tokenizer(num_words=num_of_words)  #,
    #filters='#$%&()*+-<=>@[\\]^_`{|}~\t\n',
    #lower=False, split=' ')
    generator = datasets_helper.text_generator()
    results_saver.add_log("Starting preprocessing and tokenization.")
    tokenizer.fit_on_texts(generator)
    results_saver.add_log("Done. Building model now.")
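    # --- Illustrative sketch, not the project's actual model -----------------
    # The real architecture comes from neural_networks.aliaser and the helpers
    # it exposes, which are not shown here. The lines below only sketch the
    # kind of bidirectional classifier the "Building model now" log message
    # precedes; all layer sizes are assumptions.
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense
    model = Sequential()
    model.add(Embedding(num_of_words, 128))          # learned word embeddings
    model.add(Bidirectional(LSTM(64)))               # bidirectional recurrent encoder
    model.add(Dense(datasets_helper.get_num_of_topics(), activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])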
    ModelType.DT: {
        'max_features': max_feauters
    }
}
start_time = get_time_in_millis()
preprocess = True
models_for_test = test_model.keys()

# Test every enabled classic model on the selected datasets.
for model in models_for_test:
    if not test_model[model]:
        continue
    log_writer = LogWriter(log_file_desc='_{}_{}'.format('prep' if preprocess else 'no-prep', model.name),
                           result_desc='Classic')
    tester = GeneralTester(log_writer, start_time)
    datasets_helper = Dataset_Helper(preprocess=preprocess)
    datasets_helper.set_wanted_datasets([0, 2, 3])
    while datasets_helper.next_dataset():
        # Fill in the per-dataset topic count where the model's params expect it.
        if 'topic_count' in models_params[model]:
            models_params[model]['topic_count'] = datasets_helper.get_num_of_topics()
        topic_names = [(index, item) for index, item in enumerate(datasets_helper.get_dataset_topic_names())]
        tester.set_new_dataset(datasets_helper.get_num_of_topics(), topic_names)
        output_csv = []
        """for key,value in test_model.items():
            if not value:
                models_params.pop(key)"""
        log_writer.write_any("model-settings", json.dumps(models_params[model]), 'w+', True)
        seed = 5
sys.path.append(file_dir)

# Ask the user for the name of this test run.
root = tk.Tk()
root.withdraw()
test_name = simpledialog.askstring(title="Test Name", prompt="Insert test name:", initialvalue='LDATests')
#config = tf.compat.v1.ConfigProto( device_count = {'GPU': 1 , 'CPU': 4} )
#sess = tf.compat.v1.Session(config=config)
#tf.keras.backend.set_session(sess)
#results_saver = LogWriter(log_file_desc="Autoencoder")
results = []
#mycolors = np.array([color for name, color in mcolors.XKCD_COLORS.items()])

# Load the dataset and build a binary bag-of-words representation.
num_of_words = 10000
dataset_helper = Dataset_Helper(True)
dataset_helper.set_wanted_datasets([2])
dataset_helper.next_dataset()
num_of_topics = dataset_helper.get_num_of_topics()
documents = dataset_helper.get_texts_as_list()
labels = dataset_helper.get_labels(dataset_helper.get_train_file_path())
tokenizer = Tokenizer(num_words=num_of_words)
tokenizer.fit_on_texts(documents)
#items= tokenizer.word_index
reverse_word_map = dict(map(reversed, tokenizer.word_index.items()))
matrix = tokenizer.texts_to_matrix(documents, mode='binary')
#mydict = corpora.Dictionary([line.split() for line in documents],prune_at=num_of_words)
#corpus = [mydict.doc2bow(line.split()) for line in documents]
#tfidf = TfidfModel(corpus)
#print(tfidf)
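# --- Illustrative sketch, not from the original file -------------------------
# The commented-out lines above hint at a gensim pipeline matching the
# 'LDATests' default name. The snippet below shows one way those lines could be
# completed into an LDA run; the number of passes and the use of gensim's
# LdaModel (rather than the project's own wrapper) are assumptions.
from gensim import corpora
from gensim.models import LdaModel

gensim_dict = corpora.Dictionary([line.split() for line in documents], prune_at=num_of_words)
gensim_corpus = [gensim_dict.doc2bow(line.split()) for line in documents]
lda = LdaModel(corpus=gensim_corpus, id2word=gensim_dict, num_topics=num_of_topics, passes=5)
for topic_id, topic in lda.print_topics(num_topics=num_of_topics, num_words=10):
    print(topic_id, topic)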