# PREFIX_INDEX_FOLDER + "hand_picked_Spotify_test.txt", # PREFIX_INDEX_FOLDER + "liszt_classical_archives_test.txt" ] build_data(index_files_dict=index_files_dict, meta_info_path=data_folder + '/temp.p', quantization=script_param['quantization'], temporal_granularity=script_param['temporal_granularity'], store_folder=data_folder, logging=logging) ############################################################ # Hyper parameter space ############################################################ model_space = Model_class.get_hp_space() optim_space = Optimization_method.get_hp_space() space = {'model': model_space, 'optim': optim_space, 'train': train_param, 'script': script_param} ############################################################ # MongoDB ############################################################ host = "localhost" port = 27017 db_name =\ unit_type[0] + '-' +\ script_param['temporal_granularity'][0] + '-' +\ str(script_param['quantization']) + '-' +\ Optimization_method.name() + '-' +\ Model_class.name() mongo_adress = 'mongo://' + host + ':' + str(port) + '/' + db_name
def train_hopt(max_evals, csv_file_path):
    # Create/reinit the csv file
    open(csv_file_path, 'w').close()

    logger_hopt.info((u'WITH HYPERPARAMETER OPTIMIZATION').encode('utf8'))
    logger_hopt.info((u'**** Model : ' + Model_class.name()).encode('utf8'))
    logger_hopt.info((u'**** Optimization technique : ' + Optimization_method.name()).encode('utf8'))
    logger_hopt.info((u'**** Temporal granularity : ' + temporal_granularity).encode('utf8'))
    if binary_unit:
        logger_hopt.info((u'**** Binary unit (intensity discarded)').encode('utf8'))
    else:
        logger_hopt.info((u'**** Real valued unit (intensity taken into consideration)').encode('utf8'))
    logger_hopt.info((u'**** Quantization : ' + str(quantization)).encode('utf8'))

    # Define the hyper-parameter search space for the model and the optimizer.
    # Both are given by the static methods get_param_dico and get_hp_space
    model_space = Model_class.get_hp_space()
    optim_space = Optimization_method.get_hp_space()
    space = {'model': model_space, 'optim': optim_space}

    # Headers, i.e. the list of tuned hyperparameters, for printing and
    # saving purposes
    header = model_space.keys() + optim_space.keys() + ['accuracy']

    global run_counter
    run_counter = 0

    def run_wrapper(params):
        global run_counter
        run_counter += 1
        logger_hopt.info(('\n').encode('utf8'))
        logger_hopt.info((u'#' * 40).encode('utf8'))
        logger_hopt.info((u'# Config : {}'.format(run_counter)).encode('utf8'))

        # Build model and optim dictionaries ########
        model_param = params['model']
        optim_param = params['optim']
        #############################################

        # Weights plotted and stored in a folder ####
        # Same for generated midi sequences #########
        weights_folder = result_folder + '/' + str(run_counter) + '/weights'
        if not os.path.isdir(weights_folder):
            os.makedirs(weights_folder)
        generated_folder = result_folder + '/' + str(run_counter) + '/generated_sequences'
        if not os.path.isdir(generated_folder):
            os.makedirs(generated_folder)
        model_folder = result_folder + '/' + str(run_counter) + '/model'
        if not os.path.isdir(model_folder):
            os.makedirs(model_folder)
        #############################################

        # Load data #################################
        time_load_0 = time.time()
        piano_train, orchestra_train, train_index, \
            piano_valid, orchestra_valid, valid_index, \
            piano_test, orchestra_test, test_index, generation_index \
            = load_data(LOCAL_SCRATCH + '/Data',
                        model_param['temporal_order'],
                        model_param['batch_size'],
                        binary_unit=binary_unit,
                        skip_sample=1,
                        logger_load=logger_load)
        time_load_1 = time.time()
        logger_load.info('TTT : Loading data took {} seconds'.format(time_load_1 - time_load_0))
        ##############
        # visualize_mat(piano_train.get_value(), 'DEBUG', 'piano_train')
        # visualize_mat(orchestra_train.get_value(), 'DEBUG', 'orchestra_train')
        # visualize_mat(piano_test.get_value(), 'DEBUG', 'piano_test')
        # visualize_mat(orchestra_test.get_value(), 'DEBUG', 'orchestra_test')
        # visualize_mat(piano_valid.get_value(), 'DEBUG', 'piano_valid')
        # visualize_mat(orchestra_valid.get_value(), 'DEBUG', 'orchestra_valid')
        ##############
        # For large datasets, see
        # http://deeplearning.net/software/theano/tutorial/aliasing.html
        # use borrow=True (avoids copying the whole matrix)?
        # Load as much as the GPU can handle, train, then load the next
        # part of the dataset using shared_variable.set_value(new_value)
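        # A minimal sketch (not used as-is here) of that chunked-loading idea,
        # assuming `dataset` is a large numpy matrix and that `chunk_size`
        # rows fit in GPU memory; both names are illustrative:
        #
        #   import numpy as np
        #   import theano
        #   shared_chunk = theano.shared(
        #       np.zeros((chunk_size, dataset.shape[1]),
        #                dtype=theano.config.floatX),
        #       borrow=True)
        #   for start in xrange(0, dataset.shape[0], chunk_size):
        #       shared_chunk.set_value(dataset[start:start + chunk_size],
        #                              borrow=True)
        #       # ... run the compiled training function on this chunk ...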
        #############################################

        # Train #####################################
        time_train_0 = time.time()
        model, dico_res = train(piano_train, orchestra_train, train_index,
                                piano_valid, orchestra_valid, valid_index,
                                model_param, optim_param, max_iter, weights_folder)
        time_train_1 = time.time()
        logger_train.info('TTT : Training took {} seconds'.format(time_train_1 - time_train_0))
        # hyperopt searches for a minimum, so return the negated accuracy
        error = -dico_res['accuracy']
        #############################################

        # Generate ##################################
        time_generate_0 = time.time()
        generate(model, piano_test, orchestra_test, generation_index,
                 generation_length, seed_size, quantization_write,
                 generated_folder, logger_generate)
        time_generate_1 = time.time()
        logger_generate.info('TTT : Generating data took {} seconds'.format(time_generate_1 - time_generate_0))
        #############################################

        # Save ######################################
        with open(model_folder + '/model.pkl', 'wb') as save_model_file:
            pickle.dump(model, save_model_file, protocol=pickle.HIGHEST_PROTOCOL)
        #############################################

        # Log
        logger_hopt.info((u'# Accuracy : {}'.format(dico_res['accuracy'])).encode('utf8'))
        logger_hopt.info((u'###################\n').encode('utf8'))

        # Write the result in result.csv
        with open(csv_file_path, 'ab') as csvfile:
            writer = csv.DictWriter(csvfile, delimiter=',', fieldnames=header)
            writer.writerow(dico_res)

        return error

    # Calling get_param_dico with None returns an empty dictionary,
    # useful to get the header of hyperparameters
    with open(csv_file_path, 'ab') as csvfile:
        # Write the headers (the file was just reinitialized above)
        writerHead = csv.writer(csvfile, delimiter=',')
        writerHead.writerow(header)

    best = fmin(run_wrapper, space, algo=tpe.suggest, max_evals=max_evals)

    return best
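# Note that fmin returns the best point as raw values keyed by the space's
# labels; hyperopt's space_eval maps it back onto the evaluated search space.
# A minimal usage sketch (the csv path is illustrative, and `space` must be
# the same dictionary that was passed to fmin):
#
#   from hyperopt import space_eval
#   best = train_hopt(max_evals=100,
#                     csv_file_path=result_folder + '/results.csv')
#   best_params = space_eval(space, best)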