elif sys.argv[2] == 'adam_L2':
    from acidano.utils.optim import adam_L2 as Optimization_method
elif sys.argv[2] == 'rmsprop':
    from acidano.utils.optim import rmsprop as Optimization_method
elif sys.argv[2] == 'sgd_nesterov':
    from acidano.utils.optim import sgd_nesterov as Optimization_method
else:
    raise ValueError(sys.argv[2] + " is not an optimization method")

############################################################
# System paths
############################################################
logging.info('System paths')
SOURCE_DIR = os.getcwd()
result_folder = SOURCE_DIR + u'/../Results/' + unit_type + '/' + script_param['temporal_granularity'] + '/' +\
    'quantization_' + str(script_param['quantization']) + '/' + Optimization_method.name() + '/' + Model_class.name()

# Create the result folder if it does not exist yet
if not os.path.isdir(result_folder):
    os.makedirs(result_folder)

# Count the existing hparam runs in the folder and name this run with the next free index
hparam_run_counter = 0
while os.path.isdir(result_folder + '/hrun_' + str(hparam_run_counter)):
    hparam_run_counter += 1
result_folder = result_folder + '/hrun_' + str(hparam_run_counter)
os.mkdir(result_folder)
script_param['result_folder'] = result_folder

# Data: .pkl files
data_folder = SOURCE_DIR + '/../Data'
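
# ----------------------------------------------------------------------
# Sketch (not used by this script): the elif chain above could be
# collapsed with importlib, assuming every optimizer lives in
# acidano.utils.optim under the name passed on the command line.
# 'load_optimizer' is a hypothetical helper, not part of the original code.
# ----------------------------------------------------------------------
def load_optimizer(name):
    import importlib
    optim_module = importlib.import_module('acidano.utils.optim')
    try:
        return getattr(optim_module, name)
    except AttributeError:
        raise ValueError(name + " is not an optimization method")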
def train_hopt(max_evals, csv_file_path):
    # Create/reinit the csv file
    open(csv_file_path, 'w').close()

    logger_hopt.info((u'WITH HYPERPARAMETER OPTIMIZATION').encode('utf8'))
    logger_hopt.info((u'**** Model : ' + Model_class.name()).encode('utf8'))
    logger_hopt.info((u'**** Optimization technique : ' + Optimization_method.name()).encode('utf8'))
    logger_hopt.info((u'**** Temporal granularity : ' + temporal_granularity).encode('utf8'))
    if binary_unit:
        logger_hopt.info((u'**** Binary units (intensity discarded)').encode('utf8'))
    else:
        logger_hopt.info((u'**** Real-valued units (intensity taken into consideration)').encode('utf8'))
    logger_hopt.info((u'**** Quantization : ' + str(quantization)).encode('utf8'))

    # Define the hyper-parameter search space for the model and the optimizer.
    # These are given by the static methods get_param_dico and get_hp_space
    model_space = Model_class.get_hp_space()
    optim_space = Optimization_method.get_hp_space()
    space = {'model': model_space, 'optim': optim_space}

    # Get the headers (i.e. the list of tuned hyperparameters,
    # for printing and saving purposes)
    header = model_space.keys() + optim_space.keys() + ['accuracy']

    global run_counter
    run_counter = 0

    def run_wrapper(params):
        global run_counter
        run_counter += 1
        logger_hopt.info(('\n').encode('utf8'))
        logger_hopt.info((u'#' * 40).encode('utf8'))
        logger_hopt.info((u'# Config : {}'.format(run_counter)).encode('utf8'))

        # Build model and optim dictionaries ########
        model_param = params['model']
        optim_param = params['optim']
        #############################################

        # Weights are plotted and stored in a folder,
        # same for the generated midi sequences #####
        weights_folder = result_folder + '/' + str(run_counter) + '/weights'
        if not os.path.isdir(weights_folder):
            os.makedirs(weights_folder)
        generated_folder = result_folder + '/' + str(run_counter) + '/generated_sequences'
        if not os.path.isdir(generated_folder):
            os.makedirs(generated_folder)
        model_folder = result_folder + '/' + str(run_counter) + '/model'
        if not os.path.isdir(model_folder):
            os.makedirs(model_folder)
        #############################################

        # Load data #################################
        time_load_0 = time.time()
        piano_train, orchestra_train, train_index, \
            piano_valid, orchestra_valid, valid_index, \
            piano_test, orchestra_test, test_index, generation_index \
            = load_data(LOCAL_SCRATCH + '/Data',
                        model_param['temporal_order'],
                        model_param['batch_size'],
                        binary_unit=binary_unit,
                        skip_sample=1,
                        logger_load=logger_load)
        time_load_1 = time.time()
        logger_load.info('TTT : Loading data took {} seconds'.format(time_load_1 - time_load_0))
        ##############
        # visualize_mat(piano_train.get_value(), 'DEBUG', 'piano_train')
        # visualize_mat(orchestra_train.get_value(), 'DEBUG', 'orchestra_train')
        # visualize_mat(piano_test.get_value(), 'DEBUG', 'piano_test')
        # visualize_mat(orchestra_test.get_value(), 'DEBUG', 'orchestra_test')
        # visualize_mat(piano_valid.get_value(), 'DEBUG', 'piano_valid')
        # visualize_mat(orchestra_valid.get_value(), 'DEBUG', 'orchestra_valid')
        ##############

        # For large datasets, see
        # http://deeplearning.net/software/theano/tutorial/aliasing.html :
        # use borrow=True to avoid copying the whole matrix?
        # Load as much as the GPU can handle, train, then load the next
        # part of the dataset with shared_variable.set_value(new_value)
        # (see the sketches after this function)
        #############################################

        # Train #####################################
        time_train_0 = time.time()
        model, dico_res = train(piano_train, orchestra_train, train_index,
                                piano_valid, orchestra_valid, valid_index,
                                model_param, optim_param, max_iter, weights_folder)
        time_train_1 = time.time()
        logger_train.info('TTT : Training data took {} seconds'.format(time_train_1 - time_train_0))
        error = -dico_res['accuracy']  # fmin searches for a minimum
        #############################################

        # Generate ##################################
        time_generate_0 = time.time()
        generate(model, piano_test, orchestra_test, generation_index, generation_length,
                 seed_size, quantization_write, generated_folder, logger_generate)
        time_generate_1 = time.time()
        logger_generate.info('TTT : Generating data took {} seconds'.format(time_generate_1 - time_generate_0))
        #############################################

        # Save ######################################
        save_model_file = open(model_folder + '/model.pkl', 'wb')
        pickle.dump(model, save_model_file, protocol=pickle.HIGHEST_PROTOCOL)
        #############################################

        # Log
        logger_hopt.info((u'# Accuracy : {}'.format(dico_res['accuracy'])).encode('utf8'))
        logger_hopt.info((u'###################\n').encode('utf8'))

        # Write the result in result.csv
        with open(csv_file_path, 'ab') as csvfile:
            writer = csv.DictWriter(csvfile, delimiter=',', fieldnames=header)
            writer.writerow(dico_res)

        return error

    # Calling get_param_dico with None returns an empty dictionary,
    # which is useful to get the hparam headers
    with open(csv_file_path, 'ab') as csvfile:
        # Write the headers (the csv file was reinitialized above)
        writerHead = csv.writer(csvfile, delimiter=',')
        writerHead.writerow(header)

    best = fmin(run_wrapper, space, algo=tpe.suggest, max_evals=max_evals)

    return best
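
# ----------------------------------------------------------------------
# Minimal sketch of the hyperopt mechanics behind train_hopt: fmin samples
# a point from 'space', calls the objective, and *minimizes* the returned
# value, which is why run_wrapper returns the negated accuracy.
# '_hyperopt_demo' is illustrative only and never called by this script;
# its search space is made up.
# ----------------------------------------------------------------------
def _hyperopt_demo():
    from hyperopt import fmin, tpe, hp
    demo_space = {'learning_rate': hp.loguniform('learning_rate', -10, 0),
                  'batch_size': hp.quniform('batch_size', 16, 256, 16)}

    def objective(params):
        # Stand-in for run_wrapper: pretend accuracy peaks at lr = 1e-3
        fake_accuracy = -abs(params['learning_rate'] - 1e-3)
        return -fake_accuracy  # fmin minimizes, so negate the score

    return fmin(objective, demo_space, algo=tpe.suggest, max_evals=10)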
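
# ----------------------------------------------------------------------
# Sketch of the chunked-loading idea noted in run_wrapper above: keep a
# single Theano shared variable on the GPU and swap dataset chunks into
# it with set_value, rather than allocating one shared variable per chunk.
# 'chunks' and 'train_one_chunk' are hypothetical; this helper is not
# called anywhere in the script.
# ----------------------------------------------------------------------
def _chunked_training_sketch(chunks, train_one_chunk):
    import numpy as np
    import theano
    # Allocate once, shaped like the first chunk; borrow=True avoids an
    # extra host-side copy (see the Theano aliasing tutorial linked above)
    data_shared = theano.shared(np.asarray(chunks[0], dtype=theano.config.floatX),
                                borrow=True)
    for chunk in chunks:
        # Overwrite the buffer contents without recompiling any function
        data_shared.set_value(np.asarray(chunk, dtype=theano.config.floatX),
                              borrow=True)
        train_one_chunk(data_shared)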
def train(piano_train, orchestra_train, train_index,
          piano_valid, orchestra_valid, valid_index,
          model_param, optim_param, max_iter, weights_folder):
    ############################################################
    # model_param and optim_param are dictionaries.
    # If you use train directly, bypassing the hparameter loop,
    # be careful that the keys match the constructor arguments
    # of both model and optimizer.
    ############################################################

    # Log them
    logger_train.info((u'##### Model parameters').encode('utf8'))
    for k, v in model_param.iteritems():
        logger_train.info((u'# ' + k + ' : {}'.format(v)).encode('utf8'))
    logger_train.info((u'##### Optimization parameters').encode('utf8'))
    for k, v in optim_param.iteritems():
        logger_train.info((u'# ' + k + ' : {}'.format(v)).encode('utf8'))
    logger_generate.info((u'##### Generation parameters').encode('utf8'))
    logger_generate.info((u'# generation_length : ' + str(generation_length)).encode('utf8'))
    logger_generate.info((u'# seed_size : ' + str(seed_size)).encode('utf8'))
    logger_generate.info((u'# quantization_write : ' + str(quantization_write)).encode('utf8'))

    ############################################################
    # DATA
    ############################################################
    piano_dim = piano_train.get_value().shape[1]
    orchestra_dim = orchestra_train.get_value().shape[1]
    n_train_batches = len(train_index)
    n_val_batches = len(valid_index)
    logger_load.info((u'##### Data').encode('utf8'))
    logger_load.info((u'# n_train_batch : {}'.format(n_train_batches)).encode('utf8'))
    logger_load.info((u'# n_val_batch : {}'.format(n_val_batches)).encode('utf8'))

    ############################################################
    # MODEL
    ############################################################
    # Dimensions dictionary
    dimensions = {'batch_size': model_param['batch_size'],
                  'temporal_order': model_param['temporal_order'],
                  'piano_dim': piano_dim,
                  'orchestra_dim': orchestra_dim}
    model = Model_class(model_param, dimensions)
    # Define an optimizer
    optimizer = Optimization_method(optim_param)

    ############################################################
    # COMPILE FUNCTIONS
    ############################################################
    # Compilation of the training function is encapsulated in the class,
    # since the 'givens' can vary with the model
    train_iteration = model.get_train_function(piano_train, orchestra_train, optimizer, name='train_iteration')
    # Same for the validation
    validation_error = model.get_validation_error(piano_valid, orchestra_valid, name='validation_error')

    ############################################################
    # TRAINING
    ############################################################
    logger_train.info("#")
    logger_train.info("# Training")
    epoch = 0
    OVERFITTING = False
    DIVERGING = False
    val_tab = np.zeros(max(1, max_iter))
    while not OVERFITTING and not DIVERGING and epoch != max_iter:
        # Go through the training set
        train_cost_epoch = []
        train_monitor_epoch = []
        for batch_index in xrange(n_train_batches):
            this_cost, this_monitor = train_iteration(train_index[batch_index])
            # Keep track of the cost
            train_cost_epoch.append(this_cost)
            train_monitor_epoch.append(this_monitor)
        # Validation
        # For binary units this is an accuracy measure.
        # For real-valued units it is a Gaussian-centered value with variance 1.
        accuracy = []
        for batch_index in xrange(n_val_batches):
            _, _, accuracy_batch = validation_error(valid_index[batch_index])
            accuracy += [accuracy_batch]

        # Early stopping criterion.
        # Successive differences telescope:
        #     \sum_{i=1}^{n} (E(i) - E(i-1)) = E(n) - E(0)
        # so a mean over successive first-order derivatives only sees the endpoints.
        # Instead, average differences taken over windows of validation_order epochs:
        # 1/ Reference slope, computed once after the first initial_derivative_length epochs:
        #     increase_reference = \sum_i (E(i) - E(i - validation_order + 1))
        #                          / (validation_order * #terms)
        #    for i in [validation_order - 1, initial_derivative_length)
        # 2/ At each following epoch, the same windowed mean slope over the last
        #    check_derivative_length epochs is compared to the reference
        mean_accuracy = 100 * np.mean(accuracy)
        val_tab[epoch] = mean_accuracy
        if epoch == initial_derivative_length - 1:
            ind = np.arange(validation_order - 1, initial_derivative_length)
            increase_reference = (val_tab[ind] - val_tab[ind - validation_order + 1]).sum() / (validation_order * len(ind))
            if increase_reference <= 0:
                # Early stop if the model did not really improve over the first epochs
                DIVERGING = True
        elif epoch >= initial_derivative_length:
            ind = np.arange(epoch - check_derivative_length + 1, epoch + 1)
            derivative_mean = (val_tab[ind] - val_tab[ind - validation_order + 1]).sum() / (validation_order * len(ind))
            # Overfitting if the mean slope is less than 10% of the reference slope
            if derivative_mean < 0.1 * increase_reference:
                OVERFITTING = True

        # Monitor learning
        logger_train.info(('Epoch : {} , Monitor : {} , Cost : {} , Valid acc : {}'
                           .format(epoch, np.mean(train_monitor_epoch), np.mean(train_cost_epoch), mean_accuracy))
                          .encode('utf8'))
        if DIVERGING:
            logger_train.info('DIVERGING !!')
        elif OVERFITTING:
            logger_train.info('OVERFITTING !!')

        # Plot the weights every 20 epochs, during the first 5 epochs,
        # and when training stops
        if (epoch % 20 == 0) or (epoch < 5) or OVERFITTING or DIVERGING:
            weights_folder_epoch = weights_folder + '/' + str(epoch)
            if not os.path.isdir(weights_folder_epoch):
                os.makedirs(weights_folder_epoch)
            model.save_weights(weights_folder_epoch)

        epoch += 1

    # Return the results
    best_accuracy = np.amax(val_tab)
    dico_res = dict(model_param)  # copy, so the caller's dict is not mutated
    dico_res.update(optim_param)
    dico_res['accuracy'] = best_accuracy

    return model, dico_res
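
# ----------------------------------------------------------------------
# Worked sketch of the early-stopping test used in train(), run on a
# synthetic validation curve (the curve and the constants below are made
# up; train() uses the module-level validation_order /
# initial_derivative_length / check_derivative_length).
# ----------------------------------------------------------------------
def _early_stopping_demo():
    import numpy as np
    v_order, init_len, check_len = 5, 20, 5
    # Synthetic accuracy curve: fast improvement, then a plateau
    val_curve = np.concatenate([np.linspace(10, 80, init_len),
                                np.full(30, 80.5)])
    # Reference slope over the first init_len epochs
    ind = np.arange(v_order - 1, init_len)
    increase_reference = (val_curve[ind] - val_curve[ind - v_order + 1]).sum() / (v_order * len(ind))
    # Scan the following epochs with the same windowed slope
    for epoch in range(init_len, len(val_curve)):
        ind = np.arange(epoch - check_len + 1, epoch + 1)
        derivative_mean = (val_curve[ind] - val_curve[ind - v_order + 1]).sum() / (v_order * len(ind))
        if derivative_mean < 0.1 * increase_reference:
            return epoch  # training would stop (OVERFITTING) here
    return None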
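
# ----------------------------------------------------------------------
# Sketch of the 'givens' pattern that get_train_function encapsulates:
# the compiled Theano function takes only a batch index and reads its
# mini-batch straight from a shared variable already sitting on the GPU.
# The cost here (X.mean()) is a stand-in for a real training cost, and
# '_givens_sketch' is never called by this script.
# ----------------------------------------------------------------------
def _givens_sketch(X_shared, batch_size):
    import theano
    import theano.tensor as T
    index = T.lscalar('index')
    X = T.matrix('X')
    cost = X.mean()  # stand-in for the model's actual cost expression
    # 'givens' substitutes the symbolic X with a slice of the shared data,
    # so no data is transferred host-to-GPU at call time
    return theano.function([index], cost,
                           givens={X: X_shared[index * batch_size:(index + 1) * batch_size]})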
else:
    raise ValueError(sys.argv[2] + " is not an optimization method")

# System paths
MAIN_DIR = os.getcwd().decode('utf8') + u'/'
if 'LSCRATCH' in os.environ.keys():
    # We are on Guillimin
    LOCAL_SCRATCH = os.environ['LSCRATCH']
else:
    LOCAL_SCRATCH = '..'
data_folder = LOCAL_SCRATCH + u'/Data'

if ONLY_BUILD_DB:
    result_folder = MAIN_DIR + u'../Results/' + 'Only_build_db'
else:
    result_folder = MAIN_DIR + u'../Results/' + temporal_granularity + '/' + Optimization_method.name() + '/' + Model_class.name()
result_file = result_folder + u'/hopt_results.csv'
if ONLY_BUILD_DB:
    log_file_path = result_folder + '/build_db.log'
else:
    log_file_path = result_folder + '/' + Model_class.name() + u'.log'

# Fixed hyperparameters
max_evals = 50  # number of hyper-parameter configurations evaluated
max_iter = 100  # maximum number of training iterations for one configuration of hparams

# The config is set now; no need to modify the source below for standard use

# Validation (early stopping)
validation_order = 5
initial_derivative_length = 20
check_derivative_length = 5