def generate_BBB_controller(domain_name, task_identity, window_size,
                            number_demonstrations, epochs, number_mini_batches,
                            activation_unit, learning_rate, hidden_units,
                            number_samples_variance_reduction, precision_alpha,
                            weights_prior_mean_1, weights_prior_mean_2,
                            weights_prior_deviation_1, weights_prior_deviation_2,
                            mixture_pie, rho_mean, extra_likelihood_emphasis):
    configuration_identity = (LOGS_DIRECTORY + domain_name + '/' +
                              str(number_demonstrations) +
                              '/bbb_controller/window_size_' + str(window_size) +
                              '/' + str(task_identity) + '/')
    training_logs_configuration_identity = configuration_identity + 'training/'
    if not os.path.exists(training_logs_configuration_identity):
        os.makedirs(training_logs_configuration_identity)

    directory_to_save_tensorboard_data = (training_logs_configuration_identity +
                                          TENSORBOARD_DIRECTORY)
    saved_models_during_iterations_bbb = (training_logs_configuration_identity +
                                          SAVED_MODELS_DURING_ITERATIONS_DIRECTORY)
    saved_final_model_bbb = (training_logs_configuration_identity +
                             SAVED_FINAL_MODEL_DIRECTORY)
    if not os.path.exists(directory_to_save_tensorboard_data):
        os.makedirs(directory_to_save_tensorboard_data)
    if not os.path.exists(saved_models_during_iterations_bbb):
        os.makedirs(saved_models_during_iterations_bbb)
    if not os.path.exists(saved_final_model_bbb):
        os.makedirs(saved_final_model_bbb)

    start_time = datetime.now()
    moving_windows_x, moving_windows_y, drift_per_time_step, moving_windows_x_size = \
        getDemonstrationsFromTask(domain_name=domain_name,
                                  task_identity=task_identity,
                                  window_size=window_size,
                                  number_demonstrations=number_demonstrations)

    # House-keeping to make the data amenable to good training.
    mean_x, deviation_x = get_mean_and_deviation(data=moving_windows_x)
    moving_windows_x = NORMALIZE(moving_windows_x, mean_x, deviation_x)
    mean_y, deviation_y = get_mean_and_deviation(data=moving_windows_y)
    moving_windows_y = NORMALIZE(moving_windows_y, mean_y, deviation_y)

    # Persist the normalization statistics and window meta-data so the exact
    # same input manipulation can be reproduced at run time.
    file_name_to_save_input_manipulation_data = (
        training_logs_configuration_identity + 'training_meta_data.pkl')
    normalization_data_to_store = {
        MEAN_KEY_X: mean_x,
        DEVIATION_KEY_X: deviation_x,
        MEAN_KEY_Y: mean_y,
        DEVIATION_KEY_Y: deviation_y,
        DRIFT_PER_TIME_STEP_KEY: drift_per_time_step,
        MOVING_WINDOWS_X_SIZE_KEY: moving_windows_x_size,
        WINDOW_SIZE_KEY: window_size
    }
    with open(file_name_to_save_input_manipulation_data, 'wb') as f:
        pickle.dump(normalization_data_to_store, f)

    print(GREEN('Creating the BBB-based Bayesian NN'))
    BBB_Regressor = BBBNNRegression(
        number_mini_batches=number_mini_batches,
        number_features=moving_windows_x.shape[1],
        number_output_units=moving_windows_y.shape[1],
        activation_unit=activation_unit,
        learning_rate=learning_rate,
        hidden_units=hidden_units,
        number_samples_variance_reduction=number_samples_variance_reduction,
        precision_alpha=precision_alpha,
        weights_prior_mean_1=weights_prior_mean_1,
        weights_prior_mean_2=weights_prior_mean_2,
        weights_prior_deviation_1=weights_prior_deviation_1,
        weights_prior_deviation_2=weights_prior_deviation_2,
        mixture_pie=mixture_pie,
        rho_mean=rho_mean,
        extra_likelihood_emphasis=extra_likelihood_emphasis)
    print(GREEN('BBB-based Bayesian NN created successfully'))

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(directory_to_save_tensorboard_data,
                                       sess.graph)
        saver = tf.train.Saver(max_to_keep=3, keep_checkpoint_every_n_hours=2)

        previous_minimum_loss = sys.float_info.max
        mini_batch_size = moving_windows_x.shape[0] // number_mini_batches
        for epoch_iterator in tqdm(range(epochs)):
            # Reshuffle every epoch so the mini-batches differ across epochs.
            moving_windows_x, moving_windows_y = randomize(moving_windows_x,
                                                           moving_windows_y)
            ptr = 0
            for mini_batch_iterator in range(number_mini_batches):
                x_batch = moving_windows_x[ptr:ptr + mini_batch_size, :]
                y_batch = moving_windows_y[ptr:ptr + mini_batch_size, :]
                _, loss, summary = sess.run(
                    [BBB_Regressor.train(),
                     BBB_Regressor.getMeanSquaredError(),
                     BBB_Regressor.summarize()],
                    feed_dict={BBB_Regressor.X_input: x_batch,
                               BBB_Regressor.Y_input: y_batch})
                sess.run(BBB_Regressor.update_mini_batch_index())
                # Checkpoint whenever the mini-batch loss improves on the best
                # value seen so far.
                if loss < previous_minimum_loss:
                    saver.save(sess,
                               saved_models_during_iterations_bbb + 'iteration',
                               global_step=epoch_iterator,
                               write_meta_graph=False)
                    previous_minimum_loss = loss
                ptr += mini_batch_size
            writer.add_summary(summary,
                               global_step=tf.train.global_step(
                                   sess, BBB_Regressor.global_step))
        writer.close()
        saver.save(sess, saved_final_model_bbb + 'final', write_state=False)
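

# A minimal sketch (assumed usage, not part of the pipeline above) of how a
# controller saved by generate_BBB_controller could be restored later. It
# relies on the module-level key constants, and assumes the BBBNNRegression
# graph has already been rebuilt with the same hyper-parameters and that
# `sess`/`saver` belong to that graph; the helper name is hypothetical.
def load_BBB_controller_sketch(training_logs_configuration_identity, sess, saver):
    # Normalization statistics written to 'training_meta_data.pkl' above.
    with open(training_logs_configuration_identity + 'training_meta_data.pkl', 'rb') as f:
        meta_data = pickle.load(f)
    # Weights written by the final saver.save(...) call above.
    saver.restore(sess,
                  training_logs_configuration_identity +
                  SAVED_FINAL_MODEL_DIRECTORY + 'final')
    return meta_data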
def data_efficient_imitation_across_multiple_tasks(
        domain_name, window_size, number_demonstrations,
        adapt_detector_threshold, start_monitoring_at_time_step, detector_c,
        detector_m, initial_detector_threshold, epochs, number_mini_batches,
        activation_unit, learning_rate, hidden_units,
        number_samples_variance_reduction, precision_alpha,
        weights_prior_mean_1, weights_prior_mean_2, weights_prior_deviation_1,
        weights_prior_deviation_2, mixture_pie, rho_mean,
        extra_likelihood_emphasis):
    COPY_OF_ALL_MUJOCO_TASK_IDENTITIES = copy.deepcopy(ALL_MUJOCO_TASK_IDENTITIES)
    for simulation_iterator in range(TOTAL_SIMULATION_ITERATIONS):
        # Seed with the simulation index so every run sees a reproducible,
        # but different, task ordering.
        random.seed(simulation_iterator)
        random.shuffle(COPY_OF_ALL_MUJOCO_TASK_IDENTITIES)

        ###### Naive Controller ######
        '''
        all_gathered_x, all_gathered_y = None, None
        tasks_trained_on, tasks_encountered = [], []
        print(GREEN('Starting runs for the naive controller'))
        for task_iterator, current_task_identity in enumerate(COPY_OF_ALL_MUJOCO_TASK_IDENTITIES):
            print(RED('Simulation iteration is ' + str(simulation_iterator) +
                      ' and task iterator is ' + str(task_iterator)))
            tasks_trained_on.append(current_task_identity)
            tasks_encountered.append(current_task_identity)
            moving_windows_x, moving_windows_y, drift_per_time_step, moving_windows_x_size = \
                getDemonstrationsFromTask(domain_name=domain_name,
                                          task_identity=current_task_identity,
                                          window_size=window_size,
                                          number_demonstrations=number_demonstrations)
            if all_gathered_x is None:
                all_gathered_x = copy.deepcopy(moving_windows_x)
                all_gathered_y = copy.deepcopy(moving_windows_y)
            else:
                all_gathered_x = np.append(all_gathered_x, moving_windows_x, axis=0)
                all_gathered_y = np.append(all_gathered_y, moving_windows_y, axis=0)
            disposable_training_x = copy.deepcopy(all_gathered_x)
            disposable_training_y = copy.deepcopy(all_gathered_y)
            mean_x, deviation_x = get_mean_and_deviation(data=disposable_training_x)
            disposable_training_x = NORMALIZE(disposable_training_x, mean_x, deviation_x)
            mean_y, deviation_y = get_mean_and_deviation(data=disposable_training_y)
            disposable_training_y = NORMALIZE(disposable_training_y, mean_y, deviation_y)
            configuration_identity = ('logs/' + domain_name + '/naive_controller/' +
                                      str(simulation_iterator) + '/' +
                                      str(task_iterator) + '/')
            training_logs_directory = configuration_identity + 'training/'
            if not os.path.exists(training_logs_directory):
                os.makedirs(training_logs_directory)
            file_name_to_save_meta_data = training_logs_directory + 'training_meta_data.pkl'
            meta_data_to_store = {MEAN_KEY_X: mean_x,
                                  DEVIATION_KEY_X: deviation_x,
                                  MEAN_KEY_Y: mean_y,
                                  DEVIATION_KEY_Y: deviation_y,
                                  DRIFT_PER_TIME_STEP_KEY: drift_per_time_step,
                                  MOVING_WINDOWS_X_SIZE_KEY: moving_windows_x_size,
                                  TASKS_TRAINED_ON_KEY: tasks_trained_on,
                                  TASKS_ENCOUNTERED_KEY: tasks_encountered,
                                  WINDOW_SIZE_KEY: window_size}
            with open(file_name_to_save_meta_data, 'wb') as f:
                pickle.dump(meta_data_to_store, f)
            print(BLUE('Training phase'))
            train_BBB(data_x=copy.deepcopy(disposable_training_x),
                      data_y=copy.deepcopy(disposable_training_y),
                      configuration_identity=configuration_identity,
                      epochs=epochs,
                      number_mini_batches=number_mini_batches,
                      activation_unit=activation_unit,
                      learning_rate=learning_rate,
                      hidden_units=hidden_units,
                      number_samples_variance_reduction=number_samples_variance_reduction,
                      precision_alpha=precision_alpha,
                      weights_prior_mean_1=weights_prior_mean_1,
                      weights_prior_mean_2=weights_prior_mean_2,
                      weights_prior_deviation_1=weights_prior_deviation_1,
                      weights_prior_deviation_2=weights_prior_deviation_2,
                      mixture_pie=mixture_pie,
                      rho_mean=rho_mean,
                      extra_likelihood_emphasis=extra_likelihood_emphasis)
            print(BLUE('Validation phase'))
            validate_BBB(domain_name=domain_name,
                         task_identity=current_task_identity,
                         configuration_identity=configuration_identity)
        '''

        ###### BBB Controller ######
        did_succeed = False
        all_gathered_x, all_gathered_y = None, None
        tasks_trained_on, tasks_encountered, task_iterator_trained_on = [], [], []
        current_task_identity = COPY_OF_ALL_MUJOCO_TASK_IDENTITIES[0]
        detector = Detector(domain_name=domain_name,
                            start_monitoring_at_time_step=start_monitoring_at_time_step,
                            initial_threshold=initial_detector_threshold,
                            detector_m=detector_m,
                            detector_c=detector_c)
        print(GREEN('Starting runs for the BBB controller'))
        for task_iterator in range(len(COPY_OF_ALL_MUJOCO_TASK_IDENTITIES)):
            print(RED('Simulation iteration is ' + str(simulation_iterator) +
                      ', task iterator is ' + str(task_iterator) +
                      ', and current task is ' + str(current_task_identity)))
            tasks_encountered.append(current_task_identity)
            detector.reset()
            configuration_identity = ('logs/' + domain_name +
                                      '/bbb_controller/detector_c_' + str(detector_c) +
                                      '_detector_m_' + str(detector_m) + '/' +
                                      str(simulation_iterator) + '/' +
                                      str(task_iterator) + '/')
            training_logs_directory = configuration_identity + 'training/'
            if not os.path.exists(training_logs_directory):
                os.makedirs(training_logs_directory)

            # Retrain only if the detector flagged the controller as failing on
            # the current task; otherwise keep using the existing controller.
            if not did_succeed:
                tasks_trained_on.append(current_task_identity)
                task_iterator_trained_on.append(task_iterator)
                moving_windows_x, moving_windows_y, drift_per_time_step, moving_windows_x_size = \
                    getDemonstrationsFromTask(domain_name=domain_name,
                                              task_identity=current_task_identity,
                                              window_size=window_size,
                                              number_demonstrations=number_demonstrations)
                # Aggregate demonstrations from every task trained on so far.
                if all_gathered_x is None:
                    all_gathered_x = copy.deepcopy(moving_windows_x)
                    all_gathered_y = copy.deepcopy(moving_windows_y)
                else:
                    all_gathered_x = np.append(all_gathered_x, moving_windows_x, axis=0)
                    all_gathered_y = np.append(all_gathered_y, moving_windows_y, axis=0)
                disposable_training_x = copy.deepcopy(all_gathered_x)
                disposable_training_y = copy.deepcopy(all_gathered_y)
                mean_x, deviation_x = get_mean_and_deviation(data=disposable_training_x)
                disposable_training_x = NORMALIZE(disposable_training_x, mean_x, deviation_x)
                mean_y, deviation_y = get_mean_and_deviation(data=disposable_training_y)
                disposable_training_y = NORMALIZE(disposable_training_y, mean_y, deviation_y)

                file_name_to_save_meta_data = training_logs_directory + 'training_meta_data.pkl'
                meta_data_to_store = {MEAN_KEY_X: mean_x,
                                      DEVIATION_KEY_X: deviation_x,
                                      MEAN_KEY_Y: mean_y,
                                      DEVIATION_KEY_Y: deviation_y,
                                      DRIFT_PER_TIME_STEP_KEY: drift_per_time_step,
                                      MOVING_WINDOWS_X_SIZE_KEY: moving_windows_x_size,
                                      TASKS_TRAINED_ON_KEY: tasks_trained_on,
                                      TASKS_ENCOUNTERED_KEY: tasks_encountered,
                                      WINDOW_SIZE_KEY: window_size}
                with open(file_name_to_save_meta_data, 'wb') as f:
                    pickle.dump(meta_data_to_store, f)

                print(BLUE('Training phase'))
                train_BBB(data_x=copy.deepcopy(disposable_training_x),
                          data_y=copy.deepcopy(disposable_training_y),
                          configuration_identity=configuration_identity,
                          epochs=epochs,
                          number_mini_batches=number_mini_batches,
                          activation_unit=activation_unit,
                          learning_rate=learning_rate,
                          hidden_units=hidden_units,
                          number_samples_variance_reduction=number_samples_variance_reduction,
                          precision_alpha=precision_alpha,
                          weights_prior_mean_1=weights_prior_mean_1,
                          weights_prior_mean_2=weights_prior_mean_2,
                          weights_prior_deviation_1=weights_prior_deviation_1,
                          weights_prior_deviation_2=weights_prior_deviation_2,
                          mixture_pie=mixture_pie,
                          rho_mean=rho_mean,
                          extra_likelihood_emphasis=extra_likelihood_emphasis)
                _, average_uncertainty = run_on_itself(domain_name=domain_name,
                                                       task_identity=current_task_identity,
                                                       configuration_identity=configuration_identity)
                # Ground the detector threshold in the quantitative uncertainty
                # measured on the current task.
                if adapt_detector_threshold:
                    detector.threshold = average_uncertainty
                meta_data_file_for_this_run = ('logs/' + domain_name +
                                               '/bbb_controller/detector_c_' + str(detector_c) +
                                               '_detector_m_' + str(detector_m) + '/' +
                                               str(simulation_iterator) + '/meta_data.pkl')
                meta_data_for_this_run = {TRAINING_TASK_ITERATION_KEY: task_iterator_trained_on}
                with open(meta_data_file_for_this_run, 'wb') as f:
                    pickle.dump(meta_data_for_this_run, f)

            print(BLUE('Validation phase'))
            validate_BBB(domain_name=domain_name,
                         task_identity=current_task_identity,
                         configuration_identity=configuration_identity)
            if task_iterator == (len(COPY_OF_ALL_MUJOCO_TASK_IDENTITIES) - 1):
                break
            # Move on to the next task and check whether the current controller
            # already succeeds on it; the new task is appended to
            # tasks_encountered at the top of the next loop iteration.
            current_task_identity = COPY_OF_ALL_MUJOCO_TASK_IDENTITIES[task_iterator + 1]
            did_succeed, average_uncertainty = run_on_itself(domain_name=domain_name,
                                                             task_identity=current_task_identity,
                                                             configuration_identity=configuration_identity,
                                                             detector=detector)
            did_succeed = str_to_bool(did_succeed)
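

# A minimal sketch (assumed usage, not called by the pipeline) of reading back
# the per-run meta-data written in the loop above; the path mirrors the one
# built in data_efficient_imitation_across_multiple_tasks, and the helper name
# is hypothetical.
def load_training_iterations_sketch(domain_name, detector_c, detector_m,
                                    simulation_iterator):
    # Returns the task iterations at which the BBB controller was retrained.
    meta_data_file = ('logs/' + domain_name + '/bbb_controller/detector_c_' +
                      str(detector_c) + '_detector_m_' + str(detector_m) + '/' +
                      str(simulation_iterator) + '/meta_data.pkl')
    with open(meta_data_file, 'rb') as f:
        return pickle.load(f)[TRAINING_TASK_ITERATION_KEY]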
def generate_GP_controller(domain_name, task_identity, window_size,
                           number_demonstrations):
    GP_LOGS_DIRECTORY = (LOGS_DIRECTORY + domain_name + '/' +
                         str(number_demonstrations) +
                         '/GP_controller/window_size_' + str(window_size) + '/')
    if not os.path.exists(GP_LOGS_DIRECTORY):
        os.makedirs(GP_LOGS_DIRECTORY)
    file_to_save_gp_fit_logs = GP_LOGS_DIRECTORY + str(task_identity) + '_fit.pkl'
    gp_fit_logs = {}

    start_time = datetime.now()
    moving_windows_x, moving_windows_y, drift_per_time_step, moving_windows_x_size = \
        getDemonstrationsFromTask(domain_name=domain_name,
                                  task_identity=task_identity,
                                  window_size=window_size,
                                  number_demonstrations=number_demonstrations)
    print(RED('Time taken to generate the dataset is ' +
              str(datetime.now() - start_time)))

    '''
    # Heuristic starting values for the kernel parameters
    # (requires scipy.spatial.distance.cdist).
    print(GREEN('Heuristic values of the parameters'))
    kernel_variance = np.var(moving_windows_y)
    kernel_lengthscales = np.median(cdist(moving_windows_x, moving_windows_x, 'sqeuclidean').flatten())
    print(BLUE('Kernel variance is ' + str(kernel_variance)))
    print(BLUE('Kernel lengthscales is ' + str(kernel_lengthscales)))
    print(BLUE('Likelihood variance is 1%-10% of ' + str(kernel_variance)))
    '''

    mean_x, deviation_x = get_mean_and_deviation(data=moving_windows_x)
    moving_windows_x = NORMALIZE(moving_windows_x, mean_x, deviation_x)
    mean_y, deviation_y = get_mean_and_deviation(data=moving_windows_y)
    moving_windows_y = NORMALIZE(moving_windows_y, mean_y, deviation_y)

    # GPflow expects float64 inputs; build an RBF kernel with per-dimension
    # lengthscales and a GPR model with a small likelihood variance.
    k = gpflow.kernels.RBF(moving_windows_x.shape[1],
                           lengthscales=0.01 * np.std(moving_windows_x, axis=0))
    moving_windows_x = np.float64(moving_windows_x)
    moving_windows_y = np.float64(moving_windows_y)
    m = gpflow.models.GPR(moving_windows_x, moving_windows_y, k)
    m.likelihood.variance = 1e-4

    # Record the quality of the fit before hyper-parameter optimization.
    mean_control, var_control = m.predict_y(moving_windows_x)
    mean_squared_predictive_error = np.mean(np.square(mean_control - moving_windows_y))
    average_dev_control = np.mean(np.sqrt(var_control))
    gp_fit_logs[UNOPTIMIZED_GP_FIT_KEY] = {
        MEAN_GP_FIT_PREDICTIVE_ERROR_KEY: mean_squared_predictive_error,
        MEAN_GP_FIT_DEVIATION_KEY: average_dev_control
    }
    gp_fit_logs[UNOPTIMIZED_GP_TRAINABLES_KEY] = m.as_pandas_table()

    start_time = datetime.now()
    gpflow.train.ScipyOptimizer().minimize(m)
    print(RED('Time taken to optimize the parameters is ' +
              str(datetime.now() - start_time)))

    # Record the quality of the fit after optimization.
    mean_control, var_control = m.predict_y(moving_windows_x)
    mean_squared_predictive_error = np.mean(np.square(mean_control - moving_windows_y))
    average_dev_control = np.mean(np.sqrt(var_control))
    gp_fit_logs[OPTIMIZED_GP_FIT_KEY] = {
        MEAN_GP_FIT_PREDICTIVE_ERROR_KEY: mean_squared_predictive_error,
        MEAN_GP_FIT_DEVIATION_KEY: average_dev_control
    }
    gp_fit_logs[OPTIMIZED_GP_TRAINABLES_KEY] = m.as_pandas_table()

    with open(file_to_save_gp_fit_logs, 'wb') as f:
        pickle.dump(gp_fit_logs, f, protocol=-1)

    start_time = datetime.now()
    print(GREEN('Started validation'))
    logs_for_all_tasks = validate_GP_controller(domain_name=domain_name,
                                                task_identity=task_identity,
                                                window_size=window_size,
                                                drift_per_time_step=drift_per_time_step,
                                                moving_windows_x_size=moving_windows_x_size,
                                                behavior_controller=m,
                                                mean_x=mean_x,
                                                deviation_x=deviation_x,
                                                mean_y=mean_y,
                                                deviation_y=deviation_y)
    print(RED('Time taken for the validation step is ' +
              str(datetime.now() - start_time)))

    file_to_save_logs = GP_LOGS_DIRECTORY + str(task_identity) + '_validation.pkl'
    with open(file_to_save_logs, 'wb') as f:
        pickle.dump(logs_for_all_tasks, f, protocol=-1)
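

# A minimal driver sketch showing how the entry points above might be invoked.
# Every value below is an illustrative placeholder (hypothetical domain name,
# task identity, and hyper-parameters), not a setting used in the experiments.
if __name__ == '__main__':
    generate_BBB_controller(domain_name='reacher', task_identity=0,
                            window_size=2, number_demonstrations=10,
                            epochs=1000, number_mini_batches=10,
                            activation_unit='RELU', learning_rate=1e-3,
                            hidden_units=[128, 128],
                            number_samples_variance_reduction=25,
                            precision_alpha=0.01,
                            weights_prior_mean_1=0.0, weights_prior_mean_2=0.0,
                            weights_prior_deviation_1=0.5,
                            weights_prior_deviation_2=0.1,
                            mixture_pie=0.5, rho_mean=-3.0,
                            extra_likelihood_emphasis=1.0)
    generate_GP_controller(domain_name='reacher', task_identity=0,
                           window_size=2, number_demonstrations=10)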