def convert_file_names_to_file_paths(target_default_directory, target_file_name_s):
    """
    Given file names and a directory, generates the full paths to those files

    :param target_default_directory: a string
    :param target_file_name_s: either a string or a list of strings
    :return: list of strings
    """
    if isinstance(target_file_name_s, list):
        file_path_s = [
            os.path.join(get_default_data_directory(target_default_directory),
                         file_name) for file_name in target_file_name_s
        ]
    else:
        # single file name given as a string
        file_path_s = [
            os.path.join(get_default_data_directory(target_default_directory),
                         target_file_name_s)
        ]
    return file_path_s
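# Usage sketch (illustrative only -- the directory and file name below simply
# follow the naming patterns used elsewhere in this repository and are not
# guaranteed to exist on disk):
#
#   paths = convert_file_names_to_file_paths(
#       "0_ddpg_summaries_DEPRACATED",
#       ["DDPG_agent_MountainCarContinuous-v0_test-2ep.json"])
#   # -> ["<data dir>/0_ddpg_summaries_DEPRACATED/DDPG_agent_..._test-2ep.json"]
#
#   # Passing a single string instead of a list still returns a one-element list:
#   paths = convert_file_names_to_file_paths(
#       "0_ddpg_summaries_DEPRACATED",
#       "DDPG_agent_MountainCarContinuous-v0_test-2ep.json")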
            tau=0.001,
            layer_norm=False,
            normalize_observations=False,
            normalize_returns=False,
            critic_l2_reg=0,
            enable_popart=False,
            clip_norm=None,
            reward_scale=1.,
            lastLayerTanh=lastLayerTanh)

        # Train the agent, summary contains training data
        summary = rlTrain(agent, env,
                          render=args.render,
                          render_episode=False,
                          print_results=True,
                          num_episodes=episodes)  # type: Summary

        noGpu_str = "-NoGPU" if noGpu else ""
        llTanh_str = "-LLTanh" if lastLayerTanh else ""
        summary.add_params_to_param_dict(zz_RANDOM_SEED=RANDOM_SEED,
                                         zz_episodes=episodes,
                                         noGpu=noGpu)
        fp = summary.save(
            get_default_data_directory("ddpg_baselines_summaries/1"),
            extra_name_append="-" + str(episodes) + "ep" + noGpu_str +
            "-noNorm" + llTanh_str + "-decayingNoise")

        train_writer = tf.summary.FileWriter(fp[:-5])
        train_writer.add_graph(sess.graph)
def task_run_ss_ddpg_baselines_mc(params):
    import tensorflow as tf
    print("\n\nprocess " + str(params['id']) + " has started" + "-" * 200 +
          "\n")
    noGpu = params['noGpu']
    render = False
    replay_buffer = None

    # random seed each time
    random.seed()
    RANDOM_SEED = random.randint(0, 2**32 - 1)

    # Overall Options
    episodes = params['episodes']
    dir_name = params['dir_name']
    # naming function
    get_extra_name = params['get_extra_name']

    # configuring environment
    ENV_NAME = 'MountainCarContinuous-v0'
    env = gym.make(ENV_NAME)

    if noGpu:
        tfConfig = tf.ConfigProto(device_count={'GPU': 0})
    else:
        tfConfig = None

    with tf.Graph().as_default() as graph:
        with tf.Session(config=tfConfig, graph=graph) as sess:
            # with tf.Session() as sess:
            # Reset the seed for random number generation
            set_global_seeds(RANDOM_SEED)
            env.seed(RANDOM_SEED)

            # Initialize agent, see class for available parameters
            base_agent = DDPG_Baselines_agent(
                env,
                sess,
                replay_buffer=replay_buffer,
                buffer_size=params['buffer_size'],
                batch_size=params['batch_size'],
                num_train_iterations=params['num_train_iterations'],
                num_steps_before_train=params['num_steps_before_train'],
                ou_epsilon=params['ou_epsilon'],
                ou_min_epsilon=params['ou_min_epsilon'],
                ou_epsilon_decay_factor=params['ou_epsilon_decay_factor'],
                ou_mu=params['ou_mu'],
                ou_sigma=params['ou_sigma'],
                ou_theta=params['ou_theta'],
                # actor_lr=params['actor_lr'],
                actor_lr=params['lr'],
                actor_h1=params['actor_h1'],
                actor_h2=params['actor_h2'],
                # critic_lr=params['critic_lr'],
                critic_lr=params['lr'],
                critic_h1=params['critic_h1'],
                critic_h2=params['critic_h2'],
                gamma=params['gamma'],
                tau=params['tau'],
                layer_norm=params['layer_norm'],
                normalize_observations=params['normalize_observations'],
                normalize_returns=params['normalize_returns'],
                critic_l2_reg=params['critic_l2_reg'],
                enable_popart=params['enable_popart'],
                clip_norm=params['clip_norm'],
                reward_scale=params['reward_scale'],
                lastLayerTanh=params['lastLayerTanh'],
                finalizeGraph=False)

            smart_start_agent = SmartStartContinuous(
                base_agent,
                env,
                sess,
                buffer_size=params['buffer_size'],
                exploitation_param=params['exploitation_param'],
                exploration_param=params['exploration_param'],
                eta=params['eta'],
                eta_decay_factor=params['eta_decay_factor'],
                n_ss=params['n_ss'],
                print_ss_stuff=True,
                # sigma=params['sigma'],
                # smart_start_selection_modified_distance_function=params['smart_start_selection_modified_distance_function'],
                nnd_mb_final_steps=params['nnd_mb_final_steps'],
                nnd_mb_steps_per_waypoint=params['nnd_mb_steps_per_waypoint'],
                nnd_mb_mean_per_stepsize=params['nnd_mb_mean_per_stepsize'],
                nnd_mb_std_per_stepsize=params['nnd_mb_std_per_stepsize'],
                nnd_mb_stepsizes_in_waypoint_radii=params['nnd_mb_stepsizes_in_waypoint_radii'],
                nnd_mb_gamma=params['nnd_mb_gamma'],
                nnd_mb_horizontal_penalty_factor=params['nnd_mb_horizontal_penalty_factor'],
                nnd_mb_horizon=params['nnd_mb_horizon'],
                nnd_mb_num_control_samples=params['nnd_mb_num_control_samples'],
                nnd_mb_path_shortcutting=params['nnd_mb_path_shortcutting'],
                nnd_mb_steps_before_giving_up_on_waypoint=params['nnd_mb_steps_before_giving_up_on_waypoint'],
                nnd_mb_load_dir_name=params['nnd_mb_load_dir_name'],
                nnd_mb_load_existing_training_data=params['nnd_mb_load_existing_training_data'],
                nnd_mb_num_fc_layers=params['nnd_mb_num_fc_layers'],
                nnd_mb_depth_fc_layers=params['nnd_mb_depth_fc_layers'],
                nnd_mb_batchsize=params['nnd_mb_batchsize'],
                nnd_mb_lr=params['nnd_mb_lr'],
                nnd_mb_nEpoch=params['nnd_mb_nEpoch'],
                nnd_mb_fraction_use_new=params['nnd_mb_fraction_use_new'],
                nnd_mb_num_episodes_for_aggregation=params['nnd_mb_num_episodes_for_aggregation'],
                nnd_mb_make_aggregated_dataset_noisy=params['nnd_mb_make_aggregated_dataset_noisy'],
                nnd_mb_make_training_dataset_noisy=params['nnd_mb_make_training_dataset_noisy'],
                nnd_mb_noise_actions_during_MPC_rollouts=params['nnd_mb_noise_actions_during_MPC_rollouts'],
                nnd_mb_verbose=params['nnd_mb_verbose'])

            sess.graph.finalize()

            # Train the agent, summary contains training data
            summary = rlTrain(smart_start_agent, env,
                              render=render,
                              render_episode=False,
                              print_steps=False,
                              print_results=False,
                              num_episodes=episodes,
                              print_time=False,
                              progress_bar=True,
                              id=params['id'],
                              num_ticks=params['num_ticks'])  # type: Summary

            summary.add_params_to_param_dict(zz_RANDOM_SEED=RANDOM_SEED,
                                             zz_episodes=episodes,
                                             noGpu=noGpu)
            fp = summary.save(get_default_data_directory(dir_name),
                              last_name_section=True,
                              extra_name_append=get_extra_name(params))

    print("\n\nprocess " + str(params['id']) + " has finished" + "!" * 200 +
          "\n")
    'nnd_mb_num_fc_layers': [1],
    'nnd_mb_depth_fc_layers': [32],
    'nnd_mb_batchsize': [512],
    'nnd_mb_lr': [0.001],
    'nnd_mb_nEpoch': [30],
    'nnd_mb_fraction_use_new': [0.9],
    'nnd_mb_num_episodes_for_aggregation': [4],
    'nnd_mb_make_aggregated_dataset_noisy': [True],
    'nnd_mb_make_training_dataset_noisy': [True],
    'nnd_mb_noise_actions_during_MPC_rollouts': [True],
    'nnd_mb_verbose': [False],
    'get_extra_name': [get_extra_name]
}

noGpu_str = "_NoGPU" if noGpu else ""
llTanh_str = "_LLTanh" if lastLayerTanh else ""
decayingNoise_str = "_decayingNoise" if decaying_noise else ""
if len(paramsGrid['nnd_mb_num_fc_layers']) == 1 and \
        len(paramsGrid['nnd_mb_depth_fc_layers']) == 1:
    fc_layer_str = "_fcLayer-" + \
        str(paramsGrid['nnd_mb_num_fc_layers'][0]) + "lyrs-" + \
        str(paramsGrid['nnd_mb_depth_fc_layers'][0]) + "dpth"
else:
    fc_layer_str = ""

create_experimeter_info_txt(paramsGrid,
                            get_default_data_directory(dir_name),
                            name_append="_" + str(episodes) + "ep" +
                            noGpu_str + llTanh_str + decayingNoise_str +
                            fc_layer_str)
run_experiment(paramsGrid, n_processes=-1)
import random

import numpy as np

from smartstart.RLDiscreteAlgorithms.qlearning import QLearning
# from smartstart.smartexploration.smartexplorationdiscrete import generate_smartstart_object
from smartstart.environments.gridworld import GridWorld
from smartstart.utilities.experimenter import run_experiment
from smartstart.utilities.utilities import get_default_data_directory

# Get the path to the data folder in the same directory as this file.
# If the folder does not exist it will be created
summary_dir = get_default_data_directory("")


# Define the task function for the experiment
def task(params):
    print(params)


# Define a parameter grid that can be supplied to the run_experiment method
param_grid = {
    'task': task,
    'num_exp': 5,
    'use_smart_start': [True, False],
    'asdf': [0, 1]
}

if __name__ == '__main__':
    run_experiment(param_grid, n_processes=-1)
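# Note (assumption, not a documented contract of run_experiment): the grid is
# presumably expanded over the cross product of every list-valued entry, with
# each combination repeated `num_exp` times, so `task` above would be invoked
# 2 * 2 * 5 = 20 times, each call receiving one concrete parameter dict such as
# {'use_smart_start': True, 'asdf': 0, ...}. See the larger grids elsewhere in
# this repository for how the resulting `params` dicts are consumed.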
        num_episodes_for_aggregation=num_episodes_for_aggregation,
        save_dir_name=save_dir,
        load_dir_name=load_dir,
        save_resulting_dynamics_model=save_model,
        load_existing_training_data=load_training,
        depth_fc_layers=depth_fc_layers,
        num_fc_layers=num_fc_layers,
        lr=lr,
        nEpoch=nEpochs)  # type: NND_MB_agent

    # initializing path
    target_default_directory = "0_ddpg_summaries_DEPRACATED"
    # target_file_name = "DDPG_agent_MountainCarContinuous-v0-1000ep.json"
    target_file_name = "DDPG_agent_MountainCarContinuous-v0_test-2ep.json"
    target_file_pathname = os.path.join(
        get_default_data_directory(target_default_directory), target_file_name)
    target_summary = Summary.load(target_file_pathname)  # type: Summary

    target_path = target_summary.get_last_path(0)
    target_reward = target_summary.get_last_reward(0)
    # target_path = target_summary.best_path
    # target_reward = target_summary.best_reward

    # summary object
    summary = Summary(agent.__class__.__name__ + "_" + env.spec.id)

    # for printing
    np.set_printoptions(
        formatter={'float': lambda x: "{0:0.4f}".format(x)})

    # begin training episodes(1)
def task_run_ddpg_baselines_mc(params):
    import tensorflow as tf
    print("\n\nprocess " + str(params['id']) + " has started" + "-" * 200 +
          "\n")
    noGpu = params['noGpu']
    render = False
    replay_buffer = None

    # random seed each time
    random.seed()
    RANDOM_SEED = random.randint(0, 2**32 - 1)

    # Overall Options
    episodes = params['episodes']
    dir_name = params['dir_name']
    # naming function
    get_extra_name = params['get_extra_name']

    # configuring environment
    env = Continuous_MountainCarEnv_Editted.make_timed_env(
        params['power_scalar'],
        max_episode_steps=params['max_episode_steps'],
        max_episode_seconds=params['max_episode_seconds'])

    buffer_size = params['buffer_size']
    batch_size = params['batch_size']
    num_train_iterations = params['num_train_iterations']
    num_steps_before_train = params['num_steps_before_train']
    ou_epsilon = params['ou_epsilon']
    ou_min_epsilon = params['ou_min_epsilon']
    ou_epsilon_decay_factor = params['ou_epsilon_decay_factor']
    ou_mu = params['ou_mu']
    ou_sigma = params['ou_sigma']
    ou_theta = params['ou_theta']
    actor_lr = params['actor_lr']
    actor_h1 = params['actor_h1']
    actor_h2 = params['actor_h1'] // 2
    critic_lr = params['critic_lr']
    critic_h1 = params['critic_h1']
    critic_h2 = params['critic_h1'] // 2
    gamma = params['gamma']
    tau = params['tau']
    layer_norm = params['layer_norm']
    normalize_observations = params['normalize_observations']
    normalize_returns = params['normalize_returns']
    critic_l2_reg = params['critic_l2_reg']
    enable_popart = params['enable_popart']
    clip_norm = params['clip_norm']
    reward_scale = params['reward_scale']
    lastLayerTanh = params['lastLayerTanh']

    if noGpu:
        tfConfig = tf.ConfigProto(device_count={'GPU': 0})
    else:
        tfConfig = None

    with tf.Graph().as_default() as graph:
        with tf.Session(config=tfConfig, graph=graph) as sess:
            # with tf.Session() as sess:
            # Reset the seed for random number generation
            set_global_seeds(RANDOM_SEED)
            env.seed(RANDOM_SEED)

            # Initialize agent, see class for available parameters
            agent = DDPG_Baselines_agent(
                env,
                sess,
                replay_buffer=replay_buffer,
                buffer_size=buffer_size,
                batch_size=batch_size,
                num_train_iterations=num_train_iterations,
                num_steps_before_train=num_steps_before_train,
                ou_epsilon=ou_epsilon,
                ou_min_epsilon=ou_min_epsilon,
                ou_epsilon_decay_factor=ou_epsilon_decay_factor,
                ou_mu=ou_mu,
                ou_sigma=ou_sigma,
                ou_theta=ou_theta,
                actor_lr=actor_lr,
                actor_h1=actor_h1,
                actor_h2=actor_h2,
                critic_lr=critic_lr,
                critic_h1=critic_h1,
                critic_h2=critic_h2,
                gamma=gamma,
                tau=tau,
                layer_norm=layer_norm,
                normalize_observations=normalize_observations,
                normalize_returns=normalize_returns,
                critic_l2_reg=critic_l2_reg,
                enable_popart=enable_popart,
                clip_norm=clip_norm,
                reward_scale=reward_scale,
                lastLayerTanh=lastLayerTanh)

            # Train the agent, summary contains training data
            summary = rlTrain(agent, env,
                              render=render,
                              render_episode=False,
                              print_steps=False,
                              print_results=False,
                              num_episodes=episodes,
                              progress_bar=True,
                              id=params['id'],
                              num_ticks=params['num_ticks'])  # type: Summary

            summary.add_params_to_param_dict(zz_RANDOM_SEED=RANDOM_SEED,
                                             zz_episodes=episodes,
                                             noGpu=noGpu)
            fp = summary.save(get_default_data_directory(dir_name),
                              last_name_section=True,
                              extra_name_append=get_extra_name(params))

    print("\n\nprocess " + str(params['id']) + " has finished" + "!" * 200 +
          "\n")
        # Train the agent, summary contains training data
        # summary = rlTrain(smart_start_agent, env,
        #                   render=args.render,
        #                   render_episode=False,
        #                   print_steps=False,
        #                   print_results=False,
        #                   num_episodes=episodes,
        #                   print_time=True)  # type: Summary
        summary = rlTrainGraphSS(smart_start_agent, env,
                                 render=args.render,
                                 render_episode=False,
                                 print_steps=False,
                                 print_results=False,
                                 num_episodes=episodes,
                                 plot_ss_stuff=False,
                                 print_time=True)

        noGpu_str = "-NoGPU" if noGpu else ""
        llTanh_str = "-LLTanh" if lastLayerTanh else ""
        summary.add_params_to_param_dict(zz_RANDOM_SEED=RANDOM_SEED,
                                         zz_episodes=episodes,
                                         noGpu=noGpu)
        fp = summary.save(
            get_default_data_directory("smart_start_continuous_summaries/0/"),
            extra_name_append="-" + str(episodes) + "ep" + noGpu_str +
            "-noNorm" + llTanh_str + "-decayingNoise" + "-2000n_ss")

        train_writer = tf.summary.FileWriter(fp[:-5])
        train_writer.add_graph(sess.graph)
    parser.add_argument('--actor-lr', type=float, default=0.01)
    parser.add_argument('--critic-lr', type=float, default=0.005)
    boolean_flag(parser, 'popart', default=False)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--reward-scale', type=float, default=1.)
    parser.add_argument('--clip-norm', type=float, default=None)
    parser.add_argument('--nb-epochs', type=int, default=1)  # with default settings, perform 1M steps total
    parser.add_argument('--nb-epoch-cycles', type=int, default=1000)
    parser.add_argument('--nb-train-steps', type=int, default=1)  # per epoch cycle and MPI worker
    parser.add_argument('--nb-eval-steps', type=int, default=100)  # per epoch cycle and MPI worker
    parser.add_argument('--nb-rollout-steps', type=int, default=1)  # per epoch cycle and MPI worker
    parser.add_argument('--noise-type', type=str, default='ou_0.2')  # choices are adaptive-param_xx, ou_xx, normal_xx, none
    parser.add_argument('--num-timesteps', type=int, default=None)
    boolean_flag(parser, 'evaluation', default=False)
    args = parser.parse_args()

    # we don't directly specify timesteps for this script, so make sure that
    # if we do specify them they agree with the other parameters
    if args.num_timesteps is not None:
        assert (args.num_timesteps ==
                args.nb_epochs * args.nb_epoch_cycles * args.nb_rollout_steps)
    dict_args = vars(args)
    del dict_args['num_timesteps']
    return dict_args


if __name__ == '__main__':
    args = parse_args()
    if MPI.COMM_WORLD.Get_rank() == 0:
        logger.configure(dir=os.path.join(
            get_default_data_directory("dppg_baselines_main_editted")))
    # Run actual script.
    run(**args)