Example #1
import os

from smartstart.utilities.utilities import get_default_data_directory


def convert_file_names_to_file_paths(target_default_directory,
                                     target_file_name_s):
    """
    given a directory and one or more file names, generates the full paths to those files
    :param target_default_directory: a string
    :param target_file_name_s: either a string or a list of strings
    :return: list of strings
    """
    if isinstance(target_file_name_s, list):
        file_path_s = [
            os.path.join(get_default_data_directory(target_default_directory),
                         file_name) for file_name in target_file_name_s
        ]
    else:
        file_path_s = [
            os.path.join(get_default_data_directory(target_default_directory),
                         target_file_name_s)
        ]
    return file_path_s
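A minimal usage sketch of the helper above (the file names are hypothetical; it assumes os and smartstart's get_default_data_directory are importable, as in the other examples):

# Hypothetical call: resolve two summary files inside the default data directory
example_paths = convert_file_names_to_file_paths(
    "ddpg_baselines_summaries/1", ["run_a.json", "run_b.json"])
print(example_paths)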
Example #2
                                     tau=0.001,
                                     layer_norm=False,
                                     normalize_observations=False,
                                     normalize_returns=False,
                                     critic_l2_reg=0,
                                     enable_popart=False,
                                     clip_norm=None,
                                     reward_scale=1.,
                                     lastLayerTanh=lastLayerTanh)

        # Train the agent, summary contains training data
        summary = rlTrain(agent,
                          env,
                          render=args.render,
                          render_episode=False,
                          print_results=True,
                          num_episodes=episodes)  # type: Summary

        noGpu_str = "-NoGPU" if noGpu else ""
        llTanh_str = "-LLTanh" if lastLayerTanh else ""
        summary.add_params_to_param_dict(zz_RANDOM_SEED=RANDOM_SEED,
                                         zz_episodes=episodes,
                                         noGpu=noGpu)
        fp = summary.save(
            get_default_data_directory("ddpg_baselines_summaries/1"),
            extra_name_append="-" + str(episodes) + "ep" + noGpu_str +
            "-noNorm" + llTanh_str + "-decayingNoise")

        train_writer = tf.summary.FileWriter(fp[:-5])
        train_writer.add_graph(sess.graph)
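A small follow-up this excerpt omits (a sketch assuming the TF1 tf.summary.FileWriter API, and assuming fp ends in ".json", which would explain the fp[:-5] slice used as the event directory):

        # Flush and close the writer so the event file containing the graph is
        # actually written to disk and can be inspected with TensorBoard later.
        train_writer.flush()
        train_writer.close()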
Example #3
def task_run_ss_ddpg_baselines_mc(params):
    import tensorflow as tf

    print("\n\nprocess " + str(params['id']) + " has started" + "-" * 200 +
          "\n")

    noGpu = params['noGpu']
    render = False
    replay_buffer = None

    # random seed each time
    random.seed()
    RANDOM_SEED = random.randint(0, 2**32 - 1)

    # Overall Options
    episodes = params['episodes']
    dir_name = params['dir_name']

    # naming function
    get_extra_name = params['get_extra_name']

    # configuring environment
    ENV_NAME = 'MountainCarContinuous-v0'
    env = gym.make(ENV_NAME)

    if noGpu:
        tfConfig = tf.ConfigProto(device_count={'GPU': 0})
    else:
        tfConfig = None

    with tf.Graph().as_default() as graph:
        with tf.Session(config=tfConfig, graph=graph) as sess:
            # with tf.Session() as sess:
            # Reset the seed for random number generation
            set_global_seeds(RANDOM_SEED)
            env.seed(RANDOM_SEED)

            # Initialize agent, see class for available parameters
            base_agent = DDPG_Baselines_agent(
                env,
                sess,
                replay_buffer=replay_buffer,
                buffer_size=params['buffer_size'],
                batch_size=params['batch_size'],
                num_train_iterations=params['num_train_iterations'],
                num_steps_before_train=params['num_steps_before_train'],
                ou_epsilon=params['ou_epsilon'],
                ou_min_epsilon=params['ou_min_epsilon'],
                ou_epsilon_decay_factor=params['ou_epsilon_decay_factor'],
                ou_mu=params['ou_mu'],
                ou_sigma=params['ou_sigma'],
                ou_theta=params['ou_theta'],
                # actor_lr = params['actor_lr'],
                actor_lr=params['lr'],
                actor_h1=params['actor_h1'],
                actor_h2=params['actor_h2'],
                # critic_lr = params['critic_lr'],
                critic_lr=params['lr'],
                critic_h1=params['critic_h1'],
                critic_h2=params['critic_h2'],
                gamma=params['gamma'],
                tau=params['tau'],
                layer_norm=params['layer_norm'],
                normalize_observations=params['normalize_observations'],
                normalize_returns=params['normalize_returns'],
                critic_l2_reg=params['critic_l2_reg'],
                enable_popart=params['enable_popart'],
                clip_norm=params['clip_norm'],
                reward_scale=params['reward_scale'],
                lastLayerTanh=params['lastLayerTanh'],
                finalizeGraph=False)

            smart_start_agent = SmartStartContinuous(
                base_agent,
                env,
                sess,
                buffer_size=params['buffer_size'],
                exploitation_param=params['exploitation_param'],
                exploration_param=params['exploration_param'],
                eta=params['eta'],
                eta_decay_factor=params['eta_decay_factor'],
                n_ss=params['n_ss'],
                print_ss_stuff=True,
                # sigma=params['sigma'],
                # smart_start_selection_modified_distance_function=params['smart_start_selection_modified_distance_function'],
                nnd_mb_final_steps=params['nnd_mb_final_steps'],
                nnd_mb_steps_per_waypoint=params['nnd_mb_steps_per_waypoint'],
                nnd_mb_mean_per_stepsize=params['nnd_mb_mean_per_stepsize'],
                nnd_mb_std_per_stepsize=params['nnd_mb_std_per_stepsize'],
                nnd_mb_stepsizes_in_waypoint_radii=params[
                    'nnd_mb_stepsizes_in_waypoint_radii'],
                nnd_mb_gamma=params['nnd_mb_gamma'],
                nnd_mb_horizontal_penalty_factor=params[
                    'nnd_mb_horizontal_penalty_factor'],
                nnd_mb_horizon=params['nnd_mb_horizon'],
                nnd_mb_num_control_samples=params[
                    'nnd_mb_num_control_samples'],
                nnd_mb_path_shortcutting=params['nnd_mb_path_shortcutting'],
                nnd_mb_steps_before_giving_up_on_waypoint=params[
                    'nnd_mb_steps_before_giving_up_on_waypoint'],
                nnd_mb_load_dir_name=params['nnd_mb_load_dir_name'],
                nnd_mb_load_existing_training_data=params[
                    'nnd_mb_load_existing_training_data'],
                nnd_mb_num_fc_layers=params['nnd_mb_num_fc_layers'],
                nnd_mb_depth_fc_layers=params['nnd_mb_depth_fc_layers'],
                nnd_mb_batchsize=params['nnd_mb_batchsize'],
                nnd_mb_lr=params['nnd_mb_lr'],
                nnd_mb_nEpoch=params['nnd_mb_nEpoch'],
                nnd_mb_fraction_use_new=params['nnd_mb_fraction_use_new'],
                nnd_mb_num_episodes_for_aggregation=params[
                    'nnd_mb_num_episodes_for_aggregation'],
                nnd_mb_make_aggregated_dataset_noisy=params[
                    'nnd_mb_make_aggregated_dataset_noisy'],
                nnd_mb_make_training_dataset_noisy=params[
                    'nnd_mb_make_training_dataset_noisy'],
                nnd_mb_noise_actions_during_MPC_rollouts=params[
                    'nnd_mb_noise_actions_during_MPC_rollouts'],
                nnd_mb_verbose=params['nnd_mb_verbose'])

            sess.graph.finalize()

            # Train the agent, summary contains training data
            summary = rlTrain(smart_start_agent,
                              env,
                              render=render,
                              render_episode=False,
                              print_steps=False,
                              print_results=False,
                              num_episodes=episodes,
                              print_time=False,
                              progress_bar=True,
                              id=params['id'],
                              num_ticks=params['num_ticks'])  # type: Summary

            summary.add_params_to_param_dict(zz_RANDOM_SEED=RANDOM_SEED,
                                             zz_episodes=episodes,
                                             noGpu=noGpu)
            fp = summary.save(get_default_data_directory(dir_name),
                              last_name_section=True,
                              extra_name_append=get_extra_name(params))

            print("\n\nprocess " + str(params['id']) + " has finished" +
                  "!" * 200 + "\n")
Example #4
        'nnd_mb_num_fc_layers': [1],
        'nnd_mb_depth_fc_layers': [32],
        'nnd_mb_batchsize': [512],
        'nnd_mb_lr': [0.001],
        'nnd_mb_nEpoch': [30],
        'nnd_mb_fraction_use_new': [0.9],
        'nnd_mb_num_episodes_for_aggregation': [4],
        'nnd_mb_make_aggregated_dataset_noisy': [True],
        'nnd_mb_make_training_dataset_noisy': [True],
        'nnd_mb_noise_actions_during_MPC_rollouts': [True],
        'nnd_mb_verbose': [False],
        'get_extra_name': [get_extra_name]
    }

    noGpu_str = "_NoGPU" if noGpu else ""
    llTanh_str = "_LLTanh" if lastLayerTanh else ""
    decayingNoise_str = "_decayingNoise" if decaying_noise else ""
    if len(paramsGrid['nnd_mb_num_fc_layers']) == 1 and len(
            paramsGrid['nnd_mb_depth_fc_layers']) == 1:
        fc_layer_str = "_fcLayer-" + str(
            paramsGrid['nnd_mb_num_fc_layers'][0]) + "lyrs-" + str(
                paramsGrid['nnd_mb_depth_fc_layers'][0]) + "dpth"
    else:
        fc_layer_str = ""
    create_experimeter_info_txt(paramsGrid,
                                get_default_data_directory(dir_name),
                                name_append="_" + str(episodes) + "ep" +
                                noGpu_str + llTanh_str + decayingNoise_str +
                                fc_layer_str)
    run_experiment(paramsGrid, n_processes=-1)
Example #5
import random

import numpy as np

from smartstart.RLDiscreteAlgorithms.qlearning import QLearning
# from smartstart.smartexploration.smartexplorationdiscrete import generate_smartstart_object
from smartstart.environments.gridworld import GridWorld
from smartstart.utilities.experimenter import run_experiment
from smartstart.utilities.utilities import get_default_data_directory

# Get the path to the data folder in the same directory as this file.
# If the folder does not exist, it will be created
summary_dir = get_default_data_directory("")


# Define the task function for the experiment
def task(params):
    print(params)


# Define a parameter grid that can be supplied to the run_experiment method
param_grid = {
    'task': task,
    'num_exp': 5,
    'use_smart_start': [True, False],
    'asdf': [0, 1]
}

if __name__ == '__main__':
    run_experiment(param_grid, n_processes=-1)
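A rough sketch of what run_experiment presumably does with this grid (an assumption based on the comments above, not the experimenter's actual implementation): every combination of the list-valued entries is run num_exp times, here 2 * 2 * 5 = 20 calls to task.

from itertools import product

# Presumed expansion of param_grid; the real run_experiment additionally
# spreads these calls over worker processes (n_processes=-1).
grid_keys = ['use_smart_start', 'asdf']
for values in product(*(param_grid[k] for k in grid_keys)):
    for _ in range(param_grid['num_exp']):
        task(dict(zip(grid_keys, values)))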
Example #6
            num_episodes_for_aggregation=num_episodes_for_aggregation,
            save_dir_name=save_dir,
            load_dir_name=load_dir,
            save_resulting_dynamics_model=save_model,
            load_existing_training_data=load_training,
            depth_fc_layers=depth_fc_layers,
            num_fc_layers=num_fc_layers,
            lr=lr,
            nEpoch=nEpochs)  # type: NND_MB_agent

        # initializing path
        target_default_directory = "0_ddpg_summaries_DEPRACATED"
        # target_file_name = "DDPG_agent_MountainCarContinuous-v0-1000ep.json"
        target_file_name = "DDPG_agent_MountainCarContinuous-v0_test-2ep.json"
        target_file_pathname = os.path.join(
            get_default_data_directory(target_default_directory),
            target_file_name)
        target_summary = Summary.load(target_file_pathname)  # type: Summary
        target_path = target_summary.get_last_path(0)
        target_reward = target_summary.get_last_reward(0)
        # target_path = target_summary.best_path
        # target_reward = target_summary.best_reward

        # summary object
        summary = Summary(agent.__class__.__name__ + "_" + env.spec.id)

        np.set_printoptions(
            formatter={'float':
                       lambda x: "{0:0.4f}".format(x)})  # for printing

        # begin training episodes(1)
Example #7
def task_run_ddpg_baselines_mc(params):
    import tensorflow as tf

    print("\n\nprocess " + str(params['id']) + " has started" + "-" * 200 +
          "\n")

    noGpu = params['noGpu']
    render = False
    replay_buffer = None

    # random seed each time
    random.seed()
    RANDOM_SEED = random.randint(0, 2**32 - 1)

    # Overall Options
    episodes = params['episodes']
    dir_name = params['dir_name']

    # naming function
    get_extra_name = params['get_extra_name']

    # configuring environment
    env = Continuous_MountainCarEnv_Editted.make_timed_env(
        params['power_scalar'],
        max_episode_steps=params['max_episode_steps'],
        max_episode_seconds=params['max_episode_seconds'])

    buffer_size = params['buffer_size']
    batch_size = params['batch_size']
    num_train_iterations = params['num_train_iterations']
    num_steps_before_train = params['num_steps_before_train']
    ou_epsilon = params['ou_epsilon']
    ou_min_epsilon = params['ou_min_epsilon']
    ou_epsilon_decay_factor = params['ou_epsilon_decay_factor']
    ou_mu = params['ou_mu']
    ou_sigma = params['ou_sigma']
    ou_theta = params['ou_theta']
    actor_lr = params['actor_lr']
    actor_h1 = params['actor_h1']
    actor_h2 = params['actor_h1'] // 2
    critic_lr = params['critic_lr']
    critic_h1 = params['critic_h1']
    critic_h2 = params['critic_h1'] // 2
    gamma = params['gamma']
    tau = params['tau']
    layer_norm = params['layer_norm']
    normalize_observations = params['normalize_observations']
    normalize_returns = params['normalize_returns']
    critic_l2_reg = params['critic_l2_reg']
    enable_popart = params['enable_popart']
    clip_norm = params['clip_norm']
    reward_scale = params['reward_scale']
    lastLayerTanh = params['lastLayerTanh']

    if noGpu:
        tfConfig = tf.ConfigProto(device_count={'GPU': 0})
    else:
        tfConfig = None

    with tf.Graph().as_default() as graph:
        with tf.Session(config=tfConfig, graph=graph) as sess:
            # with tf.Session() as sess:
            # Reset the seed for random number generation
            set_global_seeds(RANDOM_SEED)
            env.seed(RANDOM_SEED)

            # Initialize agent, see class for available parameters
            agent = DDPG_Baselines_agent(
                env,
                sess,
                replay_buffer=replay_buffer,
                buffer_size=buffer_size,
                batch_size=batch_size,
                num_train_iterations=num_train_iterations,
                num_steps_before_train=num_steps_before_train,
                ou_epsilon=ou_epsilon,
                ou_min_epsilon=ou_min_epsilon,
                ou_epsilon_decay_factor=ou_epsilon_decay_factor,
                ou_mu=ou_mu,
                ou_sigma=ou_sigma,
                ou_theta=ou_theta,
                actor_lr=actor_lr,
                actor_h1=actor_h1,
                actor_h2=actor_h2,
                critic_lr=critic_lr,
                critic_h1=critic_h1,
                critic_h2=critic_h2,
                gamma=gamma,
                tau=tau,
                layer_norm=layer_norm,
                normalize_observations=normalize_observations,
                normalize_returns=normalize_returns,
                critic_l2_reg=critic_l2_reg,
                enable_popart=enable_popart,
                clip_norm=clip_norm,
                reward_scale=reward_scale,
                lastLayerTanh=lastLayerTanh)

            # Train the agent, summary contains training data
            summary = rlTrain(agent,
                              env,
                              render=render,
                              render_episode=False,
                              print_steps=False,
                              print_results=False,
                              num_episodes=episodes,
                              progress_bar=True,
                              id=params['id'],
                              num_ticks=params['num_ticks'])  # type: Summary

            summary.add_params_to_param_dict(zz_RANDOM_SEED=RANDOM_SEED,
                                             zz_episodes=episodes,
                                             noGpu=noGpu)
            fp = summary.save(get_default_data_directory(dir_name),
                              last_name_section=True,
                              extra_name_append=get_extra_name(params))

            print("\n\nprocess " + str(params['id']) + " has finished" +
                  "!" * 200 + "\n")
Example #8
            # Train the agent, summary contains training data
            # summary = rlTrain(smart_start_agent, env, render=args.render,
            #                   render_episode=False,
            #                   print_steps=False,
            #                   print_results=False,
            #                   num_episodes=episodes,
            #                   print_time=True)  # type: Summary
            summary = rlTrainGraphSS(smart_start_agent,
                                     env,
                                     render=args.render,
                                     render_episode=False,
                                     print_steps=False,
                                     print_results=False,
                                     num_episodes=episodes,
                                     plot_ss_stuff=False,
                                     print_time=True)

            noGpu_str = "-NoGPU" if noGpu else ""
            llTanh_str = "-LLTanh" if lastLayerTanh else ""
            summary.add_params_to_param_dict(zz_RANDOM_SEED=RANDOM_SEED,
                                             zz_episodes=episodes,
                                             noGpu=noGpu)
            fp = summary.save(get_default_data_directory(
                "smart_start_continuous_summaries/0/"),
                              extra_name_append="-" + str(episodes) + "ep" +
                              noGpu_str + "-noNorm" + llTanh_str +
                              "-decayingNoise" + "-2000n_ss")

            train_writer = tf.summary.FileWriter(fp[:-5])
            train_writer.add_graph(sess.graph)
    parser.add_argument('--actor-lr', type=float, default=0.01)
    parser.add_argument('--critic-lr', type=float, default=0.005)
    boolean_flag(parser, 'popart', default=False)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--reward-scale', type=float, default=1.)
    parser.add_argument('--clip-norm', type=float, default=None)
    parser.add_argument('--nb-epochs', type=int, default=1)  # with these defaults, 1 * 1000 * 1 = 1000 rollout steps total
    parser.add_argument('--nb-epoch-cycles', type=int, default=1000)
    parser.add_argument('--nb-train-steps', type=int, default=1)  # per epoch cycle and MPI worker
    parser.add_argument('--nb-eval-steps', type=int, default=100)  # per epoch cycle and MPI worker
    parser.add_argument('--nb-rollout-steps', type=int, default=1)  # per epoch cycle and MPI worker
    parser.add_argument('--noise-type', type=str, default='ou_0.2')  # choices are adaptive-param_xx, ou_xx, normal_xx, none
    parser.add_argument('--num-timesteps', type=int, default=None)
    boolean_flag(parser, 'evaluation', default=False)
    args = parser.parse_args()
    # we don't directly specify timesteps for this script, so make sure that if we do specify them
    # they agree with the other parameters
    if args.num_timesteps is not None:
        assert(args.num_timesteps == args.nb_epochs * args.nb_epoch_cycles * args.nb_rollout_steps)
    dict_args = vars(args)
    del dict_args['num_timesteps']
    return dict_args


if __name__ == '__main__':
    args = parse_args()
    if MPI.COMM_WORLD.Get_rank() == 0:
        logger.configure(dir=os.path.join(get_default_data_directory("dppg_baselines_main_editted")))
    # Run actual script.
    run(**args)