Example #1
    def get_graph_manager_from_args(self, args):
        """Returns the GraphManager object for coach to use to train by calling improve()"""
        # NOTE: TaskParameters are not configurable at this time.

        # Visualization
        vis_params = VisualizationParameters()
        self.config_visualization(vis_params)
        self.hyperparameters.apply_subset(vis_params, "vis_params.")

        # Schedule
        schedule_params = ScheduleParameters()
        self.config_schedule(schedule_params)
        self.hyperparameters.apply_subset(schedule_params, "schedule_params.")

        # Agent
        agent_params = self.define_agent()
        self.hyperparameters.apply_subset(agent_params, "agent_params.")

        # Environment
        env_params = self.define_environment()
        self.hyperparameters.apply_subset(env_params, "env_params.")

        graph_manager = BasicRLGraphManager(
            agent_params=agent_params,
            env_params=env_params,
            schedule_params=schedule_params,
            vis_params=vis_params,
        )

        return graph_manager
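A minimal sketch of how the returned graph manager is typically consumed; the `trainer` and `args` names below are placeholders and not part of the example above:

# Hypothetical driver code (sketch): `trainer` is an instance of the class that
# defines get_graph_manager_from_args() above; `args` holds the parsed arguments.
from rl_coach.base_parameters import TaskParameters

graph_manager = trainer.get_graph_manager_from_args(args)
task_params = TaskParameters()
task_params.experiment_path = './experiment'  # any writable output directory
graph_manager.create_graph(task_parameters=task_params)
graph_manager.improve()  # train according to the configured schedule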
Example #2
def evaluate(params):
    # file params
    experiment_path = os.path.join(params.output_data_dir)
    logger.experiment_path = os.path.join(experiment_path, 'evaluation')
    params.checkpoint_restore_dir = os.path.join(params.input_data_dir,
                                                 'checkpoint')
    checkpoint_file = os.path.join(params.checkpoint_restore_dir, 'checkpoint')

    # Due to a TensorFlow issue (https://github.com/tensorflow/tensorflow/issues/9146), the absolute
    # checkpoint path has to be replaced with a relative one for evaluation from a
    # checkpointed model to work.
    inplace_change(checkpoint_file, "/opt/ml/output/data/checkpoint", ".")

    vis_params = VisualizationParameters()
    vis_params.dump_gifs = True

    task_params = TaskParameters(evaluate_only=True,
                                 experiment_path=logger.experiment_path)
    task_params.__dict__ = add_items_to_dict(task_params.__dict__,
                                             params.__dict__)

    graph_manager = BasicRLGraphManager(
        agent_params=ClippedPPOAgentParameters(),
        env_params=GymVectorEnvironment(level='TSP_env:TSPEasyEnv'),
        schedule_params=ScheduleParameters(),
        vis_params=vis_params)
    graph_manager = graph_manager.create_graph(task_parameters=task_params)
    graph_manager.evaluate(EnvironmentSteps(5))
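The `inplace_change` helper used above is not defined in this snippet; a plausible implementation (an assumption, shown only to make the path-rewriting step concrete) could look like this:

def inplace_change(filename, old_string, new_string):
    # Hypothetical helper: rewrite a text file in place, replacing every
    # occurrence of old_string with new_string. Here it turns TensorFlow's
    # absolute checkpoint paths into relative ones.
    with open(filename) as f:
        content = f.read()
    if old_string not in content:
        return
    with open(filename, 'w') as f:
        f.write(content.replace(old_string, new_string))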
Example #3
    def get_graph_manager_from_args(self, args: argparse.Namespace) -> 'GraphManager':
        """
        Return the graph manager according to the command line arguments given by the user.
        :param args: the arguments given by the user
        :return: the graph manager, not bound to task_parameters yet.
        """
        graph_manager = None

        # if a preset was given we will load the graph manager for the preset
        if args.preset is not None:
            graph_manager = short_dynamic_import(args.preset, ignore_module_case=True)

        # for human play we need to create a custom graph manager
        if args.play:
            from rl_coach.agents.human_agent import HumanAgentParameters

            env_params = short_dynamic_import(args.environment_type, ignore_module_case=True)()
            env_params.human_control = True
            schedule_params = HumanPlayScheduleParameters()
            graph_manager = BasicRLGraphManager(HumanAgentParameters(), env_params, schedule_params, VisualizationParameters())

        # Set framework
        # Note: Some graph managers (e.g. HAC preset) create multiple agents and the attribute is called agents_params
        if hasattr(graph_manager, 'agent_params'):
            for network_parameters in graph_manager.agent_params.network_wrappers.values():
                network_parameters.framework = args.framework
        elif hasattr(graph_manager, 'agents_params'):
            for ap in graph_manager.agents_params:
                for network_parameters in ap.network_wrappers.values():
                    network_parameters.framework = args.framework

        if args.level:
            if isinstance(graph_manager.env_params.level, SingleLevelSelection):
                graph_manager.env_params.level.select(args.level)
            else:
                graph_manager.env_params.level = args.level

        # set the seed for the environment
        if args.seed is not None:
            graph_manager.env_params.seed = args.seed

        # visualization
        graph_manager.visualization_parameters.dump_gifs = graph_manager.visualization_parameters.dump_gifs or args.dump_gifs
        graph_manager.visualization_parameters.dump_mp4 = graph_manager.visualization_parameters.dump_mp4 or args.dump_mp4
        graph_manager.visualization_parameters.render = args.render
        graph_manager.visualization_parameters.tensorboard = args.tensorboard
        graph_manager.visualization_parameters.print_networks_summary = args.print_networks_summary

        # update the custom parameters
        if args.custom_parameter is not None:
            unstripped_key_value_pairs = [pair.split('=') for pair in args.custom_parameter.split(';')]
            stripped_key_value_pairs = [tuple([pair[0].strip(), pair[1].strip()]) for pair in
                                        unstripped_key_value_pairs if len(pair) == 2]

            # load custom parameters into run_dict
            for key, value in stripped_key_value_pairs:
                exec("graph_manager.{}={}".format(key, value))

        return graph_manager
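For illustration, the semicolon-separated key=value format that the custom-parameter block above expects (the concrete values here are made up):

# A custom_parameter string such as
#     "agent_params.algorithm.discount=0.98; env_params.seed=1"
# is split on ';' and '=' and applied via exec(), i.e. it effectively runs:
#     graph_manager.agent_params.algorithm.discount = 0.98
#     graph_manager.env_params.seed = 1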
Example #4
def get_graph_manager_from_args(args: argparse.Namespace) -> 'GraphManager':
    """
    Return the graph manager according to the command line arguments given by the user
    :param args: the arguments given by the user
    :return: the updated graph manager
    """

    graph_manager = None

    # if a preset was given we will load the graph manager for the preset
    if args.preset is not None:
        graph_manager = short_dynamic_import(args.preset,
                                             ignore_module_case=True)

    # for human play we need to create a custom graph manager
    if args.play:
        env_params = short_dynamic_import(args.environment_type,
                                          ignore_module_case=True)()
        env_params.human_control = True
        schedule_params = HumanPlayScheduleParameters()
        graph_manager = BasicRLGraphManager(HumanAgentParameters(), env_params,
                                            schedule_params,
                                            VisualizationParameters())

    if args.level:
        if isinstance(graph_manager.env_params.level, SingleLevelSelection):
            graph_manager.env_params.level.select(args.level)
        else:
            graph_manager.env_params.level = args.level

    # set the seed for the environment
    if args.seed is not None:
        graph_manager.env_params.seed = args.seed

    # visualization
    graph_manager.visualization_parameters.dump_gifs = graph_manager.visualization_parameters.dump_gifs or args.dump_gifs
    graph_manager.visualization_parameters.dump_mp4 = graph_manager.visualization_parameters.dump_mp4 or args.dump_mp4
    graph_manager.visualization_parameters.render = args.render
    graph_manager.visualization_parameters.tensorboard = args.tensorboard

    # update the custom parameters
    if args.custom_parameter is not None:
        unstripped_key_value_pairs = [
            pair.split('=') for pair in args.custom_parameter.split(';')
        ]
        stripped_key_value_pairs = [
            tuple([pair[0].strip(), pair[1].strip()])
            for pair in unstripped_key_value_pairs if len(pair) == 2
        ]

        # load custom parameters into run_dict
        for key, value in stripped_key_value_pairs:
            exec("graph_manager.{}={}".format(key, value))

    return graph_manager
Example #5
SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)

SilverstoneInputFilter.add_observation_filter('observation', 'to_grayscale',
                                              ObservationRGBToYFilter())
SilverstoneInputFilter.add_observation_filter('observation', 'to_uint8',
                                              ObservationToUInt8Filter(0, 255))
SilverstoneInputFilter.add_observation_filter('observation', 'stacking',
                                              ObservationStackingFilter(1))
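# The filter chain above converts each RGB camera frame to a single grayscale
# channel, casts it to uint8 in [0, 255], and stacks the most recent frame
# (stack size 1) before it reaches the agent's input embedder.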

env_params = GymVectorEnvironment()
env_params.default_input_filter = SilverstoneInputFilter
env_params.level = 'DeepRacerRacetrackCustomActionSpaceEnv-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000

graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
    env_params=env_params,
    schedule_params=schedule_params,
    vis_params=vis_params,
    preset_validation_params=preset_validation_params)
Example #6
from rl_coach.agents.ddpg_agent import DDPGAgentParameters
from rl_coach.agents.dqn_agent import DQNAgentParameters
from rl_coach.graph_managers.graph_manager import SimpleSchedule, SimpleScheduleWithoutEvaluation
from rl_coach.core_types import EnvironmentSteps, TrainingSteps
from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.environments.first_test import ControlSuiteEnvironmentParameters

agent_params = DQNAgentParameters()
# rename the input embedder key from 'observation' to 'measurements'
# agent_params.network_wrappers['main'].input_embedders_parameters['measurements'] = agent_params.network_wrappers['main'].input_embedders_parameters.pop('observation')
schedule_params = SimpleSchedule()
schedule_params.heatup_steps = EnvironmentSteps(10)
preset_validation_params = PresetValidationParameters()
# preset_validation_params.test = True
# preset_validation_params.min_reward_threshold = 20
# preset_validation_params.max_episodes_to_achieve_reward = 400

vis_params = VisualizationParameters(render=False)

env_params = ControlSuiteEnvironmentParameters()

graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
    env_params=env_params,
    schedule_params=schedule_params,
    vis_params=vis_params,
    preset_validation_params=preset_validation_params)
Example #7
#########
# Agent #
#########
agent_params = SoftActorCriticAgentParameters()

###############
# Environment #
###############
env_params = GymVectorEnvironment(level='coinche-v0')

####################
# Graph Scheduling #
####################
num_round_improve_steps = 80
num_round_heatup = 8
num_round_training = 300
num_round_evaluation = 10

schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(num_round_improve_steps)
schedule_params.heatup_steps = EnvironmentEpisodes(num_round_heatup)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(num_round_training)
schedule_params.evaluation_steps = EnvironmentEpisodes(num_round_evaluation)

########################
# Create Graph Manager #
########################
# BasicRLGraphManager creates a single LevelManager between the Agent and the Environment
graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params)
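A rough reading of the schedule above, based on the parameter names (a sketch, not taken from the original source):

# heatup: 8 episodes of initial play to fill the memory before any training,
# then improve for 80 episodes in total; an evaluation phase of 10 episodes
# would run every 300 training episodes, so with improve_steps=80 it is never
# reached before training ends.
graph_manager.improve()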
Example #8
def get_graph_manager(**hp_dict):
    ##########################
    # All Default Parameters #
    ##########################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["exploration_type"] = (hp_dict.get("exploration_type",
                                              "huber")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type",
                                      "Mean squared error").lower()
    params["num_episodes_between_training"] = int(
        hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(
        hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(
        hp_dict.get("term_cond_avg_score", 100000))

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(
        params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    agent_params = ClippedPPOAgentParameters()

    agent_params.network_wrappers['main'].learning_rate = params["lr"]
    agent_params.network_wrappers['main'].input_embedders_parameters[
        'observation'].activation_function = 'relu'
    agent_params.network_wrappers[
        'main'].middleware_parameters.activation_function = 'relu'
    agent_params.network_wrappers['main'].batch_size = params["batch_size"]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

    if params["loss_type"] == "huber":
        agent_params.network_wrappers[
            'main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.clipping_decay_schedule = LinearSchedule(
        1.0, 0, 1000000)
    agent_params.algorithm.beta_entropy = params["beta_entropy"]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params["discount_factor"]
    agent_params.algorithm.optimization_epochs = params["num_epochs"]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(
        params["num_episodes_between_training"])

    agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

    if params["exploration_type"] == "categorical":
        agent_params.exploration = CategoricalParameters()
    else:
        agent_params.exploration = EGreedyParameters()
        agent_params.exploration.epsilon_schedule = LinearSchedule(
            1.0, params["e_greedy_value"], params["epsilon_steps"])

    ###############
    # Environment #
    ###############
    SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)
    SilverstoneInputFilter.add_observation_filter('observation',
                                                  'to_grayscale',
                                                  ObservationRGBToYFilter())
    SilverstoneInputFilter.add_observation_filter(
        'observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
    SilverstoneInputFilter.add_observation_filter(
        'observation', 'stacking',
        ObservationStackingFilter(params["stack_size"]))

    env_params = GymVectorEnvironment()
    env_params.default_input_filter = SilverstoneInputFilter
    env_params.level = 'SilverstoneRacetrack-Discrete-v0'

    vis_params = VisualizationParameters()
    vis_params.dump_mp4 = False

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 1000

    graph_manager = BasicRLGraphManager(
        agent_params=agent_params,
        env_params=env_params,
        schedule_params=schedule_params,
        vis_params=vis_params,
        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
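An example invocation of the factory above (sketch; the hyper-parameter values are arbitrary, and any key that is omitted falls back to the defaults listed at the top of the function):

# Hypothetical call -- keyword arguments mirror the hp_dict keys read above.
graph_manager, params_json = get_graph_manager(batch_size=32,
                                               lr=1e-4,
                                               exploration_type="categorical")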
Example #9
# from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule
from rl_coach.environments.environment import SingleLevelSelection
from rl_coach.environments.lab_environment import LabEnvironmentParameters, level_scripts
from rl_coach.agents.human_agent import HumanAgentParameters
from rl_coach.base_parameters import VisualizationParameters
####################
# Graph Scheduling #
####################
schedule_params = SimpleSchedule()
# schedule_params.heatup_steps = EnvironmentSteps(10000)

#########
# Agent #
#########
agent_params = HumanAgentParameters()

###############
# Environment #
###############
env_params = LabEnvironmentParameters(
    level=SingleLevelSelection(level_scripts),
    human_control=True,
    width=100,
    height=100)

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters(
                                        render=True, native_rendering=True))
Example #10
# Agent params
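# NOTE: `agent_params` is assumed to be created earlier in the original script
# (an agent-parameters object like the ones built in the other examples);
# this excerpt only tweaks its algorithm and network settings.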
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(
    100)
agent_params.algorithm.discount = 0.99
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(4096)
agent_params.algorithm.act_for_full_episodes = False

# NN configuration
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'observation': InputEmbedderParameters(scheme=[])
}
agent_params.network_wrappers['main'].learning_rate = 0.001

################
#  Environment #
################
env_params = GymVectorEnvironment(
    level='gym_jiminy.envs.acrobot:JiminyAcrobotEnv')
env_params.additional_simulator_parameters = {
    'continuous': True,
    'enableGoalEnv': False
}

################
#   Learning   #
################
graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=SimpleSchedule())
graph_manager.improve()
Example #11
def get_graph_manager(**hp_dict):
    ##########################
    # All Default Parameters #
    ##########################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["lr_decay_rate"] = float(hp_dict.get("lr_decay_rate", 0))
    params["lr_decay_steps"] = float(hp_dict.get("lr_decay_steps", 0))
    params["exploration_type"] = (hp_dict.get("exploration_type", "categorical")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type", "Mean squared error").lower()
    params["num_episodes_between_training"] = int(hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(hp_dict.get("term_cond_avg_score", 100000))
    params["tensorboard"] = hp_dict.get("tensorboard", False)
    params["dump_mp4"] = hp_dict.get("dump_mp4", False)
    params["dump_gifs"] = hp_dict.get("dump_gifs", False)

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    agent_params = ClippedPPOAgentParameters()

    agent_params.network_wrappers['main'].learning_rate = params["lr"]
    agent_params.network_wrappers['main'].learning_rate_decay_rate = params["lr_decay_rate"]
    agent_params.network_wrappers['main'].learning_rate_decay_steps = params["lr_decay_steps"]
    agent_params.network_wrappers['main'].input_embedders_parameters['observation'].activation_function = 'relu'
    # Replace the default CNN with single layer Conv2d(32, 3, 1)
#   agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = EmbedderScheme.Shallow

#   agent_params.network_wrappers['main'].input_embedders_parameters['observation'].dropout_rate = 0.3
    agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
#   agent_params.network_wrappers['main'].middleware_parameters.scheme = MiddlewareScheme.Shallow
#   agent_params.network_wrappers['main'].middleware_parameters.dropout_rate = 0.3
    agent_params.network_wrappers['main'].batch_size = params["batch_size"]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999
#   agent_params.network_wrappers['main'].l2_regularization = 2e-5

    if params["loss_type"] == "huber":
        agent_params.network_wrappers['main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 1000000)
    agent_params.algorithm.beta_entropy = params["beta_entropy"]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params["discount_factor"]
    agent_params.algorithm.optimization_epochs = params["num_epochs"]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(params["num_episodes_between_training"])

    agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

    if params["exploration_type"] == "categorical":
        agent_params.exploration = CategoricalParameters()
    else:
        agent_params.exploration = EGreedyParameters()
        agent_params.exploration.epsilon_schedule = LinearSchedule(1.0,
                                                                   params["e_greedy_value"],
                                                                   params["epsilon_steps"])

    ###############
    # Environment #
    ###############
    DeepRacerInputFilter = InputFilter(is_a_reference_filter=True)
    # Optionally add an observation image perturbation (e.g. color) for robustness
#   DeepRacerInputFilter.add_observation_filter('observation', 'perturb_color', ObservationColorPerturbation(0.2))
    # Rescale to a much smaller input when using shallow networks to avoid OOM
#   DeepRacerInputFilter.add_observation_filter('observation', 'rescaling',
#                                           ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([84, 84, 3]),
#                                                                                            high=255)))
    DeepRacerInputFilter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
    DeepRacerInputFilter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
    DeepRacerInputFilter.add_observation_filter('observation', 'stacking',
                                                  ObservationStackingFilter(params["stack_size"]))

    env_params = GymVectorEnvironment()
    env_params.default_input_filter = DeepRacerInputFilter
    env_params.level = 'DeepRacerRacetrackCustomActionSpaceEnv-v0'

    vis_params = VisualizationParameters()
    vis_params.tensorboard = params["tensorboard"]
    vis_params.dump_mp4 = params["dump_mp4"]
    vis_params.dump_gifs = params["dump_gifs"]
    # Available video dump filters: AlwaysDumpFilter, MaxDumpFilter, EveryNEpisodesDumpFilter, SelectedPhaseOnlyDumpFilter
    vis_params.video_dump_filters = [AlwaysDumpFilter()]

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 10000

    graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                        schedule_params=schedule_params, vis_params=vis_params,
                                        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
Example #12
from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
from rl_coach.core_types import EnvironmentSteps
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule

graph_manager = BasicRLGraphManager(
    agent_params=ClippedPPOAgentParameters(),
    env_params=GymVectorEnvironment(level='CartPole-v0'),
    schedule_params=SimpleSchedule())

graph_manager.heatup(EnvironmentSteps(100))
graph_manager.train_and_act(EnvironmentSteps(100))
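A possible continuation of the minimal CartPole example (sketch): keep alternating acting and training, then run a short evaluation phase, mirroring the graph_manager.evaluate() call shown near the top of this listing.

for _ in range(10):
    graph_manager.train_and_act(EnvironmentSteps(100))
graph_manager.evaluate(EnvironmentSteps(500))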