Example #1
 def __init__(self):
     super().__init__(algorithm=NECAlgorithmParameters(),
                      exploration=EGreedyParameters(),
                      memory=NECMemoryParameters(),
                      networks={"main": NECNetworkParameters()})
     self.exploration.epsilon_schedule = ConstantSchedule(0.1)
     self.exploration.evaluation_epsilon = 0.01
Example #2
 def __init__(self, scheme=MiddlewareScheme.Medium, dense_layer=Dense):
     super().__init__(algorithm=DNECAlgorithmParameters(),
                      exploration=EGreedyParameters(),
                      memory=DNECMemoryParameters(),
                      networks={"main": DNECNetworkParameters(scheme=scheme, dense_layer=dense_layer)})
     #self.exploration.epsilon_schedule = ConstantSchedule(0.1)
     self.exploration.evaluation_epsilon = 0.01
Example #3
 def __init__(self):
     super().__init__(algorithm=DQNAlgorithmParameters(),
                      exploration=EGreedyParameters(),
                      memory=ExperienceReplayParameters(),
                      networks={"main": DQNNetworkParameters()})
     self.exploration.epsilon_schedule = LinearSchedule(1, 0.1, 1000000)
     self.exploration.evaluation_epsilon = 0.05
Example #4
 def __init__(self):
     super().__init__(algorithm=DQNAlgorithmParameters(),
                      exploration=EGreedyParameters(),
                      memory=ExperienceReplayParameters(),
                      networks={
                          "main": DQNNetworkParameters(),
                          "predictor": RNDNetworkParameters(),
                          "constant": RNDNetworkParameters()
                      })
     self.exploration.epsilon_schedule = LinearSchedule(1.0, 0.15, 15000)
Example #5
agent_params.algorithm.clip_critic_targets = [-50, 0]

# HER parameters
agent_params.memory = EpisodicHindsightExperienceReplayParameters()
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**6)
agent_params.memory.hindsight_goal_selection_method = HindsightGoalSelectionMethod.Future
agent_params.memory.hindsight_transitions_per_regular_transition = 4
agent_params.memory.goals_space = GoalsSpace(goal_name='achieved_goal',
                                             reward_type=ReachingGoal(distance_from_goal_threshold=0.05,
                                                                      goal_reaching_reward=0,
                                                                      default_reward=-1),
                                             distance_metric=GoalsSpace.DistanceMetric.Euclidean)
agent_params.memory.shared_memory = True

# exploration parameters
agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = ConstantSchedule(0.3)
agent_params.exploration.evaluation_epsilon = 0
# they actually take the noise_schedule to be 0.2 * max_abs_range which is 0.1 * total_range
agent_params.exploration.continuous_exploration_policy_parameters.noise_schedule = ConstantSchedule(0.1)
agent_params.exploration.continuous_exploration_policy_parameters.evaluation_noise = 0

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_observation_filter('observation', 'clipping', ObservationClippingFilter(-200, 200))

agent_params.pre_network_filter = InputFilter()
agent_params.pre_network_filter.add_observation_filter('observation', 'normalize_observation',
                                                       ObservationNormalizationFilter(name='normalize_observation'))
agent_params.pre_network_filter.add_observation_filter('achieved_goal', 'normalize_achieved_goal',
                                                       ObservationNormalizationFilter(name='normalize_achieved_goal'))
agent_params.pre_network_filter.add_observation_filter('desired_goal', 'normalize_desired_goal',
                                                       ObservationNormalizationFilter(name='normalize_desired_goal'))
Example #6
 def __init__(self):
     super().__init__(algorithm=NStepQAlgorithmParameters(),
                      exploration=EGreedyParameters(),
                      memory=SingleEpisodeBufferParameters(),
                      networks={"main": NStepQNetworkParameters()})
Example #7
def get_graph_manager(hp_dict, agent_list, run_phase_subject):
    ####################
    # All Default Parameters #
    ####################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["exploration_type"] = (hp_dict.get("exploration_type",
                                              "categorical")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type",
                                      "Mean squared error").lower()
    params["num_episodes_between_training"] = int(
        hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(
        hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(
        hp_dict.get("term_cond_avg_score", 100000))

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(
        params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    trainable_agents_list = list()
    non_trainable_agents_list = list()

    for agent in agent_list:
        agent_params = DeepRacerAgentParams()
        if agent.network_settings:
            agent_params.env_agent = agent
            agent_params.network_wrappers['main'].learning_rate = params["lr"]

            agent_params.network_wrappers['main'].input_embedders_parameters = \
                create_input_embedder(agent.network_settings['input_embedders'],
                                      agent.network_settings['embedder_type'],
                                      agent.network_settings['activation_function'])
            agent_params.network_wrappers['main'].middleware_parameters = \
                create_middle_embedder(agent.network_settings['middleware_embedders'],
                                       agent.network_settings['embedder_type'],
                                       agent.network_settings['activation_function'])

            input_filter = InputFilter(is_a_reference_filter=True)
            for observation in agent.network_settings['input_embedders'].keys():
                if observation in (Input.LEFT_CAMERA.value, Input.CAMERA.value,
                                   Input.OBSERVATION.value):
                    input_filter.add_observation_filter(
                        observation, 'to_grayscale', ObservationRGBToYFilter())
                    input_filter.add_observation_filter(
                        observation, 'to_uint8',
                        ObservationToUInt8Filter(0, 255))
                    input_filter.add_observation_filter(
                        observation, 'stacking', ObservationStackingFilter(1))

                if observation == Input.STEREO.value:
                    input_filter.add_observation_filter(
                        observation, 'to_uint8',
                        ObservationToUInt8Filter(0, 255))

                if observation == Input.LIDAR.value:
                    input_filter.add_observation_filter(
                        observation, 'clipping',
                        ObservationClippingFilter(0.15, 1.0))
                if observation == Input.SECTOR_LIDAR.value:
                    input_filter.add_observation_filter(
                        observation, 'binary', ObservationBinarySectorFilter())
            agent_params.input_filter = input_filter()

            agent_params.network_wrappers['main'].batch_size = params[
                "batch_size"]
            agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
            agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

            if params["loss_type"] == "huber":
                agent_params.network_wrappers[
                    'main'].replace_mse_with_huber_loss = True

            agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
            agent_params.algorithm.clipping_decay_schedule = LinearSchedule(
                1.0, 0, 1000000)
            agent_params.algorithm.beta_entropy = params["beta_entropy"]
            agent_params.algorithm.gae_lambda = 0.95
            agent_params.algorithm.discount = params["discount_factor"]
            agent_params.algorithm.optimization_epochs = params["num_epochs"]
            agent_params.algorithm.estimate_state_value_using_gae = True
            agent_params.algorithm.num_steps_between_copying_online_weights_to_target = \
                EnvironmentEpisodes(params["num_episodes_between_training"])
            agent_params.algorithm.num_consecutive_playing_steps = \
                EnvironmentEpisodes(params["num_episodes_between_training"])

            agent_params.algorithm.distributed_coach_synchronization_type = \
                DistributedCoachSynchronizationType.SYNC

            if params["exploration_type"] == "categorical":
                agent_params.exploration = CategoricalParameters()
            else:
                agent_params.exploration = EGreedyParameters()
                agent_params.exploration.epsilon_schedule = LinearSchedule(
                    1.0, params["e_greedy_value"], params["epsilon_steps"])

            trainable_agents_list.append(agent_params)
        else:
            non_trainable_agents_list.append(agent)

    ###############
    # Environment #
    ###############
    env_params = DeepRacerRacetrackEnvParameters()
    env_params.agents_params = trainable_agents_list
    env_params.non_trainable_agents = non_trainable_agents_list
    env_params.level = 'DeepRacerRacetrackEnv-v0'
    env_params.run_phase_subject = run_phase_subject

    vis_params = VisualizationParameters()
    vis_params.dump_mp4 = False

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 10000

    graph_manager = MultiAgentGraphManager(
        agents_params=trainable_agents_list,
        env_params=env_params,
        schedule_params=schedule_params,
        vis_params=vis_params,
        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
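
A minimal invocation sketch for this preset builder; the hyper-parameter dictionary, agent list, and run-phase subject below are illustrative placeholders rather than values from the original source:

# hypothetical call site: missing hp_dict entries fall back to the defaults shown above
hp_dict = {"batch_size": 128, "lr": 0.0001, "exploration_type": "categorical"}
agents = [...]  # placeholder for the DeepRacer agent objects constructed elsewhere
graph_manager, params_json = get_graph_manager(hp_dict, agents, run_phase_subject=None)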
Example #8
 def __init__(self):
     super().__init__(algorithm=CILAlgorithmParameters(),
                      exploration=EGreedyParameters(),
                      memory=BalancedExperienceReplayParameters(),
                      networks={"main": CILNetworkParameters()})
Example #9
def get_graph_manager(**hp_dict):
    ####################
    # All Default Parameters #
    ####################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["exploration_type"] = (hp_dict.get("exploration_type",
                                              "huber")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type",
                                      "Mean squared error").lower()
    params["num_episodes_between_training"] = int(
        hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(
        hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(
        hp_dict.get("term_cond_avg_score", 100000))

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(
        params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    agent_params = ClippedPPOAgentParameters()

    agent_params.network_wrappers['main'].learning_rate = params["lr"]
    agent_params.network_wrappers['main'].input_embedders_parameters[
        'observation'].activation_function = 'relu'
    agent_params.network_wrappers[
        'main'].middleware_parameters.activation_function = 'relu'
    agent_params.network_wrappers['main'].batch_size = params["batch_size"]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

    if params["loss_type"] == "huber":
        agent_params.network_wrappers[
            'main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.clipping_decay_schedule = LinearSchedule(
        1.0, 0, 1000000)
    agent_params.algorithm.beta_entropy = params["beta_entropy"]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params["discount_factor"]
    agent_params.algorithm.optimization_epochs = params["num_epochs"]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(
        params["num_episodes_between_training"])

    agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

    if params["exploration_type"] == "categorical":
        agent_params.exploration = CategoricalParameters()
    else:
        agent_params.exploration = EGreedyParameters()
        agent_params.exploration.epsilon_schedule = LinearSchedule(
            1.0, params["e_greedy_value"], params["epsilon_steps"])

    ###############
    # Environment #
    ###############
    SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)
    SilverstoneInputFilter.add_observation_filter('observation',
                                                  'to_grayscale',
                                                  ObservationRGBToYFilter())
    SilverstoneInputFilter.add_observation_filter(
        'observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
    SilverstoneInputFilter.add_observation_filter(
        'observation', 'stacking',
        ObservationStackingFilter(params["stack_size"]))

    env_params = GymVectorEnvironment()
    env_params.default_input_filter = SilverstoneInputFilter
    env_params.level = 'SilverstoneRacetrack-Discrete-v0'

    vis_params = VisualizationParameters()
    vis_params.dump_mp4 = False

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 1000

    graph_manager = BasicRLGraphManager(
        agent_params=agent_params,
        env_params=env_params,
        schedule_params=schedule_params,
        vis_params=vis_params,
        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
Example #10
 def __init__(self):
     super().__init__(algorithm=HumanAlgorithmParameters(),
                      exploration=EGreedyParameters(),
                      memory=ExperienceReplayParameters(),
                      networks={"main": BCNetworkParameters()})
Example #11
 def __init__(self):
     super().__init__()
     self.exploration = EGreedyParameters()
     self.exploration.epsilon_schedule = LinearSchedule(1.0, 1.0, 500000000)
Example #12
 def __init__(self):
     super().__init__(algorithm=DFPAlgorithmParameters(),
                      exploration=EGreedyParameters(),
                      memory=DFPMemoryParameters(),
                      networks={"main": DFPNetworkParameters()})
Example #13
def get_graph_manager(**hp_dict):
    ####################
    # All Default Parameters #
    ####################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["lr_decay_rate"] = float(hp_dict.get("lr_decay_rate", 0))
    params["lr_decay_steps"] = float(hp_dict.get("lr_decay_steps", 0))
    params["exploration_type"] = (hp_dict.get("exploration_type", "categorical")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type", "Mean squared error").lower()
    params["num_episodes_between_training"] = int(hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(hp_dict.get("term_cond_avg_score", 100000))
    params["tensorboard"] = hp_dict.get("tensorboard", False)
    params["dump_mp4"] = hp_dict.get("dump_mp4", False)
    params["dump_gifs"] = hp_dict.get("dump_gifs", False)

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    agent_params = ClippedPPOAgentParameters()

    agent_params.network_wrappers['main'].learning_rate = params["lr"]
    agent_params.network_wrappers['main'].learning_rate_decay_rate = params["lr_decay_rate"]
    agent_params.network_wrappers['main'].learning_rate_decay_steps = params["lr_decay_steps"]
    agent_params.network_wrappers['main'].input_embedders_parameters['observation'].activation_function = 'relu'
    # Replace the default CNN with single layer Conv2d(32, 3, 1)
#   agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = EmbedderScheme.Shallow

#   agent_params.network_wrappers['main'].input_embedders_parameters['observation'].dropout_rate = 0.3
    agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
#   agent_params.network_wrappers['main'].middleware_parameters.scheme = MiddlewareScheme.Shallow
#   agent_params.network_wrappers['main'].middleware_parameters.dropout_rate = 0.3
    agent_params.network_wrappers['main'].batch_size = params["batch_size"]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999
#   agent_params.network_wrappers['main'].l2_regularization = 2e-5

    if params["loss_type"] == "huber":
        agent_params.network_wrappers['main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 1000000)
    agent_params.algorithm.beta_entropy = params["beta_entropy"]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params["discount_factor"]
    agent_params.algorithm.optimization_epochs = params["num_epochs"]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(params["num_episodes_between_training"])

    agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

    if params["exploration_type"] == "categorical":
        agent_params.exploration = CategoricalParameters()
    else:
        agent_params.exploration = EGreedyParameters()
        agent_params.exploration.epsilon_schedule = LinearSchedule(1.0,
                                                                   params["e_greedy_value"],
                                                                   params["epsilon_steps"])

    ###############
    # Environment #
    ###############
    DeepRacerInputFilter = InputFilter(is_a_reference_filter=True)
    # Add an observation image perturbation for many aspects
#   DeepRacerInputFilter.add_observation_filter('observation', 'perturb_color', ObservationColorPerturbation(0.2))
    # Rescale to much smaller input when using shallow networks to avoid OOM
#   DeepRacerInputFilter.add_observation_filter('observation', 'rescaling',
#                                           ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([84, 84, 3]),
#                                                                                            high=255)))
    DeepRacerInputFilter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
    DeepRacerInputFilter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
    DeepRacerInputFilter.add_observation_filter('observation', 'stacking',
                                                  ObservationStackingFilter(params["stack_size"]))

    env_params = GymVectorEnvironment()
    env_params.default_input_filter = DeepRacerInputFilter
    env_params.level = 'DeepRacerRacetrackCustomActionSpaceEnv-v0'

    vis_params = VisualizationParameters()
    vis_params.tensorboard = params["tensorboard"]
    vis_params.dump_mp4 = params["dump_mp4"]
    vis_params.dump_gifs = params["dump_gifs"]
    # AlwaysDumpFilter, MaxDumpFilter, EveryNEpisodesDumpFilter, SelectedPhaseOnlyDumpFilter
    vis_params.video_dump_filters = [AlwaysDumpFilter()]

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 10000

    graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                        schedule_params=schedule_params, vis_params=vis_params,
                                        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
Example #14
def get_clipped_ppo_params(agent_params, agent, params):
    """This function is algorithm specific settings required for Clipped PPO algorithm

    Args:
        agent_params (DeepRacerClippedPPOAgentParams): the agent parameters that will be used to create the RL agent
        agent (Agent): The agent object that was created either as part of create_rollout_agent or create_training_agent
        params (dict): dictionary of hyperparameters

    Returns:
        DeepRacerClippedPPOAgentParams: updated agent params object with hyperparameters and other required details
    """
    agent_params.network_wrappers['main'].learning_rate = params[
        HyperParameterKeys.LEARNING_RATE.value]

    agent_params.network_wrappers['main'].input_embedders_parameters = \
        create_input_embedder(agent.network_settings['input_embedders'],
                              agent.network_settings['embedder_type'],
                              agent.network_settings['activation_function'])
    agent_params.network_wrappers['main'].middleware_parameters = \
        create_middle_embedder(agent.network_settings['middleware_embedders'],
                               agent.network_settings['embedder_type'],
                               agent.network_settings['activation_function'])

    agent_params.network_wrappers['main'].batch_size = params[
        HyperParameterKeys.BATCH_SIZE.value]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

    if params[HyperParameterKeys.LOSS_TYPE.value] == LossTypes.HUBER.value:
        agent_params.network_wrappers[
            'main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.beta_entropy = params[
        HyperParameterKeys.BETA_ENTROPY.value]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params[
        HyperParameterKeys.DISCOUNT_FACTOR.value]
    agent_params.algorithm.optimization_epochs = params[
        HyperParameterKeys.NUM_EPOCHS.value]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = \
        EnvironmentEpisodes(params[HyperParameterKeys.NUM_EPISODES_BETWEEN_TRAINING.value])
    agent_params.algorithm.num_consecutive_playing_steps = \
        EnvironmentEpisodes(params[HyperParameterKeys.NUM_EPISODES_BETWEEN_TRAINING.value])

    agent_params.algorithm.distributed_coach_synchronization_type = \
        DistributedCoachSynchronizationType.SYNC
    if params[HyperParameterKeys.EXPLORATION_TYPE.value].lower().strip() == \
            ExplorationTypes.CATEGORICAL.value:
        agent_params.exploration = {
            DiscreteActionSpace:
            DeepRacerCategoricalParameters(
                use_stochastic_evaluation_policy=False),
            ScalableBoxActionSpace:
            AdditiveNoiseParameters()
        }
    elif params[HyperParameterKeys.EXPLORATION_TYPE.value].lower().strip() == \
            ExplorationTypes.E_GREEDY.value:
        agent_params.exploration = {
            DiscreteActionSpace: EGreedyParameters(),
            ScalableBoxActionSpace: AdditiveNoiseParameters()
        }
        agent_params.exploration[DiscreteActionSpace].epsilon_schedule = \
            LinearSchedule(1.0,
                           params[HyperParameterKeys.E_GREEDY_VALUE.value],
                           params[HyperParameterKeys.EPSILON_STEPS.value])
    else:
        log_and_exit(
            "Unknown exploration_type found in hyper parameters. \
            exploration_type: {}".format(params[
                HyperParameterKeys.EXPLORATION_TYPE.value].lower().strip()),
            SIMAPP_SIMULATION_WORKER_EXCEPTION, SIMAPP_EVENT_ERROR_CODE_500)

    agent_params.memory = DeepRacerMemoryParameters()
    return agent_params
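
For reference, a minimal call-site sketch for this function, assuming the agent_params and agent objects have already been produced by the surrounding DeepRacer code; the hyper-parameter values below are illustrative only:

# hypothetical usage: the keys mirror the HyperParameterKeys accessed above
params = {
    HyperParameterKeys.LEARNING_RATE.value: 0.0003,
    HyperParameterKeys.BATCH_SIZE.value: 64,
    HyperParameterKeys.LOSS_TYPE.value: LossTypes.HUBER.value,
    HyperParameterKeys.BETA_ENTROPY.value: 0.01,
    HyperParameterKeys.DISCOUNT_FACTOR.value: 0.999,
    HyperParameterKeys.NUM_EPOCHS.value: 10,
    HyperParameterKeys.NUM_EPISODES_BETWEEN_TRAINING.value: 20,
    HyperParameterKeys.EXPLORATION_TYPE.value: ExplorationTypes.CATEGORICAL.value,
    HyperParameterKeys.E_GREEDY_VALUE.value: 0.05,
    HyperParameterKeys.EPSILON_STEPS.value: 10000,
}
agent_params = get_clipped_ppo_params(agent_params, agent, params)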