def __init__(self):
    super().__init__(algorithm=NECAlgorithmParameters(),
                     exploration=EGreedyParameters(),
                     memory=NECMemoryParameters(),
                     networks={"main": NECNetworkParameters()})
    self.exploration.epsilon_schedule = ConstantSchedule(0.1)
    self.exploration.evaluation_epsilon = 0.01
def __init__(self, scheme=MiddlewareScheme.Medium, dense_layer=Dense):
    super().__init__(algorithm=DNECAlgorithmParameters(),
                     exploration=EGreedyParameters(),
                     memory=DNECMemoryParameters(),
                     networks={"main": DNECNetworkParameters(scheme=scheme,
                                                             dense_layer=dense_layer)})
    # self.exploration.epsilon_schedule = ConstantSchedule(0.1)
    self.exploration.evaluation_epsilon = 0.01
def __init__(self): super().__init__(algorithm=DQNAlgorithmParameters(), exploration=EGreedyParameters(), memory=ExperienceReplayParameters(), networks={"main": DQNNetworkParameters()}) self.exploration.epsilon_schedule = LinearSchedule(1, 0.1, 1000000) self.exploration.evaluation_epsilon = 0.05
def __init__(self): super().__init__(algorithm=DQNAlgorithmParameters(), exploration=EGreedyParameters(), memory=ExperienceReplayParameters(), networks={ "main": DQNNetworkParameters(), "predictor": RNDNetworkParameters(), "constant": RNDNetworkParameters() }) self.exploration.epsilon_schedule = LinearSchedule(1.0, 0.15, 15000)
agent_params.algorithm.clip_critic_targets = [-50, 0]

# HER parameters
agent_params.memory = EpisodicHindsightExperienceReplayParameters()
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**6)
agent_params.memory.hindsight_goal_selection_method = HindsightGoalSelectionMethod.Future
agent_params.memory.hindsight_transitions_per_regular_transition = 4
agent_params.memory.goals_space = GoalsSpace(goal_name='achieved_goal',
                                             reward_type=ReachingGoal(distance_from_goal_threshold=0.05,
                                                                      goal_reaching_reward=0,
                                                                      default_reward=-1),
                                             distance_metric=GoalsSpace.DistanceMetric.Euclidean)
agent_params.memory.shared_memory = True

# exploration parameters
agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = ConstantSchedule(0.3)
agent_params.exploration.evaluation_epsilon = 0
# they actually take the noise_schedule to be 0.2 * max_abs_range,
# which is 0.1 * total_range (since total_range = 2 * max_abs_range)
agent_params.exploration.continuous_exploration_policy_parameters.noise_schedule = ConstantSchedule(0.1)
agent_params.exploration.continuous_exploration_policy_parameters.evaluation_noise = 0

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_observation_filter('observation', 'clipping',
                                                 ObservationClippingFilter(-200, 200))

agent_params.pre_network_filter = InputFilter()
agent_params.pre_network_filter.add_observation_filter('observation', 'normalize_observation',
                                                       ObservationNormalizationFilter(name='normalize_observation'))
agent_params.pre_network_filter.add_observation_filter('achieved_goal', 'normalize_achieved_goal',
                                                       ObservationNormalizationFilter(name='normalize_achieved_goal'))
agent_params.pre_network_filter.add_observation_filter('desired_goal', 'normalize_desired_goal',
                                                       ObservationNormalizationFilter(name='normalize_desired_goal'))
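# Illustrative only: a sketch of the sparse reward semantics that
# ReachingGoal(distance_from_goal_threshold=0.05, goal_reaching_reward=0,
# default_reward=-1) with the Euclidean distance metric describes above;
# `reaching_goal_reward` is a hypothetical name, not Coach's implementation.
import numpy as np

def reaching_goal_reward(achieved_goal, desired_goal, threshold=0.05):
    """Return 0 when the achieved goal is within `threshold` of the desired goal, else -1."""
    distance = np.linalg.norm(np.asarray(achieved_goal) - np.asarray(desired_goal))
    return 0 if distance <= threshold else -1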
def __init__(self):
    super().__init__(algorithm=NStepQAlgorithmParameters(),
                     exploration=EGreedyParameters(),
                     memory=SingleEpisodeBufferParameters(),
                     networks={"main": NStepQNetworkParameters()})
def get_graph_manager(hp_dict, agent_list, run_phase_subject):
    ##########################
    # All Default Parameters #
    ##########################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["exploration_type"] = (hp_dict.get("exploration_type", "categorical")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type", "Mean squared error").lower()
    params["num_episodes_between_training"] = int(hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(hp_dict.get("term_cond_avg_score", 100000))

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    trainable_agents_list = list()
    non_trainable_agents_list = list()
    for agent in agent_list:
        agent_params = DeepRacerAgentParams()
        if agent.network_settings:
            agent_params.env_agent = agent
            agent_params.network_wrappers['main'].learning_rate = params["lr"]
            agent_params.network_wrappers['main'].input_embedders_parameters = \
                create_input_embedder(agent.network_settings['input_embedders'],
                                      agent.network_settings['embedder_type'],
                                      agent.network_settings['activation_function'])
            agent_params.network_wrappers['main'].middleware_parameters = \
                create_middle_embedder(agent.network_settings['middleware_embedders'],
                                       agent.network_settings['embedder_type'],
                                       agent.network_settings['activation_function'])

            input_filter = InputFilter(is_a_reference_filter=True)
            for observation in agent.network_settings['input_embedders'].keys():
                if observation == Input.LEFT_CAMERA.value or observation == Input.CAMERA.value or \
                        observation == Input.OBSERVATION.value:
                    input_filter.add_observation_filter(observation, 'to_grayscale', ObservationRGBToYFilter())
                    input_filter.add_observation_filter(observation, 'to_uint8', ObservationToUInt8Filter(0, 255))
                    input_filter.add_observation_filter(observation, 'stacking', ObservationStackingFilter(1))
                if observation == Input.STEREO.value:
                    input_filter.add_observation_filter(observation, 'to_uint8', ObservationToUInt8Filter(0, 255))
                if observation == Input.LIDAR.value:
                    input_filter.add_observation_filter(observation, 'clipping', ObservationClippingFilter(0.15, 1.0))
                if observation == Input.SECTOR_LIDAR.value:
                    input_filter.add_observation_filter(observation, 'binary', ObservationBinarySectorFilter())
            agent_params.input_filter = input_filter()

            agent_params.network_wrappers['main'].batch_size = params["batch_size"]
            agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
            agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999
            if params["loss_type"] == "huber":
                agent_params.network_wrappers['main'].replace_mse_with_huber_loss = True

            agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
            agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 1000000)
            agent_params.algorithm.beta_entropy = params["beta_entropy"]
            agent_params.algorithm.gae_lambda = 0.95
            agent_params.algorithm.discount = params["discount_factor"]
            agent_params.algorithm.optimization_epochs = params["num_epochs"]
            agent_params.algorithm.estimate_state_value_using_gae = True
            agent_params.algorithm.num_steps_between_copying_online_weights_to_target = \
                EnvironmentEpisodes(params["num_episodes_between_training"])
            agent_params.algorithm.num_consecutive_playing_steps = \
                EnvironmentEpisodes(params["num_episodes_between_training"])
            agent_params.algorithm.distributed_coach_synchronization_type = \
                DistributedCoachSynchronizationType.SYNC

            if params["exploration_type"] == "categorical":
                agent_params.exploration = CategoricalParameters()
            else:
                agent_params.exploration = EGreedyParameters()
                agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, params["e_greedy_value"],
                                                                           params["epsilon_steps"])

            trainable_agents_list.append(agent_params)
        else:
            non_trainable_agents_list.append(agent)

    ###############
    # Environment #
    ###############
    env_params = DeepRacerRacetrackEnvParameters()
    env_params.agents_params = trainable_agents_list
    env_params.non_trainable_agents = non_trainable_agents_list
    env_params.level = 'DeepRacerRacetrackEnv-v0'
    env_params.run_phase_subject = run_phase_subject

    vis_params = VisualizationParameters()
    vis_params.dump_mp4 = False

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 10000

    graph_manager = MultiAgentGraphManager(agents_params=trainable_agents_list,
                                           env_params=env_params,
                                           schedule_params=schedule_params,
                                           vis_params=vis_params,
                                           preset_validation_params=preset_validation_params)
    return graph_manager, params_json
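# A minimal usage sketch; `my_car_agent`, `bot_agent`, and `run_phase_subject`
# are hypothetical objects created elsewhere (agents expose a `network_settings`
# attribute, and only agents with settings become trainable):
hp_dict = {"batch_size": 64, "lr": 0.0003, "loss_type": "huber"}
graph_manager, params_json = get_graph_manager(hp_dict, [my_car_agent, bot_agent],
                                               run_phase_subject)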
def __init__(self):
    super().__init__(algorithm=CILAlgorithmParameters(),
                     exploration=EGreedyParameters(),
                     memory=BalancedExperienceReplayParameters(),
                     networks={"main": CILNetworkParameters()})
def get_graph_manager(**hp_dict):
    ##########################
    # All Default Parameters #
    ##########################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["exploration_type"] = (hp_dict.get("exploration_type", "categorical")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type", "Mean squared error").lower()
    params["num_episodes_between_training"] = int(hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(hp_dict.get("term_cond_avg_score", 100000))

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    agent_params = ClippedPPOAgentParameters()
    agent_params.network_wrappers['main'].learning_rate = params["lr"]
    agent_params.network_wrappers['main'].input_embedders_parameters['observation'].activation_function = 'relu'
    agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
    agent_params.network_wrappers['main'].batch_size = params["batch_size"]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999
    if params["loss_type"] == "huber":
        agent_params.network_wrappers['main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 1000000)
    agent_params.algorithm.beta_entropy = params["beta_entropy"]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params["discount_factor"]
    agent_params.algorithm.optimization_epochs = params["num_epochs"]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

    if params["exploration_type"] == "categorical":
        agent_params.exploration = CategoricalParameters()
    else:
        agent_params.exploration = EGreedyParameters()
        agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, params["e_greedy_value"],
                                                                   params["epsilon_steps"])

    ###############
    # Environment #
    ###############
    SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)
    SilverstoneInputFilter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
    SilverstoneInputFilter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
    SilverstoneInputFilter.add_observation_filter('observation', 'stacking',
                                                  ObservationStackingFilter(params["stack_size"]))

    env_params = GymVectorEnvironment()
    env_params.default_input_filter = SilverstoneInputFilter
    env_params.level = 'SilverstoneRacetrack-Discrete-v0'

    vis_params = VisualizationParameters()
    vis_params.dump_mp4 = False

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 1000

    graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                        env_params=env_params,
                                        schedule_params=schedule_params,
                                        vis_params=vis_params,
                                        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
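# A minimal usage sketch: hyper-parameters arrive as keyword arguments and
# anything omitted falls back to the defaults above (values here are illustrative):
graph_manager, params_json = get_graph_manager(batch_size=128,
                                               lr=1e-4,
                                               loss_type="huber",
                                               exploration_type="categorical")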
def __init__(self):
    super().__init__(algorithm=HumanAlgorithmParameters(),
                     exploration=EGreedyParameters(),
                     memory=ExperienceReplayParameters(),
                     networks={"main": BCNetworkParameters()})
def __init__(self):
    super().__init__()
    self.exploration = EGreedyParameters()
    # a constant schedule written as a degenerate LinearSchedule: epsilon stays
    # at 1.0 for 500M steps, i.e. the agent keeps acting fully at random
    self.exploration.epsilon_schedule = LinearSchedule(1.0, 1.0, 500000000)
def __init__(self):
    super().__init__(algorithm=DFPAlgorithmParameters(),
                     exploration=EGreedyParameters(),
                     memory=DFPMemoryParameters(),
                     networks={"main": DFPNetworkParameters()})
def get_graph_manager(**hp_dict):
    ##########################
    # All Default Parameters #
    ##########################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["lr_decay_rate"] = float(hp_dict.get("lr_decay_rate", 0))
    params["lr_decay_steps"] = float(hp_dict.get("lr_decay_steps", 0))
    params["exploration_type"] = (hp_dict.get("exploration_type", "categorical")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type", "Mean squared error").lower()
    params["num_episodes_between_training"] = int(hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(hp_dict.get("term_cond_avg_score", 100000))
    params["tensorboard"] = hp_dict.get("tensorboard", False)
    params["dump_mp4"] = hp_dict.get("dump_mp4", False)
    params["dump_gifs"] = hp_dict.get("dump_gifs", False)

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    agent_params = ClippedPPOAgentParameters()
    agent_params.network_wrappers['main'].learning_rate = params["lr"]
    agent_params.network_wrappers['main'].learning_rate_decay_rate = params["lr_decay_rate"]
    agent_params.network_wrappers['main'].learning_rate_decay_steps = params["lr_decay_steps"]
    agent_params.network_wrappers['main'].input_embedders_parameters['observation'].activation_function = 'relu'
    # Replace the default CNN with a single-layer Conv2d(32, 3, 1)
    # agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = EmbedderScheme.Shallow
    # agent_params.network_wrappers['main'].input_embedders_parameters['observation'].dropout_rate = 0.3
    agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
    # agent_params.network_wrappers['main'].middleware_parameters.scheme = MiddlewareScheme.Shallow
    # agent_params.network_wrappers['main'].middleware_parameters.dropout_rate = 0.3
    agent_params.network_wrappers['main'].batch_size = params["batch_size"]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999
    # agent_params.network_wrappers['main'].l2_regularization = 2e-5
    if params["loss_type"] == "huber":
        agent_params.network_wrappers['main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 1000000)
    agent_params.algorithm.beta_entropy = params["beta_entropy"]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params["discount_factor"]
    agent_params.algorithm.optimization_epochs = params["num_epochs"]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

    if params["exploration_type"] == "categorical":
        agent_params.exploration = CategoricalParameters()
    else:
        agent_params.exploration = EGreedyParameters()
        agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, params["e_greedy_value"],
                                                                   params["epsilon_steps"])

    ###############
    # Environment #
    ###############
    DeepRacerInputFilter = InputFilter(is_a_reference_filter=True)
    # Add an observation image perturbation for more varied inputs
    # DeepRacerInputFilter.add_observation_filter('observation', 'perturb_color', ObservationColorPerturbation(0.2))
    # Rescale to a much smaller input when using shallow networks to avoid OOM
    # DeepRacerInputFilter.add_observation_filter('observation', 'rescaling',
    #                                             ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([84, 84, 3]),
    #                                                                                                  high=255)))
    DeepRacerInputFilter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
    DeepRacerInputFilter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
    DeepRacerInputFilter.add_observation_filter('observation', 'stacking',
                                                ObservationStackingFilter(params["stack_size"]))

    env_params = GymVectorEnvironment()
    env_params.default_input_filter = DeepRacerInputFilter
    env_params.level = 'DeepRacerRacetrackCustomActionSpaceEnv-v0'

    vis_params = VisualizationParameters()
    vis_params.tensorboard = params["tensorboard"]
    vis_params.dump_mp4 = params["dump_mp4"]
    vis_params.dump_gifs = params["dump_gifs"]
    # AlwaysDumpFilter, MaxDumpFilter, EveryNEpisodesDumpFilter, SelectedPhaseOnlyDumpFilter
    vis_params.video_dump_filters = [AlwaysDumpFilter()]

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 10000

    graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                        env_params=env_params,
                                        schedule_params=schedule_params,
                                        vis_params=vis_params,
                                        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
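# A minimal usage sketch for this variant: the visualization switches ride along
# in hp_dict, and any hyper-parameter left out keeps the default above (values
# here are illustrative, not recommended settings):
graph_manager, params_json = get_graph_manager(lr=0.0003,
                                               lr_decay_rate=0.999,
                                               lr_decay_steps=50000,
                                               loss_type="huber",
                                               tensorboard=True,
                                               dump_mp4=True)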
def get_clipped_ppo_params(agent_params, agent, params):
    """Apply the algorithm-specific settings required by the Clipped PPO algorithm.

    Args:
        agent_params (DeepRacerClippedPPOAgentParams): the agent parameters that will be used
            to create the RL agent
        agent (Agent): the agent object that was created either as part of create_rollout_agent
            or create_training_agent
        params (dict): dictionary of hyperparameters

    Returns:
        DeepRacerClippedPPOAgentParams: updated agent params object with hyperparameters and
        other required details
    """
    agent_params.network_wrappers['main'].learning_rate = params[HyperParameterKeys.LEARNING_RATE.value]
    agent_params.network_wrappers['main'].input_embedders_parameters = \
        create_input_embedder(agent.network_settings['input_embedders'],
                              agent.network_settings['embedder_type'],
                              agent.network_settings['activation_function'])
    agent_params.network_wrappers['main'].middleware_parameters = \
        create_middle_embedder(agent.network_settings['middleware_embedders'],
                               agent.network_settings['embedder_type'],
                               agent.network_settings['activation_function'])
    agent_params.network_wrappers['main'].batch_size = params[HyperParameterKeys.BATCH_SIZE.value]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999
    if params[HyperParameterKeys.LOSS_TYPE.value] == LossTypes.HUBER.value:
        agent_params.network_wrappers['main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.beta_entropy = params[HyperParameterKeys.BETA_ENTROPY.value]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params[HyperParameterKeys.DISCOUNT_FACTOR.value]
    agent_params.algorithm.optimization_epochs = params[HyperParameterKeys.NUM_EPOCHS.value]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = \
        EnvironmentEpisodes(params[HyperParameterKeys.NUM_EPISODES_BETWEEN_TRAINING.value])
    agent_params.algorithm.num_consecutive_playing_steps = \
        EnvironmentEpisodes(params[HyperParameterKeys.NUM_EPISODES_BETWEEN_TRAINING.value])
    agent_params.algorithm.distributed_coach_synchronization_type = \
        DistributedCoachSynchronizationType.SYNC

    exploration_type = params[HyperParameterKeys.EXPLORATION_TYPE.value].lower().strip()
    if exploration_type == ExplorationTypes.CATEGORICAL.value:
        agent_params.exploration = {
            DiscreteActionSpace: DeepRacerCategoricalParameters(use_stochastic_evaluation_policy=False),
            ScalableBoxActionSpace: AdditiveNoiseParameters()}
    elif exploration_type == ExplorationTypes.E_GREEDY.value:
        agent_params.exploration = {
            DiscreteActionSpace: EGreedyParameters(),
            ScalableBoxActionSpace: AdditiveNoiseParameters()}
        agent_params.exploration[DiscreteActionSpace].epsilon_schedule = \
            LinearSchedule(1.0, params[HyperParameterKeys.E_GREEDY_VALUE.value],
                           params[HyperParameterKeys.EPSILON_STEPS.value])
    else:
        log_and_exit("Unknown exploration_type found in hyper parameters. "
                     "exploration_type: {}".format(exploration_type),
                     SIMAPP_SIMULATION_WORKER_EXCEPTION,
                     SIMAPP_EVENT_ERROR_CODE_500)

    agent_params.memory = DeepRacerMemoryParameters()
    return agent_params
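# A minimal usage sketch; `agent_params` and `agent` are hypothetical objects
# created elsewhere (e.g. by DeepRacerClippedPPOAgentParams() and
# create_rollout_agent), and the dict keys mirror the HyperParameterKeys
# entries referenced above (values here are illustrative):
hyperparameters = {
    HyperParameterKeys.LEARNING_RATE.value: 0.0003,
    HyperParameterKeys.BATCH_SIZE.value: 64,
    HyperParameterKeys.LOSS_TYPE.value: LossTypes.HUBER.value,
    HyperParameterKeys.BETA_ENTROPY.value: 0.01,
    HyperParameterKeys.DISCOUNT_FACTOR.value: 0.999,
    HyperParameterKeys.NUM_EPOCHS.value: 10,
    HyperParameterKeys.NUM_EPISODES_BETWEEN_TRAINING.value: 20,
    HyperParameterKeys.EXPLORATION_TYPE.value: ExplorationTypes.CATEGORICAL.value,
    HyperParameterKeys.E_GREEDY_VALUE.value: 0.05,
    HyperParameterKeys.EPSILON_STEPS.value: 10000,
}
agent_params = get_clipped_ppo_params(agent_params, agent, hyperparameters)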