agent_params.memory.shared_memory = True # exploration parameters agent_params.exploration = EGreedyParameters() agent_params.exploration.epsilon_schedule = ConstantSchedule(0.3) agent_params.exploration.evaluation_epsilon = 0 # they actually take the noise_schedule to be 0.2 * max_abs_range which is 0.1 * total_range agent_params.exploration.continuous_exploration_policy_parameters.noise_schedule = ConstantSchedule(0.1) agent_params.exploration.continuous_exploration_policy_parameters.evaluation_noise = 0 agent_params.input_filter = InputFilter() agent_params.input_filter.add_observation_filter('observation', 'clipping', ObservationClippingFilter(-200, 200)) agent_params.pre_network_filter = InputFilter() agent_params.pre_network_filter.add_observation_filter('observation', 'normalize_observation', ObservationNormalizationFilter(name='normalize_observation')) agent_params.pre_network_filter.add_observation_filter('achieved_goal', 'normalize_achieved_goal', ObservationNormalizationFilter(name='normalize_achieved_goal')) agent_params.pre_network_filter.add_observation_filter('desired_goal', 'normalize_desired_goal', ObservationNormalizationFilter(name='normalize_desired_goal')) ############### # Environment # ############### env_params = GymVectorEnvironment(level=SingleLevelSelection(fetch_v1)) env_params.custom_reward_threshold = -49 ######## # Test # ######## preset_validation_params = PresetValidationParameters()
######### # Agent # ######### agent_params = ActorCriticAgentParameters() agent_params.algorithm.apply_gradients_every_x_episodes = 1 agent_params.algorithm.num_steps_between_gradient_updates = 20 agent_params.algorithm.beta_entropy = 0.005 agent_params.network_wrappers['main'].learning_rate = 0.00002 agent_params.network_wrappers['main'].input_embedders_parameters['observation'] = \ InputEmbedderParameters(scheme=[Dense(200)]) agent_params.network_wrappers['main'].middleware_parameters = LSTMMiddlewareParameters(scheme=MiddlewareScheme.Empty, number_of_lstm_cells=128) agent_params.input_filter = InputFilter() agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1/20.)) agent_params.input_filter.add_observation_filter('observation', 'normalize', ObservationNormalizationFilter()) ############### # Environment # ############### env_params = GymVectorEnvironment(level=SingleLevelSelection(mujoco_v2)) ######## # Test # ######## preset_validation_params = PresetValidationParameters() preset_validation_params.test = False preset_validation_params.min_reward_threshold = 400 preset_validation_params.max_episodes_to_achieve_reward = 1000 preset_validation_params.num_workers = 8 preset_validation_params.reward_test_level = 'inverted_pendulum'
# Actor/critic network configuration: a Dense(64) observation embedder and a
# Dense(64) middleware for both networks, plus observation normalization.
# NOTE(review): only the critic's learning rate is set here — the actor's is
# presumably set (or defaulted) earlier in the file.
agent_params.network_wrappers['critic'].learning_rate = 0.001
agent_params.network_wrappers['actor'].input_embedders_parameters[
    'observation'].scheme = [Dense(64)]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [
    Dense(64)
]
agent_params.network_wrappers['critic'].input_embedders_parameters[
    'observation'].scheme = [Dense(64)]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [
    Dense(64)
]

# Normalize observations before they enter the pipeline.
agent_params.input_filter = InputFilter()
agent_params.input_filter.add_observation_filter(
    'observation', 'normalize', ObservationNormalizationFilter())

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=SingleLevelSelection(mujoco_v2))

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True  # this preset is part of the regression suite
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 3000
preset_validation_params.reward_test_level = 'inverted_pendulum'
preset_validation_params.trace_test_levels = ['inverted_pendulum', 'hopper']
# GAE-based policy-optimization settings for a custom knapsack environment.
# NOTE(review): `agent_params` (and its `pre_network_filter`) are created
# earlier in the file — not visible here.
agent_params.algorithm.beta_entropy = 0  # no entropy bonus
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 0.99
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
# Copy online weights to the target network every 2048 environment steps.
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(
    2048)

# Distributed Coach synchronization type.
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

# E-greedy exploration, linearly annealing epsilon 1.0 -> 0.01 over 10000 steps.
agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)

agent_params.pre_network_filter.add_observation_filter(
    'observation', 'normalize_observation',
    ObservationNormalizationFilter(name='normalize_observation'))

###############
# Environment #
###############
# Custom Gym environment given as a "module:ClassName" entry point.
env_params = GymVectorEnvironment(level='knapsack_env:KnapSackMediumEnv')

#################
# Visualization #
#################
vis_params = VisualizationParameters()
vis_params.dump_gifs = True  # record episode GIFs

########
# Test #
########
# GAE-based policy-optimization settings for CartPole-v0.
# NOTE(review): `agent_params` (and its `pre_network_filter`) are created
# earlier in the file — not visible here.
agent_params.algorithm.discount = 0.99
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
# Copy online weights to the target network every 2048 environment steps.
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(
    2048)

# Distributed Coach synchronization type.
agent_params.algorithm.distributed_coach_synchronization_type = (
    DistributedCoachSynchronizationType.SYNC)

# E-greedy exploration, linearly annealing epsilon 1.0 -> 0.01 over 10000 steps.
agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)

# Normalize observations just before they reach the network.
agent_params.pre_network_filter.add_observation_filter(
    "observation",
    "normalize_observation",
    ObservationNormalizationFilter(name="normalize_observation"),
)

###############
# Environment #
###############
env_params = GymVectorEnvironment(level="CartPole-v0")

#################
# Visualization #
#################
vis_params = VisualizationParameters()
vis_params.dump_gifs = True  # record episode GIFs

########