Example #1
agent_params.memory.shared_memory = True

# exploration parameters
agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = ConstantSchedule(0.3)
agent_params.exploration.evaluation_epsilon = 0
# they actually take the noise_schedule to be 0.2 * max_abs_range, which is 0.1 * total_range
agent_params.exploration.continuous_exploration_policy_parameters.noise_schedule = ConstantSchedule(0.1)
agent_params.exploration.continuous_exploration_policy_parameters.evaluation_noise = 0
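# with evaluation_epsilon and evaluation_noise both set to 0, the agent acts
# without exploration noise during evaluation episodes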

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_observation_filter('observation', 'clipping', ObservationClippingFilter(-200, 200))

agent_params.pre_network_filter = InputFilter()
agent_params.pre_network_filter.add_observation_filter('observation', 'normalize_observation',
                                                       ObservationNormalizationFilter(name='normalize_observation'))
agent_params.pre_network_filter.add_observation_filter('achieved_goal', 'normalize_achieved_goal',
                                                       ObservationNormalizationFilter(name='normalize_achieved_goal'))
agent_params.pre_network_filter.add_observation_filter('desired_goal', 'normalize_desired_goal',
                                                       ObservationNormalizationFilter(name='normalize_desired_goal'))

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=SingleLevelSelection(fetch_v1))
env_params.custom_reward_threshold = -49

########
# Test #
########
preset_validation_params = PresetValidationParameters()
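
# The fragment above omits its imports; a minimal import block covering the names
# it uses, assuming the open-source rl_coach package layout (module paths may
# differ across Coach versions):
from rl_coach.base_parameters import PresetValidationParameters
from rl_coach.environments.environment import SingleLevelSelection
from rl_coach.environments.gym_environment import GymVectorEnvironment, fetch_v1
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.filters.filter import InputFilter
from rl_coach.filters.observation.observation_clipping_filter import ObservationClippingFilter
from rl_coach.filters.observation.observation_normalization_filter import ObservationNormalizationFilter
from rl_coach.schedules import ConstantSchedule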
Example #2
#########
# Agent #
#########
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.beta_entropy = 0.005
agent_params.network_wrappers['main'].learning_rate = 0.00002
agent_params.network_wrappers['main'].input_embedders_parameters['observation'] = \
    InputEmbedderParameters(scheme=[Dense(200)])
agent_params.network_wrappers['main'].middleware_parameters = LSTMMiddlewareParameters(scheme=MiddlewareScheme.Empty,
                                                                                       number_of_lstm_cells=128)
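# MiddlewareScheme.Empty places no dense layers in the middleware, so the Dense(200)
# embedder output feeds the 128-cell LSTM directly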

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1/20.))
agent_params.input_filter.add_observation_filter('observation', 'normalize', ObservationNormalizationFilter())

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=SingleLevelSelection(mujoco_v2))

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = False
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000
preset_validation_params.num_workers = 8
preset_validation_params.reward_test_level = 'inverted_pendulum'
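
# A preset in this style is normally completed by wiring the parameter objects into
# a graph manager. A minimal sketch, assuming the standard rl_coach
# BasicRLGraphManager / ScheduleParameters API; the schedule values below are
# illustrative, not taken from the original preset:
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps, TrainingSteps
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import ScheduleParameters

schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000)      # total training budget
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(0)           # on-policy agent: no heatup

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                    schedule_params=schedule_params,
                                    preset_validation_params=preset_validation_params)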
Example #3
agent_params.network_wrappers['critic'].learning_rate = 0.001

agent_params.network_wrappers['actor'].input_embedders_parameters[
    'observation'].scheme = [Dense(64)]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [
    Dense(64)
]
agent_params.network_wrappers['critic'].input_embedders_parameters[
    'observation'].scheme = [Dense(64)]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [
    Dense(64)
]

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_observation_filter(
    'observation', 'normalize', ObservationNormalizationFilter())

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=SingleLevelSelection(mujoco_v2))

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 3000
preset_validation_params.reward_test_level = 'inverted_pendulum'
preset_validation_params.trace_test_levels = ['inverted_pendulum', 'hopper']
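
# Presets like this are typically launched through the Coach command line, picking
# the Mujoco level at run time (the preset name below is illustrative):
#   coach -p Mujoco_DDPG -lvl inverted_pendulum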
Example #4
agent_params.algorithm.beta_entropy = 0
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 0.99
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(2048)

# Distributed Coach synchronization type.
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)
agent_params.pre_network_filter.add_observation_filter(
    'observation', 'normalize_observation',
    ObservationNormalizationFilter(name='normalize_observation'))

###############
# Environment #
###############
env_params = GymVectorEnvironment(level='knapsack_env:KnapSackMediumEnv')
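# the 'module:Class' level string points GymVectorEnvironment at a custom Gym
# environment class (here a knapsack environment) instead of a registered Gym id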

#################
# Visualization #
#################

vis_params = VisualizationParameters()
vis_params.dump_gifs = True

########
# Test #
Example #5
agent_params.algorithm.discount = 0.99
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(2048)

# Distributed Coach synchronization type.
agent_params.algorithm.distributed_coach_synchronization_type = (
    DistributedCoachSynchronizationType.SYNC)

agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)
agent_params.pre_network_filter.add_observation_filter(
    "observation",
    "normalize_observation",
    ObservationNormalizationFilter(name="normalize_observation"),
)

###############
# Environment #
###############
env_params = GymVectorEnvironment(level="CartPole-v0")

#################
# Visualization #
#################

vis_params = VisualizationParameters()
vis_params.dump_gifs = True

########