agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
# Distributed Coach synchronization type. In SYNC mode, rollout workers wait
# for an updated policy before collecting more experience.
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

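# Filters: raw inputs pass through unfiltered, exploration adds noise to the
# actions, and observations are normalized with running mean/std statistics
# right before entering the network.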
agent_params.input_filter = InputFilter()
agent_params.exploration = AdditiveNoiseParameters()
agent_params.pre_network_filter = InputFilter()
agent_params.pre_network_filter.add_observation_filter(
    'observation', 'normalize_observation',
    ObservationNormalizationFilter(name='normalize_observation'))

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=SingleLevelSelection(mujoco_v2))
# Set the target success rate.
env_params.target_success_rate = 1.0

########
# Test #
########
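# The preset passes validation if it reaches a reward of 400 on
# inverted_pendulum within 1000 episodes; trace tests cover inverted_pendulum
# and hopper.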
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000
preset_validation_params.reward_test_level = 'inverted_pendulum'
preset_validation_params.trace_test_levels = ['inverted_pendulum', 'hopper']

graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
    env_params=env_params,
    # schedule_params is defined in the omitted top of this preset; the
    # remaining arguments follow the standard Coach preset pattern.
    schedule_params=schedule_params,
    vis_params=VisualizationParameters(),
    preset_validation_params=preset_validation_params)
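# A registered preset like this one is typically launched with Coach's CLI,
# e.g. `coach -p <preset_name> -lvl inverted_pendulum` (the preset name
# depends on where the file is installed).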
Example #2
#########
# Agent #
#########
agent_params = NStepQAgentParameters()

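# A small two-layer convolutional embedder followed by a single 256-unit
# dense middleware: a lightweight architecture commonly used for n-step
# Q-learning on Atari.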
agent_params.network_wrappers['main'].learning_rate = 0.0001
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].scheme = [Conv2d([16, 8, 4]),
                             Conv2d([32, 4, 2])]
agent_params.network_wrappers['main'].middleware_parameters.scheme = [
    Dense([256])
]

###############
# Environment #
###############
env_params = Atari()
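# The concrete game is chosen at launch time (e.g. `coach ... -lvl breakout`).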
env_params.level = SingleLevelSelection(atari_deterministic_v4)

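# Record videos only during the TEST phase and only when a new maximum
# episode reward is reached; mp4 dumping itself is left disabled.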
vis_params = VisualizationParameters()
vis_params.video_dump_methods = [
    SelectedPhaseOnlyDumpMethod(RunPhase.TEST),
    MaxDumpMethod()
]
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
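# Only trace tests are configured here: the listed levels are re-run and
# their results compared against stored traces.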
preset_validation_params.trace_test_levels = ['breakout', 'pong', 'alien']

graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
    env_params=env_params,
    # schedule_params is defined in the omitted part of this preset; the
    # remaining arguments follow the standard Coach preset pattern.
    schedule_params=schedule_params,
    vis_params=vis_params,
    preset_validation_params=preset_validation_params)
Example #3
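# Clip raw observations to [-200, 200], then normalize each of the three
# goal-based input spaces (observation, achieved_goal, desired_goal) with
# running mean/std statistics before they reach the network.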
agent_params.input_filter = MujocoInputFilter()
agent_params.input_filter.add_observation_filter('observation', 'clipping', ObservationClippingFilter(-200, 200))

agent_params.pre_network_filter = MujocoInputFilter()
agent_params.pre_network_filter.add_observation_filter('observation', 'normalize_observation',
                                                       ObservationNormalizationFilter(name='normalize_observation'))
agent_params.pre_network_filter.add_observation_filter('achieved_goal', 'normalize_achieved_goal',
                                                       ObservationNormalizationFilter(name='normalize_achieved_goal'))
agent_params.pre_network_filter.add_observation_filter('desired_goal', 'normalize_desired_goal',
                                                       ObservationNormalizationFilter(name='normalize_desired_goal'))

###############
# Environment #
###############
env_params = Mujoco()
env_params.level = SingleLevelSelection(fetch_v1)
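# Fetch tasks emit a sparse -1 reward for every step the goal is unmet, so a
# 50-step episode bottoms out at -50; a threshold of -49 flags episodes that
# reach the goal.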
env_params.custom_reward_threshold = -49

vis_params = VisualizationParameters()
vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
vis_params.dump_mp4 = False


########
# Test #
########
preset_validation_params = PresetValidationParameters()
# preset_validation_params.test = True
# preset_validation_params.min_reward_threshold = 200
# preset_validation_params.max_episodes_to_achieve_reward = 600
# preset_validation_params.reward_test_level = 'inverted_pendulum'
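# Like the other presets, this one presumably ends by building the graph
# manager; a hypothetical completion following the same pattern:
graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
    env_params=env_params,
    schedule_params=schedule_params,  # assumed to be defined in the omitted part
    vis_params=vis_params,
    preset_validation_params=preset_validation_params)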