Ejemplo n.º 1
0
# NOTE(review): this fragment starts mid-preset. `schedule_params` and every
# class/helper used below are bound outside the visible lines.
# Evaluation length is expressed in episodes (3); heatup length in steps (10000).
schedule_params.evaluation_steps = EnvironmentEpisodes(3)
schedule_params.heatup_steps = EnvironmentSteps(10000)

#########
# Agent #
#########
# Build the actor-critic parameter object with no constructor arguments;
# the assignments below then override individual fields on it.
agent_params = ActorCriticAgentParameters()

# Algorithm-level overrides.
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.beta_entropy = 0.05

# Overrides for the network wrapper registered under the key 'main':
# learning rate, plus an LSTM middleware (Medium scheme, 256 LSTM cells).
agent_params.network_wrappers['main'].learning_rate = 0.0001
agent_params.network_wrappers['main'].middleware_parameters = LSTMMiddlewareParameters(scheme=MiddlewareScheme.Medium,
                                                                                       number_of_lstm_cells=256)
# Replace the exploration parameters with CategoricalParameters.
agent_params.exploration = CategoricalParameters()

###############
# Environment #
###############
# Atari environment parameters; the level is chosen through
# SingleLevelSelection over `atari_deterministic_v4` (defined elsewhere).
env_params = Atari()
env_params.level = SingleLevelSelection(atari_deterministic_v4)

# Visualization: two video dump methods are configured and the dump_mp4 flag is
# switched on.
# NOTE(review): presumably this dumps only TEST-phase episodes that reach a new
# maximum -- confirm against the dump-method documentation.
vis_params = VisualizationParameters()
vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
vis_params.dump_mp4 = True

########
# Test #
########
# Validation parameters are created here; no field is set on them within the
# visible lines (the fragment ends at this point).
preset_validation_params = PresetValidationParameters()
Ejemplo n.º 2
0
# NOTE(review): this fragment starts mid-preset. `agent_params` is created
# outside the visible lines, so its concrete agent type cannot be told from here.
# Algorithm-level overrides: entropy coefficient, GAE-based state-value
# estimation flag, and the online->target weight copy interval (2048 env steps).
agent_params.algorithm.beta_entropy = 0.05
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(2048)

# Overrides for the network wrapper registered under the key "main".
agent_params.network_wrappers["main"].learning_rate = 0.0003
# The "observation" input embedder and the middleware are both configured with
# a tanh activation and a scheme consisting of a single Dense(64) entry.
agent_params.network_wrappers["main"].input_embedders_parameters[
    "observation"
].activation_function = "tanh"
agent_params.network_wrappers["main"].input_embedders_parameters["observation"].scheme = [Dense(64)]
agent_params.network_wrappers["main"].middleware_parameters.scheme = [Dense(64)]
agent_params.network_wrappers["main"].middleware_parameters.activation_function = "tanh"
# Optimizer/training settings on the same wrapper.
agent_params.network_wrappers["main"].batch_size = 64
agent_params.network_wrappers["main"].optimizer_epsilon = 1e-5
agent_params.network_wrappers["main"].clip_gradients = 40.0

# Exploration is replaced with EGreedyParameters and given a LinearSchedule.
# NOTE(review): presumably epsilon goes from 1.0 to 0.01 over 10000 steps --
# confirm the LinearSchedule argument order.
agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)

###############
# Environment #
###############
# Gym environment selected by the level string below.
# NOTE(review): looks like a "module:ClassName" reference -- confirm how
# GymVectorEnvironment resolves it.
env_params = GymVectorEnvironment(level="autoscalesim:SimpleScalableWebserviceSim")

########
# Test #
########
# Preset validation: test flag on, reward threshold 150, episode budget 400.
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 150
preset_validation_params.max_episodes_to_achieve_reward = 400
Ejemplo n.º 3
0
#########
# Agent #
#########
# Build the actor-critic parameter object with no constructor arguments and
# override individual fields below.
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.apply_gradients_every_x_episodes = 1
# NOTE(review): the step interval is set very high (10,000,000); presumably so
# that gradient updates are driven by the per-episode setting above -- confirm.
agent_params.algorithm.num_steps_between_gradient_updates = 10000000
agent_params.algorithm.beta_entropy = 0.0001
agent_params.network_wrappers['main'].learning_rate = 0.00001

# Input filtering: start from MujocoInputFilter, then register a reward filter
# named 'rescale' (factor 1 / 20. == 0.05) and an observation filter named
# 'normalize' on the 'observation' input.
agent_params.input_filter = MujocoInputFilter()
agent_params.input_filter.add_reward_filter('rescale',
                                            RewardRescaleFilter(1 / 20.))
agent_params.input_filter.add_observation_filter(
    'observation', 'normalize', ObservationNormalizationFilter())

# Replace the exploration parameters with ContinuousEntropyParameters.
agent_params.exploration = ContinuousEntropyParameters()

###############
# Environment #
###############
# Mujoco environment parameters; the level is chosen through
# SingleLevelSelection over `mujoco_v2` (defined elsewhere).
env_params = Mujoco()
env_params.level = SingleLevelSelection(mujoco_v2)

# Visualization: two video dump methods are configured, but the dump_mp4 flag
# is switched off.
vis_params = VisualizationParameters()
vis_params.video_dump_methods = [
    SelectedPhaseOnlyDumpMethod(RunPhase.TEST),
    MaxDumpMethod()
]
vis_params.dump_mp4 = False

# NOTE(review): the banner below is cut off; the remainder of this fragment is
# not visible.
########
# NOTE(review): `agent_params` is rebound to a fresh object here, so settings
# applied to any earlier `agent_params` object are no longer reachable through
# this name. This looks like the start of a separate preset fragment.
agent_params = ActorCriticAgentParameters()

# Algorithm-level overrides: GAE as the policy-gradient rescaler with
# gae_lambda 0.96, and the entropy coefficient set to 0.
agent_params.algorithm.policy_gradient_rescaler = PolicyGradientRescaler.GAE
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.gae_lambda = 0.96
agent_params.algorithm.beta_entropy = 0

# Overrides for the network wrapper registered under the key 'main'.
agent_params.network_wrappers['main'].clip_gradients = 10.0
agent_params.network_wrappers['main'].learning_rate = 0.00001
# Disabled alternative batch size; batch_size is assigned 64 further down.
# agent_params.network_wrappers['main'].batch_size = 20
# The whole input-embedder mapping is replaced, leaving a single "screen"
# entry whose input_rescaling maps 'image' to 3.0.
agent_params.network_wrappers['main'].input_embedders_parameters = {
    "screen": InputEmbedderParameters(input_rescaling={'image': 3.0})
}

# Exploration: additive noise with a constant 0.05 schedule; the evaluation
# noise percentage is set to the same value.
agent_params.exploration = AdditiveNoiseParameters()
agent_params.exploration.noise_percentage_schedule = ConstantSchedule(0.05)
# Disabled alternative: a LinearSchedule instead of the constant one.
# agent_params.exploration.noise_percentage_schedule = LinearSchedule(0.4, 0.05, 100000)
agent_params.exploration.evaluation_noise_percentage = 0.05

# Optimizer/training settings on the 'main' wrapper.
agent_params.network_wrappers['main'].batch_size = 64
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

###############
# Environment #
###############

# StarCraft II environment on the 'CollectMineralShards' level; both the
# screen and the minimap feature-map selections are set to the list [5].
# NOTE(review): the meaning of map index 5 is not visible here -- confirm.
env_params = StarCraft2EnvironmentParameters(level='CollectMineralShards')
env_params.feature_screen_maps_to_use = [5]
env_params.feature_minimap_maps_to_use = [5]
# NOTE(review): this looks like a separate preset fragment fused onto the
# preceding lines. Read literally, these assignments overwrite values set
# earlier in the file on the same `agent_params` (e.g. beta_entropy), and the
# 'observation' embedder lookups below would fail if input_embedders_parameters
# is still the {"screen": ...} mapping assigned above -- confirm the intended
# fragment boundary before running this as one script.
# Algorithm-level overrides: update cadence, entropy coefficient, GAE-based
# state-value estimation flag, and the online->target copy interval
# (2048 env steps).
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.beta_entropy = 0.05
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(2048)

# Overrides for the network wrapper registered under the key 'main'.
agent_params.network_wrappers['main'].learning_rate = 0.0003
# The 'observation' input embedder and the middleware are both configured with
# a tanh activation and a scheme consisting of a single Dense(64) entry.
agent_params.network_wrappers['main'].input_embedders_parameters['observation'].activation_function = 'tanh'
agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = [Dense(64)]
agent_params.network_wrappers['main'].middleware_parameters.scheme = [Dense(64)]
agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'tanh'
# Optimizer/training settings on the same wrapper.
agent_params.network_wrappers['main'].batch_size = 64
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].clip_gradients = 40.

# Exploration is replaced with EGreedyParameters and given a LinearSchedule.
# NOTE(review): presumably epsilon goes from 1.0 to 0.01 over 10000 steps --
# confirm the LinearSchedule argument order.
agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)

###############
# Environment #
###############
# Gym environment selected by the level string below; this rebinds
# `env_params`, replacing any earlier value of that name.
env_params = GymVectorEnvironment(level='autoscalesim:SimpleScalableWebserviceSim')

########
# Test #
########
# Preset validation: test flag on, reward threshold 150, episode budget 400.
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 150
preset_validation_params.max_episodes_to_achieve_reward = 400