Example no. 1
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense([300])]
agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = EmbedderScheme.Empty
agent_params.network_wrappers['actor'].heads_parameters[0].activation_function = 'sigmoid'
#agent_params.network_wrappers['critic'].clip_gradients = 100
#agent_params.network_wrappers['actor'].clip_gradients = 100

agent_params.algorithm.rate_for_copying_weights_to_target = 0.01  # Tau pg. 11
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(1)
agent_params.algorithm.discount = 1
agent_params.memory.max_size = (MemoryGranularity.Transitions, 2000)
agent_params.exploration = TruncatedNormalParameters()  # AdditiveNoiseParameters()
steps_per_episode = 13
agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([
    (ConstantSchedule(0.5), EnvironmentSteps(100 * steps_per_episode)),
    (ExponentialSchedule(0.5, 0, 0.996), EnvironmentSteps(300 * steps_per_episode))])
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)
agent_params.input_filter = MujocoInputFilter()
agent_params.output_filter = MujocoOutputFilter()
agent_params.network_wrappers['actor'].learning_rate = 0.0001
agent_params.network_wrappers['critic'].learning_rate = 0.001

##############################
#      Gym                   #
##############################
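These snippets configure an actor-critic (DDPG-style) agent in Intel's RL Coach and assume that agent_params and the referenced classes are already in scope. Below is a minimal sketch of the setup Example no. 1 appears to rely on; the module paths follow rl_coach conventions and can differ between Coach releases, and DDPGAgentParameters is an assumption inferred from the 'actor'/'critic' network wrappers used above.

# Sketch only: imports/setup assumed by Example no. 1. Module paths follow
# rl_coach conventions and may differ between Coach releases.
from rl_coach.agents.ddpg_agent import DDPGAgentParameters  # assumed agent type
from rl_coach.architectures.layers import Dense
from rl_coach.base_parameters import EmbedderScheme
from rl_coach.core_types import EnvironmentSteps
from rl_coach.exploration_policies.truncated_normal import TruncatedNormalParameters
from rl_coach.memories.memory import MemoryGranularity
from rl_coach.schedules import ConstantSchedule, ExponentialSchedule, PieceWiseSchedule
# MujocoInputFilter/MujocoOutputFilter are assumed to live in rl_coach.filters.filter
# in the Coach release this snippet targets; they are not present in every release.
from rl_coach.filters.filter import MujocoInputFilter, MujocoOutputFilter

agent_params = DDPGAgentParameters()  # provides the 'actor' and 'critic' network wrappers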
Example no. 2
agent_params.algorithm.clip_critic_targets = [-50, 0]

# HER parameters
agent_params.memory = EpisodicHindsightExperienceReplayParameters()
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**6)
agent_params.memory.hindsight_goal_selection_method = HindsightGoalSelectionMethod.Future
agent_params.memory.hindsight_transitions_per_regular_transition = 4
agent_params.memory.goals_space = GoalsSpace(goal_name='achieved_goal',
                                             reward_type=ReachingGoal(distance_from_goal_threshold=0.05,
                                                                      goal_reaching_reward=0,
                                                                      default_reward=-1),
                                             distance_metric=GoalsSpace.DistanceMetric.Euclidean)
agent_params.memory.shared_memory = True

# exploration parameters
agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = ConstantSchedule(0.3)
agent_params.exploration.evaluation_epsilon = 0
# they actually take the noise_schedule to be 0.2 * max_abs_range which is 0.1 * total_range
agent_params.exploration.continuous_exploration_policy_parameters.noise_schedule = ConstantSchedule(0.1)
agent_params.exploration.continuous_exploration_policy_parameters.evaluation_noise = 0

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_observation_filter('observation', 'clipping', ObservationClippingFilter(-200, 200))

agent_params.pre_network_filter = InputFilter()
agent_params.pre_network_filter.add_observation_filter('observation', 'normalize_observation',
                                                       ObservationNormalizationFilter(name='normalize_observation'))
agent_params.pre_network_filter.add_observation_filter('achieved_goal', 'normalize_achieved_goal',
                                                       ObservationNormalizationFilter(name='normalize_achieved_goal'))
agent_params.pre_network_filter.add_observation_filter('desired_goal', 'normalize_desired_goal',
                                                       ObservationNormalizationFilter(name='normalize_desired_goal'))
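
Example no. 2 additionally relies on Coach's hindsight experience replay (HER) memory, goal-space definitions, and observation filters. A sketch of the extra imports it appears to assume follows; as above, the exact module paths depend on the rl_coach release.

# Sketch only: additional imports assumed by Example no. 2 (HER variant).
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.filters.filter import InputFilter
from rl_coach.filters.observation.observation_clipping_filter import ObservationClippingFilter
from rl_coach.filters.observation.observation_normalization_filter import ObservationNormalizationFilter
from rl_coach.memories.episodic.episodic_hindsight_experience_replay import \
    EpisodicHindsightExperienceReplayParameters, HindsightGoalSelectionMethod
from rl_coach.spaces import GoalsSpace, ReachingGoal
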
Example no. 3
agent_params.network_wrappers['actor'].input_embedders_parameters['observation'].scheme = [Dense(300)]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense(300)]
agent_params.network_wrappers['critic'].input_embedders_parameters['observation'].scheme = [Dense(300)]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense(300)]
agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = EmbedderScheme.Empty
agent_params.network_wrappers['actor'].heads_parameters[0].activation_function = 'sigmoid'
# agent_params.network_wrappers['critic'].clip_gradients = 100
# agent_params.network_wrappers['actor'].clip_gradients = 100

agent_params.algorithm.rate_for_copying_weights_to_target = 0.01  # Tau pg. 11
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(1)
agent_params.algorithm.heatup_using_network_decisions = True
agent_params.algorithm.discount = 1
# Replay buffer size
agent_params.memory.max_size = (MemoryGranularity.Transitions, 2000)
agent_params.exploration = TruncatedNormalParameters()
steps_per_episode = 13  # not defined in this snippet; assumed to match Example no. 1
agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([
    (ConstantSchedule(0.5), EnvironmentSteps(100 * steps_per_episode)),
    (ExponentialSchedule(0.5, 0, 0.996), EnvironmentSteps(300 * steps_per_episode))])
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)
agent_params.network_wrappers['actor'].learning_rate = 0.0001
agent_params.network_wrappers['critic'].learning_rate = 0.001

##############################
#      Gym                   #
##############################
env_params = GymVectorEnvironment()
env_params.level = '../automated_deep_compression/ADC.py:CNNEnvironment'  # custom environment, given as '<module path>:<class name>'


vis_params = VisualizationParameters()
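
In a Coach preset, these parameter objects are finally handed to a graph manager, which builds the agent and the environment and runs training. A minimal sketch of that last step follows, assuming the standard BasicRLGraphManager entry point; the improve-schedule is an illustrative placeholder, not part of the original snippets.

# Sketch only: wiring agent_params, env_params and vis_params into a graph manager.
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=SimpleSchedule(),  # placeholder schedule
                                    vis_params=vis_params)

Running the preset then amounts to calling graph_manager.improve(), or launching the preset file through Coach's command-line runner.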