# --- Agent: DDPG network topology ------------------------------------------
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense([300])]
# The action input feeds the critic directly, without an embedding layer.
agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = EmbedderScheme.Empty
# Sigmoid head keeps the actor's output inside the normalized action range.
agent_params.network_wrappers['actor'].heads_parameters[0].activation_function = 'sigmoid'
# agent_params.network_wrappers['critic'].clip_gradients = 100
# agent_params.network_wrappers['actor'].clip_gradients = 100

# --- Agent: target-network updates and returns -----------------------------
agent_params.algorithm.rate_for_copying_weights_to_target = 0.01  # Tau pg. 11
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(1)
agent_params.algorithm.discount = 1

# --- Agent: replay memory --------------------------------------------------
agent_params.memory.max_size = (MemoryGranularity.Transitions, 2000)

# --- Agent: exploration ----------------------------------------------------
agent_params.exploration = TruncatedNormalParameters()  # AdditiveNoiseParameters()
steps_per_episode = 13
# Hold noise at 50% for the first 100 episodes, then decay exponentially
# over the following 300 episodes.
agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([
    (ConstantSchedule(0.5), EnvironmentSteps(100 * steps_per_episode)),
    (ExponentialSchedule(0.5, 0, 0.996), EnvironmentSteps(300 * steps_per_episode)),
])
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)

# --- Agent: I/O filters and learning rates ---------------------------------
agent_params.input_filter = MujocoInputFilter()
agent_params.output_filter = MujocoOutputFilter()
agent_params.network_wrappers['actor'].learning_rate = 0.0001
agent_params.network_wrappers['critic'].learning_rate = 0.001

##############################
#            Gym             #
##############################
# Clip the critic's TD targets to the achievable return range of the
# sparse -1/0 reward (episodes are bounded, so returns lie in [-50, 0]).
agent_params.algorithm.clip_critic_targets = [-50, 0]

# HER parameters
agent_params.memory = EpisodicHindsightExperienceReplayParameters()
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**6)
agent_params.memory.hindsight_goal_selection_method = HindsightGoalSelectionMethod.Future
agent_params.memory.hindsight_transitions_per_regular_transition = 4
agent_params.memory.goals_space = GoalsSpace(
    goal_name='achieved_goal',
    reward_type=ReachingGoal(distance_from_goal_threshold=0.05,
                             goal_reaching_reward=0,
                             default_reward=-1),
    distance_metric=GoalsSpace.DistanceMetric.Euclidean)
agent_params.memory.shared_memory = True

# exploration parameters
agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = ConstantSchedule(0.3)
agent_params.exploration.evaluation_epsilon = 0
# they actually take the noise_schedule to be 0.2 * max_abs_range which is 0.1 * total_range
agent_params.exploration.continuous_exploration_policy_parameters.noise_schedule = ConstantSchedule(0.1)
agent_params.exploration.continuous_exploration_policy_parameters.evaluation_noise = 0

# Clip raw observations into a sane numeric range before they reach the agent.
agent_params.input_filter = InputFilter()
agent_params.input_filter.add_observation_filter(
    'observation', 'clipping', ObservationClippingFilter(-200, 200))

# Running normalization of each observation stream, applied just before the
# network forward pass.
agent_params.pre_network_filter = InputFilter()
agent_params.pre_network_filter.add_observation_filter(
    'observation', 'normalize_observation',
    ObservationNormalizationFilter(name='normalize_observation'))
agent_params.pre_network_filter.add_observation_filter(
    'achieved_goal', 'normalize_achieved_goal',
    ObservationNormalizationFilter(name='normalize_achieved_goal'))
# FIX: the original source was truncated mid-call here (unbalanced paren ->
# SyntaxError); completed to match the pattern of the two registrations above.
agent_params.pre_network_filter.add_observation_filter(
    'desired_goal', 'normalize_desired_goal',
    ObservationNormalizationFilter(name='normalize_desired_goal'))
# --- Network topology: single 300-unit Dense layer throughout --------------
_actor = agent_params.network_wrappers['actor']
_critic = agent_params.network_wrappers['critic']
_actor.input_embedders_parameters['observation'].scheme = [Dense(300)]
_actor.middleware_parameters.scheme = [Dense(300)]
_critic.input_embedders_parameters['observation'].scheme = [Dense(300)]
_critic.middleware_parameters.scheme = [Dense(300)]
# Actions enter the critic unembedded.
_critic.input_embedders_parameters['action'].scheme = EmbedderScheme.Empty
# Sigmoid head keeps actor outputs within the normalized action bounds.
_actor.heads_parameters[0].activation_function = 'sigmoid'
# agent_params.network_wrappers['critic'].clip_gradients = 100
# agent_params.network_wrappers['actor'].clip_gradients = 100

# --- Target networks, heatup and discount ----------------------------------
agent_params.algorithm.rate_for_copying_weights_to_target = 0.01  # Tau pg. 11
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(1)
agent_params.algorithm.heatup_using_network_decisions = True
agent_params.algorithm.discount = 1

# Replay buffer size
agent_params.memory.max_size = (MemoryGranularity.Transitions, 2000)

# --- Exploration: truncated-normal noise, constant then decaying -----------
agent_params.exploration = TruncatedNormalParameters()
agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([
    (ConstantSchedule(0.5), EnvironmentSteps(100 * steps_per_episode)),
    (ExponentialSchedule(0.5, 0, 0.996), EnvironmentSteps(300 * steps_per_episode)),
])
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)

# --- Learning rates --------------------------------------------------------
_actor.learning_rate = 0.0001
_critic.learning_rate = 0.001

##############################
#            Gym             #
##############################

env_params = GymVectorEnvironment()
env_params.level = '../automated_deep_compression/ADC.py:CNNEnvironment'

vis_params = VisualizationParameters()