# Imports needed by this fragment (paths assume the same rl_coach release as the
# other presets in this section; adjust if your Coach version differs):
from rl_coach.agents.ddpg_agent import DDPGAgentParameters
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import EmbedderScheme, InputEmbedderParameters
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps
from rl_coach.graph_managers.graph_manager import ScheduleParameters

cycles = 100  # 20 for the reach task; 100 for the other tasks

####################
# Graph Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(cycles * 200)  # 200 epochs
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(cycles)  # evaluate once per cycle of episodes
schedule_params.evaluation_steps = EnvironmentEpisodes(10)
schedule_params.heatup_steps = EnvironmentSteps(0)

################
# Agent Params #
################
agent_params = DDPGAgentParameters()

# actor
actor_network = agent_params.network_wrappers['actor']
actor_network.learning_rate = 0.001
actor_network.batch_size = 256
actor_network.optimizer_epsilon = 1e-08
actor_network.adam_optimizer_beta1 = 0.9
actor_network.adam_optimizer_beta2 = 0.999
# goal-conditioned inputs: the raw observation and the desired goal pass through
# empty embedders and are concatenated before the fully-connected middleware
actor_network.input_embedders_parameters = {
    'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
actor_network.middleware_parameters = FCMiddlewareParameters(scheme=[Dense(256), Dense(256), Dense(256)])
actor_network.heads_parameters[0].batchnorm = False

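# Hedged sketch (not part of the original preset): an environment block that would
# match the 'observation'/'desired_goal' embedders above. The level name is a
# placeholder, and it assumes Coach's gym wrapper exposes each key of a goal-based
# dict observation under the corresponding embedder name.
from rl_coach.environments.gym_environment import GymVectorEnvironment

env_params = GymVectorEnvironment(level='FetchReach-v1')  # placeholder level name
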
# Imports completed for this excerpt (paths assume the same rl_coach release as
# the other presets in this section):
from rl_coach.agents.ddpg_agent import DDPGAgentParameters
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.base_parameters import EmbedderScheme
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
from rl_coach.environments.gym_environment import MujocoInputFilter
from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
from rl_coach.graph_managers.graph_manager import ScheduleParameters

####################
# Graph Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)  # effectively unbounded; training is stopped externally
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(1000)

#########
# Agent #
#########
agent_params = DDPGAgentParameters()

# the environment provides its vector observation under the 'measurements' key,
# so the default 'observation' embedder is re-keyed accordingly
agent_params.network_wrappers['actor'].input_embedders_parameters['measurements'] = \
    agent_params.network_wrappers['actor'].input_embedders_parameters.pop('observation')
agent_params.network_wrappers['critic'].input_embedders_parameters['measurements'] = \
    agent_params.network_wrappers['critic'].input_embedders_parameters.pop('observation')

agent_params.network_wrappers['actor'].input_embedders_parameters['measurements'].scheme = [Dense([300])]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense([200])]
agent_params.network_wrappers['critic'].input_embedders_parameters['measurements'].scheme = [Dense([400])]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense([300])]
agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = EmbedderScheme.Empty

agent_params.input_filter = MujocoInputFilter()
agent_params.input_filter.add_reward_filter("rescale", RewardRescaleFilter(1/10.))

###############
# Environment #
###############

import copy

# Imports completed for this excerpt (paths assume the same rl_coach release as
# the other presets in this section):
from rl_coach.agents.ddpg_agent import DDPGAgentParameters
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import ScheduleParameters

####################
# Graph Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)  # effectively unbounded; training is stopped externally
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(1000)

#########
# Agent #
#########
agent_params = DDPGAgentParameters()
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(4)

# front camera: re-key the default 'observation' embedder to the camera input
agent_params.network_wrappers['actor'].input_embedders_parameters['forward_camera'] = \
    agent_params.network_wrappers['actor'].input_embedders_parameters.pop('observation')
agent_params.network_wrappers['critic'].input_embedders_parameters['forward_camera'] = \
    agent_params.network_wrappers['critic'].input_embedders_parameters.pop('observation')

# left camera: reuse the forward-camera embedder configuration
agent_params.network_wrappers['actor'].input_embedders_parameters['left_camera'] = \
    copy.deepcopy(agent_params.network_wrappers['actor'].input_embedders_parameters['forward_camera'])
agent_params.network_wrappers['critic'].input_embedders_parameters['left_camera'] = \
    copy.deepcopy(agent_params.network_wrappers['critic'].input_embedders_parameters['forward_camera'])

# right camera
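# Hedged continuation (not in the original excerpt, which ends at the
# '# right camera' marker): the right camera would presumably mirror the
# left-camera lines above.
agent_params.network_wrappers['actor'].input_embedders_parameters['right_camera'] = \
    copy.deepcopy(agent_params.network_wrappers['actor'].input_embedders_parameters['forward_camera'])
agent_params.network_wrappers['critic'].input_embedders_parameters['right_camera'] = \
    copy.deepcopy(agent_params.network_wrappers['critic'].input_embedders_parameters['forward_camera'])
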
# Imports completed for this excerpt (paths assume the same rl_coach release as
# the other presets in this section):
from rl_coach.agents.ddpg_agent import DDPGAgentParameters
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.base_parameters import EmbedderScheme, PresetValidationParameters
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps
from rl_coach.environments.environment import SingleLevelSelection
from rl_coach.environments.gym_environment import GymVectorEnvironment, mujoco_v2
from rl_coach.graph_managers.graph_manager import ScheduleParameters

####################
# Graph Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentSteps(2000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(10000)

#########
# Agent #
#########
agent_params = DDPGAgentParameters()
agent_params.network_wrappers['actor'].input_embedders_parameters['observation'].scheme = [Dense(400)]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense(300)]
agent_params.network_wrappers['critic'].input_embedders_parameters['observation'].scheme = [Dense(400)]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense(300)]
agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = EmbedderScheme.Empty

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=SingleLevelSelection(mujoco_v2))

########
# Test #
########
preset_validation_params = PresetValidationParameters()
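# Hedged sketch of how such a preset is usually closed out (not part of the
# original excerpt; assumes the standard BasicRLGraphManager signature and leaves
# the preset-validation thresholds unset):
from rl_coach.base_parameters import VisualizationParameters
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters(),
                                    preset_validation_params=preset_validation_params)
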
# Imports completed for this excerpt (paths assume the same rl_coach release as
# the other presets in this section):
from rl_coach.agents.ddpg_agent import DDPGAgentParameters
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.base_parameters import EmbedderScheme
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps
from rl_coach.graph_managers.graph_manager import ScheduleParameters

####################
# Block Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(400)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(1000)  # longer than improve_steps, so evaluation is effectively disabled
schedule_params.evaluation_steps = EnvironmentEpisodes(0)
schedule_params.heatup_steps = EnvironmentSteps(2)

#####################
# DDPG Agent Params #
#####################
agent_params = DDPGAgentParameters()
agent_params.network_wrappers['actor'].input_embedders_parameters['observation'].scheme = [Dense([300])]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense([300])]
agent_params.network_wrappers['critic'].input_embedders_parameters['observation'].scheme = [Dense([300])]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense([300])]
agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = EmbedderScheme.Empty
agent_params.network_wrappers['actor'].heads_parameters[0].activation_function = 'sigmoid'  # bound actions to [0, 1]
# agent_params.network_wrappers['critic'].clip_gradients = 100

# Imports completed for this excerpt (paths assume the same rl_coach release as
# the other presets in this section):
from rl_coach.agents.ddpg_agent import DDPGAgentParameters
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.base_parameters import EmbedderScheme
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps
from rl_coach.graph_managers.graph_manager import ScheduleParameters
from rl_coach.memories.memory import MemoryGranularity

steps_per_episode = 13

####################
# Block Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(400)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(3)  # earlier value: 1000
schedule_params.evaluation_steps = EnvironmentEpisodes(1)  # earlier value: 0
schedule_params.heatup_steps = EnvironmentEpisodes(100)  # earlier values: 120 * steps_per_episode, 2

#####################
# DDPG Agent Params #
#####################
agent_params = DDPGAgentParameters()
agent_params.network_wrappers['actor'].input_embedders_parameters['observation'].scheme = [Dense(300)]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense(300)]
agent_params.network_wrappers['critic'].input_embedders_parameters['observation'].scheme = [Dense(300)]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense(300)]
agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = EmbedderScheme.Empty
agent_params.network_wrappers['actor'].heads_parameters[0].activation_function = 'sigmoid'  # bound actions to [0, 1]
# agent_params.network_wrappers['critic'].clip_gradients = 100
# agent_params.network_wrappers['actor'].clip_gradients = 100

agent_params.algorithm.rate_for_copying_weights_to_target = 0.01  # tau, the soft target-update rate (pg. 11)
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(1)
agent_params.algorithm.heatup_using_network_decisions = True  # heatup acts with the actor instead of random actions
agent_params.algorithm.discount = 1  # undiscounted return

# Replay buffer size
agent_params.memory.max_size = (MemoryGranularity.Transitions, 2000)

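# Illustration only (not rl_coach internals): the soft target-network update that
# the two "copying weights to target" settings above configure. With tau = 0.01
# applied every environment step, each target parameter slowly tracks its online
# counterpart:
def soft_update(target_weights, online_weights, tau=0.01):
    """theta_target <- tau * theta_online + (1 - tau) * theta_target"""
    return [tau * w_online + (1 - tau) * w_target
            for w_online, w_target in zip(online_weights, target_weights)]
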
# Imports completed for this excerpt (paths assume the same rl_coach release as
# the other presets in this section):
import numpy as np

from rl_coach.agents.ddpg_agent import DDPGAgentParameters
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.core_types import EnvironmentEpisodes
from rl_coach.filters.filter import InputFilter
# NOTE: enable the following import when using the "distiller-AMC-induced-changes" branch
from rl_coach.filters.reward import RewardEwmaNormalizationFilter
from rl_coach.graph_managers.graph_manager import ScheduleParameters

####################
# Graph Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(0)
schedule_params.evaluation_steps = EnvironmentEpisodes(0)

#####################
# DDPG Agent Params #
#####################
agent_params = DDPGAgentParameters()
agent_params.network_wrappers['actor'].input_embedders_parameters['observation'].scheme = [Dense(300)]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense(300)]
agent_params.network_wrappers['actor'].heads_parameters[0].activation_function = 'sigmoid'
agent_params.network_wrappers['critic'].input_embedders_parameters['observation'].scheme = [Dense(300)]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense(300)]
# unlike the other presets here, the action input also gets a Dense(300) embedder
agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = [Dense(300)]