from rl_coach.agents.nec_agent import NECAgentParameters
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.base_parameters import MiddlewareScheme, PresetValidationParameters
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps
from rl_coach.environments.environment import SingleLevelSelection
from rl_coach.environments.gym_environment import Atari, AtariInputFilter, atari_deterministic_v4
from rl_coach.graph_managers.graph_manager import ScheduleParameters

####################
# Graph Scheduling #
####################

schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentSteps(10000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(100)
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(50000)

#########
# Agent #
#########

agent_params = NECAgentParameters(scheme=MiddlewareScheme.Shallow)
agent_params.network_wrappers['main'].learning_rate = 0.00001
agent_params.input_filter = AtariInputFilter()
agent_params.input_filter.remove_reward_filter('clipping')

###############
# Environment #
###############

env_params = Atari(level=SingleLevelSelection(atari_deterministic_v4))
env_params.random_initialization_steps = 1

########
# Test #
########

preset_validation_params = PresetValidationParameters()
preset_validation_params.test_using_a_trace_test = False
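# Coach presets conventionally close by wiring these parameter objects into a
# graph manager. A minimal sketch of that closing step, assuming the stock
# rl_coach API (the same pattern applies to the other presets in this section):

from rl_coach.base_parameters import VisualizationParameters
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters(),
                                    preset_validation_params=preset_validation_params)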
from rl_coach.agents.nec_agent import NECAgentParameters
from rl_coach.base_parameters import MiddlewareScheme, PresetValidationParameters
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps
from rl_coach.environments.environment import SingleLevelSelection
from rl_coach.environments.gym_environment import Atari, AtariInputFilter, atari_deterministic_v4
from rl_coach.graph_managers.graph_manager import ScheduleParameters
# Assumption: RPDense is provided by this fork's layers module, alongside Dense.
from rl_coach.architectures.tensorflow_components.layers import RPDense

####################
# Graph Scheduling #
####################

schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentSteps(10000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(100)
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(50000)

#########
# Agent #
#########

agent_params = NECAgentParameters(scheme=MiddlewareScheme.RP, dense_layer=RPDense)
# If rp_to_fc_step is inf, use RP layers only; if it is 0 or None, use FC layers only (same as vanilla NEC).
agent_params.rp_to_fc_step = 100000000
agent_params.network_wrappers['main'].learning_rate = 0.00001
agent_params.input_filter = AtariInputFilter()
agent_params.input_filter.remove_reward_filter('clipping')

###############
# Environment #
###############

env_params = Atari(level=SingleLevelSelection(atari_deterministic_v4))
env_params.random_initialization_steps = 1

########
# Test #
########

preset_validation_params = PresetValidationParameters()
preset_validation_params.test_using_a_trace_test = False
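# The tri-state rule in the rp_to_fc_step comment above, made concrete. The
# helper below is a hypothetical illustration only (it is not part of the
# preset or of stock rl_coach): random-projection (RP) layers are used while
# the training step is below rp_to_fc_step, inf keeps them forever, and
# 0/None falls back to plain fully-connected (FC) layers, i.e. vanilla NEC.
def uses_rp_layers(training_step, rp_to_fc_step):
    if not rp_to_fc_step:                  # 0 or None -> FC layers only
        return False
    return training_step < rp_to_fc_step   # inf -> always below, RP layers only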
from rl_coach.agents.nec_agent import NECAgentParameters
from rl_coach.base_parameters import VisualizationParameters
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase, \
    SelectedPhaseOnlyDumpMethod
from rl_coach.environments.environment import SingleLevelSelection
from rl_coach.environments.gym_environment import Atari, AtariInputFilter, atari_deterministic_v4
from rl_coach.graph_managers.graph_manager import ScheduleParameters

####################
# Graph Scheduling #
####################

schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentSteps(10000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(100)
schedule_params.evaluation_steps = EnvironmentEpisodes(3)
schedule_params.heatup_steps = EnvironmentSteps(2000)

#########
# Agent #
#########

agent_params = NECAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.00001
agent_params.input_filter = AtariInputFilter()
agent_params.input_filter.remove_reward_filter('clipping')

###############
# Environment #
###############

env_params = Atari()
env_params.level = SingleLevelSelection(atari_deterministic_v4)
env_params.random_initialization_steps = 1

vis_params = VisualizationParameters()
vis_params.video_dump_methods = [
    SelectedPhaseOnlyDumpMethod(RunPhase.TEST),
]
from rl_coach.agents.nec_agent import NECAgentParameters
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
from rl_coach.environments.gym_environment import Mujoco, MujocoInputFilter
from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
from rl_coach.graph_managers.graph_manager import ScheduleParameters
from rl_coach.memories.memory import MemoryGranularity
from rl_coach.schedules import LinearSchedule

####################
# Graph Scheduling #
####################

schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(10)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(1300)

#########
# Agent #
#########

agent_params = NECAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.00025
agent_params.exploration.epsilon_schedule = LinearSchedule(0.5, 0.1, 1000)
agent_params.exploration.evaluation_epsilon = 0
agent_params.algorithm.discount = 0.99
agent_params.memory.max_size = (MemoryGranularity.Episodes, 200)
agent_params.input_filter = MujocoInputFilter()
agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1 / 200.))

###############
# Environment #
###############

# CartPole is a vector-observation gym environment, so it uses the generic
# gym (Mujoco) environment parameters rather than the Atari ones.
env_params = Mujoco()
env_params.level = 'CartPole-v0'
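# Running a preset directly, once its graph manager is built (see the closing
# sketch after the first preset). A minimal driver, assuming the stock
# rl_coach API; the experiment path is a placeholder. In practice presets are
# usually launched through the coach CLI (e.g. `coach -p <preset_name>`),
# which performs this wiring itself.

from rl_coach.base_parameters import TaskParameters, VisualizationParameters
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters())

graph_manager.create_graph(TaskParameters(experiment_path='./experiments/cartpole_nec'))
graph_manager.improve()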