from rl_coach.agents.dqn_agent import DQNAgentParameters
from rl_coach.architectures.layers import Dense
from rl_coach.base_parameters import EmbedderScheme, InputEmbedderParameters
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps, TrainingSteps
from rl_coach.graph_managers.graph_manager import ScheduleParameters

####################
# Graph Scheduling #
####################

schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(16 * 50 * 200)  # 200 epochs
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(16 * 50)  # 50 cycles
schedule_params.evaluation_steps = EnvironmentEpisodes(10)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########

agent_params = DQNAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.001
agent_params.network_wrappers['main'].batch_size = 128
agent_params.network_wrappers['main'].middleware_parameters.scheme = [Dense(256)]
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'state': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
agent_params.algorithm.discount = 0.98
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(16)
agent_params.algorithm.num_consecutive_training_steps = 40
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(40)
agent_params.algorithm.rate_for_copying_weights_to_target = 0.05
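# The fragment above stops before the environment and graph-manager wiring.
# A minimal sketch of a typical completion follows: the 'state'/'desired_goal'
# embedders suggest a goal-conditioned toy task such as rl_coach's BitFlip, but
# the level string and the graph-manager arguments here are assumptions, not
# this preset's actual tail.
from rl_coach.base_parameters import VisualizationParameters
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager

env_params = GymVectorEnvironment(level='rl_coach.environments.toy_problems.bit_flip:BitFlip')

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters())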
from rl_coach.agents.dqn_agent import DQNAgentParameters
from rl_coach.base_parameters import VisualizationParameters
from rl_coach.core_types import EnvironmentSteps, RunPhase, SelectedPhaseOnlyDumpMethod, MaxDumpMethod
from rl_coach.environments.environment import SingleLevelSelection
from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4
from rl_coach.graph_managers.graph_manager import ScheduleParameters
from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplayParameters
from rl_coach.schedules import LinearSchedule

####################
# Graph Scheduling #
####################

schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentSteps(50000000)
schedule_params.steps_between_evaluation_periods = EnvironmentSteps(250000)
schedule_params.evaluation_steps = EnvironmentSteps(135000)
schedule_params.heatup_steps = EnvironmentSteps(50000)

#########
# Agent #
#########

agent_params = DQNAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.00025
agent_params.memory = PrioritizedExperienceReplayParameters()
agent_params.memory.beta = LinearSchedule(0.4, 1, 12500000)  # 12.5M training iterations = 50M steps = 200M frames

###############
# Environment #
###############

env_params = Atari()
env_params.level = SingleLevelSelection(atari_deterministic_v4)

vis_params = VisualizationParameters()
vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST),
                                 MaxDumpMethod()]
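# The fragment ends before a graph manager is built. A minimal sketch of the
# usual tail, assuming the BasicRLGraphManager wiring shared by rl_coach's
# other Atari presets:
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=vis_params)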
from rl_coach.agents.dqn_agent import DQNAgentParameters
from rl_coach.graph_managers.graph_manager import SimpleSchedule
from rl_coach.core_types import EnvironmentSteps
from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.environments.first_test import ControlSuiteEnvironmentParameters

agent_params = DQNAgentParameters()

# rename the input embedder key from 'observation' to 'measurements'
# agent_params.network_wrappers['main'].input_embedders_parameters['measurements'] = \
#     agent_params.network_wrappers['main'].input_embedders_parameters.pop('observation')

schedule_params = SimpleSchedule()
schedule_params.heatup_steps = EnvironmentSteps(10)

preset_validation_params = PresetValidationParameters()
# preset_validation_params.test = True
# preset_validation_params.min_reward_threshold = 20
# preset_validation_params.max_episodes_to_achieve_reward = 400

vis_params = VisualizationParameters(render=False)
env_params = ControlSuiteEnvironmentParameters()

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=vis_params,  # use the render=False settings defined above
                                    preset_validation_params=preset_validation_params)
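# Usage sketch: rl_coach presets are launched by name through the coach CLI.
# Assuming this file were saved under the presets directory as, say,
# ControlSuite_DQN.py (a hypothetical name), it would run with:
#   coach -p ControlSuite_DQN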
from rl_coach.memories.memory import MemoryGranularity
from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
from rl_coach.agents.dqn_agent import DQNAgentParameters
from rl_coach.base_parameters import PresetValidationParameters, VisualizationParameters
from rl_coach.core_types import EnvironmentSteps
from rl_coach.environments.second_test import ControlSuiteEnvironmentParameters
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule

# cap the replay buffer at 10k transitions
experience_replay_parameters = ExperienceReplayParameters()
experience_replay_parameters.max_size = (MemoryGranularity.Transitions, 10000)

agent_params = DQNAgentParameters()
agent_params.memory = experience_replay_parameters

schedule_params = SimpleSchedule()
schedule_params.heatup_steps = EnvironmentSteps(10)

preset_validation_params = PresetValidationParameters(test=True,
                                                      min_reward_threshold=-50000,
                                                      max_episodes_to_achieve_reward=10,
                                                      num_workers=5)

vis_params = VisualizationParameters(render=False)
env_params = ControlSuiteEnvironmentParameters()

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=vis_params,
                                    preset_validation_params=preset_validation_params)
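# Usage sketch: num_workers=5 above appears to apply only to preset validation.
# To actually train with multiple synchronized workers, the coach CLI takes the
# worker count explicitly (the preset name here is hypothetical):
#   coach -p ControlSuite_DQN_ER -n 5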
from rl_coach.agents.dqn_agent import DQNAgentParameters
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps, TrainingSteps
from rl_coach.filters.filter import InputFilter
from rl_coach.filters.reward.reward_adversarial_inversion_filter import RewardAdversarialInversionFilter
from rl_coach.graph_managers.graph_manager import ScheduleParameters
from rl_coach.memories.memory import MemoryGranularity
from rl_coach.schedules import LinearSchedule

####################
# Graph Scheduling #
####################

schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(30000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(10)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(1000)

#########
# Agent #
#########

agent_params = DQNAgentParameters()

# DQN params
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(100)
agent_params.algorithm.discount = 0.99
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)

# NN configuration
agent_params.network_wrappers['main'].learning_rate = 0.00025
agent_params.network_wrappers['main'].replace_mse_with_huber_loss = False

# ER size
agent_params.memory.max_size = (MemoryGranularity.Transitions, 40000)

# E-Greedy schedule (the fragment was cut at this comment; the schedule below
# is filled in from the otherwise-identical sibling presets that follow)
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)
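# The imported RewardAdversarialInversionFilter is never attached in this
# fragment. A hedged sketch of the usual reward-filter wiring in rl_coach;
# the filter's constructor arguments are unknown, so defaults are assumed:
agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter('adversarial_inversion',
                                            RewardAdversarialInversionFilter())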
from rl_coach.agents.dqn_agent import DQNAgentParameters
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps, TrainingSteps
from rl_coach.filters.observation.observation_adversarial_noise_filter import ObservationAdversarialNoiseFilter
from rl_coach.graph_managers.graph_manager import ScheduleParameters
from rl_coach.memories.memory import MemoryGranularity
from rl_coach.schedules import LinearSchedule

####################
# Graph Scheduling #
####################

schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(30000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(10)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(1000)

#########
# Agent #
#########

agent_params = DQNAgentParameters()

# DQN params
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(100)
agent_params.algorithm.discount = 0.99
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)

# NN configuration
agent_params.network_wrappers['main'].learning_rate = 0.00025
agent_params.network_wrappers['main'].replace_mse_with_huber_loss = False

# ER size
agent_params.memory.max_size = (MemoryGranularity.Transitions, 40000)

# E-Greedy schedule
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)
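# Hedged sketch: attaching the imported observation filter through an
# InputFilter. The 'observation' key and the filter's default constructor are
# assumptions; rl_coach registers observation filters per observation name.
from rl_coach.filters.filter import InputFilter

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_observation_filter('observation', 'adversarial_noise',
                                                 ObservationAdversarialNoiseFilter())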
from rl_coach.agents.dqn_agent import DQNAgentParameters
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps, TrainingSteps
from rl_coach.filters.action.action_adversarial_noise import ActionAdversarialNoise
from rl_coach.graph_managers.graph_manager import ScheduleParameters
from rl_coach.memories.memory import MemoryGranularity
from rl_coach.schedules import LinearSchedule

####################
# Graph Scheduling #
####################

schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(30000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(10)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(1000)

#########
# Agent #
#########

agent_params = DQNAgentParameters()

# DQN params
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(100)
agent_params.algorithm.discount = 0.99
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)

# NN configuration
agent_params.network_wrappers['main'].learning_rate = 0.00025
agent_params.network_wrappers['main'].replace_mse_with_huber_loss = False

# ER size
agent_params.memory.max_size = (MemoryGranularity.Transitions, 40000)

# E-Greedy schedule
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)
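# Hedged sketch: action filters sit on the agent's output side, so the wiring
# below uses an OutputFilter rather than an InputFilter. The add_action_filter
# call pattern and the filter's default constructor are assumptions based on
# other rl_coach presets.
from rl_coach.filters.filter import OutputFilter

agent_params.output_filter = OutputFilter()
agent_params.output_filter.add_action_filter('adversarial_noise', ActionAdversarialNoise())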
from rl_coach.agents.dqn_agent import DQNAgentParameters
from rl_coach.architectures.layers import Dense
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps, TrainingSteps
from rl_coach.filters.filter import InputFilter
from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
from rl_coach.graph_managers.graph_manager import ScheduleParameters
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
from rl_coach.schedules import LinearSchedule

# this fragment starts mid-file: agent_params, DATASET_SIZE, and the left-hand
# side of the truncated "... Dense(64)]" statement live in the missing portion;
# the two lines below are assumed reconstructions based on the sibling presets
agent_params = DQNAgentParameters()
agent_params.network_wrappers['main'].middleware_parameters.scheme = [Dense(64)]

# ER size
agent_params.memory = EpisodicExperienceReplayParameters()

# E-Greedy schedule
agent_params.exploration.epsilon_schedule = LinearSchedule(0, 0, 10000)
agent_params.exploration.evaluation_epsilon = 0

# Input filtering
agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1 / 200.))

# Experience Generating Agent parameters
experience_generating_agent_params = DQNAgentParameters()

# schedule parameters (DATASET_SIZE is defined in the truncated portion)
experience_generating_schedule_params = ScheduleParameters()
experience_generating_schedule_params.heatup_steps = EnvironmentSteps(1000)
experience_generating_schedule_params.improve_steps = TrainingSteps(
    DATASET_SIZE - experience_generating_schedule_params.heatup_steps.num_steps)
experience_generating_schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(10)
experience_generating_schedule_params.evaluation_steps = EnvironmentEpisodes(1)

# DQN params
experience_generating_agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(100)
experience_generating_agent_params.algorithm.discount = 0.99
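# The file is truncated before a graph manager is built. Batch-RL presets in
# rl_coach typically finish with a BatchRLGraphManager that uses the
# experience-generating agent to produce the offline dataset. Everything below
# (env_params, schedule_params, epoch count, ratio) is an assumption, not this
# preset's actual tail.
from rl_coach.base_parameters import VisualizationParameters
from rl_coach.graph_managers.batch_rl_graph_manager import BatchRLGraphManager

graph_manager = BatchRLGraphManager(agent_params=agent_params,
                                    experience_generating_agent_params=experience_generating_agent_params,
                                    experience_generating_schedule_params=experience_generating_schedule_params,
                                    env_params=env_params,            # assumed to exist in the truncated portion
                                    schedule_params=schedule_params,  # likewise assumed
                                    vis_params=VisualizationParameters(),
                                    reward_model_num_epochs=30,       # assumed value
                                    train_to_eval_ratio=0.4)          # assumed value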