agent_params.algorithm.policy_gradient_rescaler = PolicyGradientRescaler.GAE
agent_params.network_wrappers['main'].learning_rate = 0.0001
agent_params.input_filter = MujocoInputFilter()
agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1 / 100.))
agent_params.algorithm.num_steps_between_gradient_updates = 30
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.gae_lambda = 1.0
agent_params.algorithm.beta_entropy = 0.01
agent_params.network_wrappers['main'].clip_gradients = 40.
agent_params.exploration = CategoricalParameters()

###############
# Environment #
###############
env_params = DoomEnvironmentParameters()
env_params.level = 'basic'

vis_params = VisualizationParameters()
vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 20
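# The fragment above stops at the validation parameters; a Coach preset normally ends by wiring a
# schedule and a graph manager. The block below is only a sketch of that boilerplate with assumed
# step counts, not the values from the original preset file (ScheduleParameters comes from
# rl_coach.graph_managers.graph_manager, the step/episode types from rl_coach.core_types).
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000)                      # assumed training budget
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(100)  # assumed evaluation cadence
schedule_params.evaluation_steps = EnvironmentEpisodes(3)
schedule_params.heatup_steps = EnvironmentSteps(0)

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                    schedule_params=schedule_params, vis_params=vis_params,
                                    preset_validation_params=preset_validation_params)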
#########
agent_params = MixedMonteCarloAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.00025
agent_params.exploration.epsilon_schedule = LinearSchedule(0.5, 0, 10000)
agent_params.exploration.evaluation_epsilon = 0
agent_params.memory.max_size = (MemoryGranularity.Episodes, 200)
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(1000)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)
agent_params.network_wrappers['main'].replace_mse_with_huber_loss = False

###############
# Environment #
###############
env_params = DoomEnvironmentParameters(level='HEALTH_GATHERING')

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test_using_a_trace_test = False
# disabling this test for now, as it takes too long to converge
# preset_validation_params.test = True
# preset_validation_params.min_reward_threshold = 1000
# preset_validation_params.max_episodes_to_achieve_reward = 300

# the closing arguments of this call are assumed; they follow the pattern of the other presets in this section
graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters(),
                                    preset_validation_params=preset_validation_params)
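# Sketch (not part of the preset): LinearSchedule(0.5, 0, 10000) above anneals epsilon linearly
# from 0.5 to 0 over 10,000 steps and then holds it at 0. Equivalent plain-Python decay, for
# reference; the function name is illustrative only:
def linear_epsilon(step, initial=0.5, final=0.0, decay_steps=10000):
    """Linearly interpolate from `initial` to `final` over `decay_steps`, then clamp at `final`."""
    fraction = min(step / float(decay_steps), 1.0)
    return initial + fraction * (final - initial)

# e.g. linear_epsilon(0) == 0.5, linear_epsilon(5000) == 0.25, linear_epsilon(20000) == 0.0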
# The original DFP code decays epsilon over ~1.5M steps. Unlike most other papers, these are 1.5M
# *training* steps: a training batch is sampled once every 8 playing steps, so this is equivalent
# to 1.5M * 8 ≈ 12M playing steps per worker.
# TODO: allow the epsilon schedule to be defined in terms of training steps.
agent_params.exploration.epsilon_schedule = LinearSchedule(1, 0, 12000000)
agent_params.exploration.evaluation_epsilon = 0
agent_params.algorithm.use_accumulated_reward_as_measurement = False
agent_params.algorithm.goal_vector = [0.5, 0.5, 1]  # ammo, health, frag count
agent_params.network_wrappers['main'].input_embedders_parameters['measurements'].input_rescaling['vector'] = 100.
agent_params.algorithm.scale_measurements_targets['GameVariable.HEALTH'] = 30.0
agent_params.algorithm.scale_measurements_targets['GameVariable.AMMO2'] = 7.5
agent_params.algorithm.scale_measurements_targets['GameVariable.USER2'] = 1.0
agent_params.network_wrappers['main'].learning_rate_decay_rate = 0.3
agent_params.network_wrappers['main'].learning_rate_decay_steps = 250000
agent_params.network_wrappers['main'].input_embedders_parameters['measurements'].input_offset['vector'] = 0.5
agent_params.network_wrappers['main'].input_embedders_parameters['observation'].input_offset['vector'] = 0.5

###############
# Environment #
###############
env_params = DoomEnvironmentParameters(level='BATTLE_COACH_LOCAL')
env_params.cameras = [DoomEnvironment.CameraTypes.OBSERVATION]

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                    schedule_params=schedule_params, vis_params=VisualizationParameters())
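# Sketch (not part of the preset): in DFP, goal_vector weights the network's predicted future
# measurement changes per action, and the agent picks the action with the highest weighted sum.
# The function and array shapes below are illustrative assumptions, not Coach's internal API.
import numpy as np

def dfp_choose_action(predicted_future_measurements, goal_vector):
    """predicted_future_measurements: (num_actions, num_measurements), already summed over the
    future time offsets; goal_vector: (num_measurements,), e.g. [0.5, 0.5, 1]."""
    scores = predicted_future_measurements @ np.asarray(goal_vector)  # weighted sum per action
    return int(np.argmax(scores))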
agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = EmbedderScheme.Medium
agent_params.network_wrappers['main'].input_embedders_parameters['measurements'].scheme = EmbedderScheme.Medium
agent_params.network_wrappers['main'].input_embedders_parameters['goal'].scheme = EmbedderScheme.Medium
agent_params.network_wrappers['main'].middleware_parameters.scheme = MiddlewareScheme.Medium

# scale the target measurements according to the paper (dividing by their standard deviation)
agent_params.algorithm.scale_measurements_targets['GameVariable.HEALTH'] = 30.0

###############
# Environment #
###############
env_params = DoomEnvironmentParameters()
env_params.level = 'HEALTH_GATHERING_SUPREME_COACH_LOCAL'

vis_params = VisualizationParameters()
vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
vis_params.dump_mp4 = False

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                    schedule_params=schedule_params, vis_params=vis_params)
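# Usage sketch: if this preset is saved under rl_coach/presets/ as, say, Doom_Health_Supreme_DFP.py
# (file name assumed here), it can be launched with the Coach command line, which selects presets
# by module name via the -p flag, e.g.:
#     coach -p Doom_Health_Supreme_DFP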