Ejemplo n.º 1
0
# Private aliases for the 'main' network wrapper and its input embedders.
_main_net = agent_params.network_wrappers['main']
_embedders = _main_net.input_embedders_parameters

# Offset applied to the 'vector' input of the observation embedder.
_embedders['observation'].input_offset['vector'] = 0.5

# Switch every embedder and the middleware to the Medium scheme so the network
# matches Coach's default network, which performs better on this preset.
for _embedder_key in ('observation', 'measurements', 'goal'):
    _embedders[_embedder_key].scheme = EmbedderScheme.Medium
_main_net.middleware_parameters.scheme = MiddlewareScheme.Medium

# Target measurements are scaled as in the paper (division by the standard deviation).
agent_params.algorithm.scale_measurements_targets['GameVariable.HEALTH'] = 30.0

# ---------------------------------------------------------------------------
# Environment
# ---------------------------------------------------------------------------
env_params = DoomEnvironmentParameters()
env_params.level = 'HEALTH_GATHERING'

vis_params = VisualizationParameters()
vis_params.video_dump_methods = [
    SelectedPhaseOnlyDumpMethod(RunPhase.TEST),
    MaxDumpMethod(),
]
vis_params.dump_mp4 = False

# ---------------------------------------------------------------------------
# Test
# ---------------------------------------------------------------------------
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
# The reward threshold is kept at 1000 because otherwise the test takes about an hour.
preset_validation_params.min_reward_threshold = 1000
preset_validation_params.max_episodes_to_achieve_reward = 70
preset_validation_params.test_using_a_trace_test = False

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
Ejemplo n.º 2
0
# Use the Medium scheme for each input embedder and for the middleware of the
# 'main' network wrapper.
_main_net = agent_params.network_wrappers['main']
for _embedder_key in ('observation', 'measurements', 'goal'):
    _main_net.input_embedders_parameters[_embedder_key].scheme = EmbedderScheme.Medium
_main_net.middleware_parameters.scheme = MiddlewareScheme.Medium

# Target measurements are scaled as in the paper (division by the standard deviation).
agent_params.algorithm.scale_measurements_targets['GameVariable.HEALTH'] = 30.0

# ---------------------------------------------------------------------------
# Environment
# ---------------------------------------------------------------------------
env_params = DoomEnvironmentParameters()
env_params.level = 'HEALTH_GATHERING_SUPREME_COACH_LOCAL'

vis_params = VisualizationParameters()
vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
vis_params.dump_mp4 = False

# Assemble the graph manager from the agent, environment, schedule and
# visualization parameters.
graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=vis_params)