def main(game, _seed, _run):
    torch.manual_seed(_seed)
    # Build the Atari env, e.g. 'space_invaders' -> 'SpaceInvadersNoFrameskip-v4'.
    game = lower_under_to_upper(game) + 'NoFrameskip-v4'
    env = gym.make(game)
    env = wrap_deepmind(env)
    input_space = env.observation_space
    num_actions = env.action_space.n

    # Collect agent / optimizer / exploration hyperparameters from the config.
    agent_params = DeepQAgentParams()
    add_params(params=agent_params, prefix='agent')
    add_params(params=agent_params.optimizer_params, prefix='opt')
    add_epsilon_params(params=agent_params)

    # The observation filter changes the shape the Q-networks actually see.
    agent_params.obs_filter = AtariObservationFilter()
    input_space = agent_params.obs_filter.output_space(input_space)

    agent_params.sacred_run = _run
    agent_params.env = env
    agent_params.mode = 'train'

    # Online and target Q-networks share the same architecture.
    online_q_net = build_net(input_shape=input_space.shape, num_actions=num_actions)
    target_q_net = build_net(input_shape=input_space.shape, num_actions=num_actions)
    agent_params.online_q_net = online_q_net
    agent_params.target_q_net = target_q_net

    agent = agent_params.make_agent()
    agent.run()
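# A minimal sketch of the name helper used above, assuming it just converts
# snake_case game names into Gym's CamelCase Atari ids; the repo's actual
# lower_under_to_upper may differ.
def lower_under_to_upper(name):
    # 'space_invaders' -> 'SpaceInvaders'
    return ''.join(part.capitalize() for part in name.split('_'))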
def main(_seed, _run, env):
    torch.manual_seed(_seed)
    # Separate training and evaluation environments, built from the config;
    # both are assumed to share observation and action spaces.
    train_env, test_env = build_env(**env['train']), build_env(**env['test'])
    input_shape = train_env.observation_space.shape
    num_actions = test_env.action_space.n

    agent_params = DeepQAgentParams()
    add_params(params=agent_params, prefix='agent')
    add_params(params=agent_params.optimizer_params, prefix='opt')
    add_epsilon_params(params=agent_params)

    agent_params.sacred_run = _run
    agent_params.train_env = train_env
    agent_params.test_envs.append(test_env)

    online_q_net = build_net(input_shape=input_shape, num_actions=num_actions)
    target_q_net = build_net(input_shape=input_shape, num_actions=num_actions)
    agent_params.online_q_net = online_q_net
    agent_params.target_q_net = target_q_net
    # batch_fn collates raw observations into network-ready batches.
    agent_params.obs_filter = batch_fn

    agent = agent_params.make_agent()
    agent.run()
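# batch_fn is not defined in this section; a plausible sketch, assuming it
# simply stacks raw observations into a float tensor for the Q-network:
import numpy as np
import torch

def batch_fn(observations):
    # Produces an (N, *obs_shape) float32 batch ready for a forward pass.
    return torch.from_numpy(np.stack(observations)).float()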
def main(_seed, _run, env):
    torch.manual_seed(_seed)
    # Train on a mixture of environments; SampleEnv picks one per episode.
    train_envs = build_envs(**env['train'])
    train_env = SampleEnv(train_envs)
    test_envs = build_envs(**env['test'])
    input_shape = train_env.observation_space.shape
    num_actions = train_env.action_space.n

    agent_params = ActorCriticAgentParams()
    add_params(params=agent_params, prefix='agent')
    add_params(params=agent_params.optimizer_params, prefix='opt')

    agent_params.sacred_run = _run
    agent_params.train_env = train_env
    agent_params.test_envs = test_envs

    # A single network provides both the policy and the value estimates.
    policy_value_net = build_net(input_shape=input_shape, num_actions=num_actions)
    agent_params.policy_value_net = policy_value_net

    agent = agent_params.make_agent()
    agent.run()
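# Hypothetical sketch of SampleEnv as used above: a wrapper that draws a
# random sub-environment on every reset, so training sees a mixture of
# levels. All sub-envs are assumed to share observation and action spaces;
# the actual class may differ.
import random

class SampleEnv:
    def __init__(self, envs):
        self.envs = envs
        self._env = envs[0]
        self.observation_space = self._env.observation_space
        self.action_space = self._env.action_space

    def reset(self):
        # Switch to a freshly sampled sub-env at episode boundaries.
        self._env = random.choice(self.envs)
        return self._env.reset()

    def step(self, action):
        return self._env.step(action)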
def main(_seed, _run):
    torch.manual_seed(_seed)
    env = build_env()
    input_shape = env.observation_space.shape
    num_actions = env.action_space.n

    agent_params = PolicyGradientsAgentParams()
    add_params(params=agent_params, prefix='agent')
    add_params(params=agent_params.optimizer_params, prefix='opt')

    agent_params.sacred_run = _run
    agent_params.env = env
    agent_params.mode = 'train'

    policy_net = build_net(input_shape=input_shape, num_actions=num_actions)
    agent_params.policy_net = policy_net

    agent = agent_params.make_agent()
    agent.run()
def main(_seed, _run):
    torch.manual_seed(_seed)
    env = gym.make('CartPole-v0')
    input_shape = env.observation_space.shape
    num_actions = env.action_space.n

    agent_params = PolicyGradientsAgentParams()
    add_params(params=agent_params, prefix='agent')
    add_params(params=agent_params.optimizer_params, prefix='opt')

    agent_params.sacred_run = _run
    agent_params.env = env
    agent_params.mode = 'train'
    # Scale rewards so returns land in [0, 1]; 200 is the maximum
    # undiscounted return of CartPole-v0.
    agent_params.reward_filter = RewardRescaleFilter(200.)

    policy_net = build_net(input_shape=input_shape, num_actions=num_actions)
    agent_params.policy_net = policy_net

    agent = agent_params.make_agent()
    agent.run()
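# Plausible sketch of RewardRescaleFilter, assuming it just divides rewards
# by a constant; the real filter may differ.
class RewardRescaleFilter:
    def __init__(self, scale):
        self.scale = scale

    def __call__(self, reward):
        return reward / self.scale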
def main(_seed, _run, env):
    torch.manual_seed(_seed)
    # Knowledge-graph envs also report the entity set and node/edge feature
    # counts, which size the graph network below.
    train_envs, (kg_entities, _, num_node_feats, num_edge_feats) = build_envs(**env['train'])
    num_entities = len(kg_entities)
    train_env = SampleEnv(train_envs)
    test_envs, _ = build_envs(**env['test'])
    input_shape = train_env.observation_space.shape
    num_actions = train_env.action_space.n

    agent_params = DeepQAgentParams()
    add_params(params=agent_params, prefix='agent')
    add_params(params=agent_params.optimizer_params, prefix='opt')
    add_epsilon_params(params=agent_params)
    add_stopping_params(params=agent_params)

    agent_params.sacred_run = _run
    agent_params.train_env = train_env
    agent_params.test_envs = test_envs
    # Graph observations need a dedicated batching routine.
    agent_params.obs_filter = GraphEnv.batch_observations

    online_q_net = build_net(input_shape=input_shape, num_actions=num_actions,
                             num_entities=num_entities, num_node_feats=num_node_feats,
                             num_edge_feats=num_edge_feats)
    target_q_net = build_net(input_shape=input_shape, num_actions=num_actions,
                             num_entities=num_entities, num_node_feats=num_node_feats,
                             num_edge_feats=num_edge_feats)
    agent_params.online_q_net = online_q_net
    agent_params.target_q_net = target_q_net

    agent = agent_params.make_agent()
    agent.run()
def main(_seed, _run):
    torch.manual_seed(_seed)
    env = build_env()
    input_shape = env.observation_space.shape
    num_actions = env.action_space.n

    agent_params = DeepQAgentParams()
    add_params(params=agent_params, prefix='agent')
    add_params(params=agent_params.optimizer_params, prefix='opt')
    add_epsilon_params(params=agent_params)

    agent_params.sacred_run = _run
    agent_params.env = env
    agent_params.mode = 'train'

    online_q_net = build_net(input_shape=input_shape, num_actions=num_actions)
    target_q_net = build_net(input_shape=input_shape, num_actions=num_actions)
    agent_params.online_q_net = online_q_net
    agent_params.target_q_net = target_q_net

    agent = agent_params.make_agent()
    agent.run()
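# The _seed/_run arguments and the sacred_run attribute follow Sacred's
# injection convention; a typical wiring (assumed, not shown in this section)
# registers each main on an Experiment so the script runs from the command
# line and Sacred supplies _seed and _run automatically:
from sacred import Experiment

ex = Experiment('dqn_train')
ex.main(main)  # register the entry point defined above

if __name__ == '__main__':
    ex.run_commandline()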