Example #1
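Loads a pretrained PPO checkpoint for the Unity ML-Agents Reacher environment (the 20-agent build, judging by the file name) and runs the agent in real time.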
import torch
from unityagents import UnityEnvironment  # ML-Agents v0.4-era API, matching env.brains below

# PPOAgent and test_agent are project-local.


def main():

    device = torch.device("cpu")
    env = UnityEnvironment(file_name='reacher20/reacher', base_port=64739)
    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]
    # action size, number of parallel agents, and observation size
    action_size = brain.vector_action_space_size
    num_agents = len(env_info.agents)
    states = env_info.vector_observations
    state_size = states.shape[1]

    agent = PPOAgent(state_size=state_size,
                     action_size=action_size,
                     hidden_size=256,
                     num_agents=num_agents,
                     random_seed=0,
                     ppo_epochs=4,
                     mini_batch_size=128,
                     normalize_advantages=True,
                     learning_rate=3e-4,
                     clip_gradients=True,
                     gamma=0.99,
                     tau=0.95,
                     device=device)
    agent.load_model('assets/ppo_checkpoint_37.10.pth')
    test_agent(env, brain_name, agent, device, real_time=True)
Example #2
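Evaluates a trained PPO agent on an Atari game: the environment runs in a separate worker process and exchanges actions and observations with the main process over a multiprocessing pipe.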
import gym
import numpy as np
from multiprocessing import Pipe

# PPOAgent, AtariEnvironment and get_env_params are project-local.


def evaluate(args):
    # create the env once just to read its specs, then close it
    env = gym.make(args.env)
    env_params = get_env_params(env, args)
    env.close()

    agent = PPOAgent(args, env_params)
    agent.load_model(load_model_remark=args.load_model_remark)

    parent_conn, child_conn = Pipe()
    worker = AtariEnvironment(args.env,
                              1,
                              child_conn,
                              is_render=True,
                              max_episode_step=args.max_episode_step)
    worker.start()

    # run 100 evaluation episodes
    for i_episode in range(100):
        obs = worker.reset()
        while True:
            obs = np.expand_dims(obs, axis=0)        # add a batch dimension
            action = agent.choose_action(obs / 255)  # scale pixels to [0, 1]

            # step the environment in the worker process
            parent_conn.send(action[0])
            obs_, r, done, info = parent_conn.recv()

            obs = obs_

            if done:
                break
Example #3
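Training entry point: builds the environment, policy model, optimizer and rollout memory, optionally resumes from a saved snapshot, then starts training.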
import gym
import torch
import torch.optim as optim

# get_functions, Memory and PPOAgent are project-local.


def main(args):

    # filename prefix for saved model snapshots
    model_store_prefix = "snapshot"

    # NormalizedEnv
    env = gym.make(args.env)

    env.seed(args.seed)
    torch.manual_seed(args.seed)

    env, generator, model, cont = get_functions(env, args)

    optimizer = optim.Adam(model.parameters(), lr=args.rllr)

    memory = Memory(args)

    agent = PPOAgent(args, model, optimizer, env, generator, memory, cont)
    if args.resume:
        agent.load_model(model_store_prefix)

    agent.train(model_store_prefix, args.save_interval)
Example #4
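A fragment of a training script (the function header and the training branch are cut off in the listing): one plain Gym env is kept for quick evaluation while SubprocVecEnv steps args.n_envs environments in parallel. make_env and PPOAgent are project-local; SubprocVecEnv follows the OpenAI Baselines vectorized-env API.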
    # one plain env for evaluation, a vectorized set for training
    env = gym.make(args.env)
    envs = SubprocVecEnv([make_env(args.env) for i in range(args.n_envs)])

    n_inputs = envs.observation_space.shape[0]
    n_outs = envs.action_space.n

    agent = PPOAgent(lr=args.lr,
                     n_inputs=n_inputs,
                     n_hidden=args.n_hidden,
                     n_outs=n_outs,
                     td_n=args.td_n,
                     ppo_epochs=args.ppo_epochs,
                     mini_batch_size=args.mini_batch_size)
    if args.load_best_pretrained_model:
        agent.load_model('../models/ppo/model.pt')
        print('Loaded pretrained model')

    # roll out a single episode in the plain env and print the total reward
    if args.test_env:
        state = env.reset()
        done = False
        score = 0
        while not done:
            env.render()
            # the agent returns an action distribution and a value estimate
            dist, value = agent.step(state)

            action = dist.sample()
            state, reward, done, _ = env.step(action.cpu().numpy())
            score += reward
        print(score)
    else:
        ...  # training branch truncated in the original listing