Code Example #1
import numpy as np
from gym.envs.classic_control import CartPoleEnv


class CartPole:
    """CartPole balancing wrapped as a 5-parameter black-box minimisation problem."""

    def __init__(self, gravity):
        self.dim = 5  # 4 policy weights + 1 bias
        self.env = CartPoleEnv()  # instantiated directly, so no TimeLimit wrapper
        self.env.gravity = gravity

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def action(self, observation, x):
        x = x * 10 - 5  # rescale parameters from [0, 1] to [-5, 5]
        w = x[:4]
        b = x[4]
        # Linear policy: push right (action 1) if the sigmoid output exceeds 0.5
        return int(self.sigmoid(np.sum(observation * w) + b) > 0.5)

    def fitness(self, x):
        fitness = 0
        observation = self.env.reset()  # classic Gym (<0.26) API: reset() returns only the observation
        for t in range(200):  # manual 200-step episode cap
            action = self.action(observation, x)
            observation, reward, done, info = self.env.step(action)
            fitness += reward
            if done:
                break
        return -fitness  # negated return: optimisers minimise, but we want to maximise

    def __del__(self):
        self.env.close()
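
Since fitness expects a parameter vector in [0, 1]^5 and returns the negated episode return, the class plugs straight into a black-box minimiser. Below is a minimal usage sketch with the third-party pycma package; the choice of optimiser, the gravity=9.8 value, and the evaluation budget are illustrative assumptions, not part of the original snippet.

import cma  # third-party pycma package (assumed here; pip install cma)

problem = CartPole(gravity=9.8)  # hypothetical gravity value for illustration

# Start from the centre of the [0, 1]^5 box with initial step size 0.3,
# keeping samples inside the box and capping the evaluation budget
es = cma.CMAEvolutionStrategy(problem.dim * [0.5], 0.3,
                              {'bounds': [0, 1], 'maxfevals': 2000})
es.optimize(problem.fitness)
print('best (negated) return found:', es.result.fbest)

A policy that fully balances the pole reaches the 200-step cap, so the best attainable objective value here is -200.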
Code Example #2
File: main.py Project: victor8733/imitation-learning
                    # PPO update on the freshly collected policy trajectories
                    # (this call sits inside the inner PPO-epoch loop)
                    ppo_update(agent, policy_trajectories, agent_optimiser,
                               args.ppo_clip, epoch, args.value_loss_coeff,
                               args.entropy_loss_coeff)

    # Evaluate agent and plot metrics
    if step % args.evaluation_interval == 0:
        metrics['test_steps'].append(step)
        metrics['test_returns'].append(
            evaluate_agent(agent, args.evaluation_episodes, seed=args.seed))
        lineplot(metrics['test_steps'], metrics['test_returns'],
                 'test_returns')
        if args.imitation != 'BC':
            lineplot(metrics['train_steps'], metrics['train_returns'],
                     'train_returns')

if args.save_trajectories:
    # Store trajectories from agent after training
    _, trajectories = evaluate_agent(agent,
                                     args.evaluation_episodes,
                                     return_trajectories=True,
                                     seed=args.seed)
    torch.save(trajectories, os.path.join('results', 'trajectories.pth'))

# Save agent weights, discriminator weights (adversarial methods only) and metrics
torch.save(agent.state_dict(), os.path.join('results', 'agent.pth'))
if args.imitation in ['AIRL', 'GAIL']:
    torch.save(discriminator.state_dict(),
               os.path.join('results', 'discriminator.pth'))
torch.save(metrics, os.path.join('results', 'metrics.pth'))
env.close()
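
Code Example #2 persists up to four artifacts under results/: the agent weights, the discriminator weights (only for the adversarial methods AIRL and GAIL), the evaluation trajectories, and the metrics dict. A minimal sketch of loading them back, assuming training has already produced those files; rebuilding the networks themselves is not shown here, since the snippet does not include the agent or discriminator constructors.

import os
import torch

# Paths mirror the torch.save calls above
metrics = torch.load(os.path.join('results', 'metrics.pth'))
trajectories = torch.load(os.path.join('results', 'trajectories.pth'))
agent_state = torch.load(os.path.join('results', 'agent.pth'))

# agent_state is a plain state dict (parameter name -> tensor); restoring it
# requires reconstructing the same network class first, then calling
# model.load_state_dict(agent_state)
print(sorted(agent_state.keys())[:5])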