Esempio n. 1
0
def train(nb_steps: int, env: Env, agent: Agent, start_obs: Arrayable):
    """Run a single training epoch of agent/environment interaction.

    The agent is put into training mode and reset once, then ``interact``
    is called ``nb_steps`` times, each call being fed the observation
    produced by the previous one.

    :param nb_steps: number of interaction steps to perform
    :param env: environment passed to every ``interact`` call
    :param agent: agent passed to every ``interact`` call
    :param start_obs: observation used for the first interaction step

    :return: observation produced by the last interaction step
        (``start_obs`` unchanged when no step is performed)
    """
    # Prepare the agent before any interaction takes place.
    agent.train()
    agent.reset()

    current_obs = start_obs
    for _step in range(nb_steps):
        # Only the first of the three values returned by ``interact`` is
        # carried over to the next step; the other two are discarded.
        step_result = interact(env, agent, current_obs)
        current_obs, _second, _third = step_result
    return current_obs
Esempio n. 2
0
            # Episode finished: start a new one and record the scores
            # accumulated during the episode that just ended.
            if done:
                # `state_` holds the value returned by env.reset(); `state` is
                # that value standardized with the running mean/variance kept
                # in `state_rms` and clipped to [-5, 5]. The 1e-8 term keeps
                # the division finite when the variance is zero.
                state_ = (env.reset())
                state = np.clip(
                    (state_ - state_rms.mean) / (state_rms.var**0.5 + 1e-8),
                    -5, 5)
                score_lst.append(score)
                # NOTE(review): `is not None` is the idiomatic singleton check
                # (PEP 8); `!= None` can misbehave if the writer type
                # overrides comparison operators.
                if writer != None:
                    # Log both per-episode scores against the epoch index.
                    writer.add_scalar("score/real", score, n_epi)
                    writer.add_scalar("score/discriminator",
                                      discriminator_score, n_epi)
                # Reset the per-episode accumulators for the next episode.
                score = 0
                discriminator_score = 0
            else:
                # Episode continues: carry both `next_state` and
                # `next_state_` over to the next step.
                state = next_state
                state_ = next_state_
        # Once per epoch: train the agent, then fold the states gathered in
        # `state_lst` into the running statistics and clear the buffer.
        agent.train(discriminator, discriminator_args.batch_size, state_rms,
                    n_epi)
        state_rms.update(np.vstack(state_lst))
        state_lst = []
        # Report the mean score every `print_interval` epochs (epoch 0 is
        # skipped), then start a fresh averaging window.
        # NOTE(review): if no episode finished since the last report,
        # `score_lst` is empty and this division raises ZeroDivisionError --
        # confirm that cannot happen with the rollout code outside this view.
        if n_epi % args.print_interval == 0 and n_epi != 0:
            print("# of episode :{}, avg score : {:.1f}".format(
                n_epi,
                sum(score_lst) / len(score_lst)))
            score_lst = []
        # Save the agent weights every `save_interval` epochs (epoch 0 is
        # skipped).
        # NOTE(review): bitwise `&` gives the right result here only because
        # both operands are parenthesized booleans; `and` (as used in the
        # print check above) would be the conventional operator.
        # NOTE(review): presumably './model_weights/' must already exist --
        # confirm it is created elsewhere.
        if (n_epi % args.save_interval == 0) & (n_epi != 0):
            torch.save(agent.state_dict(),
                       './model_weights/model_' + str(n_epi))
else:  #off-policy
    # Off-policy variant: each epoch starts with zeroed score accumulators
    # and a freshly reset environment.
    for n_epi in range(args.epochs):
        score = 0.0
        discriminator_score = 0.0
        state = env.reset()
Esempio n. 3
0
from models.leaky_relu_model import LeakyReluModel
from agents.agent import Agent

# Build the model and the agent that wraps it.
model = LeakyReluModel()
agent = Agent(model)

# Show the summary of the underlying network before training starts.
network_summary = model.model.summary()
print(network_summary)

# Train through the agent, then save the model.
agent.train()
model.save()

# Print the model's prediction for every integer from 0 to 99.
for number in range(100):
    prediction = model.predict_num(number)
    print(number, prediction)