def train(nb_steps: int, env: Env, agent: Agent, start_obs: Arrayable):
    """Trains for one epoch.

    :args nb_steps: number of interaction steps
    :args env: environment
    :args agent: interacting agent
    :args start_obs: starting observation
    :return: final observation
    """
    # Put the agent in training mode and clear any per-epoch state.
    agent.train()
    agent.reset()
    obs = start_obs
    for _ in range(nb_steps):
        # interact; only the next observation is carried forward,
        # the other two returns are discarded here
        obs, _, _ = interact(env, agent, obs)
    return obs
# NOTE(review): fragment of a larger training script — the enclosing
# `if <on-policy>:` branch, the episode loop (`for n_epi ...`), and the
# per-step loop all begin before this chunk. Relative indentation below
# is reconstructed from context; TODO confirm against the full file.
            if done:
                # Episode over: reset the env and normalize the fresh
                # observation with the running mean/var, clipped to [-5, 5].
                state_ = (env.reset())
                state = np.clip(
                    (state_ - state_rms.mean) / (state_rms.var**0.5 + 1e-8),
                    -5, 5)
                score_lst.append(score)
                # NOTE(review): `writer is not None` would be the
                # idiomatic comparison here.
                if writer != None:
                    writer.add_scalar("score/real", score, n_epi)
                    writer.add_scalar("score/discriminator",
                                      discriminator_score, n_epi)
                # Reset per-episode accumulators.
                score = 0
                discriminator_score = 0
            else:
                # Episode continues: carry forward the normalized (`state`)
                # and raw (`state_`) next observations.
                state = next_state
                state_ = next_state_
        # One agent/discriminator update per episode, then fold the
        # collected raw states into the normalization statistics.
        agent.train(discriminator, discriminator_args.batch_size,
                    state_rms, n_epi)
        state_rms.update(np.vstack(state_lst))
        state_lst = []
        # Periodic console report of the average score since last report.
        if n_epi % args.print_interval == 0 and n_epi != 0:
            print("# of episode :{}, avg score : {:.1f}".format(
                n_epi, sum(score_lst) / len(score_lst)))
            score_lst = []
        # NOTE(review): bitwise `&` on two bools happens to work, but
        # `and` would be clearer and short-circuiting.
        if (n_epi % args.save_interval == 0) & (n_epi != 0):
            torch.save(agent.state_dict(),
                       './model_weights/model_' + str(n_epi))
else: #off-policy
    # Off-policy branch: per-episode score accumulators and initial state.
    # (Loop body continues past the end of this chunk.)
    for n_epi in range(args.epochs):
        score = 0.0
        discriminator_score = 0.0
        state = env.reset()
"""Train and save a LeakyReluModel via an Agent, then spot-check predictions."""
from models.leaky_relu_model import LeakyReluModel
from agents.agent import Agent

# Build the model and the agent that drives its training.
model = LeakyReluModel()
agent = Agent(model)

# Print the inner model's architecture summary.
# NOTE(review): `model.model` is presumably a Keras-style model whose
# `summary()` prints itself — confirm; the wrapping print may emit `None`.
print(model.model.summary())

# Train, then persist the trained weights.
agent.train()
model.save()

# Spot-check: predictions for the first 100 integers.
for i in range(100):
    print(i, model.predict_num(i))