def process_batch(engine, batch):
    """Run one optimisation step on ``batch`` and report training metrics.

    The loss comes from ``model.calc_loss_dqn``; it is backpropagated and
    ``optimizer`` is stepped. Afterwards the current iteration is passed to
    ``epsilon_tracker.frame`` and ``tgt_net`` is synced whenever the
    iteration is a multiple of ``PARAMS.target_net_sync``.

    Args:
        engine: Object exposing ``state.iteration`` (presumably the training
            engine that invokes this handler -- confirm against the caller).
        batch: Training batch, forwarded unchanged to ``model.calc_loss_dqn``.

    Returns:
        dict: ``"loss"`` (the loss as a Python number via ``.item()``) and
        ``"epsilon"`` (the current ``action_selector.epsilon``).
    """
    # Gradient step on the online network.
    optimizer.zero_grad()
    batch_loss = model.calc_loss_dqn(
        batch,
        net,
        tgt_net.target_model,
        preproc,
        gamma=PARAMS.gamma,
        device=device,
    )
    batch_loss.backward()
    optimizer.step()

    # Book-keeping driven by the iteration counter.
    step = engine.state.iteration
    epsilon_tracker.frame(step)
    sync_due = step % PARAMS.target_net_sync == 0
    if sync_due:
        tgt_net.sync()

    return dict(loss=batch_loss.item(), epsilon=action_selector.epsilon)
 def process_batch(engine, batch):
     """Run one optimisation step and update the agent's epsilon.

     The loss is computed by ``model.calc_loss_dqn`` from the batch, the
     preprocessor/network pair and their target counterparts, then
     backpropagated before ``optimizer`` is stepped. Epsilon follows
     ``1 - iteration / params.epsilon_steps`` and never drops below
     ``params.epsilon_final``. Every ``params.sync_nets`` iterations both
     ``tgt_net`` and ``tgt_prep`` are synced.

     Args:
         engine: Object exposing ``state.iteration`` (presumably the training
             engine that invokes this handler -- confirm against the caller).
         batch: Training batch, forwarded unchanged to ``model.calc_loss_dqn``.

     Returns:
         dict: ``"loss"`` (the loss via ``.item()``) and ``"epsilon"``
         (the value just stored on ``agent.epsilon``).
     """
     optimizer.zero_grad()
     batch_loss = model.calc_loss_dqn(
         batch, prep, tgt_prep.target_model,
         net, tgt_net.target_model, GAMMA, device=device)
     batch_loss.backward()
     optimizer.step()

     # Linear decay of epsilon, floored at the configured final value.
     step = engine.state.iteration
     decayed = 1 - step / params.epsilon_steps
     agent.epsilon = max(params.epsilon_final, decayed)

     # Refresh the target network first, then the target preprocessor.
     if step % params.sync_nets == 0:
         for target in (tgt_net, tgt_prep):
             target.sync()

     return dict(loss=batch_loss.item(), epsilon=agent.epsilon)
 def process_batch(engine, batch):
     """Run one optimisation step; epsilon tracking is optional.

     The loss comes from ``model.calc_loss_dqn``; it is backpropagated and
     ``optimizer`` is stepped. When ``epsilon_tracker`` is not ``None`` the
     current iteration is passed to ``epsilon_tracker.frame`` and the
     resulting ``action_selector.epsilon`` is reported. ``tgt_net`` is synced
     whenever the iteration is a multiple of ``PARAMS.target_net_sync``.

     Args:
         engine: Object exposing ``state.iteration`` (presumably the training
             engine that invokes this handler -- confirm against the caller).
         batch: Training batch, forwarded unchanged to ``model.calc_loss_dqn``.

     Returns:
         dict: always contains ``'loss'``; contains ``'epsilon'`` only when
         an epsilon tracker is configured.
     """
     optimizer.zero_grad()
     batch_loss = model.calc_loss_dqn(
         batch, net, tgt_net.target_model, preproc,
         gamma=PARAMS.gamma, device=device)
     batch_loss.backward()
     optimizer.step()

     metrics = {}
     step = engine.state.iteration
     tracking_epsilon = epsilon_tracker is not None
     if tracking_epsilon:
         epsilon_tracker.frame(step)
         metrics['epsilon'] = action_selector.epsilon

     if step % PARAMS.target_net_sync == 0:
         tgt_net.sync()

     # The loss is read last, after any target sync, as the final entry.
     metrics['loss'] = batch_loss.item()
     return metrics
コード例 #4
0
    def process_batches(engine, batches):
        """Run one optimisation step for each of the "deer" and "tiger" models.

        ``batches`` is paired positionally with the deer and tiger
        optimiser / network / target-network triples. For each pair the loss
        from ``model.calc_loss_dqn`` is backpropagated, the optimiser is
        stepped, and the target network is synced whenever the iteration is a
        multiple of ``PARAMS.target_net_sync``. After the loop the current
        iteration is passed to ``epsilon_tracker.frame``.

        Args:
            engine: Object exposing ``state.iteration`` (presumably the
                training engine that invokes this handler -- confirm against
                the caller).
            batches: Iterable of batches, deer first and tiger second; extra
                or missing entries are silently truncated by ``zip``.

        Returns:
            dict: ``"<name>_loss"`` for every processed model plus
            ``'epsilon'`` (the current ``action_selector.epsilon``).
        """
        # One row per model: (label, optimiser, online net, target net).
        learners = (
            ("deer", deer_optimizer, net_deer, tgt_net_deer),
            ("tiger", tiger_optimizer, net_tiger, tgt_net_tiger),
        )

        metrics = {}
        for (label, optim, online_net, target_net), data in zip(learners, batches):
            optim.zero_grad()
            model_loss = model.calc_loss_dqn(
                data, online_net, target_net.target_model, preproc,
                gamma=PARAMS.gamma, device=device)
            model_loss.backward()
            optim.step()
            metrics[f"{label}_loss"] = model_loss.item()

            sync_due = engine.state.iteration % PARAMS.target_net_sync == 0
            if sync_due:
                target_net.sync()

        epsilon_tracker.frame(engine.state.iteration)
        metrics['epsilon'] = action_selector.epsilon
        return metrics