Ejemplo n.º 1
0
 def process_batch(engine, batch):
     """Run one optimisation step on ``batch`` and report training metrics.

     Clears the optimizer's gradients, obtains the loss from
     ``common.calc_loss_dqn`` (given ``net`` and ``tgt_net.target_model``),
     backpropagates it and steps the optimizer. Whenever
     ``engine.state.iteration`` is a multiple of ``params.target_net_sync``,
     ``tgt_net.sync()`` is invoked.

     Args:
         engine: Object exposing ``state.iteration``; presumably the training
             engine that drives this handler -- confirm against the caller.
         batch: Forwarded unchanged to ``common.calc_loss_dqn``.

     Returns:
         A dict with ``"loss"`` (the scalar from ``.item()`` on the loss) and
         ``"epsilon"`` (the current ``batch_generator.epsilon``).
     """
     optimizer.zero_grad()
     td_loss = common.calc_loss_dqn(
         batch,
         net,
         tgt_net.target_model,
         gamma=params.gamma,
         device=device,
     )
     td_loss.backward()
     optimizer.step()

     sync_due = engine.state.iteration % params.target_net_sync == 0
     if sync_due:
         tgt_net.sync()

     # Built incrementally so the values are read in the same order as before:
     # loss first, then epsilon.
     report = {"loss": td_loss.item()}
     report["epsilon"] = batch_generator.epsilon
     return report
Ejemplo n.º 2
0
 def process_batch(engine_, batch):
     """Perform a single training step on ``batch`` and return its metrics.

     The loss comes from ``common.calc_loss_dqn`` with the discount set to
     ``params.gamma ** args.n``. After the optimizer step the current
     iteration is passed to ``epsilon_tracker.frame``, and ``tgt_net.sync()``
     runs on every iteration that is a multiple of ``params.target_net_sync``.

     Args:
         engine_: Object exposing ``state.iteration``; presumably the training
             engine that drives this handler -- confirm against the caller.
         batch: Forwarded unchanged to ``common.calc_loss_dqn``.

     Returns:
         A dict with ``"loss"`` (scalar loss value) and ``"epsilon"`` (the
         current ``selector.epsilon``).
     """
     optimizer.zero_grad()

     # NOTE(review): gamma raised to args.n looks like the discount for an
     # n-step return -- confirm that args.n is the step count.
     discount = params.gamma ** args.n
     step_loss = common.calc_loss_dqn(
         batch,
         net,
         tgt_net.target_model,
         gamma=discount,
         device=device,
     )
     step_loss.backward()
     optimizer.step()

     iteration = engine_.state.iteration
     epsilon_tracker.frame(iteration)
     if iteration % params.target_net_sync == 0:
         tgt_net.sync()

     report = {"loss": step_loss.item()}
     report["epsilon"] = selector.epsilon
     return report
Ejemplo n.º 3
0
 def process_batch(engine, batch):
     """Run one optimisation step on ``batch`` and periodically record SNR.

     Computes the loss with ``common.calc_loss_dqn``, backpropagates and steps
     the optimizer. ``tgt_net.sync()`` is called on iterations that are a
     multiple of ``params.target_net_sync``. On iterations that are a multiple
     of ``NOISY_SNR_EVERY_ITERS``, each value yielded by
     ``net.noisy_layers_sigma_snr()`` is stored in ``engine.state.metrics``
     under the keys ``snr_1``, ``snr_2``, ... in iteration order.

     Args:
         engine: Object exposing ``state.iteration`` and ``state.metrics``;
             presumably the training engine -- confirm against the caller.
         batch: Forwarded unchanged to ``common.calc_loss_dqn``.

     Returns:
         A dict holding only ``"loss"``, the scalar loss value.
     """
     optimizer.zero_grad()
     loss = common.calc_loss_dqn(
         batch, net, tgt_net.target_model, gamma=params.gamma, device=device
     )
     loss.backward()
     optimizer.step()

     iteration = engine.state.iteration
     if iteration % params.target_net_sync == 0:
         tgt_net.sync()

     if iteration % NOISY_SNR_EVERY_ITERS == 0:
         metrics = engine.state.metrics
         # Keys are 1-based: the first reported layer is "snr_1".
         for layer_idx, snr in enumerate(net.noisy_layers_sigma_snr()):
             metrics[f"snr_{layer_idx+1}"] = snr

     return {"loss": loss.item()}
Ejemplo n.º 4
0
 def process_batch(engine, batch):
     """Run one optimisation step on ``batch`` and periodically evaluate states.

     Computes the loss with ``common.calc_loss_dqn``, backpropagates and steps
     the optimizer, then passes the current iteration to
     ``epsilon_tracker.frame``. ``tgt_net.sync()`` is called on iterations that
     are a multiple of ``params.target_net_sync``. On iterations that are a
     multiple of ``EVAL_EVERY_FRAME``, ``evaluate_states`` is called on a fixed
     set of states that is sampled from ``buffer`` the first time it is needed
     and cached on ``engine.state.eval_states`` afterwards.

     Args:
         engine: Object exposing ``state.iteration`` and accepting new
             attributes on ``state``; presumably the training engine --
             confirm against the caller.
         batch: Forwarded unchanged to ``common.calc_loss_dqn``.

     Returns:
         A dict with ``"loss"`` (scalar loss value) and ``"epsilon"`` (the
         current ``selector.epsilon``).
     """
     optimizer.zero_grad()
     loss_v = common.calc_loss_dqn(
         batch, net, tgt_net.target_model, gamma=params.gamma, device=device
     )
     loss_v.backward()
     optimizer.step()
     epsilon_tracker.frame(engine.state.iteration)
     if engine.state.iteration % params.target_net_sync == 0:
         tgt_net.sync()
     if engine.state.iteration % EVAL_EVERY_FRAME == 0:
         eval_states = getattr(engine.state, "eval_states", None)
         if eval_states is None:
             # Sample once and cache, so every evaluation sees the same states.
             # np.asarray is used instead of np.array(..., copy=False): under
             # NumPy 2.0 copy=False raises ValueError whenever a copy cannot be
             # avoided, which is always the case when stacking a list.
             # asarray copies only when needed on both NumPy 1.x and 2.x.
             sampled = buffer.sample(STATES_TO_EVALUATE)
             eval_states = np.asarray(
                 [np.asarray(transition.state) for transition in sampled]
             )
             engine.state.eval_states = eval_states
         evaluate_states(eval_states, net, device, engine)
     return {
         "loss": loss_v.item(),
         "epsilon": selector.epsilon,
     }