def process_batch(engine, batch):
    """Perform one optimisation step on ``batch`` and report training metrics.

    The optimizer, networks, preprocessor, hyper-parameters, epsilon tracker
    and action selector are not parameters; they are resolved from the
    enclosing scope.

    Args:
        engine: Object exposing ``state.iteration``, the current iteration
            counter (presumably the training engine driving this callback).
        batch: Batch forwarded unchanged to ``model.calc_loss_dqn``.

    Returns:
        A dict with two keys: ``"loss"`` (the value of ``loss.item()``) and
        ``"epsilon"`` (the current ``action_selector.epsilon``).
    """
    optimizer.zero_grad()
    loss = model.calc_loss_dqn(
        batch,
        net,
        tgt_net.target_model,
        preproc,
        gamma=PARAMS.gamma,
        device=device,
    )
    loss.backward()
    optimizer.step()

    iteration = engine.state.iteration
    # Hand the iteration counter to the tracker; presumably this is what
    # updates ``action_selector.epsilon`` (tracker is defined elsewhere).
    epsilon_tracker.frame(iteration)

    # Refresh the target network once every ``PARAMS.target_net_sync``
    # iterations.
    sync_due = iteration % PARAMS.target_net_sync == 0
    if sync_due:
        tgt_net.sync()

    metrics = {}
    metrics["loss"] = loss.item()
    metrics["epsilon"] = action_selector.epsilon
    return metrics
def process_batch(engine, batch):
    """Perform one optimisation step and linearly decay the agent's epsilon.

    The optimizer, networks, preprocessors, ``GAMMA``, ``params``, ``agent``
    and ``device`` are not parameters; they are resolved from the enclosing
    scope.

    Args:
        engine: Object exposing ``state.iteration``, the current iteration
            counter (presumably the training engine driving this callback).
        batch: Batch forwarded unchanged to ``model.calc_loss_dqn``.

    Returns:
        A dict with two keys: ``"loss"`` (the value of ``loss.item()``) and
        ``"epsilon"`` (the value just stored in ``agent.epsilon``).
    """
    optimizer.zero_grad()
    loss = model.calc_loss_dqn(
        batch,
        prep,
        tgt_prep.target_model,
        net,
        tgt_net.target_model,
        GAMMA,
        device=device,
    )
    loss.backward()
    optimizer.step()

    iteration = engine.state.iteration

    # Epsilon falls linearly from 1 as iterations accumulate and is clamped
    # from below at ``params.epsilon_final``.
    decayed = 1 - iteration / params.epsilon_steps
    agent.epsilon = max(params.epsilon_final, decayed)

    # Every ``params.sync_nets`` iterations, sync the target network and the
    # target preprocessor together.
    sync_due = iteration % params.sync_nets == 0
    if sync_due:
        tgt_net.sync()
        tgt_prep.sync()

    metrics = {}
    metrics["loss"] = loss.item()
    metrics["epsilon"] = agent.epsilon
    return metrics
def process_batch(engine, batch):
    """Perform one optimisation step; epsilon tracking is optional.

    The optimizer, networks, preprocessor, hyper-parameters, epsilon tracker
    and action selector are not parameters; they are resolved from the
    enclosing scope.

    Args:
        engine: Object exposing ``state.iteration``, the current iteration
            counter (presumably the training engine driving this callback).
        batch: Batch forwarded unchanged to ``model.calc_loss_dqn``.

    Returns:
        A dict that always contains ``"loss"`` (the value of
        ``loss.item()``) and contains ``"epsilon"`` only when
        ``epsilon_tracker`` is not ``None``.
    """
    metrics = {}

    optimizer.zero_grad()
    loss = model.calc_loss_dqn(
        batch,
        net,
        tgt_net.target_model,
        preproc,
        gamma=PARAMS.gamma,
        device=device,
    )
    loss.backward()
    optimizer.step()

    iteration = engine.state.iteration

    # NOTE(review): epsilon is only reported when a tracker is configured;
    # without one the "epsilon" key is absent from the result - confirm that
    # consumers of the metrics tolerate the missing key.
    if epsilon_tracker is not None:
        epsilon_tracker.frame(iteration)
        metrics["epsilon"] = action_selector.epsilon

    # Refresh the target network once every ``PARAMS.target_net_sync``
    # iterations.
    sync_due = iteration % PARAMS.target_net_sync == 0
    if sync_due:
        tgt_net.sync()

    metrics["loss"] = loss.item()
    return metrics
def process_batches(engine, batches):
    """Perform one optimisation step for the deer and the tiger networks.

    The per-group optimizers and networks, the preprocessor,
    hyper-parameters, epsilon tracker and action selector are not parameters;
    they are resolved from the enclosing scope.

    Args:
        engine: Object exposing ``state.iteration``, the current iteration
            counter (presumably the training engine driving this callback).
        batches: Iterable of batches consumed in order: the first is used for
            "deer", the second for "tiger". Iteration stops at the shorter
            of ``batches`` and the two configured groups.

    Returns:
        A dict with ``"deer_loss"`` and ``"tiger_loss"`` (one per processed
        batch, each from ``loss.item()``) followed by ``"epsilon"`` (the
        current ``action_selector.epsilon``).
    """
    groups = (
        ("deer", deer_optimizer, net_deer, tgt_net_deer),
        ("tiger", tiger_optimizer, net_tiger, tgt_net_tiger),
    )
    iteration = engine.state.iteration
    metrics = {}

    for (label, group_opt, online_net, target_net), batch in zip(groups, batches):
        group_opt.zero_grad()
        loss = model.calc_loss_dqn(
            batch,
            online_net,
            target_net.target_model,
            preproc,
            gamma=PARAMS.gamma,
            device=device,
        )
        loss.backward()
        group_opt.step()
        metrics[f"{label}_loss"] = loss.item()

        # Each group's target network is refreshed on the same schedule,
        # once every ``PARAMS.target_net_sync`` iterations.
        if iteration % PARAMS.target_net_sync == 0:
            target_net.sync()

    # One tracker/selector pair is used for both groups, so epsilon is
    # updated and reported once per call, after both steps.
    epsilon_tracker.frame(iteration)
    metrics["epsilon"] = action_selector.epsilon
    return metrics