import torch

# Assumed import: `lf` must be the loss-functional module providing monte_carlo
# (e.g. machina's loss_functional).
from machina import loss_functional as lf


def update_vf(vf, optim_vf, batch, clip_param, clip, max_grad_norm):
    """
    Update function for V function.

    Parameters
    ----------
    vf : SVfunction
        V function.
    optim_vf : torch.optim.Optimizer
        Optimizer for V function.
    batch : dict
        Batch of trajectories.
    clip_param : float
        Clipping ratio of the objective function.
    clip : bool
        If True, the V function is also updated with the clipped
        objective function.
    max_grad_norm : float
        Maximum gradient norm.

    Returns
    -------
    vf_loss : ndarray
        Value of the loss function.
    """
    vf_loss = lf.monte_carlo(vf, batch, clip_param, clip)
    optim_vf.zero_grad()
    vf_loss.backward()
    # Clip gradients in-place before the optimizer step.
    torch.nn.utils.clip_grad_norm_(vf.parameters(), max_grad_norm)
    optim_vf.step()
    return vf_loss.detach().cpu().numpy()
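# For reference, a minimal sketch of the Monte-Carlo value loss that
# lf.monte_carlo is assumed to compute. The batch keys ('obs', 'rets', 'vs')
# and the exact clipping form are illustrative assumptions, not taken from
# this file:
#
#     vs = vf(batch['obs'])                       # predicted state values
#     if clip:
#         # PPO-style value clipping: keep the new prediction within
#         # clip_param of the value recorded when the batch was collected.
#         vs_clipped = batch['vs'] + torch.clamp(
#             vs - batch['vs'], -clip_param, clip_param)
#         vf_loss = 0.5 * torch.max(
#             (vs - batch['rets']) ** 2,
#             (vs_clipped - batch['rets']) ** 2).mean()
#     else:
#         vf_loss = 0.5 * ((vs - batch['rets']) ** 2).mean()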
def update_vf(vf, optim_vf, batch):
    """Variant of update_vf without value clipping or gradient-norm clipping."""
    vf_loss = lf.monte_carlo(vf, batch)
    optim_vf.zero_grad()
    vf_loss.backward()
    optim_vf.step()
    return vf_loss.detach().cpu().numpy()
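# Minimal usage sketch (assumptions: a machina-style trajectory object with an
# iterate(batch_size, epoch) method and an already-built V function `vf`;
# hyperparameter values are illustrative only):
#
#     optim_vf = torch.optim.Adam(vf.parameters(), lr=3e-4)
#     for batch in traj.iterate(batch_size=256, epoch=1):
#         vf_loss = update_vf(vf, optim_vf, batch,
#                             clip_param=0.2, clip=True, max_grad_norm=0.5)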