Example #1
import torch
from machina import loss_functional as lf  # assumed import providing monte_carlo

def update_vf(vf, optim_vf, batch, clip_param, clip, max_grad_norm):
    """
    Update function for V function.

    Parameters
    ----------
    vf : SVfunction
        V function.
    optim_vf : torch.optim.Optimizer
        Optimizer for V function.
    batch : dict
        Batch of trajectory data.
    clip_param : float
        Clipping ratio of the objective function.
    clip : bool
        If True, the V function is also updated with the clipped objective function.
    max_grad_norm : float
        Maximum gradient norm.

    Returns
    -------
    vf_loss : ndarray
        Value of the loss function.
    """
    vf_loss = lf.monte_carlo(vf, batch, clip_param, clip)  # Monte Carlo value loss
    optim_vf.zero_grad()
    vf_loss.backward()
    # Clip the gradient norm before stepping to stabilize the update.
    torch.nn.utils.clip_grad_norm_(vf.parameters(), max_grad_norm)
    optim_vf.step()
    return vf_loss.detach().cpu().numpy()
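
The helper lf.monte_carlo is not shown in this listing. As a point of reference, a PPO-style clipped Monte Carlo value loss is sketched below; the name monte_carlo_loss, the batch keys ('obs', 'rets', 'vs'), and the assumption that vf(obs) returns a value tensor directly are all illustrative, not the library's actual API.

import torch

def monte_carlo_loss(vf, batch, clip_param=0.2, clip=False):
    """PPO-style Monte Carlo value loss (sketch, assumed batch keys).

    'obs' holds observations, 'rets' holds Monte Carlo returns, and,
    when clip=True, 'vs' holds value predictions recorded at sampling time.
    """
    vs = vf(batch['obs']).squeeze(-1)          # current value predictions
    loss = 0.5 * (vs - batch['rets']) ** 2     # squared error to the returns
    if clip:
        # Keep the new predictions within clip_param of the old ones and
        # take the pessimistic (larger) of the two losses, as in PPO.
        vs_clipped = batch['vs'] + torch.clamp(
            vs - batch['vs'], -clip_param, clip_param)
        loss_clipped = 0.5 * (vs_clipped - batch['rets']) ** 2
        loss = torch.max(loss, loss_clipped)
    return loss.mean()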
Example #2
from machina import loss_functional as lf  # assumed import providing monte_carlo

def update_vf(vf, optim_vf, batch):
    """Update the V function on one batch; no objective clipping or gradient-norm control."""
    vf_loss = lf.monte_carlo(vf, batch)
    optim_vf.zero_grad()
    vf_loss.backward()
    optim_vf.step()
    return vf_loss.detach().cpu().numpy()
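
A minimal usage sketch of the update step: a toy MLP standing in for the V function, an Adam optimizer, and a fake batch. It mirrors Example #2 but wires in the monte_carlo_loss sketch above instead of the library's lf.monte_carlo, so the shapes and batch keys are illustrative only.

import torch
import torch.nn as nn

# Toy V function and optimizer (illustrative, not the library's classes).
vf = nn.Sequential(nn.Linear(4, 64), nn.Tanh(), nn.Linear(64, 1))
optim_vf = torch.optim.Adam(vf.parameters(), lr=3e-4)

# Fake batch with the keys assumed by the monte_carlo_loss sketch.
batch = {'obs': torch.randn(32, 4), 'rets': torch.randn(32)}

# One update step, following the same pattern as Example #2.
vf_loss = monte_carlo_loss(vf, batch)
optim_vf.zero_grad()
vf_loss.backward()
optim_vf.step()
print(vf_loss.detach().cpu().numpy())  # scalar value loss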