def value_objective_grad_func(value_net_flat_params):
    """Gradient callback for a scipy optimizer over the value network.

    Loads the flat parameter vector into ``value_net`` (converted via
    ``DOUBLE`` — presumably numpy -> double tensor; confirm against caller),
    zeroes stale gradients, computes the MSE value loss with L2 weight
    decay, backpropagates, and returns the flattened parameter gradient
    as a NumPy array.
    """
    set_flat_params(value_net, DOUBLE(value_net_flat_params))

    # Clear any gradients left over from a previous call so backward()
    # accumulates into a clean slate.
    for param in value_net.parameters():
        if param.grad is not None:
            param.grad.data.fill_(0)

    predicted_values = value_net(states)
    loss = nn.MSELoss()(predicted_values, returns)

    # L2 weight decay, folded directly into the loss.
    for param in value_net.parameters():
        loss += param.pow(2).sum() * l2_reg

    loss.backward()  # populate .grad on every parameter

    return get_flat_grad_params(value_net).detach().cpu().numpy()
def value_objective_grad_func(value_net_flat_params):
    """Gradient callback for a scipy optimizer over the value network.

    Loads the flat parameter vector into ``value_net`` (converted via
    ``FLOAT`` — presumably numpy -> float tensor; confirm against caller),
    zeroes stale gradients, computes the MSE value loss with L2 weight
    decay, backpropagates, and returns the flattened parameter gradient
    as a float64 NumPy array (scipy expects double precision).
    """
    set_flat_params(value_net, FLOAT(value_net_flat_params))

    # Clear any gradients left over from a previous call so backward()
    # accumulates into a clean slate.
    for param in value_net.parameters():
        if param.grad is not None:
            param.grad.data.fill_(0)

    predicted_values = value_net(states)
    loss = nn.MSELoss()(predicted_values, returns)

    # L2 weight decay, folded directly into the loss.
    for param in value_net.parameters():
        loss += param.pow(2).sum() * l2_reg

    loss.backward()  # populate .grad on every parameter

    flat_grad = get_flat_grad_params(value_net).detach().cpu().numpy()
    return flat_grad.astype(np.float64)