예제 #1
0
    def value_objective_grad_func(value_net_flat_params):
        """Gradient callback for a scipy optimizer.

        Loads the flat parameter vector into ``value_net``, clears stale
        gradients, evaluates the MSE value loss with L2 weight decay,
        backpropagates, and returns the flattened gradient as a numpy array.
        """
        # Install the candidate parameters proposed by the optimizer.
        set_flat_params(value_net, DOUBLE(value_net_flat_params))

        # Zero out any gradient left over from a previous evaluation.
        for p in value_net.parameters():
            if p.grad is not None:
                p.grad.data.fill_(0)

        predicted_values = value_net(states)
        loss = nn.MSELoss()(predicted_values, returns)

        # L2 weight decay: penalize squared parameter magnitudes.
        for p in value_net.parameters():
            loss = loss + p.pow(2).sum() * l2_reg

        loss.backward()  # populate .grad on every parameter
        return get_flat_grad_params(value_net).detach().cpu().numpy()
예제 #2
0
    def value_objective_grad_func(value_net_flat_params):
        """Gradient callback for a scipy optimizer.

        Loads the flat parameter vector into ``value_net``, clears stale
        gradients, evaluates the MSE value loss with L2 weight decay,
        backpropagates, and returns the flattened gradient as a float64
        numpy array (the dtype scipy optimizers expect).
        """
        # Install the candidate parameters proposed by the optimizer.
        set_flat_params(value_net, FLOAT(value_net_flat_params))

        # Zero out any gradient left over from a previous evaluation.
        for p in value_net.parameters():
            if p.grad is not None:
                p.grad.data.fill_(0)

        predicted_values = value_net(states)
        loss = nn.MSELoss()(predicted_values, returns)

        # L2 weight decay: penalize squared parameter magnitudes.
        for p in value_net.parameters():
            loss = loss + p.pow(2).sum() * l2_reg

        loss.backward()  # populate .grad on every parameter
        flat_grad = get_flat_grad_params(value_net).detach().cpu().numpy()
        return flat_grad.astype(np.float64)