Esempio n. 1
0
    def euc_descent_step(self, params, grads, t):
        """Take one projected gradient step on (dist, y) with Euclidean projection.

        Each entry of dist is moved against its gradient and then mapped back
        through the Euclidean simplex projection. Each entry of y is moved
        against its gradient and then clipped to be non-negative.

        Args:
          params: tuple of variables to be updated (dist, y); dist is iterated
            entry by entry and y is a mapping keyed by (i, j) pairs
          grads: tuple of variable gradients (grad_dist, grad_y), laid out like
            params
          t: int, solver iteration; the y step size is 1 / (t + 1), bounded
            below by the configured y learning rate
        Returns:
          new_params: tuple of updated params (new_dist, new_y), where new_dist
            is a list and new_y is a dict with the same (i, j) keys as y
        """
        dist_step, y_step_floor = self.lrs
        stepped_dist = [
            simplex.euclidean_projection_onto_simplex(d - dist_step * g)
            for d, g in zip(params[0], grads[0])
        ]
        # The y step size decays with the iteration count but never drops
        # below the configured learning rate.
        y_step = np.clip(1 / float(t + 1), y_step_floor, np.inf)
        stepped_y = {
            (i, j): np.clip(params[1][(i, j)] - y_step * grads[1][(i, j)],
                            0., np.inf)
            for i, j in params[1]
        }
        return (stepped_dist, stepped_y)
Esempio n. 2
0
def euc_project(dist, y):
  """Map (dist, y) back into their feasible sets.

  dist goes through the Euclidean projection onto the simplex, and y is
  clipped elementwise so that no entry is negative.

  Args:
    dist: 1-d np.array, current estimate of nash distribution
    y: 1-d np.array (same shape as dist), current estimate of payoff gradient
  Returns:
    tuple (dist, y) holding the projected variables
  """
  return (
      simplex.euclidean_projection_onto_simplex(dist),
      np.clip(y, 0., np.inf),
  )
Esempio n. 3
0
  def euc_descent_step(self, params, grads, t):
    """Take one projected gradient step on dist with Euclidean projection.

    Args:
      params: tuple of variables to be updated (dist,)
      grads: tuple of variable gradients (grad_dist,)
      t: int, solver iteration (ignored by this update)
    Returns:
      new_params: one-element tuple (new_dist,) holding the projected iterate
    """
    del t  # The step size here does not depend on the iteration.
    unprojected = params[0] - self.lrs[0] * grads[0]
    return (simplex.euclidean_projection_onto_simplex(unprojected),)
Esempio n. 4
0
    def euc_descent_step(self, params, grads, t):
        """Take one projected gradient step on every entry of dist.

        Each entry of dist is moved against its gradient and then mapped back
        through the Euclidean simplex projection.

        Args:
          params: tuple of variables to be updated (dist,); dist is iterated
            entry by entry
          grads: tuple of variable gradients (grad_dist,), laid out like params
          t: int, solver iteration (ignored by this update)
        Returns:
          new_params: one-element tuple (new_dist,), where new_dist is a list
            of the projected entries
        """
        del t  # The step size here does not depend on the iteration.
        step = self.lrs[0]
        projected = [
            simplex.euclidean_projection_onto_simplex(d - step * g)
            for d, g in zip(params[0], grads[0])
        ]
        return (projected, )
Esempio n. 5
0
  def euc_descent_step(self, params, grads, t):
    """Take one projected gradient step on (dist, y) and advance anneal_steps.

    Each entry of dist is moved against its gradient and then mapped back
    through the Euclidean simplex projection. Each entry of y is moved against
    its gradient and then clipped to be non-negative. anneal_steps is
    incremented by its "gradient" entry.

    Args:
      params: tuple of variables to be updated (dist, y, anneal_steps); dist
        and y are iterated entry by entry
      grads: tuple of variable gradients (grad_dist, grad_y, grad_anneal_steps)
      t: int, solver iteration; the y step size is 1 / (t + 1), bounded below
        by the configured y learning rate
    Returns:
      new_params: tuple of updated params (new_dist, new_y, new_anneal_steps),
        where new_dist and new_y are lists
    """
    dist_step, y_step_floor = self.lrs
    stepped_dist = [
        simplex.euclidean_projection_onto_simplex(d - dist_step * g)
        for d, g in zip(params[0], grads[0])
    ]
    # The y step size decays with the iteration count but never drops below
    # the configured learning rate.
    y_step = np.clip(1 / float(t + 1), y_step_floor, np.inf)
    stepped_y = [
        np.clip(y_i - y_step * g_i, 0., np.inf)
        for y_i, g_i in zip(params[1], grads[1])
    ]
    return (stepped_dist, stepped_y, params[2] + grads[2])
Esempio n. 6
0
 def test_euclidean_projection(self, vector, expected_projection):
     """Check the Euclidean simplex projection of vector entry by entry.

     Args:
       vector: input handed to simplex.euclidean_projection_onto_simplex
       expected_projection: sequence the projection must equal exactly
     """
     actual = simplex.euclidean_projection_onto_simplex(vector, subset=False)
     actual_entries = list(actual)
     expected_entries = list(expected_projection)
     self.assertListEqual(actual_entries, expected_entries,
                          msg='projection not accurate')