def euc_descent_step(self, params, grads, t):
  """Projected gradient descent on exploitability using Euclidean projection.

  Args:
    params: tuple of variables to be updated (dist, y)
    grads: tuple of variable gradients (grad_dist, grad_y)
    t: int, solver iteration (used to anneal the step size for y)
  Returns:
    new_params: tuple of update params (new_dist, new_y)
  """
  lr_dist, lr_y = self.lrs
  # Gradient step on each player's distribution, projected back onto the
  # probability simplex.
  new_dist = [
      simplex.euclidean_projection_onto_simplex(dist_i - lr_dist * grad_i)
      for dist_i, grad_i in zip(params[0], grads[0])
  ]
  # Anneal the y step size as 1/(t+1), but never let it fall below lr_y.
  step_y = np.clip(1 / float(t + 1), lr_y, np.inf)
  # Gradient step on each payoff-gradient estimate, projected onto the
  # nonnegative orthant. Keys are (i, j) index pairs.
  new_y = {
      key: np.clip(params[1][key] - step_y * grads[1][key], 0., np.inf)
      for key in params[1]
  }
  return (new_dist, new_y)
def euc_project(dist, y):
  """Project variables onto their feasible sets (euclidean proj for dist).

  Args:
    dist: 1-d np.array, current estimate of nash distribution
    y: 1-d np.array (same shape as dist), current estimate of payoff gradient
  Returns:
    projected variables (dist, y) as tuple
  """
  # dist must lie on the probability simplex; y on the nonnegative orthant.
  projected_dist = simplex.euclidean_projection_onto_simplex(dist)
  projected_y = np.clip(y, 0., np.inf)
  return projected_dist, projected_y
def euc_descent_step(self, params, grads, t):
  """Projected gradient descent on exploitability using Euclidean projection.

  Args:
    params: tuple of variables to be updated (dist,)
    grads: tuple of variable gradients (grad_dist,)
    t: int, solver iteration
  Returns:
    new_params: tuple of update params (new_dist,)
  """
  del t  # iteration counter is not used by this solver
  dist, grad_dist = params[0], grads[0]
  # Single gradient step followed by projection back onto the simplex.
  updated_dist = dist - self.lrs[0] * grad_dist
  updated_dist = simplex.euclidean_projection_onto_simplex(updated_dist)
  return (updated_dist,)
def euc_descent_step(self, params, grads, t):
  """Projected gradient descent on exploitability using Euclidean projection.

  Args:
    params: tuple of variables to be updated (dist,)
    grads: tuple of variable gradients (grad_dist,)
    t: int, solver iteration (unused)
  Returns:
    new_params: tuple of update params (new_dist,)
  """
  del t
  step_size = self.lrs[0]
  # Gradient step on each player's distribution, projected back onto the
  # probability simplex.
  new_params = [
      simplex.euclidean_projection_onto_simplex(dist_i - step_size * grad_i)
      for dist_i, grad_i in zip(params[0], grads[0])
  ]
  return (new_params,)
def euc_descent_step(self, params, grads, t):
  """Projected gradient descent on exploitability using Euclidean projection.

  Args:
    params: tuple of variables to be updated (dist, y, anneal_steps)
    grads: tuple of variable gradients (grad_dist, grad_y, grad_anneal_steps)
    t: int, solver iteration (used to anneal the step size for y)
  Returns:
    new_params: tuple of update params (new_dist, new_y, new_anneal_steps)
  """
  lr_dist, lr_y = self.lrs
  # Gradient step on each player's distribution, projected back onto the
  # probability simplex.
  new_dist = [
      simplex.euclidean_projection_onto_simplex(dist_i - lr_dist * grad_i)
      for dist_i, grad_i in zip(params[0], grads[0])
  ]
  # Anneal the y step size as 1/(t+1), but never let it fall below lr_y.
  step_y = np.clip(1 / float(t + 1), lr_y, np.inf)
  # Gradient step on each payoff-gradient estimate, projected onto the
  # nonnegative orthant.
  new_y = [
      np.clip(y_i - step_y * y_grad_i, 0., np.inf)
      for y_i, y_grad_i in zip(params[1], grads[1])
  ]
  # anneal_steps is updated additively by its "gradient".
  new_anneal_steps = params[2] + grads[2]
  return (new_dist, new_y, new_anneal_steps)
def test_euclidean_projection(self, vector, expected_projection):
  """Checks the simplex projection of `vector` against the expected output."""
  actual = simplex.euclidean_projection_onto_simplex(vector, subset=False)
  self.assertListEqual(
      list(actual),
      list(expected_projection),
      msg='projection not accurate')