def grad_check(self):
    '''Check whether the full gradient equals the gradient computed by finite differences at a random point.'''
    w = np.random.randn(self.dim)
    delta = np.zeros(self.dim)
    grad = np.zeros(self.dim)
    eps = 1e-4

    # Central finite-difference approximation, one coordinate at a time
    for i in range(self.dim):
        delta[i] = eps
        grad[i] = (self.f(w + delta) - self.f(w - delta)) / 2 / eps
        delta[i] = 0

    if np.linalg.norm(grad - self.grad(w)) > eps:
        log.warn('Gradient implementation check failed!')
        return False
    else:
        log.info('Gradient implementation check succeeded!')
        return True
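# Usage sketch (an illustrative assumption, not part of the original source):
# `LogisticRegression` stands in for any concrete problem subclass that defines
# self.dim, self.f() and self.grad().
#
#     p = LogisticRegression(n_agent=20, m=1000, dim=40)
#     p.grad_check()   # True if the analytic gradient matches finite differences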
def _check_2d_gradient():
    res = True
    w_2d = xp.random.randn(self.dim, self.n_agent)

    # Average of the local gradients, evaluated one agent at a time
    g_1d = 0
    for i in range(self.n_agent):
        g_1d += self.grad(w_2d[:, i], i=i)
    g_1d /= self.n_agent

    # Average of the local gradients, evaluated in one batched (2-d) call
    g_2d = self.grad(w_2d).mean(axis=1)
    if xp.linalg.norm(g_1d - g_2d) > 1e-5:
        log.warn(
            'Distributed gradient check failed! Difference between global gradient and average of distributed gradients is %.4f'
            % xp.linalg.norm(g_1d - g_2d))
        res = False

    # Batched call with every sample index selected explicitly for every agent
    g_2d_sample = self.grad(
        w_2d,
        j=xp.arange(self.m).reshape(-1, 1).repeat(self.n_agent, axis=1).T
        ).mean(axis=1)
    if xp.linalg.norm(g_1d - g_2d_sample) > 1e-5:
        log.warn(
            'Distributed gradient check failed! Difference between global gradient and average of all sample gradients is %.4f'
            % xp.linalg.norm(g_1d - g_2d_sample))
        res = False

    # Stochastic gradients on a shared mini-batch, batched vs. per-agent
    samples = xp.random.randint(0, self.m, (self.n_agent, 10))
    g_2d_stochastic = self.grad(w_2d, j=samples)
    for i in range(self.n_agent):
        g_1d_stochastic = self.grad(w_2d[:, i], i=i, j=samples[i])
        if xp.linalg.norm(g_1d_stochastic - g_2d_stochastic[:, i]) > 1e-5:
            log.warn(
                'Distributed gradient check failed! Difference between distributed stochastic gradient at agent %d and average of sample gradients is %.4f'
                % (i, xp.linalg.norm(g_1d_stochastic - g_2d_stochastic[:, i])))
            res = False

    return res
def _check_1d_gradient():
    w = xp.random.randn(self.dim)
    g = self.grad(w)
    g_i = g_ij = 0
    res = True

    for i in range(self.n_agent):
        # Local gradient at agent i vs. the average of its per-sample gradients
        _tmp_g_i = self.grad(w, i)
        _tmp_g_ij = 0
        for j in range(self.m):
            _tmp_g_ij += self.grad(w, i, j)
        if xp.linalg.norm(_tmp_g_i - _tmp_g_ij / self.m) > 1e-5:
            log.warn(
                'Distributed gradient check failed! Difference between local gradient at agent %d and average of all local sample gradients is %.4f'
                % (i, xp.linalg.norm(_tmp_g_i - _tmp_g_ij / self.m)))
            res = False
        g_i += _tmp_g_i
        g_ij += _tmp_g_ij

    g_i /= self.n_agent
    g_ij /= self.m_total

    # Global gradient vs. the average of local gradients
    if xp.linalg.norm(g - g_i) > 1e-5:
        log.warn(
            'Distributed gradient check failed! Difference between global gradient and average of local gradients is %.4f'
            % xp.linalg.norm(g - g_i))
        res = False

    # Global gradient vs. the average of all sample gradients
    if xp.linalg.norm(g - g_ij) > 1e-5:
        log.warn(
            'Distributed gradient check failed! Difference between global gradient and average of all sample gradients is %.4f'
            % xp.linalg.norm(g - g_ij))
        res = False

    return res
def _check_function_value():
    w = xp.random.randn(self.dim)
    f = self.f(w)
    f_i = f_ij = 0
    res = True

    for i in range(self.n_agent):
        # Local function value at agent i vs. the average of its per-sample values
        _tmp_f_i = self.f(w, i)
        _tmp_f_ij = 0
        for j in range(self.m):
            _tmp_f_ij += self.f(w, i, j)
        if xp.abs(_tmp_f_i - _tmp_f_ij / self.m) > 1e-10:
            log.warn(
                'Distributed function value check failed! Difference between local function value at agent %d and average of its sample function values is %.4f'
                % (i, xp.abs(_tmp_f_i - _tmp_f_ij / self.m)))
            res = False
        f_i += _tmp_f_i
        f_ij += _tmp_f_ij

    f_i /= self.n_agent
    f_ij /= self.m_total

    # Global function value vs. the average of local function values
    if xp.abs(f - f_i) > 1e-10:
        log.warn(
            'Distributed function value check failed! Difference between the global function value and average of local function values is %.4f'
            % xp.abs(f - f_i))
        res = False

    # Global function value vs. the average of all sample function values
    if xp.abs(f - f_ij) > 1e-10:
        log.warn(
            'Distributed function value check failed! Difference between the global function value and average of all sample function values is %.4f'
            % xp.abs(f - f_ij))
        res = False

    return res
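# A minimal sketch of how the three helpers above might be wired together; the
# wrapper name `distributed_check` is an assumption, and the helpers must be
# nested inside it (as closures) so that they can see `self`.
#
#     def distributed_check(self):
#         def _check_function_value():
#             ...  # as above
#         def _check_1d_gradient():
#             ...  # as above
#         def _check_2d_gradient():
#             ...  # as above
#
#         # all() evaluates every check, so each failure is logged separately
#         res = all([_check_function_value(), _check_1d_gradient(), _check_2d_gradient()])
#         if res:
#             log.info('Distributed function value and gradient check succeeded!')
#         return res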