def _iterate(self, *args):
    """Run a single iteration; returns the updated iterates and evaluation results."""
    (x, xag, y, yag), (rtol, atol), need_eval, k, iter_num = args
    if isinstance(x, list):
        x = distmat.DistMat(x)
        xag = distmat.DistMat(xag)
    if isinstance(y, list):
        y = distmat.DistMat(y)
        yag = distmat.DistMat(yag)
    with ops.name_scope(type(self).__name__):
        with ops.name_scope("xbar_update"):
            Ax = self.matmul_A(x)
            r = self.loss.eval_deriv(Ax, self.b)
            Atr = self.matmul_A(r, True)
            Dty = self.spmatmul_D(y, True)
            xbar = x - self.tau * (1 - self.kappa) * (Atr + Dty)
        with ops.name_scope("y_update"):
            Dxbar = self.spmatmul_D(xbar)
            ypp = self.penalty.prox(y + self.sigma * Dxbar, self.sigma)
        with ops.name_scope("x_update"):
            ybar = -self.kappa * y + (1 + self.kappa) * ypp
            Dtybar = self.spmatmul_D(ybar, True)
            xpp = x - self.tau * (Atr + Dtybar)
        with ops.name_scope("relax"):
            xp = (one - self.rho) * x + self.rho * xpp
            yp = (one - self.rho) * y + self.rho * ypp
        with ops.name_scope("aggregate"):
            if self.aggregate:
                iter_num_f = tf.to_float(iter_num)
                xagp = ((one - one / (iter_num_f + one)) * xag
                        + 1 / (iter_num_f + one) * x)
                yagp = ((one - one / (iter_num_f + one)) * yag
                        + 1 / (iter_num_f + one) * y)
            else:
                xagp = xag
                yagp = yag
        with ops.name_scope("evaluations"):
            if self.aggregate:
                evals = self._evaluate(need_eval, xag, xagp, yag, yagp)
            else:
                evals = self._evaluate(need_eval, x, xp, y, yp)
    if isinstance(xp, distmat.DistMat):
        xp = xp.tensors
        xagp = xagp.tensors
    if isinstance(yp, distmat.DistMat):
        yp = yp.tensors
        yagp = yagp.tensors
    return [xp, xagp, yp, yagp], evals
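# For reference, ignoring device placement, the update implemented in the
# _iterate above reads roughly as follows (a sketch read off the code; f is
# self.loss, prox the penalty's proximal map):
#   xbar = x - tau * (1 - kappa) * (A' f'(A x) + D' y)
#   y''  = prox(y + sigma * D xbar, sigma)
#   ybar = -kappa * y + (1 + kappa) * y''
#   x''  = x - tau * (A' f'(A x) + D' ybar)
#   x+   = (1 - rho) * x + rho * x'',   y+ = (1 - rho) * y + rho * y''
# With aggregation on, (xag, yag) hold running averages of the iterates.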
def initialize(self):
    """Build per-device constants (group sizes, group indices, and thresholds) for the penalty."""
    gpt = self.gpart
    sizes = np.array([gpt[i + 1] - gpt[i]
                      for i in range(len(gpt) - 1)]).reshape((-1, 1))
    sqrt_sizes = np.sqrt(sizes)
    if self.partition is None:
        with tf.device(self.devices):
            self.sqrt_sizes = tf.constant(sqrt_sizes, dtype=self.dtype)
            self.grpidx = tf.constant(self.g)
            self.grpidx_2d = tf.reshape(self.grpidx, (-1, 1))
            self.max_norms = tf.constant(self.lam * sqrt_sizes, dtype=self.dtype)
    else:
        partition = self.partition
        self.grp_device_part = partitioners.groupvar_partitioner(
            partition, gpt)(len(gpt) - 1, len(self.devices))
        grp_device_part = self.grp_device_part
        self.sqrt_sizes = []
        self.grpidx = []
        self.grpidx_2d = []
        self.max_norms = []
        for i, d in enumerate(self.devices):
            with tf.device(d):
                self.sqrt_sizes.append(
                    tf.constant(
                        sqrt_sizes[grp_device_part[i]:grp_device_part[i + 1]],
                        dtype=self.dtype))
                g_sect = self.g[partition[i]:partition[i + 1]]
                g_sect = g_sect - np.min(g_sect)
                gidx = tf.constant(g_sect)
                self.grpidx.append(gidx)
                self.grpidx_2d.append(tf.reshape(gidx, (-1, 1)))
                self.max_norms.append(
                    tf.constant(
                        self.lam *
                        sqrt_sizes[grp_device_part[i]:grp_device_part[i + 1]],
                        dtype=self.dtype))
        self.sqrt_sizes = distmat.DistMat(self.sqrt_sizes)
        self.grpidx = distmat.DistMat(self.grpidx)
        self.grpidx_2d = distmat.DistMat(self.grpidx_2d)
        self.max_norms = distmat.DistMat(self.max_norms)
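# Illustrative example (an assumption consistent with how this initialize()
# reads g and gpart): with three groups of sizes 2, 3, and 1,
#   g     = [0, 0, 1, 1, 1, 2]   # group index of each coefficient
#   gpart = [0, 2, 5, 6]         # group boundaries, so sizes = [2, 3, 1]
# and max_norms = lam * sqrt(sizes) gives one threshold per group.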
def _iterate(self, *args):
    """Run a single iteration; returns the updated iterates and evaluation results."""
    (xm, x, xag, ym, y, yag), (rtol, atol), need_eval, k, iter_num = args
    if isinstance(x, list):
        xm = distmat.DistMat(xm)
        x = distmat.DistMat(x)
        xag = distmat.DistMat(xag)
    if isinstance(y, list):
        ym = distmat.DistMat(ym)
        y = distmat.DistMat(y)
        yag = distmat.DistMat(yag)
    tau = self.tau
    alpha1 = self.alpha1
    alpha2 = self.alpha2
    with ops.name_scope(type(self).__name__):
        with ops.name_scope("x_update"):
            Ax = self.matmul_A(x)
            Dx = self.spmatmul_D(x)
            Dty = self.spmatmul_D(y, True)
            r = self.loss.eval_deriv(Ax, self.b)
            Atr = self.matmul_A(r, True)
            xtilde = x - tau * (Atr + Dty) + alpha1 * (x - xm)
        with ops.name_scope("w_update"):
            Dxtilde = self.spmatmul_D(xtilde)
            ytilde = self.penalty.prox(y + tau * Dx + alpha1 * (y - ym),
                                       self.penalty.lam)
            Dtytilde = self.spmatmul_D(ytilde, True)
        with ops.name_scope("correction"):
            yp = ytilde + tau * (Dxtilde - Dx) + alpha2 * (y - ym)
            xp = xtilde + tau * self.spmatmul_D(y - ytilde, True) + alpha2 * (x - xm)
        with ops.name_scope("aggregation"):
            if self.aggregate:
                iter_num_f = tf.to_float(iter_num)
                xagp = ((one - one / (iter_num_f + one)) * xag
                        + 1 / (iter_num_f + one) * x)
                yagp = ((one - one / (iter_num_f + one)) * yag
                        + 1 / (iter_num_f + one) * y)
            else:
                xagp = xag
                yagp = yag
        with ops.name_scope("evaluations"):
            if self.aggregate:
                evals = self._evaluate(need_eval, xag, xagp, yag, yagp)
            else:
                evals = self._evaluate(need_eval, x, xp, y, yp)
    if isinstance(xp, distmat.DistMat):
        x = x.tensors
        xp = xp.tensors
        xagp = xagp.tensors
    if isinstance(yp, distmat.DistMat):
        y = y.tensors
        yp = yp.tensors
        yagp = yagp.tensors
    return [x, xp, xagp, y, yp, yagp], evals
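# A sketch of the update above, read off the code (f is self.loss, prox the
# penalty's proximal map; alpha1/alpha2 are inertial coefficients):
#   xtilde = x - tau * (A' f'(A x) + D' y) + alpha1 * (x - xm)
#   ytilde = prox(y + tau * D x + alpha1 * (y - ym), lam)
#   y+     = ytilde + tau * D (xtilde - x) + alpha2 * (y - ym)
#   x+     = xtilde + tau * D' (y - ytilde) + alpha2 * (x - xm)
# (xm, ym) are the previous iterates, and (xag, yag) optional running averages.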
def matmul(A, B, transpose_A=False, transpose_B=False, master='/gpu:0'):
    """
    Distributed matrix multiplication.
    A: DistMat. B: a single tensor or a list of tensors.
    Note: returns a single tensor (reduced on `master`) when transpose_A is
    True, and a DistMat of row blocks otherwise.
    """
    if isinstance(A, tf.Tensor) or isinstance(A, tf.Variable):
        if isinstance(B, tf.Tensor) or isinstance(B, tf.Variable):
            return tf.matmul(A, B)
        else:
            raise NotImplementedError
    if transpose_B:
        raise NotImplementedError
    else:
        if transpose_A:
            # distributed dim is the inner axis
            if isinstance(B, tf.Tensor) or isinstance(B, tf.Variable):
                # broadcast B, compute per-device partial products, then reduce
                partial_sums = []
                for i, t in enumerate(A.tensors):
                    with tf.device(t.device):
                        partial_sums.append(
                            tf.matmul(t,
                                      B[A.partition[i]:A.partition[i + 1], :],
                                      transpose_a=True))
                with tf.device(master):
                    return tf.add_n(partial_sums)
            else:
                partial_sums = []
                for t_A, t_B in zip(A.tensors, B.tensors):
                    with tf.device(t_A.device):
                        partial_sums.append(
                            tf.matmul(t_A, t_B, transpose_a=True))
                with tf.device(master):
                    return tf.add_n(partial_sums)
        else:
            # non-distributed dim is the inner axis; merely broadcast B.
            if isinstance(B, tf.Tensor) or isinstance(B, tf.Variable):
                slices = []
                for t in A.tensors:
                    with tf.device(t.device):
                        slices.append(tf.matmul(t, B))
                return distmat.DistMat(slices)
            else:
                raise NotImplementedError
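# Hypothetical usage sketch for matmul(), assuming a DistMat can be built
# directly from a list of row blocks (as done elsewhere in this code) and
# that it records the corresponding row partition:
#   blocks = [tf.random_normal((500, 20)), tf.random_normal((500, 20))]
#   A = distmat.DistMat(blocks)              # 1000 x 20, two row blocks
#   B = tf.random_normal((20, 3))
#   AB = matmul(A, B)                        # DistMat of 500 x 3 blocks
#   C = tf.random_normal((1000, 3))
#   AtC = matmul(A, C, transpose_A=True)     # single 20 x 3 tensor on master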
def spmatmul(D, x, transpose_A=False, transpose_B=False):
    """
    Distributed sparse matrix times a dense matrix (or vector).

    Note: the behavior of this operation is based on an undocumented feature
    of `scatter_nd`. If the implementation of `scatter_nd` changes, this
    implementation should change as well.
    """
    assert isinstance(D, distmat.DistSpMat)
    if transpose_B:
        raise NotImplementedError
    if isinstance(x, distmat.DistMat):
        # TODO: check validity
        # take the list of tensors
        x = x.tensors
    if not isinstance(x, list):
        x = [x]
    Dxparts = defaultdict(list)
    outlist = []
    xcols = x[0].shape[1]
    if isinstance(xcols, tf.Dimension):
        xcols = xcols.value
    if transpose_A:
        # piecewise computation
        for i in range(len(D.devices_r)):
            xpiece = x[i]
            for j in range(len(D.devices_c)):
                if D.D_tensors[i][j]:
                    Dt_block = D.Dt_tensors[i][j]
                    with tf.device(D.devices_r[i]):
                        Dxparts[j].append(
                            tf.sparse_tensor_dense_matmul(Dt_block, xpiece))
        # scatter
        for j in range(len(D.devices_c)):
            with tf.device(D.devices_c[j]):
                Dxdata = tf.concat(Dxparts[j], 0)
                Dxidx = tf.reshape(D.Dt_nz_r_all[j], (-1, 1))
                rows = D.partition_c[j + 1] - D.partition_c[j]
                outlist.append(tf.scatter_nd(Dxidx, Dxdata, [rows, xcols]))
    else:
        # piecewise computation
        for j in range(len(D.devices_c)):
            xpiece = x[j]
            for i in range(len(D.devices_r)):
                if D.D_tensors[i][j]:
                    D_block = D.D_tensors[i][j]
                    with tf.device(D.devices_c[j]):
                        Dxparts[i].append(
                            tf.sparse_tensor_dense_matmul(D_block, xpiece))
        # scatter
        for i in range(len(D.devices_r)):
            with tf.device(D.devices_r[i]):
                Dxdata = tf.concat(Dxparts[i], 0)
                Dxidx = tf.reshape(D.D_nz_r_all[i], (-1, 1))
                rows = D.partition_r[i + 1] - D.partition_r[i]
                outlist.append(tf.scatter_nd(Dxidx, Dxdata, [rows, xcols]))
    return distmat.DistMat(outlist)
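# The "undocumented feature" referenced in the docstring appears to be that
# tf.scatter_nd sums contributions for duplicate indices, which is what merges
# partial block products landing on the same output rows. A tiny illustration:
#   idx  = tf.constant([[0], [2], [0]])
#   vals = tf.constant([[1., 1.], [2., 2.], [3., 3.]])
#   out  = tf.scatter_nd(idx, vals, [3, 2])   # row 0 accumulates to [4., 4.]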
def spmatmul_dropout(D, x, rate=1.0, transpose_A=False, transpose_B=False):
    """
    Distributed sparse matrix times a dense matrix (or vector), with dropout
    applied to the nonzero entries of each sparse block.

    rate: proportion of nonzeros to keep.

    Note: the behavior of this operation is based on an undocumented feature
    of `scatter_nd`. If the implementation of `scatter_nd` changes, this
    implementation should change as well.
    """
    assert isinstance(D, distmat.DistSpMat)
    if transpose_B:
        raise NotImplementedError
    if isinstance(x, distmat.DistMat):
        # TODO: check validity
        # take the list of tensors
        x = x.tensors
    if not isinstance(x, list):
        x = [x]
    Dxparts = defaultdict(list)
    outlist = []
    xcols = x[0].shape[1]
    if isinstance(xcols, tf.Dimension):
        xcols = xcols.value
    if transpose_A:
        # piecewise computation
        for i in range(len(D.devices_r)):
            xpiece = x[i]
            for j in range(len(D.devices_c)):
                if D.D_tensors[i][j]:
                    Dt_block = D.Dt_tensors[i][j]
                    nonzero_elems = Dt_block.values.shape[0].value
                    with tf.device('/cpu:0'):
                        select = tf.not_equal(
                            tf.multinomial(tf.log([[1 - rate, rate]]),
                                           nonzero_elems)[0], zero)
                        Dt_block_drop = tf.sparse_retain(Dt_block, select)
                    with tf.device(D.devices_r[i]):
                        Dxparts[j].append(
                            tf.sparse_tensor_dense_matmul(Dt_block_drop, xpiece))
        # scatter
        for j in range(len(D.devices_c)):
            with tf.device(D.devices_c[j]):
                Dxdata = tf.concat(Dxparts[j], 0)
                Dxidx = tf.reshape(D.Dt_nz_r_all[j], (-1, 1))
                rows = D.partition_c[j + 1] - D.partition_c[j]
                outlist.append(tf.scatter_nd(Dxidx, Dxdata, [rows, xcols]))
    else:
        # piecewise computation
        for j in range(len(D.devices_c)):
            xpiece = x[j]
            for i in range(len(D.devices_r)):
                if D.D_tensors[i][j]:
                    D_block = D.D_tensors[i][j]
                    nonzero_elems = D_block.values.shape[0].value
                    with tf.device('/cpu:0'):
                        select = tf.not_equal(
                            tf.multinomial(tf.log([[1 - rate, rate]]),
                                           nonzero_elems)[0], zero)
                        D_block_drop = tf.sparse_retain(D_block, select)
                    with tf.device(D.devices_c[j]):
                        Dxparts[i].append(
                            tf.sparse_tensor_dense_matmul(D_block_drop, xpiece))
        # scatter
        for i in range(len(D.devices_r)):
            with tf.device(D.devices_r[i]):
                Dxdata = tf.concat(Dxparts[i], 0)
                Dxidx = tf.reshape(D.D_nz_r_all[i], (-1, 1))
                rows = D.partition_r[i + 1] - D.partition_r[i]
                outlist.append(tf.scatter_nd(Dxidx, Dxdata, [rows, xcols]))
    # inverted-dropout scaling: divide by the keep probability
    return distmat.DistMat(outlist) / rate
def matmul_dropout(A, B, rate=1.0, noise_shape=None,
                   transpose_A=False, transpose_B=False, master='/gpu:0'):
    """
    Distributed matrix multiplication with dropout applied to A.
    A: DistMat. B: a single tensor or a list of tensors.
    rate: keep probability.
    noise_shape: 'row' or None. The implementation for 'col' is incomplete.
    Note: returns a single tensor (reduced on `master`) when transpose_A is
    True, and a DistMat of row blocks otherwise.
    """
    noise_shape_slice = None
    if isinstance(A, tf.Tensor) or isinstance(A, tf.Variable):
        if isinstance(B, tf.Tensor) or isinstance(B, tf.Variable):
            if noise_shape == 'row':
                noise_shape_slice = [A.shape[0], 1]
            drop_A = tf.nn.dropout(A, rate, noise_shape_slice)
            return tf.matmul(drop_A, B)
        else:
            raise NotImplementedError
    if transpose_B:
        raise NotImplementedError
    else:
        if transpose_A:
            # distributed dim is the inner axis
            if isinstance(B, tf.Tensor) or isinstance(B, tf.Variable):
                # broadcast B, compute per-device partial products, then reduce
                partial_sums = []
                for i, t in enumerate(A.tensors):
                    with tf.device(t.device):
                        if noise_shape == 'row':
                            noise_shape_slice = [t.shape[0].value, 1]
                        t_drop = tf.nn.dropout(t, rate, noise_shape_slice)
                        partial_sums.append(
                            tf.matmul(t_drop,
                                      B[A.partition[i]:A.partition[i + 1], :],
                                      transpose_a=True))
                with tf.device(master):
                    return tf.add_n(partial_sums)
            else:
                partial_sums = []
                for t_A, t_B in zip(A.tensors, B.tensors):
                    with tf.device(t_A.device):
                        if noise_shape == 'row':
                            noise_shape_slice = [t_A.shape[0].value, 1]
                        t_A_drop = tf.nn.dropout(t_A, rate, noise_shape_slice)
                        partial_sums.append(
                            tf.matmul(t_A_drop, t_B, transpose_a=True))
                with tf.device(master):
                    return tf.add_n(partial_sums)
        else:
            # non-distributed dim is the inner axis; merely broadcast B.
            if isinstance(B, tf.Tensor) or isinstance(B, tf.Variable):
                slices = []
                for t in A.tensors:
                    with tf.device(t.device):
                        if noise_shape == 'row':
                            noise_shape_slice = [t.shape[0].value, 1]
                        t_drop = tf.nn.dropout(t, rate, noise_shape_slice)
                        slices.append(tf.matmul(t_drop, B))
                return distmat.DistMat(slices)
            else:
                raise NotImplementedError
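# Note on noise_shape='row': passing noise_shape=[rows, 1] to tf.nn.dropout
# draws one keep/drop decision per row, so entire rows of each block (i.e.
# whole observations) are zeroed out together rather than individual entries.
# Sketch of the per-block call made above:
#   t_drop = tf.nn.dropout(t, rate, noise_shape=[t.shape[0].value, 1])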
def _iterate(self, *args):
    """Run a single iteration; returns the updated iterates and evaluation results."""
    (xm, x, xag, ym, y, yag), (rtol, atol), need_eval, k, iter_num = args
    if isinstance(x, list):
        xm = distmat.DistMat(xm)
        x = distmat.DistMat(x)
        xag = distmat.DistMat(xag)
    if isinstance(y, list):
        ym = distmat.DistMat(ym)
        y = distmat.DistMat(y)
        yag = distmat.DistMat(yag)
    # 1-based indexing for params
    taum = tf.to_float(self.tau(iter_num))
    tau = tf.to_float(self.tau(iter_num + 1))
    sigma = tf.to_float(self.sigma(iter_num + 1))
    theta = tf.to_float(self.theta(iter_num + 1))
    rho = tf.to_float(self.rho(iter_num + 1))
    coef_a = self.coef_a
    coef_b = self.coef_b
    with ops.name_scope(type(self).__name__):
        with ops.name_scope("xmid_update"):
            Dx1 = self.spmatmul_D(x.dropout(self.D_p))
            Dx2 = self.spmatmul_D((x - xm).dropout(self.D_p))
            ubar = Dx1 - theta * coef_a * Dx2
            Dty1 = self.spmatmul_D(
                (y + theta * taum / tau * (y - ym)).dropout(self.D_p), True)
            Dty2 = self.spmatmul_D(
                ((taum / tau - 1) * (y - ym)).dropout(self.D_p), True)
            vbar = Dty1 + theta * coef_b * Dty2
            xmid = (1 - rho) * xag + rho * x
            Axmid = self.matmul_A(xmid.dropout(self.A_p))
        with ops.name_scope("update_iterates"):
            r = self.loss.eval_deriv(Axmid, self.b)
            Atr = self.matmul_A(r, True)
            up = ubar - tau * (1 + coef_a) * self.spmatmul_D(Atr + vbar)
            yp = self.penalty.prox(y + sigma * up, sigma)
            Dty3 = self.spmatmul_D(yp.dropout(self.D_p), True)
            Dty4 = self.spmatmul_D(
                ((yp - y) - theta * (y - ym)).dropout(self.D_p), True)
            vp = Dty3 + coef_b * Dty4
            xp = x - tau * (Atr + vp)
        with ops.name_scope("aggregation"):
            xagp = (1 - rho) * xag + rho * xp
            yagp = (1 - rho) * yag + rho * yp
        with ops.name_scope("evaluations"):
            if self.aggregate:
                evals = self._evaluate(need_eval, xag, xagp, yag, yagp)
            else:
                evals = self._evaluate(need_eval, x, xp, y, yp)
    if isinstance(xp, distmat.DistMat):
        x = x.tensors
        xp = xp.tensors
        xagp = xagp.tensors
    if isinstance(yp, distmat.DistMat):
        y = y.tensors
        yp = yp.tensors
        yagp = yagp.tensors
    return [x, xp, xagp, y, yp, yagp], evals
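# Rough sketch of the core update above, ignoring the dropout masks (read off
# the code; tau, sigma, theta, rho follow the 1-based parameter schedules):
#   xmid = (1 - rho) * xag + rho * x
#   u    = ubar - tau * (1 + coef_a) * D (A' f'(A xmid) + vbar)
#   y+   = penalty.prox(y + sigma * u, sigma)
#   x+   = x - tau * (A' f'(A xmid) + v+)
#   xag+ = (1 - rho) * xag + rho * x+,   yag+ likewise
# where ubar, vbar, and v+ are the extrapolated D x and D' y terms built in
# the "xmid_update" and "update_iterates" scopes.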
def initialize(self):
    """Build per-device constants (group sizes, indices, thresholds) and the group indicator matrix."""
    gpt = self.gpart
    sizes = np.array([gpt[i + 1] - gpt[i]
                      for i in range(len(gpt) - 1)]).reshape((-1, 1))
    if self.dtype == tf.float32:
        np_type = np.float32
    elif self.dtype == tf.float64:
        np_type = np.float64
    grpmat = csc_matrix((np.ones_like(self.g, dtype=np_type), self.g,
                         np.arange(self.g.shape[0] + 1))).tocsr().tocoo()
    sqrt_sizes = np.sqrt(sizes)
    if self.partition is None:
        self.grpmat = coo_to_sparsetensor(grpmat)
        with tf.device(self.devices):
            self.sqrt_sizes = tf.constant(sqrt_sizes, dtype=self.dtype)
            self.grpidx = tf.constant(self.g)
            self.grpidx_2d = tf.reshape(self.grpidx, (-1, 1))
            self.max_norms = tf.constant(self.lam * sqrt_sizes, dtype=self.dtype)
            self.maxynorm = tf.sqrt(tf.reduce_sum(self.max_norms**2))
    else:
        partition = self.partition
        grp_device_partitioner = partitioners.groupvar_partitioner(partition, gpt)
        dual_partitioner = partitioners.group_partitioner(gpt)
        self.grp_device_part = grp_device_partitioner(
            len(gpt) - 1, len(self.devices))
        grp_device_part = self.grp_device_part
        self.grpmat = distmat.DistSpMat.from_spmatrix(
            grpmat, self.devices,
            partitioner_r=grp_device_partitioner,
            partitioner_c=dual_partitioner)
        self.sqrt_sizes = []
        self.grpidx_2d = []
        self.max_norms = []
        for i, d in enumerate(self.devices):
            with tf.device(d):
                self.sqrt_sizes.append(
                    tf.constant(
                        sqrt_sizes[grp_device_part[i]:grp_device_part[i + 1]],
                        dtype=self.dtype))
                g_sect = self.g[partition[i]:partition[i + 1]]
                g_sect = g_sect - np.min(g_sect)
                gidx = tf.constant(g_sect)
                self.grpidx_2d.append(tf.reshape(gidx, (-1, 1)))
                self.max_norms.append(
                    tf.constant(
                        self.lam *
                        sqrt_sizes[grp_device_part[i]:grp_device_part[i + 1]],
                        dtype=self.dtype))
        self.sqrt_sizes = distmat.DistMat(self.sqrt_sizes)
        self.grpidx_2d = distmat.DistMat(self.grpidx_2d)
        self.max_norms = distmat.DistMat(self.max_norms)
        self.maxynorm = tf.sqrt((self.max_norms**2).reduce_sum())
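# The csc_matrix((ones, g, arange(n+1))) call above builds a group indicator
# matrix: column j has a single 1 in row g[j], so grpmat is (n_groups x n),
# presumably used to sum per-coefficient quantities within each group.
# A small illustration with hypothetical values:
#   g = np.array([0, 0, 1, 1, 1, 2])
#   grpmat = csc_matrix((np.ones(6), g, np.arange(7))).toarray()
#   # [[1, 1, 0, 0, 0, 0],
#   #  [0, 0, 1, 1, 1, 0],
#   #  [0, 0, 0, 0, 0, 1]]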