Example #1
    def _iterate(self, *args):
        (x, xag, y, yag), (rtol, atol), need_eval, k, iter_num = args
        if isinstance(x, list):
            x = distmat.DistMat(x)
            xag = distmat.DistMat(xag)
        if isinstance(y, list):
            y = distmat.DistMat(y)
            yag = distmat.DistMat(yag)

        with ops.name_scope(type(self).__name__):
            with ops.name_scope("xbar_update"):
                Ax = self.matmul_A(x)
                r = self.loss.eval_deriv(Ax, self.b)
                Atr = self.matmul_A(r, True)
                Dty = self.spmatmul_D(y, True)
                xbar = x - self.tau * (1 - self.kappa) * (Atr + Dty)
            with ops.name_scope("y_update"):
                Dxbar = self.spmatmul_D(xbar)
                ypp = self.penalty.prox(y + self.sigma * Dxbar, self.sigma)
            with ops.name_scope("x_update"):
                ybar = -self.kappa * y + (1 + self.kappa) * ypp
                Dtybar = self.spmatmul_D(ybar, True)
                xpp = x - self.tau * (Atr + Dtybar)
            with ops.name_scope("relax"):
                xp = (one - self.rho) * x + self.rho * xpp
                yp = (one - self.rho) * y + self.rho * ypp
            with ops.name_scope("aggregate"):
                if self.aggregate:
                    iter_num_f = tf.to_float(iter_num)
                    w = one / (iter_num_f + one)
                    xagp = (one - w) * xag + w * x
                    yagp = (one - w) * yag + w * y
                else:
                    xagp = xag
                    yagp = yag
            with ops.name_scope("evaluations"):
                if self.aggregate:
                    evals = self._evaluate(need_eval, xag, xagp, yag, yagp)
                else:
                    evals = self._evaluate(need_eval, x, xp, y, yp)
        if isinstance(xp, distmat.DistMat):
            xp = xp.tensors
            xagp = xagp.tensors
        if isinstance(yp, distmat.DistMat):
            yp = yp.tensors
            yagp = yagp.tensors
        return [xp, xagp, yp, yagp], evals
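A minimal NumPy sketch of one of these relaxed primal-dual iterations, assuming a least-squares loss (derivative A x - b) and a soft-thresholding prox in place of `self.loss` and `self.penalty`; every name below is an illustrative stand-in, not a class attribute:

import numpy as np

def soft_threshold(v, t):
    # assumed prox; the class delegates to self.penalty.prox
    return np.sign(v) * np.maximum(np.abs(v) - t, 0.0)

def relaxed_pd_step(x, y, A, D, b, tau, sigma, kappa, rho):
    r = A @ x - b                        # least-squares derivative (assumption)
    Atr = A.T @ r
    xbar = x - tau * (1 - kappa) * (Atr + D.T @ y)
    ypp = soft_threshold(y + sigma * (D @ xbar), sigma)
    ybar = -kappa * y + (1 + kappa) * ypp
    xpp = x - tau * (Atr + D.T @ ybar)
    # over-relaxation, as in the "relax" scope above
    return (1 - rho) * x + rho * xpp, (1 - rho) * y + rho * ypp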
Example #2
    def initialize(self):
        #g_int16 = self.g.astype('int16')
        #sizes = np.bincount(g_int16).reshape((-1,1))
        gpt = self.gpart
        sizes = np.array([gpt[i + 1] - gpt[i]
                          for i in range(len(gpt) - 1)]).reshape((-1, 1))
        sqrt_sizes = np.sqrt(sizes)
        if self.partition is None:
            with tf.device(self.devices):
                self.sqrt_sizes = tf.constant(sqrt_sizes, dtype=self.dtype)
                self.grpidx = tf.constant(self.g)
                self.grpidx_2d = tf.reshape(self.grpidx, (-1, 1))
                self.max_norms = tf.constant(self.lam * sqrt_sizes,
                                             dtype=self.dtype)
        else:
            partition = self.partition
            self.grp_device_part = partitioners.groupvar_partitioner(
                partition, gpt)(len(gpt) - 1, len(self.devices))
            grp_device_part = self.grp_device_part
            self.sqrt_sizes = []
            self.grpidx = []
            self.grpidx_2d = []
            self.max_norms = []

            for i, d in enumerate(self.devices):
                with tf.device(d):

                    self.sqrt_sizes.append(
                        tf.constant(
                            sqrt_sizes[grp_device_part[i]:grp_device_part[i +
                                                                          1]],
                            dtype=self.dtype))
                    g_sect = self.g[partition[i]:partition[i + 1]]
                    g_sect = g_sect - np.min(g_sect)
                    gidx = tf.constant(g_sect)
                    self.grpidx.append(gidx)
                    self.grpidx_2d.append(tf.reshape(gidx, (-1, 1)))
                    self.max_norms.append(
                        tf.constant(
                            self.lam *
                            sqrt_sizes[grp_device_part[i]:grp_device_part[i +
                                                                          1]],
                            dtype=self.dtype))

            self.sqrt_sizes = distmat.DistMat(self.sqrt_sizes)
            self.grpidx = distmat.DistMat(self.grpidx)
            self.grpidx_2d = distmat.DistMat(self.grpidx_2d)
            self.max_norms = distmat.DistMat(self.max_norms)
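A small NumPy illustration of the quantities built here, assuming `self.g` assigns one group id per variable and `self.gpart` holds the group boundaries (which is what the loop over `gpt` implies); `lam` is an arbitrary value:

import numpy as np

g = np.array([0, 0, 0, 1, 1, 2])          # group id per variable (assumption)
gpart = np.array([0, 3, 5, 6])            # group boundaries, as in self.gpart
lam = 0.1

sizes = np.array([gpart[i + 1] - gpart[i]
                  for i in range(len(gpart) - 1)]).reshape((-1, 1))
sqrt_sizes = np.sqrt(sizes)               # [[sqrt(3)], [sqrt(2)], [1.0]]
max_norms = lam * sqrt_sizes              # per-group norm bound lam * sqrt(group size)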
Example #3
    def _iterate(self, *args):
        (xm, x, xag, ym, y, yag), (rtol, atol), need_eval, k, iter_num = args
        if isinstance(x, list):
            xm = distmat.DistMat(xm)
            x = distmat.DistMat(x)
            xag = distmat.DistMat(xag)
        if isinstance(y, list):
            ym = distmat.DistMat(ym)
            y = distmat.DistMat(y)
            yag = distmat.DistMat(yag)
        tau = self.tau
        alpha1 = self.alpha1
        alpha2 = self.alpha2
        with ops.name_scope(type(self).__name__):
            with ops.name_scope("x_update"):
                Ax = self.matmul_A(x)
                Dx = self.spmatmul_D(x)
                Dty = self.spmatmul_D(y, True)

                r = self.loss.eval_deriv(Ax, self.b)
                Atr = self.matmul_A(r, True)
                xtilde = x - tau * (Atr + Dty) + alpha1 * (x - xm)
            with ops.name_scope("w_update"):
                Dxtilde = self.spmatmul_D(xtilde)
                ytilde = self.penalty.prox(y + tau * Dx + alpha1 * (y - ym),
                                           self.penalty.lam)
                Dtytilde = self.spmatmul_D(ytilde, True)
            with ops.name_scope("correction"):
                yp = ytilde + tau * (Dxtilde - Dx) + alpha2 * (y - ym)
                xp = xtilde + tau * self.spmatmul_D(y - ytilde,
                                                    True) + alpha2 * (x - xm)

            with ops.name_scope("aggregation"):
                if self.aggregate:
                    iter_num_f = tf.to_float(iter_num)
                    w = one / (iter_num_f + one)
                    xagp = (one - w) * xag + w * x
                    yagp = (one - w) * yag + w * y
                else:
                    xagp = xag
                    yagp = yag

            with ops.name_scope("evaluations"):
                if self.aggregate:
                    evals = self._evaluate(need_eval, xag, xagp, yag, yagp)
                else:
                    evals = self._evaluate(need_eval, x, xp, y, yp)

        if isinstance(xp, distmat.DistMat):
            x = x.tensors
            xp = xp.tensors
            xagp = xagp.tensors
        if isinstance(yp, distmat.DistMat):
            y = y.tensors
            yp = yp.tensors
            yagp = yagp.tensors
        return [x, xp, xagp, y, yp, yagp], evals
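For comparison with Example #1, a NumPy sketch of one inertial prediction-correction step following the formulas above, again assuming a least-squares derivative and a soft-thresholding prox; every name is an illustrative stand-in:

import numpy as np

def soft_threshold(v, t):
    return np.sign(v) * np.maximum(np.abs(v) - t, 0.0)

def inertial_pd_step(xm, x, ym, y, A, D, b, tau, alpha1, alpha2, lam):
    r = A @ x - b                                   # least-squares derivative (assumption)
    Atr = A.T @ r
    Dx = D @ x
    # prediction with inertial terms alpha1 * (x - xm) and alpha1 * (y - ym)
    xtilde = x - tau * (Atr + D.T @ y) + alpha1 * (x - xm)
    ytilde = soft_threshold(y + tau * Dx + alpha1 * (y - ym), lam)
    # correction step, mirroring the "correction" scope above
    yp = ytilde + tau * (D @ xtilde - Dx) + alpha2 * (y - ym)
    xp = xtilde + tau * (D.T @ (y - ytilde)) + alpha2 * (x - xm)
    return xp, yp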
Example #4
def matmul(A, B, transpose_A=False, transpose_B=False, master='/gpu:0'):
    """
    distributed matrix multiplication.
    A: DistMat, 
    B: single tensor or a list of tensors.
    Note: returns a single tensor or a list of tensors, Not a DistMat.
    """
    if isinstance(A, tf.Tensor) or isinstance(A, tf.Variable):
        if isinstance(B, tf.Tensor) or isinstance(B, tf.Variable):
            return tf.matmul(A, B)
        else:
            raise NotImplementedError
    if transpose_B:
        raise NotImplementedError
    else:
        if transpose_A:  # distributed dim is inner axis
            if isinstance(B, tf.Tensor) or isinstance(B, tf.Variable):
                # broadcast
                partial_sums = []
                for i, t in enumerate(A.tensors):
                    with tf.device(t.device):
                        partial_sums.append(
                            tf.matmul(t,
                                      B[A.partition[i]:A.partition[i + 1], :],
                                      transpose_a=True))
                with tf.device(master):
                    return tf.add_n(partial_sums)
            else:
                partial_sums = []
                for t_A, t_B in zip(A.tensors, B.tensors):
                    #print(t_A.device)
                    #print(t_B.device)
                    #assert t_A.device == t_B.device
                    with tf.device(t_A.device):
                        partial_sums.append(
                            tf.matmul(t_A, t_B, transpose_a=True))
                with tf.device(master):
                    return tf.add_n(partial_sums)
                # distributed computation necessary
                #return tf.add_n([tf.matmul(Apart, Bpart) for Apart, Bpart in zip(A.tensors, B.tensors)])
        else:  # non-distributed dim is inner axis; merely broadcast B.
            if isinstance(B, tf.Tensor) or isinstance(B, tf.Variable):
                slices = []
                for t in A.tensors:
                    with tf.device(t.device):
                        slices.append(tf.matmul(t, B))
                return distmat.DistMat(slices)
            else:
                raise NotImplementedError
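The transpose_A=True branch relies on the identity that, for a row-partitioned A, the product A^T B is the sum of the per-block products; a quick NumPy check of that identity with arbitrary shapes:

import numpy as np

A = np.random.randn(6, 3)
B = np.random.randn(6, 2)
partition = [0, 2, 6]                     # row-block boundaries, as in A.partition

blockwise = sum(A[partition[i]:partition[i + 1]].T @ B[partition[i]:partition[i + 1]]
                for i in range(len(partition) - 1))
assert np.allclose(blockwise, A.T @ B)    # partial sums reduce to the full A^T B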
Example #5
def spmatmul(D, x, transpose_A=False, transpose_B=False):
    """
    distributed sparse matrix times a dense vector.
    Note: The behavior of this operation is based on an undocumented feature of `scatter_nd`. If something changes in the
    implementation of `scatter_nd`, we should change this implementation. 
    """
    #print(type(D))
    assert isinstance(D, distmat.DistSpMat)
    if transpose_B:
        raise NotImplementedError
    if isinstance(x, distmat.DistMat):
        # TODO: check validity
        # take the list of tensors
        x = x.tensors
    if not isinstance(x, list):
        x = [x]

    Dxparts = defaultdict(list)
    outlist = []
    #print(type(x))
    xcols = x[0].shape[1]
    if isinstance(xcols, tf.Dimension):
        xcols = xcols.value

    if transpose_A:
        # piecewise computation
        for i in range(len(D.devices_r)):
            xpiece = x[i]
            for j in range(len(D.devices_c)):
                if D.D_tensors[i][j]:
                    Dt_block = D.Dt_tensors[i][j]

                    with tf.device(D.devices_r[i]):
                        Dxparts[j].append(
                            tf.sparse_tensor_dense_matmul(Dt_block, xpiece))
        # scatter
        for j in range(len(D.devices_c)):
            with tf.device(D.devices_c[j]):
                Dxdata = tf.concat(Dxparts[j], 0)
                Dxidx = tf.reshape(D.Dt_nz_r_all[j], (-1, 1))
                rows = D.partition_c[j + 1] - D.partition_c[j]
                outlist.append(tf.scatter_nd(Dxidx, Dxdata, [rows, xcols]))

    else:
        # piecewise computation
        for j in range(len(D.devices_c)):
            xpiece = x[j]
            for i in range(len(D.devices_r)):
                if D.D_tensors[i][j]:
                    D_block = D.D_tensors[i][j]
                    with tf.device(D.devices_c[j]):
                        Dxparts[i].append(
                            tf.sparse_tensor_dense_matmul(D_block, xpiece))
        # scatter
        for i in range(len(D.devices_r)):
            with tf.device(D.devices_r[i]):
                Dxdata = tf.concat(Dxparts[i], 0)
                Dxidx = tf.reshape(D.D_nz_r_all[i], (-1, 1))
                rows = D.partition_r[i + 1] - D.partition_r[i]
                outlist.append(tf.scatter_nd(Dxidx, Dxdata, [rows, xcols]))
    return distmat.DistMat(outlist)
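The final scatter step relies on `tf.scatter_nd` writing each concatenated block of rows back to its original row indices while leaving untouched rows at zero; a minimal TF 1.x sketch of that step alone, with made-up indices rather than the `DistSpMat` bookkeeping:

import tensorflow as tf

Dxdata = tf.constant([[10.0], [30.0]])            # concatenated per-block results
Dxidx = tf.reshape(tf.constant([1, 3]), (-1, 1))  # destination row indices
out = tf.scatter_nd(Dxidx, Dxdata, [5, 1])        # rows 0, 2, 4 remain zero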
Example #6
def spmatmul_dropout(D, x, rate=1.0, transpose_A=False, transpose_B=False):
    """
    distributed sparse matrix times a dense vector.
    Note: The behavior of this operation is based on an undocumented feature of `scatter_nd`. If something changes in the
    implementation of `scatter_nd`, we should change this implementation.
    rate: proportion to keep! 
    """
    #print(type(D))
    assert isinstance(D, distmat.DistSpMat)
    if transpose_B:
        raise NotImplementedError
    if isinstance(x, distmat.DistMat):
        # TODO: check validity
        # take the list of tensors
        x = x.tensors
        #print([t.shape for t in x])
    if not isinstance(x, list):
        x = [x]

    Dxparts = defaultdict(list)
    outlist = []
    #print(type(x))
    xcols = x[0].shape[1]
    if isinstance(xcols, tf.Dimension):
        xcols = xcols.value

    if transpose_A:
        # piecewise computation
        for i in range(len(D.devices_r)):
            xpiece = x[i]
            for j in range(len(D.devices_c)):
                if D.D_tensors[i][j]:
                    Dt_block = D.Dt_tensors[i][j]
                    nonzero_elems = Dt_block.values.shape[0].value
                    with tf.device('/cpu:0'):
                        select = tf.not_equal(
                            tf.multinomial(tf.log([[1 - rate, rate]]),
                                           nonzero_elems)[0], zero)
                        Dt_block_drop = tf.sparse_retain(Dt_block, select)

                    with tf.device(D.devices_r[i]):
                        Dxparts[j].append(
                            tf.sparse_tensor_dense_matmul(
                                Dt_block_drop, xpiece))
        # scatter
        for j in range(len(D.devices_c)):
            with tf.device(D.devices_c[j]):
                Dxdata = tf.concat(Dxparts[j], 0)
                Dxidx = tf.reshape(D.Dt_nz_r_all[j], (-1, 1))
                rows = D.partition_c[j + 1] - D.partition_c[j]
                outlist.append(tf.scatter_nd(Dxidx, Dxdata, [rows, xcols]))

    else:
        # piecewise computation
        for j in range(len(D.devices_c)):
            xpiece = x[j]
            for i in range(len(D.devices_r)):
                if D.D_tensors[i][j]:
                    D_block = D.D_tensors[i][j]
                    nonzero_elems = D_block.values.shape[0].value
                    with tf.device('/cpu:0'):
                        select = tf.not_equal(
                            tf.multinomial(tf.log([[1 - rate, rate]]),
                                           nonzero_elems)[0], zero)
                        D_block_drop = tf.sparse_retain(D_block, select)
                    with tf.device(D.devices_c[j]):
                        Dxparts[i].append(
                            tf.sparse_tensor_dense_matmul(
                                D_block_drop, xpiece))
        # scatter
        for i in range(len(D.devices_r)):
            with tf.device(D.devices_r[i]):
                Dxdata = tf.concat(Dxparts[i], 0)
                Dxidx = tf.reshape(D.D_nz_r_all[i], (-1, 1))
                rows = D.partition_r[i + 1] - D.partition_r[i]
                outlist.append(tf.scatter_nd(Dxidx, Dxdata, [rows, xcols]))
    return distmat.DistMat(outlist) / rate  # scale by rate!
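The per-nonzero masking used above, isolated into a minimal TF 1.x sketch (the SparseTensor is made up, and a plain `0` stands in for the module-level `zero` constant); dividing the final product by `rate` afterwards keeps it unbiased in expectation, which is what the last line of the function does:

import tensorflow as tf

D_block = tf.SparseTensor(indices=[[0, 0], [1, 2], [2, 1]],
                          values=[1.0, 2.0, 3.0],
                          dense_shape=[3, 3])
rate = 0.8                                        # keep probability
nnz = D_block.values.shape[0].value
# one Bernoulli(rate) draw per nonzero entry
select = tf.not_equal(
    tf.multinomial(tf.log([[1.0 - rate, rate]]), nnz)[0], 0)
D_drop = tf.sparse_retain(D_block, select)        # dropped entries become zero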
Example #7
def matmul_dropout(A,
                   B,
                   rate=1.0,
                   noise_shape=None,
                   transpose_A=False,
                   transpose_B=False,
                   master='/gpu:0'):
    """
    distributed matrix multiplication.
    A: DistMat, 
    B: single tensor or a list of tensors.
    rate: keep prob.
    noise_shape : 'row' or 'None'. implementation for 'col' is incomplete. 
    Note: returns a single tensor or a list of tensors, Not a DistMat.
    """
    noise_shape_slice = None
    if isinstance(A, tf.Tensor) or isinstance(A, tf.Variable):
        if isinstance(B, tf.Tensor) or isinstance(B, tf.Variable):
            if noise_shape == 'row':
                noise_shape_slice = [A.shape[0], 1]
            drop_A = tf.nn.dropout(A, rate, noise_shape_slice)
            return tf.matmul(drop_A, B)  # multiply the dropped-out A, not the original
        else:
            raise NotImplementedError
    if transpose_B:
        raise NotImplementedError
    else:
        if transpose_A:  # distributed dim is inner axis
            if isinstance(B, tf.Tensor) or isinstance(B, tf.Variable):
                # broadcast
                partial_sums = []
                for i, t in enumerate(A.tensors):
                    with tf.device(t.device):
                        if noise_shape == 'row':
                            noise_shape_slice = [t.shape[0].value, 1]

                        t_drop = tf.nn.dropout(t, rate, noise_shape_slice)
                        partial_sums.append(
                            tf.matmul(t_drop,
                                      B[A.partition[i]:A.partition[i + 1], :],
                                      transpose_a=True))
                with tf.device(master):
                    return tf.add_n(partial_sums)
            else:
                partial_sums = []
                for t_A, t_B in zip(A.tensors, B.tensors):
                    #print(t_A.device)
                    #print(t_B.device)
                    #assert t_A.device == t_B.device
                    with tf.device(t_A.device):
                        if noise_shape == 'row':
                            noise_shape_slice = [t_A.shape[0].value, 1]
                        t_A_drop = tf.nn.dropout(t_A, rate, noise_shape_slice)
                        partial_sums.append(
                            tf.matmul(t_A_drop, t_B, transpose_a=True))
                with tf.device(master):
                    return tf.add_n(partial_sums)
                # distributed computation necessary
                #return tf.add_n([tf.matmul(Apart, Bpart) for Apart, Bpart in zip(A.tensors, B.tensors)])
        else:  # non-distributed dim is inner axis; merely broadcast B.
            if isinstance(B, tf.Tensor) or isinstance(B, tf.Variable):
                slices = []
                for t in A.tensors:
                    with tf.device(t.device):
                        if noise_shape == 'row':
                            noise_shape_slice = [t.shape[0].value, 1]
                        t_drop = tf.nn.dropout(t, rate, noise_shape_slice)
                        slices.append(tf.matmul(t_drop, B))
                return distmat.DistMat(slices)
            else:
                raise NotImplementedError
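A small TF 1.x illustration of what `noise_shape='row'` does to a single slice before the matmul: `tf.nn.dropout` takes a keep probability and an optional noise shape, and a noise shape of [rows, 1] ties the keep/drop decision to whole rows:

import tensorflow as tf

t = tf.ones([4, 3])
rate = 0.5                                               # keep probability
t_row_drop = tf.nn.dropout(t, rate, noise_shape=[4, 1])  # kept rows scaled by 1/rate, dropped rows all zero
t_elem_drop = tf.nn.dropout(t, rate)                     # noise_shape=None drops entries independently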
Example #8
    def _iterate(self, *args):
        (xm, x, xag, ym, y, yag), (rtol, atol), need_eval, k, iter_num = args
        if isinstance(x, list):
            xm = distmat.DistMat(xm)
            x = distmat.DistMat(x)
            xag = distmat.DistMat(xag)
        if isinstance(y, list):
            ym = distmat.DistMat(ym)
            y = distmat.DistMat(y)
            yag = distmat.DistMat(yag)

        # 1-based indexing for params
        taum = tf.to_float(self.tau(iter_num))
        tau = tf.to_float(self.tau(iter_num + 1))
        sigma = tf.to_float(self.sigma(iter_num + 1))
        theta = tf.to_float(self.theta(iter_num + 1))
        rho = tf.to_float(self.rho(iter_num + 1))
        coef_a = self.coef_a
        coef_b = self.coef_b

        with ops.name_scope(type(self).__name__):
            with ops.name_scope("xmid_update"):

                #Dxm   = self.spmatmul_dropout(self.D, xm, self.D_p)
                Dx1 = self.spmatmul_D(x.dropout(self.D_p))
                Dx2 = self.spmatmul_D((x - xm).dropout(self.D_p))
                ubar = Dx1 - theta * coef_a * (Dx2)

                Dty1 = self.spmatmul_D(
                    (y + theta * taum / tau * (y - ym)).dropout(self.D_p),
                    True)
                Dty2 = self.spmatmul_D(
                    ((taum / tau - 1) * (y - ym)).dropout(self.D_p), True)

                #Dtym  = self.spmatmul_dropout(self.D, ym, self.D_p, True)
                #Dty   = self.spmatmul_dropout(self.D, y, self.D_p, True)
                vbar = Dty1 + theta * coef_b * Dty2

                xmid = (1 - rho) * xag + rho * x
                Axmid = self.matmul_A(xmid.dropout(self.A_p))

            with ops.name_scope("update_iterates"):
                r = self.loss.eval_deriv(Axmid, self.b)
                Atr = self.matmul_A(r, True)

                up = ubar - tau * (1 + coef_a) * self.spmatmul_D(Atr + vbar)
                yp = self.penalty.prox(y + sigma * up, sigma)

                Dty3 = self.spmatmul_D(yp.dropout(self.D_p), True)
                Dty4 = self.spmatmul_D(
                    ((yp - y) - theta * (y - ym)).dropout(self.D_p), True)
                #Dtyp  = self.spmatmul_dropout(self.D, yp, self.D_p, True)
                vp = Dty3 + coef_b * Dty4
                xp = x - tau * (Atr + vp)

            with ops.name_scope("aggregation"):
                xagp = (1 - rho) * xag + rho * xp
                yagp = (1 - rho) * yag + rho * yp

            with ops.name_scope("evaluations"):
                if self.aggregate:
                    evals = self._evaluate(need_eval, xag, xagp, yag, yagp)
                else:
                    evals = self._evaluate(need_eval, x, xp, y, yp)

        if isinstance(xp, distmat.DistMat):
            x = x.tensors
            xp = xp.tensors
            xagp = xagp.tensors
        if isinstance(yp, distmat.DistMat):
            y = y.tensors
            yp = yp.tensors
            yagp = yagp.tensors
        return [x, xp, xagp, y, yp, yagp], evals
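Here `self.tau`, `self.sigma`, `self.theta`, and `self.rho` are called with the (1-based) iteration count, so they act as step-size schedules rather than constants; a purely illustrative sketch of such schedules (these specific forms are assumptions, not the ones the class supplies):

import tensorflow as tf

def make_schedules(tau0=1.0, sigma0=1.0):
    # each callable maps an iteration-count tensor to a scalar step size
    tau = lambda k: tau0 / tf.sqrt(tf.to_float(k))
    sigma = lambda k: sigma0 / tf.sqrt(tf.to_float(k))
    theta = lambda k: tf.to_float(k - 1) / tf.to_float(k)
    rho = lambda k: 2.0 / tf.to_float(k + 1)
    return tau, sigma, theta, rho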
Example #9
    def initialize(self):
        #g_int16 = self.g.astype('int16')
        #sizes = np.bincount(g_int16).reshape((-1,1))
        gpt = self.gpart
        sizes = np.array([gpt[i + 1] - gpt[i]
                          for i in range(len(gpt) - 1)]).reshape((-1, 1))
        if self.dtype == tf.float32:
            np_type = np.float32
        elif self.dtype == tf.float64:
            np_type = np.float64
        else:
            raise ValueError("unsupported dtype: {}".format(self.dtype))
        grpmat = csc_matrix((np.ones_like(self.g, dtype=np_type), self.g,
                             np.arange(self.g.shape[0] + 1))).tocsr().tocoo()
        print(grpmat.shape)
        sqrt_sizes = np.sqrt(sizes)
        if self.partition is None:
            self.grpmat = coo_to_sparsetensor(grpmat)
            with tf.device(self.devices):
                self.sqrt_sizes = tf.constant(sqrt_sizes, dtype=self.dtype)
                self.grpidx = tf.constant(self.g)
                self.grpidx_2d = tf.reshape(self.grpidx, (-1, 1))
                self.max_norms = tf.constant(self.lam * sqrt_sizes,
                                             dtype=self.dtype)
                self.maxynorm = tf.sqrt(tf.reduce_sum(self.max_norms**2))
        else:
            partition = self.partition
            grp_device_partitioner = partitioners.groupvar_partitioner(
                partition, gpt)
            dual_partitioner = partitioners.group_partitioner(gpt)
            self.grp_device_part = grp_device_partitioner(
                len(gpt) - 1, len(self.devices))
            grp_device_part = self.grp_device_part

            self.grpmat = distmat.DistSpMat.from_spmatrix(
                grpmat,
                self.devices,
                partitioner_r=grp_device_partitioner,
                partitioner_c=dual_partitioner)

            self.sqrt_sizes = []
            self.grpidx_2d = []
            self.max_norms = []

            for i, d in enumerate(self.devices):
                with tf.device(d):
                    self.sqrt_sizes.append(
                        tf.constant(
                            sqrt_sizes[grp_device_part[i]:grp_device_part[i +
                                                                          1]],
                            dtype=self.dtype))
                    g_sect = self.g[partition[i]:partition[i + 1]]
                    g_sect = g_sect - np.min(g_sect)
                    gidx = tf.constant(g_sect)
                    self.grpidx_2d.append(tf.reshape(gidx, (-1, 1)))
                    self.max_norms.append(
                        tf.constant(
                            self.lam *
                            sqrt_sizes[grp_device_part[i]:grp_device_part[i +
                                                                          1]],
                            dtype=self.dtype))

            self.sqrt_sizes = distmat.DistMat(self.sqrt_sizes)
            self.grpidx_2d = distmat.DistMat(self.grpidx_2d)
            self.max_norms = distmat.DistMat(self.max_norms)
            self.maxynorm = tf.sqrt((self.max_norms**2).reduce_sum())
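A small SciPy check of the `csc_matrix((data, indices, indptr))` construction used for `grpmat`: with one group id per variable in `g`, column j receives a single 1 in row g[j], so the result is the groups-by-variables membership indicator (the `g` below is illustrative):

import numpy as np
from scipy.sparse import csc_matrix

g = np.array([0, 0, 1, 2, 2])             # group id per variable (assumption)
grpmat = csc_matrix((np.ones_like(g, dtype=np.float32), g,
                     np.arange(g.shape[0] + 1))).tocsr().tocoo()
print(grpmat.toarray())
# [[1. 1. 0. 0. 0.]
#  [0. 0. 1. 0. 0.]
#  [0. 0. 0. 1. 1.]]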