Example 1
    def grad_h(self, w, i=None, j=None):
        '''Gradient at w. If i is None and j is None, returns the full gradient; if only i is given, returns the gradient at the i-th machine; if only j is given, returns the gradient of the j-th sample(s) of the full training data; if both are given, returns the gradient of the j-th sample(s) at the i-th machine.'''

        if w.ndim == 1:
            if type(j) is int:
                j = [j]

            if i is None and j is None:  # Return the full gradient
                return self.forward_backward(self.X_train, self.Y_train, w)[0]
            elif i is not None and j is None:  # Return the local gradient
                return self.forward_backward(self.X[i], self.Y[i], w)[0]
            elif i is None and j is not None:  # Return the stochastic gradient
                return self.forward_backward(self.X_train[j], self.Y_train[j], w)[0]
            else:  # Return the stochastic gradient
                return self.forward_backward(self.X[i][j], self.Y[i][j], w)[0]

        elif w.ndim == 2:
            if i is None and j is None:  # Return the distributed gradient
                return np.array([self.forward_backward(self.X[i], self.Y[i], w[:, i])[0].copy() for i in range(self.n_agent)]).T
            elif i is None and j is not None:  # Return the stochastic gradient
                return np.array([self.forward_backward(self.X[i][j[i]], self.Y[i][j[i]], w[:, i])[0].copy() for i in range(self.n_agent)]).T
            else:
                log.fatal('For distributed gradients j must be None')

        else:
            log.fatal('Parameter dimension should only be 1 or 2')
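
A minimal standalone sketch of the same i/j dispatch, with a plain least-squares gradient standing in for forward_backward(...)[0]; the shapes and names below (n_agent, m, dim, ls_grad) are illustrative assumptions, not part of the class above.

    import numpy as np

    # Assumed toy shapes: 2 agents, 3 samples per agent, 4 features.
    rng = np.random.default_rng(0)
    n_agent, m, dim = 2, 3, 4
    X = rng.standard_normal((n_agent, m, dim))    # per-agent data, like self.X
    Y = rng.standard_normal((n_agent, m))         # per-agent targets, like self.Y
    X_train = X.reshape(-1, dim)                  # pooled data, like self.X_train
    Y_train = Y.reshape(-1)
    w = rng.standard_normal(dim)

    def ls_grad(X_, Y_, w_):
        # Least-squares gradient, standing in for forward_backward(...)[0].
        return X_.T.dot(X_.dot(w_) - Y_) / len(Y_)

    full = ls_grad(X_train, Y_train, w)           # i is None, j is None
    local = ls_grad(X[0], Y[0], w)                # i = 0,     j is None
    sample = ls_grad(X[1][[2]], Y[1][[2]], w)     # i = 1,     j = [2]
    print(full.shape, local.shape, sample.shape)  # (4,) (4,) (4,)
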
Example 2
    def h(self, w, i=None, j=None, split='train'):
        '''Function value at w. If i is None, returns the full function value; if i is not None and j is None, returns the function value at the i-th machine; otherwise, returns the function value of the j-th sample at the i-th machine.'''

        if split == 'train':
            X = self.X_train
            Y = self.Y_train
        elif split == 'test':
            if w.ndim > 1 or i is not None or j is not None:
                log.fatal(
                    "Function value on test set only applies to one parameter vector"
                )
            X = self.X_test
            Y = self.Y_test

        if i is None:  # Return the function value
            tmp = X.dot(w)
            return -xp.sum(
                (Y - 1) * tmp - xp.log1p(xp.exp(-tmp))) / X.shape[0] + xp.sum(
                    w**2) * self.LAMBDA / 2

        elif j is None:  # Return the function value in machine i
            tmp = self.X[i].dot(w)
            return -xp.sum((self.Y[i] - 1) * tmp - xp.log1p(xp.exp(-tmp))
                           ) / self.m + xp.sum(w**2) * self.LAMBDA / 2
        else:  # Return the function value of sample j at machine i
            tmp = self.X[i][j].dot(w)
            return -((self.Y[i][j] - 1) * tmp -
                     xp.log1p(xp.exp(-tmp))) + xp.sum(w**2) * self.LAMBDA / 2
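
The expression above is the regularized logistic loss for labels in {0, 1}. A small sketch, on assumed toy data with an assumed LAMBDA, checking that it matches the usual cross-entropy written with a sigmoid:

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.standard_normal((5, 3))
    Y = rng.integers(0, 2, size=5).astype(float)  # labels in {0, 1}
    w = rng.standard_normal(3)
    LAMBDA = 0.1                                  # assumed regularization weight

    t = X.dot(w)
    # Form used above: mean of -((Y - 1) * t - log(1 + exp(-t))), plus the L2 term.
    h_above = -np.sum((Y - 1) * t - np.log1p(np.exp(-t))) / len(Y) \
        + np.sum(w ** 2) * LAMBDA / 2

    # Standard cross-entropy with p = sigmoid(t) gives the same number.
    p = 1 / (1 + np.exp(-t))
    h_ce = -np.mean(Y * np.log(p) + (1 - Y) * np.log(1 - p)) \
        + np.sum(w ** 2) * LAMBDA / 2
    print(np.isclose(h_above, h_ce))  # True
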
Example 3
    def generate_graph(self, graph_type='expander', params=None):
        '''Generate a connected connectivity graph of the given type according to params.'''

        if graph_type == 'expander':
            G = nx.paley_graph(self.n_agent).to_undirected()
        elif graph_type == 'grid':
            G = nx.grid_2d_graph(*params)
        elif graph_type == 'cycle':
            G = nx.cycle_graph(self.n_agent)
        elif graph_type == 'path':
            G = nx.path_graph(self.n_agent)
        elif graph_type == 'star':
            G = nx.star_graph(self.n_agent - 1)
        elif graph_type == 'er':
            if params < 2 / (self.n_agent - 1):
                log.fatal(
                    "Need higher probability to create a connected E-R graph!")
            G = None
            while G is None or nx.is_connected(G) is False:
                G = nx.erdos_renyi_graph(self.n_agent, params)
        else:
            log.fatal('Graph type %s not supported' % graph_type)

        self.n_edges = G.number_of_edges()
        self.G = G
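
A standalone sketch of the same generators; n_agent = 13 and the E-R edge probability 0.4 are arbitrary choices, and nx.paley_graph needs a reasonably recent networkx (Paley graphs are usually taken over a prime p with p % 4 == 1).

    import networkx as nx

    n_agent = 13  # assumed agent count; prime with 13 % 4 == 1 for the Paley graph

    G_expander = nx.paley_graph(n_agent).to_undirected()
    G_cycle = nx.cycle_graph(n_agent)

    # Re-sample the Erdos-Renyi graph until it is connected, as above.
    G_er = None
    while G_er is None or not nx.is_connected(G_er):
        G_er = nx.erdos_renyi_graph(n_agent, 0.4)

    for G in (G_expander, G_cycle, G_er):
        print(G.number_of_edges(), nx.is_connected(G))
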
Example 4
    def grad_h(self, w, i=None, j=None):
        '''Gradient of h(x) at w. Depending on the shape of w and parameters i and j, this function behaves differently:
        1. If w is a vector of shape (dim,)
            1.1 If i is None and j is None
                returns the full gradient.
            1.2 If i is not None and j is None
                returns the gradient at the i-th agent.
            1.3 If i is None and j is not None
                returns the gradient of the j-th sample(s) over the full training data.
            1.4 If i is not None and j is not None
                returns the gradient of the j-th data sample at the i-th agent.
            Note i, j can be integers, lists or vectors.
        2. If w is a matrix of shape (dim, n_agent)
            2.1 If j is None
                returns, for each agent, the gradient of that agent's parameter column.
            2.2 If j is not None
                returns, for each agent, the gradient of the j-th sample(s) of that agent's parameter column.
            Note j can be a list of lists or a list of vectors.
        '''

        if w.ndim == 1:
            if type(j) is int:
                j = [j]
            if i is None and j is None:  # Return the full gradient
                return self.X_train.T.dot(
                    logit_1d(self.X_train, w) -
                    self.Y_train) / self.m_total + w * self.LAMBDA
            elif i is not None and j is None:
                return self.X[i].T.dot(logit_1d(self.X[i], w) -
                                       self.Y[i]) / self.m + w * self.LAMBDA
            elif i is None and j is not None:  # Return the stochastic gradient
                return self.X_train[j].T.dot(
                    logit_1d(self.X_train[j], w) -
                    self.Y_train[j]) / len(j) + w * self.LAMBDA
            else:  # Return the gradient of sample j at machine i
                return (logit_1d(self.X[i][j], w) - self.Y[i][j]).dot(
                    self.X[i][j]) / len(j) + w * self.LAMBDA

        elif w.ndim == 2:
            if i is None and j is None:  # Return the distributed gradient
                tmp = logit_2d(self.X, w) - self.Y
                return xp.einsum('ikj,ik->ji', self.X,
                                 tmp) / self.m + w * self.LAMBDA
            elif i is None and j is not None:  # Return the stochastic gradient
                res = []
                for i in range(self.n_agent):
                    if type(j[i]) is int:
                        samples = [j[i]]
                    else:
                        samples = j[i]
                    res.append(self.X[i][samples].T.dot(
                        logit_1d(self.X[i][samples], w[:, i]) -
                        self.Y[i][samples]) / len(samples) +
                               w[:, i] * self.LAMBDA)
                return xp.array(res).T
            else:
                log.fatal('For distributed gradients j must be None')
        else:
            log.fatal('Parameter dimension should only be 1 or 2')
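
A sketch checking the closed-form gradient used above against a numerical gradient, assuming logit_1d(X, w) computes the sigmoid of X.dot(w); the toy data and LAMBDA are assumptions.

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.standard_normal((6, 3))
    Y = rng.integers(0, 2, size=6).astype(float)
    w = rng.standard_normal(3)
    LAMBDA = 0.1

    def sigmoid(t):
        return 1 / (1 + np.exp(-t))

    def loss(w_):
        t = X.dot(w_)
        return -np.sum((Y - 1) * t - np.log1p(np.exp(-t))) / len(Y) \
            + np.sum(w_ ** 2) * LAMBDA / 2

    # Closed form, matching X.T.dot(logit_1d(X, w) - Y) / m + w * LAMBDA above.
    grad = X.T.dot(sigmoid(X.dot(w)) - Y) / len(Y) + w * LAMBDA

    # Central finite differences along each coordinate.
    eps = 1e-6
    num = np.array([(loss(w + eps * e) - loss(w - eps * e)) / (2 * eps)
                    for e in np.eye(3)])
    print(np.allclose(grad, num, atol=1e-5))  # True
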
Example 5
    def split_data(self, X):
        '''Helper function to split data according to the number of training samples per agent.'''
        if self.m * self.n_agent != len(X):
            log.fatal('Data cannot be distributed equally to %d agents' %
                      self.n_agent)
        if X.ndim == 1:
            return X.reshape(self.n_agent, -1)
        else:
            return X.reshape(self.n_agent, self.m, -1)
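
A short sketch of what the two reshapes do, on assumed toy data: each agent receives m consecutive samples from the pooled arrays.

    import numpy as np

    n_agent, m, dim = 3, 4, 2                          # assumed sizes
    Y_pooled = np.arange(n_agent * m)                  # labels, shape (12,)
    X_pooled = np.arange(n_agent * m * dim).reshape(n_agent * m, dim)

    Y_split = Y_pooled.reshape(n_agent, -1)            # -> (3, 4)
    X_split = X_pooled.reshape(n_agent, m, -1)         # -> (3, 4, 2)
    print(Y_split.shape, X_split.shape)

    # Agent 0 gets the first m rows, agent 1 the next m rows, and so on.
    print(np.array_equal(X_split[0], X_pooled[:m]))    # True
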
Example 6
    def grad_h(self, w, i=None, j=None, split='train'):
        '''Gradient of h(x) at w. Depending on the shape of w and parameters i and j, this function behaves differently:
        1. If w is a vector of shape (dim,)
            1.1 If i is None and j is None
                returns the full gradient.
            1.2 If i is not None and j is None
                returns the gradient at the i-th agent.
            1.3 If i is None and j is not None
                returns the gradient of the j-th sample(s) over the full training data.
            1.4 If i is not None and j is not None
                returns the gradient of the j-th data sample at the i-th agent.
            Note i, j can be integers, lists or vectors.
        2. If w is a matrix of shape (dim, n_agent)
            2.1 If j is None
                returns, for each agent, the gradient of that agent's parameter column.
            2.2 If j is not None
                returns, for each agent, the gradient of the j-th sample(s) of that agent's parameter column.
            Note j can be a list of lists or a list of vectors.
        '''

        if w.ndim == 1:
            if type(j) is int:
                j = [j]

            if i is None and j is None:  # Return the full gradient
                return self.H.dot(w) - self.X_T_Y
            elif i is not None and j is None:  # Return the local gradient
                return self.H_list[i].dot(w) - self.X_T_Y_list[i]
            elif i is None and j is not None:  # Return the stochastic gradient
                return (self.X_train[j].dot(w) - self.Y_train[j]).dot(
                    self.X_train[j]) / len(j)
            else:  # Return the stochastic gradient
                return (self.X[i][j].dot(w) - self.Y[i][j]).dot(
                    self.X[i][j]) / len(j)

        elif w.ndim == 2:
            if i is None and j is None:  # Return the distributed gradient
                return xp.einsum('ijk,ki->ji', self.H_list,
                                 w) - self.X_T_Y_list.T
            elif i is None and j is not None:  # Return the stochastic gradient
                res = []
                for i in range(self.n_agent):
                    if type(j[i]) is int:
                        samples = [j[i]]
                    else:
                        samples = j[i]
                    res.append((self.X[i][samples].dot(w[:, i]) -
                                self.Y[i][samples]).dot(self.X[i][samples]) /
                               len(samples))
                return xp.array(res).T
            else:
                log.fatal('For distributed gradients j must be None')
        else:
            log.fatal('Parameter dimension should only be 1 or 2')
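
A sketch of the precomputed form, assuming H = X.T.dot(X) / m_total and X_T_Y = X.T.dot(Y) / m_total (the normalization actually stored by the class is not shown above); under that assumption, H.dot(w) - X_T_Y is the gradient of the averaged least-squares loss.

    import numpy as np

    rng = np.random.default_rng(0)
    m_total, dim = 8, 3                     # assumed toy sizes
    X = rng.standard_normal((m_total, dim))
    Y = rng.standard_normal(m_total)
    w = rng.standard_normal(dim)

    H = X.T.dot(X) / m_total                # assumed precomputation
    X_T_Y = X.T.dot(Y) / m_total            # assumed precomputation

    grad_precomputed = H.dot(w) - X_T_Y
    grad_direct = X.T.dot(X.dot(w) - Y) / m_total
    print(np.allclose(grad_precomputed, grad_direct))  # True
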
Example 7
    def h(self, w, i=None, j=None, split='train'):
        '''Function value of h at w. If i is None and j is None, returns the full function value; if i is not None and j is None, returns the function value at the i-th machine; if both are given, returns the function value of the j-th sample at the i-th machine.'''

        if i is None and j is None:  # Return the function value
            Z = xp.sqrt(2 * self.m_total)
            return xp.sum((self.Y_train / Z - (self.X_train / Z).dot(w))**2)
        elif i is not None and j is None:  # Return the function value at machine i
            return xp.sum((self.Y[i] - self.X[i].dot(w))**2) / 2 / self.m
        elif i is not None and j is not None:  # Return the function value of sample j at machine i
            return xp.sum((self.Y[i][j] - self.X[i][j].dot(w))**2) / 2
        else:
            log.fatal('When i is None, j must be None')
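
The Z = sqrt(2 * m_total) scaling above is just the averaged least-squares value written in a factored form; a quick check on assumed toy data:

    import numpy as np

    rng = np.random.default_rng(0)
    m_total, dim = 10, 4
    X = rng.standard_normal((m_total, dim))
    Y = rng.standard_normal(m_total)
    w = rng.standard_normal(dim)

    Z = np.sqrt(2 * m_total)
    scaled = np.sum((Y / Z - (X / Z).dot(w)) ** 2)     # form used above
    plain = np.sum((Y - X.dot(w)) ** 2) / (2 * m_total)
    print(np.isclose(scaled, plain))  # True
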
Example 8
    def accuracy(self, w, split='train'):
        if len(w.shape) > 1:
            w = w.mean(axis=1)
        if split == 'train':
            X = self.X_train
            Y = self.Y_train
        elif split == 'test':
            X = self.X_test
            Y = self.Y_test
        else:
            log.fatal('Data split %s is not supported' % split)

        Y_hat = X.dot(w)
        Y_hat[Y_hat > 0] = 1
        Y_hat[Y_hat <= 0] = 0
        return np.mean(Y_hat == Y)
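
A compact sketch of the same decision rule on assumed toy data: scores X.dot(w) above 0 are predicted as class 1, the rest as class 0, and accuracy is the fraction matching the {0, 1} labels.

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.standard_normal((6, 3))
    Y = rng.integers(0, 2, size=6)
    w = rng.standard_normal(3)

    Y_hat = (X.dot(w) > 0).astype(int)   # same effect as the two masked assignments
    print(np.mean(Y_hat == Y))
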
Example 9
    def accuracy(self, w, split='test'):
        if w.ndim > 1:
            w = w.mean(axis=1)
        if split == 'train':
            X = self.X_train
            Y = self.Y_train
            labels = self.Y_train_labels
        elif split == 'test':
            X = self.X_test
            Y = self.Y_test
            labels = self.Y_test_labels
        else:
            log.fatal('Data split %s is not supported' % split)

        loss, _, A2 = self.forward(X, Y, w)
        pred = A2.argmax(axis=1)

        return sum(pred == labels) / len(pred), loss
Example 10
    def h(self, w, i=None, j=None, split='train'):
        '''Function value at w. If i is None, returns the full function value; if i is not None and j is None, returns the function value at the i-th machine; otherwise, returns the function value of the j-th sample at the i-th machine.'''

        if split == 'train':
            X = self.X_train
            Y = self.Y_train
        elif split == 'test':
            if w.ndim > 1 or i is not None or j is not None:
                log.fatal(
                    "Function value on test set only applies to one parameter vector"
                )
            X = self.X_test
            Y = self.Y_test

        if i is None and j is None:  # Return the function value
            return self.forward(X, Y, w)[0]
        elif i is not None and j is None:  # Return the function value at machine i
            return self.forward(self.X[i], self.Y[i], w)[0]
        else:  # Return the function value of sample j at machine i
            if type(j) is int:
                j = [j]
            return self.forward(self.X[i][j], self.Y[i][j], w)[0]