Example #1
def ApplyActivation(self):
    state = self.state
    if self.activation == deepnet_pb2.Hyperparams.LOGISTIC:
        cm.sigmoid(state)
    elif self.activation == deepnet_pb2.Hyperparams.TANH:
        cm.tanh(state)
    elif self.activation == deepnet_pb2.Hyperparams.RECTIFIED_LINEAR:
        # ReLU: build a 0/1 mask of positive entries, then zero out the rest
        state.greater_than(0, target=self.temp)
        state.mult(self.temp)
    elif self.activation == deepnet_pb2.Hyperparams.RECTIFIED_LINEAR_SMOOTH:
        cm.log_1_plus_exp(state)
    elif self.activation == deepnet_pb2.Hyperparams.LINEAR:
        pass
    elif self.activation == deepnet_pb2.Hyperparams.SOFTMAX:
        # softmax over each column: subtract the column max for numerical
        # stability, exponentiate, then normalize by the column sum
        state.max(axis=0, target=self.temp)
        state.add_row_mult(self.temp, -1)
        cm.exp(state)
        state.sum(axis=0, target=self.temp)
        self.temp.reciprocal()
        state.mult_by_row(self.temp)
    elif self.activation == deepnet_pb2.Hyperparams.REPLICATED_SOFTMAX:
        # replicated softmax: same shift-and-exponentiate steps, but each
        # column is scaled by the counts in NN instead of summing to one
        state.max(axis=0, target=self.temp)
        state.add_row_mult(self.temp, -1)
        cm.exp(state)
        state.sum(axis=0, target=self.temp)
        self.NN.divide(self.temp, target=self.temp)
        state.mult_by_row(self.temp)
    else:
        raise Exception('Unknown activation')
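The SOFTMAX branch above is the standard max-subtraction trick for numerical stability: each column of state (one example per column, which is what axis=0 implies here) is shifted by its maximum, exponentiated, and divided by its column sum. A minimal NumPy sketch of the same computation, for reference only (the function name is made up for illustration):

import numpy as np

def softmax_columns(state):
    # subtract the per-column max (numerical stability), exponentiate,
    # then normalize each column so it sums to one
    shifted = state - state.max(axis=0, keepdims=True)
    e = np.exp(shifted)
    return e / e.sum(axis=0, keepdims=True)

# toy check: every column of the result sums to one
probs = softmax_columns(np.random.randn(5, 3))
assert np.allclose(probs.sum(axis=0), 1.0)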
Example #2
def grad(X, Y, act, params, grads, aux):

    H, bh = params
    _H, _bh = grads
    a, eh, loss = aux

    # forward pass
    a[0].assign(X)
    n_layers = len(eh)

    for i in range(n_layers):
        # a[i+1] = f( a[i]*H[i] + bh[i] ), nonlinearity f applied below
        a[i].dot(H[i], target=a[i + 1])
        a[i + 1].add_row_vec(bh[i])

        if i < n_layers - 1:
            cm.sigmoid(a[i + 1])
        else:
            # last layer
            if act == 'logistic':
                cm.sigmoid(a[i + 1])
            elif act == 'softmax':
                a_t = a[i + 1].transpose()
                cm.softmax(a_t)
                a_t.transpose(target=a[i + 1])
                a_t.free_device_memory()
            else:
                pass

    # backward pass

    # compute error term of the last layer
    a[-1].subtract(Y, target=eh[-1])

    # check the following
    for i in range(n_layers - 1, -1, -1):

        # compute derivatives
        _H[i].assign(0.0)
        _H[i].add_dot(a[i].T, eh[i])
        eh[i].sum(axis=0, target=_bh[i])

        # compute error term for the previous layer
        if i > 0:
            # eh = sigmoid'(a) x ( ehp*H' )
            eh[i].dot(H[i].T, target=eh[i - 1])
            eh[i - 1].apply_logistic_deriv(a[i])

    if act == 'logistic':
        cm.cross_entropy_bernoulli(Y, a[n_layers], target=loss)
    elif act == 'softmax':
        loss = cm.cross_entropy(Y, a[n_layers], target=loss)
    elif act == 'linear':
        eh[-1].mult(eh[-1], target=loss)   # squared error (a[-1] - Y)**2

    return loss.sum()
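The buffer shapes grad expects can be read off the code: a[i] holds the batch activations of layer i (batch x units_i), H[i] is a (units_i x units_{i+1}) weight matrix, bh[i] a (1 x units_{i+1}) bias row, eh[i] matches a[i+1], and loss matches the output layer. A minimal allocation-and-call sketch under those assumptions; the layer sizes, batch size, and random data are made up for illustration, and it assumes the same cudamat build the example uses:

import numpy as np
import cudamat as cm

cm.cublas_init()

batch, sizes = 64, [784, 256, 10]          # illustrative layer sizes

# parameters: H[i] is (sizes[i] x sizes[i+1]), bh[i] is a (1 x sizes[i+1]) row
H  = [cm.CUDAMatrix(0.01 * np.random.randn(sizes[i], sizes[i + 1]))
      for i in range(len(sizes) - 1)]
bh = [cm.CUDAMatrix(np.zeros((1, s))) for s in sizes[1:]]
_H  = [cm.empty(w.shape) for w in H]       # gradient buffers, same shapes as H
_bh = [cm.empty(b.shape) for b in bh]

# aux buffers: activations per layer, error terms per layer, per-element loss
a    = [cm.empty((batch, s)) for s in sizes]
eh   = [cm.empty((batch, s)) for s in sizes[1:]]
loss = cm.empty((batch, sizes[-1]))

X = cm.CUDAMatrix(np.random.rand(batch, sizes[0]))
Y = cm.CUDAMatrix((np.random.rand(batch, sizes[-1]) > 0.5).astype(np.float64))

err = grad(X, Y, 'logistic', (H, bh), (_H, _bh), (a, eh, loss))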
Example #3
def activate(X, params, a):

    batch_size = X.shape[0]
    H, O, bh, bo = params

    # a = f( x*H + bh )

    X.dot(H, target=a)
    a.add_row_vec(bh)
    cm.sigmoid(a)

    return a
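activate runs only the hidden-layer forward pass and writes into a preallocated buffer a of shape (batch_size, n_hidden); O and bo are unpacked but unused here. A minimal usage sketch with illustrative sizes (all values below are made up for illustration):

import numpy as np
import cudamat as cm

cm.cublas_init()

batch_size, n_in, n_hid = 128, 784, 256
params = (cm.CUDAMatrix(0.01 * np.random.randn(n_in, n_hid)),   # H
          cm.CUDAMatrix(0.01 * np.random.randn(n_hid, n_in)),   # O  (unused here)
          cm.CUDAMatrix(np.zeros((1, n_hid))),                  # bh
          cm.CUDAMatrix(np.zeros((1, n_in))))                   # bo (unused here)

X = cm.CUDAMatrix(np.random.rand(batch_size, n_in))
a = cm.empty((batch_size, n_hid))
hidden = activate(X, params, a).asarray()   # sigmoid hidden activations, back on the host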
def grad(X, Y, act_type, rho, params, grads, aux):

    H, O, bh, bo = params
    _H, _O, _bh, _bo = grads

    a, z, eh, eo, loss, s, s_m = aux

    _H.assign(0.0)
    _O.assign(0.0)
    _bh.assign(0.0)
    _bo.assign(0.0)

    # watch out for the redundant accumulations

    ### FORWARD PASS ###

    # a = tanh( x*H + bh )

    X.dot(H, target=a)
    a.add_row_vec(bh)
    cm.sigmoid(a)

    # z = sigm( a*O + bo )
    # a.dot(H.T, target=z)   # use tied weights

    a.dot(O, target=z)
    z.add_row_vec(bo)

    if act_type == 'logistic':
        cm.sigmoid(z)          # DEBUG

    ### BACKWARD PASS ###

    # eo = z - y

    z.subtract(Y, target=eo)

    # eh = sigmoid'(a) x ( eo * O + (rho-1)/(s-1) - rho/s )

    eo.dot(O.T, target = eh)

    # the following needs to be verified
    if rho > 0:
        a.sum(axis=0, target=s)
        s.mult(1.0/a.shape[0])        # normalize by batch_size
        s.reciprocal()
        s.mult(rho)

        a.sum(axis=0, target=s_m)   # TODO: remove this redundancy
        s_m.mult(1.0/a.shape[0])        # normalize by batch_size
        s_m.subtract(1.0)
        s_m.reciprocal()
        s_m.mult(rho-1)
        s.subtract(s_m)

        eh.add_row_mult(s, -1.0)
    
    eh.apply_logistic_deriv(a)

    ### COMPUTE GRADIENTS ###

    _O.add_dot(a.T, eo)
    _H.add_dot(X.T, eh)

    _bo.add_sums(eo, axis=0)
    _bh.add_sums(eh, axis=0)

    ### COMPUTE ERROR ###
    if act_type == 'logistic':
        cm.cross_entropy_bernoulli(Y, z, target=loss)
    elif act_type == 'linear':
        eo.mult(eo, target=loss) #loss.add_mult(eo, eo)   # DEBUG
    else:
        raise ValueError("Activation function '%s' is unknown" % act_type)

    err = loss.sum()

    return err
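One plain gradient-descent step with this grad could look like the sketch below. The buffer shapes are implied by the code above (a and eh are batch x n_hidden; z, eo, and loss are batch x n_output; s and s_m are 1 x n_hidden rows). The sizes, learning rate, and target sparsity rho are made up for illustration, and the setup assumes an autoencoder-style model that reconstructs its own input:

import numpy as np
import cudamat as cm

cm.cublas_init()

batch, n_in, n_hid = 128, 784, 256
n_out, lr, rho = n_in, 0.01, 0.05          # reconstruct the input; target sparsity rho

# parameters and matching gradient buffers
H  = cm.CUDAMatrix(0.01 * np.random.randn(n_in, n_hid))
O  = cm.CUDAMatrix(0.01 * np.random.randn(n_hid, n_out))
bh = cm.CUDAMatrix(np.zeros((1, n_hid)))
bo = cm.CUDAMatrix(np.zeros((1, n_out)))
params = (H, O, bh, bo)
grads  = tuple(cm.empty(p.shape) for p in params)

# aux buffers in the order grad unpacks them: a, z, eh, eo, loss, s, s_m
aux = (cm.empty((batch, n_hid)), cm.empty((batch, n_out)),
       cm.empty((batch, n_hid)), cm.empty((batch, n_out)),
       cm.empty((batch, n_out)), cm.empty((1, n_hid)), cm.empty((1, n_hid)))

X = cm.CUDAMatrix(np.random.rand(batch, n_in))
err = grad(X, X, 'logistic', rho, params, grads, aux)   # reconstruct X itself

# vanilla gradient-descent step: p <- p - lr * dp
for p, dp in zip(params, grads):
    p.add_mult(dp, -lr)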