Example #1
def grad(X, Y, act, params, grads, aux):

    H, bh = params
    _H, _bh = grads
    a, eh, loss = aux

    # forward pass
    a[0].assign(X)
    n_layers = len(eh)

    for i in range(n_layers):
        # a[i+1] = sigmoid( a[i]*H[i] + bh[i] )
        a[i].dot(H[i], target=a[i + 1])
        a[i + 1].add_row_vec(bh[i])

        if i < n_layers - 1:
            cm.sigmoid(a[i + 1])
        else:
            # last layer
            if act == 'logistic':
                cm.sigmoid(a[i + 1])
            elif act == 'softmax':
                a_t = a[i + 1].transpose()
                cm.softmax(a_t)
                a_t.transpose(target=a[i + 1])
                a_t.free_device_memory()
            else:
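                # 'linear': leave the pre-activation as-is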
                pass

    # backward pass

    # compute error term of the last layer
    a[-1].subtract(Y, target=eh[-1])

    # check the following
    for i in range(n_layers - 1, -1, -1):

        # compute derivatives
        _H[i].assign(0.0)
        _H[i].add_dot(a[i].T, eh[i])
        eh[i].sum(axis=0, target=_bh[i])

        # compute error term for the previous layer
        if i > 0:
            # eh[i-1] = sigmoid'(a[i]) x ( eh[i]*H[i]' )
            eh[i].dot(H[i].T, target=eh[i - 1])
            eh[i - 1].apply_logistic_deriv(a[i])

    if act == 'logistic':
        cm.cross_entropy_bernoulli(Y, a[n_layers], target=loss)
    elif act == 'softmax':
        cm.cross_entropy(Y, a[n_layers], target=loss)
    elif act == 'linear':
        # squared error: use the error term eh[-1] = a[-1] - Y computed above
        eh[-1].mult(eh[-1], target=loss)

    return loss.sum()
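
The example above expects pre-allocated device buffers: H/bh and their gradient twins _H/_bh hold one matrix per layer, a holds one activation buffer per layer plus the input, eh one error buffer per layer, and loss matches the output shape. Below is a minimal allocation-and-call sketch using cudamat; the layer sizes, batch size, random data and the gpu() helper are illustrative assumptions, not part of the original example.

import numpy as np
import cudamat as cm

cm.cublas_init()

sizes = [784, 256, 10]     # input -> hidden -> output (illustrative)
batch_size = 128
n_layers = len(sizes) - 1

def gpu(x):
    # hypothetical helper: upload a float32, Fortran-ordered array to the GPU
    return cm.CUDAMatrix(np.asarray(x, dtype=np.float32, order='F'))

# parameters and matching gradient buffers, one pair per layer
H   = [gpu(0.01 * np.random.randn(sizes[i], sizes[i + 1])) for i in range(n_layers)]
bh  = [gpu(np.zeros((1, sizes[i + 1]))) for i in range(n_layers)]
_H  = [cm.empty(w.shape) for w in H]
_bh = [cm.empty(b.shape) for b in bh]

# auxiliary buffers: activations (including the input), error terms, loss
a    = [cm.empty((batch_size, s)) for s in sizes]
eh   = [cm.empty((batch_size, s)) for s in sizes[1:]]
loss = cm.empty((batch_size, sizes[-1]))

X = gpu(np.random.rand(batch_size, sizes[0]))
Y = gpu(np.random.rand(batch_size, sizes[-1]))

err = grad(X, Y, 'logistic', (H, bh), (_H, _bh), (a, eh, loss))  # summed loss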
Example #2
def grad(X, Y, act_type, rho, params, grads, aux):

    H, O, bh, bo = params
    _H, _O, _bh, _bo = grads

    a, z, eh, eo, loss, s, s_m = aux

    _H.assign(0.0)
    _O.assign(0.0)
    _bh.assign(0.0)
    _bo.assign(0.0)

    # watch out for redundant accumulations

    ### FORWARD PASS ###

    # a = sigmoid( X*H + bh )

    X.dot(H, target=a)
    a.add_row_vec(bh)
    cm.sigmoid(a)

    # z = sigmoid( a*O + bo )
    # a.dot(H.T, target=z)   # use tied weights instead of a separate O

    a.dot(O, target=z)
    z.add_row_vec(bo)

    if act_type == 'logistic':
        cm.sigmoid(z)          # DEBUG

    ### BACKWARD PASS ###

    # eo = z - y

    z.subtract(Y, target=eo)

    # eh = sigmoid'(a) x ( eo*O' + (rho-1)/(s-1) - rho/s )

    eo.dot(O.T, target=eh)

    # the following needs to be verified
    if rho > 0:
        a.sum(axis=0, target=s)
        s.mult(1.0/a.shape[0])        # normalize by batch_size
        s.reciprocal()
        s.mult(rho)

        a.sum(axis=0, target=s_m)   # TODO: remove this redundancy
        s_m.mult(1.0/a.shape[0])        # normalize by batch_size
        s_m.subtract(1.0)
        s_m.reciprocal()
        s_m.mult(rho-1)
        s.subtract(s_m)

        eh.add_row_mult(s, -1.0)
    
    eh.apply_logistic_deriv(a)

    ### COMPUTE GRADIENTS ###

    _O.add_dot(a.T, eo)
    _H.add_dot(X.T, eh)

    _bo.add_sums(eo, axis=0)
    _bh.add_sums(eh, axis=0)

    ### COMPUTE ERROR ###
    if act_type == 'logistic':
        cm.cross_entropy_bernoulli(Y, z, target=loss)
    elif act_type == 'linear':
        eo.mult(eo, target=loss) #loss.add_mult(eo, eo)   # DEBUG
    else:
        raise ValueError("Activation function '%s' is unknown" % act_type)

    err = loss.sum()

    return err
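
The rho > 0 branch adds, per hidden unit, the derivative of the sparsity penalty KL(rho || s) = rho*log(rho/s) + (1-rho)*log((1-rho)/(1-s)) with respect to the mean activation s, namely (rho-1)/(s-1) - rho/s, to eh before the sigmoid derivative is applied (the buffer s ends up holding the negated term, and add_row_mult(s, -1.0) flips the sign back). Below is a CPU reference sketch of that quantity; the function name and NumPy implementation are mine, for checking against the device code only.

import numpy as np

def kl_sparsity_grad(a_host, rho):
    # a_host: (batch_size, n_hidden) hidden activations copied to the host
    # s: mean activation of each hidden unit over the batch
    s = a_host.mean(axis=0)
    # d/ds [ rho*log(rho/s) + (1-rho)*log((1-rho)/(1-s)) ]
    return (rho - 1.0) / (s - 1.0) - rho / s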
Example #3
  def GetLoss(self, get_deriv=False):
    """Compute loss and also deriv w.r.t to it if asked for.

    Compute the loss function. Targets should be in self.data, predictions
    should be in self.state.
    Args:
      get_deriv: If True, compute the derivative of the loss and put it in
        self.deriv.
    """
    perf = deepnet_pb2.Metrics()
    perf.MergeFrom(self.proto.performance_stats)
    perf.count = self.batchsize
    tiny = self.tiny
    if self.loss_function == deepnet_pb2.Layer.CROSS_ENTROPY:
      if self.activation == deepnet_pb2.Hyperparams.LOGISTIC:
        data = self.data
        state = self.state
        deriv = self.deriv
        temp3 = self.dimsize
        unitcell = self.unitcell
 
        cm.cross_entropy_bernoulli(data, state, target=deriv, tiny=self.tiny)
        deriv.sum(axis=1, target=temp3)
        temp3.sum(axis=0, target=unitcell)
        cross_entropy = unitcell.euclid_norm()

        cm.correct_preds(data, state, target=deriv, cutoff=0.5)
        deriv.sum(axis=1, target=temp3)
        temp3.sum(axis=0, target=unitcell)
        correct_preds = unitcell.euclid_norm()

        if get_deriv:
          self.state.subtract(self.data, target=self.deriv)

        perf.cross_entropy = cross_entropy
        perf.correct_preds = correct_preds
      elif self.activation == deepnet_pb2.Hyperparams.SOFTMAX:
        temp2 = self.temp2
        temp = self.temp
        batchsize = self.batchsize
        dimensions = self.dimensions
        numlabels = self.numlabels
        state = self.state
        data = self.data
        unitcell = self.unitcell
        indices = self.indices

        # Optimized for space to handle large number of labels in a softmax.
        data.reshape((1, batchsize * dimensions))
        data.add(self.rowshift, target=indices)
        state.reshape((numlabels, dimensions * batchsize))
        state.max(axis=0, target=temp2)
        state.reshape((1, batchsize * numlabels * dimensions))
        state.select_columns(indices, temp)
        temp2.subtract(temp)
        temp2.sign(target=temp2)
        temp2.sum(axis=1, target=unitcell)
        correct_preds = batchsize - unitcell.euclid_norm()
        if get_deriv:
          temp.subtract(1, target=temp2)
          state.set_selected_columns(indices, temp2)
          state.reshape((numlabels * dimensions, batchsize))
          self.deriv.assign(self.state)
        state.reshape((numlabels * dimensions, batchsize))
        temp.add(tiny)
        cm.log(temp)
        temp.sum(axis=1, target=unitcell)
        cross_entropy = unitcell.euclid_norm()
        perf.cross_entropy = cross_entropy
        perf.correct_preds = correct_preds
    elif self.loss_function == deepnet_pb2.Layer.SQUARED_LOSS:
      if self.activation == deepnet_pb2.Hyperparams.REPLICATED_SOFTMAX:
        if self.hyperparams.normalize_error:
          self.data.sum(axis=0, target=self.temp)
          self.temp.add(self.tiny)
          self.data.div_by_row(self.temp, target=self.deriv)
          self.state.div_by_row(self.NN, target=self.expanded_batch)
          self.deriv.subtract(self.expanded_batch)
        else:
          self.data.sum(axis=0, target=self.temp)
          self.temp.add(self.tiny)
          self.state.div_by_row(self.temp, target=self.deriv)
          self.deriv.subtract(self.data)
      elif self.activation == deepnet_pb2.Hyperparams.SOFTMAX:
        self.expansion_matrix.select_columns(self.data, target=self.expanded_batch)
        self.state.subtract(self.expanded_batch, target=self.deriv)
      else:
        if 'precision' in self.params:
          self.data.mult_by_col(self.params['precision'], target=self.deriv)
          self.deriv.subtract(self.state)
        else:
          self.state.subtract(self.data, target=self.deriv)
      error = self.deriv.euclid_norm()**2
      perf.error = error
      if self.activation != deepnet_pb2.Hyperparams.SOFTMAX and \
         self.activation != deepnet_pb2.Hyperparams.REPLICATED_SOFTMAX:
        self.ComputeDeriv()
    else:
      raise Exception('Unknown loss function.')
    return perf
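
For the LOGISTIC / CROSS_ENTROPY branch above, the three quantities being accumulated are the Bernoulli cross-entropy, the number of correctly thresholded predictions, and (when get_deriv is set) state - data as the derivative. A CPU sketch for reference; the function below, and its assumption that data holds binary targets, are mine and not part of deepnet.

import numpy as np

def logistic_ce_stats(data, state, tiny=1e-10, get_deriv=False):
    # data: binary targets; state: predicted probabilities, same shape
    cross_entropy = -np.sum(data * np.log(state + tiny)
                            + (1.0 - data) * np.log(1.0 - state + tiny))
    # a prediction counts as correct when thresholding at 0.5 matches the target
    correct_preds = np.sum((state > 0.5) == (data > 0.5))
    deriv = state - data if get_deriv else None   # dCE/d(pre-sigmoid input)
    return cross_entropy, correct_preds, deriv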