Example #1
   def update_learner(self,example):
      self.layers[0][:] = example[0]

      # fprop
      for h in range(self.n_hidden_layers):
         mllin.product_matrix_vector(self.Ws[h],self.layers[h],self.layer_acts[h+1])
         self.layer_acts[h+1] += self.cs[h]
         mlnonlin.sigmoid(self.layer_acts[h+1],self.layers[h+1])

      mllin.product_matrix_vector(self.U,self.layers[-1],self.output_act)
      self.output_act += self.d
      mlnonlin.softmax(self.output_act,self.output)

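      # bprop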
      self.doutput_act[:] = self.output
      self.doutput_act[example[1]] -= 1
      self.doutput_act *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)

      self.dd[:] = self.doutput_act
      mllin.outer(self.doutput_act,self.layers[-1],self.dU)      
      mllin.product_matrix_vector(self.U.T,self.doutput_act,self.dlayers[-1])
      mlnonlin.dsigmoid(self.layers[-1],self.dlayers[-1],self.dlayer_acts[-1])
      for h in range(self.n_hidden_layers-1,-1,-1):
         self.dcs[h][:] = self.dlayer_acts[h+1]
         mllin.outer(self.dlayer_acts[h+1],self.layers[h],self.dWs[h])
         mllin.product_matrix_vector(self.Ws[h].T,self.dlayer_acts[h+1],self.dlayers[h])
         mlnonlin.dsigmoid(self.layers[h],self.dlayers[h],self.dlayer_acts[h])

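      # update parameters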
      self.U -= self.dU
      self.d -= self.dd
      for h in range(self.n_hidden_layers-1,-1,-1):
         self.Ws[h] -= self.dWs[h]
         self.cs[h] -= self.dcs[h]

      self.n_updates += 1
Example #2
    def update_learner(self, example):
        self.input[self.input_order] = example

        # fprop
        np.multiply(self.input, self.W, self.input_times_W)
        np.add.accumulate(self.input_times_W[:, :-1],
                          axis=1,
                          out=self.acc_input_times_W[:, 1:])
        self.acc_input_times_W[:, 0] = 0
        self.acc_input_times_W += self.c[:, np.newaxis]
        mlnonlin.sigmoid(self.acc_input_times_W, self.hid)

        if self.untied_weights:
            np.multiply(self.hid, self.V, self.Whid)
        else:
            np.multiply(self.hid, self.W, self.Whid)

        mllin.sum_columns(self.Whid, self.recact)
        self.recact += self.b
        mlnonlin.sigmoid(self.recact, self.rec)

        # bprop
        np.subtract(self.rec, self.input, self.drec)
        self.drec *= self.alpha
        self.db[:] = self.drec

        if self.untied_weights:
            np.multiply(self.drec, self.hid, self.dV)
            np.multiply(self.drec, self.V, self.dhid)
            self.dW[:] = 0
        else:
            np.multiply(self.drec, self.hid, self.dW)
            np.multiply(self.drec, self.W, self.dhid)

        mlnonlin.dsigmoid(self.hid, self.dhid, self.dacc_input_times_W)
        mllin.sum_rows(self.dacc_input_times_W, self.dc)
        np.add.accumulate(self.dacc_input_times_W[:, :0:-1],
                          axis=1,
                          out=self.dWenc[:, -2::-1])
        self.dWenc[:, -1] = 0
        self.dWenc *= self.input
        self.dW += self.dWenc

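        # scale gradients by the decayed learning rate, then update the parameters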
        self.dW *= self.learning_rate / (
            1. + self.decrease_constant * self.n_updates)
        self.db *= self.learning_rate / (
            1. + self.decrease_constant * self.n_updates)
        self.dc *= self.learning_rate / (
            1. + self.decrease_constant * self.n_updates)

        self.W -= self.dW
        self.b -= self.db
        self.c -= self.dc

        if self.untied_weights:
            self.dV *= self.learning_rate / (
                1. + self.decrease_constant * self.n_updates)
            self.V -= self.dV
        self.n_updates += 1
Example #3
   def bprop(self,target):
       """
       Computes the loss derivatives with respect to all parameters
       times the current learning rate.  It assumes that
       ``self.fprop(input)`` was called first. All the derivatives are
       put in their corresponding object attributes (i.e. ``self.d*``).
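       Typical usage (a sketch; names follow this example): ``self.fprop(input)``
       followed by ``self.bprop(target)``; an update step then subtracts
       ``self.dWs``, ``self.dVs``, ``self.dcs`` and ``self.dd`` from the
       corresponding parameters, as the ``update_learner`` examples here do.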
       """
       self.doutput_act[:] = self.output
       self.doutput_act[target] -= 1
       self.doutput_act *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)
 
       self.dd[:] = self.doutput_act
       for k in range(self.n_k_means):
           c = self.cluster_indices[k]
           idx = c + k*self.n_clusters
 
           mllin.outer(self.doutput_act,self.layers[k],self.dVs[idx])
           mllin.product_matrix_vector(self.Vs[idx].T,self.doutput_act,self.dlayers[k])
           #mlnonlin.dsigmoid(self.layers[k],self.dlayers[k],self.dlayer_acts[k])
           if self.activation_function == 'sigmoid':
               mlnonlin.dsigmoid(self.layers[k],self.dlayers[k],self.dlayer_acts[k])
           elif self.activation_function == 'tanh':
               mlnonlin.dtanh(self.layers[k],self.dlayers[k],self.dlayer_acts[k])
           elif self.activation_function == 'reclin':
               mlnonlin.dreclin(self.layers[k],self.dlayers[k],self.dlayer_acts[k])
           else:
               raise ValueError('activation_function must be either \'sigmoid\', \'tanh\' or \'reclin\'')

           self.dcs[idx][:] = self.dlayer_acts[k]
           mllin.outer(self.dlayer_acts[k],self.input,self.dWs[idx])

       if self.autoencoder_regularization != 0:
           self.dae_doutput_act[:] = self.dae_output
           self.dae_doutput_act[:] -= self.input
           self.dae_doutput_act *= 2*self.autoencoder_regularization*self.learning_rate/(1.+self.decrease_constant*self.n_updates)
           
           self.dae_dd[:] = self.dae_doutput_act
           for k in range(self.n_k_means):
               c = self.cluster_indices[k]
               idx = c + k*self.n_clusters
           
               mllin.outer(self.dae_doutput_act,self.dae_layers[k],self.dae_dWsT[idx])
               self.dWs[idx] += self.dae_dWsT[idx].T
               mllin.product_matrix_vector(self.Ws[idx],self.dae_doutput_act,self.dae_dlayers[k])
               #mlnonlin.dsigmoid(self.dae_layers[k],self.dae_dlayers[k],self.dae_dlayer_acts[k])
               if self.activation_function == 'sigmoid':
                   mlnonlin.dsigmoid(self.dae_layers[k],self.dae_dlayers[k],self.dae_dlayer_acts[k])     
               elif self.activation_function == 'tanh':
                   mlnonlin.dtanh(self.dae_layers[k],self.dae_dlayers[k],self.dae_dlayer_acts[k])     
               elif self.activation_function == 'reclin':
                   mlnonlin.dreclin(self.dae_layers[k],self.dae_dlayers[k],self.dae_dlayer_acts[k])     
               else:
                   raise ValueError('activation_function must be either \'sigmoid\', \'tanh\' or \'reclin\'')

               self.dcs[idx] += self.dae_dlayer_acts[k]
               mllin.outer(self.dae_dlayer_acts[k],self.dae_input,self.dae_dWs[idx])
               self.dWs[idx] += self.dae_dWs[idx]               
Example #4
   def update_learner(self,example):
      self.layers[0][:] = example[0]

      # fprop
      for h in range(self.n_hidden_layers):
         mllin.product_matrix_vector(self.Ws[h],self.layers[h],self.layer_acts[h+1])
         self.layer_acts[h+1] += self.cs[h]
         if self.activation_function == 'sigmoid':
             mlnonlin.sigmoid(self.layer_acts[h+1],self.layers[h+1])
         elif self.activation_function == 'tanh':
             mlnonlin.tanh(self.layer_acts[h+1],self.layers[h+1])
         elif self.activation_function == 'reclin':
             mlnonlin.reclin(self.layer_acts[h+1],self.layers[h+1])
         else:
             raise ValueError('activation_function must be either \'sigmoid\', \'tanh\' or \'reclin\'')

      mllin.product_matrix_vector(self.U,self.layers[-1],self.output_act)
      self.output_act += self.d
      mlnonlin.softmax(self.output_act,self.output)

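      # bprop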
      self.doutput_act[:] = self.output
      self.doutput_act[example[1]] -= 1
      self.doutput_act *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)

      self.dd[:] = self.doutput_act
      mllin.outer(self.doutput_act,self.layers[-1],self.dU)      
      mllin.product_matrix_vector(self.U.T,self.doutput_act,self.dlayers[-1])
      if self.activation_function == 'sigmoid':
          mlnonlin.dsigmoid(self.layers[-1],self.dlayers[-1],self.dlayer_acts[-1])
      elif self.activation_function == 'tanh':
          mlnonlin.dtanh(self.layers[-1],self.dlayers[-1],self.dlayer_acts[-1])
      elif self.activation_function == 'reclin':
          mlnonlin.dreclin(self.layers[-1],self.dlayers[-1],self.dlayer_acts[-1])
      else:
          raise ValueError('activation_function must be either \'sigmoid\', \'tanh\' or \'reclin\'')

      for h in range(self.n_hidden_layers-1,-1,-1):
         self.dcs[h][:] = self.dlayer_acts[h+1]
         mllin.outer(self.dlayer_acts[h+1],self.layers[h],self.dWs[h])
         mllin.product_matrix_vector(self.Ws[h].T,self.dlayer_acts[h+1],self.dlayers[h])
         if self.activation_function == 'sigmoid':
             mlnonlin.dsigmoid(self.layers[h],self.dlayers[h],self.dlayer_acts[h])
         elif self.activation_function == 'tanh':
             mlnonlin.dtanh(self.layers[h],self.dlayers[h],self.dlayer_acts[h])
         elif self.activation_function == 'reclin':
             mlnonlin.dreclin(self.layers[h],self.dlayers[h],self.dlayer_acts[h])
         else:
             raise ValueError('activation_function must be either \'sigmoid\', \'tanh\' or \'reclin\'')

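      # update parameters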
      self.U -= self.dU
      self.d -= self.dd
      for h in range(self.n_hidden_layers-1,-1,-1):
         self.Ws[h] -= self.dWs[h]
         self.cs[h] -= self.dcs[h]

      self.n_updates += 1
Example #5
def test_dsigmoid():
    """
    Testing nonlinear sigmoid deriv.
    """

    input = np.random.randn(30, 20)
    output = np.zeros((30, 20))
    nonlinear.sigmoid(input, output)
    dinput = np.zeros((30, 20))
    doutput = np.random.randn(30, 20)
    nonlinear.dsigmoid(output, doutput, dinput)
    assert np.sum(np.abs(dinput - doutput * output * (1 - output))) < 1e-12
Example #6
def test_dsigmoid():
    """
    Testing nonlinear sigmoid deriv.
    """
    
    input = np.random.randn(30,20)
    output = np.zeros((30,20))
    nonlinear.sigmoid(input,output)
    dinput = np.zeros((30,20))
    doutput = np.random.randn(30,20)
    nonlinear.dsigmoid(output,doutput,dinput)
    assert np.sum(np.abs(dinput-doutput*output*(1-output))) < 1e-12
Example #7
   def update_learner(self,example):
      self.input[self.input_order] = example
   
      # fprop
      np.multiply(self.input,self.W,self.input_times_W)
      np.add.accumulate(self.input_times_W[:,:-1],axis=1,out=self.acc_input_times_W[:,1:])
      self.acc_input_times_W[:,0] = 0
      self.acc_input_times_W += self.c[:,np.newaxis]
      mlnonlin.sigmoid(self.acc_input_times_W,self.hid)

      if self.untied_weights:
          np.multiply(self.hid,self.V,self.Whid)
      else:
          np.multiply(self.hid,self.W,self.Whid)

      mllin.sum_columns(self.Whid,self.recact)
      self.recact += self.b
      mlnonlin.sigmoid(self.recact,self.rec)

      # bprop
      np.subtract(self.rec,self.input,self.drec)
      self.drec *= self.alpha
      self.db[:] = self.drec

      if self.untied_weights:
          np.multiply(self.drec,self.hid,self.dV)
          np.multiply(self.drec,self.V,self.dhid)
          self.dW[:] = 0
      else:
          np.multiply(self.drec,self.hid,self.dW)
          np.multiply(self.drec,self.W,self.dhid)

      mlnonlin.dsigmoid(self.hid,self.dhid,self.dacc_input_times_W)
      mllin.sum_rows(self.dacc_input_times_W,self.dc)      
      np.add.accumulate(self.dacc_input_times_W[:,:0:-1],axis=1,out=self.dWenc[:,-2::-1])
      self.dWenc[:,-1] = 0
      self.dWenc *= self.input
      self.dW += self.dWenc

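      # scale gradients by the decayed learning rate, then update the parameters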
      self.dW *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)
      self.db *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)
      self.dc *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)

      self.W -= self.dW
      self.b -= self.db
      self.c -= self.dc

      if self.untied_weights:
          self.dV *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)
          self.V -= self.dV
      self.n_updates += 1
Example #8
    def apply_dactivation(self, output, doutput, dinput):
        """
        Apply the derivative of the activation function.
        """
        if self.activation_function == "sigmoid":
            mlnonlin.dsigmoid(output, doutput, dinput)
        elif self.activation_function == "tanh":
            mlnonlin.dtanh(output, doutput, dinput)
        elif self.activation_function == "reclin":
            mlnonlin.dreclin(output, doutput, dinput)
        elif self.activation_function == "softmax":
            dinput[:] = output * (doutput - (doutput * output).sum(axis=1).reshape((-1, 1)))
        else:
            raise ValueError("activation_function must be either 'sigmoid', 'tanh', 'reclin' or 'softmax'")
Example #9
    def apply_dactivation(self, output, doutput, dinput):
        """
        Apply the derivative of the activation function.
        """
        if self.activation_function == 'sigmoid':
            mlnonlin.dsigmoid(output,doutput,dinput)
        elif self.activation_function == 'tanh':
            mlnonlin.dtanh(output,doutput,dinput)
        elif self.activation_function == 'reclin':
            mlnonlin.dreclin(output,doutput,dinput)
        elif self.activation_function == 'softmax':
            dinput[:] = output*(doutput-(doutput*output).sum(axis=1).reshape((-1,1)))
        else:
            raise ValueError('activation_function must be either \'sigmoid\', \'tanh\', \'reclin\' or \'softmax\'')
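Example #10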
    def update_learner(self, example):
        self.layers[0][:] = example[0]

        # fprop
        for h in range(self.n_hidden_layers):
            mllin.product_matrix_vector(self.Ws[h], self.layers[h],
                                        self.layer_acts[h + 1])
            self.layer_acts[h + 1] += self.cs[h]
            mlnonlin.sigmoid(self.layer_acts[h + 1], self.layers[h + 1])

        mllin.product_matrix_vector(self.U, self.layers[-1], self.output_act)
        self.output_act += self.d
        mlnonlin.softmax(self.output_act, self.output)

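        # bprop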
        self.doutput_act[:] = self.output
        self.doutput_act[example[1]] -= 1
        self.doutput_act *= self.learning_rate / (
            1. + self.decrease_constant * self.n_updates)

        self.dd[:] = self.doutput_act
        mllin.outer(self.doutput_act, self.layers[-1], self.dU)
        mllin.product_matrix_vector(self.U.T, self.doutput_act,
                                    self.dlayers[-1])
        mlnonlin.dsigmoid(self.layers[-1], self.dlayers[-1],
                          self.dlayer_acts[-1])
        for h in range(self.n_hidden_layers - 1, -1, -1):
            self.dcs[h][:] = self.dlayer_acts[h + 1]
            mllin.outer(self.dlayer_acts[h + 1], self.layers[h], self.dWs[h])
            mllin.product_matrix_vector(self.Ws[h].T, self.dlayer_acts[h + 1],
                                        self.dlayers[h])
            mlnonlin.dsigmoid(self.layers[h], self.dlayers[h],
                              self.dlayer_acts[h])

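        # update parameters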
        self.U -= self.dU
        self.d -= self.dd
        for h in range(self.n_hidden_layers - 1, -1, -1):
            self.Ws[h] -= self.dWs[h]
            self.cs[h] -= self.dcs[h]

        self.n_updates += 1
Example #11
    def update_learner(self, vec_input):
        
        self.vec_input[self.input_order] = vec_input

        #fprop
        self.fprop()

        # bprop, computing gradient of -log p(vec_input)
        np.subtract(self.vec_recProb,self.vec_input,self.vec_grad_bias_inp)
        np.multiply(self.vec_grad_bias_inp,self.mat_h,self.mat_grad_V)
        np.multiply(self.vec_grad_bias_inp,self.mat_V,self.mat_grad_h)
        mlnonlin.dsigmoid(self.mat_h,self.mat_grad_h,self.mat_grad_temp)
        mllin.sum_rows(self.mat_grad_temp,self.vec_grad_bias_h)
        np.add.accumulate(self.mat_grad_temp[:,:0:-1],axis=1,out=self.mat_grad_W[:,-2::-1])
        self.mat_grad_W[:,-1] = 0
        self.mat_grad_W *= self.vec_input
        
        #update
        self.vec_bias_inp -= self.learning_rate*self.vec_grad_bias_inp
        self.vec_bias_h -= self.learning_rate*self.vec_grad_bias_h
        self.mat_W -= self.learning_rate*self.mat_grad_W
        self.mat_V -= self.learning_rate*self.mat_grad_V
Example #12
    def update_learner(self, vec_input):

        self.vec_input[self.input_order] = vec_input

        #fprop
        self.fprop()

        # bprop, computing gradient of -log p(vec_input)
        np.subtract(self.vec_recProb, self.vec_input, self.vec_grad_bias_inp)
        np.multiply(self.vec_grad_bias_inp, self.mat_h, self.mat_grad_V)
        np.multiply(self.vec_grad_bias_inp, self.mat_V, self.mat_grad_h)
        mlnonlin.dsigmoid(self.mat_h, self.mat_grad_h, self.mat_grad_temp)
        mllin.sum_rows(self.mat_grad_temp, self.vec_grad_bias_h)
        np.add.accumulate(self.mat_grad_temp[:, :0:-1],
                          axis=1,
                          out=self.mat_grad_W[:, -2::-1])
        self.mat_grad_W[:, -1] = 0
        self.mat_grad_W *= self.vec_input

        #update
        self.vec_bias_inp -= self.learning_rate * self.vec_grad_bias_inp
        self.vec_bias_h -= self.learning_rate * self.vec_grad_bias_h
        self.mat_W -= self.learning_rate * self.mat_grad_W
        self.mat_V -= self.learning_rate * self.mat_grad_V
Example #13
P2, L2, U2 = scipy.linalg.lu(A)
print "Scipy vs mathutils.linalg diff. P:", np.sum(np.abs(P - P2))
print "Scipy vs mathutils.linalg diff. L:", np.sum(np.abs(L - L2))
print "Scipy vs mathutils.linalg diff. U:", np.sum(np.abs(U - U2))

print 'Testing nonlinear sigmoid'
input = np.random.randn(30, 20)
output = np.zeros((30, 20))
nonlinear.sigmoid(input, output)
print 'NumPy vs mathutils.nonlinear diff. output:', np.sum(
    np.abs(output - 1 / (1 + np.exp(-input))))

print 'Testing nonlinear sigmoid deriv.'
dinput = np.zeros((30, 20))
doutput = np.random.randn(30, 20)
nonlinear.dsigmoid(output, doutput, dinput)
print 'NumPy vs mathutils.nonlinear diff. output:', np.sum(
    np.abs(dinput - doutput * output * (1 - output)))

print 'Testing nonlinear softmax'
input = np.random.randn(20)
output = np.zeros((20))
nonlinear.softmax(input, output)
print 'NumPy vs mathutils.nonlinear diff. output:', np.sum(
    np.abs(output - np.exp(input) / np.sum(np.exp(input))))

print 'Testing nonlinear softplus'
input = np.random.randn(20)
output = np.zeros((20))
nonlinear.softplus(input, output)
print 'NumPy vs mathutils.nonlinear diff. output:', np.sum(
    np.abs(output - np.log(1 + np.exp(input))))
Example #14
    def update_learner(self, example):
        # apply example to the inputs
        self.layers[0][:] = example[0]
        
        # forward propagation: compute activation values of all units
        
        # hidden layers
        for h in range(self.n_hidden_layers):
            mllin.product_matrix_vector(self.Ws[h], self.layers[h], self.layer_acts[h + 1])
            self.layer_acts[h + 1] += self.cs[h]
            mlnonlin.sigmoid(self.layer_acts[h + 1], self.layers[h + 1])
        
        # output layer
        mllin.product_matrix_vector(self.U, self.layers[-1], self.output_act)
        self.output_act += self.d
        mlnonlin.softmax(self.output_act, self.output)
        
        # back propagation: compute delta errors and updates to weights and
        # biases
        
        # TA:begin
        
        if   self.cost_function == 'CE': 
            self.doutput_act[:] = self.output
            self.doutput_act[example[1]] -= 1
            
        elif self.cost_function == 'SSE':
            y = self.output.copy()
            t = np.zeros(np.shape(y))
            t[example[1]] = 1
            
            # nr of classes
            c = np.size(y)
            
            T2 = (y-t)*y
            T2 = np.array([T2])
            T2 = T2.T
            T2 = np.tile(T2,[1,c])
            
            T3 = np.eye(c,c)
            T3 = T3 - np.tile(y,[c,1])
            
            # delta error at output layer
            self.doutput_act = np.sum(T2*T3,axis=0)
            
        elif self.cost_function == 'EXP':
            y = self.output.copy()
            t = np.zeros(np.shape(y))
            t[example[1]] = 1
            
            # nr of classes
            c = np.size(y)
            
            T1 = y-t
            T1 = np.square(T1)
            T1 = np.sum(T1)
            T1 = T1/self.tau
            T1 = np.exp(T1)
            T1 = 2*T1
            
            T2 = (y-t)*y
            T2 = np.array([T2])
            T2 = T2.T
            T2 = np.tile(T2,[1,c])
            
            T3 = np.eye(c,c)
            T3 = T3 - np.tile(y,[c,1])
            
            # delta error at output layer
            self.doutput_act = T1 * np.sum(T2*T3,axis=0)
            
        # TA:end
        
        self.doutput_act *= self.learning_rate / (1. + self.decrease_constant * self.n_updates)
        self.dd[:] = self.doutput_act
        mllin.outer(self.doutput_act, self.layers[-1], self.dU)
        
        mllin.product_matrix_vector(self.U.T, self.doutput_act, self.dlayers[-1])
        """
        The description and argument names of dsigmoid() are unclear. In
        practice, dsigmoid(s,dx,ds) computes s*(1-s)*dx element-wise and puts
        the result in ds. [TA]
        """
        mlnonlin.dsigmoid(self.layers[-1], self.dlayers[-1], self.dlayer_acts[-1])
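        # i.e. self.dlayer_acts[-1][:] = self.layers[-1] * (1 - self.layers[-1]) * self.dlayers[-1]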
        
        for h in range(self.n_hidden_layers - 1, -1, -1):
            self.dcs[h][:] = self.dlayer_acts[h + 1]
            mllin.outer(self.dlayer_acts[h + 1], self.layers[h], self.dWs[h])
            mllin.product_matrix_vector(self.Ws[h].T, self.dlayer_acts[h + 1], self.dlayers[h])
            mlnonlin.dsigmoid(self.layers[h], self.dlayers[h], self.dlayer_acts[h])
        
        #TA:
        if not self.freeze_Ws_cs:
            # update output weights and biases
            self.U -= self.dU
            self.d -= self.dd
            
            # update all hidden weights and biases
            for h in range(self.n_hidden_layers - 1, -1, -1):
                self.Ws[h] -= self.dWs[h]
                self.cs[h] -= self.dcs[h]
        else:
            # update output weights and biases
            self.U -= self.dU
            self.d -= self.dd
            
#             # update only highest hidden layer
#             h = self.n_hidden_layers - 1
#             self.Ws[h] -= self.dWs[h]
#             self.cs[h] -= self.dcs[h]
        
        self.n_updates += 1
Example #15
    P_row[p_el] = 1
P2,L2,U2 = scipy.linalg.lu(A)
print "Scipy vs mathutils.linalg diff. P:",np.sum(np.abs(P-P2))
print "Scipy vs mathutils.linalg diff. L:",np.sum(np.abs(L-L2))
print "Scipy vs mathutils.linalg diff. U:",np.sum(np.abs(U-U2))

print 'Testing nonlinear sigmoid'
input = np.random.randn(30,20)
output = np.zeros((30,20))
nonlinear.sigmoid(input,output)
print 'NumPy vs mathutils.nonlinear diff. output:',np.sum(np.abs(output-1/(1+np.exp(-input))))

print 'Testing nonlinear sigmoid deriv.'
dinput = np.zeros((30,20))
doutput = np.random.randn(30,20)
nonlinear.dsigmoid(output,doutput,dinput)
print 'NumPy vs mathutils.nonlinear diff. output:',np.sum(np.abs(dinput-doutput*output*(1-output)))

print 'Testing nonlinear softmax'
input = np.random.randn(20)
output = np.zeros((20))
nonlinear.softmax(input,output)
print 'NumPy vs mathutils.nonlinear diff. output:',np.sum(np.abs(output-np.exp(input)/np.sum(np.exp(input))))

print 'Testing nonlinear softplus'
input = np.random.randn(20)
output = np.zeros((20))
nonlinear.softplus(input,output)
print 'NumPy vs mathutils.nonlinear diff. output:',np.sum(np.abs(output-np.log(1+np.exp(input))))

print 'Testing nonlinear reclin'