def update_learner(self,example):
      self.layers[0][:] = example[0]

      # fprop
      for h in range(self.n_hidden_layers):
         mllin.product_matrix_vector(self.Ws[h],self.layers[h],self.layer_acts[h+1])
         self.layer_acts[h+1] += self.cs[h]
         mlnonlin.sigmoid(self.layer_acts[h+1],self.layers[h+1])

      mllin.product_matrix_vector(self.U,self.layers[-1],self.output_act)
      self.output_act += self.d
      mlnonlin.softmax(self.output_act,self.output)

      self.doutput_act[:] = self.output
      self.doutput_act[example[1]] -= 1
      self.doutput_act *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)

      self.dd[:] = self.doutput_act
      mllin.outer(self.doutput_act,self.layers[-1],self.dU)      
      mllin.product_matrix_vector(self.U.T,self.doutput_act,self.dlayers[-1])
      mlnonlin.dsigmoid(self.layers[-1],self.dlayers[-1],self.dlayer_acts[-1])
      for h in range(self.n_hidden_layers-1,-1,-1):
         self.dcs[h][:] = self.dlayer_acts[h+1]
         mllin.outer(self.dlayer_acts[h+1],self.layers[h],self.dWs[h])
         mllin.product_matrix_vector(self.Ws[h].T,self.dlayer_acts[h+1],self.dlayers[h])
         mlnonlin.dsigmoid(self.layers[h],self.dlayers[h],self.dlayer_acts[h])

      self.U -= self.dU
      self.d -= self.dd
      for h in range(self.n_hidden_layers-1,-1,-1):
         self.Ws[h] -= self.dWs[h]
         self.cs[h] -= self.dcs[h]

      self.n_updates += 1
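A note on the helper modules: every mllin/mlnonlin call above writes its result into a preallocated output array passed as the last argument, which is why the method never allocates inside the update loop, and the update itself is plain stochastic gradient descent for a sigmoid MLP with a softmax output and cross-entropy loss (hence doutput_act = output - one_hot(target)). Below is a minimal pure-NumPy sketch of what these helpers appear to compute, inferred only from how they are called in these examples (and from the dsigmoid note in Example #15); the actual mathutils routines are compiled extensions, so treat these signatures as assumptions rather than the library's documented API.

import numpy as np

def product_matrix_vector(M, v, out):
    # out <- M v, written in place
    np.dot(M, v, out=out)

def outer(u, v, out):
    # out <- u v^T (outer product), written in place
    np.outer(u, v, out=out)

def sigmoid(x, out):
    # out <- 1 / (1 + exp(-x))
    np.divide(1.0, 1.0 + np.exp(-x), out=out)

def dsigmoid(s, dx, out):
    # out <- s * (1 - s) * dx, i.e. the sigmoid derivative applied to an
    # upstream gradient (matches the note in Example #15)
    np.multiply(s * (1.0 - s), dx, out=out)

def softmax(x, out):
    # out <- exp(x) / sum(exp(x)), shifted by max(x) to avoid overflow
    np.exp(x - x.max(), out=out)
    out /= out.sum()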
Example #2
    def cost(self, outputs, example):
        hidden = outputs[0]
        self.input[:] = 0
        self.input[example[1]] = example[0]
        mllin.product_matrix_vector(self.W.T, hidden, self.neg_input_act)
        self.neg_input_act += self.b
        mlnonlin.softmax(self.neg_input_act, self.neg_input_prob)

        return [np.sum((self.input - self.input.sum() * self.neg_input_prob) ** 2)]
Example #3
 def cost(self,outputs,example):
     hidden = outputs[0]
     self.input[:] = 0
     self.input[example[1]] = example[0]
     mllin.product_matrix_vector(self.W.T,hidden,self.neg_input_act)
     self.neg_input_act += self.b
     mlnonlin.softmax(self.neg_input_act,self.neg_input_prob)
     
     return [ np.sum((self.input-self.input.sum()*self.neg_input_prob)**2) ]
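For intuition, the value returned here is the squared error between the bag-of-words input and its expected reconstruction n_words * softmax(W^T h + b) given the hidden activations. A standalone NumPy sketch of the same quantity, with made-up shapes and hypothetical values standing in for the learner's attributes:

import numpy as np

vocab_size, hidden_size = 6, 4
rng = np.random.RandomState(0)
W = rng.randn(hidden_size, vocab_size) * 0.1   # hidden-to-visible weights (hypothetical)
b = np.zeros(vocab_size)                       # visible biases
hidden = rng.rand(hidden_size)                 # hidden activations from fprop
x = np.array([2., 0., 1., 0., 3., 0.])         # word counts; x.sum() is the document length

act = W.T.dot(hidden) + b
p = np.exp(act - act.max())
p /= p.sum()                                   # softmax over the vocabulary
cost = np.sum((x - x.sum() * p) ** 2)          # same quantity as cost() above returns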
Example #4
def test_softmax():
    """
    Testing nonlinear softmax.
    """

    input = np.random.randn(20)
    output = np.zeros((20))
    nonlinear.softmax(input,output)
    assert np.sum(np.abs(output-np.exp(input)/np.sum(np.exp(input)))) < 1e-12
Example #5
   def update_learner(self,example):
      self.layers[0][:] = example[0]

      # fprop
      for h in range(self.n_hidden_layers):
         mllin.product_matrix_vector(self.Ws[h],self.layers[h],self.layer_acts[h+1])
         self.layer_acts[h+1] += self.cs[h]
         if self.activation_function == 'sigmoid':
             mlnonlin.sigmoid(self.layer_acts[h+1],self.layers[h+1])
         elif self.activation_function == 'tanh':
             mlnonlin.tanh(self.layer_acts[h+1],self.layers[h+1])
         elif self.activation_function == 'reclin':
             mlnonlin.reclin(self.layer_acts[h+1],self.layers[h+1])
         else:
             raise ValueError('activation_function must be either \'sigmoid\', \'tanh\' or \'reclin\'')

      mllin.product_matrix_vector(self.U,self.layers[-1],self.output_act)
      self.output_act += self.d
      mlnonlin.softmax(self.output_act,self.output)

      self.doutput_act[:] = self.output
      self.doutput_act[example[1]] -= 1
      self.doutput_act *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)

      self.dd[:] = self.doutput_act
      mllin.outer(self.doutput_act,self.layers[-1],self.dU)      
      mllin.product_matrix_vector(self.U.T,self.doutput_act,self.dlayers[-1])
      if self.activation_function == 'sigmoid':
          mlnonlin.dsigmoid(self.layers[-1],self.dlayers[-1],self.dlayer_acts[-1])
      elif self.activation_function == 'tanh':
          mlnonlin.dtanh(self.layers[-1],self.dlayers[-1],self.dlayer_acts[-1])
      elif self.activation_function == 'reclin':
          mlnonlin.dreclin(self.layers[-1],self.dlayers[-1],self.dlayer_acts[-1])
      else:
          raise ValueError('activation_function must be either \'sigmoid\', \'tanh\' or \'reclin\'')

      for h in range(self.n_hidden_layers-1,-1,-1):
         self.dcs[h][:] = self.dlayer_acts[h+1]
         mllin.outer(self.dlayer_acts[h+1],self.layers[h],self.dWs[h])
         mllin.product_matrix_vector(self.Ws[h].T,self.dlayer_acts[h+1],self.dlayers[h])
         if self.activation_function == 'sigmoid':
             mlnonlin.dsigmoid(self.layers[h],self.dlayers[h],self.dlayer_acts[h])
         elif self.activation_function == 'tanh':
             mlnonlin.dtanh(self.layers[h],self.dlayers[h],self.dlayer_acts[h])
         elif self.activation_function == 'reclin':
             mlnonlin.dreclin(self.layers[h],self.dlayers[h],self.dlayer_acts[h])
         else:
             raise ValueError('activation_function must be either \'sigmoid\', \'tanh\' or \'reclin\'')

      self.U -= self.dU
      self.d -= self.dd
      for h in range(self.n_hidden_layers-1,-1,-1):
         self.Ws[h] -= self.dWs[h]
         self.cs[h] -= self.dcs[h]

      self.n_updates += 1
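The three if/elif chains above repeat the same dispatch on self.activation_function. One way to express it, sketched below with the mlnonlin names used in this example, is to resolve the (activation, derivative) pair once, e.g. in the constructor; this is a refactoring sketch, not code from the library.

ACTIVATIONS = {
    'sigmoid': (mlnonlin.sigmoid, mlnonlin.dsigmoid),
    'tanh':    (mlnonlin.tanh,    mlnonlin.dtanh),
    'reclin':  (mlnonlin.reclin,  mlnonlin.dreclin),
}

def resolve_activation(name):
    if name not in ACTIVATIONS:
        raise ValueError("activation_function must be either 'sigmoid', 'tanh' or 'reclin'")
    return ACTIVATIONS[name]

# in __init__:     self.act, self.dact = resolve_activation(self.activation_function)
# fprop then calls self.act(self.layer_acts[h+1], self.layers[h+1])
# and bprop calls  self.dact(self.layers[h], self.dlayers[h], self.dlayer_acts[h])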
Example #6
def test_softmax():
    """
    Testing nonlinear softmax.
    """

    input = np.random.randn(20)
    output = np.zeros((20))
    nonlinear.softmax(input, output)
    assert np.sum(
        np.abs(output - np.exp(input) / np.sum(np.exp(input)))) < 1e-12
Example #7
   def fprop(self,input):
       """
       Computes the output given some input. Puts the result in ``self.output``
       """
       self.input[:] = input
       self.output_act[:] = self.d
       for k in range(self.n_k_means):
           if self.n_k_means_inputs == self.input_size:
               c = self.clusterings[k].compute_cluster(self.input)
           else:
               c = self.clusterings[k].compute_cluster(self.input[self.k_means_subset_inputs[k]])
           idx = c + k*self.n_clusters
           self.cluster_indices[k] = c
           
           mllin.product_matrix_vector(self.Ws[idx],self.input,self.layer_acts[k])
           self.layer_acts[k] += self.cs[idx]
           #mlnonlin.sigmoid(self.layer_acts[k],self.layers[k])
           if self.activation_function == 'sigmoid':
               mlnonlin.sigmoid(self.layer_acts[k],self.layers[k])
           elif self.activation_function == 'tanh':
               mlnonlin.tanh(self.layer_acts[k],self.layers[k])
           elif self.activation_function == 'reclin':
               mlnonlin.reclin(self.layer_acts[k],self.layers[k])
           else:
               raise ValueError('activation_function must be either \'sigmoid\', \'tanh\' or \'reclin\'')
       
           mllin.product_matrix_vector(self.Vs[idx],self.layers[k],self.output_acts[k])
           self.output_act += self.output_acts[k]
       mlnonlin.softmax(self.output_act,self.output)

       if self.autoencoder_regularization != 0:
           self.dae_input[:] = input
           self.rng.shuffle(self.input_idx)
           self.dae_input[self.input_idx[:int(self.autoencoder_missing_fraction*self.input_size)]] = 0
           self.dae_output_act[:] = self.dae_d
           for k in range(self.n_k_means):
               idx = self.cluster_indices[k] + k*self.n_clusters
               
               mllin.product_matrix_vector(self.Ws[idx],self.dae_input,self.dae_layer_acts[k])
               self.dae_layer_acts[k] += self.cs[idx]
               #mlnonlin.sigmoid(self.dae_layer_acts[k],self.dae_layers[k])
               if self.activation_function == 'sigmoid':
                   mlnonlin.sigmoid(self.dae_layer_acts[k],self.dae_layers[k])
               elif self.activation_function == 'tanh':
                   mlnonlin.tanh(self.dae_layer_acts[k],self.dae_layers[k])
               elif self.activation_function == 'reclin':
                   mlnonlin.reclin(self.dae_layer_acts[k],self.dae_layers[k])
               else:
                   raise ValueError('activation_function must be either \'sigmoid\', \'tanh\' or \'reclin\'')
           
               mllin.product_matrix_vector(self.Ws[idx].T,self.dae_layers[k],self.dae_output_acts[k])
               self.dae_output_act += self.dae_output_acts[k]
           self.dae_output[:] = self.dae_output_act
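The autoencoder_regularization branch above corrupts the input before reconstructing it through the selected per-cluster weights: a random fraction of the input entries is zeroed by shuffling an index array and masking its first entries. A standalone sketch of just that masking step, with made-up sizes standing in for the learner's attributes:

import numpy as np

rng = np.random.RandomState(1234)
input_size = 10
missing_fraction = 0.3                  # plays the role of autoencoder_missing_fraction

x = rng.rand(input_size)                # clean input
input_idx = np.arange(input_size)

dae_input = x.copy()
rng.shuffle(input_idx)                  # random permutation of input positions
dae_input[input_idx[:int(missing_fraction * input_size)]] = 0   # zero a random subset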
Example #8
 def use_learner(self, example):
     output = np.zeros((self.n_classes))
     self.layers[0][:] = example[0]
     
     # fprop
     for h in range(self.n_hidden_layers):
         mllin.product_matrix_vector(self.Ws[h], self.layers[h], self.layer_acts[h + 1])
         self.layer_acts[h + 1] += self.cs[h]
         mlnonlin.sigmoid(self.layer_acts[h + 1], self.layers[h + 1])
     
     mllin.product_matrix_vector(self.U, self.layers[-1], self.output_act)
     self.output_act += self.d
     mlnonlin.softmax(self.output_act, output)
     
     return [output.argmax(), output]
Example #9
    def use_learner(self, example):
        output = np.zeros((self.n_classes))
        self.layers[0][:] = example[0]

        # fprop
        for h in range(self.n_hidden_layers):
            mllin.product_matrix_vector(self.Ws[h], self.layers[h],
                                        self.layer_acts[h + 1])
            self.layer_acts[h + 1] += self.cs[h]
            mlnonlin.sigmoid(self.layer_acts[h + 1], self.layers[h + 1])

        mllin.product_matrix_vector(self.U, self.layers[-1], self.output_act)
        self.output_act += self.d
        mlnonlin.softmax(self.output_act, output)

        return [output.argmax(), output]
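use_learner returns the predicted class (the argmax of the softmax output) together with the full probability vector. A hedged sketch of how such outputs are typically consumed to measure classification error; learner and testset are hypothetical names, not objects from the examples above:

def classification_error(learner, testset):
    # testset is an iterable of (input_vector, target_class) pairs
    errors = 0
    for example in testset:
        predicted_class, probabilities = learner.use_learner(example)
        errors += int(predicted_class != example[1])
    return float(errors) / len(testset)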
Example #10
    def update_learner(self, example):
        self.input[:] = 0
        self.input[example[1]] = example[0]
        n_words = int(self.input.sum())

        # Performing CD-k
        mllin.product_matrix_vector(self.W, self.input, self.hidden_act)
        self.hidden_act += self.c * n_words
        mlnonlin.sigmoid(self.hidden_act, self.hidden_prob)
        self.neg_hidden_prob[:] = self.hidden_prob

        for k in range(self.k_contrastive_divergence_steps):
            if self.mean_field:
                self.hidden[:] = self.neg_hidden_prob
            else:
                np.less(self.rng.rand(self.hidden_size), self.neg_hidden_prob, self.hidden)

            mllin.product_matrix_vector(self.W.T, self.hidden, self.neg_input_act)
            self.neg_input_act += self.b
            mlnonlin.softmax(self.neg_input_act, self.neg_input_prob)
            if self.mean_field:
                self.neg_input[:] = n_words * self.neg_input_prob
            else:
                self.neg_input[:] = self.rng.multinomial(n_words, self.neg_input_prob)

            mllin.product_matrix_vector(self.W, self.neg_input, self.neg_hidden_act)
            self.neg_hidden_act += self.c * n_words
            mlnonlin.sigmoid(self.neg_hidden_act, self.neg_hidden_prob)

        mllin.outer(self.hidden_prob, self.input, self.deltaW)
        mllin.outer(self.neg_hidden_prob, self.neg_input, self.neg_stats)
        self.deltaW -= self.neg_stats

        np.subtract(self.input, self.neg_input, self.deltab)
        np.subtract(self.hidden_prob, self.neg_hidden_prob, self.deltac)

        self.deltaW *= self.learning_rate / (1.0 + self.decrease_constant * self.n_updates)
        self.deltab *= self.learning_rate / (1.0 + self.decrease_constant * self.n_updates)
        self.deltac *= n_words * self.learning_rate / (1.0 + self.decrease_constant * self.n_updates)

        self.W += self.deltaW
        self.b += self.deltab
        self.c += self.deltac

        self.n_updates += 1
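As I read it, this is CD-k for a replicated-softmax style RBM over word counts, and the deltaW/deltab/deltac computations above are the usual contrastive divergence gradient estimates, with the hidden-bias gradient rescaled by the document length n_words (matching hidden_act += c * n_words in the positive phase). In the code's notation, with x the word-count input, x_neg the negative sample after k steps, and h(.) the hidden probabilities:

    deltaW ∝ h(x) x^T - h(x_neg) x_neg^T
    deltab ∝ x - x_neg
    deltac ∝ n_words * (h(x) - h(x_neg)),   with  h(x) = sigmoid(W x + n_words * c)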
Example #11
 def update_learner(self,example):
     self.input[:] = 0
     self.input[example[1]] = example[0]
     n_words = int(self.input.sum())
     
     # Performing CD-k
     mllin.product_matrix_vector(self.W,self.input,self.hidden_act)
     self.hidden_act += self.c*n_words
     mlnonlin.sigmoid(self.hidden_act,self.hidden_prob)
     self.neg_hidden_prob[:] = self.hidden_prob
     
     for k in range(self.k_contrastive_divergence_steps):
         if self.mean_field:
            self.hidden[:] = self.neg_hidden_prob
         else: 
            np.less(self.rng.rand(self.hidden_size),self.neg_hidden_prob,self.hidden)
     
         mllin.product_matrix_vector(self.W.T,self.hidden,self.neg_input_act)
         self.neg_input_act += self.b
         mlnonlin.softmax(self.neg_input_act,self.neg_input_prob)
         if self.mean_field:
            self.neg_input[:] = n_words*self.neg_input_prob
         else:
            self.neg_input[:] = self.rng.multinomial(n_words,self.neg_input_prob)
     
         mllin.product_matrix_vector(self.W,self.neg_input,self.neg_hidden_act)
         self.neg_hidden_act += self.c*n_words
         mlnonlin.sigmoid(self.neg_hidden_act,self.neg_hidden_prob)
     
     mllin.outer(self.hidden_prob,self.input,self.deltaW)
     mllin.outer(self.neg_hidden_prob,self.neg_input,self.neg_stats)
     self.deltaW -= self.neg_stats
     
     np.subtract(self.input,self.neg_input,self.deltab)
     np.subtract(self.hidden_prob,self.neg_hidden_prob,self.deltac)
     
     self.deltaW *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)
     self.deltab *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)
     self.deltac *= n_words*self.learning_rate/(1.+self.decrease_constant*self.n_updates)         
     
     self.W += self.deltaW
     self.b += self.deltab
     self.c += self.deltac
     
     self.n_updates += 1
Example #12
    def update_learner(self, example):
        self.layers[0][:] = example[0]

        # fprop
        for h in range(self.n_hidden_layers):
            mllin.product_matrix_vector(self.Ws[h], self.layers[h],
                                        self.layer_acts[h + 1])
            self.layer_acts[h + 1] += self.cs[h]
            mlnonlin.sigmoid(self.layer_acts[h + 1], self.layers[h + 1])

        mllin.product_matrix_vector(self.U, self.layers[-1], self.output_act)
        self.output_act += self.d
        mlnonlin.softmax(self.output_act, self.output)

        self.doutput_act[:] = self.output
        self.doutput_act[example[1]] -= 1
        self.doutput_act *= self.learning_rate / (
            1. + self.decrease_constant * self.n_updates)

        self.dd[:] = self.doutput_act
        mllin.outer(self.doutput_act, self.layers[-1], self.dU)
        mllin.product_matrix_vector(self.U.T, self.doutput_act,
                                    self.dlayers[-1])
        mlnonlin.dsigmoid(self.layers[-1], self.dlayers[-1],
                          self.dlayer_acts[-1])
        for h in range(self.n_hidden_layers - 1, -1, -1):
            self.dcs[h][:] = self.dlayer_acts[h + 1]
            mllin.outer(self.dlayer_acts[h + 1], self.layers[h], self.dWs[h])
            mllin.product_matrix_vector(self.Ws[h].T, self.dlayer_acts[h + 1],
                                        self.dlayers[h])
            mlnonlin.dsigmoid(self.layers[h], self.dlayers[h],
                              self.dlayer_acts[h])

        self.U -= self.dU
        self.d -= self.dd
        for h in range(self.n_hidden_layers - 1, -1, -1):
            self.Ws[h] -= self.dWs[h]
            self.cs[h] -= self.dcs[h]

        self.n_updates += 1
Example #13
   def use_learner(self,example):
      output = np.zeros((self.n_classes))
      self.layers[0][:] = example[0]

      # fprop
      for h in range(self.n_hidden_layers):
         mllin.product_matrix_vector(self.Ws[h],self.layers[h],self.layer_acts[h+1])
         self.layer_acts[h+1] += self.cs[h]
         if self.activation_function == 'sigmoid':
             mlnonlin.sigmoid(self.layer_acts[h+1],self.layers[h+1])
         elif self.activation_function == 'tanh':
             mlnonlin.tanh(self.layer_acts[h+1],self.layers[h+1])
         elif self.activation_function == 'reclin':
             mlnonlin.reclin(self.layer_acts[h+1],self.layers[h+1])
         else:
             raise ValueError('activation_function must be either \'sigmoid\', \'tanh\' or \'reclin\'')

      mllin.product_matrix_vector(self.U,self.layers[-1],self.output_act)
      self.output_act += self.d
      mlnonlin.softmax(self.output_act,output)

      return [output.argmax(),output]
Example #14
print 'Testing nonlinear sigmoid'
input = np.random.randn(30, 20)
output = np.zeros((30, 20))
nonlinear.sigmoid(input, output)
print 'NumPy vs mathutils.nonlinear diff. output:', np.sum(
    np.abs(output - 1 / (1 + np.exp(-input))))

print 'Testing nonlinear sigmoid deriv.'
dinput = np.zeros((30, 20))
doutput = np.random.randn(30, 20)
nonlinear.dsigmoid(output, doutput, dinput)
print 'NumPy vs mathutils.nonlinear diff. output:', np.sum(
    np.abs(dinput - doutput * output * (1 - output)))

print 'Testing nonlinear softmax'
input = np.random.randn(20)
output = np.zeros((20))
nonlinear.softmax(input, output)
print 'NumPy vs mathutils.nonlinear diff. output:', np.sum(
    np.abs(output - np.exp(input) / np.sum(np.exp(input))))

print 'Testing nonlinear softplus'
input = np.random.randn(20)
output = np.zeros((20))
nonlinear.softplus(input, output)
print 'NumPy vs mathutils.nonlinear diff. output:', np.sum(
    np.abs(output - np.log(1 + np.exp(input))))

print 'Testing nonlinear reclin'
input = np.random.randn(30, 20)
output = np.zeros((30, 20))
nonlinear.reclin(input, output)
print 'NumPy vs mathutils.nonlinear diff. output:', np.sum(
    np.abs(output - (input > 0) * input))
Example #15
    def update_learner(self, example):
        # apply example to the inputs
        self.layers[0][:] = example[0]
        
        # forward propagation: compute activation values of all units
        
        # hidden layers
        for h in range(self.n_hidden_layers):
            mllin.product_matrix_vector(self.Ws[h], self.layers[h], self.layer_acts[h + 1])
            self.layer_acts[h + 1] += self.cs[h]
            mlnonlin.sigmoid(self.layer_acts[h + 1], self.layers[h + 1])
        
        # output layer
        mllin.product_matrix_vector(self.U, self.layers[-1], self.output_act)
        self.output_act += self.d
        mlnonlin.softmax(self.output_act, self.output)
        
        # back propagation: compute delta errors and updates to weights and
        # biases
        
        # TA:begin
        
        if   self.cost_function == 'CE': 
            self.doutput_act[:] = self.output
            self.doutput_act[example[1]] -= 1
            
        elif self.cost_function == 'SSE':
            y = self.output.copy()
            t = np.zeros(np.shape(y))
            t[example[1]] = 1
            
            # nr of classes
            c = np.size(y)
            
            T2 = (y-t)*y
            T2 = np.array([T2])
            T2 = T2.T
            T2 = np.tile(T2,[1,c])
            
            T3 = np.eye(c,c)
            T3 = T3 - np.tile(y,[c,1])
            
            # delta error at output layer
            self.doutput_act = np.sum(T2*T3,axis=0)
            
        elif self.cost_function == 'EXP':
            y = self.output.copy()
            t = np.zeros(np.shape(y))
            t[example[1]] = 1
            
            # nr of classes
            c = np.size(y)
            
            T1 = y-t
            T1 = np.square(T1)
            T1 = np.sum(T1)
            T1 = T1/self.tau
            T1 = np.exp(T1)
            T1 = 2*T1
            
            T2 = (y-t)*y
            T2 = np.array([T2])
            T2 = T2.T
            T2 = np.tile(T2,[1,c])
            
            T3 = np.eye(c,c)
            T3 = T3 - np.tile(y,[c,1])
            
            # delta error at output layer
            self.doutput_act = T1 * np.sum(T2*T3,axis=0)
            
        # TA:end
        
        self.doutput_act *= self.learning_rate / (1. + self.decrease_constant * self.n_updates)
        self.dd[:] = self.doutput_act
        mllin.outer(self.doutput_act, self.layers[-1], self.dU)
        
        mllin.product_matrix_vector(self.U.T, self.doutput_act, self.dlayers[-1])
        """
        The description and argument names of dsigmoid() are unclear. In
        practice, dsigmoid(s,dx,ds) computes s*(1-s)*dx element-wise and puts
        the result in ds. [TA]
        """
        mlnonlin.dsigmoid(self.layers[-1], self.dlayers[-1], self.dlayer_acts[-1])
        
        for h in range(self.n_hidden_layers - 1, -1, -1):
            self.dcs[h][:] = self.dlayer_acts[h + 1]
            mllin.outer(self.dlayer_acts[h + 1], self.layers[h], self.dWs[h])
            mllin.product_matrix_vector(self.Ws[h].T, self.dlayer_acts[h + 1], self.dlayers[h])
            mlnonlin.dsigmoid(self.layers[h], self.dlayers[h], self.dlayer_acts[h])
        
        #TA:
        if not self.freeze_Ws_cs:
            # update output weights and biases
            self.U -= self.dU
            self.d -= self.dd
            
            # update all hidden weights and biases
            for h in range(self.n_hidden_layers - 1, -1, -1):
                self.Ws[h] -= self.dWs[h]
                self.cs[h] -= self.dcs[h]
        else:
            # update output weights and biases
            self.U -= self.dU
            self.d -= self.dd
            
#             # update only highest hidden layer
#             h = self.n_hidden_layers - 1
#             self.Ws[h] -= self.dWs[h]
#             self.cs[h] -= self.dcs[h]
        
        self.n_updates += 1
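For reference, the T2/T3 tiling in the SSE and EXP branches above builds the full c x c softmax Jacobian; the resulting output delta can also be written in closed form, delta_j = (y_j - t_j) y_j - y_j * sum_i (y_i - t_i) y_i, which avoids the c x c temporaries. A hedged vectorized sketch of that equivalent formulation (not the author's code):

import numpy as np

def sse_output_delta(y, t):
    # y: softmax output, t: one-hot target; returns dE_SSE/d(output_act)
    s = np.dot(y - t, y)              # scalar: sum_i (y_i - t_i) y_i
    return (y - t) * y - y * s

def exp_output_delta(y, t, tau):
    # the 'EXP' cost only rescales the SSE delta by 2 * exp(||y - t||^2 / tau)
    return 2.0 * np.exp(np.sum((y - t) ** 2) / tau) * sse_output_delta(y, t)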
Example #16
print 'Testing nonlinear sigmoid'
input = np.random.randn(30,20)
output = np.zeros((30,20))
nonlinear.sigmoid(input,output)
print 'NumPy vs mathutils.nonlinear diff. output:',np.sum(np.abs(output-1/(1+np.exp(-input))))

print 'Testing nonlinear sigmoid deriv.'
dinput = np.zeros((30,20))
doutput = np.random.randn(30,20)
nonlinear.dsigmoid(output,doutput,dinput)
print 'NumPy vs mathutils.nonlinear diff. output:',np.sum(np.abs(dinput-doutput*output*(1-output)))

print 'Testing nonlinear softmax'
input = np.random.randn(20)
output = np.zeros((20))
nonlinear.softmax(input,output)
print 'NumPy vs mathutils.nonlinear diff. output:',np.sum(np.abs(output-np.exp(input)/np.sum(np.exp(input))))

print 'Testing nonlinear softplus'
input = np.random.randn(20)
output = np.zeros((20))
nonlinear.softplus(input,output)
print 'NumPy vs mathutils.nonlinear diff. output:',np.sum(np.abs(output-np.log(1+np.exp(input))))

print 'Testing nonlinear reclin'
input = np.random.randn(30,20)
output = np.zeros((30,20))
nonlinear.reclin(input,output)
print 'NumPy vs mathutils.nonlinear diff. output:',np.sum(np.abs(output-(input>0)*input))

print 'Testing nonlinear reclin deriv.'