def use_learner(self,example):
    output = np.zeros((self.hidden_size))
    mllin.product_matrix_vector(self.W,example,self.hidden_act)
    self.hidden_act += self.c
    mlnonlin.sigmoid(self.hidden_act,output)
    return [output]
def update_learner(self,example):
    self.layers[0][:] = example[0]

    # fprop
    for h in range(self.n_hidden_layers):
        mllin.product_matrix_vector(self.Ws[h],self.layers[h],self.layer_acts[h+1])
        self.layer_acts[h+1] += self.cs[h]
        mlnonlin.sigmoid(self.layer_acts[h+1],self.layers[h+1])

    mllin.product_matrix_vector(self.U,self.layers[-1],self.output_act)
    self.output_act += self.d
    mlnonlin.softmax(self.output_act,self.output)

    # bprop: output-layer delta for softmax + cross-entropy, scaled by the decaying learning rate
    self.doutput_act[:] = self.output
    self.doutput_act[example[1]] -= 1
    self.doutput_act *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)

    self.dd[:] = self.doutput_act
    mllin.outer(self.doutput_act,self.layers[-1],self.dU)
    mllin.product_matrix_vector(self.U.T,self.doutput_act,self.dlayers[-1])
    mlnonlin.dsigmoid(self.layers[-1],self.dlayers[-1],self.dlayer_acts[-1])
    for h in range(self.n_hidden_layers-1,-1,-1):
        self.dcs[h][:] = self.dlayer_acts[h+1]
        mllin.outer(self.dlayer_acts[h+1],self.layers[h],self.dWs[h])
        mllin.product_matrix_vector(self.Ws[h].T,self.dlayer_acts[h+1],self.dlayers[h])
        mlnonlin.dsigmoid(self.layers[h],self.dlayers[h],self.dlayer_acts[h])

    # parameter updates
    self.U -= self.dU
    self.d -= self.dd
    for h in range(self.n_hidden_layers-1,-1,-1):
        self.Ws[h] -= self.dWs[h]
        self.cs[h] -= self.dcs[h]

    self.n_updates += 1
def cost(self,outputs,example):
    hidden = outputs[0]
    mllin.product_matrix_vector(self.W.T,hidden,self.neg_input_act)
    self.neg_input_act += self.b
    mlnonlin.sigmoid(self.neg_input_act,self.neg_input_prob)
    return [ np.sum((example-self.neg_input_prob)**2) ]
def compute_document_representation(self, word_counts_sparse):
    self.input[:] = 0
    self.input[word_counts_sparse[1]] = word_counts_sparse[0]
    output = np.zeros((self.hidden_size,))
    mllin.product_matrix_vector(self.W, self.input, self.hidden_act)
    self.hidden_act += self.c * self.input.sum()
    mlnonlin.sigmoid(self.hidden_act, output)
    return output
def test_sigmoid():
    """
    Testing nonlinear sigmoid.
    """
    input = np.random.randn(30,20)
    output = np.zeros((30,20))
    nonlinear.sigmoid(input,output)
    assert np.sum(np.abs(output-1/(1+np.exp(-input)))) < 1e-12
def use_learner(self, example):
    self.input[:] = 0
    self.input[example[1]] = example[0]
    output = np.zeros((self.hidden_size))
    mllin.product_matrix_vector(self.W, self.input, self.hidden_act)
    self.hidden_act += self.c * self.input.sum()
    mlnonlin.sigmoid(self.hidden_act, output)
    return [output]
def update_learner(self,example):
    self.layers[0][:] = example[0]

    # fprop
    for h in range(self.n_hidden_layers):
        mllin.product_matrix_vector(self.Ws[h],self.layers[h],self.layer_acts[h+1])
        self.layer_acts[h+1] += self.cs[h]
        if self.activation_function == 'sigmoid':
            mlnonlin.sigmoid(self.layer_acts[h+1],self.layers[h+1])
        elif self.activation_function == 'tanh':
            mlnonlin.tanh(self.layer_acts[h+1],self.layers[h+1])
        elif self.activation_function == 'reclin':
            mlnonlin.reclin(self.layer_acts[h+1],self.layers[h+1])
        else:
            raise ValueError('activation_function must be either \'sigmoid\', \'tanh\' or \'reclin\'')

    mllin.product_matrix_vector(self.U,self.layers[-1],self.output_act)
    self.output_act += self.d
    mlnonlin.softmax(self.output_act,self.output)

    # bprop
    self.doutput_act[:] = self.output
    self.doutput_act[example[1]] -= 1
    self.doutput_act *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)

    self.dd[:] = self.doutput_act
    mllin.outer(self.doutput_act,self.layers[-1],self.dU)
    mllin.product_matrix_vector(self.U.T,self.doutput_act,self.dlayers[-1])
    if self.activation_function == 'sigmoid':
        mlnonlin.dsigmoid(self.layers[-1],self.dlayers[-1],self.dlayer_acts[-1])
    elif self.activation_function == 'tanh':
        mlnonlin.dtanh(self.layers[-1],self.dlayers[-1],self.dlayer_acts[-1])
    elif self.activation_function == 'reclin':
        mlnonlin.dreclin(self.layers[-1],self.dlayers[-1],self.dlayer_acts[-1])
    else:
        raise ValueError('activation_function must be either \'sigmoid\', \'tanh\' or \'reclin\'')

    for h in range(self.n_hidden_layers-1,-1,-1):
        self.dcs[h][:] = self.dlayer_acts[h+1]
        mllin.outer(self.dlayer_acts[h+1],self.layers[h],self.dWs[h])
        mllin.product_matrix_vector(self.Ws[h].T,self.dlayer_acts[h+1],self.dlayers[h])
        if self.activation_function == 'sigmoid':
            mlnonlin.dsigmoid(self.layers[h],self.dlayers[h],self.dlayer_acts[h])
        elif self.activation_function == 'tanh':
            mlnonlin.dtanh(self.layers[h],self.dlayers[h],self.dlayer_acts[h])
        elif self.activation_function == 'reclin':
            mlnonlin.dreclin(self.layers[h],self.dlayers[h],self.dlayer_acts[h])
        else:
            raise ValueError('activation_function must be either \'sigmoid\', \'tanh\' or \'reclin\'')

    # parameter updates
    self.U -= self.dU
    self.d -= self.dd
    for h in range(self.n_hidden_layers-1,-1,-1):
        self.Ws[h] -= self.dWs[h]
        self.cs[h] -= self.dcs[h]

    self.n_updates += 1
def use_learner(self,example):
    self.input[self.input_order] = example
    output = np.zeros((self.input_size))
    recact = np.zeros((self.input_size))

    # fprop
    mllin.product_matrix_vector(self.W,self.input,recact)
    recact += self.b
    mlnonlin.sigmoid(recact,output)
    return [output,recact]
def decode(self, input_size):
    """
    Decode the hidden layer and return the output.
    """
    output = np.zeros(input_size)
    preactivation = np.dot(self.W, self.h) + self.c
    sigmoid(preactivation, output)
    return output
def encode(self, input):
    """
    Encode the input vector and return the hidden layer.
    """
    h = np.zeros(self.hidden_size)
    preactivation = np.dot(self.W.T, input) + self.b
    sigmoid(preactivation, h)
    return h
def test_dsigmoid():
    """
    Testing nonlinear sigmoid deriv.
    """
    input = np.random.randn(30,20)
    output = np.zeros((30,20))
    nonlinear.sigmoid(input,output)
    dinput = np.zeros((30,20))
    doutput = np.random.randn(30,20)
    nonlinear.dsigmoid(output,doutput,dinput)
    assert np.sum(np.abs(dinput-doutput*output*(1-output))) < 1e-12
def fprop(self,input):
    """
    Computes the output given some input. Puts the result in ``self.output``.
    """
    self.input[:] = input
    self.output_act[:] = self.d
    for k in range(self.n_k_means):
        if self.n_k_means_inputs == self.input_size:
            c = self.clusterings[k].compute_cluster(self.input)
        else:
            c = self.clusterings[k].compute_cluster(self.input[self.k_means_subset_inputs[k]])
        idx = c + k*self.n_clusters
        self.cluster_indices[k] = c

        mllin.product_matrix_vector(self.Ws[idx],self.input,self.layer_acts[k])
        self.layer_acts[k] += self.cs[idx]
        #mlnonlin.sigmoid(self.layer_acts[k],self.layers[k])
        if self.activation_function == 'sigmoid':
            mlnonlin.sigmoid(self.layer_acts[k],self.layers[k])
        elif self.activation_function == 'tanh':
            mlnonlin.tanh(self.layer_acts[k],self.layers[k])
        elif self.activation_function == 'reclin':
            mlnonlin.reclin(self.layer_acts[k],self.layers[k])
        else:
            raise ValueError('activation_function must be either \'sigmoid\', \'tanh\' or \'reclin\'')

        mllin.product_matrix_vector(self.Vs[idx],self.layers[k],self.output_acts[k])
        self.output_act += self.output_acts[k]

    mlnonlin.softmax(self.output_act,self.output)

    if self.autoencoder_regularization != 0:
        self.dae_input[:] = input
        self.rng.shuffle(self.input_idx)
        self.dae_input[self.input_idx[:int(self.autoencoder_missing_fraction*self.input_size)]] = 0
        self.dae_output_act[:] = self.dae_d
        for k in range(self.n_k_means):
            idx = self.cluster_indices[k] + k*self.n_clusters
            mllin.product_matrix_vector(self.Ws[idx],self.dae_input,self.dae_layer_acts[k])
            self.dae_layer_acts[k] += self.cs[idx]
            #mlnonlin.sigmoid(self.dae_layer_acts[k],self.dae_layers[k])
            if self.activation_function == 'sigmoid':
                mlnonlin.sigmoid(self.dae_layer_acts[k],self.dae_layers[k])
            elif self.activation_function == 'tanh':
                mlnonlin.tanh(self.dae_layer_acts[k],self.dae_layers[k])
            elif self.activation_function == 'reclin':
                mlnonlin.reclin(self.dae_layer_acts[k],self.dae_layers[k])
            else:
                raise ValueError('activation_function must be either \'sigmoid\', \'tanh\' or \'reclin\'')

            mllin.product_matrix_vector(self.Ws[idx].T,self.dae_layers[k],self.dae_output_acts[k])
            self.dae_output_act += self.dae_output_acts[k]

        self.dae_output[:] = self.dae_output_act
def fprop(self):
    np.multiply(self.vec_input,self.mat_W,self.mat_inp_times_W)
    np.add.accumulate(self.mat_inp_times_W[:,:-1],axis=1,out=self.mat_acc_inp_times_W[:,1:])
    self.mat_acc_inp_times_W[:,0] = 0
    self.mat_acc_inp_times_W += self.vec_bias_h[:,np.newaxis]  # the columns are the hidden_act_i
    mlnonlin.sigmoid(self.mat_acc_inp_times_W,self.mat_h)  # the columns are the hidden_layer_i
    np.multiply(self.mat_h,self.mat_V,self.mat_Vhid)
    mllin.sum_columns(self.mat_Vhid,self.vec_recact)
    self.vec_recact += self.vec_bias_inp
    if self.fPoisson:
        self.vec_recProb = np.exp(self.vec_recact)
    else:
        mlnonlin.sigmoid(self.vec_recact,self.vec_recProb)
def update_learner(self,example):
    self.input[self.input_order] = example

    # fprop
    np.multiply(self.input,self.W,self.input_times_W)
    np.add.accumulate(self.input_times_W[:,:-1],axis=1,out=self.acc_input_times_W[:,1:])
    self.acc_input_times_W[:,0] = 0
    self.acc_input_times_W += self.c[:,np.newaxis]
    mlnonlin.sigmoid(self.acc_input_times_W,self.hid)
    if self.untied_weights:
        np.multiply(self.hid,self.V,self.Whid)
    else:
        np.multiply(self.hid,self.W,self.Whid)
    mllin.sum_columns(self.Whid,self.recact)
    self.recact += self.b
    mlnonlin.sigmoid(self.recact,self.rec)

    # bprop
    np.subtract(self.rec,self.input,self.drec)
    self.drec *= self.alpha
    self.db[:] = self.drec
    if self.untied_weights:
        np.multiply(self.drec,self.hid,self.dV)
        np.multiply(self.drec,self.V,self.dhid)
        self.dW[:] = 0
    else:
        np.multiply(self.drec,self.hid,self.dW)
        np.multiply(self.drec,self.W,self.dhid)
    mlnonlin.dsigmoid(self.hid,self.dhid,self.dacc_input_times_W)
    mllin.sum_rows(self.dacc_input_times_W,self.dc)
    np.add.accumulate(self.dacc_input_times_W[:,:0:-1],axis=1,out=self.dWenc[:,-2::-1])
    self.dWenc[:,-1] = 0
    self.dWenc *= self.input
    self.dW += self.dWenc

    self.dW *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)
    self.db *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)
    self.dc *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)
    self.W -= self.dW
    self.b -= self.db
    self.c -= self.dc
    if self.untied_weights:
        self.dV *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)
        self.V -= self.dV

    self.n_updates += 1
def use_learner(self, example):
    output = np.zeros((self.n_classes))
    self.layers[0][:] = example[0]

    # fprop
    for h in range(self.n_hidden_layers):
        mllin.product_matrix_vector(self.Ws[h], self.layers[h], self.layer_acts[h + 1])
        self.layer_acts[h + 1] += self.cs[h]
        mlnonlin.sigmoid(self.layer_acts[h + 1], self.layers[h + 1])

    mllin.product_matrix_vector(self.U, self.layers[-1], self.output_act)
    self.output_act += self.d
    mlnonlin.softmax(self.output_act, output)
    return [output.argmax(), output]
def apply_activation(self, input_data, output):
    """
    Apply the activation function.
    """
    if self.activation_function == "sigmoid":
        mlnonlin.sigmoid(input_data, output)
    elif self.activation_function == "tanh":
        mlnonlin.tanh(input_data, output)
    elif self.activation_function == "reclin":
        mlnonlin.reclin(input_data, output)
    elif self.activation_function == "softmax":
        # row-wise softmax, subtracting the row maximum for numerical stability
        m = input_data.max(axis=1)
        output[:] = np.exp(input_data - m.reshape((-1, 1)))
        output[:] /= output.sum(axis=1).reshape((-1, 1))
    else:
        raise ValueError("activation_function must be either 'sigmoid', 'tanh', 'reclin' or 'softmax'")
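# A pure-NumPy reference for the element-wise activations dispatched above (a
# sketch for checking behaviour, not part of the learner). It assumes that
# reclin denotes the rectified linear activation max(0, x), as the name
# suggests; the actual mlnonlin routines write their result into a
# caller-supplied output array instead of returning it.
import numpy as np

def sigmoid_ref(x):
    return 1.0 / (1.0 + np.exp(-x))

def tanh_ref(x):
    return np.tanh(x)

def reclin_ref(x):
    return np.maximum(0.0, x)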
def update_learner(self, example):
    self.input[:] = 0
    self.input[example[1]] = example[0]
    n_words = int(self.input.sum())

    # Performing CD-k
    mllin.product_matrix_vector(self.W, self.input, self.hidden_act)
    self.hidden_act += self.c * n_words
    mlnonlin.sigmoid(self.hidden_act, self.hidden_prob)
    self.neg_hidden_prob[:] = self.hidden_prob
    for k in range(self.k_contrastive_divergence_steps):
        if self.mean_field:
            self.hidden[:] = self.neg_hidden_prob
        else:
            np.less(self.rng.rand(self.hidden_size), self.neg_hidden_prob, self.hidden)
        mllin.product_matrix_vector(self.W.T, self.hidden, self.neg_input_act)
        self.neg_input_act += self.b
        mlnonlin.softmax(self.neg_input_act, self.neg_input_prob)
        if self.mean_field:
            self.neg_input[:] = n_words * self.neg_input_prob
        else:
            self.neg_input[:] = self.rng.multinomial(n_words, self.neg_input_prob)
        mllin.product_matrix_vector(self.W, self.neg_input, self.neg_hidden_act)
        self.neg_hidden_act += self.c * n_words
        mlnonlin.sigmoid(self.neg_hidden_act, self.neg_hidden_prob)

    mllin.outer(self.hidden_prob, self.input, self.deltaW)
    mllin.outer(self.neg_hidden_prob, self.neg_input, self.neg_stats)
    self.deltaW -= self.neg_stats
    np.subtract(self.input, self.neg_input, self.deltab)
    np.subtract(self.hidden_prob, self.neg_hidden_prob, self.deltac)

    self.deltaW *= self.learning_rate / (1.0 + self.decrease_constant * self.n_updates)
    self.deltab *= self.learning_rate / (1.0 + self.decrease_constant * self.n_updates)
    self.deltac *= n_words * self.learning_rate / (1.0 + self.decrease_constant * self.n_updates)

    self.W += self.deltaW
    self.b += self.deltab
    self.c += self.deltac

    self.n_updates += 1
def use_learner(self,example):
    self.input[self.input_order] = example
    output = np.zeros((self.input_size))
    recact = np.zeros((self.input_size))

    # fprop
    np.multiply(self.input,self.W,self.input_times_W)
    np.add.accumulate(self.input_times_W[:,:-1],axis=1,out=self.acc_input_times_W[:,1:])
    self.acc_input_times_W[:,0] = 0
    self.acc_input_times_W += self.c[:,np.newaxis]
    mlnonlin.sigmoid(self.acc_input_times_W,self.hid)
    if self.untied_weights:
        np.multiply(self.hid,self.V,self.Whid)
    else:
        np.multiply(self.hid,self.W,self.Whid)
    mllin.sum_columns(self.Whid,recact)
    recact += self.b
    mlnonlin.sigmoid(recact,output)
    return [output,recact]
def update_learner(self,example):
    self.input[self.input_order] = example

    # fprop
    mllin.product_matrix_vector(self.W,self.input,self.recact)
    self.recact += self.b
    mlnonlin.sigmoid(self.recact,self.rec)

    # bprop
    np.subtract(self.rec,self.input,self.drec)
    self.db[:] = self.drec
    mllin.outer(self.drec,self.input,self.dW)

    self.dW *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)
    self.db *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)
    self.W -= self.dW
    self.b -= self.db
    self.W.ravel()[self.utri_index] = 0  # setting the upper triangular part back to 0

    self.n_updates += 1
def use_learner(self,example):
    output = np.zeros((self.n_classes))
    self.layers[0][:] = example[0]

    # fprop
    for h in range(self.n_hidden_layers):
        mllin.product_matrix_vector(self.Ws[h],self.layers[h],self.layer_acts[h+1])
        self.layer_acts[h+1] += self.cs[h]
        if self.activation_function == 'sigmoid':
            mlnonlin.sigmoid(self.layer_acts[h+1],self.layers[h+1])
        elif self.activation_function == 'tanh':
            mlnonlin.tanh(self.layer_acts[h+1],self.layers[h+1])
        elif self.activation_function == 'reclin':
            mlnonlin.reclin(self.layer_acts[h+1],self.layers[h+1])
        else:
            raise ValueError('activation_function must be either \'sigmoid\', \'tanh\' or \'reclin\'')

    mllin.product_matrix_vector(self.U,self.layers[-1],self.output_act)
    self.output_act += self.d
    mlnonlin.softmax(self.output_act,output)
    return [output.argmax(),output]
def sample(self):
    input = np.zeros(self.input_size)
    input_prob = np.zeros(self.input_size)
    hid_i = np.zeros(self.hidden_size)
    for i in range(self.input_size):
        if i > 0:
            mlnonlin.sigmoid(self.c+np.dot(self.W[:,:i],input[:i]),hid_i)
        else:
            mlnonlin.sigmoid(self.c,hid_i)
        if self.untied_weights:
            mlnonlin.sigmoid(np.dot(hid_i,self.V[:,i])+self.b[i:i+1],input_prob[i:i+1])
        else:
            mlnonlin.sigmoid(np.dot(hid_i,self.W[:,i])+self.b[i:i+1],input_prob[i:i+1])
        input[i] = (self.rng.rand()<input_prob[i])
    return (input[self.input_order],input_prob[self.input_order])
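# For reference, a small NumPy sketch (not part of the learner) of the
# conditional that each iteration of the sampling loop above computes, in the
# tied-weights case; W, b, c and v_prefix stand in for the corresponding
# attributes and the already-sampled inputs.
import numpy as np

def nade_conditional(W, b, c, v_prefix, i):
    # hidden layer given the first i sampled inputs: h_i = sigm(c + W[:, :i] . v_<i)
    h_i = 1.0 / (1.0 + np.exp(-(c + np.dot(W[:, :i], v_prefix))))
    # p(v_i = 1 | v_<i) = sigm(b_i + h_i . W[:, i])
    return 1.0 / (1.0 + np.exp(-(b[i] + np.dot(h_i, W[:, i]))))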
def update_learner(self,example):
    self.input[:] = example

    # Performing CD-1
    mllin.product_matrix_vector(self.W,self.input,self.hidden_act)
    self.hidden_act += self.c
    mlnonlin.sigmoid(self.hidden_act,self.hidden_prob)
    np.less(self.rng.rand(self.hidden_size),self.hidden_prob,self.hidden)

    mllin.product_matrix_vector(self.W.T,self.hidden,self.neg_input_act)
    self.neg_input_act += self.b
    mlnonlin.sigmoid(self.neg_input_act,self.neg_input_prob)
    np.less(self.rng.rand(self.input_size),self.neg_input_prob,self.neg_input)

    mllin.product_matrix_vector(self.W,self.neg_input,self.neg_hidden_act)
    self.neg_hidden_act += self.c
    mlnonlin.sigmoid(self.neg_hidden_act,self.neg_hidden_prob)

    mllin.outer(self.hidden_prob,self.input,self.deltaW)
    mllin.outer(self.neg_hidden_prob,self.neg_input,self.neg_stats)
    self.deltaW -= self.neg_stats
    np.subtract(self.input,self.neg_input,self.deltab)
    np.subtract(self.hidden_prob,self.neg_hidden_prob,self.deltac)

    self.deltaW *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)
    self.deltab *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)
    self.deltac *= self.learning_rate/(1.+self.decrease_constant*self.n_updates)

    self.W += self.deltaW
    self.b += self.deltab
    self.c += self.deltac

    if self.l1_regularization > 0:
        self.W *= (np.abs(self.W) > (self.l1_regularization * self.learning_rate/(1.+self.decrease_constant*self.n_updates)))

    self.n_updates += 1
# Fragment of the mathutils test script (A and p are set up earlier in the script).
L = np.zeros((30,20))
U = np.zeros((20,20))
linalg.lu(A,p,L,U)

# Writing permutation vector p in matrix form
P = np.zeros((30,30))
for P_row,p_el in zip(P.T,p):
    P_row[p_el] = 1

P2,L2,U2 = scipy.linalg.lu(A)
print("Scipy vs mathutils.linalg diff. P:",np.sum(np.abs(P-P2)))
print("Scipy vs mathutils.linalg diff. L:",np.sum(np.abs(L-L2)))
print("Scipy vs mathutils.linalg diff. U:",np.sum(np.abs(U-U2)))

print('Testing nonlinear sigmoid')
input = np.random.randn(30,20)
output = np.zeros((30,20))
nonlinear.sigmoid(input,output)
print('NumPy vs mathutils.nonlinear diff. output:',np.sum(np.abs(output-1/(1+np.exp(-input)))))

print('Testing nonlinear sigmoid deriv.')
dinput = np.zeros((30,20))
doutput = np.random.randn(30,20)
nonlinear.dsigmoid(output,doutput,dinput)
print('NumPy vs mathutils.nonlinear diff. output:',np.sum(np.abs(dinput-doutput*output*(1-output))))

print('Testing nonlinear softmax')
input = np.random.randn(20)
output = np.zeros((20))
nonlinear.softmax(input,output)
print('NumPy vs mathutils.nonlinear diff. output:',np.sum(np.abs(output-np.exp(input)/np.sum(np.exp(input)))))

print('Testing nonlinear softplus')
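# The softmax check from the script above, wrapped in the same pytest-style
# form as test_sigmoid and test_dsigmoid (a sketch; it assumes the same
# mathutils.nonlinear module imported as `nonlinear`).
def test_softmax():
    """
    Testing nonlinear softmax.
    """
    input = np.random.randn(20)
    output = np.zeros((20))
    nonlinear.softmax(input,output)
    assert np.sum(np.abs(output-np.exp(input)/np.sum(np.exp(input)))) < 1e-12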
def update_learner(self, example):
    # apply example to the inputs
    self.layers[0][:] = example[0]

    # forward propagation: compute activation values of all units
    # hidden layers
    for h in range(self.n_hidden_layers):
        mllin.product_matrix_vector(self.Ws[h], self.layers[h], self.layer_acts[h + 1])
        self.layer_acts[h + 1] += self.cs[h]
        mlnonlin.sigmoid(self.layer_acts[h + 1], self.layers[h + 1])
    # output layer
    mllin.product_matrix_vector(self.U, self.layers[-1], self.output_act)
    self.output_act += self.d
    mlnonlin.softmax(self.output_act, self.output)

    # back propagation: compute delta errors and updates to weights and
    # biases
    # TA:begin
    if self.cost_function == 'CE':
        self.doutput_act[:] = self.output
        self.doutput_act[example[1]] -= 1
    elif self.cost_function == 'SSE':
        y = self.output.copy()
        t = np.zeros(np.shape(y))
        t[example[1]] = 1
        # nr of classes
        c = np.size(y)
        T2 = (y-t)*y
        T2 = np.array([T2])
        T2 = T2.T
        T2 = np.tile(T2,[1,c])
        T3 = np.eye(c,c)
        T3 = T3 - np.tile(y,[c,1])
        # delta error at output layer
        self.doutput_act = np.sum(T2*T3,axis=0)
    elif self.cost_function == 'EXP':
        y = self.output.copy()
        t = np.zeros(np.shape(y))
        t[example[1]] = 1
        # nr of classes
        c = np.size(y)
        T1 = y-t
        T1 = np.square(T1)
        T1 = np.sum(T1)
        T1 = T1/self.tau
        T1 = np.exp(T1)
        T1 = 2*T1
        T2 = (y-t)*y
        T2 = np.array([T2])
        T2 = T2.T
        T2 = np.tile(T2,[1,c])
        T3 = np.eye(c,c)
        T3 = T3 - np.tile(y,[c,1])
        # delta error at output layer
        self.doutput_act = T1 * np.sum(T2*T3,axis=0)
    # TA:end

    self.doutput_act *= self.learning_rate / (1. + self.decrease_constant * self.n_updates)

    self.dd[:] = self.doutput_act
    mllin.outer(self.doutput_act, self.layers[-1], self.dU)
    mllin.product_matrix_vector(self.U.T, self.doutput_act, self.dlayers[-1])
    # The description and argument names of dsigmoid() are unclear. In practice,
    # dsigmoid(s,dx,ds) computes s*(1-s)*dx element-wise and puts the result in ds. [TA]
    mlnonlin.dsigmoid(self.layers[-1], self.dlayers[-1], self.dlayer_acts[-1])

    for h in range(self.n_hidden_layers - 1, -1, -1):
        self.dcs[h][:] = self.dlayer_acts[h + 1]
        mllin.outer(self.dlayer_acts[h + 1], self.layers[h], self.dWs[h])
        mllin.product_matrix_vector(self.Ws[h].T, self.dlayer_acts[h + 1], self.dlayers[h])
        mlnonlin.dsigmoid(self.layers[h], self.dlayers[h], self.dlayer_acts[h])

    # TA:
    if not self.freeze_Ws_cs:
        # update output weights and biases
        self.U -= self.dU
        self.d -= self.dd
        # update all hidden weights and biases
        for h in range(self.n_hidden_layers - 1, -1, -1):
            self.Ws[h] -= self.dWs[h]
            self.cs[h] -= self.dcs[h]
    else:
        # update output weights and biases
        self.U -= self.dU
        self.d -= self.dd
        # # update only highest hidden layer
        # h = self.n_hidden_layers - 1
        # self.Ws[h] -= self.dWs[h]
        # self.cs[h] -= self.dcs[h]

    self.n_updates += 1
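# Sanity check for the 'CE' branch above (a sketch with hypothetical values,
# not part of the learner): for a softmax output y and target class index t,
# the two lines of that branch produce the usual softmax/cross-entropy delta
# y - onehot(t).
import numpy as np

y = np.array([0.2, 0.5, 0.3])   # assumed softmax output
target = 1                      # assumed target class index
delta = y.copy()
delta[target] -= 1              # same update as the 'CE' branch
onehot = np.zeros_like(y)
onehot[target] = 1
assert np.allclose(delta, y - onehot)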