def update_learner(self, example):
    """Perform one in-place gradient update from a single example.

    ``example`` is scattered into ``self.input`` through
    ``self.input_order``.  A forward pass then fills the hidden and
    reconstruction buffers, a backward pass fills the gradient buffers,
    and ``W``, ``b`` and ``c`` (plus ``V`` when ``self.untied_weights`` is
    true) are decremented by their gradients scaled with
    ``learning_rate / (1 + decrease_constant * n_updates)``.
    ``self.n_updates`` is incremented as the last step.

    All intermediate results are written into preallocated attributes of
    ``self``; nothing is returned.
    """
    self.input[self.input_order] = example

    untied = self.untied_weights
    # Weights used on the reconstruction side: V when untied, else W is
    # shared between both sides.
    decoder = self.V if untied else self.W

    # ---- forward pass ----
    np.multiply(self.input, self.W, out=self.input_times_W)
    # Exclusive running sum along axis 1: column j receives the sum of the
    # columns strictly before j, so column 0 is zero before the bias is added.
    np.add.accumulate(self.input_times_W[:, :-1], axis=1,
                      out=self.acc_input_times_W[:, 1:])
    self.acc_input_times_W[:, 0] = 0
    self.acc_input_times_W += self.c[:, np.newaxis]
    mlnonlin.sigmoid(self.acc_input_times_W, self.hid)

    np.multiply(self.hid, decoder, out=self.Whid)
    mllin.sum_columns(self.Whid, self.recact)
    self.recact += self.b
    mlnonlin.sigmoid(self.recact, self.rec)

    # ---- backward pass ----
    np.subtract(self.rec, self.input, out=self.drec)
    self.drec *= self.alpha
    self.db[:] = self.drec

    # The reconstruction-side weight gradient goes to dV when untied,
    # otherwise directly into dW.
    np.multiply(self.drec, self.hid, out=self.dV if untied else self.dW)
    np.multiply(self.drec, decoder, out=self.dhid)
    if untied:
        # W then only receives the encoder-side term added below.
        self.dW[:] = 0

    mlnonlin.dsigmoid(self.hid, self.dhid, self.dacc_input_times_W)
    mllin.sum_rows(self.dacc_input_times_W, self.dc)
    # Mirror of the forward running sum: column j receives the sum of the
    # columns strictly after j, so the last column is zero.
    np.add.accumulate(self.dacc_input_times_W[:, :0:-1], axis=1,
                      out=self.dWenc[:, -2::-1])
    self.dWenc[:, -1] = 0
    self.dWenc *= self.input
    self.dW += self.dWenc

    # ---- parameter update ----
    # n_updates does not change until the end, so one step size serves
    # every parameter.
    step = self.learning_rate / (1. + self.decrease_constant * self.n_updates)
    self.dW *= step
    self.db *= step
    self.dc *= step
    self.W -= self.dW
    self.b -= self.db
    self.c -= self.dc
    if untied:
        self.dV *= step
        self.V -= self.dV

    self.n_updates += 1
def update_learner(self,example):
    """Update the model parameters in place from one training example.

    Steps, all operating on buffers stored on ``self``:

    1. write ``example`` into ``self.input`` at ``self.input_order``;
    2. forward pass producing ``self.hid`` and ``self.rec``;
    3. backward pass producing ``dW``, ``db``, ``dc`` (and ``dV`` when
       ``self.untied_weights`` is true);
    4. scale each gradient by
       ``learning_rate / (1 + decrease_constant * n_updates)`` and subtract
       it from the matching parameter;
    5. increment ``self.n_updates``.

    Returns ``None``.
    """
    self.input[self.input_order] = example
    x = self.input
    untied = self.untied_weights

    # Scratch and gradient buffers, bound once for readability.  They are
    # only ever modified in place, so the attributes on self see every write.
    x_times_W = self.input_times_W
    pre_hid = self.acc_input_times_W
    hid = self.hid
    pre_rec = self.recact
    d_rec = self.drec
    d_hid = self.dhid
    d_pre_hid = self.dacc_input_times_W
    d_W = self.dW
    d_Wenc = self.dWenc
    d_b = self.db
    d_c = self.dc

    # fprop: hidden activations are built from running sums over axis 1
    np.multiply(x, self.W, out=x_times_W)
    np.add.accumulate(x_times_W[:, :-1], axis=1, out=pre_hid[:, 1:])
    pre_hid[:, 0] = 0  # nothing precedes the first column
    pre_hid += self.c[:, np.newaxis]
    mlnonlin.sigmoid(pre_hid, hid)

    if untied:
        np.multiply(hid, self.V, out=self.Whid)
    else:
        np.multiply(hid, self.W, out=self.Whid)
    mllin.sum_columns(self.Whid, pre_rec)
    pre_rec += self.b
    mlnonlin.sigmoid(pre_rec, self.rec)

    # bprop: start from (reconstruction - input), weighted by alpha
    np.subtract(self.rec, x, out=d_rec)
    d_rec *= self.alpha
    d_b[:] = d_rec

    if untied:
        d_V = self.dV
        np.multiply(d_rec, hid, out=d_V)
        np.multiply(d_rec, self.V, out=d_hid)
        d_W[:] = 0  # W only gets the encoder-side gradient in this mode
    else:
        np.multiply(d_rec, hid, out=d_W)
        np.multiply(d_rec, self.W, out=d_hid)

    mlnonlin.dsigmoid(hid, d_hid, d_pre_hid)
    mllin.sum_rows(d_pre_hid, d_c)
    # Reverse running sum: each column collects the gradients of all later
    # columns; the last column has none.
    np.add.accumulate(d_pre_hid[:, :0:-1], axis=1, out=d_Wenc[:, -2::-1])
    d_Wenc[:, -1] = 0
    d_Wenc *= x
    d_W += d_Wenc

    # Decayed step size, identical for every parameter within this call.
    decayed_lr = self.learning_rate/(1.+self.decrease_constant*self.n_updates)

    d_W *= decayed_lr
    d_b *= decayed_lr
    d_c *= decayed_lr
    self.W -= d_W
    self.b -= d_b
    self.c -= d_c

    if untied:
        d_V *= decayed_lr
        self.V -= d_V

    self.n_updates += 1
def update_learner(self, vec_input):
    """Take one gradient step on the parameters for a single input vector.

    ``vec_input`` is written into ``self.vec_input`` at the positions given
    by ``self.input_order`` and ``self.fprop()`` is called.  The gradient
    buffers are then filled, and ``vec_bias_inp``, ``vec_bias_h``,
    ``mat_W`` and ``mat_V`` are each decremented by ``learning_rate`` times
    their gradient.  Returns ``None``.

    NOTE(review): ``fprop`` is not visible here; it is presumably what
    fills ``self.vec_recProb`` and ``self.mat_h`` before they are read
    below -- confirm against its definition.
    """
    self.vec_input[self.input_order] = vec_input

    # Forward pass.
    self.fprop()

    # Backward pass (gradient of -log p(vec_input), per the original note).
    np.subtract(self.vec_recProb, self.vec_input, out=self.vec_grad_bias_inp)
    np.multiply(self.vec_grad_bias_inp, self.mat_h, out=self.mat_grad_V)
    np.multiply(self.vec_grad_bias_inp, self.mat_V, out=self.mat_grad_h)
    mlnonlin.dsigmoid(self.mat_h, self.mat_grad_h, self.mat_grad_temp)
    mllin.sum_rows(self.mat_grad_temp, self.vec_grad_bias_h)
    # Reverse running sum along axis 1: column j receives the sum of the
    # columns strictly after j, so the last column is zero.
    np.add.accumulate(self.mat_grad_temp[:, :0:-1], axis=1,
                      out=self.mat_grad_W[:, -2::-1])
    self.mat_grad_W[:, -1] = 0
    self.mat_grad_W *= self.vec_input

    # Parameter update with a constant step size.
    lr = self.learning_rate
    self.vec_bias_inp -= lr * self.vec_grad_bias_inp
    self.vec_bias_h -= lr * self.vec_grad_bias_h
    self.mat_W -= lr * self.mat_grad_W
    self.mat_V -= lr * self.mat_grad_V
def update_learner(self, vec_input):
    """Apply a single-example gradient update to the model in place.

    Sequence: store ``vec_input`` into ``self.vec_input`` (indexed by
    ``self.input_order``), run ``self.fprop()``, fill the gradient buffers,
    then subtract ``learning_rate * gradient`` from ``vec_bias_inp``,
    ``vec_bias_h``, ``mat_W`` and ``mat_V``.  Nothing is returned.

    NOTE(review): this relies on ``self.fprop()`` having refreshed
    ``self.vec_recProb`` and ``self.mat_h``; that method is outside this
    view -- verify.
    """
    self.vec_input[self.input_order] = vec_input
    self.fprop()

    # Gradient buffers; all are written in place below.
    g_bias_inp = self.vec_grad_bias_inp
    g_bias_h = self.vec_grad_bias_h
    g_W = self.mat_grad_W
    g_V = self.mat_grad_V
    g_h = self.mat_grad_h
    g_pre_h = self.mat_grad_temp

    # Output side: reconstruction probability minus the observed input.
    np.subtract(self.vec_recProb, self.vec_input, out=g_bias_inp)
    np.multiply(g_bias_inp, self.mat_h, out=g_V)
    np.multiply(g_bias_inp, self.mat_V, out=g_h)

    # Hidden side: back through the sigmoid, then reduce for the hidden bias.
    mlnonlin.dsigmoid(self.mat_h, g_h, g_pre_h)
    mllin.sum_rows(g_pre_h, g_bias_h)

    # Each column of g_W accumulates the pre-activation gradients of all
    # later columns (reverse running sum); the final column has none.
    np.add.accumulate(g_pre_h[:, :0:-1], axis=1, out=g_W[:, -2::-1])
    g_W[:, -1] = 0
    g_W *= self.vec_input

    # Gradient-descent step.
    step_size = self.learning_rate
    self.vec_bias_inp -= step_size * g_bias_inp
    self.vec_bias_h -= step_size * g_bias_h
    self.mat_W -= step_size * g_W
    self.mat_V -= step_size * g_V