def applyDeltaWeights(self, dWList, dBList, updateOnlyLast, batchSize):
    """Applies the accumulated gradients to the weights and biases of all
    layers, either with RPROP or with momentum plus weight decay."""
    if self.useRPROP:
        for i in reversed(xrange(self.NumberOfLayers - 1)):
            cp.rprop(self.Weights[i], dWList[i], self.DeltaWeightsOld[i],
                     self.WeightsLearnRate[i], self.cfg.finetune_cost)
            cp.rprop(self.Bias[i], dBList[i], self.DeltaBiasOld[i],
                     self.BiasLearnRate[i], self.cfg.finetune_cost)
            if updateOnlyLast:
                break
    else:
        for i in reversed(xrange(self.NumberOfLayers - 1)):
            W, B = self.Weights[i], self.Bias[i]
            dW, dWo = dWList[i], self.DeltaWeightsOld[i]
            dB, dBo = dBList[i], self.DeltaBiasOld[i]
            # add the momentum term: dW += finetune_momentum * dW_old
            cp.apply_binary_functor(dW, dWo, cp.binary_functor.XPBY,
                                    self.cfg.finetune_momentum)
            cp.apply_binary_functor(dB, dBo, cp.binary_functor.XPBY,
                                    self.cfg.finetune_momentum)
            # gradient step with weight decay: W += lr/batchSize * (dW - cost*W)
            cp.learn_step_weight_decay(W, dW,
                                       self.cfg.finetune_learnrate / batchSize,
                                       self.cfg.finetune_cost)
            cp.learn_step_weight_decay(B, dB,
                                       self.cfg.finetune_learnrate / batchSize,
                                       self.cfg.finetune_cost)
            # remember this step's deltas for the next momentum update
            cp.copy(dWo, dW)
            cp.copy(dBo, dB)
            if updateOnlyLast:
                break
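# Illustrative NumPy-style sketch (not part of the original code and not the
# cuv API) of what the momentum branch above computes for a single layer.
# The function name and arguments are hypothetical; w, dw and dw_old are
# assumed to be plain float arrays.
def momentum_weight_decay_step(w, dw, dw_old, learnrate, batchSize, momentum, cost):
    dw = dw + momentum * dw_old                         # cp.binary_functor.XPBY
    w += (learnrate / batchSize) * (dw - cost * w)      # cp.learn_step_weight_decay
    return w, dw                                        # dw is copied back into dw_old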
def weight_update(self, learnrate=0.01, decay=0.0):
    """Updates the weights and the bias using source activations and target deltas.

    @param learnrate how strongly the gradient influences the weights
    @param decay     large values result in a regularization with respect to
                     the squared weight value
    """
    batch_size = self.source.activations.w
    # weight gradient: dW = target.deltas * source.activations^T
    h = cp.dev_matrix_cmf(self.weight.h, self.weight.w)
    cp.prod(h, self.target.deltas, self.source.activations, 'n', 't')
    cp.learn_step_weight_decay(self.weight, h, learnrate / batch_size, decay)
    h.dealloc()
    # bias gradient: sum of the deltas over the batch
    h = cp.get_filled_matrix(self.target.activations.h, 1, 0)
    cp.reduce_to_col(h.vec, self.target.deltas)
    cp.learn_step_weight_decay(self.bias, h, learnrate / batch_size, decay)
    h.dealloc()
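# NumPy reference for the two gradients formed in weight_update above (an
# illustrative assumption, not the cuv API).  Columns of `deltas` and
# `activations` are assumed to hold one example each, matching the 'n'/'t'
# arguments to cp.prod; the function name is hypothetical.
def weight_update_reference(weight, bias, deltas, activations, learnrate=0.01, decay=0.0):
    batch_size = activations.shape[1]
    dW = deltas.dot(activations.T)            # cp.prod(h, deltas, activations, 'n', 't')
    db = deltas.sum(axis=1, keepdims=True)    # cp.reduce_to_col
    weight += (learnrate / batch_size) * (dW - decay * weight)
    bias += (learnrate / batch_size) * (db - decay * bias)
    return weight, bias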
def backward(self, learnrate=0.01, decay=0.0):
    """Backward pass: calculates the deltas of the lower layer and updates the weights.

    @param learnrate how strongly the gradient influences the weights
    @param decay     large values result in a regularization with respect to
                     the squared weight value
    """
    # propagate the deltas down: source.deltas = W^T * target.deltas
    cp.prod(self.source.deltas, self.weight, self.target.deltas, 't', 'n')
    # multiply with the derivative of the source nonlinearity
    h = self.source.activations.copy()
    self.source.d_nonlinearity(h)
    self.source.deltas *= h
    h.dealloc()
    batch_size = self.source.activations.shape[1]
    # weight gradient: dW = target.deltas * source.activations^T
    dw = cp.prod(self.target.deltas, self.source.activations, 'n', 't')
    cp.learn_step_weight_decay(self.weight, dw, learnrate / batch_size, decay)
    dw.dealloc()
    # bias gradient: sum of the deltas over the batch
    db = cp.sum(self.target.deltas, 1)
    cp.learn_step_weight_decay(self.bias, db, learnrate / batch_size, decay)
    db.dealloc()
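# Sketch of the delta-propagation step only (illustrative; assumes a logistic
# source layer, so the derivative of the nonlinearity can be written with the
# activations themselves).  The function name is hypothetical and the
# arguments are plain float arrays with one example per column.
def propagate_deltas(weight, target_deltas, source_activations):
    source_deltas = weight.T.dot(target_deltas)          # cp.prod(..., 't', 'n')
    return source_deltas * source_activations * (1.0 - source_activations)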
def updateStep(self, learnrate, cost):
    # W += learnrate * (dW - cost*W)
    cp.learn_step_weight_decay(self.mat, self.w_tmp, learnrate, cost)
    cp.learn_step_weight_decay(self.bias_lo, self.blo_tmp, learnrate, cost)
    cp.learn_step_weight_decay(self.bias_hi, self.bhi_tmp, learnrate, cost)
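# Minimal rendition of the helper used throughout these updates, based on the
# comment above (a sketch of its semantics, not the cuv implementation); the
# function name is hypothetical and w, dw are plain float arrays.
def learn_step_weight_decay_reference(w, dw, learnrate, cost):
    w += learnrate * (dw - cost * w)
    return w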