def getTheta(self):
    if self.use_tensor:
        return utils.vectorizeParams(self.Ws, self.L, self.W, self.V)
    else:
        return utils.vectorizeParams(self.Ws, self.L, self.W)
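# utils.vectorizeParams is defined elsewhere in this project; the helper below
# is only an illustrative sketch (an assumption, not the project's actual
# implementation) of the behavior getTheta relies on: flatten every parameter
# array and concatenate the pieces into one 1-D theta vector, in a fixed order
# that matches how the vector is later unpacked.
def _vectorize_params_sketch(*params):
    import numpy as np  # numpy is presumably already imported as np at module level
    return np.concatenate([np.asarray(p).ravel() for p in params])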
def calculateTotalGradient(self, model):
    grad = np.zeros(model.num_parameters)
    # add regularizer gradients
    self.dJ_dWs += len(self.trees_train) * model.lambda_Ws * model.Ws
    self.dJ_dL += len(self.trees_train) * model.lambda_L * model.L
    self.dJ_dW += len(self.trees_train) * model.lambda_W * model.W
    if model.use_tensor:
        self.dJ_dV += len(self.trees_train) * model.lambda_V * model.V
        grad = utils.vectorizeParams(self.dJ_dWs, self.dJ_dL, self.dJ_dW, self.dJ_dV)
    else:
        grad = utils.vectorizeParams(self.dJ_dWs, self.dJ_dL, self.dJ_dW)
    return grad
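# Why the len(self.trees_train) factor above: assuming the objective adds an
# L2 penalty of (lambda/2)*||param||^2 once per training tree (an assumption
# inferred from the code, not stated here), the penalty's gradient is
# N * lambda * param with N = len(self.trees_train), which is exactly the term
# added to each accumulated gradient. The standalone check below (illustrative
# only; none of these names come from this project) confirms that numerically
# with a central finite-difference approximation.
def _check_regularizer_gradient():
    import numpy as np
    rng = np.random.default_rng(0)
    W = rng.standard_normal((3, 4))
    lam, N, eps = 0.01, 100, 1e-6
    penalty = lambda M: N * 0.5 * lam * np.sum(M * M)   # N copies of (lambda/2)*||M||^2
    analytic = N * lam * W                              # the term added in the code above
    numeric = np.zeros_like(W)
    for i in range(W.shape[0]):
        for j in range(W.shape[1]):
            Wp, Wm = W.copy(), W.copy()
            Wp[i, j] += eps
            Wm[i, j] -= eps
            numeric[i, j] = (penalty(Wp) - penalty(Wm)) / (2 * eps)
    assert np.allclose(analytic, numeric, atol=1e-4)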
def calculateTotalGradient(self, model, scaler):
    grad = np.zeros(model.num_parameters)
    # average the gradient by dividing by the minibatch size
    self.dJ_dWs *= scaler
    self.dJ_dL *= scaler
    self.dJ_dW *= scaler
    # add regularizer gradients
    self.dJ_dWs += model.lambda_Ws * model.Ws
    self.dJ_dL += model.lambda_L * model.L
    self.dJ_dW += model.lambda_W * model.W
    if model.use_tensor:
        self.dJ_dV *= scaler
        self.dJ_dV += model.lambda_V * model.V
        grad = utils.vectorizeParams(self.dJ_dWs, self.dJ_dL, self.dJ_dW, self.dJ_dV)
    else:
        grad = utils.vectorizeParams(self.dJ_dWs, self.dJ_dL, self.dJ_dW)
    return grad
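# Minimal usage sketch for the minibatch variant above. Assumptions: `trainer`
# has already accumulated the per-tree gradients (self.dJ_dWs, self.dJ_dL,
# self.dJ_dW and, with a tensor, self.dJ_dV) over one minibatch, and
# `model.setTheta` is a hypothetical inverse of getTheta that unpacks a flat
# vector back into Ws, L, W, V -- neither of those steps is shown in this file.
def sgd_minibatch_step(trainer, model, minibatch, learning_rate=0.01):
    scaler = 1.0 / len(minibatch)               # average the accumulated gradient
    grad = trainer.calculateTotalGradient(model, scaler)
    theta = model.getTheta()                    # current parameters, flattened
    theta = theta - learning_rate * grad        # plain gradient-descent update
    model.setTheta(theta)                       # hypothetical: unflatten back into the model
    return theta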