def checkGradient_MiniBatch(dictionary, trees):
    model = RNTNModel(dictionary)
    theta_init = model.getTheta()

    # compute analytical gradient
    costObj = ComputeCostAndGradMiniBatch()
    cost, grad = costObj.compute(theta_init, dictionary, trees)

    eps = 1E-4
    numgrad = np.zeros(grad.shape)

    # compute numerical gradient, one parameter at a time, via the
    # central difference (J(theta + eps*e_i) - J(theta - eps*e_i)) / (2*eps)
    for i in range(model.num_parameters):
        if i % 10 == 0:
            print '%d/%d' % (i, model.num_parameters)
        indicator = np.zeros(model.num_parameters)
        indicator[i] = 1
        theta_plus = theta_init + eps * indicator
        cost_plus, grad_plus = costObj.compute(theta_plus, dictionary, trees)
        theta_minus = theta_init - eps * indicator
        cost_minus, grad_minus = costObj.compute(theta_minus, dictionary, trees)
        numgrad[i] = (cost_plus - cost_minus) / (2 * eps)

    print 'analytical gradient: ', grad
    print 'numerical gradient: ', numgrad

    # relative difference between the two gradients; near zero when backprop is correct
    normdiff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print 'Norm difference: ', normdiff
    return normdiff
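Each parameter costs two extra full passes over the batch, so the checker is only practical on a handful of trees. A minimal sketch of how it might be driven; loadDictionary and loadTrees are hypothetical stand-ins for whatever the project actually uses to read the corpus:

# Hypothetical loaders -- substitute the project's actual corpus-reading code.
dictionary = loadDictionary('dictionary.txt')
trees = loadTrees('trees/dev.txt')

# Check against a few trees only; every parameter costs two full forward passes.
normdiff = checkGradient_MiniBatch(dictionary, trees[:5])
assert normdiff < 1E-7, 'backprop disagrees with the numerical gradient'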
def compute(self, theta, dictionary, trees_train, trees_dev=None):
    # set variables
    self.dictionary = dictionary
    self.trees_train = trees_train
    self.trees_dev = trees_dev

    # Create a new model from scratch
    model = RNTNModel(self.dictionary)
    if theta is not None:
        model.updateParamsGivenTheta(theta)

    # Initialize the cost and the gradient accumulators
    cost = 0.0
    grad = np.zeros(model.num_parameters)
    self.loss = 0.0
    self.dJ_dWs = np.zeros(model.Ws.shape)
    self.dJ_dL = np.zeros(model.L.shape)
    self.dJ_dW = np.zeros(model.W.shape)
    self.dJ_dV = np.zeros(model.V.shape)

    # Clone each tree and forward-prop to populate the node vectors;
    # the forward pass accumulates the prediction loss of the network
    tree_train_clone = []
    for tree in self.trees_train:
        cloned_tree = tree.clone()
        self.forwardPass(model, cloned_tree)
        tree_train_clone.append(cloned_tree)

    # Scaler to take the average over batch elements
    scaler = 1.0 / len(self.trees_train)

    # Compute cost: average prediction loss plus the regularization terms
    cost = self.loss * scaler + self.calculateRegularizationCost(model)

    # Backprop on the cloned trees to accumulate the gradients
    # of the network parameters
    for tree in tree_train_clone:
        dJ_dz_prop = np.zeros(model.dim)
        self.backwardPass(model, tree, dJ_dz_prop)

    # Full gradient: averaged gradient matrices plus the regularization gradients
    grad = self.calculateTotalGradient(model, scaler)

    return cost, grad
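compute delegates the per-node work to forwardPass, which fills in each node's vector and softmax prediction bottom-up. As a reference point, here is a minimal sketch of the RNTN composition for one interior node, following Socher et al.'s formulation rather than this code's exact parameter layout: child vectors a and b (dimension d) are stacked, passed through the tensor V (shape 2d x 2d x d) and the matrix W (shape d x 2d+1, last column the bias), and the resulting node vector is classified by Ws (one row per sentiment class, last column the bias).

import numpy as np

def compose_node(a, b, W, V, Ws):
    c = np.concatenate([a, b])               # children stacked: shape (2d,)
    c_bias = np.append(c, 1.0)               # with bias term: shape (2d + 1,)

    # Tensor term: one bilinear form c^T V[:, :, k] c per output dimension k.
    tensor_term = np.array([c.dot(V[:, :, k]).dot(c) for k in range(V.shape[2])])

    # Node vector: tanh of the standard-RNN term plus the tensor term.
    p = np.tanh(W.dot(c_bias) + tensor_term)

    # Softmax prediction over sentiment classes at this node.
    scores = Ws.dot(np.append(p, 1.0))
    scores -= scores.max()                   # numerical stability
    prediction = np.exp(scores) / np.exp(scores).sum()
    return p, prediction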
class Test:
    def __init__(self, dictionary, X):
        self.costObj = ComputeCostAndGradMiniBatch()
        self.model = RNTNModel(dictionary)
        self.trees = X
        self.num_correct = 0.0
        self.num_wrong = 0.0
        self.num_correct_root = 0.0
        self.num_wrong_root = 0.0

    def test(self, theta):
        self.model.updateParamsGivenTheta(theta)

        # Forward-prop each cloned tree to populate the node predictions
        tree_clone = []
        for tree in self.trees:
            cloned_tree = tree.clone()
            self.costObj.forwardPass(self.model, cloned_tree)
            tree_clone.append(cloned_tree)

        # Traverse the trees and compare predictions with labels
        for tree in tree_clone:
            self.evaluate_allnode(tree)
            self.evaluate_rootnode(tree)

        accuracy_allnode = self.num_correct / (self.num_correct + self.num_wrong) * 100
        accuracy_rootnode = self.num_correct_root / (self.num_correct_root + self.num_wrong_root) * 100
        return accuracy_allnode, accuracy_rootnode

    def evaluate_allnode(self, tree):
        # Recurse into the children, then score this node
        if not tree.is_leaf():
            left_child = tree.subtrees[0]
            right_child = tree.subtrees[1]
            self.evaluate_allnode(left_child)
            self.evaluate_allnode(right_child)
        if int(tree.label) == np.argmax(tree.prediction):
            self.num_correct += 1
        else:
            self.num_wrong += 1

    def evaluate_rootnode(self, tree):
        if int(tree.label) == np.argmax(tree.prediction):
            self.num_correct_root += 1
        else:
            self.num_wrong_root += 1
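A sketch of how Test might be driven once training has produced a parameter vector theta; dictionary and trees_test are assumed to come from the project's loaders. Note that the counters are instance state and are never reset, so a fresh Test object should be constructed for each evaluation:

tester = Test(dictionary, trees_test)
acc_all, acc_root = tester.test(theta)
print 'all-node accuracy: %.2f%%, root accuracy: %.2f%%' % (acc_all, acc_root)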
def __init__(self, dictionary, X_train, X_dev=None, X_test=None):
    self.X_train = X_train
    self.X_dev = X_dev
    self.X_test = X_test
    self.dictionary = dictionary
    self.costObj = ComputeCostAndGradMiniBatch()

    # throwaway model, used only to read off the initial parameter
    # vector and the hyperparameters
    dumb_model = RNTNModel(dictionary)
    self.theta_init = dumb_model.getTheta()
    self.num_data = len(X_train)
    self.num_parameters = dumb_model.num_parameters

    # SGD params
    self.batch_size = dumb_model.batch_size
    self.num_batches = self.num_data / self.batch_size
    self.max_epochs = dumb_model.max_epochs
    self.learning_rate = dumb_model.learning_rate
    self.fudge = 1E-3
    self.epoch_save_freq = 5  # save every 5 epochs
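The fudge constant alongside a per-parameter learning rate suggests an AdaGrad-style update. A minimal sketch of the training loop these fields support, assuming it lives on the same class; the method name train, the shuffling details, and the saveTheta checkpoint helper are all assumptions:

def train(self):
    theta = self.theta_init.copy()
    grad_history = np.zeros(self.num_parameters)  # running sum of squared gradients

    for epoch in range(self.max_epochs):
        perm = np.random.permutation(self.num_data)
        for b in range(self.num_batches):
            batch_idx = perm[b * self.batch_size:(b + 1) * self.batch_size]
            batch = [self.X_train[i] for i in batch_idx]
            cost, grad = self.costObj.compute(theta, self.dictionary, batch)

            # AdaGrad: each parameter's effective step size shrinks with its
            # accumulated squared gradient; fudge keeps the denominator
            # away from zero early on.
            grad_history += grad ** 2
            theta -= self.learning_rate * grad / (np.sqrt(grad_history) + self.fudge)

        if (epoch + 1) % self.epoch_save_freq == 0:
            saveTheta(theta, epoch)  # hypothetical checkpoint helper
    return theta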
def checkGradientClean(dictionary, trees):
    # Code adapted from the UFLDL gradient checker:
    # http://ufldl.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization
    model = RNTNModel(dictionary)
    theta_init = model.getTheta()

    # compute analytical gradient
    costObj = ComputeCostAndGrad(dictionary, trees)
    cost, grad = costObj.compute(theta_init)

    eps = 1E-4
    numgrad = np.zeros(grad.shape)

    # compute numerical gradient via central differences
    for i in range(model.num_parameters):
        if i % 10 == 0:
            print '%d/%d' % (i, model.num_parameters)
        indicator = np.zeros(model.num_parameters)
        indicator[i] = 1
        theta_plus = theta_init + eps * indicator
        cost_plus, grad_plus = costObj.compute(theta_plus)
        theta_minus = theta_init - eps * indicator
        cost_minus, grad_minus = costObj.compute(theta_minus)
        numgrad[i] = (cost_plus - cost_minus) / (2 * eps)

    print 'analytical gradient: ', grad
    print 'numerical gradient: ', numgrad

    normdiff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print 'Norm difference: ', normdiff
    return normdiff
def __init__(self, dictionary, X):
    self.costObj = ComputeCostAndGrad(dictionary, X)
    dumb_model = RNTNModel(dictionary)
    self.theta_init = dumb_model.getTheta()
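Because this variant's compute takes only theta and returns a (cost, grad) pair, it plugs directly into an off-the-shelf batch optimizer. A sketch using SciPy's L-BFGS, assuming a train method on the same class; the iteration cap is an arbitrary placeholder:

from scipy.optimize import minimize

def train(self):
    # compute(theta) returns (cost, grad); jac=True tells SciPy to use
    # the returned gradient rather than differencing numerically.
    result = minimize(self.costObj.compute, self.theta_init,
                      method='L-BFGS-B', jac=True,
                      options={'maxiter': 100, 'disp': True})
    return result.x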