Example 1
def checkGradient_MiniBatch(dictionary, trees):

    model = RNTNModel(dictionary)
    theta_init = model.getTheta()

    # compute analytical gradient
    costObj = ComputeCostAndGradMiniBatch()
    cost, grad = costObj.compute(theta_init, dictionary, trees)

    eps = 1E-4
    numgrad = np.zeros(grad.shape)

    # compute numerical gradient
    for i in range(model.num_parameters):
        if i % 10 == 0:
            print '%d/%d' % (i, model.num_parameters)

        indicator = np.zeros(model.num_parameters)
        indicator[i] = 1

        theta_plus = theta_init + eps*indicator
        cost_plus, grad_plus = costObj.compute(theta_plus, dictionary, trees)

        theta_minus = theta_init - eps*indicator
        cost_minus, grad_minus = costObj.compute(theta_minus, dictionary, trees)

        numgrad[i] = (cost_plus - cost_minus)/(2*eps)

    print 'analytical gradient: ', grad
    print 'numerical gradient: ', numgrad

    normdiff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print 'Norm difference: ', normdiff
    return normdiff
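
The function above is the standard central-difference check: each parameter is nudged by ±eps, the numerical slope (cost_plus - cost_minus) / (2 * eps) is compared with the analytical gradient, and the relative error ||numgrad - grad|| / ||numgrad + grad|| is typically expected to be very small (on the order of 1e-7 or less) when backprop is correct. The self-contained toy below illustrates the same recipe on a quadratic cost; it does not use RNTNModel and is only meant to make the criterion concrete.

import numpy as np

def toy_cost_and_grad(theta):
    # J(theta) = 0.5 * ||theta||^2, so the analytical gradient is theta itself
    return 0.5 * np.dot(theta, theta), theta.copy()

def check_gradient_toy(num_parameters=5, eps=1e-4):
    theta = np.random.randn(num_parameters)
    _, grad = toy_cost_and_grad(theta)

    numgrad = np.zeros_like(grad)
    for i in range(num_parameters):
        indicator = np.zeros(num_parameters)
        indicator[i] = 1
        cost_plus, _ = toy_cost_and_grad(theta + eps * indicator)
        cost_minus, _ = toy_cost_and_grad(theta - eps * indicator)
        numgrad[i] = (cost_plus - cost_minus) / (2 * eps)

    # Same relative-error criterion as above; comes out far below 1e-7 here
    return np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
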
Example 2
def checkGradient_MiniBatch(dictionary, trees):

    model = RNTNModel(dictionary)
    theta_init = model.getTheta()

    # compute analytical gradient
    costObj = ComputeCostAndGradMiniBatch()
    cost, grad = costObj.compute(theta_init, dictionary, trees)

    eps = 1E-4
    numgrad = np.zeros(grad.shape)

    # compute numerical gradient
    for i in range(model.num_parameters):
        if i % 10 == 0:
            print '%d/%d' % (i, model.num_parameters)

        indicator = np.zeros(model.num_parameters)
        indicator[i] = 1

        theta_plus = theta_init + eps * indicator
        cost_plus, grad_plus = costObj.compute(theta_plus, dictionary, trees)

        theta_minus = theta_init - eps * indicator
        cost_minus, grad_minus = costObj.compute(theta_minus, dictionary,
                                                 trees)

        numgrad[i] = (cost_plus - cost_minus) / (2 * eps)

    print 'analytical gradient: ', grad
    print 'numerical gradient: ', numgrad

    normdiff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print 'Norm difference: ', normdiff
    return normdiff
Example 3
    def compute(self, theta, dictionary, trees_train, trees_dev=None):
        self.dictionary = dictionary
        self.trees_train = trees_train
        self.trees_dev = trees_dev
        model = RNTNModel(self.dictionary)
        if theta is not None:
            model.updateParamsGivenTheta(theta)
        cost = 0.0
        grad = np.zeros(model.num_parameters)
        self.loss = 0.0
        self.dJ_dWs = np.zeros(model.Ws.shape)
        self.dJ_dL = np.zeros(model.L.shape)
        self.dJ_dW = np.zeros(model.W.shape)
        self.dJ_dV = np.zeros(model.V.shape)
        tree_train_clone = []
        for tree in self.trees_train:
            cloned_tree = tree.clone()
            self.forwardPass(model, cloned_tree)
            tree_train_clone.append(cloned_tree)
        scaler = 1.0 / len(self.trees_train)
        cost = self.loss * scaler + self.calculateRegularizationCost(model)
        # Backprop on cloned trees
        for tree in tree_train_clone:
            dJ_dz_prop = np.zeros(model.dim)
            self.backwardPass(model, tree, dJ_dz_prop)
        grad = self.calculateTotalGradient(model, scaler)
        return cost, grad
Example 4
    def __init__(self, dictionary, X):
        self.costObj = ComputeCostAndGradMiniBatch()
        self.model = RNTNModel(dictionary)
        self.trees = X

        self.num_correct = 0.0
        self.num_wrong = 0.0

        self.num_correct_root = 0.0
        self.num_wrong_root = 0.0
Example 5
class Test:

    def __init__(self, dictionary, X):
        self.costObj = ComputeCostAndGradMiniBatch()
        self.model = RNTNModel(dictionary)
        self.trees = X

        self.num_correct = 0.0
        self.num_wrong = 0.0

        self.num_correct_root = 0.0
        self.num_wrong_root = 0.0

    def test(self, theta):
        self.model.updateParamsGivenTheta(theta)

        tree_clone = []
        for tree in self.trees:
            cloned_tree = tree.clone()
            self.costObj.forwardPass(self.model, cloned_tree)
            tree_clone.append(cloned_tree)

        # Traverse the tree and compare with labels
        for tree in tree_clone:
            self.evaluate_allnode(tree)
            self.evaluate_rootnode(tree)

        accuracy_allnode = self.num_correct/(self.num_correct + self.num_wrong)*100
        accuracy_rootnode = self.num_correct_root/(self.num_correct_root + self.num_wrong_root)*100
        return accuracy_allnode, accuracy_rootnode

    def evaluate_allnode(self, tree):
        if not tree.is_leaf():
            left_child = tree.subtrees[0]
            right_child = tree.subtrees[1]
            self.evaluate_allnode(left_child)
            self.evaluate_allnode(right_child)

        if int(tree.label) == np.argmax(tree.prediction):
            self.num_correct += 1
        else:
            self.num_wrong += 1

    def evaluate_rootnode(self, tree):
        if int(tree.label) == np.argmax(tree.prediction):
            self.num_correct_root += 1
        else:
            self.num_wrong_root += 1
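
A hypothetical way to drive this class (variable names assumed, not from the source): build it over a held-out set of trees and pass the current parameter vector to test(), which returns all-node and root-node accuracies in percent. Note that the counters accumulate across calls, so the code as written expects a fresh Test instance per evaluation.

# Hypothetical usage; `dictionary`, `test_trees`, and `theta` are assumed to
# come from the surrounding training code.
tester = Test(dictionary, test_trees)
accuracy_allnode, accuracy_rootnode = tester.test(theta)
print('all nodes: %.2f%%  root nodes: %.2f%%'
      % (accuracy_allnode, accuracy_rootnode))
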
Example 6
class Test:

    def __init__(self, dictionary, X):
        self.costObj = ComputeCostAndGradMiniBatch()
        self.model = RNTNModel(dictionary)
        self.trees = X

        self.num_correct = 0.0
        self.num_wrong = 0.0

        self.num_correct_root = 0.0
        self.num_wrong_root = 0.0

    def test(self, theta):
        self.model.updateParamsGivenTheta(theta)

        tree_clone = []
        for tree in self.trees:
            cloned_tree = tree.clone()
            self.costObj.forwardPass(self.model, cloned_tree)
            tree_clone.append(cloned_tree)

        # Traverse the tree and compare with labels
        for tree in tree_clone:
            self.evaluate_allnode(tree)
            self.evaluate_rootnode(tree)

        accuracy_allnode = self.num_correct/(self.num_correct + self.num_wrong)*100
        accuracy_rootnode = self.num_correct_root/(self.num_correct_root + self.num_wrong_root)*100
        return accuracy_allnode, accuracy_rootnode

    def evaluate_allnode(self, tree):
        if not tree.is_leaf():
            left_child = tree.subtrees[0]
            right_child = tree.subtrees[1]
            self.evaluate_allnode(left_child)
            self.evaluate_allnode(right_child)

        if int(tree.label) == np.argmax(tree.prediction):
            self.num_correct += 1
        else:
            self.num_wrong += 1

    def evaluate_rootnode(self, tree):
        if int(tree.label) == np.argmax(tree.prediction):
            self.num_correct_root += 1
        else:
            self.num_wrong_root += 1
Example 7
    def compute(self, theta, dictionary, trees_train, trees_dev=None):

        # set variables
        self.dictionary = dictionary
        self.trees_train = trees_train
        self.trees_dev = trees_dev

        # Create a new model from scratch
        model = RNTNModel(self.dictionary)
        if theta is not None:
            model.updateParamsGivenTheta(theta)

        # Initialize all parameters
        cost = 0.0
        grad = np.zeros(model.num_parameters)

        self.loss = 0.0

        self.dJ_dWs = np.zeros(model.Ws.shape)
        self.dJ_dL = np.zeros(model.L.shape)
        self.dJ_dW = np.zeros(model.W.shape)
        self.dJ_dV = np.zeros(model.V.shape)

        # Copy tree and forward prop to populate node vectors
        #   return the cost of the network
        tree_train_clone = []
        for tree in self.trees_train:
            cloned_tree = tree.clone()
            self.forwardPass(model, cloned_tree)
            tree_train_clone.append(cloned_tree)

        # Scaler to take average over batch elements
        scaler = 1.0 / len(self.trees_train)
        # Compute cost: sum of the prediction loss and add regularization terms
        cost = self.loss*scaler + self.calculateRegularizationCost(model)

        # Backprop on cloned trees
        #   return the gradient of the network params
        for tree in tree_train_clone:
            dJ_dz_prop = np.zeros(model.dim)
            self.backwardPass(model, tree, dJ_dz_prop)

        # Compute full gradient: sum of gradient matrices and \Delta J_reg terms
        grad = self.calculateTotalGradient(model, scaler)
        return cost, grad
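
In effect compute() returns the batch-averaged cost, (1/N) * sum of per-tree losses plus the regularization term, together with one flat gradient vector over all parameters, which is exactly what a first-order update consumes. A minimal sketch of a single descent step follows; the names `batch_trees` and `learning_rate` are assumptions, not part of the repository's optimizer.

# Hypothetical single update step using the (cost, grad) contract above;
# `dictionary`, `batch_trees`, and `learning_rate` are assumed names.
costObj = ComputeCostAndGradMiniBatch()
theta = RNTNModel(dictionary).getTheta()

cost, grad = costObj.compute(theta, dictionary, batch_trees)
theta = theta - learning_rate * grad   # plain gradient-descent step
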
Example 8
    def __init__(self, dictionary, X_train, X_dev=None, X_test=None):
        self.X_train = X_train
        self.X_dev = X_dev
        self.X_test = X_test
        self.dictionary = dictionary
        self.costObj = ComputeCostAndGradMiniBatch()
        dumb_model = RNTNModel(dictionary)
        self.theta_init = dumb_model.getTheta()
        self.num_data = len(X_train)
        self.num_parameters = dumb_model.num_parameters

        # SGD params
        self.batch_size = dumb_model.batch_size
        self.num_batches = self.num_data / self.batch_size
        self.max_epochs = dumb_model.max_epochs
        self.learning_rate = dumb_model.learning_rate
        self.fudge = 1E-3
        self.epoch_save_freq = 5  # save every 5 epochs
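
Example 8 only shows the hyper-parameters being stored; the training loop itself is not part of the snippet. The sketch below is a guess at how those fields fit together: a plain epoch/mini-batch loop, with an AdaGrad-style adaptive step assumed only because a small `fudge` denominator is the usual companion of that update. The function name and the update rule are assumptions, not the repository's code.

import random
import numpy as np

def run_sgd(trainer):
    # Hypothetical training loop over an instance of the class whose
    # __init__ is shown above; the AdaGrad-style step is an assumption
    # suggested by trainer.fudge.
    theta = trainer.theta_init.copy()
    sum_sq_grad = np.zeros(trainer.num_parameters)

    for epoch in range(trainer.max_epochs):
        random.shuffle(trainer.X_train)
        for b in range(trainer.num_batches):
            batch = trainer.X_train[b * trainer.batch_size:
                                    (b + 1) * trainer.batch_size]
            cost, grad = trainer.costObj.compute(theta, trainer.dictionary,
                                                 batch)

            # Adaptive per-parameter step: eta * g / (sqrt(sum g^2) + fudge)
            sum_sq_grad += grad ** 2
            theta -= trainer.learning_rate * grad / (np.sqrt(sum_sq_grad)
                                                     + trainer.fudge)
    return theta
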
Example 9
    def __init__(self, dictionary, X_train, X_dev=None, X_test=None):
        self.X_train = X_train
        self.X_dev = X_dev
        self.X_test = X_test
        self.dictionary = dictionary
        self.costObj = ComputeCostAndGradMiniBatch()
        dumb_model = RNTNModel(dictionary)
        self.theta_init = dumb_model.getTheta()
        self.num_data = len(X_train)
        self.num_parameters = dumb_model.num_parameters

        # SGD params
        self.batch_size = dumb_model.batch_size
        self.num_batches = self.num_data / self.batch_size
        self.max_epochs = dumb_model.max_epochs
        self.learning_rate = dumb_model.learning_rate
        self.fudge = 1E-3
        self.epoch_save_freq = 5  # save every 5 epochs
Example 10
    def __init__(self, dictionary, X):
        self.costObj = ComputeCostAndGradMiniBatch()
        self.model = RNTNModel(dictionary)
        self.trees = X

        self.num_correct = 0.0
        self.num_wrong = 0.0

        self.num_correct_root = 0.0
        self.num_wrong_root = 0.0
Example 11
def checkGradientClean(dictionary, trees):

    # Code adopted from UFLDL gradientChecker
    # http://ufldl.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization

    model = RNTNModel(dictionary)
    theta_init = model.getTheta()

    # compute analytical gradient
    costObj = ComputeCostAndGrad(dictionary, trees)
    cost, grad = costObj.compute(theta_init)

    eps = 1E-4
    numgrad = np.zeros(grad.shape)

    # compute numerical gradient
    for i in range(model.num_parameters):
        if i % 10 == 0:
            print '%d/%d' % (i, model.num_parameters)

        indicator = np.zeros(model.num_parameters)
        indicator[i] = 1

        theta_plus = theta_init + eps*indicator
        cost_plus, grad_plus = costObj.compute(theta_plus)

        theta_minus = theta_init - eps*indicator
        cost_minus, grad_minus = costObj.compute(theta_minus)

        numgrad[i] = (cost_plus - cost_minus)/(2*eps)

    print 'analytical gradient: ', grad
    print 'numerical gradient: ', numgrad

    normdiff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print 'Norm difference: ', normdiff
    return normdiff
Example 12
def checkGradientClean(dictionary, trees):

    # Code adopted from UFLDL gradientChecker
    # http://ufldl.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization

    model = RNTNModel(dictionary)
    theta_init = model.getTheta()

    # compute analytical gradient
    costObj = ComputeCostAndGrad(dictionary, trees)
    cost, grad = costObj.compute(theta_init)

    eps = 1E-4
    numgrad = np.zeros(grad.shape)

    # compute numerical gradient
    for i in range(model.num_parameters):
        if i % 10 == 0:
            print '%d/%d' % (i, model.num_parameters)

        indicator = np.zeros(model.num_parameters)
        indicator[i] = 1

        theta_plus = theta_init + eps * indicator
        cost_plus, grad_plus = costObj.compute(theta_plus)

        theta_minus = theta_init - eps * indicator
        cost_minus, grad_minus = costObj.compute(theta_minus)

        numgrad[i] = (cost_plus - cost_minus) / (2 * eps)

    print 'analytical gradient: ', grad
    print 'numerical gradient: ', numgrad

    normdiff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print 'Norm difference: ', normdiff
    return normdiff
Example 13
    def __init__(self, dictionary, X):
        self.costObj = ComputeCostAndGrad(dictionary, X)
        dumb_model = RNTNModel(dictionary)
        self.theta_init = dumb_model.getTheta()
Example 14
    def __init__(self, dictionary, X):
        self.costObj = ComputeCostAndGrad(dictionary, X)
        dumb_model = RNTNModel(dictionary)
        self.theta_init = dumb_model.getTheta()
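
Examples 13 and 14 bind the training trees into ComputeCostAndGrad at construction time, so compute(theta) alone returns a (cost, grad) pair over the full batch. An objective of that shape can be handed to an off-the-shelf batch optimizer; the SciPy call below is an assumption for illustration, not something the snippets show.

from scipy.optimize import minimize

def train_full_batch(trainer):
    # Hypothetical full-batch training; `trainer` is an instance of the class
    # whose __init__ is shown above. jac=True tells SciPy that the objective
    # returns (cost, grad) in a single call.
    result = minimize(trainer.costObj.compute,
                      trainer.theta_init,
                      jac=True,
                      method='L-BFGS-B',
                      options={'maxiter': 100})   # iteration cap is arbitrary
    return result.x
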