Python getMinibatchesIdx Examples

Programming Language: Python

Namespace/Package Name: util.utils

Method/Function: getMinibatchesIdx

Examples at hotexamples.com: 6

Python getMinibatchesIdx - 6 examples found. These are the top-rated real-world Python examples of util.utils.getMinibatchesIdx, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

    def computeAccuracy(self, dataPremiseMat, dataHypothesisMat, dataTarget,
                        predictFunc):
        """
        Return the fraction of examples in a dataset that are predicted correctly.

        The dataset is walked in minibatches of size one. Each minibatch is
        converted to premise/hypothesis tensors (padding side "right"), passed
        to predictFunc, and the predictions are compared against the argmax
        (axis 0) of every label in the batch.

        :param dataPremiseMat: Premise data forwarded to convertDataToTrainingBatch.
        :param dataHypothesisMat: Hypothesis data forwarded to convertDataToTrainingBatch.
        :param dataTarget: Gold labels; its length is the number of examples.
        :param predictFunc: Callable taking (premiseTensor, hypothesisTensor)
                            and returning the predicted labels for the batch.
        :return: Number of matching predictions divided by len(dataTarget).
        """
        totalCount = len(dataTarget)
        padSide = "right"
        numCorrect = 0.

        # Batch size of 1 is an arbitrary choice
        for _, batchIdx in getMinibatchesIdx(len(dataTarget), 1):
            premiseTensor, hypothesisTensor, labels = convertDataToTrainingBatch(
                dataPremiseMat, self.numTimestepsPremise, dataHypothesisMat,
                self.numTimestepsHypothesis, padSide, self.embeddingTable,
                dataTarget, batchIdx)

            predicted = predictFunc(premiseTensor, hypothesisTensor)
            goldIdx = [label.argmax(axis=0) for label in labels]

            # Element-wise comparison; count the positions that agree
            matches = np.array(predicted) == np.array(goldIdx)
            numCorrect += matches.sum()

        return numCorrect / totalCount

Example #2

Show file

File: network.py Project: BinbinBian/LSTM-NLI

    def computeAccuracy(self, dataPremiseMat, dataHypothesisMat, dataTarget,
                        predictFunc):
        """
        Return the fraction of examples in a dataset that are predicted correctly.

        The dataset is walked in minibatches of size one. Each minibatch is
        converted to premise/hypothesis tensors (padding side "right"), passed
        to predictFunc, and the predictions are compared against the argmax
        (axis 0) of every label in the batch.

        :param dataPremiseMat: Premise data forwarded to convertDataToTrainingBatch.
        :param dataHypothesisMat: Hypothesis data forwarded to convertDataToTrainingBatch.
        :param dataTarget: Gold labels; its length is the number of examples.
        :param predictFunc: Callable taking (premiseTensor, hypothesisTensor)
                            and returning the predicted labels for the batch.
        :return: Number of matching predictions divided by len(dataTarget).
        """
        totalCount = len(dataTarget)
        padSide = "right"
        numCorrect = 0.

        # Batch size of 1 is an arbitrary choice
        for _, batchIdx in getMinibatchesIdx(len(dataTarget), 1):
            premiseTensor, hypothesisTensor, labels = convertDataToTrainingBatch(
                dataPremiseMat, self.numTimestepsPremise, dataHypothesisMat,
                self.numTimestepsHypothesis, padSide, self.embeddingTable,
                dataTarget, batchIdx)

            predicted = predictFunc(premiseTensor, hypothesisTensor)
            goldIdx = [label.argmax(axis=0) for label in labels]

            # Element-wise comparison; count the positions that agree
            matches = np.array(predicted) == np.array(goldIdx)
            numCorrect += matches.sum()

        return numCorrect / totalCount

Example #3

Show file

File: sum_embeddings.py Project: BinbinBian/NNLI

def main(exp_name, embed_data, train_data, train_data_stats, val_data, val_data_stats,
         test_data, test_data_stats, log_path, batch_size, num_epochs,
         unroll_steps, learn_rate, num_dense, dense_dim, penalty, reg_coeff):
    """
    Main run function for training model.

    Builds a network that embeds premise and hypothesis, sums the embeddings,
    concatenates both sentence vectors and feeds them through dense layers
    ending in a softmax. The network is trained with RMSprop on minibatches
    of the training data; cost and train/dev accuracy are recorded via Stats.

    :param exp_name: Experiment name passed to Stats.
    :param embed_data: Passed to EmbeddingTable to load the embeddings.
    :param train_data: Training data (inputs and labels are derived from it).
    :param train_data_stats: Passed to generate_data with train_data.
    :param val_data: Validation data (inputs and labels are derived from it).
    :param val_data_stats: Passed to generate_data with val_data.
    :param test_data: Not used by this function.
    :param test_data_stats: Not used by this function.
    :param log_path: Not used by this function.
    :param batch_size: Minibatch size; also first dimension of the input layers.
    :param num_epochs: Number of passes over the minibatches.
    :param unroll_steps: Sequence length passed to generate_data; also second
                         dimension of the input layers.
    :param learn_rate: Learning rate passed to lasagne.updates.rmsprop.
    :param num_dense: Number of dense fully connected layers to add after concatenation layer
    :param dense_dim: Dimension of dense FC layers -- note this only applies if num_dense > 1
    :param penalty: Penalty to use for regularization; must be "l1" or "l2"
    :param reg_coeff: Regularization coeff to use for each layer of network; may
                      want to support different coefficient for different layers
    :raises ValueError: If penalty is neither "l1" nor "l2".
    :return: None
    """
    # Set random seed for deterministic results
    np.random.seed(0)
    num_ex_to_train = 30

    # Load embedding table
    table = EmbeddingTable(embed_data)
    vocab_size = table.sizeVocab
    dim_embeddings = table.dimEmbeddings
    embeddings_mat = table.embeddings


    train_prem, train_hyp = generate_data(train_data, train_data_stats, "left", "right", table, seq_len=unroll_steps)
    val_prem, val_hyp = generate_data(val_data, val_data_stats, "left", "right", table, seq_len=unroll_steps)
    train_labels = convertLabelsToMat(train_data)
    val_labels = convertLabelsToMat(val_data)

    # To test for overfitting capabilities of model
    # NOTE(review): only the validation arrays are truncated here, while the
    # loop below trains on the full training arrays -- confirm this is intended.
    if num_ex_to_train > 0:
        val_prem = val_prem[0:num_ex_to_train]
        val_hyp = val_hyp[0:num_ex_to_train]
        val_labels = val_labels[0:num_ex_to_train]

    # Theano expressions for premise/hypothesis inputs to network
    x_p = T.imatrix()
    x_h = T.imatrix()
    target_values = T.fmatrix(name="target_output")


    # Embedding layer for premise
    l_in_prem = InputLayer((batch_size, unroll_steps))
    l_embed_prem = EmbeddingLayer(l_in_prem, input_size=vocab_size,
                        output_size=dim_embeddings, W=embeddings_mat)

    # Embedding layer for hypothesis
    l_in_hyp = InputLayer((batch_size, unroll_steps))
    l_embed_hyp = EmbeddingLayer(l_in_hyp, input_size=vocab_size,
                        output_size=dim_embeddings, W=embeddings_mat)


    # Ensure embedding matrix parameters are not trainable
    l_embed_hyp.params[l_embed_hyp.W].remove('trainable')
    l_embed_prem.params[l_embed_prem.W].remove('trainable')

    l_embed_hyp_sum = SumEmbeddingLayer(l_embed_hyp)
    l_embed_prem_sum = SumEmbeddingLayer(l_embed_prem)

    # Concatenate sentence embeddings for premise and hypothesis
    l_concat = ConcatLayer([l_embed_hyp_sum, l_embed_prem_sum])

    l_in = l_concat
    l_output = l_concat
    # Add 'num_dense' dense layers with tanh
    # top layer is softmax
    if num_dense > 1:
        for n in range(num_dense):
            if n == num_dense-1:
                l_output = DenseLayer(l_in, num_units=NUM_DENSE_UNITS, nonlinearity=lasagne.nonlinearities.softmax)
            else:
                l_in = DenseLayer(l_in, num_units=dense_dim, nonlinearity=lasagne.nonlinearities.tanh)
    else:
        # num_dense <= 1: a single softmax layer directly on the concatenation
        l_output = DenseLayer(l_in, num_units=NUM_DENSE_UNITS, nonlinearity=lasagne.nonlinearities.softmax)

    network_output = get_output(l_output, {l_in_prem: x_p, l_in_hyp: x_h}) # Will have shape (batch_size, 3)
    f_dense_output = theano.function([x_p, x_h], network_output, on_unused_input='warn')

    # Compute cost
    if penalty == "l2":
        p_metric = l2
    elif penalty == "l1":
        p_metric = l1
    else:
        # Fail with a clear message instead of a NameError on p_metric below
        raise ValueError(
            "Unsupported penalty {0!r}; expected 'l1' or 'l2'".format(penalty))

    layers = lasagne.layers.get_all_layers(l_output)
    layer_dict = {l: reg_coeff for l in layers}
    # NOTE(review): reg_coeff is already the per-layer weight in layer_dict and
    # is multiplied in again here, so the effective weight looks like
    # reg_coeff ** 2 -- confirm this is intended.
    reg_cost = reg_coeff * regularize_layer_params_weighted(layer_dict, p_metric)
    cost = T.mean(T.nnet.categorical_crossentropy(network_output, target_values).mean()) + reg_cost
    compute_cost = theano.function([x_p, x_h, target_values], cost)

    # Compute accuracy
    accuracy = T.mean(T.eq(T.argmax(network_output, axis=-1), T.argmax(target_values, axis=-1)),
                      dtype=theano.config.floatX)
    compute_accuracy = theano.function([x_p, x_h, target_values], accuracy)

    label_output = T.argmax(network_output, axis=-1)
    predict = theano.function([x_p, x_h], label_output)

    # Define update/train functions
    all_params = lasagne.layers.get_all_params(l_output, trainable=True)
    updates = lasagne.updates.rmsprop(cost, all_params, learn_rate)
    train = theano.function([x_p, x_h, target_values], cost, updates=updates)

    # TODO: Augment embedding layer to allow for masking inputs

    stats = Stats(exp_name)
    # Accuracy is logged whenever the example count hits a multiple of
    # acc_num * batch_size
    acc_num = 10

    #minibatches = getMinibatchesIdx(val_prem.shape[0], batch_size)
    minibatches = getMinibatchesIdx(train_prem.shape[0], batch_size)
    print("Training ...")
    try:
        total_num_ex = 0
        for epoch in xrange(num_epochs):
            for _, minibatch in minibatches:
                total_num_ex += len(minibatch)
                stats.log("Processed {0} total examples in epoch {1}".format(str(total_num_ex),
                                                                          str(epoch)))

                #prem_batch = val_prem[minibatch]
                #hyp_batch = val_hyp[minibatch]
                #labels_batch = val_labels[minibatch]

                prem_batch = train_prem[minibatch]
                hyp_batch = train_hyp[minibatch]
                labels_batch = train_labels[minibatch]

                # One parameter update, then re-evaluate the cost on the same batch
                train(prem_batch, hyp_batch, labels_batch)
                cost_val = compute_cost(prem_batch, hyp_batch, labels_batch)

                stats.recordCost(total_num_ex, cost_val)
                # Periodically compute and log train/dev accuracy
                if total_num_ex%(acc_num*batch_size) == 0:
                    train_acc = compute_accuracy(train_prem, train_hyp, train_labels)
                    dev_acc = compute_accuracy(val_prem, val_hyp, val_labels)
                    stats.recordAcc(total_num_ex, train_acc, dataset="train")
                    stats.recordAcc(total_num_ex, dev_acc, dataset="dev")

    except KeyboardInterrupt:
        # Deliberate: Ctrl-C stops training early without a traceback
        pass

Example #4

Show file

File: lstmp2h.py Project: BinbinBian/LSTM-NLI

    def train(self, numEpochs=1, batchSize=5, learnRateVal=0.1, numExamplesToTrain=-1, gradMax=3.,
                L2regularization=0.0, dropoutRate=0.0, sentenceAttention=False,
                wordwiseAttention=False):
        """
        Takes care of training model, including propagation of errors and updating of
        parameters.

        Runs the minibatch loop (gradient computation, parameter update, cost
        logging, periodic dev accuracy), then saves the model to disk and
        records the final validation accuracy.

        :param numEpochs: Number of epochs to run.
        :param batchSize: Minibatch size; also forwarded to self.trainFunc.
        :param learnRateVal: Value passed to the two update functions after
                             every gradient computation.
        :param numExamplesToTrain: If > 0, the validation arrays are cut to
                                   the first numExamplesToTrain examples and
                                   minibatch indices cover that many examples;
                                   otherwise indices cover len(trainGoldLabel).
        :param gradMax: Forwarded to self.trainFunc.
        :param L2regularization: Forwarded to self.trainFunc.
        :param dropoutRate: Forwarded to self.trainFunc and self.predictFunc.
        :param sentenceAttention: Forwarded to self.trainFunc.
        :param wordwiseAttention: Forwarded to self.trainFunc.
        :return: None
        """
        # Experiment name encodes the main hyperparameters; handed to Stats below
        expName = "Epochs_{0}_LRate_{1}_L2Reg_{2}_dropout_{3}_sentAttn_{4}_" \
                       "wordAttn_{5}".format(str(numEpochs), str(learnRateVal),
                                             str(L2regularization), str(dropoutRate),
                                             str(sentenceAttention), str(wordwiseAttention))
        # Snapshot of the locals bound so far (arguments, self, expName) into the config dict
        self.configs.update(locals())
        trainPremiseIdxMat, trainHypothesisIdxMat = self.embeddingTable.convertDataToIdxMatrices(
                                  self.trainData, self.trainDataStats)
        trainGoldLabel = convertLabelsToMat(self.trainData)

        valPremiseIdxMat, valHypothesisIdxMat = self.embeddingTable.convertDataToIdxMatrices(
                                self.valData, self.valDataStats)
        valGoldLabel = convertLabelsToMat(self.valData)

        # If you want to train on less than full dataset
        # (the index matrices are sliced along their second axis, the labels
        # along their first)
        if numExamplesToTrain > 0:
            valPremiseIdxMat = valPremiseIdxMat[:, range(numExamplesToTrain), :]
            valHypothesisIdxMat = valHypothesisIdxMat[:, range(numExamplesToTrain), :]
            valGoldLabel = valGoldLabel[range(numExamplesToTrain)]


        #Whether zero-padded on left or right
        pad = "right"

        # Get full premise/hypothesis tensors
        # batchPremiseTensor, batchHypothesisTensor, batchLabels = \
        #             convertDataToTrainingBatch(valPremiseIdxMat, self.numTimestepsPremise, valHypothesisIdxMat,
        #                                        self.numTimestepsHypothesis, "right", self.embeddingTable,
        #                                        valGoldLabel, range(len(valGoldLabel)))
        #sharedValPremise = theano.shared(batchPremiseTensor)
        #sharedValHypothesis = theano.shared(batchHypothesisTensor)
        #sharedValLabels = theano.shared(batchLabels)


        # Symbolic Theano inputs handed to the train/predict function builders
        inputPremise = T.ftensor3(name="inputPremise")
        inputHypothesis = T.ftensor3(name="inputHypothesis")
        yTarget = T.fmatrix(name="yTarget")
        learnRate = T.scalar(name="learnRate", dtype='float32')


        # self.trainFunc returns seven values; the last two are not used here
        fGradSharedHypothesis, fGradSharedPremise, fUpdatePremise, \
            fUpdateHypothesis, costFn, _, _ = self.trainFunc(inputPremise,
                                            inputHypothesis, yTarget, learnRate, gradMax,
                                            L2regularization, dropoutRate, sentenceAttention,
                                            wordwiseAttention, batchSize)

        # Running count of examples processed across all epochs
        totalExamples = 0
        stats = Stats(self.logger, expName)

        # Training
        self.logger.Log("Model configs: {0}".format(self.configs))
        self.logger.Log("Starting training with {0} epochs, {1} batchSize,"
                " {2} learning rate, {3} L2regularization coefficient, and {4} dropout rate".format(
            numEpochs, batchSize, learnRateVal, L2regularization, dropoutRate))


        predictFunc = self.predictFunc(inputPremise, inputHypothesis, dropoutRate)

        for epoch in xrange(numEpochs):
            self.logger.Log("Epoch number: %d" %(epoch))

            # Minibatch indices are regenerated at the start of every epoch
            if numExamplesToTrain > 0:
                minibatches = getMinibatchesIdx(numExamplesToTrain, batchSize)
            else:
                minibatches = getMinibatchesIdx(len(trainGoldLabel), batchSize)

            # Examples processed in the current epoch only
            numExamples = 0
            for _, minibatch in minibatches:
                # dropoutMode is 1.0 while training and 0.0 while evaluating
                self.dropoutMode.set_value(1.0)
                numExamples += len(minibatch)
                totalExamples += len(minibatch)

                self.logger.Log("Processed {0} examples in current epoch".
                                format(str(numExamples)))

                # NOTE(review): batches are built from the validation arrays,
                # while the indices may range over len(trainGoldLabel); the
                # train* matrices computed above are not used in this method.
                # Confirm whether this is a debugging leftover.
                batchPremiseTensor, batchHypothesisTensor, batchLabels = \
                    convertDataToTrainingBatch(valPremiseIdxMat, self.numTimestepsPremise, valHypothesisIdxMat,
                                               self.numTimestepsHypothesis, pad, self.embeddingTable,
                                               valGoldLabel, minibatch)

                # Compute gradients for both parts, then apply the updates
                # (return values of the gradient functions are not used)
                gradHypothesisOut = fGradSharedHypothesis(batchPremiseTensor,
                                       batchHypothesisTensor, batchLabels)
                gradPremiseOut = fGradSharedPremise(batchPremiseTensor,
                                       batchHypothesisTensor, batchLabels)
                fUpdatePremise(learnRateVal)
                fUpdateHypothesis(learnRateVal)

                # Only referenced by the commented-out log line below
                predictLabels = self.predict(batchPremiseTensor, batchHypothesisTensor, predictFunc)
                #self.logger.Log("Labels in epoch {0}: {1}".format(epoch, str(predictLabels)))


                # Cost is evaluated after the update, on the same batch
                cost = costFn(batchPremiseTensor, batchHypothesisTensor, batchLabels)
                stats.recordCost(totalExamples, cost)

                # Note: Big time sink happens here
                # Runs only when the running total is an exact multiple of 100
                if totalExamples%(100) == 0:
                    # TODO: Don't compute accuracy of dev set
                    self.dropoutMode.set_value(0.0)
                    devAccuracy = self.computeAccuracy(valPremiseIdxMat,
                                                       valHypothesisIdxMat, valGoldLabel, predictFunc)
                    stats.recordAcc(totalExamples, devAccuracy, "dev")


        stats.recordFinalTrainingTime(totalExamples)

        # Save model to disk
        self.logger.Log("Saving model...")
        self.extractParams()
        configString = "batch={0},epoch={1},learnRate={2},dimHidden={3},dimInput={4}".format(str(batchSize),
                                            str(numEpochs), str(learnRateVal),
                                            str(self.dimHidden), str(self.dimInput))
        # currDir is not defined in this method; presumably a module-level path -- verify
        self.saveModel(currDir + "/savedmodels/basicLSTM_"+configString+".npz")
        self.logger.Log("Model saved!")

        # Set dropout to 0. again for testing
        self.dropoutMode.set_value(0.0)

        #Train Accuracy
        # trainAccuracy = self.computeAccuracy(trainPremiseIdxMat,
        #                              trainHypothesisIdxMat, trainGoldLabel, predictFunc)
        # self.logger.Log("Final training accuracy: {0}".format(trainAccuracy))

        # Val Accuracy
        valAccuracy = self.computeAccuracy(valPremiseIdxMat,
                                    valHypothesisIdxMat, valGoldLabel, predictFunc)
        # TODO: change -1 for training acc to actual value when I enable train computation
        stats.recordFinalStats(totalExamples, -1, valAccuracy)

Example #5

Show file

def main(exp_name, embed_data, train_data, train_data_stats, val_data,
         val_data_stats, test_data, test_data_stats, log_path, batch_size,
         num_epochs, unroll_steps, learn_rate, num_dense, dense_dim, penalty,
         reg_coeff):
    """
    Main run function for training model.

    Builds a network that embeds premise and hypothesis, sums the embeddings,
    concatenates both sentence vectors and feeds them through dense layers
    ending in a softmax. The network is trained with RMSprop on minibatches
    of the training data; cost and train/dev accuracy are recorded via Stats.

    :param exp_name: Experiment name passed to Stats.
    :param embed_data: Passed to EmbeddingTable to load the embeddings.
    :param train_data: Training data (inputs and labels are derived from it).
    :param train_data_stats: Passed to generate_data with train_data.
    :param val_data: Validation data (inputs and labels are derived from it).
    :param val_data_stats: Passed to generate_data with val_data.
    :param test_data: Not used by this function.
    :param test_data_stats: Not used by this function.
    :param log_path: Not used by this function.
    :param batch_size: Minibatch size; also first dimension of the input layers.
    :param num_epochs: Number of passes over the minibatches.
    :param unroll_steps: Sequence length passed to generate_data; also second
                         dimension of the input layers.
    :param learn_rate: Learning rate passed to lasagne.updates.rmsprop.
    :param num_dense: Number of dense fully connected layers to add after concatenation layer
    :param dense_dim: Dimension of dense FC layers -- note this only applies if num_dense > 1
    :param penalty: Penalty to use for regularization; must be "l1" or "l2"
    :param reg_coeff: Regularization coeff to use for each layer of network; may
                      want to support different coefficient for different layers
    :raises ValueError: If penalty is neither "l1" nor "l2".
    :return: None
    """
    # Set random seed for deterministic results
    np.random.seed(0)
    num_ex_to_train = 30

    # Load embedding table
    table = EmbeddingTable(embed_data)
    vocab_size = table.sizeVocab
    dim_embeddings = table.dimEmbeddings
    embeddings_mat = table.embeddings

    train_prem, train_hyp = generate_data(train_data,
                                          train_data_stats,
                                          "left",
                                          "right",
                                          table,
                                          seq_len=unroll_steps)
    val_prem, val_hyp = generate_data(val_data,
                                      val_data_stats,
                                      "left",
                                      "right",
                                      table,
                                      seq_len=unroll_steps)
    train_labels = convertLabelsToMat(train_data)
    val_labels = convertLabelsToMat(val_data)

    # To test for overfitting capabilities of model
    # NOTE(review): only the validation arrays are truncated here, while the
    # loop below trains on the full training arrays -- confirm this is intended.
    if num_ex_to_train > 0:
        val_prem = val_prem[0:num_ex_to_train]
        val_hyp = val_hyp[0:num_ex_to_train]
        val_labels = val_labels[0:num_ex_to_train]

    # Theano expressions for premise/hypothesis inputs to network
    x_p = T.imatrix()
    x_h = T.imatrix()
    target_values = T.fmatrix(name="target_output")

    # Embedding layer for premise
    l_in_prem = InputLayer((batch_size, unroll_steps))
    l_embed_prem = EmbeddingLayer(l_in_prem,
                                  input_size=vocab_size,
                                  output_size=dim_embeddings,
                                  W=embeddings_mat)

    # Embedding layer for hypothesis
    l_in_hyp = InputLayer((batch_size, unroll_steps))
    l_embed_hyp = EmbeddingLayer(l_in_hyp,
                                 input_size=vocab_size,
                                 output_size=dim_embeddings,
                                 W=embeddings_mat)

    # Ensure embedding matrix parameters are not trainable
    l_embed_hyp.params[l_embed_hyp.W].remove('trainable')
    l_embed_prem.params[l_embed_prem.W].remove('trainable')

    l_embed_hyp_sum = SumEmbeddingLayer(l_embed_hyp)
    l_embed_prem_sum = SumEmbeddingLayer(l_embed_prem)

    # Concatenate sentence embeddings for premise and hypothesis
    l_concat = ConcatLayer([l_embed_hyp_sum, l_embed_prem_sum])

    l_in = l_concat
    l_output = l_concat
    # Add 'num_dense' dense layers with tanh
    # top layer is softmax
    if num_dense > 1:
        for n in range(num_dense):
            if n == num_dense - 1:
                l_output = DenseLayer(
                    l_in,
                    num_units=NUM_DENSE_UNITS,
                    nonlinearity=lasagne.nonlinearities.softmax)
            else:
                l_in = DenseLayer(l_in,
                                  num_units=dense_dim,
                                  nonlinearity=lasagne.nonlinearities.tanh)
    else:
        # num_dense <= 1: a single softmax layer directly on the concatenation
        l_output = DenseLayer(l_in,
                              num_units=NUM_DENSE_UNITS,
                              nonlinearity=lasagne.nonlinearities.softmax)

    network_output = get_output(l_output, {
        l_in_prem: x_p,
        l_in_hyp: x_h
    })  # Will have shape (batch_size, 3)
    f_dense_output = theano.function([x_p, x_h],
                                     network_output,
                                     on_unused_input='warn')

    # Compute cost
    if penalty == "l2":
        p_metric = l2
    elif penalty == "l1":
        p_metric = l1
    else:
        # Fail with a clear message instead of a NameError on p_metric below
        raise ValueError(
            "Unsupported penalty {0!r}; expected 'l1' or 'l2'".format(penalty))

    layers = lasagne.layers.get_all_layers(l_output)
    layer_dict = {l: reg_coeff for l in layers}
    # NOTE(review): reg_coeff is already the per-layer weight in layer_dict and
    # is multiplied in again here, so the effective weight looks like
    # reg_coeff ** 2 -- confirm this is intended.
    reg_cost = reg_coeff * regularize_layer_params_weighted(
        layer_dict, p_metric)
    cost = T.mean(
        T.nnet.categorical_crossentropy(network_output,
                                        target_values).mean()) + reg_cost
    compute_cost = theano.function([x_p, x_h, target_values], cost)

    # Compute accuracy
    accuracy = T.mean(T.eq(T.argmax(network_output, axis=-1),
                           T.argmax(target_values, axis=-1)),
                      dtype=theano.config.floatX)
    compute_accuracy = theano.function([x_p, x_h, target_values], accuracy)

    label_output = T.argmax(network_output, axis=-1)
    predict = theano.function([x_p, x_h], label_output)

    # Define update/train functions
    all_params = lasagne.layers.get_all_params(l_output, trainable=True)
    updates = lasagne.updates.rmsprop(cost, all_params, learn_rate)
    train = theano.function([x_p, x_h, target_values], cost, updates=updates)

    # TODO: Augment embedding layer to allow for masking inputs

    stats = Stats(exp_name)
    # Accuracy is logged whenever the example count hits a multiple of
    # acc_num * batch_size
    acc_num = 10

    #minibatches = getMinibatchesIdx(val_prem.shape[0], batch_size)
    minibatches = getMinibatchesIdx(train_prem.shape[0], batch_size)
    print("Training ...")
    try:
        total_num_ex = 0
        for epoch in xrange(num_epochs):
            for _, minibatch in minibatches:
                total_num_ex += len(minibatch)
                stats.log("Processed {0} total examples in epoch {1}".format(
                    str(total_num_ex), str(epoch)))

                #prem_batch = val_prem[minibatch]
                #hyp_batch = val_hyp[minibatch]
                #labels_batch = val_labels[minibatch]

                prem_batch = train_prem[minibatch]
                hyp_batch = train_hyp[minibatch]
                labels_batch = train_labels[minibatch]

                # One parameter update, then re-evaluate the cost on the same batch
                train(prem_batch, hyp_batch, labels_batch)
                cost_val = compute_cost(prem_batch, hyp_batch, labels_batch)

                stats.recordCost(total_num_ex, cost_val)
                # Periodically compute and log train/dev accuracy
                if total_num_ex % (acc_num * batch_size) == 0:
                    train_acc = compute_accuracy(train_prem, train_hyp,
                                                 train_labels)
                    dev_acc = compute_accuracy(val_prem, val_hyp, val_labels)
                    stats.recordAcc(total_num_ex, train_acc, dataset="train")
                    stats.recordAcc(total_num_ex, dev_acc, dataset="dev")

    except KeyboardInterrupt:
        # Deliberate: Ctrl-C stops training early without a traceback
        pass

Example #6

Show file

File: lstmp2h.py Project: BinbinBian/LSTM-NLI

    def train(self,
              numEpochs=1,
              batchSize=5,
              learnRateVal=0.1,
              numExamplesToTrain=-1,
              gradMax=3.,
              L2regularization=0.0,
              dropoutRate=0.0,
              sentenceAttention=False,
              wordwiseAttention=False):
        """
        Takes care of training model, including propagation of errors and updating of
        parameters.

        Runs the minibatch loop (gradient computation, parameter update, cost
        logging, periodic dev accuracy), then saves the model to disk and
        records the final validation accuracy.

        :param numEpochs: Number of epochs to run.
        :param batchSize: Minibatch size; also forwarded to self.trainFunc.
        :param learnRateVal: Value passed to the two update functions after
                             every gradient computation.
        :param numExamplesToTrain: If > 0, the validation arrays are cut to
                                   the first numExamplesToTrain examples and
                                   minibatch indices cover that many examples;
                                   otherwise indices cover len(trainGoldLabel).
        :param gradMax: Forwarded to self.trainFunc.
        :param L2regularization: Forwarded to self.trainFunc.
        :param dropoutRate: Forwarded to self.trainFunc and self.predictFunc.
        :param sentenceAttention: Forwarded to self.trainFunc.
        :param wordwiseAttention: Forwarded to self.trainFunc.
        :return: None
        """
        # Experiment name encodes the main hyperparameters; handed to Stats below
        expName = "Epochs_{0}_LRate_{1}_L2Reg_{2}_dropout_{3}_sentAttn_{4}_" \
                       "wordAttn_{5}".format(str(numEpochs), str(learnRateVal),
                                             str(L2regularization), str(dropoutRate),
                                             str(sentenceAttention), str(wordwiseAttention))
        # Snapshot of the locals bound so far (arguments, self, expName) into the config dict
        self.configs.update(locals())
        trainPremiseIdxMat, trainHypothesisIdxMat = self.embeddingTable.convertDataToIdxMatrices(
            self.trainData, self.trainDataStats)
        trainGoldLabel = convertLabelsToMat(self.trainData)

        valPremiseIdxMat, valHypothesisIdxMat = self.embeddingTable.convertDataToIdxMatrices(
            self.valData, self.valDataStats)
        valGoldLabel = convertLabelsToMat(self.valData)

        # If you want to train on less than full dataset
        # (the index matrices are sliced along their second axis, the labels
        # along their first)
        if numExamplesToTrain > 0:
            valPremiseIdxMat = valPremiseIdxMat[:,
                                                range(numExamplesToTrain), :]
            valHypothesisIdxMat = valHypothesisIdxMat[:,
                                                      range(numExamplesToTrain
                                                            ), :]
            valGoldLabel = valGoldLabel[range(numExamplesToTrain)]

        #Whether zero-padded on left or right
        pad = "right"

        # Get full premise/hypothesis tensors
        # batchPremiseTensor, batchHypothesisTensor, batchLabels = \
        #             convertDataToTrainingBatch(valPremiseIdxMat, self.numTimestepsPremise, valHypothesisIdxMat,
        #                                        self.numTimestepsHypothesis, "right", self.embeddingTable,
        #                                        valGoldLabel, range(len(valGoldLabel)))
        #sharedValPremise = theano.shared(batchPremiseTensor)
        #sharedValHypothesis = theano.shared(batchHypothesisTensor)
        #sharedValLabels = theano.shared(batchLabels)

        # Symbolic Theano inputs handed to the train/predict function builders
        inputPremise = T.ftensor3(name="inputPremise")
        inputHypothesis = T.ftensor3(name="inputHypothesis")
        yTarget = T.fmatrix(name="yTarget")
        learnRate = T.scalar(name="learnRate", dtype='float32')


        # self.trainFunc returns seven values; the last two are not used here
        fGradSharedHypothesis, fGradSharedPremise, fUpdatePremise, \
            fUpdateHypothesis, costFn, _, _ = self.trainFunc(inputPremise,
                                            inputHypothesis, yTarget, learnRate, gradMax,
                                            L2regularization, dropoutRate, sentenceAttention,
                                            wordwiseAttention, batchSize)

        # Running count of examples processed across all epochs
        totalExamples = 0
        stats = Stats(self.logger, expName)

        # Training
        self.logger.Log("Model configs: {0}".format(self.configs))
        self.logger.Log(
            "Starting training with {0} epochs, {1} batchSize,"
            " {2} learning rate, {3} L2regularization coefficient, and {4} dropout rate"
            .format(numEpochs, batchSize, learnRateVal, L2regularization,
                    dropoutRate))

        predictFunc = self.predictFunc(inputPremise, inputHypothesis,
                                       dropoutRate)

        for epoch in xrange(numEpochs):
            self.logger.Log("Epoch number: %d" % (epoch))

            # Minibatch indices are regenerated at the start of every epoch
            if numExamplesToTrain > 0:
                minibatches = getMinibatchesIdx(numExamplesToTrain, batchSize)
            else:
                minibatches = getMinibatchesIdx(len(trainGoldLabel), batchSize)

            # Examples processed in the current epoch only
            numExamples = 0
            for _, minibatch in minibatches:
                # dropoutMode is 1.0 while training and 0.0 while evaluating
                self.dropoutMode.set_value(1.0)
                numExamples += len(minibatch)
                totalExamples += len(minibatch)

                self.logger.Log(
                    "Processed {0} examples in current epoch".format(
                        str(numExamples)))

                # NOTE(review): batches are built from the validation arrays,
                # while the indices may range over len(trainGoldLabel); the
                # train* matrices computed above are not used in this method.
                # Confirm whether this is a debugging leftover.
                batchPremiseTensor, batchHypothesisTensor, batchLabels = \
                    convertDataToTrainingBatch(valPremiseIdxMat, self.numTimestepsPremise, valHypothesisIdxMat,
                                               self.numTimestepsHypothesis, pad, self.embeddingTable,
                                               valGoldLabel, minibatch)

                # Compute gradients for both parts, then apply the updates
                # (return values of the gradient functions are not used)
                gradHypothesisOut = fGradSharedHypothesis(
                    batchPremiseTensor, batchHypothesisTensor, batchLabels)
                gradPremiseOut = fGradSharedPremise(batchPremiseTensor,
                                                    batchHypothesisTensor,
                                                    batchLabels)
                fUpdatePremise(learnRateVal)
                fUpdateHypothesis(learnRateVal)

                # Only referenced by the commented-out log line below
                predictLabels = self.predict(batchPremiseTensor,
                                             batchHypothesisTensor,
                                             predictFunc)
                #self.logger.Log("Labels in epoch {0}: {1}".format(epoch, str(predictLabels)))

                # Cost is evaluated after the update, on the same batch
                cost = costFn(batchPremiseTensor, batchHypothesisTensor,
                              batchLabels)
                stats.recordCost(totalExamples, cost)

                # Note: Big time sink happens here
                # Runs only when the running total is an exact multiple of 100
                if totalExamples % (100) == 0:
                    # TODO: Don't compute accuracy of dev set
                    self.dropoutMode.set_value(0.0)
                    devAccuracy = self.computeAccuracy(valPremiseIdxMat,
                                                       valHypothesisIdxMat,
                                                       valGoldLabel,
                                                       predictFunc)
                    stats.recordAcc(totalExamples, devAccuracy, "dev")

        stats.recordFinalTrainingTime(totalExamples)

        # Save model to disk
        self.logger.Log("Saving model...")
        self.extractParams()
        configString = "batch={0},epoch={1},learnRate={2},dimHidden={3},dimInput={4}".format(
            str(batchSize), str(numEpochs), str(learnRateVal),
            str(self.dimHidden), str(self.dimInput))
        # currDir is not defined in this method; presumably a module-level path -- verify
        self.saveModel(currDir + "/savedmodels/basicLSTM_" + configString +
                       ".npz")
        self.logger.Log("Model saved!")

        # Set dropout to 0. again for testing
        self.dropoutMode.set_value(0.0)

        #Train Accuracy
        # trainAccuracy = self.computeAccuracy(trainPremiseIdxMat,
        #                              trainHypothesisIdxMat, trainGoldLabel, predictFunc)
        # self.logger.Log("Final training accuracy: {0}".format(trainAccuracy))

        # Val Accuracy
        valAccuracy = self.computeAccuracy(valPremiseIdxMat,
                                           valHypothesisIdxMat, valGoldLabel,
                                           predictFunc)
        # TODO: change -1 for training acc to actual value when I enable train computation
        stats.recordFinalStats(totalExamples, -1, valAccuracy)