Example #1
import os
import cPickle
from collections import defaultdict

import numpy as np

import data_io  # project-local helper module; imports inferred from usage below


def main():
    print("Getting features for valid papers from the database")
    if os.path.exists("features_valid.obj"):
        # pickled features are binary data; open the cache in 'rb'/'wb' mode
        with open("features_valid.obj", 'rb') as loadfile:
            data = cPickle.load(loadfile)
    else:
        data = data_io.get_features_db("ValidPaper")
        with open("features_valid.obj", 'wb') as dumpfile:
            cPickle.dump(data, dumpfile, protocol=cPickle.HIGHEST_PROTOCOL)
    author_paper_ids = [x[:2] for x in data]
    features = [x[2:] for x in data]
    
    # code for including the keyword-match feature
    print "adding additional features..."
    import additional_features as af
    all_features = af.get_additional_features()
    _, _, kw_features = all_features
    for i in range(len(features)):
        features[i] += tuple(kw_features[i][2:])
    
    featuresnp = np.array(features, dtype='int32')
        
#    featuresnp -= np.mean(featuresnp, axis=0)
#    featuresnp /= np.std(featuresnp, axis=0)
    
    
    print("Loading the classifier")
    classifier = data_io.load_model(prefix="forest_")

    print("Making predictions")
    predictions = classifier.predict_proba(featuresnp)[:,1]
    predictions = list(predictions)

    author_predictions = defaultdict(list)
    paper_predictions = {}

    for (a_id, p_id), pred in zip(author_paper_ids, predictions):
        author_predictions[a_id].append((pred, p_id))

    for author_id in sorted(author_predictions):
        # sort each author's (probability, paper_id) pairs in descending
        # order, so the most likely confirmed papers come first
        paper_ids_sorted = sorted(author_predictions[author_id], reverse=True)
        paper_predictions[author_id] = [x[1] for x in paper_ids_sorted]

    print("Writing predictions to file")
    data_io.write_submission(paper_predictions, prefix="forest_")
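
# Side note (not part of the original code): the load-or-compute pickle
# caching block above recurs in every example; it factors naturally into a
# small helper. A minimal sketch, assuming Python 2 and the same cPickle
# usage ("load_or_compute" is a hypothetical name):
import os
import cPickle

def load_or_compute(cache_path, compute):
    # return the cached object if present; otherwise compute it,
    # write it to the cache, and return it
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as f:
            return cPickle.load(f)
    obj = compute()
    with open(cache_path, 'wb') as f:
        cPickle.dump(obj, f, protocol=cPickle.HIGHEST_PROTOCOL)
    return obj

# usage, e.g.:
# data = load_or_compute("features_valid.obj",
#                        lambda: data_io.get_features_db("ValidPaper"))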
Example #2
import os
import cPickle

import numpy as np
from sklearn import cross_validation
from sklearn.ensemble import RandomForestClassifier

import data_io  # project-local helper module; imports inferred from usage below


def main():
    print("Getting features for deleted papers from the database")
    if os.path.exists("features_deleted.obj"):
        with open("features_deleted.obj", 'rb') as loadfile:
            features_deleted = cPickle.load(loadfile)
    else:
        features_deleted = data_io.get_features_db("TrainDeleted")
        with open("features_deleted.obj", 'wb') as dumpfile:
            cPickle.dump(features_deleted, dumpfile, protocol=cPickle.HIGHEST_PROTOCOL)

    print("Getting features for confirmed papers from the database")
    if os.path.exists("features_confirmed.obj"):
        with open("features_confirmed.obj", 'rb') as loadfile:
            features_conf = cPickle.load(loadfile)
    else:
        features_conf = data_io.get_features_db("TrainConfirmed")
        with open("features_confirmed.obj", 'wb') as dumpfile:
            cPickle.dump(features_conf, dumpfile, protocol=cPickle.HIGHEST_PROTOCOL)

    features = [x[2:] for x in features_deleted + features_conf]
    target = [0] * len(features_deleted) + [1] * len(features_conf)
    
    
    # code for including the keyword-match feature
    print "adding additional features..."
    import additional_features as af
    all_features = af.get_additional_features()
    kw_deleted, kw_confirmed, _ = all_features
    kw_features = kw_deleted + kw_confirmed
    for i in range(len(features)):
        features[i] += tuple(kw_features[i][2:])
 
 
    # cross-validation: 4 random shuffle splits with 40% held out for testing
    # (a simple K-Fold alternative is left commented out)
    #cv = cross_validation.KFold(len(features), n_folds=5)
    cv = cross_validation.ShuffleSplit(len(features), n_iter=4, test_size=0.4, random_state=0)
    
    print("Training the Classifier")
    # note: compute_importances and min_samples_split=1 are only accepted by
    # the old scikit-learn releases this code targets; newer versions always
    # expose feature_importances_ and require min_samples_split >= 2
    classifier = RandomForestClassifier(n_estimators=100,
                                        verbose=2,
                                        n_jobs=1,
                                        min_samples_split=1,
                                        random_state=0,
                                        compute_importances=True)
 

    featuresnp = np.array(features, dtype='int32')
    targetnp = np.array(target, dtype='int32')
    
#    with open("wrong_predictions.txt", 'w' ) as wp:
#        class1count = 0; class2count =0; rpredictions = 0
#        for train, test in cv:
#            x_train = featuresnp[train];        y_train = targetnp[train]
#            x_test = featuresnp[test];         y_test = targetnp[test]
#            classifier.fit(x_train, y_train)
#            predictions = classifier.predict_proba(x_test)
#            pred_classes = classifier.predict(x_test)
#            for i in range(len(y_test)):
#            
#                if y_test[i] != pred_classes[i] :
#                    if(predictions[i,0] > 0.5 and predictions[i,0] < 0.6):
#                        class1count+=1;
#                    if(predictions[i,1] > 0.5 and predictions[i,1] < 0.6):
#                        class2count+=1;
#                    line = "feat: "+str(features[test[i]])+" ".join([ " a:",str(y_test[i])," p:", str(pred_classes[i])," proba:", str(predictions[i]), "\n"])
#                    wp.write(line)
#                else:
#                    if(predictions[i,0] > 0.4 and predictions[i,0] < 0.6):
#                        rpredictions+=1;
#                    
#        print "number of wrong predictions of deleted class: ", class1count
#        print "number of wrong predictions of confirmed class: ", class2count
#        print "number of right predictions with close probas", rpredictions
#        for train, test in cv:
#            print "total number of test examples: ", len(test)
        

#    classifier.fit(featuresnp, targetnp)
#    importances = classifier.feature_importances_
##    std = np.std([tree.feature_importances_ for tree in forest.estimators_],
##                 axis=0)
#    indices = np.argsort(importances)[::-1]
#    
#    # Print the feature ranking
#    print("Feature ranking:")
#    
#    for f in range(len(indices)):
#        print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))
#
#    numFeatures = 15   
#    prunedFeatures = np.zeros(shape=(featuresnp.shape[0], numFeatures), dtype="int32")
#    for i in range(prunedFeatures.shape[0]):
#        for j, fi in enumerate(indices[0:numFeatures]):
#            prunedFeatures[i,j] = featuresnp[i, fi]  
            

#    featuresnp -= np.mean(featuresnp, axis=0)
#    featuresnp /= np.std(featuresnp, axis=0)

    results = cross_validation.cross_val_score(classifier, X=featuresnp, y=targetnp, cv=cv, n_jobs=4, verbose=True)
    # print the per-fold scores and their mean
    print "Results: ", results
    print "Mean result: ", results.mean()
Example #3
import os
import sys
import time
import cPickle
from collections import OrderedDict

import numpy as np
import theano
import theano.tensor as T
from sklearn import cross_validation

import data_io       # project-local helper module; imports inferred from usage below
from mlp import MLP  # assumed module path; the MLP class is defined elsewhere in this project


def test_mlp(learning_rate=0.017,
             L1_reg=0.0001,
             L2_reg=0.0003,
             n_epochs=10000,
             n_hidden=50):
    """
    
        :type learning_rate: float
        :param learning_rate: learning rate used (factor for the stochastic
        gradient
    
        :type L1_reg: float
        :param L1_reg: L1-norm's weight when added to the cost (see
        regularization)
    
        :type L2_reg: float
        :param L2_reg: L2-norm's weight when added to the cost (see
        regularization)
    
        :type n_epochs: int
        :param n_epochs: maximal number of epochs to run the optimizer
    
    
       """
    np.random.seed(17)
    print("Getting features for deleted papers from the database")
    features_deleted = None
    features_conf = None
    if os.path.exists("features_deleted.obj"):
        with open("features_deleted.obj", 'rb') as loadfile:
            features_deleted = cPickle.load(loadfile)
    else:
        features_deleted = data_io.get_features_db("TrainDeleted")
        with open("features_deleted.obj", 'wb') as dumpfile:
            cPickle.dump(features_deleted,
                         dumpfile,
                         protocol=cPickle.HIGHEST_PROTOCOL)

    print("Getting features for confirmed papers from the database")
    if os.path.exists("features_confirmed.obj"):
        with open("features_confirmed.obj", 'rb') as loadfile:
            features_conf = cPickle.load(loadfile)
    else:
        features_conf = data_io.get_features_db("TrainConfirmed")
        with open("features_confirmed.obj", 'wb') as dumpfile:
            cPickle.dump(features_conf,
                         dumpfile,
                         protocol=cPickle.HIGHEST_PROTOCOL)

#        predictInts = []
#        for tup in features_valid:
#           a, b, c, d, e = tup
#           predictInts.append((int(a), int(b), int(c), int(d), int(e)))
#
#        predictsMat = np.ndarray(shape=(len(predictInts), 5), dtype='int32')
#        for i, tup in enumerate(predictInts):
#            a, b, c, d, e = tup
#            predictsMat[i, 0] = a;  predictsMat[i, 1] = b; predictsMat[i, 2] = c; predictsMat[i, 3] = d; predictsMat[i, 4] = e;
#        predict_set_x = theano.shared(features_validnp, borrow=True)

    features = [x[2:] for x in features_deleted + features_conf]
    target = [0] * len(features_deleted) + [1] * len(features_conf)

    # code for including the keyword-match feature
    print "adding additional features..."
    import additional_features as af
    all_features = af.get_additional_features()
    kw_deleted, kw_confirmed, _ = all_features
    kw_features = kw_deleted + kw_confirmed
    for i in range(len(features)):
        features[i] += tuple(kw_features[i][2:])

    featuresnp = np.array(features, dtype='float64')
    targetnp = np.array(target, dtype='int32')

    # standardize the features (note: a zero-variance column would yield NaNs here)
    featuresnp -= np.mean(featuresnp, axis=0)
    featuresnp /= np.std(featuresnp, axis=0)

    # a single 75/25 train/test split; the loop body runs exactly once
    cv = cross_validation.ShuffleSplit(len(features),
                                       n_iter=1,
                                       test_size=0.25,
                                       random_state=0)
    for train, test in cv:
        train_set_x = theano.shared(featuresnp[train], borrow=True)
        test_set_x = theano.shared(featuresnp[test], borrow=True)
        train_set_y = theano.shared(targetnp[train], borrow=True)
        test_set_y = theano.shared(targetnp[test], borrow=True)

    batch_size = 20  # size of the minibatch

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    #        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data

    #        size = T.lscalar()
    index = T.lscalar()                 # minibatch index
    x = T.matrix('x', dtype='float64')  # the features are presented as a dense float matrix
    y = T.ivector('y')                  # the labels are presented as a 1D vector of [int] labels

    rng = np.random.RandomState(113)

    # construct the MLP class
    classifier = MLP(rng=rng,
                     input=x,
                     n_in=featuresnp.shape[1],
                     n_hidden=n_hidden,
                     n_out=2)

    cost = classifier.negative_log_likelihood(y) \
         + L1_reg * classifier.L1 \
         + L2_reg * classifier.L2_sqr

    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = []
    for param in classifier.params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)

    # specify how to update the parameters of the model as a dictionary
    updates = OrderedDict()
    # given two lists A = [a1, a2, a3, a4] and B = [b1, b2, b3, b4] of the
    # same length, zip generates a list C of the same size, where each
    # element is a pair formed from the two lists:
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    for param, gparam in zip(classifier.params, gparams):
        # vanilla SGD step: theta <- theta - learning_rate * gradient
        updates[param] = param - learning_rate * gparam

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters (see the standalone sketch after this example)
    patience = 1000000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.0995  # a relative improvement of this much is
                                    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch

    best_params = None
    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0

    # note: n_epochs is effectively unused; the loop below runs until it is
    # interrupted with Ctrl-C (KeyboardInterrupt)
    while True:

        try:

            epoch = epoch + 1
            training_cost = []
            for minibatch_index in xrange(n_train_batches):
                minibatch_avg_cost = train_model(minibatch_index)
                training_cost.append(minibatch_avg_cost)
                # iteration number
                iter = (epoch - 1) * n_train_batches + minibatch_index

                if (iter + 1) % validation_frequency == 0:
                    # compute zero-one loss on validation set
                    validation_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    this_validation_loss = np.mean(validation_losses)

                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.))

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
                        #improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss *  \
                               improvement_threshold:
                            patience = max(patience, iter * patience_increase)

                        best_validation_loss = this_validation_loss
                        best_iter = iter
                        best_params = classifier.params

            mean_cost = np.mean(training_cost)
            print "Epoch ", epoch, " training cost: ", mean_cost

        except KeyboardInterrupt:
            print "Training ended by user.\n"
            #                #update params one last time in case we interrupted the training in middle of updates
            #                for minibatch_index in xrange(n_train_batches):
            #                    train_model(minibatch_index)
            print "Best Validation loss:", best_validation_loss
            break

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    print("Saving the mlp best params")
    data_io.save_model(best_params, prefix="theano_")
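
# Side note (not part of the original code): the patience bookkeeping above
# follows the standard Theano-tutorial recipe. Isolated from the training
# loop it reduces to the sketch below; note that the loop above never
# actually consults patience, so training only stops on KeyboardInterrupt.
def update_early_stopping(this_loss, best_loss, iteration, patience,
                          patience_increase=2, improvement_threshold=0.0995):
    # on a new best validation loss, remember it; if the improvement is
    # large enough relative to improvement_threshold, extend the patience
    # budget so training continues longer
    if this_loss < best_loss:
        if this_loss < best_loss * improvement_threshold:
            patience = max(patience, iteration * patience_increase)
        best_loss = this_loss
    return best_loss, patience

# a loop that honors patience would stop with:
# if iteration >= patience: break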