Example 1
def main():
    
    description = "An integrated sklearn API to run training and prediction. Simple example: ./train_pred.py -i TRAIN_INPUT -t TEST_INPUT -m svm -o PREDICT"
    parser = load_parser(description)
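    # load_parser() (defined elsewhere, not shown) is assumed to add the shared
    # options referenced below: train_filename, model, kernel, C, gamma, coef0,
    # degree, normalized, dim, penalty, loss, alpha, n_estimators, learning_rate, ...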
    parser.add_argument('-t' , '--test'         , dest='test_filename' , required=True  , help='Specify the test file path')
    parser.add_argument('-o' , '--output'       , dest='output_filename'                , help='Specify the output predict file path [optional]')
    parser.add_argument('-om', '--output-model' , dest='model_filename'                 , help='Specify the output model file path [optional]')
    parser.add_argument('-op', '--output-prob'  , dest='prob_filename'                  , help='Specify the output probability file path [optional]')
    opts = parser.parse_args(sys.argv[1:]) 

    # pre-check options before loading data
    opts.model = opts.model.upper()
    opts.kernel = opts.kernel.lower()
    if( opts.base_estimator != None ):  # may be None (checked again below), so guard the .upper()
        opts.base_estimator = opts.base_estimator.upper()
    check_options(opts) 
    

    # Loading training data
    print "Loading %s ..." %opts.train_filename
    x_train, y_train = load_svmlight_file(opts.train_filename)
    x_train = x_train.todense()
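    # densify the sparse svmlight matrix: the scalers and some classifiers
    # below (e.g. GaussianNB) require dense input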
    (N, D) = x_train.shape
    print "training data dimension = (%d, %d)" %(N, D)


    # Loading testing data
    print "Loading %s ..." %opts.test_filename
    x_test, y_test = load_svmlight_file(opts.test_filename)
    x_test = x_test.todense()
    (N, D) = x_test.shape
    print "testing data dimension = (%d, %d)" %(N, D)


    # feature normalization
    if( opts.normalized ):      
        if( opts.normalized == 1 ):
            scaler_filename = opts.train_filename + '.scaler-11.pkl'
        elif( opts.normalized == 2 ):
            scaler_filename = opts.train_filename + '.scaler-01.pkl'
        elif( opts.normalized == 3 ):
            scaler_filename = opts.train_filename + '.scaler-std.pkl'
        else:
            print "Error! Unknown normalization method (%d)!" %opts.normalized
            print "Choice: 1 for [-1, 1], 2 for [0, 1], 3 for standard normalization"
            traceback.print_stack()
            sys.exit(1)

        scaler = load_scaler(scaler_filename, x_train, opts.normalized)
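        # load_scaler() (assumed helper, not shown) presumably fits the requested
        # scaler on x_train, or loads one previously pickled to scaler_filename;
        # the same scaler is applied to both splits so the test features match
        # the training scale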
        x_train = scaler.transform(x_train)
        x_test  = scaler.transform(x_test)

    
    # dimension selection
    if( opts.dim != None ):
        opts.dim = int(opts.dim)
        if( opts.dim >= D ):
            print "Warning! Select dimension (%d) >= max data dimension (%d), use original dimension." %(opts.dim, D)
            opts.dim = D
        else:
            x_train = x_train[:, :opts.dim]
            x_test  = x_test[:, :opts.dim]
            (N, D) = x_train.shape
            print "Using first %d feature ..." %(opts.dim)

    
    if( opts.prob_filename != None ):
        outputProb = True
    else:
        outputProb = False


    # Train and predict
    print "Training and Predicting ..."

    if opts.model == 'SVM':

        arg = {'kernel': opts.kernel, 'probability': outputProb}
        
        if( opts.C == None ):
            arg['C'] = 1.0 
        else:
            arg['C'] = float(opts.C)
        
        if( opts.gamma == None ):
            arg['gamma'] = 1.0/D
        else:
            arg['gamma'] = float(opts.gamma)

############################################################
##                        RBF-SVM                         ##
############################################################
        if( opts.kernel == 'rbf' ):
            
            print 'Run %s-SVM with C = %f, gamma = %f' %(opts.kernel, arg['C'], arg['gamma'])
            clf = train(SVC, arg, x_train, y_train, opts.model_filename)
            acc = predict(clf, x_test, y_test, opts.output_filename, opts.prob_filename)
            print 'acc = %f' % acc

############################################################
##                    polynomial-SVM                      ##
############################################################
        elif( opts.kernel == 'poly' ):
            
            if( opts.coef0 == None ):
                arg['coef0'] = 0
            else:
                arg['coef0'] = float(opts.coef0)
            
            if( opts.degree == None ):
                arg['degree'] = 3
            else:
                arg['degree'] = int(opts.degree)
            
            
            print 'Run %s-SVM with C = %f, coef0 = %f, gamma = %f, degree = %d' %(opts.kernel, arg['C'], arg['coef0'], arg['gamma'], arg['degree'])
            clf = train(SVC, arg, x_train, y_train, opts.model_filename)
            acc = predict(clf, x_test, y_test, opts.output_filename, opts.prob_filename)
            print 'acc = %f' % acc
    
############################################################
##                    sigmoid-SVM                         ##
############################################################
        elif( opts.kernel == 'sigmoid' ):
            
            if( opts.coef0 == None ):
                arg['coef0'] = 0
            else:
                arg['coef0'] = float(opts.coef0)
            
            print 'Run %s-SVM with C = %f, coef0 = %f, gamma = %f' %(opts.kernel, arg['C'], arg['coef0'], arg['gamma'])
            clf = train(SVC, arg, x_train, y_train, opts.model_filename)
            acc = predict(clf, x_test, y_test, opts.output_filename, opts.prob_filename)
            print 'acc = %f' % acc

        else:
            print "Error! Unknown kernel %s!" %opts.kernel
            traceback.print_stack()
            sys.exit(1)

############################################################
##                     linear-SVM                         ##
############################################################
    elif opts.model == 'LINEARSVM':

        if( outputProb == True ):
            print "Warning! Probability output is not supported in LinearSVM!"
            outputProb = False

        arg = {}
        if( opts.penalty == None ):
            arg['penalty'] = 'l2'
        else:
            arg['penalty'] = opts.penalty

        if( opts.penalty == 'l1' ):
            arg['dual'] = False
        
        if( opts.loss == None ):
            arg['loss'] = 'l2'
        else:
            arg['loss'] = opts.loss

        if( opts.C == None ):  # no C given, fall back to the default
            arg['C'] = 1.0 / D
        else:
            arg['C'] = float(opts.C)

        print 'Run Linear-SVM with C = %f, penalty = %s, loss = %s' %(arg['C'], arg['penalty'], arg['loss'])
        clf = train(LinearSVC, arg, x_train, y_train, opts.model_filename)
        acc = predict(clf, x_test, y_test, opts.output_filename)
        print "acc = %f" %acc

############################################################
##                Linear model with SGD                   ##
############################################################
    elif opts.model == 'SGD':
        
        if( outputProb == True ):
            print "Warning! Probability output is not supported in SGD!"
            outputProb = False

        arg = {}
        if( opts.penalty == None ):
            arg['penalty'] = 'l2'
        else:
            arg['penalty'] = opts.penalty
        
        if( opts.loss == None ):
            arg['loss'] = 'hinge'
        else:
            arg['loss'] = opts.loss

        if( opts.alpha == None ):
            arg['alpha'] = 0.0001
        else:
            arg['alpha'] = float(opts.alpha)

        print 'Run SGD with alpha = %f, penalty = %s, loss = %s' %(arg['alpha'], arg['penalty'], arg['loss'])
        clf = train(SGDClassifier, arg, x_train, y_train, opts.model_filename)
        acc = predict(clf, x_test, y_test, opts.output_filename)
        print "acc = %f" %acc

############################################################
##                     Random Forest                      ##
############################################################
    elif opts.model == 'RF':
        arg = {}
        if( opts.n_estimators == None ):
            arg['n_estimators'] = 100
        else:
            arg['n_estimators'] = int(opts.n_estimators)
    
        print 'Run RandomForest with n_estimators = %d' %(arg['n_estimators'])
        clf = train(RandomForestClassifier, arg, x_train, y_train, opts.model_filename)
        acc = predict(clf, x_test, y_test, opts.output_filename, opts.prob_filename)
        print "acc = %f" %acc

############################################################
##                        AdaBoost                        ##
############################################################
    elif opts.model == 'ADABOOST':
        arg = {}

        be_DT        = DecisionTreeClassifier()
        be_SVC       = SVC(probability=True)
        be_SGD_huber = SGDClassifier(loss='modified_huber')
        be_SGD_log   = SGDClassifier(loss='log')
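        # SVC(probability=True) and the 'modified_huber' / 'log' SGD losses all
        # expose predict_proba, which AdaBoost's default SAMME.R algorithm requires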

        if( opts.base_estimator == None or opts.base_estimator == 'DT' ):
            be = [ be_DT ]
        elif( opts.base_estimator == 'SVM' ):
            be = [ be_SVC ]
        elif( opts.base_estimator == 'SGD' or opts.base_estimator == 'SGD-HUBER' ):
            be = [ be_SGD_huber ]
        elif( opts.base_estimator == 'SGD-LOG' ):
            be = [ be_SGD_log ]
        else:
            print "Unkinown base estimator %s !" %opts.base_estimator
            traceback.print_stack()
            sys.exit(1)

        arg['base_estimator'] = be[0]  # apply the selected base estimator (otherwise the -be choice would be silently ignored)

        if( opts.n_estimators == None ):
            arg['n_estimators'] = 100
        else:
            arg['n_estimators'] = int(opts.n_estimators)
        
        if( opts.learning_rate == None ):
            arg['learning_rate'] = 1.0
        else:
            arg['learning_rate'] = float(opts.learning_rate)
        
        print 'Run AdaBoost with n_estimators = %d, learning_rate = %f' %(arg['n_estimators'], arg['learning_rate'])
        clf = train(AdaBoostClassifier, arg, x_train, y_train, opts.model_filename)
        acc = predict(clf, x_test, y_test, opts.output_filename, opts.prob_filename)
        print "acc = %f" %acc

############################################################
##                    GradientBoost                       ##
############################################################
    elif opts.model == 'GB':
        arg = {}
        if( opts.n_estimators == None ):
            arg['n_estimators'] = 100
        else:
            arg['n_estimators'] = int(opts.n_estimators)
        
        if( opts.learning_rate == None ):
            arg['learning_rate'] = 0.1
        else:
            arg['learning_rate'] = float(opts.learning_rate)
        
        print 'Run GradientBoosting with n_estimators = %d, learning_rate = %f' %(arg['n_estimators'], arg['learning_rate'])
        clf = train(GradientBoostingClassifier, arg, x_train, y_train, opts.model_filename)
        acc = predict(clf, x_test, y_test, opts.output_filename, opts.prob_filename)
        print "acc = %f" %acc

############################################################
##                          KNN                           ##
############################################################
    elif opts.model == 'KNN':
        arg = {}        
        if( opts.n_neighbors == None ):
            arg['n_neighbors'] = 5
        else:
            arg['n_neighbors'] = int(opts.n_neighbors)
        
        if( opts.degree == None ):
            arg['p'] = 2
        else:
            arg['p'] = int(opts.degree)

        if( opts.weights == None ):
            arg['weights'] = 'distance'
        else:
            arg['weights'] = opts.weights

        print 'Run KNN with n_neighbors = %d, weights = %s, power of distance metric = %d' %(arg['n_neighbors'], arg['weights'], arg['p'])
        clf = train(KNeighborsClassifier, arg, x_train, y_train, opts.model_filename)
        acc = predict(clf, x_test, y_test, opts.output_filename, opts.prob_filename)
        print "acc = %f" %acc

############################################################
##                  Logistic Regression                   ##
############################################################
    elif opts.model == 'LR':
        arg = {}
        if( opts.penalty == None ):
            arg['penalty'] = 'l2'
        else:
            arg['penalty'] = opts.penalty
        
        if( opts.C == None ):  # no C given, fall back to the default
            arg['C'] = 1.0
        else:
            arg['C'] = float(opts.C)

        if(arg['penalty'] == 'l2'):
            arg['dual'] = True

        print 'Run Logistic Regression with C = %f, penalty = %s' %(arg['C'], arg['penalty'])
        clf = train(LogisticRegression, arg, x_train, y_train, opts.model_filename)
        acc = predict(clf, x_test, y_test, opts.output_filename, opts.prob_filename)
        print "acc = %f" %acc
    
############################################################
##                    Ridge Regression                    ##
############################################################
    elif opts.model == 'RIDGE':
        arg = {}
        if( opts.alpha == None ):
            arg['alpha'] = 1.0
        else:
            arg['alpha'] = float(opts.alpha)

        print 'Run Ridge Regression with alpha = %f' %(arg['alpha'])
        clf = train(RidgeClassifier, arg, x_train, y_train, opts.model_filename)
        acc = predict(clf, x_test, y_test, opts.output_filename, opts.prob_filename)
        print "acc = %f" %acc

############################################################
##                 Gaussian Naive Bayes                   ##
############################################################
    elif opts.model == 'GNB':

        print 'Run Gaussian Naive Bayes'
        clf = train(GaussianNB, {}, x_train, y_train, opts.model_filename)
        acc = predict(clf, x_test, y_test, opts.output_filename)
        print 'acc = %f' % acc

############################################################
##             Linear Discriminant Analysis               ##
############################################################
    elif opts.model == 'LDA':
        
        print 'Run Linear Discriminant Analysis'
        clf = train(LDA, {}, x_train, y_train, opts.model_filename)
        acc = predict(clf, x_test, y_test, opts.output_filename)
        print 'acc = %f' % acc

    else:
        sys.stderr.write('Error: invalid model %s\n' %opts.model)
        traceback.print_stack()
        sys.exit(1)
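
The train() and predict() helpers are not shown in this listing. A minimal sketch of what they might look like, assuming train() fits and optionally pickles the classifier, and predict() writes labels/probabilities and returns accuracy; the names, signatures, and file formats here are assumptions, not the original implementation:

import pickle
from sklearn.metrics import accuracy_score

def train(Classifier, arg, x_train, y_train, model_filename=None):
    # fit a classifier with the collected keyword arguments and optionally pickle it
    clf = Classifier(**arg)
    clf.fit(x_train, y_train)
    if model_filename is not None:
        with open(model_filename, 'wb') as f:
            pickle.dump(clf, f)
    return clf

def predict(clf, x_test, y_test, output_filename=None, prob_filename=None):
    # predict labels, optionally dump labels/probabilities, return accuracy
    y_pred = clf.predict(x_test)
    if output_filename is not None:
        with open(output_filename, 'w') as f:
            for label in y_pred:
                f.write('%d\n' % label)
    if prob_filename is not None and hasattr(clf, 'predict_proba'):
        with open(prob_filename, 'w') as f:
            for row in clf.predict_proba(x_test):
                f.write(' '.join('%f' % p for p in row) + '\n')
    return accuracy_score(y_test, y_pred)
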
Example 2
def main():
    
    description = 'An integrated sklearn API to run N-fold training and cross-validation with multi-threading. Simple example: ./train_valid.py -i INPUT -m svm'
    parser = load_parser(description)
    parser.add_argument('-f' , '--fold'  , dest='fold'  , type=int, default=3, help='Number of fold in cross_validation [default = 3]')
    parser.add_argument('-th', '--thread', dest='thread', type=int, default=8, help='Number of thread to run in parallel [default = 8]')
    parser.add_argument('-log2c'         , dest='log2_C'                     , help='Grid search {begin:end:step} for log2(C)')
    parser.add_argument('-log2g'         , dest='log2_gamma'                 , help='Grid search {begin:end:step} for log2(gamma)')
    parser.add_argument('-log2r'         , dest='log2_coef0'                 , help='Grid search {begin:end:step} for log2(coef0)')
    parser.add_argument('-log2lr'        , dest='log2_lr'                    , help='Grid search {begin:end:step} for log2(learning_rate)')
    parser.add_argument('-log2a'         , dest='log2_alpha'                 , help='Grid search {begin:end:step} for log2(alpha)')
    opts = parser.parse_args(sys.argv[1:])  

    # pre-check options before loading data
    opts.model  = opts.model.upper()
    opts.kernel = opts.kernel.lower()
    if( opts.base_estimator != None ):  # normalize case as in Example 1; the AdaBoost branch compares uppercase names
        opts.base_estimator = opts.base_estimator.upper()
    check_options(opts) 
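    # opts.C, opts.gamma, opts.log2_C, etc. may carry grid specs in the
    # {begin:end:step} form described in the help strings above; parse_grid()
    # (assumed helper, not shown) expands them into concrete value lists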
    
    # Loading training data
    print "Loading %s ..." %opts.train_filename
    x_train, y_train = load_svmlight_file(opts.train_filename)
    x_train = x_train.todense()
    (N, D) = x_train.shape
    print "training data dimension = (%d, %d)" %(N, D)
    
    # feature normalization
    if( opts.normalized ):      
        if( opts.normalized == 1 ):
            scaler_filename = opts.train_filename + '.scaler-11.pkl'
        elif( opts.normalized == 2 ):
            scaler_filename = opts.train_filename + '.scaler-01.pkl'
        elif( opts.normalized == 3 ):
            scaler_filename = opts.train_filename + '.scaler-std.pkl'
        else:
            print "Error! Unknown normalization method (%d)!" %opts.normalized
            print "Choice: 1 for [-1, 1], 2 for [0, 1], 3 for standard normalization"
            traceback.print_stack()
            sys.exit(1)

        scaler = load_scaler(scaler_filename, x_train, opts.normalized)
        x_train = scaler.transform(x_train)

    
    # dimension grid search
    if( opts.dim == None ):
        dim_list = [D]
    else:
        dim_list = parse_grid(opts.dim, 0, 100)
    
    x_train_all = x_train
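    # keep an untouched full-width copy so each grid iteration can slice from it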
    for dim in dim_list:
        if( dim > D ):
            print "Warning! Selected dimension (%d) > max data dimension (%d); using original dimension." %(dim, D)
            dim = D
            x_train = x_train_all  # restore the full-width view in case an earlier iteration sliced it
        else:
            x_train = x_train_all[:, :dim]
            print "Using first %d features ..." %(dim)


        # Training and Validation

        if opts.model == 'SVM':
            
            # parameter C
            if( opts.C != None ):
                c_list = parse_grid(opts.C, 0, float)
            else:
                if( opts.log2_C != None ):
                    c_list = parse_grid(opts.log2_C, 2) # base = 2
                else:
                    # default = {1, 2, 4, 8, 16, 32, 64, 128}
                    c_list = []
                    for i in range(0, 8):
                        c_list.append( 2**i )
            
            # parameter gamma
            if( opts.gamma != None ):
                gamma_list = parse_grid(opts.gamma, 0, float)
            else:
                if( opts.log2_gamma != None ):
                    gamma_list = parse_grid(opts.log2_gamma, 2) # base = 2
                else:
                    # default = {0.0625, 0.25, 1, 4, 16}
                    gamma_list = []
                    for i in range(-4, 5, 2):
                        gamma_list.append( 2**i )

############################################################
##                        RBF-SVM                         ##
############################################################
            if( opts.kernel == 'rbf' ):
                
                arg_list = list( ParameterGrid( {'kernel': [opts.kernel], 'gamma': gamma_list, 'C': c_list} ) )
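                # ParameterGrid expands the dict into the Cartesian product of
                # all listed values, yielding one kwargs dict per combination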

                (acc_max, arg_best) = parallel_cross_validation(SVC, 'SVM', arg_list, x_train, y_train, opts.fold, opts.thread)
                
                print "#####################################################################################"
                print "max_acc = %f --- C = %f, gamma = %f" %(acc_max, arg_best['C'], arg_best['gamma'])
                print "#####################################################################################"

############################################################
##                    polynomial-SVM                      ##
############################################################
            elif( opts.kernel == 'poly' ):

                if( opts.coef0 != None ):
                    coef0_list = parse_grid(opts.coef0, 0, float)
                else:
                    if( opts.log2_coef0 != None ):
                        coef0_list = parse_grid(opts.log2_coef0, 2) # base = 2
                    else:
                        # default = {0.0625, 0.25, 1, 4, 16}
                        coef0_list = []
                        for i in range(-4, 5, 2):
                            coef0_list.append( 2**i )
                
                if( opts.degree != None ):
                    degree_list = parse_grid(opts.degree, 0)
                else:
                    # default = {1, 2, 3, 4}
                    degree_list = []
                    for i in range(1, 5):
                        degree_list.append(i)
                
                arg_list = list( ParameterGrid( {'kernel':[opts.kernel], 'degree': degree_list, 'coef0': coef0_list, 'gamma': gamma_list, 'C': c_list} ) )
                
                (acc_max, arg_best) = parallel_cross_validation(SVC, 'SVM', arg_list, x_train, y_train, opts.fold, opts.thread)
                
                print "#####################################################################################"
                print "max_acc = %f --- C = %f, coef0 = %f, gamma = %f, degree = %d" %(acc_max, arg_best['C'], arg_best['coef0'], arg_best['gamma'], arg_best['degree'])
                print "#####################################################################################"
        
############################################################
##                    sigmoid-SVM                         ##
############################################################
            elif( opts.kernel == 'sigmoid' ):
                 
                if( opts.coef0 != None ):
                    coef0_list = parse_grid(opts.coef0, 0, float)
                else:
                    if( opts.log2_coef0 != None ):
                        coef0_list = parse_grid(opts.log2_coef0, 2) # base = 2
                    else:
                        # default = {0.0625, 0.25, 1, 4, 16}
                        coef0_list = []
                        for i in range(-4, 5, 2):
                            coef0_list.append( 2**i )
                
                arg_list = list( ParameterGrid( {'kernel': [opts.kernel], 'coef0': coef0_list, 'gamma': gamma_list, 'C': c_list } ) )
                
                (acc_max, arg_best) = parallel_cross_validation(SVC, 'SVM', arg_list, x_train, y_train, opts.fold, opts.thread)
                    
                print "#####################################################################################"
                print "max_acc = %f --- C = %f, coef0 = %f, gamma = %f" %(acc_max, arg_best['C'], arg_best['coef0'], arg_best['gamma'])
                print "#####################################################################################"
        
            else:
                print "Error! Unknown kernel %s!" %opts.kernel
                traceback.print_stack()
                sys.exit(1)

############################################################
##                     linear-SVM                         ##
############################################################
        elif opts.model == 'LINEARSVM':
            
            penalty_list = []
            if( opts.penalty == None ):
                penalty_list.append('l2')
                penalty_list.append('l1')
            else:
                penalty_list.append( opts.penalty )
            
            loss_list = []
            if( opts.loss == None ):
                loss_list.append('l2')
                loss_list.append('l1')
            else:
                loss_list.append( opts.loss )

            # parameter C
            if( opts.C != None ):
                c_list = parse_grid(opts.C, 0, float)
            else:
                if( opts.log2_C != None ):
                    c_list = parse_grid(opts.log2_C, 2) # base = 2
                else:
                    # default = {1, 2, 4, 8, 16, 32, 64, 128}
                    c_list = []
                    for i in range(0, 8):
                        c_list.append( 2**i )
            
            arg_list_pre = list( ParameterGrid( {'penalty': penalty_list, 'loss': loss_list, 'C': c_list} ) )

            arg_list = []
            for arg in arg_list_pre:
                if( arg['penalty'] == 'l1' and arg['loss'] == 'l1' ):
                    # not support
                    continue

                if( arg['penalty'] == 'l1' ):
                    arg['dual'] = False

                arg_list.append(arg)

            (acc_max, arg_best) = parallel_cross_validation(LinearSVC, 'Linear-SVM', arg_list, x_train, y_train, opts.fold, opts.thread)
            
            print "#####################################################################################"
            print "max_acc = %f --- C = %f, penalty = %s, loss = %s" %(acc_max, arg_best['C'], arg_best['penalty'], arg_best['loss'])
            print "#####################################################################################"
    
############################################################
##                Linear model with SGD                   ##
############################################################
        elif opts.model == 'SGD':

            if( opts.alpha != None ):
                alpha_list = parse_grid(opts.alpha, 0, float)
            else:
                if( opts.log2_alpha != None ):
                    alpha_list = parse_grid(opts.log2_alpha, 2) # base = 2
                else:
                    # default = {0.03125, 0.0625, 0.125, 0.25, 0.5, 1, 2, 4}
                    alpha_list = []
                    for i in range(-5, 3):
                        alpha_list.append( 2**i )
            
            loss_list = []
            if( opts.loss == None ):
                loss_list.append('hinge')
                loss_list.append('log')
                loss_list.append('modified_huber')
                loss_list.append('squared_hinge')
                loss_list.append('perceptron')
                loss_list.append('squared_loss')
                loss_list.append('huber')
                loss_list.append('epsilon_insensitive')
                loss_list.append('squared_epsilon_insensitive')
            else:
                loss_list.append(opts.loss)
            
            penalty_list = []
            if( opts.penalty == None ):
                penalty_list.append('l2')
                penalty_list.append('l1')
                penalty_list.append('elasticnet')
            else:
                penalty_list.append(opts.penalty)

            
            arg_list = list( ParameterGrid( {'alpha': alpha_list, 'loss':loss_list, 'penalty':penalty_list} ) )
            (acc_max, arg_best) = parallel_cross_validation(SGDClassifier, 'Linear-SGD', arg_list, x_train, y_train, opts.fold, opts.thread)
            
            print "#####################################################################################"
            print "max_acc = %f --- alpha = %f, loss = %s, penalty = %s" %(acc_max, arg_best['alpha'], arg_best['loss'], arg_best['penalty'])
            print "#####################################################################################"

############################################################
##                     Random Forest                      ##
############################################################
        elif opts.model == 'RF':
            if( opts.n_estimators != None ):
                ne_list = parse_grid(opts.n_estimators, 0)
            else:
                # default = {50, 100, 150, 200, 250, 300}
                ne_list = []
                for i in range(5, 31, 5):
                    ne_list.append( 10*i )
            
            arg_list = list( ParameterGrid( {'n_estimators': ne_list} ) )
            (acc_max, arg_best) = parallel_cross_validation(RandomForestClassifier, 'Random Forest', arg_list, x_train, y_train, opts.fold, opts.thread)

            print "#####################################################################################"
            print "max_acc = %f --- n_estimators = %d" %(acc_max, arg_best['n_estimators'])
            print "#####################################################################################"


############################################################
##                        AdaBoost                        ##
############################################################
        elif opts.model == 'ADABOOST':
            be_DT        = DecisionTreeClassifier()
            be_SVC       = SVC(probability=True)
            be_SGD_huber = SGDClassifier(loss='modified_huber')
            be_SGD_log   = SGDClassifier(loss='log')

            if( opts.base_estimator == None ):
                be = [ be_DT, be_SVC, be_SGD_huber, be_SGD_log ] 
            elif( opts.base_estimator == 'DT' ):
                be = [ be_DT ]
            elif( opts.base_estimator == 'SVM' ):
                be = [ be_SVC ]
            elif( opts.base_estimator == 'SGD' ):
                be = [ be_SGD_huber , be_SGD_log ]
            elif( opts.base_estimator == 'SGD-HUBER' ):
                be = [ be_SGD_huber ]
            elif( opts.base_estimator == 'SGD-LOG' ):
                be = [ be_SGD_log ]
            else:
                print "Unkinown base estimator %s !" %opts.base_estimator
                traceback.print_stack()
                sys.exit(1)
            
            if( opts.n_estimators != None ):
                ne_list = parse_grid(opts.n_estimators, 0)
            else:
                # default = {50, 100, 150, 200, 250, 300}
                ne_list = []
                for i in range(5, 31, 5):
                    ne_list.append( 10*i )
        
            if( opts.learning_rate != None ):
                lr_list = parse_grid(opts.learning_rate, 0, float)
            else:
                if( opts.log2_lr != None ):
                    lr_list = parse_grid(opts.log2_lr, 2)
                else:
                    # default = {0.25, 0.5, 1, 2, 4}
                    lr_list = []
                    for i in range(-2, 3):
                        lr_list.append( 2**i )
            
            arg_list = list( ParameterGrid( {'base_estimator': be, 'n_estimators': ne_list, 'learning_rate': lr_list} ) )
            (acc_max, arg_best) = parallel_cross_validation(AdaBoostClassifier, 'AdaBoost', arg_list, x_train, y_train, opts.fold, opts.thread)
            
            print "#####################################################################################"
            print "max_acc = %f --- base_estimator = %s, n_estimators = %d, learning_rate = %f" %(acc_max, arg_best['base_estimator'], arg_best['n_estimators'], arg_best['learning_rate'])
            print "#####################################################################################"


############################################################
##                    GradientBoost                       ##
############################################################
        elif opts.model == 'GB':

            if( opts.n_estimators != None ):
                ne_list = parse_grid(opts.n_estimators, 0)
            else:
                # default = {50, 100, 150, 200, 250, 300}
                ne_list = []
                for i in range(5, 31, 5):
                    ne_list.append( 10*i )
        
            if( opts.learning_rate != None ):
                lr_list = parse_grid(opts.learning_rate, 0, float)
            else:
                if( opts.log2_lr != None ):
                    lr_list = parse_grid(opts.log2_lr, 2)
                else:
                    # default = {0.25, 0.5, 1, 2, 4}
                    lr_list = []
                    for i in range(-2, 3):
                        lr_list.append( 2**i )

            arg_list = list( ParameterGrid( {'n_estimators': ne_list, 'learning_rate': lr_list} ) )
            (acc_max, arg_best) = parallel_cross_validation(GradientBoostingClassifier, 'GradientBoosting', arg_list, x_train, y_train, opts.fold, opts.thread)

            print "#####################################################################################"
            print "max_acc = %f --- n_estimators = %d, learning_rate = %f" %(acc_max, arg_best['n_estimators'], arg_best['learning_rate'])
            print "#####################################################################################"


############################################################
##                          KNN                           ##
############################################################
        elif opts.model == 'KNN':
            
            if( opts.n_neighbors != None ):
                nn_list = parse_grid(opts.n_neighbors, 0)
            else:
                # default = {5, 15, 25, 35, 45}
                nn_list = []
                for i in range(5):
                    nn_list.append(5 + 10 * i)
            
            p_list = []
            if( opts.degree == None ):
                p_list.append(1)
                p_list.append(2)
            else:
                p_list.append( opts.degree )

            weight_list = []
            if( opts.weights == None ):
                weight_list.append('distance')
                weight_list.append('uniform')
            else:
                weight_list.append( opts.weights ) 

            arg_list = list( ParameterGrid( {'n_neighbors': nn_list, 'p': p_list, 'weights': weight_list} ) )
            
            (acc_max, arg_best) = parallel_cross_validation(KNeighborsClassifier, 'KNN', arg_list, x_train, y_train, opts.fold, opts.thread)

            print "#####################################################################################"
            print "max_acc = %f --- n_neighbors = %d, weights = %s, p = %d" %(acc_max, arg_best['n_neighbors'], arg_best['weights'], arg_best['p'])
            print "#####################################################################################"


############################################################
##                  Logistic Regression                   ##
############################################################
        elif opts.model == 'LR':

            penalty_list = []
            if( opts.penalty == None ):
                penalty_list.append('l2')
                penalty_list.append('l1')
            else:
                penalty_list.append(opts.penalty)
            
            if( opts.C != None ):
                c_list = parse_grid(opts.C, 0, float)
            else:
                if( opts.log2_C != None ):
                    c_list = parse_grid(opts.log2_C, 2) # base = 2
                else:
                    # default = {1, 2, 4, 8, 16, 32, 64, 128}
                    c_list = []
                    for i in range(0, 8):
                        c_list.append( 2**i )

            arg_list_pre = list( ParameterGrid( {'penalty': penalty_list, 'C': c_list} ) )
        
            arg_list = []
            for arg in arg_list_pre:
                if(arg['penalty'] == 'l2'):
                    arg['dual'] = True
                arg_list.append(arg)

            (acc_max, arg_best) = parallel_cross_validation(LogisticRegression, 'Logistic Regression', arg_list, x_train, y_train, opts.fold, opts.thread)

            print "#####################################################################################"
            print "max_acc = %f --- C = %f, penalty = %s" %(acc_max, arg_best['C'], arg_best['penalty'])
            print "#####################################################################################"

############################################################
##                    Ridge Regression                    ##
############################################################
        elif opts.model == 'RIDGE':

            if( opts.alpha != None ):
                alpha_list = parse_grid(opts.alpha, 0, float)
            else:
                if( opts.log2_alpha != None ):
                    alpha_list = parse_grid(opts.log2_alpha, 2) # base = 2
                else:
                    # default = {0.03125, 0.0625, 0.125, 0.25, 0.5, 1, 2, 4}
                    alpha_list = []
                    for i in range(-5, 3):
                        alpha_list.append( 2**i )

            arg_list = list( ParameterGrid( {'alpha': alpha_list} ) )
        
            (acc_max, arg_best) = parallel_cross_validation(RidgeClassifier, 'Ridge', arg_list, x_train, y_train, opts.fold, opts.thread)

            print "#####################################################################################"
            print "max_acc = %f --- alpha = %f" %(acc_max, arg_best['alpha'])
            print "#####################################################################################"


############################################################
##                 Gaussian Naive Bayes                   ##
############################################################
        elif opts.model == 'GNB':

            print 'Run Gaussian Naive Bayes (%d-fold CV)' %(opts.fold)
            (acc, arg) = cross_validation( (GaussianNB, 'GNB', {}, x_train, y_train, opts.fold) )

            print "#####################################################################################"
            print "max_acc = %f" %(acc)
            print "#####################################################################################"
        

############################################################
##             Linear Discriminant Analysis               ##
############################################################
        elif opts.model == 'LDA':
            
            print 'Run Linear Discriminant Analysis (%d-fold CV)' %(opts.fold)
            (acc, arg) = cross_validation( (LDA, 'LDA', {}, x_train, y_train, opts.fold) )

            print "#####################################################################################"
            print "max_acc = %f " %(acc)
            print "#####################################################################################"

        else:
            sys.stderr.write('Error: invalid model %s\n' %opts.model)
            traceback.print_stack()
            sys.exit(1)
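
The parallel_cross_validation() helper used throughout Example 2 is likewise not shown. A minimal sketch under the modern sklearn API, assuming it scores every parameter combination with k-fold cross-validation on a process pool and returns the best (accuracy, parameters) pair; all names and the signature are illustrative, not the original implementation:

from multiprocessing import Pool

from sklearn.model_selection import cross_val_score

def _evaluate(task):
    # one k-fold CV run for a single parameter combination
    Classifier, name, arg, x, y, fold = task
    acc = cross_val_score(Classifier(**arg), x, y, cv=fold).mean()
    print("%s %s: acc = %f" % (name, arg, acc))
    return (acc, arg)

def parallel_cross_validation(Classifier, name, arg_list, x, y, fold, thread):
    # fan the whole parameter grid out over `thread` worker processes
    tasks = [(Classifier, name, arg, x, y, fold) for arg in arg_list]
    pool = Pool(thread)
    try:
        results = pool.map(_evaluate, tasks)
    finally:
        pool.close()
        pool.join()
    # best (accuracy, parameters) pair across the whole grid
    return max(results, key=lambda r: r[0])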