# Separate the label column from the features: `TripType` is the value we
# want to predict; everything else in `data` stays as the feature matrix.
# `pop` removes the column and returns it in one step (equivalent to the
# original lookup + `del`, for both pandas DataFrames and plain dicts).
target = data.pop('TripType')

X, y = data, target

# --- Experiment: top-150 chi2 features + Logistic Regression ----------------
LOGREG_C = 10000  # regularization strength passed to getROCScore below

# Keep the 150 features scoring highest on the chi-squared test against y.
ch2 = SelectKBest(chi2, k=150)
X = ch2.fit_transform(X, y)

# Splitting the data to train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=42)

# 10-fold cross-validation indices.
# NOTE(review): n=70686 is a hard-coded row count — confirm it equals len(X);
# also note the folds index the full X, not X_train.
kf = KFold(n=70686, n_folds=10, shuffle=False, random_state=42)

n_folds = 10
total_roc = 0
for train_index, test_index in kf:
    X_train_fold, X_test_fold, y_train_fold, y_test_fold = X[train_index], X[test_index], y[train_index], y[test_index]
    # BUG FIX: the original printed an undefined name `C` (NameError on the
    # first iteration); print the C value actually passed to getROCScore.
    print("Calling get Roc. C Value::\t" + str(LOGREG_C))
    result_ROC = getROCScore(X_train_fold, y_train_fold, X_test_fold, y_test_fold, "LogisticRegression", Cvalue=LOGREG_C)

    roc_Micro_score = result_ROC[0]
    print("ROC on Validation set ", roc_Micro_score)
    total_roc += roc_Micro_score
print("Average ROC Score is (Validation ROC) : ", total_roc / n_folds)

# Final evaluation on the held-out test split.
result_ROC = getROCScore(X_train, y_train, X_test, y_test, "LogisticRegression", Cvalue=LOGREG_C)
print("ROC score on test set is ", result_ROC[0])
# --- Experiment: top-130 chi2 features + LinearSVC --------------------------
SVC_C = 100000  # the only C actually used; the per-fold C sweep was dead code

ch2 = SelectKBest(chi2, k=130)
X = ch2.fit_transform(X, y)

# Splitting the data to train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=42)

# NOTE(review): hard-coded row count — confirm it equals len(X).
kf = KFold(n=70686, n_folds=10, shuffle=False, random_state=42)

n_folds = 10
total_roc = 0
for train_index, test_index in kf:
    X_train_fold, X_test_fold, y_train_fold, y_test_fold = X[train_index], X[test_index], y[train_index], y[test_index]

    # The original walked C_Value=[0.0001 .. 100000] per fold but ignored the
    # computed C and always passed 100000; the unused sweep (C_Value, i, C,
    # count, max_score, best_model, best_CValue) has been removed.
    result_ROC = getROCScore(X_train_fold, y_train_fold, X_test_fold, y_test_fold, "LinearSVC", Cvalue=SVC_C)

    roc_Micro_score = result_ROC[0]
    print("ROC on Validation set ", roc_Micro_score)
    total_roc += roc_Micro_score
print("Average ROC Score is (Validation ROC) : ", total_roc / n_folds)

# Final evaluation on the held-out test split.
result_ROC = getROCScore(X_train, y_train, X_test, y_test, "LinearSVC", Cvalue=SVC_C)
print("ROC score on test set is ", result_ROC[0])
    print("Feature Count::\t" +str(X_features))
    X_features=150
    ch2 = SelectKBest(chi2, k=X_features)
    X = ch2.fit_transform(X, y)

    # Splitting the data to train and test
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=42)

    # Decision Tree

    print("Decision Tree model")
    # d = clf.fit(X_train, y_train)
    print("starting predication")
    # Y_predicted = d.predict(X_test)
    print("Decision Tree Accuracy is")
    result_ROC = getROCScore(X_train, y_train, X_test, y_test, "DecisionTree", depth=10)
    print result_ROC[0]
    print("\n")

    #Linear Regression
    # regr=OneVsRestClassifier(linear_model.LogisticRegression(C=10000))
    print("Logistic Regression model")
    # d = regr.fit(X_train, y_train)
    print("starting predication")
    # Y_predicted = d.predict(X_test)

    print("Logistic Regression model Accuracy is")
    result_ROC = getROCScore(X_train, y_train, X_test, y_test, "LogisticRegression", Cvalue=10000)
    print result_ROC[0]
    print("\n")
# --- Experiment: top-150 chi2 features + Naive Bayes ------------------------
NB_ALPHA = 0.6  # the only smoothing value used; the per-fold alpha sweep was dead code

ch2 = SelectKBest(chi2, k=150)
X = ch2.fit_transform(X, y)

# Splitting the data to train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=42)

# NOTE(review): the folds are sized from len(X_train) but the loop indexes X
# (the full matrix), so only the first len(X_train) rows are ever
# cross-validated. Confirm whether X_train was intended here — the other
# experiments in this file size the folds from the full row count instead.
kf = KFold(n=len(X_train), n_folds=10, shuffle=False, random_state=42)

n_folds = 10
total_roc = 0
for train_index, test_index in kf:
    X_train_fold, X_test_fold, y_train_fold, y_test_fold = X[train_index], X[test_index], y[train_index], y[test_index]

    # The original walked alpha_Value=[0 .. 1] per fold but ignored the
    # computed alpha and always passed 0.6; the unused sweep (alpha_Value, i,
    # alpha, count, max_score, best_model, best_alphaValue) has been removed.
    result_ROC = getROCScore(X_train_fold, y_train_fold, X_test_fold, y_test_fold, "NaiveBayes", alphaValue=NB_ALPHA)

    roc_Micro_score = result_ROC[0]
    print("ROC on Validation set ", roc_Micro_score)
    total_roc += roc_Micro_score
print("Average ROC Score is (Validation ROC) : ", total_roc / n_folds)

# Final evaluation on the held-out test split.
result_ROC = getROCScore(X_train, y_train, X_test, y_test, "NaiveBayes", alphaValue=NB_ALPHA)
print("ROC score on test set is ", result_ROC[0])
  
# --- Experiment: all features + Decision Tree -------------------------------
# Reset to the full, untransformed feature matrix (the sections above rebound
# X to chi2-reduced copies).
X, y = data, target

# k='all' keeps every feature; this SelectKBest pass is effectively an
# identity transform but keeps the pipeline shape consistent with the
# reduced-feature experiments above.
ch2 = SelectKBest(chi2, k='all')
X = ch2.fit_transform(X, y)

# Splitting the data to train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=42)

# NOTE(review): hard-coded row count — confirm it equals len(X).
kf = KFold(n=70686, n_folds=10, shuffle=False, random_state=42)

n_folds = 10
total_roc = 0
for train_index, test_index in kf:
    X_train_fold, X_test_fold, y_train_fold, y_test_fold = X[train_index], X[test_index], y[train_index], y[test_index]

    # Unused loop counters `i` and `count` from the original were removed.
    result_ROC = getROCScore(X_train_fold, y_train_fold, X_test_fold, y_test_fold, "DecisionTree")

    roc_Micro_score = result_ROC[0]
    print("ROC on Validation set ", roc_Micro_score)
    total_roc += roc_Micro_score
print("Average ROC Score is (Validation ROC) : ", total_roc / n_folds)

# Final evaluation on the held-out test split.
result_ROC = getROCScore(X_train, y_train, X_test, y_test, "DecisionTree")
print("ROC score on test set is ", result_ROC[0])