def main():
    """Run the wine classification pipeline from CSV load to reports.

    Reads ``wine.csv``, passes the frame through ``pp.clean``, prints the
    column names, runs the two feature-selection routines, then evaluates
    each classifier on ``(x, y)`` and hands its result to
    ``confMatrixOutput`` together with a display label. The naive Bayes
    classifier runs last, on features rebuilt after ``pp.bayes`` has been
    applied to the frame.
    """
    df = pd.read_csv('wine.csv')

    # Optional exploration step used for the paper analysis (disabled):
    #   pp.dfExplore(df)

    # Normalizes the data (per the original author's note). The return value
    # is ignored, so this presumably works in place -- TODO confirm.
    pp.clean(df)

    # Column drops tried in earlier runs, all currently disabled:
    #   resid_sugar, free_sulf_d, citric_acid, density, fx_acidity

    x = createX(df)
    y = createY(df)

    # Feature selection
    print(list(df))
    fs.recursiveFeatureElimination(x, y)
    fs.featureImportance(x, y)

    # Classifiers evaluated on the cleaned data, in reporting order.
    classifiers = (
        ('Decision Tree', clf.decisionTree),
        ('Support Vector Machine', clf.supportVectorMachine),
        ('Artificial Neural Network', clf.ANN),
        ('Random Forest', clf.randomForest),
        ('Rules Based Classifier', clf.rulesBased),
    )
    for label, classify in classifiers:
        confMatrixOutput(label, classify(x, y))

    # Discretizes all the data to low, medium, and high (per the original
    # author's note); x and y are rebuilt afterwards for the Bayes run.
    pp.bayes(df)
    x = createX(df)
    y = createY(df)
    confMatrixOutput('Naive Bayesian Classifier', clf.bayesClassifier(x, y))
def experiment6_1(train, test, f):
    """Oversample, select with a decision tree then ``f``, standardize, fit.

    Args:
        train: Training data; passed to ``SMOTEOverSampling`` first.
        test: Test data; indexed with the same selection as the training
            data and standardized, but never oversampled.
        f: Callable applied to the tree-selected subset of the oversampled
            training data; its return value is used to index both sets.

    Returns:
        Whatever ``randomForest`` returns for the standardized train/test
        pair.
    """
    balanced = SMOTEOverSampling(train)
    tree_selection = decisionTreeFSelect(balanced)
    # f only sees the tree-selected subset; its result then indexes the full
    # oversampled training data and the test data.
    keep = f(balanced[tree_selection])
    # NOTE(review): train and test are standardized by separate calls; whether
    # test reuses train statistics is not visible here -- confirm.
    scaled_train = Standardization(balanced[keep])
    scaled_test = Standardization(test[keep])
    return randomForest(scaled_train, scaled_test)
def random_forest_depth_exp_SM_LV_ST_RF(train, test, max_depth):
    """Run the oversample / low-variance / standardize / forest pipeline.

    Args:
        train: Training data; passed to ``SMOTEOverSampling`` first.
        test: Test data; indexed with the selection computed on the
            oversampled training data, then standardized.
        max_depth: Forwarded to ``randomForest`` as its ``max_depth``
            keyword argument.

    Returns:
        Whatever ``randomForest`` returns.
    """
    balanced = SMOTEOverSampling(train)
    # 0.8 is the second argument of lowVarianceElimination; presumably a
    # variance threshold -- TODO confirm against that helper.
    keep = lowVarianceElimination(balanced, 0.8)
    scaled_train = Standardization(balanced[keep])
    scaled_test = Standardization(test[keep])
    return randomForest(scaled_train, scaled_test, max_depth=max_depth)
def experiment17(train, test, f):
    """Select with ``f``, standardize both sets, and fit a random forest.

    Args:
        train: Training data; ``f`` is applied to it directly.
        test: Test data; indexed with the same selection as ``train``.
        f: Callable whose return value is used to index ``train`` and
            ``test``.

    Returns:
        Whatever ``randomForest`` returns for the standardized train/test
        pair.
    """
    keep = f(train)
    scaled_train = Standardization(train[keep])
    scaled_test = Standardization(test[keep])
    return randomForest(scaled_train, scaled_test)
def experiment18_1(train, test, f):
    """Select with a decision tree then ``f``, standardize, fit a forest.

    Args:
        train: Training data; used without oversampling.
        test: Test data; indexed with the final selection.
        f: Callable applied to the tree-selected subset of ``train``; its
            return value is used to index both ``train`` and ``test``.

    Returns:
        Whatever ``randomForest`` returns for the standardized train/test
        pair.
    """
    tree_selection = decisionTreeFSelect(train)
    # Second stage: f refines the selection using only the tree-selected data.
    keep = f(train[tree_selection])
    scaled_train = Standardization(train[keep])
    scaled_test = Standardization(test[keep])
    return randomForest(scaled_train, scaled_test)
def experiment18(train, test, f):
    """Select with ``univariateFSelect`` then ``f``, standardize, fit a forest.

    Args:
        train: Training data; used without oversampling.
        test: Test data; indexed with the final selection.
        f: Callable applied to the univariate-selected subset of ``train``;
            its return value is used to index both ``train`` and ``test``.

    Returns:
        Whatever ``randomForest`` returns for the standardized train/test
        pair.
    """
    univariate_selection = univariateFSelect(train)
    # Second stage: f refines the selection using only the pre-selected data.
    keep = f(train[univariate_selection])
    scaled_train = Standardization(train[keep])
    scaled_test = Standardization(test[keep])
    return randomForest(scaled_train, scaled_test)
def experiment5(train, test, f):
    """Oversample, select with ``f``, standardize, and fit a random forest.

    Args:
        train: Training data; passed to ``SMOTEOverSampling`` first.
        test: Test data; indexed with the selection computed on the
            oversampled training data, then standardized.
        f: Callable applied to the oversampled training data; its return
            value is used to index both sets.

    Returns:
        Whatever ``randomForest`` returns for the standardized train/test
        pair.
    """
    balanced = SMOTEOverSampling(train)
    keep = f(balanced)
    scaled_train = Standardization(balanced[keep])
    scaled_test = Standardization(test[keep])
    return randomForest(scaled_train, scaled_test)
def experiment12_1(train, test, f):
    """Oversample, select with a decision tree then ``f``, fit a forest.

    Unlike the standardizing variants, the selected data goes to
    ``randomForest`` without a ``Standardization`` step.

    Args:
        train: Training data; passed to ``SMOTEOverSampling`` first.
        test: Test data; indexed with the final selection.
        f: Callable applied to the tree-selected subset of the oversampled
            training data; its return value is used to index both sets.

    Returns:
        Whatever ``randomForest`` returns.
    """
    balanced = SMOTEOverSampling(train)
    tree_selection = decisionTreeFSelect(balanced)
    keep = f(balanced[tree_selection])
    selected_train = balanced[keep]
    selected_test = test[keep]
    return randomForest(selected_train, selected_test)
def experiment12(train, test, f):
    """Oversample, select with ``univariateFSelect`` then ``f``, fit a forest.

    The selected data goes to ``randomForest`` without a
    ``Standardization`` step.

    Args:
        train: Training data; passed to ``SMOTEOverSampling`` first.
        test: Test data; indexed with the final selection.
        f: Callable applied to the univariate-selected subset of the
            oversampled training data; its return value is used to index
            both sets.

    Returns:
        Whatever ``randomForest`` returns.
    """
    balanced = SMOTEOverSampling(train)
    univariate_selection = univariateFSelect(balanced)
    keep = f(balanced[univariate_selection])
    selected_train = balanced[keep]
    selected_test = test[keep]
    return randomForest(selected_train, selected_test)
def experiment11(train, test, f):
    """Oversample, select with ``f``, and fit a random forest.

    No ``Standardization`` step is applied in this variant.

    Args:
        train: Training data; passed to ``SMOTEOverSampling`` first.
        test: Test data; indexed with the selection computed on the
            oversampled training data.
        f: Callable applied to the oversampled training data; its return
            value is used to index both sets.

    Returns:
        Whatever ``randomForest`` returns.
    """
    balanced = SMOTEOverSampling(train)
    keep = f(balanced)
    selected_train = balanced[keep]
    selected_test = test[keep]
    return randomForest(selected_train, selected_test)