def DTree(file1):
    """Grid-search, train and report a decision-tree classifier.

    Loads the dataset with ``Data()``, searches over the split criterion and
    the maximum number of leaf nodes using grouped shuffle-split
    cross-validation, then passes the classifier returned by the search to
    ``model_training`` and the resulting predictions to ``Results``.

    Args:
        file1: Forwarded unchanged to ``CrossValidationWithGridSearch`` and
            ``Results`` (presumably a report file handle -- confirm against
            those helpers).
    """
    # Read data; the underscore-prefixed entries are not used in this function.
    (
        data_train,
        data_test,
        output_train,
        output_test,
        _unscaled_data_test,
        activities_labels,
        _feature_labels,
        groups_at_training,
        _groups_at_testing,
    ) = Data()
    InputData(data_train, data_test, output_test, name='Initial Test Data')

    group_splitter = GroupShuffleSplit(
        n_splits=6, test_size=0.3, random_state=0)

    # Hyper-parameter candidates explored by the grid search.
    search_space = [{
        'criterion': ["gini", "entropy"],
        'max_leaf_nodes': [30, 50, 70, 100],
    }]

    tuned_classifier = CrossValidationWithGridSearch(
        group_splitter,
        DecisionTreeClassifier(),
        data_train,
        output_train,
        groups_at_training,
        search_space,
        DecisionTreeClassifier.__name__,
        file1,
    )

    pred_data_test, pred_data_train = model_training(
        tuned_classifier, data_train, output_train, data_test, output_test)

    Results(
        data_train,
        output_train,
        pred_data_train,
        output_test,
        pred_data_test,
        activities_labels,
        data_test,
        tuned_classifier,
        file1,
        "DTree",
    )
def RandomForest(file1):
    """Cross-validate, train and report a fixed-parameter random forest.

    Loads the dataset with ``Data()``, runs
    ``CrossValidationWithoutGridSearch`` on a ``RandomForestClassifier`` with
    hard-coded hyper-parameters, then passes the same classifier object to
    ``model_training`` and the resulting predictions to ``Results``.

    Args:
        file1: Forwarded unchanged to ``CrossValidationWithoutGridSearch``
            and ``Results`` (presumably a report file handle -- confirm
            against those helpers).
    """
    # Read data; the underscore-prefixed entries are not used in this function.
    (
        data_train,
        data_test,
        output_train,
        output_test,
        _unscaled_data_test,
        activities_labels,
        _feature_labels,
        groups_at_training,
        _groups_at_testing,
    ) = Data()
    InputData(data_train, data_test, output_test, name='Initial Test Data')

    group_splitter = GroupShuffleSplit(
        n_splits=6, test_size=0.3, random_state=0)
    forest = RandomForestClassifier(
        n_estimators=100, criterion='gini', max_depth=70, random_state=50)

    # Only the axes object is handed to the cross-validation helper.
    _figure, axes = plt.subplots()

    CrossValidationWithoutGridSearch(
        group_splitter,
        forest,
        data_train,
        output_train,
        groups_at_training,
        RandomForestClassifier.__name__,
        file1,
        axes,
        n_splits=6,
        lw=10,
    )

    pred_data_test, pred_data_train = model_training(
        forest, data_train, output_train, data_test, output_test)

    Results(
        data_train,
        output_train,
        pred_data_train,
        output_test,
        pred_data_test,
        activities_labels,
        data_test,
        forest,
        file1,
        "Random Forest",
    )
def LR_model(file1):
    """Grid-search, train and report a logistic-regression classifier.

    Loads the dataset with ``Data()``, searches over five solvers, each
    combined with the same ``C`` and ``max_iter`` candidates, using grouped
    shuffle-split cross-validation. The classifier returned by the search is
    passed to ``model_training`` and the predictions to ``Results``.

    Args:
        file1: Forwarded unchanged to ``CrossValidationWithGridSearch`` and
            ``Results`` (presumably a report file handle -- confirm against
            those helpers).
    """
    # Read data; the underscore-prefixed entries are not used in this function.
    (
        data_train,
        data_test,
        output_train,
        output_test,
        _unscaled_data_test,
        activities_labels,
        _feature_labels,
        groups_at_training,
        _groups_at_testing,
    ) = Data()
    InputData(data_train, data_test, output_test, name='Initial Test Data')

    # An alternative split (n_splits=4, train_size=.6, test_size=.4,
    # random_state=42) was left disabled in the original code.
    group_splitter = GroupShuffleSplit(
        n_splits=6, test_size=0.3, random_state=0)

    base_estimator = LogisticRegression(tol=1e-3, penalty='l2')

    # One sub-grid per solver, in this order; every solver gets the same
    # C / max_iter candidates.
    search_space = [
        {
            'solver': [solver],
            'C': [0.5, 1, 10, 100, 1000],
            'max_iter': [10000, 100000],
        }
        for solver in ('newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga')
    ]

    tuned_classifier = CrossValidationWithGridSearch(
        group_splitter,
        base_estimator,
        data_train,
        output_train,
        groups_at_training,
        search_space,
        LogisticRegression.__name__,
        file1,
    )

    pred_data_test, pred_data_train = model_training(
        tuned_classifier, data_train, output_train, data_test, output_test)

    Results(
        data_train,
        output_train,
        pred_data_train,
        output_test,
        pred_data_test,
        activities_labels,
        data_test,
        tuned_classifier,
        file1,
        "LR",
    )
def LR_model(file1):
    """Train and report an RBF-kernel SVC with fixed hyper-parameters.

    Loads the dataset with ``Data()``, builds an ``SVC`` with hard-coded
    parameters, passes it (together with the training groups) to
    ``model_training`` and hands the predictions to ``Results``.

    NOTE(review): despite its name this function builds an ``SVC``, not a
    logistic regression. If another ``LR_model`` is defined earlier in the
    same module, this definition replaces it -- confirm the intended name.

    Args:
        file1: Forwarded unchanged to ``Results`` (presumably a report file
            handle -- confirm against that helper).
    """
    # Read data; the underscore-prefixed entries are not used in this function.
    (
        data_train,
        data_test,
        output_train,
        output_test,
        _unscaled_data_test,
        activities_labels,
        _feature_labels,
        groups_at_training,
        _groups_at_testing,
    ) = Data()
    InputData(data_train, data_test, output_test, name='Initial Test Data')

    # An alternative split (n_splits=10, train_size=.6, test_size=.4,
    # random_state=42) was left disabled in the original code.
    spl = GroupShuffleSplit(n_splits=6, test_size=0.3, random_state=0)

    svc = SVC(kernel='rbf', gamma=0.01, C=100, degree=3)
    classifier_name = SVC.__name__

    # Candidate grid for the (currently disabled) grid search below.
    gamma_values = [1e-2, 1e-1, 1, 2]
    params_grid = [
        {
            'kernel': ['rbf'],
            'gamma': list(gamma_values),
            'C': [0.1, 1, 10, 50, 100, 150],
        },
        {
            'kernel': ['linear'],
            'C': [0.1, 1, 10, 50, 100, 150],
        },
        {
            'kernel': ['poly'],
            'gamma': list(gamma_values),
            'degree': [2, 3, 4, 5],
            'C': [0.1, 1, 10, 50, 100, 150],
        },
        {
            'kernel': ['sigmoid'],
            'gamma': list(gamma_values),
            'C': [0.1, 1, 10, 50, 100, 150],
        },
    ]

    # Grid search is switched off, so spl, classifier_name and params_grid
    # are currently unused. To re-enable it:
    # svc = CrossValidationWithGridSearch(spl, svc, data_train, output_train,
    #                                     groups_at_training, params_grid,
    #                                     classifier_name, file1)

    # Unlike the sibling model functions, the training groups are passed as
    # an extra positional argument here.
    pred_data_test, pred_data_train = model_training(
        svc,
        data_train,
        output_train,
        data_test,
        output_test,
        groups_at_training,
    )

    Results(
        data_train,
        output_train,
        pred_data_train,
        output_test,
        pred_data_test,
        activities_labels,
        data_test,
        svc,
        file1,
        "SVC",
    )
def LDA_model(file1):
    """Cross-validate, train and report a linear discriminant analysis model.

    Loads the dataset with ``Data()``, prints the values returned by
    ``cound_groups`` for the training and testing groups, plots the train and
    test outputs, runs ``CrossValidationWithoutGridSearch`` on a default
    ``LinearDiscriminantAnalysis`` instance, then passes the same classifier
    to ``model_training`` and the resulting predictions to ``Results``.

    Args:
        file1: Forwarded unchanged to ``CrossValidationWithoutGridSearch``
            and ``Results`` (presumably a report file handle -- confirm
            against those helpers).
    """
    # Read data; the underscore-prefixed entries are not used in this function.
    (
        data_train,
        data_test,
        output_train,
        output_test,
        _unscaled_data_test,
        activities_labels,
        _feature_labels,
        groups_at_training,
        groups_at_testing,
    ) = Data()

    train_group_count = cound_groups(groups_at_training)
    test_group_count = cound_groups(groups_at_testing)
    print(train_group_count, test_group_count)

    PlotTheInputData(output_train, activities_labels, "Train Data")
    PlotTheInputData(output_test, activities_labels, 'Test Data')
    InputData(data_train, data_test, output_test, name='Initial Test Data')

    # A LeavePGroupsOut(n_groups=2) splitter was left disabled in the
    # original code; the grouped shuffle split below is the one in use.
    group_splitter = GroupShuffleSplit(
        n_splits=6, test_size=0.3, random_state=0)
    lda = LinearDiscriminantAnalysis()

    # Only the axes object is handed to the cross-validation helper.
    _figure, axes = plt.subplots()

    CrossValidationWithoutGridSearch(
        group_splitter,
        lda,
        data_train,
        output_train,
        groups_at_training,
        LinearDiscriminantAnalysis.__name__,
        file1,
        axes,
        n_splits=6,
        lw=10,
    )

    pred_data_test, pred_data_train = model_training(
        lda, data_train, output_train, data_test, output_test)

    Results(
        data_train,
        output_train,
        pred_data_train,
        output_test,
        pred_data_test,
        activities_labels,
        data_test,
        lda,
        file1,
        "LDA",
    )
# Compare Algorithms
import pandas
import numpy as np
import matplotlib.pyplot as plt
from Data.Select_From_model import Data
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from UsefullFunctions import Add_Noise
from sklearn.preprocessing import MinMaxScaler

# Load the dataset once at module level; Data() yields nine values that are
# unpacked into the names below.
data = Data()
(
    data_train,
    data_test,
    output_train,
    output_test,
    unscaled_data_test,
    activities_labels,
    feature_labels,
    groups_at_training,
    groups_at_testing,
) = data

# Prepare configuration for the cross-validation test harness.
seed = 7

# Prepare models: (short label, estimator with fixed hyper-parameters) pairs,
# kept in the order LR, LDA, RF, DTree.
models = [
    ('LR', LogisticRegression(C=10, solver='liblinear', max_iter=10000)),
    ('LDA', LinearDiscriminantAnalysis()),
    ('RF', RandomForestClassifier(n_estimators=1000,
                                  criterion='gini',
                                  max_depth=20,
                                  random_state=50)),
    ('DTree', DecisionTreeClassifier(criterion='entropy',
                                     max_leaf_nodes=70)),
]