Exemple #1
0
def DTree(file1):
    """Run the decision-tree experiment end to end.

    Loads the dataset through ``Data()``, grid-searches the split criterion
    and the leaf-node limit with a grouped shuffle-split, feeds the
    classifier returned by the search to ``model_training`` and forwards the
    resulting predictions to ``Results``.

    Args:
        file1: Forwarded unchanged to ``CrossValidationWithGridSearch`` and
            ``Results`` (presumably a report file handle -- TODO confirm).
    """
    # Data() yields nine items; the underscore-prefixed ones are not needed
    # by this experiment.
    (data_train, data_test, output_train, output_test, _unscaled_data_test,
     activities_labels, _feature_labels, groups_at_training,
     _groups_at_testing) = Data()

    InputData(data_train, data_test, output_test, name='Initial Test Data')

    splitter = GroupShuffleSplit(n_splits=6, test_size=0.3, random_state=0)
    base_estimator = DecisionTreeClassifier()
    estimator_name = DecisionTreeClassifier.__name__
    report_label = "DTree"
    search_space = [{
        'criterion': ["gini", "entropy"],
        'max_leaf_nodes': [30, 50, 70, 100],
    }]

    # The search hands back the classifier that is used from here on.
    tuned = CrossValidationWithGridSearch(
        splitter, base_estimator, data_train, output_train,
        groups_at_training, search_space, estimator_name, file1)

    test_predictions, train_predictions = model_training(
        tuned, data_train, output_train, data_test, output_test)

    Results(data_train, output_train, train_predictions, output_test,
            test_predictions, activities_labels, data_test, tuned, file1,
            report_label)
Exemple #2
0
def RandomForest(file1):
    """Run the random-forest experiment with fixed hyperparameters.

    Loads the dataset through ``Data()``, cross-validates a preconfigured
    forest (no grid search) on a grouped shuffle-split, then passes the same
    classifier object to ``model_training`` and reports via ``Results``.

    Args:
        file1: Forwarded unchanged to ``CrossValidationWithoutGridSearch``
            and ``Results`` (presumably a report file handle -- TODO confirm).
    """
    (data_train, data_test, output_train, output_test, _unscaled_data_test,
     activities_labels, _feature_labels, groups_at_training,
     _groups_at_testing) = Data()

    InputData(data_train, data_test, output_test, name='Initial Test Data')

    # The same fold count goes to the splitter and to the cross-validation
    # helper.
    fold_count = 6
    splitter = GroupShuffleSplit(n_splits=fold_count,
                                 test_size=0.3,
                                 random_state=0)
    forest = RandomForestClassifier(
        n_estimators=100,
        criterion='gini',
        max_depth=70,
        random_state=50,
    )

    # Only the axes object is handed on; the figure handle is unused here.
    _figure, axes = plt.subplots()
    report_label = "Random Forest"
    estimator_name = RandomForestClassifier.__name__

    CrossValidationWithoutGridSearch(
        splitter, forest, data_train, output_train, groups_at_training,
        estimator_name, file1, axes, n_splits=fold_count, lw=10)

    test_predictions, train_predictions = model_training(
        forest, data_train, output_train, data_test, output_test)

    Results(data_train, output_train, train_predictions, output_test,
            test_predictions, activities_labels, data_test, forest, file1,
            report_label)
def LR_model(file1):
    """Run the logistic-regression experiment with a per-solver grid search.

    Loads the dataset through ``Data()``, grid-searches ``C`` and
    ``max_iter`` separately for each solver on a grouped shuffle-split,
    feeds the classifier returned by the search to ``model_training`` and
    forwards the predictions to ``Results``.

    Args:
        file1: Forwarded unchanged to ``CrossValidationWithGridSearch`` and
            ``Results`` (presumably a report file handle -- TODO confirm).
    """
    (data_train, data_test, output_train, output_test, _unscaled_data_test,
     activities_labels, _feature_labels, groups_at_training,
     _groups_at_testing) = Data()

    InputData(data_train, data_test, output_test, name='Initial Test Data')

    splitter = GroupShuffleSplit(n_splits=6, test_size=0.3, random_state=0)
    base_estimator = LogisticRegression(tol=1e-3, penalty='l2')
    estimator_name = LogisticRegression.__name__
    report_label = "LR"

    # One sub-grid per solver, each with the same C / max_iter candidates.
    solver_names = ('newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga')
    search_space = [
        {
            'solver': [solver_name],
            'C': [0.5, 1, 10, 100, 1000],
            'max_iter': [10000, 100000],
        }
        for solver_name in solver_names
    ]

    # The search hands back the classifier that is used from here on.
    tuned = CrossValidationWithGridSearch(
        splitter, base_estimator, data_train, output_train,
        groups_at_training, search_space, estimator_name, file1)

    test_predictions, train_predictions = model_training(
        tuned, data_train, output_train, data_test, output_test)

    Results(data_train, output_train, train_predictions, output_test,
            test_predictions, activities_labels, data_test, tuned, file1,
            report_label)
def LR_model(file1):
    """Run the support-vector experiment with a fixed RBF configuration.

    Despite its name, this function builds an ``SVC`` and reports under the
    label "SVC". The grid search is currently disabled, so the classifier is
    passed to ``model_training`` with its hard-coded hyperparameters.

    NOTE(review): an earlier function in this file is also called
    ``LR_model``; this later definition replaces it when the module is
    loaded. Renaming one of them needs a check of the callers first.

    Args:
        file1: Forwarded unchanged to ``Results`` (presumably a report file
            handle -- TODO confirm).
    """
    (data_train, data_test, output_train, output_test, _unscaled_data_test,
     activities_labels, _feature_labels, groups_at_training,
     _groups_at_testing) = Data()

    InputData(data_train, data_test, output_test, name='Initial Test Data')

    # The splitter, estimator name and search space below are only consumed
    # by the grid search, which is switched off; they are kept so it can be
    # re-enabled by calling
    # CrossValidationWithGridSearch(splitter, svc, data_train, output_train,
    #                               groups_at_training, search_space,
    #                               estimator_name, file1).
    splitter = GroupShuffleSplit(n_splits=6, test_size=0.3, random_state=0)
    svc = SVC(kernel='rbf', gamma=0.01, C=100, degree=3)
    estimator_name = SVC.__name__
    report_label = "SVC"

    penalty_values = (0.1, 1, 10, 50, 100, 150)
    gamma_values = (1e-2, 1e-1, 1, 2)
    search_space = [
        {
            'kernel': ['rbf'],
            'gamma': list(gamma_values),
            'C': list(penalty_values),
        },
        {
            'kernel': ['linear'],
            'C': list(penalty_values),
        },
        {
            'kernel': ['poly'],
            'gamma': list(gamma_values),
            'degree': [2, 3, 4, 5],
            'C': list(penalty_values),
        },
        {
            'kernel': ['sigmoid'],
            'gamma': list(gamma_values),
            'C': list(penalty_values),
        },
    ]

    # Unlike the other experiments, the training groups are passed along too.
    test_predictions, train_predictions = model_training(
        svc, data_train, output_train, data_test, output_test,
        groups_at_training)

    Results(data_train, output_train, train_predictions, output_test,
            test_predictions, activities_labels, data_test, svc, file1,
            report_label)
def LDA_model(file1):
    """Run the linear-discriminant-analysis experiment.

    Loads the dataset through ``Data()``, prints the group counts of the
    training and test partitions, plots both label sets, cross-validates an
    LDA classifier (no grid search) on a grouped shuffle-split, then passes
    the same classifier object to ``model_training`` and reports via
    ``Results``.

    Args:
        file1: Forwarded unchanged to ``CrossValidationWithoutGridSearch``
            and ``Results`` (presumably a report file handle -- TODO confirm).
    """
    (data_train, data_test, output_train, output_test, _unscaled_data_test,
     activities_labels, _feature_labels, groups_at_training,
     groups_at_testing) = Data()

    # Group counts for the training partition first, then the test one.
    print(cound_groups(groups_at_training), cound_groups(groups_at_testing))

    for labels, title in ((output_train, "Train Data"),
                          (output_test, 'Test Data')):
        PlotTheInputData(labels, activities_labels, title)

    InputData(data_train, data_test, output_test, name='Initial Test Data')

    # The same fold count goes to the splitter and to the cross-validation
    # helper.
    fold_count = 6
    splitter = GroupShuffleSplit(n_splits=fold_count,
                                 test_size=0.3,
                                 random_state=0)
    lda = LinearDiscriminantAnalysis()

    # Only the axes object is handed on; the figure handle is unused here.
    _figure, axes = plt.subplots()
    report_label = "LDA"
    estimator_name = LinearDiscriminantAnalysis.__name__

    CrossValidationWithoutGridSearch(
        splitter, lda, data_train, output_train, groups_at_training,
        estimator_name, file1, axes, n_splits=fold_count, lw=10)

    test_predictions, train_predictions = model_training(
        lda, data_train, output_train, data_test, output_test)

    Results(data_train, output_train, train_predictions, output_test,
            test_predictions, activities_labels, data_test, lda, file1,
            report_label)
# Compare Algorithms
import pandas
import numpy as np
import matplotlib.pyplot as plt
from Data.Select_From_model import Data
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from UsefullFunctions import Add_Noise
from sklearn.preprocessing import MinMaxScaler
# Load the dataset once at module level and split the returned bundle into
# its nine named parts.
data = Data()
(data_train, data_test, output_train, output_test, unscaled_data_test,
 activities_labels, feature_labels, groups_at_training,
 groups_at_testing) = data

# Seed for the cross-validation test harness.
seed = 7

# Candidate models to compare, as (short label, configured estimator) pairs.
models = [
    ('LR', LogisticRegression(C=10, solver='liblinear', max_iter=10000)),
    ('LDA', LinearDiscriminantAnalysis()),
    ('RF',
     RandomForestClassifier(n_estimators=1000,
                            criterion='gini',
                            max_depth=20,
                            random_state=50)),
    ('DTree', DecisionTreeClassifier(criterion='entropy', max_leaf_nodes=70)),
]