Example #1
0
def test_adult_tree():
    """Train, save and evaluate a decision tree on the adult dataset.

    Fetches the dataset when 'adult.data.pkl' is not present in the working
    directory, trains a ``pk.DecisionTree`` on the training split, saves the
    model to 'tree_adult_classifier.pkl', prints train and test accuracy,
    plots the test confusion matrix and finally asserts that the test
    accuracy is at least 84.
    """
    import os

    import pykitml as pk
    from pykitml.datasets import adult

    # Download the dataset only if it has not been fetched already
    if not os.path.exists('adult.data.pkl'):
        adult.get()

    # Load adult data set and one-hot encode the targets
    inputs_train, outputs_train, inputs_test, outputs_test = adult.load()
    outputs_train = pk.onehot(outputs_train)
    outputs_test = pk.onehot(outputs_test)

    # Create model; one feature type per input column (13 in total).
    # NOTE: 'continues' is the spelling the library is given here - keep it.
    ftypes = [
        'continues', 'categorical', 'continues', 'categorical', 'categorical',
        'categorical', 'categorical', 'categorical', 'categorical',
        'continues', 'continues', 'continues', 'categorical'
    ]
    tree_adult_classifier = pk.DecisionTree(13,
                                            2,
                                            max_depth=100,
                                            min_split=100,
                                            feature_type=ftypes)

    # Train
    tree_adult_classifier.train(inputs_train, outputs_train)

    # Save it
    pk.save(tree_adult_classifier, 'tree_adult_classifier.pkl')

    # Print accuracy
    train_accuracy = tree_adult_classifier.accuracy(inputs_train,
                                                    outputs_train)
    print('Train accuracy:', train_accuracy)
    test_accuracy = tree_adult_classifier.accuracy(inputs_test, outputs_test)
    print('Test accuracy:', test_accuracy)

    # Plot confusion matrix
    tree_adult_classifier.confusion_matrix(inputs_test,
                                           outputs_test,
                                           gnames=['False', 'True'])

    # Assert accuracy; reuse the value computed above instead of running the
    # whole test split through the model a second time
    assert test_accuracy >= 84
def test_banknote_forest():
    """Train, save and evaluate a random forest on the banknote dataset.

    Fetches the dataset when 'banknote.pkl' is not present in the working
    directory, trains a ``pk.RandomForest`` on the training split, saves the
    model to 'forest_banknote_classifier.pkl', prints train and test
    accuracy, plots the test confusion matrix and finally asserts that the
    test accuracy is at least 98.
    """
    import os

    import pykitml as pk
    from pykitml.datasets import banknote

    # Download the dataset only if it has not been fetched already
    if not os.path.exists('banknote.pkl'):
        banknote.get()

    # Load banknote data set
    inputs_train, outputs_train, inputs_test, outputs_test = banknote.load()

    # Change 0/False to [1, 0]
    # Change 1/True to [0, 1]
    outputs_train = pk.onehot(outputs_train)
    outputs_test = pk.onehot(outputs_test)

    # Create model; all four input features use the 'continues' type
    ftypes = ['continues'] * 4
    forest_banknote_classifier = pk.RandomForest(4,
                                                 2,
                                                 max_depth=9,
                                                 feature_type=ftypes)

    # Train
    forest_banknote_classifier.train(inputs_train, outputs_train)

    # Save it
    pk.save(forest_banknote_classifier, 'forest_banknote_classifier.pkl')

    # Print accuracy
    train_accuracy = forest_banknote_classifier.accuracy(inputs_train,
                                                         outputs_train)
    print('Train accuracy:', train_accuracy)
    test_accuracy = forest_banknote_classifier.accuracy(inputs_test,
                                                        outputs_test)
    print('Test accuracy:', test_accuracy)

    # Plot confusion matrix
    forest_banknote_classifier.confusion_matrix(inputs_test,
                                                outputs_test,
                                                gnames=['False', 'True'])

    # Assert accuracy; reuse the value computed above instead of running the
    # whole test split through the forest a second time
    assert test_accuracy >= 98
Example #3
0
def test_sonar_forest():
    """Train, save and evaluate a random forest on the sonar dataset.

    Fetches the dataset when 'sonar.pkl' is not present in the working
    directory, trains a ``pk.RandomForest`` on the training split, saves the
    model to 'forest_sonar_classifier.pkl', prints train and test accuracy
    and plots the test confusion matrix. No accuracy threshold is asserted.
    """
    import os

    import pykitml as pk
    from pykitml.datasets import sonar

    # Download the dataset only if it has not been fetched already
    if not os.path.exists('sonar.pkl'):
        sonar.get()

    # Load the sonar dataset and one-hot encode the targets
    inputs_train, outputs_train, inputs_test, outputs_test = sonar.load()
    outputs_train = pk.onehot(outputs_train)
    outputs_test = pk.onehot(outputs_test)

    # Create model; all 60 input features use the 'continues' type
    forest_sonar_classifier = pk.RandomForest(60,
                                              2,
                                              max_depth=9,
                                              feature_type=['continues'] * 60)

    # Train the model; num_feature_bag is set to the full feature count (60)
    forest_sonar_classifier.train(inputs_train,
                                  outputs_train,
                                  num_feature_bag=60)

    # Save it
    pk.save(forest_sonar_classifier, 'forest_sonar_classifier.pkl')

    # Print accuracy
    train_accuracy = forest_sonar_classifier.accuracy(inputs_train,
                                                      outputs_train)
    print('Train accuracy:', train_accuracy)
    test_accuracy = forest_sonar_classifier.accuracy(inputs_test, outputs_test)
    print('Test accuracy:', test_accuracy)

    # Plot confusion matrix
    forest_sonar_classifier.confusion_matrix(inputs_test,
                                             outputs_test,
                                             gnames=['False', 'True'])
Example #4
0
def test_heart_tree():
    """Train, save and evaluate a decision tree on the heart disease dataset.

    Fetches the dataset when 'heartdisease.pkl' is not present in the working
    directory, trains a ``pk.DecisionTree`` on the full data set, saves the
    model to 'tree_heart_classifier.pkl', prints the accuracy, plots the
    confusion matrix and the tree, and finally asserts that the accuracy is
    at least 94. Accuracy is measured on the same data used for training.
    """
    import os

    import pykitml as pk
    from pykitml.datasets import heartdisease

    # Download the dataset only if it has not been fetched already
    if not os.path.exists('heartdisease.pkl'):
        heartdisease.get()

    # Load heart data set and one-hot encode the targets
    inputs, outputs = heartdisease.load()
    outputs = pk.onehot(outputs)

    # Create model; one feature type per input column (13 in total).
    # NOTE: 'continues' is the spelling the library is given here - keep it.
    ftypes = [
        'continues', 'categorical', 'categorical', 'continues', 'continues',
        'categorical', 'categorical', 'continues', 'categorical', 'continues',
        'categorical', 'categorical', 'categorical'
    ]
    tree_heart_classifier = pk.DecisionTree(13,
                                            2,
                                            max_depth=7,
                                            feature_type=ftypes)

    # Train
    tree_heart_classifier.train(inputs, outputs)

    # Save it
    pk.save(tree_heart_classifier, 'tree_heart_classifier.pkl')

    # Print accuracy
    accuracy = tree_heart_classifier.accuracy(inputs, outputs)
    print('Accuracy:', accuracy)

    # Plot confusion matrix
    tree_heart_classifier.confusion_matrix(inputs,
                                           outputs,
                                           gnames=['False', 'True'])

    # Plot decision tree
    tree_heart_classifier.show_tree()

    # Assert accuracy; reuse the value computed above instead of running the
    # data through the model a second time
    assert accuracy >= 94
Example #5
0
def test_heart_bayes():
    """Train, save and evaluate naive Bayes on the heart disease dataset.

    Fetches the dataset when 'heartdisease.pkl' is not present in the working
    directory, trains a ``pk.NaiveBayes`` classifier on the full data set,
    saves the model to 'bayes_heart_classifier.pkl', prints the accuracy,
    plots the confusion matrix and finally asserts that the accuracy is
    strictly greater than 84. Accuracy is measured on the same data used for
    training.
    """
    import os

    import pykitml as pk
    from pykitml.datasets import heartdisease

    # Download the dataset only if it has not been fetched already
    if not os.path.exists('heartdisease.pkl'):
        heartdisease.get()

    # Load heart data set
    inputs, outputs = heartdisease.load()

    # Change 0/False to [1, 0]
    # Change 1/True to [0, 1]
    outputs = pk.onehot(outputs)

    # One distribution name per input column (13 in total)
    distributions = [
        'gaussian', 'binomial', 'multinomial', 'gaussian', 'gaussian',
        'binomial', 'multinomial', 'gaussian', 'binomial', 'gaussian',
        'multinomial', 'multinomial', 'multinomial'
    ]

    # Create model
    bayes_heart_classifier = pk.NaiveBayes(13, 2, distributions)

    # Train
    bayes_heart_classifier.train(inputs, outputs)

    # Save it
    pk.save(bayes_heart_classifier, 'bayes_heart_classifier.pkl')

    # Print accuracy
    accuracy = bayes_heart_classifier.accuracy(inputs, outputs)
    print('Accuracy:', accuracy)

    # Plot confusion matrix
    bayes_heart_classifier.confusion_matrix(inputs,
                                            outputs,
                                            gnames=['False', 'True'])

    # Assert accuracy; reuse the value computed above instead of running the
    # data through the model a second time
    assert accuracy > 84