def test_decision_tree_classifier_predict():
    """Fit a forest on the interview data and check two predictions.

    Despite its name, this test drives ``MyRandomForestClassifier``.
    """
    forest = MyRandomForestClassifier()
    # The four trailing positional values are forwarded as-is; their meaning
    # is defined by MyRandomForestClassifier.fit, which is not visible here.
    forest.fit(interview_table, interview_class_train, 2, 20, 7, 3)

    unseen = [
        ["Mid", "Java", "yes", "no"],
        ["Junior", "Python", "no", "yes"],
    ]
    expected = ["True", "False"]
    assert forest.predict(unseen) == expected
Esempio n. 2
0
def test_My_Random_Forest_Classifier_predict():
    """Train a forest on the interview data and compare two predictions.

    Features are every column but the last of ``interview_table``; the last
    column is used as the class label.
    """
    # Per the original note: N = 3, M = 2, F = 2 and seed = 1.
    forest = MyRandomForestClassifier(3, 2, 2, 1)

    # The table wrapper is filled in but not consulted again in this test.
    table = MyPyTable()
    table.data = interview_table
    table.column_names = interview_header

    # Split each record into label (last cell) and features (the rest).
    y_train = [record[-1] for record in interview_table]
    X_train = [record[:-1] for record in interview_table]

    X_test = [
        ["Junior", "Java", "yes", "no"],
        ["Junior", "Java", "yes", "yes"],
    ]

    # The classifier is handed the feature names (header minus class column).
    forest.header = interview_header[:-1]
    forest.fit(X_train, y_train)
    y_predicted = forest.predict(X_test)

    print("y_predicted:", y_predicted)

    assert y_predicted == ['True', 'False']
Esempio n. 3
0
def tune_parameters(M, N, F, dataset):
    """Print accuracy and error rate for five shuffled train/test runs.

    Args:
        M, N: forwarded positionally to ``MyRandomForestClassifier.fit``
            after the training rows.
        F: forwarded to ``select_random_attributes`` together with
            ``dataset.data``.
        dataset: object exposing a ``data`` attribute.

    Returns:
        None; results are written to stdout.
    """
    print("M =", M, "N =", N, "F =", F)
    reduced_rows = select_random_attributes(F, dataset.data)

    for run in range(5):
        X, y = split_x_y_train(reduced_rows)
        x_train, x_test, y_train, y_test = myevaluation.train_test_split(
            X, y, shuffle=True)

        # Re-attach each label to its training row. The rows of x_train are
        # extended in place, so x_train itself carries the labels afterwards.
        remainder = []
        for idx, features in enumerate(x_train):
            features.append(y_train[idx])
            remainder.append(features)

        forest = MyRandomForestClassifier()
        forest.fit(remainder, M, N)
        predictions = forest.predict(x_test)

        # Predictions and truths are compared after binning, not raw.
        hits = 0
        for idx, predicted in enumerate(predictions):
            if get_useful_bin(predicted) == get_useful_bin(y_test[idx]):
                hits += 1

        total = len(predictions)
        accuracy = hits / total
        error = (total - hits) / total
        print(run, "-- accuracy =", accuracy, "error =", error)
def test_random_forest_fit():
    """Fit a forest on a bootstrapped interview sample and score it.

    Prints accuracy and error rate, then asserts that every prediction on the
    held-out split equals the true label. As the printed message itself
    states, the classifier is random, so this test does not always pass.
    """
    interview_header = ["level", "lang", "tweets", "phd", "interviewed_well"]
    interview_table = [
        ["Senior", "Java", "no", "no", "False"],
        ["Senior", "Java", "no", "yes", "False"],
        ["Mid", "Python", "no", "no", "True"],
        ["Junior", "Python", "no", "no", "True"],
        ["Junior", "R", "yes", "no", "True"],
        ["Junior", "R", "yes", "yes", "False"],
        ["Mid", "R", "yes", "yes", "True"],
        ["Senior", "Python", "no", "no", "False"],
        ["Senior", "R", "yes", "no", "True"],
        ["Junior", "Python", "yes", "no", "True"],
        ["Senior", "Python", "yes", "yes", "True"],
        ["Mid", "Python", "no", "yes", "True"],
        ["Mid", "Java", "yes", "no", "True"],
        ["Junior", "Python", "no", "yes", "False"],
    ]
    myutils.prepend_attribute_label(interview_table, interview_header)

    interview_pytable = MyPyTable(column_names=interview_header,
                                  data=interview_table)
    y_col = interview_pytable.get_column("interviewed_well", False)
    x_cols = interview_pytable.drop_col("interviewed_well")

    many_trees = MyRandomForestClassifier()
    X_sample, y_sample = myutils.compute_bootstrapped_sample(x_cols, y_col)
    X_train, X_test, y_train, y_test = myutils.train_test_split(
        X_sample, y_sample, .33)
    many_trees.fit(X_train, y_train, X_test, y_test)
    y_predicted = many_trees.predict(X_test)

    # Tally agreement between predicted and actual labels.
    hits = 0
    misses = 0
    for idx, actual in enumerate(y_test):
        if y_predicted[idx] == actual:
            hits += 1
        else:
            misses += 1

    total = hits + misses
    accuracy = np.round(hits / total, 3)
    error_rate = np.round(misses / total, 3)

    print("-----------------------------------------------------------")
    print("Accuracy and Error Rate")
    print("-----------------------------------------------------------")
    print()
    print("Random Forest: accuracy = {}, error rate = {}".format(
        accuracy, error_rate))
    print()
    print(
        "Because of the random aspect of this classifier, this will not always pass the tests"
    )
    print()
    print("Predicted table: " + str(y_predicted))
    print("Testing set:     " + str(y_test))

    # Strict check: every held-out instance must be classified correctly.
    for idx, actual in enumerate(y_test):
        assert y_predicted[idx] == actual
Esempio n. 5
0
def test_MyRandomForestClassifier_predict():
    """Fit a forest on the interview data and expect 'True' for three rows.

    ``X_train`` is built with the attribute header as its first row, followed
    by every interview record minus its last (class) column.
    """
    random.seed(1)

    # NOTE(review): the third test row carries five values (a trailing
    # "False") while the others carry four -- confirm this is intended.
    X_test = [["Junior", "R", "yes", "no"], ["Junior", "Python", "no", "yes"],
              ["Senior", "Java", "no", "no", "False"]]

    # Header row with the class column already removed.
    X_train = [["level", "lang", "tweets", "phd"]]
    # Feature rows: every column except the last of each interview record.
    X_train.extend(
        [record[col] for col in range(len(interview_table[0]) - 1)]
        for record in interview_table)

    # Labels: last column of each interview record.
    y_train = [record[-1] for record in interview_table]

    # Build the random forest under test.
    forest = MyRandomForestClassifier(100, 2, 2)
    expected = ['True', 'True', 'True']
    forest.fit(X_train, y_train)
    predicted = forest.predict(X_test)
    assert predicted == expected
Esempio n. 6
0
def test_random_forest_classifier_fit():
    """After fit(), the forest should hold 7 learners and 7 accuracies."""
    # Column names of the interview dataset (not referenced below).
    interview_header = ["level", "lang", "tweets", "phd", "interviewed_well"]

    # Feature rows of the interview dataset.
    features = [
        ["Senior", "Java", "no", "no"],
        ["Senior", "Java", "no", "yes"],
        ["Mid", "Python", "no", "no"],
        ["Junior", "Python", "no", "no"],
        ["Junior", "R", "yes", "no"],
        ["Junior", "R", "yes", "yes"],
        ["Mid", "R", "yes", "yes"],
        ["Senior", "Python", "no", "no"],
        ["Senior", "R", "yes", "no"],
        ["Junior", "Python", "yes", "no"],
        ["Senior", "Python", "yes", "yes"],
        ["Mid", "Python", "no", "yes"],
        ["Mid", "Java", "yes", "no"],
        ["Junior", "Python", "no", "yes"],
    ]
    # Class labels, parallel to the feature rows.
    labels = [
        "False", "False", "True", "True", "True", "False", "True",
        "False", "True", "True", "True", "True", "True", "False",
    ]

    forest = MyRandomForestClassifier(20, 7, 2, None)
    forest.fit(features, labels)

    expected_size = 7
    assert len(forest.learners) == expected_size
    assert len(forest.accuracies) == expected_size
def test_my_random_forest_fit():
    """Every tree in the fitted forest must be one of the expected trees.

    The check is membership-only: order and multiplicity of the fitted trees
    are not verified.
    """
    classifier = MyRandomForestClassifier()
    # Trailing positional values are forwarded unchanged; their meaning is
    # defined by MyRandomForestClassifier.fit, which is not visible here.
    classifier.fit(interview_table, interview_class_train, 2, 20, 7, 3)

    # The expected forest is made of three distinct shapes, some repeated.
    tweets_tree = [
        'Attribute', 'att2',
        ['Value', 'no', ['Leaf', 'True', 0, 7]],
        ['Value', 'yes', ['Leaf', 'True', 2, 9]],
    ]
    lone_leaf = ['Leaf', 'True', 0, 9]
    level_tree = [
        'Attribute', 'att0',
        ['Value', 'Junior',
         ['Attribute', 'att3',
          ['Value', 'no', ['Leaf', 'True', 2, 4]],
          ['Value', 'yes', ['Leaf', 'False', 2, 4]]]],
        ['Value', 'Mid', ['Leaf', 'True', 4, 9]],
        ['Value', 'Senior', ['Leaf', 'False', 1, 9]],
    ]
    expected_forest = [
        tweets_tree,
        lone_leaf,
        level_tree,
        level_tree,
        level_tree,
        lone_leaf,
        level_tree,
    ]

    assert all(tree in expected_forest for tree in classifier.forest)
def test_random_forest():
    """fit() should leave exactly M trees in ``trees`` for M = 7 and M = 6."""
    for tree_count in (7, 6):
        rf = MyRandomForestClassifier()
        rf.fit(iphone_x, iphone_y, M=tree_count, N=20, F=2)

        assert len(rf.trees) == tree_count
Esempio n. 9
0
def test_random_forest_classifier_fit():
    """After fit(), ``M_attr_sets`` should contain ``M`` entries."""
    feature_names = ["level", "lang", "tweets", "phd"]

    # Pull labels and feature rows out of the interview table.
    interview = MyPyTable(interview_header, interview_table)
    labels = interview.get_column('interviewed_well')
    features = interview.get_rows(feature_names)

    forest = MyRandomForestClassifier(N=4, M=2, F=4)
    forest.fit(features, labels)

    assert len(forest.M_attr_sets) == forest.M
Esempio n. 10
0
def test_random_forest_fit():
    """A seeded forest fitted on the interview data must equal two known trees."""
    X = [
        ["Senior", "Java", "no", "no"],
        ["Senior", "Java", "no", "yes"],
        ["Mid", "Python", "no", "no"],
        ["Junior", "Python", "no", "no"],
        ["Junior", "R", "yes", "no"],
        ["Junior", "R", "yes", "yes"],
        ["Mid", "R", "yes", "yes"],
        ["Senior", "Python", "no", "no"],
        ["Senior", "R", "yes", "no"],
        ["Junior", "Python", "yes", "no"],
        ["Senior", "Python", "yes", "yes"],
        ["Mid", "Python", "no", "yes"],
        ["Mid", "Java", "yes", "no"],
        ["Junior", "Python", "no", "yes"],
    ]
    y = [
        "False", "False", "True", "True", "True", "False", "True",
        "False", "True", "True", "True", "True", "True", "False",
    ]

    # Branches that appear in both expected trees.
    mid_branch = ['Value', 'Mid', ['Leaf', 'True']]
    senior_branch = [
        'Value', 'Senior',
        ['Attribute', 'att2',
         ['Value', 'no', ['Leaf', 'False']],
         ['Value', 'yes', ['Leaf', 'True']]],
    ]
    # Junior branch of the first tree: splits on att2, then on att3.
    deep_junior_branch = [
        'Value', 'Junior',
        ['Attribute', 'att2',
         ['Value', 'no', ['Leaf', 'False']],
         ['Value', 'yes',
          ['Attribute', 'att3',
           ['Value', 'no', ['Leaf', 'True']],
           ['Value', 'yes', ['Leaf', 'False']]]]],
    ]
    # Junior branch of the second tree: a plain leaf.
    flat_junior_branch = ['Value', 'Junior', ['Leaf', 'False']]

    test_trees = [
        ['Attribute', 'att0', deep_junior_branch, mid_branch, senior_branch],
        ['Attribute', 'att0', flat_junior_branch, mid_branch, senior_branch],
    ]

    forest = MyRandomForestClassifier(n=4, m=2, f=2, seed=2)
    forest.fit(X, y)

    assert forest.trees == test_trees
Esempio n. 11
0
def test_My_Random_Forest_Classifier_fit():
    """Fit a forest on the full interview table and read back its trees.

    NOTE(review): nothing is asserted here; the test only fails if fitting or
    reading ``trees`` raises. The snippet may be truncated -- confirm.
    """
    # Per the original note: N = 3, M = 2, F = 2 and seed = 0.
    forest = MyRandomForestClassifier(3, 2, 2, 0)

    table = MyPyTable()
    table.data = interview_table
    table.column_names = interview_header

    # The whole table (class column included) is passed as training rows.
    training_rows = interview_table
    labels = table.get_column("interviewed_well")

    forest.header = interview_header
    forest.fit(training_rows, labels)

    trees = forest.trees
Esempio n. 12
0
def test_random_forest_classifier_predict():
    """Three 'Mid' instances should each be predicted as "True"."""
    # Each test row also carries its label as a trailing fifth value.
    X_test = [
        ["Mid", "Python", "no", "no", "True"],
        ["Mid", "R", "yes", "yes", "True"],
        ["Mid", "Python", "no", "yes", "True"],
    ]
    y_test = ["True", "True", "True"]

    # Pull labels and feature rows out of the interview table.
    feature_names = ["level", "lang", "tweets", "phd"]
    interview = MyPyTable(interview_header, interview_table)
    labels = interview.get_column('interviewed_well')
    features = interview.get_rows(feature_names)

    forest = MyRandomForestClassifier(N=4, M=2, F=4)
    forest.fit(features, labels)
    predictions = forest.predict(X_test)

    for idx, predicted in enumerate(predictions):
        assert predicted == y_test[idx]
Esempio n. 13
0
def test_random_forest_predict():
    """A seeded forest should label two known instances 'False' and 'True'."""
    features = [
        ["Senior", "Java", "no", "no"],
        ["Senior", "Java", "no", "yes"],
        ["Mid", "Python", "no", "no"],
        ["Junior", "Python", "no", "no"],
        ["Junior", "R", "yes", "no"],
        ["Junior", "R", "yes", "yes"],
        ["Mid", "R", "yes", "yes"],
        ["Senior", "Python", "no", "no"],
        ["Senior", "R", "yes", "no"],
        ["Junior", "Python", "yes", "no"],
        ["Senior", "Python", "yes", "yes"],
        ["Mid", "Python", "no", "yes"],
        ["Mid", "Java", "yes", "no"],
        ["Junior", "Python", "no", "yes"],
    ]
    labels = [
        "False", "False", "True", "True", "True", "False", "True",
        "False", "True", "True", "True", "True", "True", "False",
    ]

    forest = MyRandomForestClassifier(n=4, m=2, f=2, seed=2)
    forest.fit(features, labels)

    unseen = [
        ["Junior", "Python", "no", "yes"],
        ["Mid", "Java", "yes", "no"],
    ]
    y_predicted = forest.predict(unseen)
    y_actual = ['False', 'True']
    assert y_predicted == y_actual
Esempio n. 14
0
def test_simple_linear_regressor_fit():
    """Predictions must be drawn from the set of training labels.

    Despite its name, this test drives ``MyRandomForestClassifier``.
    """
    forest = MyRandomForestClassifier(2, 5, 3)
    X_train = [
        ["Senior", "Java", "no", "no"],
        ["Senior", "Java", "no", "yes"],
        ["Mid", "Python", "no", "no"],
        ["Junior", "Python", "no", "no"],
        ["Junior", "R", "yes", "no"],
        ["Junior", "R", "yes", "yes"],
        ["Mid", "R", "yes", "yes"],
        ["Senior", "Python", "no", "no"],
        ["Senior", "R", "yes", "no"],
        ["Junior", "Python", "yes", "no"],
        ["Senior", "Python", "yes", "yes"],
        ["Mid", "Python", "no", "yes"],
        ["Mid", "Java", "yes", "no"],
        ["Junior", "Python", "no", "yes"],
    ]
    y_train = [
        "False", "False", "True", "True", "True", "False", "True",
        "False", "True", "True", "True", "True", "True", "False",
    ]

    # Collect the label domain before fitting.
    known_labels = myutils.get_unique(y_train)
    forest.fit(X_train, y_train)

    unseen = [
        ["Junior", "Python", "no", "yes"],
        ["Mid", "Java", "yes", "no"],
    ]
    predictions = forest.predict(unseen)
    assert all(label in known_labels for label in predictions)
def test_random_forest_fit():
    """Fit on a shuffled split of the interview data; expect at least one hit.

    Checks that one prediction is produced per test instance and that at
    least one prediction matches its true label.
    """
    # Interview dataset; the last column is the class label.
    table = [
        ["Senior", "Java", "no", "no", "False"],
        ["Senior", "Java", "no", "yes", "False"],
        ["Mid", "Python", "no", "no", "True"],
        ["Junior", "Python", "no", "no", "True"],
        ["Junior", "R", "yes", "no", "True"],
        ["Junior", "R", "yes", "yes", "False"],
        ["Mid", "R", "yes", "yes", "True"],
        ["Senior", "Python", "no", "no", "False"],
        ["Senior", "R", "yes", "no", "True"],
        ["Junior", "Python", "yes", "no", "True"],
        ["Senior", "Python", "yes", "yes", "True"],
        ["Mid", "Python", "no", "yes", "True"],
        ["Mid", "Java", "yes", "no", "True"],
        ["Junior", "Python", "no", "yes", "False"],
    ]

    X, y = myutils.split_x_y_train(table)
    x_train, x_test, y_train, y_test = myevaluation.train_test_split(
        X, y, math.floor(len(table) * 0.33), shuffle=True)

    # Re-attach each label to its training row (rows are extended in place).
    remainder = []
    for idx, features in enumerate(x_train):
        features.append(y_train[idx])
        remainder.append(features)

    print(remainder)

    forest = MyRandomForestClassifier()
    forest.fit(remainder, 10, 100)

    y_predicted = forest.predict(x_test)

    assert len(y_predicted) == len(y_test)

    matches = sum(1 for idx, predicted in enumerate(y_predicted)
                  if predicted == y_test[idx])

    assert matches != 0
Esempio n. 16
0
def test_random_forest_fit():
    """Check learner count and tree structure for two seeded configurations.

    Uses the module-level ``X_train``/``y_train`` data and the expected
    ``test_tree`` / ``test_forest`` structures.
    """
    # Configuration 1: N = 2, M = 1, F = 1 -> a single learner.
    expected_size = 1
    forest = MyRandomForestClassifier(N=2, M=expected_size, F=1, seed=0)
    forest.fit(X_train, y_train)
    assert len(forest.learners) == expected_size
    first_tree = [learner.tree for learner in forest.learners][0]
    check_tree_equivalence(test_tree, first_tree)

    # Configuration 2: N = 3, M = 2, F = 2 -> two learners, compared pairwise.
    expected_size = 2
    forest = MyRandomForestClassifier(N=3, M=expected_size, F=2, seed=0)
    forest.fit(X_train, y_train)
    assert len(forest.learners) == expected_size
    fitted_trees = [learner.tree for learner in forest.learners]
    for idx, fitted in enumerate(fitted_trees):
        check_tree_equivalence(fitted, test_forest[idx])
Esempio n. 17
0
def test_MyRandomForestClassifier_fit():
    """Fit a forest on the interview data and compare its first two entries.

    Each entry of ``forest`` is expected to expose an ``'atts'`` list and a
    ``'tree'`` object whose ``tree`` attribute matches a known structure.
    """
    random.seed(1)

    # Header row with the class column already removed.
    X_train = [["level", "lang", "tweets", "phd"]]
    # Feature rows: every column except the last of each interview record.
    X_train.extend(
        [record[col] for col in range(len(interview_table[0]) - 1)]
        for record in interview_table)

    # Labels: last column of each interview record.
    y_train = [record[-1] for record in interview_table]

    # Build and fit the random forest under test.
    classifier = MyRandomForestClassifier(100, 2, 2)
    classifier.fit(X_train, y_train)

    assert classifier.forest[0]['atts'] == ['att0', 'att1']
    assert classifier.forest[0]['tree'].tree == tree_actual_1

    assert classifier.forest[1]['atts'] == ['att0', 'att1']
    assert classifier.forest[1]['tree'].tree == tree_actual_2
import pickle
from mysklearn.myclassifiers import MyRandomForestClassifier
from mysklearn.mypytable import MyPyTable
import os

# Load the tracks table and pull out the columns used for training.
fname = os.path.join("input_data", "tracks_data_backup.txt")
tracks = MyPyTable().load_from_file(fname)

Danceability = tracks.get_column('danceability')
Energy = tracks.get_column('energy')
Acousticness = tracks.get_column('acousticness')
Valence = tracks.get_column('valence')

# Acousticness is the target; the other three columns form each feature row.
y_train = Acousticness
x_train = [[Danceability[i], Energy[i], Valence[i]]
           for i in range(len(y_train))]

# NOTE(review): this first forest is replaced right below and never used
# afterwards. It is kept because dropping the fit could change any random
# state the second fit depends on -- confirm before removing.
rf = MyRandomForestClassifier()
rf.fit(x_train, y_train, 20, 7, 2)
rf = MyRandomForestClassifier()
rf.fit(x_train, y_train, 30, 4, 2)

# Serialize the fitted trees to file (pickle). The context manager closes
# the handle even if dumping raises.
with open("trees.p", "wb") as outfile:
    pickle.dump(rf.trees, outfile)

# Deserialize the trees back into an object (unpickle); this only reads the
# file written just above.
with open("trees.p", "rb") as infile:
    trees2 = pickle.load(infile)
Esempio n. 19
0
def test_random_forest_classifier_predict():
    """Check forest predictions on the interview and degrees datasets."""
    # --- Interview dataset -------------------------------------------------
    interview_rows = [
        ["Senior", "Java", "no", "no"],
        ["Senior", "Java", "no", "yes"],
        ["Mid", "Python", "no", "no"],
        ["Junior", "Python", "no", "no"],
        ["Junior", "R", "yes", "no"],
        ["Junior", "R", "yes", "yes"],
        ["Mid", "R", "yes", "yes"],
        ["Senior", "Python", "no", "no"],
        ["Senior", "R", "yes", "no"],
        ["Junior", "Python", "yes", "no"],
        ["Senior", "Python", "yes", "yes"],
        ["Mid", "Python", "no", "yes"],
        ["Mid", "Java", "yes", "no"],
        ["Junior", "Python", "no", "yes"],
    ]
    interview_labels = [
        "False", "False", "True", "True", "True", "False", "True",
        "False", "True", "True", "True", "True", "True", "False",
    ]

    rf = MyRandomForestClassifier()
    rf.fit(interview_rows, interview_labels, 20, 7, 2)

    # The test rows are the first three training rows.
    X_test = [
        ["Senior", "Java", "no", "no"],
        ["Senior", "Java", "no", "yes"],
        ["Mid", "Python", "no", "no"],
    ]
    pred = rf.predict(X_test)
    assert pred == ["False", "False", "True"]  # TODO: fix this

    # --- Degrees dataset ---------------------------------------------------
    # Column names of the degrees table (not referenced below).
    degrees_header = ["SoftEng", "ARIN", "HCI", "CSA", "Project", "Class"]
    degrees_table = [
        ["A", "B", "A", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "A", "FIRST"],
        ["A", "A", "A", "B", "B", "SECOND"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["A", "A", "B", "B", "A", "FIRST"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "B", "SECOND"],
        ["A", "A", "A", "A", "A", "FIRST"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["A", "B", "B", "A", "B", "SECOND"],
        ["B", "B", "B", "B", "A", "SECOND"],
        ["A", "A", "B", "A", "B", "FIRST"],
        ["B", "B", "B", "B", "A", "SECOND"],
        ["A", "A", "B", "B", "B", "SECOND"],
        ["B", "B", "B", "B", "B", "SECOND"],
        ["A", "A", "B", "A", "A", "FIRST"],
        ["B", "B", "B", "A", "A", "SECOND"],
        ["B", "B", "A", "A", "B", "SECOND"],
        ["B", "B", "B", "B", "A", "SECOND"],
        ["B", "A", "B", "A", "B", "SECOND"],
        ["A", "B", "B", "B", "A", "FIRST"],
        ["A", "B", "A", "B", "B", "SECOND"],
        ["B", "A", "B", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "B", "SECOND"],
    ]

    # Features are the first four columns; the label is column index 4.
    # NOTE(review): per degrees_header, index 4 is "Project", not "Class" --
    # confirm this is the intended target column.
    degree_rows = [row[0:4] for row in degrees_table]
    degree_labels = [row[4] for row in degrees_table]

    rf1 = MyRandomForestClassifier()
    rf1.fit(degree_rows, degree_labels, 20, 7, 2)

    test_vals = [
        ["B", "B", "B", "B", "B"],
        ["A", "A", "A", "A", "A"],
        ["A", "A", "A", "A", "B"],
    ]

    assert rf1.predict(test_vals) == ['A', 'A', 'A']