예제 #1
0
def test_My_Random_Forest_Classifier_predict():
    """Check the predictions of a forest trained on the interview dataset.

    The classifier is built with the positional arguments (3, 2, 2, 1); per
    the original author's note these stand for N = 3, M = 2, F = 2 and
    seed = 1. After fitting on every interview row, the two unseen instances
    are expected to be classified as ['True', 'False'].
    """
    forest = MyRandomForestClassifier(3, 2, 2, 1)

    # Table wrapper around the interview data. It is populated here, but
    # nothing below reads from it.
    table = MyPyTable()
    table.data = interview_table
    table.column_names = interview_header

    # The class label is the last value of every interview row.
    y_train = [record[-1] for record in interview_table]
    X_train = [record[:-1] for record in interview_table]

    X_test = [
        ["Junior", "Java", "yes", "no"],
        ["Junior", "Java", "yes", "yes"],
    ]

    # The forest is given the attribute names without the class column.
    forest.header = interview_header[:-1]
    forest.fit(X_train, y_train)
    y_predicted = forest.predict(X_test)

    print("y_predicted:", y_predicted)

    assert y_predicted == ['True', 'False']
def test_decision_tree_classifier_predict():
    """Fit a forest on the interview data and check two predictions.

    ``fit`` is called with the extra positional arguments 2, 20, 7 and 3;
    their meaning is defined by MyRandomForestClassifier and cannot be seen
    from this test.
    """
    forest = MyRandomForestClassifier()
    forest.fit(interview_table, interview_class_train, 2, 20, 7, 3)

    unseen = [
        ["Mid", "Java", "yes", "no"],
        ["Junior", "Python", "no", "yes"],
    ]
    expected = ["True", "False"]

    assert forest.predict(unseen) == expected
예제 #3
0
def tune_parameters(M, N, F, dataset):
    """Print the accuracy of five random-forest runs for one parameter set.

    ``select_random_attributes(F, dataset.data)`` is called once. Then, five
    times over, the result is split into train and test parts, a fresh
    MyRandomForestClassifier is fitted and the share of test instances whose
    binned prediction matches the binned true value is printed.

    Args:
        M: Forwarded to ``MyRandomForestClassifier.fit`` as its second
            argument.
        N: Forwarded to ``MyRandomForestClassifier.fit`` as its third
            argument.
        F: Forwarded to ``select_random_attributes`` as its first argument.
        dataset: Object whose ``data`` attribute holds the table rows.

    Returns:
        None. All results are reported through ``print``.
    """
    print("M =", M, "N =", N, "F =", F)
    adjusted_dataset = select_random_attributes(F, dataset.data)
    for i in range(5):
        X, y = split_x_y_train(adjusted_dataset)
        x_train, x_test, y_train, y_test = myevaluation.train_test_split(
            X, y, shuffle=True)

        # fit() is handed rows that end with their label. New lists are
        # built instead of appending to the x_train rows, so rows returned
        # by the split helpers are never modified in place.
        remainder = [[*row, label] for row, label in zip(x_train, y_train)]

        myRF = MyRandomForestClassifier()
        myRF.fit(remainder, M, N)
        y_predict_rf = myRF.predict(x_test)

        total = len(y_predict_rf)
        if total == 0:
            # Accuracy is undefined without predictions; report that
            # instead of failing with ZeroDivisionError.
            print(i, "-- no predictions to score")
            continue

        # Predictions and true values are compared after binning.
        count = sum(
            1 for predicted, actual in zip(y_predict_rf, y_test)
            if get_useful_bin(predicted) == get_useful_bin(actual))

        accuracy = count / total
        error = (total - count) / total
        print(i, "-- accuracy =", accuracy, "error =", error)
def test_random_forest_fit():
    """Fit a forest on a bootstrapped interview sample and score it.

    The interview table is wrapped in a MyPyTable, split into attribute
    columns and the "interviewed_well" column, bootstrapped, and divided
    into train and test parts (``.33`` is passed to ``train_test_split``).
    Accuracy and error rate are printed, and the test then requires every
    prediction to equal its test label. As the printed message itself says,
    the randomness involved means this will not always pass.
    """
    interview_header = ["level", "lang", "tweets", "phd", "interviewed_well"]
    interview_table = [
        ["Senior", "Java", "no", "no", "False"],
        ["Senior", "Java", "no", "yes", "False"],
        ["Mid", "Python", "no", "no", "True"],
        ["Junior", "Python", "no", "no", "True"],
        ["Junior", "R", "yes", "no", "True"],
        ["Junior", "R", "yes", "yes", "False"],
        ["Mid", "R", "yes", "yes", "True"],
        ["Senior", "Python", "no", "no", "False"],
        ["Senior", "R", "yes", "no", "True"],
        ["Junior", "Python", "yes", "no", "True"],
        ["Senior", "Python", "yes", "yes", "True"],
        ["Mid", "Python", "no", "yes", "True"],
        ["Mid", "Java", "yes", "no", "True"],
        ["Junior", "Python", "no", "yes", "False"],
    ]
    myutils.prepend_attribute_label(interview_table, interview_header)

    interview_pytable = MyPyTable(
        column_names=interview_header, data=interview_table)
    y_col = interview_pytable.get_column("interviewed_well", False)
    x_cols = interview_pytable.drop_col("interviewed_well")

    many_trees = MyRandomForestClassifier()
    X_sample, y_sample = myutils.compute_bootstrapped_sample(x_cols, y_col)
    X_train, X_test, y_train, y_test = myutils.train_test_split(
        X_sample, y_sample, .33)
    many_trees.fit(X_train, y_train, X_test, y_test)
    y_predicted = many_trees.predict(X_test)

    # Tally predicted-versus-actual agreement over the test labels.
    n_scored = len(y_test)
    n_correct = sum(
        1 for idx in range(n_scored) if y_predicted[idx] == y_test[idx])
    n_wrong = n_scored - n_correct

    accuracy = np.round(n_correct / n_scored, 3)
    error_rate = np.round(n_wrong / n_scored, 3)

    rule = "-----------------------------------------------------------"
    print(rule)
    print("Accuracy and Error Rate")
    print(rule)
    print()
    print("Random Forest: accuracy = {}, error rate = {}".format(
        accuracy, error_rate))
    print()
    print(
        "Because of the random aspect of this classifier, this will not always pass the tests"
    )
    print()
    print("Predicted table: " + str(y_predicted))
    print("Testing set:     " + str(y_test))

    for idx, expected in enumerate(y_test):
        assert y_predicted[idx] == expected
예제 #5
0
def test_MyRandomForestClassifier_predict():
    """Check forest predictions on the interview dataset with a fixed seed.

    ``X_train`` starts with the attribute header (class column removed) and
    is followed by every interview row without its last value; ``y_train``
    holds those last values. A forest built with (100, 2, 2) must predict
    'True' for all three test instances.
    """
    random.seed(1)

    # NOTE(review): the third instance carries five values (it ends with
    # "False") while the other two have four -- confirm this is intended.
    X_test = [["Junior", "R", "yes", "no"], ["Junior", "Python", "no", "yes"],
              ["Senior", "Java", "no", "no", "False"]]

    # Header row first, with the class column dropped.
    header = ["level", "lang", "tweets", "phd", "interviewed_well"]
    header.pop()
    X_train = [header]

    # Every interview row, copied value by value without its last column.
    X_train.extend(
        [record[col] for col in range(len(interview_table[0]) - 1)]
        for record in interview_table)

    # The class label is the last value of every interview row.
    y_train = [record[-1] for record in interview_table]

    forest = MyRandomForestClassifier(100, 2, 2)
    actual = ['True', 'True', 'True']
    forest.fit(X_train, y_train)
    predicted = forest.predict(X_test)
    assert predicted == actual
예제 #6
0
def test_random_forest_classifier_predict():
    """Check that three "Mid" interview instances are predicted as "True".

    Training data is pulled out of a MyPyTable built from the interview
    header and table; the forest is created with N=4, M=2 and F=4.
    """
    X_test = [
        ["Mid", "Python", "no", "no", "True"],
        ["Mid", "R", "yes", "yes", "True"],
        ["Mid", "Python", "no", "yes", "True"],
    ]
    y_test = ["True", "True", "True"]

    mp_table = MyPyTable(interview_header, interview_table)

    # Labels come from the class column, features from the other four.
    y_train = mp_table.get_column('interviewed_well')
    feature_names = ["level", "lang", "tweets", "phd"]
    X_train = mp_table.get_rows(feature_names)

    myRF = MyRandomForestClassifier(N=4, M=2, F=4)
    myRF.fit(X_train, y_train)
    predictions = myRF.predict(X_test)

    for idx, predicted in enumerate(predictions):
        assert predicted == y_test[idx]
예제 #7
0
def test_simple_linear_regressor_fit():
    """Check that a fitted forest only predicts labels seen in training.

    Despite its name, this test exercises MyRandomForestClassifier (built
    with the positional arguments 2, 5, 3) on the interview dataset.
    """
    myline = MyRandomForestClassifier(2, 5, 3)

    X_train = [
        ["Senior", "Java", "no", "no"],
        ["Senior", "Java", "no", "yes"],
        ["Mid", "Python", "no", "no"],
        ["Junior", "Python", "no", "no"],
        ["Junior", "R", "yes", "no"],
        ["Junior", "R", "yes", "yes"],
        ["Mid", "R", "yes", "yes"],
        ["Senior", "Python", "no", "no"],
        ["Senior", "R", "yes", "no"],
        ["Junior", "Python", "yes", "no"],
        ["Senior", "Python", "yes", "yes"],
        ["Mid", "Python", "no", "yes"],
        ["Mid", "Java", "yes", "no"],
        ["Junior", "Python", "no", "yes"],
    ]
    y_train = [
        "False", "False", "True", "True", "True", "False", "True",
        "False", "True", "True", "True", "True", "True", "False",
    ]

    # Labels a prediction is allowed to take.
    y_domain = myutils.get_unique(y_train)

    myline.fit(X_train, y_train)
    unseen = [
        ["Junior", "Python", "no", "yes"],
        ["Mid", "Java", "yes", "no"],
    ]
    prediction = myline.predict(unseen)

    assert all(val in y_domain for val in prediction)
예제 #8
0
def test_random_forest_predict():
    """Check two predictions of a seeded forest fitted on the interview data.

    The forest is created with n=4, m=2, f=2 and seed=2 and must classify
    the two unseen instances as ['False', 'True'].
    """
    features = [
        ["Senior", "Java", "no", "no"],
        ["Senior", "Java", "no", "yes"],
        ["Mid", "Python", "no", "no"],
        ["Junior", "Python", "no", "no"],
        ["Junior", "R", "yes", "no"],
        ["Junior", "R", "yes", "yes"],
        ["Mid", "R", "yes", "yes"],
        ["Senior", "Python", "no", "no"],
        ["Senior", "R", "yes", "no"],
        ["Junior", "Python", "yes", "no"],
        ["Senior", "Python", "yes", "yes"],
        ["Mid", "Python", "no", "yes"],
        ["Mid", "Java", "yes", "no"],
        ["Junior", "Python", "no", "yes"],
    ]
    labels = [
        "False", "False", "True", "True", "True", "False", "True",
        "False", "True", "True", "True", "True", "True", "False",
    ]

    forest = MyRandomForestClassifier(n=4, m=2, f=2, seed=2)
    forest.fit(features, labels)

    unseen = [
        ["Junior", "Python", "no", "yes"],
        ["Mid", "Java", "yes", "no"],
    ]
    y_predicted = forest.predict(unseen)

    y_actual = ['False', 'True']
    assert y_predicted == y_actual
def test_random_forest_fit():
    """Fit a forest on a shuffled split of the interview data.

    The table is separated into features and labels, split with
    ``math.floor(len(table) * 0.33)`` passed as the third argument to
    ``train_test_split``, and the training part is re-joined into rows that
    end with their label before being handed to ``fit`` together with 10
    and 100. The test requires one prediction per test instance and at
    least one correct prediction.
    """
    # interview dataset
    table = [["Senior", "Java", "no", "no", "False"],
             ["Senior", "Java", "no", "yes", "False"],
             ["Mid", "Python", "no", "no", "True"],
             ["Junior", "Python", "no", "no", "True"],
             ["Junior", "R", "yes", "no", "True"],
             ["Junior", "R", "yes", "yes", "False"],
             ["Mid", "R", "yes", "yes", "True"],
             ["Senior", "Python", "no", "no", "False"],
             ["Senior", "R", "yes", "no", "True"],
             ["Junior", "Python", "yes", "no", "True"],
             ["Senior", "Python", "yes", "yes", "True"],
             ["Mid", "Python", "no", "yes", "True"],
             ["Mid", "Java", "yes", "no", "True"],
             ["Junior", "Python", "no", "yes", "False"]]

    X, y = myutils.split_x_y_train(table)
    x_train, x_test, y_train, y_test = myevaluation.train_test_split(
        X, y, math.floor(len(table) * 0.33), shuffle=True)

    # Build new label-terminated rows rather than appending to the x_train
    # rows, so the lists returned by the split helpers stay untouched.
    remainder = [[*row, label] for row, label in zip(x_train, y_train)]

    print(remainder)

    myRF = MyRandomForestClassifier()
    myRF.fit(remainder, 10, 100)

    y_predicted = myRF.predict(x_test)

    assert len(y_predicted) == len(y_test)

    # Both sequences have equal length here, so pairing them is safe.
    count = sum(
        1 for predicted, actual in zip(y_predicted, y_test)
        if predicted == actual)

    assert count != 0
예제 #10
0
def test_random_forest_classifier_predict():
    """Check forest predictions on the interview and degrees datasets.

    For each dataset a fresh MyRandomForestClassifier is fitted with the
    extra positional arguments 20, 7 and 2, and its predictions for three
    instances are compared with fixed expected labels.
    """
    # --- interview dataset ---
    interview_features = [
        ["Senior", "Java", "no", "no"],
        ["Senior", "Java", "no", "yes"],
        ["Mid", "Python", "no", "no"],
        ["Junior", "Python", "no", "no"],
        ["Junior", "R", "yes", "no"],
        ["Junior", "R", "yes", "yes"],
        ["Mid", "R", "yes", "yes"],
        ["Senior", "Python", "no", "no"],
        ["Senior", "R", "yes", "no"],
        ["Junior", "Python", "yes", "no"],
        ["Senior", "Python", "yes", "yes"],
        ["Mid", "Python", "no", "yes"],
        ["Mid", "Java", "yes", "no"],
        ["Junior", "Python", "no", "yes"],
    ]
    interview_labels = [
        "False", "False", "True", "True", "True", "False", "True",
        "False", "True", "True", "True", "True", "True", "False",
    ]

    rf = MyRandomForestClassifier()
    rf.fit(interview_features, interview_labels, 20, 7, 2)

    interview_unseen = [
        ["Senior", "Java", "no", "no"],
        ["Senior", "Java", "no", "yes"],
        ["Mid", "Python", "no", "no"],
    ]
    pred = rf.predict(interview_unseen)
    # TODO (carried over from the original author): fix this
    assert pred == ["False", "False", "True"]

    # --- degrees dataset ---
    degrees_header = ["SoftEng", "ARIN", "HCI", "CSA", "Project", "Class"]
    degrees_table = [
        ["A", "B", "A", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "A", "FIRST"],
        ["A", "A", "A", "B", "B", "SECOND"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["A", "A", "B", "B", "A", "FIRST"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "B", "SECOND"],
        ["A", "A", "A", "A", "A", "FIRST"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["A", "B", "B", "A", "B", "SECOND"],
        ["B", "B", "B", "B", "A", "SECOND"],
        ["A", "A", "B", "A", "B", "FIRST"],
        ["B", "B", "B", "B", "A", "SECOND"],
        ["A", "A", "B", "B", "B", "SECOND"],
        ["B", "B", "B", "B", "B", "SECOND"],
        ["A", "A", "B", "A", "A", "FIRST"],
        ["B", "B", "B", "A", "A", "SECOND"],
        ["B", "B", "A", "A", "B", "SECOND"],
        ["B", "B", "B", "B", "A", "SECOND"],
        ["B", "A", "B", "A", "B", "SECOND"],
        ["A", "B", "B", "B", "A", "FIRST"],
        ["A", "B", "A", "B", "B", "SECOND"],
        ["B", "A", "B", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "B", "SECOND"],
    ]

    # NOTE(review): the rows have six columns, yet only the first four are
    # used as features and column 4 ("Project" per degrees_header) is used
    # as the label; the "Class" column is never read. The expected values
    # below ('A') match that choice -- confirm whether it is intended.
    degrees_features = [row[0:4] for row in degrees_table]
    degrees_labels = [row[4] for row in degrees_table]

    rf1 = MyRandomForestClassifier()
    rf1.fit(degrees_features, degrees_labels, 20, 7, 2)

    test_vals = [
        ["B", "B", "B", "B", "B"],
        ["A", "A", "A", "A", "A"],
        ["A", "A", "A", "A", "B"],
    ]

    assert rf1.predict(test_vals) == ['A', 'A', 'A']