def test_My_Random_Forest_Classifier_predict():
    """Compare forest predictions on the interview data with a traced result.

    The forest is built with the positional arguments ``(3, 2, 2, 1)``; the
    original author's comment describes these as N = 3, M = 2, F = 2 and
    seed = 1. It is fitted on the module-level ``interview_table`` and must
    answer ``['True', 'False']`` for the two instances in ``X_test``.
    """
    # Forest under test (N = 3, M = 2, F = 2, seed = 1 per the original note).
    rand_forest_test = MyRandomForestClassifier(3, 2, 2, 1)

    # Table wrapper around the raw data; it is populated but not read again
    # by this test.
    table = MyPyTable()
    table.data = interview_table
    table.column_names = interview_header

    # The last column of every row is the label, the rest are attributes.
    y_train = [inst[-1] for inst in interview_table]
    X_train = [inst[:-1] for inst in interview_table]

    X_test = [
        ["Junior", "Java", "yes", "no"],
        ["Junior", "Java", "yes", "yes"],
    ]

    # The classifier is given the attribute names (header minus the label).
    rand_forest_test.header = interview_header[:-1]
    rand_forest_test.fit(X_train, y_train)
    y_predicted = rand_forest_test.predict(X_test)
    print("y_predicted:", y_predicted)

    # Expected values come from a manual trace by the original author.
    expected = ['True', 'False']
    assert y_predicted == expected
def test_decision_tree_classifier_predict():
    """Fit a forest on the interview data and check two predictions.

    NOTE(review): despite its name, this test exercises
    ``MyRandomForestClassifier``; the name is kept so test selection by
    name keeps working.
    """
    forest = MyRandomForestClassifier()
    # The meaning of the four trailing numbers is defined by ``fit``, which
    # is not visible from here -- TODO confirm against its signature.
    forest.fit(interview_table, interview_class_train, 2, 20, 7, 3)

    unseen = [
        ["Mid", "Java", "yes", "no"],
        ["Junior", "Python", "no", "yes"],
    ]
    expected = ["True", "False"]
    assert forest.predict(unseen) == expected
def tune_parameters(M, N, F, dataset):
    """Print hold-out accuracy and error of a random forest over five rounds.

    Each round re-splits the attribute-reduced data with
    ``myevaluation.train_test_split(..., shuffle=True)``, fits a fresh
    ``MyRandomForestClassifier`` and scores its predictions after mapping
    both prediction and truth through ``get_useful_bin``.

    Args:
        M: Passed to ``MyRandomForestClassifier.fit`` as its second argument.
        N: Passed to ``MyRandomForestClassifier.fit`` as its third argument.
        F: Passed to ``select_random_attributes`` together with
            ``dataset.data``.
        dataset: Object whose ``data`` attribute holds the rows to use.

    Returns:
        None. All results are reported through ``print``.
    """
    print("M =", M, "N =", N, "F =", F)
    adjusted_dataset = select_random_attributes(F, dataset.data)

    for i in range(5):
        X, y = split_x_y_train(adjusted_dataset)
        x_train, x_test, y_train, y_test = myevaluation.train_test_split(
            X, y, shuffle=True)

        # Build the label-suffixed training rows as NEW lists. Appending to
        # the rows returned by the split would mutate them in place, which
        # corrupts later rounds if those rows share storage with the dataset.
        remainder = [[*row, y_train[j]] for j, row in enumerate(x_train)]

        myRF = MyRandomForestClassifier()
        myRF.fit(remainder, M, N)
        y_predict_rf = myRF.predict(x_test)

        total = len(y_predict_rf)
        if total == 0:
            # Nothing to score; dividing by ``total`` below would raise
            # ZeroDivisionError.
            print(i, "-- no predictions to score")
            continue

        # Index into y_test (rather than zip) so a length mismatch still
        # fails loudly instead of being silently truncated.
        count = sum(
            1
            for idx, predicted in enumerate(y_predict_rf)
            if get_useful_bin(predicted) == get_useful_bin(y_test[idx])
        )

        accuracy = count / total
        error = (total - count) / total
        print(i, "-- accuracy =", accuracy, "error =", error)
def test_random_forest_fit():
    """Fit a forest on a bootstrapped interview sample and score it.

    Prints accuracy and error rate for the held-out part of the sample and
    then asserts that every prediction equals the corresponding true label.
    As the printed message says, the classifier is randomised, so this
    test is not guaranteed to pass on every run.

    NOTE(review): another function named ``test_random_forest_fit`` appears
    later in this file; if both live in the same module the later
    definition replaces this one.
    """
    interview_header = ["level", "lang", "tweets", "phd", "interviewed_well"]
    interview_table = [
        ["Senior", "Java", "no", "no", "False"],
        ["Senior", "Java", "no", "yes", "False"],
        ["Mid", "Python", "no", "no", "True"],
        ["Junior", "Python", "no", "no", "True"],
        ["Junior", "R", "yes", "no", "True"],
        ["Junior", "R", "yes", "yes", "False"],
        ["Mid", "R", "yes", "yes", "True"],
        ["Senior", "Python", "no", "no", "False"],
        ["Senior", "R", "yes", "no", "True"],
        ["Junior", "Python", "yes", "no", "True"],
        ["Senior", "Python", "yes", "yes", "True"],
        ["Mid", "Python", "no", "yes", "True"],
        ["Mid", "Java", "yes", "no", "True"],
        ["Junior", "Python", "no", "yes", "False"],
    ]
    myutils.prepend_attribute_label(interview_table, interview_header)

    # Separate the label column from the attribute columns.
    interview_pytable = MyPyTable(
        column_names=interview_header, data=interview_table)
    y_col = interview_pytable.get_column("interviewed_well", False)
    x_cols = interview_pytable.drop_col("interviewed_well")

    many_trees = MyRandomForestClassifier()

    # Bootstrap first, then hold out part of the sample (0.33 is handed to
    # the project's train_test_split as its third argument).
    X_sample, y_sample = myutils.compute_bootstrapped_sample(x_cols, y_col)
    X_train, X_test, y_train, y_test = myutils.train_test_split(
        X_sample, y_sample, .33)

    many_trees.fit(X_train, y_train, X_test, y_test)
    y_predicted = many_trees.predict(X_test)

    # Tally hits and misses over the held-out labels.
    total = len(y_test)
    hits = sum(
        1 for i, actual in enumerate(y_test) if y_predicted[i] == actual)
    misses = total - hits

    accuracy = np.round(hits / (hits + misses), 3)
    error_rate = np.round(misses / (hits + misses), 3)

    print("-----------------------------------------------------------")
    print("Accuracy and Error Rate")
    print("-----------------------------------------------------------")
    print()
    print("Random Forest: accuracy = {}, error rate = {}".format(
        accuracy, error_rate))
    print()
    print(
        "Because of the random aspect of this classifier, this will not always pass the tests"
    )
    print()
    print("Predicted table: " + str(y_predicted))
    print("Testing set: " + str(y_test))

    for i, actual in enumerate(y_test):
        assert y_predicted[i] == actual
def test_MyRandomForestClassifier_predict():
    """Check seeded forest predictions for three interview instances.

    Seeds ``random`` with 1, fits ``MyRandomForestClassifier(100, 2, 2)`` on
    the module-level ``interview_table`` and expects ``'True'`` for all
    three rows of ``X_test``.
    """
    random.seed(1)

    # NOTE(review): the third row carries five values (a trailing "False")
    # while the other rows carry four -- confirm this is intended.
    X_test = [
        ["Junior", "R", "yes", "no"],
        ["Junior", "Python", "no", "yes"],
        ["Senior", "Java", "no", "no", "False"],
    ]

    # First entry of X_train is the attribute header with the class column
    # removed.
    # NOTE(review): this makes X_train one entry longer than y_train;
    # presumably fit() expects a leading header row -- verify.
    header = ["level", "lang", "tweets", "phd", "interviewed_well"][:-1]
    X_train = [header]

    # Attribute columns: everything except the last column of each row.
    for record in interview_table:
        width = len(interview_table[0]) - 1
        X_train.append([record[col] for col in range(width)])

    # Labels: last column of each row.
    y_train = [record[-1] for record in interview_table]

    # Create a MyRandomForestClassifier object and fit it.
    test_fit = MyRandomForestClassifier(100, 2, 2)
    actual = ['True', 'True', 'True']
    test_fit.fit(X_train, y_train)

    predicted = test_fit.predict(X_test)
    assert predicted == actual
def test_random_forest_classifier_predict():
    """Fit a forest via MyPyTable columns and expect "True" for three rows.

    NOTE(review): another function named
    ``test_random_forest_classifier_predict`` appears later in this file; if
    both live in the same module the later definition replaces this one.
    """
    # NOTE(review): each test row has five values (the last looks like the
    # class label) while training uses four attribute columns -- confirm
    # predict() ignores the extra value.
    X_test = [
        ["Mid", "Python", "no", "no", "True"],
        ["Mid", "R", "yes", "yes", "True"],
        ["Mid", "Python", "no", "yes", "True"],
    ]
    y_test = ["True", "True", "True"]

    mp_table = MyPyTable(interview_header, interview_table)

    # Label column first, then the attribute columns, as in the original.
    y_train = mp_table.get_column('interviewed_well')
    attribute_names = ["level", "lang", "tweets", "phd"]
    X_train = mp_table.get_rows(attribute_names)

    myRF = MyRandomForestClassifier(N=4, M=2, F=4)
    myRF.fit(X_train, y_train)
    predictions = myRF.predict(X_test)

    for position, predicted in enumerate(predictions):
        assert predicted == y_test[position]
def test_simple_linear_regressor_fit():
    """Check that forest predictions stay inside the training label domain.

    NOTE(review): despite its name, this test exercises
    ``MyRandomForestClassifier(2, 5, 3)``, not a linear regressor.
    """
    # Interview data as (attributes, label) pairs.
    labelled_rows = [
        (["Senior", "Java", "no", "no"], "False"),
        (["Senior", "Java", "no", "yes"], "False"),
        (["Mid", "Python", "no", "no"], "True"),
        (["Junior", "Python", "no", "no"], "True"),
        (["Junior", "R", "yes", "no"], "True"),
        (["Junior", "R", "yes", "yes"], "False"),
        (["Mid", "R", "yes", "yes"], "True"),
        (["Senior", "Python", "no", "no"], "False"),
        (["Senior", "R", "yes", "no"], "True"),
        (["Junior", "Python", "yes", "no"], "True"),
        (["Senior", "Python", "yes", "yes"], "True"),
        (["Mid", "Python", "no", "yes"], "True"),
        (["Mid", "Java", "yes", "no"], "True"),
        (["Junior", "Python", "no", "yes"], "False"),
    ]
    X_train = [attributes for attributes, _ in labelled_rows]
    y_train = [label for _, label in labelled_rows]

    myline = MyRandomForestClassifier(2, 5, 3)
    y_domain = myutils.get_unique(y_train)
    myline.fit(X_train, y_train)

    unseen = [
        ["Junior", "Python", "no", "yes"],
        ["Mid", "Java", "yes", "no"],
    ]
    prediction = myline.predict(unseen)

    # Only membership is checked; exact labels are not pinned here.
    assert all(val in y_domain for val in prediction)
def test_random_forest_predict():
    """Check two predictions of a forest built with n=4, m=2, f=2, seed=2."""
    # Interview data as (attributes, label) pairs.
    labelled_rows = [
        (["Senior", "Java", "no", "no"], "False"),
        (["Senior", "Java", "no", "yes"], "False"),
        (["Mid", "Python", "no", "no"], "True"),
        (["Junior", "Python", "no", "no"], "True"),
        (["Junior", "R", "yes", "no"], "True"),
        (["Junior", "R", "yes", "yes"], "False"),
        (["Mid", "R", "yes", "yes"], "True"),
        (["Senior", "Python", "no", "no"], "False"),
        (["Senior", "R", "yes", "no"], "True"),
        (["Junior", "Python", "yes", "no"], "True"),
        (["Senior", "Python", "yes", "yes"], "True"),
        (["Mid", "Python", "no", "yes"], "True"),
        (["Mid", "Java", "yes", "no"], "True"),
        (["Junior", "Python", "no", "yes"], "False"),
    ]
    X = [attributes for attributes, _ in labelled_rows]
    y = [label for _, label in labelled_rows]

    forest = MyRandomForestClassifier(n=4, m=2, f=2, seed=2)
    forest.fit(X, y)

    unseen = [
        ["Junior", "Python", "no", "yes"],
        ["Mid", "Java", "yes", "no"],
    ]
    y_predicted = forest.predict(unseen)

    y_actual = ['False', 'True']
    assert y_predicted == y_actual
def test_random_forest_fit():
    """Fit a forest on a shuffled split and require at least one correct hit.

    The interview table is split with ``myutils.split_x_y_train`` and
    ``myevaluation.train_test_split`` (shuffled). The labels are re-attached
    to the training rows before calling ``fit(remainder, 10, 100)``. The
    test passes when one prediction per test row is returned and at least
    one of them is correct.

    NOTE(review): another function named ``test_random_forest_fit`` appears
    earlier in this file; if both live in the same module this definition
    replaces the earlier one.
    """
    # interview dataset
    table = [
        ["Senior", "Java", "no", "no", "False"],
        ["Senior", "Java", "no", "yes", "False"],
        ["Mid", "Python", "no", "no", "True"],
        ["Junior", "Python", "no", "no", "True"],
        ["Junior", "R", "yes", "no", "True"],
        ["Junior", "R", "yes", "yes", "False"],
        ["Mid", "R", "yes", "yes", "True"],
        ["Senior", "Python", "no", "no", "False"],
        ["Senior", "R", "yes", "no", "True"],
        ["Junior", "Python", "yes", "no", "True"],
        ["Senior", "Python", "yes", "yes", "True"],
        ["Mid", "Python", "no", "yes", "True"],
        ["Mid", "Java", "yes", "no", "True"],
        ["Junior", "Python", "no", "yes", "False"],
    ]

    X, y = myutils.split_x_y_train(table)
    # Third argument: 33% of the row count, rounded down.
    holdout = math.floor(len(table) * 0.33)
    x_train, x_test, y_train, y_test = myevaluation.train_test_split(
        X, y, holdout, shuffle=True)

    # Re-attach each label to its training row (the rows are extended in
    # place, as in the original).
    remainder = []
    for i, features in enumerate(x_train):
        features.append(y_train[i])
        remainder.append(features)
    print(remainder)

    myRF = MyRandomForestClassifier()
    myRF.fit(remainder, 10, 100)
    y_predicted = myRF.predict(x_test)

    assert len(y_predicted) == len(y_test)

    count = sum(
        1 for i, predicted in enumerate(y_predicted) if predicted == y_test[i])
    assert count != 0
def test_random_forest_classifier_predict():
    """Check forest predictions on the interview and degrees datasets.

    Both forests are fitted with the trailing arguments ``20, 7, 2``.

    NOTE(review): another function named
    ``test_random_forest_classifier_predict`` appears earlier in this file;
    if both live in the same module this definition replaces the earlier
    one.
    """
    # --- interview dataset -------------------------------------------------
    interview_rows = [
        ["Senior", "Java", "no", "no"],
        ["Senior", "Java", "no", "yes"],
        ["Mid", "Python", "no", "no"],
        ["Junior", "Python", "no", "no"],
        ["Junior", "R", "yes", "no"],
        ["Junior", "R", "yes", "yes"],
        ["Mid", "R", "yes", "yes"],
        ["Senior", "Python", "no", "no"],
        ["Senior", "R", "yes", "no"],
        ["Junior", "Python", "yes", "no"],
        ["Senior", "Python", "yes", "yes"],
        ["Mid", "Python", "no", "yes"],
        ["Mid", "Java", "yes", "no"],
        ["Junior", "Python", "no", "yes"],
    ]
    interview_labels = [
        "False", "False", "True", "True", "True", "False", "True",
        "False", "True", "True", "True", "True", "True", "False",
    ]

    rf = MyRandomForestClassifier()
    rf.fit(interview_rows, interview_labels, 20, 7, 2)

    # The first three training rows double as the test instances.
    X_test = [
        ["Senior", "Java", "no", "no"],
        ["Senior", "Java", "no", "yes"],
        ["Mid", "Python", "no", "no"],
    ]
    pred = rf.predict(X_test)
    assert pred == ["False", "False", "True"]

    # TODO: fix this
    # --- degrees dataset ---------------------------------------------------
    degrees_header = ["SoftEng", "ARIN", "HCI", "CSA", "Project", "Class"]
    degrees_table = [
        ["A", "B", "A", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "A", "FIRST"],
        ["A", "A", "A", "B", "B", "SECOND"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["A", "A", "B", "B", "A", "FIRST"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "B", "SECOND"],
        ["A", "A", "A", "A", "A", "FIRST"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["A", "B", "B", "A", "B", "SECOND"],
        ["B", "B", "B", "B", "A", "SECOND"],
        ["A", "A", "B", "A", "B", "FIRST"],
        ["B", "B", "B", "B", "A", "SECOND"],
        ["A", "A", "B", "B", "B", "SECOND"],
        ["B", "B", "B", "B", "B", "SECOND"],
        ["A", "A", "B", "A", "A", "FIRST"],
        ["B", "B", "B", "A", "A", "SECOND"],
        ["B", "B", "A", "A", "B", "SECOND"],
        ["B", "B", "B", "B", "A", "SECOND"],
        ["B", "A", "B", "A", "B", "SECOND"],
        ["A", "B", "B", "B", "A", "FIRST"],
        ["A", "B", "A", "B", "B", "SECOND"],
        ["B", "A", "B", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "B", "SECOND"],
    ]

    # NOTE(review): per degrees_header, index 4 is "Project" and index 5 is
    # "Class". The slices below therefore train on four attributes with the
    # Project grade as the label, and the expected output is ['A', 'A', 'A']
    # rather than FIRST/SECOND. This looks like an off-by-one and may be
    # what the TODO above refers to -- confirm before changing, because the
    # expected predictions would have to be re-derived.
    X_train = [row[:4] for row in degrees_table]
    y_train = [row[4] for row in degrees_table]

    rf1 = MyRandomForestClassifier()
    rf1.fit(X_train, y_train, 20, 7, 2)

    test_vals = [
        ["B", "B", "B", "B", "B"],
        ["A", "A", "A", "A", "A"],
        ["A", "A", "A", "A", "B"],
    ]
    assert rf1.predict(test_vals) == ['A', 'A', 'A']