def test_My_Random_Forest_Classifier_predict():
    """Check random-forest predictions for two unseen interview instances.

    Fits a ``MyRandomForestClassifier`` constructed with N = 3, M = 2, F = 2
    and seed = 1 on the interview dataset, predicts two hand-written
    instances and compares the result with the traced answer
    ``['True', 'False']``.
    """
    # Tests with N = 3, M = 2, F = 2 and seed = 1
    rand_forest_test = MyRandomForestClassifier(3, 2, 2, 1)

    # The last value of each interview row is used as the label; everything
    # before it is used as the attribute values.
    X_train = [inst[:-1] for inst in interview_table]
    y_train = [inst[-1] for inst in interview_table]

    # Unseen instances to classify
    X_test = [
        ["Junior", "Java", "yes", "no"],
        ["Junior", "Java", "yes", "yes"],
    ]

    # The classifier is given the header without its final (label) column,
    # matching the shape of X_train.
    rand_forest_test.header = interview_header[:-1]
    rand_forest_test.fit(X_train, y_train)
    y_predicted = rand_forest_test.predict(X_test)
    print("y_predicted:", y_predicted)

    # Trace test
    assert y_predicted == ['True', 'False']
def confusionCategorical(yTrue, yTest, header, categories):
    """Build a confusion-matrix table for categorical labels.

    Column layout used by the code: column 0 holds the row's category,
    columns 1..len(categories) hold prediction counts, column
    len(categories) + 1 holds the row total, and the last column of
    ``header`` holds the recognition percentage.

    Args:
        yTrue: actual labels; each one is looked up in ``categories`` to
            choose the row.
        yTest: predicted labels, read at the same positions as ``yTrue``;
            each one is looked up in ``header`` to choose the column.
        header: column names assigned to the returned table.
        categories: row labels, one table row per entry, in this order.

    Returns:
        A ``MyPyTable`` whose ``column_names`` is ``header`` and whose
        ``data`` holds one row per category.
    """
    table = MyPyTable()
    table.column_names = header

    # One row per category: its label followed by a zero for every other column.
    zero_count = len(header) - 1
    table.data = [[label] + [0] * zero_count for label in categories]

    # Tally every (actual, predicted) pair into its cell.
    for position, actual in enumerate(yTrue):
        row_at = categories.index(actual)
        col_at = header.index(yTest[position])
        table.data[row_at][col_at] += 1

    # Row totals go in the column right after the per-category counts.
    total_col = len(categories) + 1
    for row in table.data:
        row[total_col] = sum(row[k] for k in range(1, total_col))

    # Recognition = this row's own cell (offset by the label column) over the
    # row total, as a percentage rounded to 2 decimals. Rows with a zero total
    # keep their initial 0.
    # NOTE(review): the "own cell" lookup assumes the count columns in
    # ``header`` follow the same order as ``categories`` — confirm with callers.
    percent_col = len(header) - 1
    for diagonal, row in enumerate(table.data):
        row_total = row[total_col]
        if row_total == 0:
            continue
        row[percent_col] = round(100 * (row[diagonal + 1] / row_total), 2)

    return table
def test_My_Random_Forest_Classifier_fit():
    """Fit a random forest on the interview dataset and read back its trees.

    Builds a ``MyRandomForestClassifier`` with N = 3, M = 2, F = 2 and
    seed = 0, fits it on the interview rows with the "interviewed_well"
    column as labels, and then reads the classifier's ``trees`` attribute.

    NOTE(review): the rows handed to ``fit`` are the full interview rows and
    the header is the full header, so the label column appears to be included
    among the features — confirm this is intended.
    NOTE(review): nothing is asserted about the fitted trees; the test only
    fails if one of the calls raises.
    """
    # N = 3, M = 2, F = 2 and seed = 0
    forest = MyRandomForestClassifier(3, 2, 2, 0)

    # Wrap the interview data in a table so the label column can be pulled
    # out by name.
    interview = MyPyTable()
    interview.data = interview_table
    interview.column_names = interview_header

    training_rows = interview_table
    labels = interview.get_column("interviewed_well")

    # Fit on the interview dataset
    forest.header = interview_header
    forest.fit(training_rows, labels)
    fitted_trees = forest.trees
from mysklearn.mypytable import MyPyTable

# Script: trim the Australian weather dataset down to the rows of a single
# city, print a short preview and save the subset to "<city>_weather.csv".

# Object declaration
table = MyPyTable()

# City whose rows are kept
city = "Sydney"
table.load_from_file("weatherAUS.csv")

# Force the first column's name to 'Location' before grouping on it.
# NOTE(review): presumably the name read from the file does not come back as
# exactly 'Location' (e.g. stray characters in the header) — confirm.
table.column_names[0] = 'Location'
names, tables = table.group_by("Location")

# names.index raises ValueError if the city is not present in the data.
city_index = names.index(city)

# Preview up to the first 10 rows. Slicing rather than indexing 0..9 keeps the
# script from raising IndexError (and skipping the save below) when the city
# has fewer than 10 rows.
print("\n")
for row in tables[city_index][:10]:
    print(row)

# Keep only the selected city's rows and write them out.
table.data = tables[city_index]
table.save_to_file(city+"_weather.csv")