tree = DecisionTree(5, train_data.shape[0])
tree.train(train_data, train_label)
res = tree.predict(validation_data)
score = 0
for i in range(len(res)):
    if res[i] == validation_label[i]:
        score += 1
score /= len(res)
print(score)

# random forest
forest = RandomForest(100, 5, train_data.shape[0], 6)
forest.train(train_data, train_label)
res = forest.predict(validation_data)
score = 0
for i in range(len(res)):
    if res[i] == validation_label[i]:
        score += 1
score /= len(res)
print(score)

# write to csv
# with open('titanic_prediction.csv', 'wt') as f:
#     writer = csv.writer(f, delimiter=',')
#     writer.writerow(['Id', 'Category'])
#     for i, cat in enumerate(res):
#         writer.writerow([str(i + 1), str(cat)])
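
# A sketch of the same submission file written with pandas instead of the csv module.
# It mirrors the commented-out block above and is not part of the original script;
# it assumes `res` holds the predicted categories in row order and that pandas is available.
# import pandas as pd
# pd.DataFrame({'Id': range(1, len(res) + 1), 'Category': res}).to_csv('titanic_prediction.csv', index=False)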
# decision tree: evaluate accuracy on the validation split
tree.train(train_data, train_label)
res = tree.predict(validation_data)
score = 0
for i in range(len(res)):
    if res[i] == validation_label[i]:
        score += 1
score /= len(res)
print(score)

# random forest
rf = RandomForest(10, 10, train_data.shape[0], train_data.shape[1])
rf.train(train_data, train_label)
res = rf.predict(validation_data)
score = 0
for i in range(len(res)):
    if res[i] == validation_label[i]:
        score += 1
score /= len(res)
print(score)

# write predictions to csv
# with open('titanic_prediction.csv', 'wt') as f:
#     writer = csv.writer(f, delimiter=',')
#     writer.writerow(['Id', 'Category'])
#     for i, cat in enumerate(res):
#         writer.writerow([str(i + 1), str(cat)])
import pandas as pd
import numpy as np
from randomForest import RandomForest
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# load features and labels, then join them into one training frame
train_x = pd.read_csv('./data/x_train.csv')
train_y = pd.read_csv('./data/y_train.csv')
train_data = pd.merge(train_x, train_y)

forest = RandomForest(depth=5, min_sample_leaf=13, min_gini=0.001, n_tree=20)
train_set, eval_set = train_test_split(train_data, test_size=0.2)
forest.fit(train_set)
result = forest.predict(eval_set)
forest.save()

print('accuracy ', accuracy_score(eval_set['label'], result))
print('precision', precision_score(eval_set['label'], result))
print('recall   ', recall_score(eval_set['label'], result))
print('f1_score ', f1_score(eval_set['label'], result))
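
# A minimal add-on sketch (not part of the original script): sklearn's classification_report
# summarizes per-class precision, recall, and f1 in one call, assuming `eval_set` and
# `result` are defined as above.
from sklearn.metrics import classification_report
print(classification_report(eval_set['label'], result))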
# shuffle the 150 samples and split them into 100 training and 50 test indices
indices = np.arange(150)
np.random.shuffle(indices)
train_idx, test_idx = indices[:100], indices[100:]
Xtrain, Ytrain = X[train_idx], Y[train_idx]
Xtest, Ytest = X[test_idx], Y[test_idx]

# test of decision tree
# model = dtc(6, 10)
# model.build_tree(Xtrain, Ytrain)
# print('-------------------------------------------------------------------------------------------------------')
# predicted_classes = model.predict(Xtest)
# score = 0
# for i in range(len(predicted_classes)):
#     score += (predicted_classes[i] == Ytest[i])
# print("the algorithm has an accuracy of ", score / len(predicted_classes))

# test of random forest
m = RandomForest(3, 2, 5)
m.build_forest(Xtrain, Ytrain)
predicted_classes = m.predict(Xtest)
score = 0
for i in range(len(predicted_classes)):
    score += (predicted_classes[i] == Ytest[i])
print("the algorithm has an accuracy of ", score / len(predicted_classes))
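
# A minimal sketch of the same accuracy computed with numpy instead of the manual loop.
# Not part of the original script; it assumes `predicted_classes` is an array-like aligned with Ytest.
predicted_classes = np.asarray(predicted_classes)
print("vectorized accuracy:", np.mean(predicted_classes == np.asarray(Ytest)))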