# Example #1
# (stray cell output: 0)
# Train a single decision tree and report its accuracy on the validation split.
# DecisionTree(5, n) — presumably max depth 5 and a per-tree sample count of
# the full training set; confirm against DecisionTree's constructor.
tree = DecisionTree(5, train_data.shape[0])
tree.train(train_data, train_label)
res = tree.predict(validation_data)

# Accuracy = fraction of validation samples predicted correctly.
score = sum(
    1 for predicted, actual in zip(res, validation_label) if predicted == actual
) / len(res)
print(score)


# random forest

# Train a random forest and report validation accuracy.
# RandomForest(100, 5, n, 6) — presumably (n_trees, max_depth, sample_size,
# n_features); confirm against RandomForest's constructor.
forest = RandomForest(100, 5, train_data.shape[0], 6)
forest.train(train_data, train_label)
res = forest.predict(validation_data)

# Accuracy = fraction of validation samples predicted correctly.
score = sum(
    1 for predicted, actual in zip(res, validation_label) if predicted == actual
) / len(res)
print(score)


# write to csv
# with open('titanic_prediction.csv', 'wt') as f:
#     writer = csv.writer(f, delimiter=',')
#     writer.writerow(['Id', 'Category'])
#     for i, cat in enumerate(res):
#         writer.writerow([str(i + 1), str(cat)])
# Example #2
# (stray cell output: 0)
# Train the decision tree and evaluate it on the full validation split.
# Previously only the first row was predicted (`validation_data[:1, :]`),
# so the "accuracy" below was computed over a single sample and was
# meaningless — presumably a leftover debugging slice.
tree.train(train_data, train_label)
res = tree.predict(validation_data)

# Accuracy = fraction of validation samples predicted correctly.
score = sum(
    1 for predicted, actual in zip(res, validation_label) if predicted == actual
) / len(res)
print(score)


# random forest

# Train a random forest and report validation accuracy.
# RandomForest(10, 10, n_samples, n_features) — presumably
# (n_trees, max_depth, sample_size, feature_count); confirm against
# RandomForest's constructor.
rf = RandomForest(10, 10, train_data.shape[0], train_data.shape[1])
rf.train(train_data, train_label)
res = rf.predict(validation_data)

# Accuracy = fraction of validation samples predicted correctly.
score = sum(
    1 for predicted, actual in zip(res, validation_label) if predicted == actual
) / len(res)
print(score)

# with open('titanic_prediction.csv', 'wt') as f:
#     writer = csv.writer(f, delimiter=',')
#     writer.writerow(['Id', 'Category'])
#     for i, cat in enumerate(res):
#         writer.writerow([str(i + 1), str(cat)])

import pandas as pd
import numpy as np
from randomForest import RandomForest
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# Load the training features and labels from disk.
train_x = pd.read_csv('./data/x_train.csv')
train_y = pd.read_csv('./data/y_train.csv')

# Join features with labels into a single frame.
# NOTE(review): pd.merge with no `on=` joins on every shared column —
# presumably x_train/y_train share an id column; confirm.
train_data = pd.merge(train_x, train_y)

# Hold out 20% for evaluation, fit the forest on the rest, predict the
# hold-out split, and persist the trained model.
forest = RandomForest(depth=5, min_sample_leaf=13, min_gini=0.001, n_tree=20)
train_set, eval_set = train_test_split(train_data, test_size=0.2)
forest.fit(train_set)
result = forest.predict(eval_set)
forest.save()

# Report standard classification metrics on the hold-out set.
for label, metric in (
    ('ac ', accuracy_score),
    ('precision ', precision_score),
    ('recall ', recall_score),
    ('f1_score ', f1_score),
):
    print(label, metric(eval_set['label'], result))
# Randomly partition 150 samples into 100 training / 50 test indices.
# NOTE(review): assumes X and Y (defined elsewhere) each have >= 150 rows.
indices = np.arange(150)
np.random.shuffle(indices)
# Renamed `train_dx` -> `train_idx` for consistency with `test_idx`.
train_idx, test_idx = indices[:100], indices[100:]
Xtrain, Ytrain = X[train_idx], Y[train_idx]
Xtest, Ytest = X[test_idx], Y[test_idx]

# test of decision tree
# model = dtc(6,10)
# model.build_tree(Xtrain,Ytrain)
# print('-------------------------------------------------------------------------------------------------------')

# predicted_calsses = model.predict(Xtest)

# score= 0
# for i in range(len(predicted_calsses)):
#     score += (predicted_calsses[i]==Ytest[i])

# print("the algorithm has an accuracy of ",score/len(predicted_calsses))

# test of random forest
# Sanity check: build a small random forest and report hold-out accuracy.
# RandomForest(3, 2, 5) — argument meanings not visible here; confirm against
# RandomForest's constructor.
m = RandomForest(3, 2, 5)
m.build_forest(Xtrain, Ytrain)

# Fixed local typo: `predicted_calsses` -> `predicted_classes`.
predicted_classes = m.predict(Xtest)

# Accuracy = fraction of test samples predicted correctly.
score = sum(
    1 for predicted, actual in zip(predicted_classes, Ytest) if predicted == actual
)

print("the algorithm has an accuracy of ", score / len(predicted_classes))