def test_all_file(self):
        """Train a forest on benchmark.csv and check every row of that file.

        Each row of the training frame is fed back to the trained model
        (with the label column removed) and the prediction must equal the
        row's own label.
        """
        label_column = "Joga"
        options = {
            'df': pd.read_csv("benchmark.csv", sep=';'),
            'label_column': label_column,
            'n_trees': 5,
            'bootstrap_size': 10,
        }
        model = RandomForest().train(options)

        # Read the frame back out of ``options`` after training, exactly as
        # the assertion loop always has.
        for _, sample in options['df'].iterrows():
            expected = sample[label_column]
            features = sample.drop(label_column)
            self.assertEqual(expected, model.predict(features))
    def test_benchmark(self):
        """Train a forest on benchmark.csv and classify one hand-built sample.

        The sample is a ``pd.Series`` indexed by the four feature names and
        the trained model is expected to predict ``'Sim'`` for it.
        """
        options = {
            'df': pd.read_csv("benchmark.csv", sep=';'),
            'label_column': "Joga",
            'n_trees': 5,
            'bootstrap_size': 10,
        }
        model = RandomForest().train(options)

        feature_names = ["Tempo", "Temperatura", "Umidade", "Ventoso"]
        feature_values = ["Ensolarado", "Quente", "Normal", "Verdadeiro"]
        inf_data = pd.Series(
            feature_values, index=feature_names, name="InferenceData")

        prediction = model.predict(inf_data)
        self.assertEqual(prediction, 'Sim')
Beispiel #3
0
# Decision tree: fit on train_data/train_label, predict validation_data and
# print the fraction of predictions that equal the matching validation label.
tree = DecisionTree(5, train_data.shape[0])
tree.train(train_data, train_label)
res = tree.predict(validation_data)
score = sum(
    1 for i in range(len(res)) if res[i] == validation_label[i]
) / len(res)
print(score)


# Random forest: same evaluation as for the single tree -- fit, predict the
# validation set, then print the share of predictions matching the labels.
forest = RandomForest(100, 5, train_data.shape[0], 6)
forest.train(train_data, train_label)
res = forest.predict(validation_data)

matches = [
    1 for i in range(len(res)) if res[i] == validation_label[i]
]
score = len(matches) / len(res)
print(score)


# write to csv
# with open('titanic_prediction.csv', 'wt') as f:
#     writer = csv.writer(f, delimiter=',')
#     writer.writerow(['Id', 'Category'])
#     for i, cat in enumerate(res):
Beispiel #4
0
# Fit the tree (constructed earlier) and score its predictions.
tree.train(train_data, train_label)
# NOTE(review): only the `[:1, :]` slice of validation_data is predicted
# here, so the printed score presumably covers a single sample -- confirm
# this is intended and not a debugging leftover.
res = tree.predict(validation_data[:1, :])

score = sum(
    1 for i in range(len(res)) if res[i] == validation_label[i]
) / len(res)
print(score)


# Random forest: fit on the training data, predict the whole validation set
# and print the fraction of predictions that equal the validation labels.
n_rows, n_cols = train_data.shape[0], train_data.shape[1]
rf = RandomForest(10, 10, n_rows, n_cols)
rf.train(train_data, train_label)
res = rf.predict(validation_data)

matches = [
    1 for i in range(len(res)) if res[i] == validation_label[i]
]
score = len(matches) / len(res)
print(score)

# with open('titanic_prediction.csv', 'wt') as f:
#     writer = csv.writer(f, delimiter=',')
#     writer.writerow(['Id', 'Category'])
#     for i, cat in enumerate(res):
#         writer.writerow([str(i + 1), str(cat)])