コード例 #1
0
 def test_select_best_attribute_to_split(self):
     """Check which attribute a node picks as its best split.

     A TreeNode is given the watermelon 2.0 samples with candidate
     attributes 0..5; attribute 3 is the expected choice.
     """
     node = TreeNode()
     dataset = Dataset(_dataset_name='watermelon_2.0',
                       _dataset_file_path='./datasets/watermelon.csv')
     dataset.load_dataset(verbose=False)

     candidate_attributes = list(range(6))
     node.set_samples(dataset.samples)
     node.set_attribute_list(candidate_attributes)

     chosen_attribute = node.select_best_attribute_to_split()
     self.assertEqual(chosen_attribute, 3)
コード例 #2
0
    def test_get_accuracy(self):
        """Build a RandomForest(10) on watermelon 3.0 and query its accuracy.

        The accuracy is requested for the same samples the forest was built
        from. The returned value is not asserted on, so this test only
        verifies that the calls complete without raising.
        """
        dataset = Dataset(_dataset_name='watermelon_3.0',
                          _dataset_file_path='./datasets/watermelon2.csv')
        dataset.load_dataset(verbose=False)

        forest = RandomForest(10)
        forest.set_dataset(dataset)
        forest.generate_random_forest()

        # Return value deliberately ignored: no expected accuracy is pinned.
        forest.get_accuracy(dataset.samples)
コード例 #3
0
    def test_predict_batch(self):
        """Build a RandomForest(10) on watermelon 3.0 and run a batch prediction.

        The predictions are not inspected; the test passes as long as
        predict_batch() runs on the dataset's samples without raising.
        """
        dataset = Dataset(_dataset_name='watermelon_3.0',
                          _dataset_file_path='./datasets/watermelon2.csv')
        dataset.load_dataset(verbose=False)

        forest = RandomForest(10)
        forest.set_dataset(dataset)
        forest.generate_random_forest()

        # Bound to a name only to make the intent explicit; never checked.
        _batch_predictions = forest.predict_batch(dataset.samples)
コード例 #4
0
 def test_split_by_attribute_internal(self):
     """Split the watermelon 2.0 samples on attribute 3 and check group sizes.

     The mapping returned by split_by_attribute_internal(3) is expected to
     hold 9, 5 and 3 rows under keys 0, 1 and 2 respectively.
     """
     node = TreeNode()
     dataset = Dataset(_dataset_name='watermelon_2.0',
                       _dataset_file_path='./datasets/watermelon.csv')
     dataset.load_dataset(verbose=False)
     node.set_samples(dataset.samples)
     node.set_attribute_list(list(range(6)))

     samples_by_value = node.split_by_attribute_internal(3)

     # (key in the returned mapping, expected number of rows)
     expected_row_counts = ((0, 9), (1, 5), (2, 3))
     for value_key, row_count in expected_row_counts:
         self.assertEqual(samples_by_value[value_key].shape[0], row_count)
コード例 #5
0
 def test_generate_decision_tree_continuous(self):
     """Generate a decision tree from the watermelon 3.0 samples.

     Nothing is asserted about the resulting tree; the test passes as long
     as generation completes without raising.
     """
     dataset = Dataset(_dataset_name='watermelon_3.0',
                       _dataset_file_path='./datasets/watermelon2.csv')
     dataset.load_dataset(verbose=False)

     decision_tree = DecisionTree()
     decision_tree.set_training_samples_root(dataset.samples)

     # NOTE(review): the attribute list is set twice in a row. The second
     # call presumably supersedes the plain index list with the dataset's
     # feature category list -- confirm whether the first call is needed.
     feature_indices = list(range(dataset.num_features))
     decision_tree.set_attributes_list(feature_indices)
     decision_tree.set_attributes_list(dataset.feature_category_list)

     # The generated root is not inspected.
     decision_tree.generate_decision_tree(
         decision_tree.training_samples_root,
         decision_tree.attributes_list)
コード例 #6
0
 def test_generate_decision_tree(self):
     """Generate a tree from watermelon 2.0 and check one node's fan-out.

     The node reached via child 0 of the root and then child 1 of that
     node is expected to have exactly 3 children.
     """
     dataset = Dataset(_dataset_name='watermelon_2.0',
                       _dataset_file_path='./datasets/watermelon.csv')
     dataset.load_dataset(verbose=False)

     decision_tree = DecisionTree()
     decision_tree.set_training_samples_root(dataset.samples)
     decision_tree.set_attributes_list(dataset.feature_category_list)

     root = decision_tree.generate_decision_tree(
         decision_tree.training_samples_root,
         decision_tree.attributes_list)

     # Walk root -> child 0 -> child 1 step by step.
     first_child = root.child_node_list[0]
     inspected_node = first_child.child_node_list[1]
     self.assertEqual(len(inspected_node.child_node_list), 3)
コード例 #7
0
 def test_vis_tree_1(self):
     """Generate a tree from watermelon 2.0 and render it with VisTree.

     The tree is visualised with mode=1 under the name
     "test_decision_tree". Nothing is asserted; the test passes as long as
     rendering completes without raising.
     """
     dataset = Dataset(_dataset_name='watermelon_2.0',
                       _dataset_file_path='./datasets/watermelon.csv')
     dataset.load_dataset(verbose=False)

     decision_tree = DecisionTree()
     decision_tree.set_training_samples_root(dataset.samples)
     decision_tree.set_attributes_list(dataset.feature_category_list)

     root = decision_tree.generate_decision_tree(
         decision_tree.training_samples_root,
         decision_tree.attributes_list)
     # The root is attached to the tree before the visualiser is built.
     decision_tree.set_root(root)

     visualiser = VisTree(
         decision_tree,
         dataset.feature2number_mapping,
         dataset.feature_name_list,
         _tree_name="test_decision_tree",
     )
     visualiser.vis_tree(mode=1)
コード例 #8
0
 def test_generate_random_decision_tree_2(self):
     """Generate two trees from watermelon 3.0 with random_state 1 and 2.

     The generated roots are not compared or inspected; the test passes as
     long as both generations complete without raising.
     """
     dataset = Dataset(_dataset_name='watermelon_3.0',
                       _dataset_file_path='./datasets/watermelon2.csv')
     dataset.load_dataset(verbose=False)

     decision_tree = DecisionTree()
     decision_tree.set_training_samples_root(dataset.samples)
     decision_tree.set_attributes_list(dataset.feature_category_list)

     # One generation per seed, in the order 1 then 2.
     for seed in (1, 2):
         decision_tree.generate_decision_tree(
             decision_tree.training_samples_root,
             decision_tree.attributes_list,
             random_state=seed)
コード例 #9
0
 def test_get_all_possible_values_on_attribute(self):
     """Check the value lists reported for three watermelon 2.0 attributes.

     Attributes are passed as 2-tuples: (0, 0) and (1, 0) are expected to
     yield [0, 1, 2], and (5, 0) is expected to yield [0, 1].
     """
     dataset = Dataset(_dataset_name='watermelon_2.0',
                       _dataset_file_path='./datasets/watermelon.csv')
     dataset.load_dataset(verbose=False)

     decision_tree = DecisionTree()
     decision_tree.set_training_samples_root(dataset.samples)
     decision_tree.set_attributes_list(dataset.feature_category_list)

     # (attribute tuple passed in, expected list of values)
     cases = (
         ((0, 0), [0, 1, 2]),
         ((1, 0), [0, 1, 2]),
         ((5, 0), [0, 1]),
     )
     for attribute, expected_values in cases:
         self.assertEqual(
             decision_tree.get_all_possible_values_on_attribute(attribute),
             expected_values)
コード例 #10
0
    def test_get_ent(self):
        """Check TreeNode.get_ent on synthetic arrays and on watermelon 2.0.

        Covers an all-ones array, an all-zeros array, a 4x5 array whose
        column 4 is zero in two of the four rows, and the loaded dataset's
        samples.
        """
        node = TreeNode()

        # Arrays whose entries are all identical are expected to give 0.
        self.assertEqual(node.get_ent(np.ones((4, 5))), 0)
        self.assertEqual(node.get_ent(np.zeros((4, 5))), 0)

        # Column 4 holds two zeros and two ones; the expected result is 1.
        evenly_split = np.ones((4, 5))
        evenly_split[:2, 4] = 0
        self.assertEqual(node.get_ent(evenly_split), 1)

        dataset = Dataset(_dataset_name='watermelon_2.0',
                          _dataset_file_path='./datasets/watermelon.csv')
        dataset.load_dataset(verbose=False)
        dataset_ent = node.get_ent(dataset.samples)
        self.assertAlmostEqual(dataset_ent, 0.9975025463691152)
コード例 #11
0
    def test_random_forest_3(self):
        """Time forest generation on uci_blood and print the out-of-bag error.

        Builds a RandomForest with 20 estimators and n_samples=400, prints
        the generation time divided by the number of estimators, then
        prints the value returned by calculate_out_of_bag_error(). Nothing
        is asserted.
        """
        import time

        # Single source of truth for the estimator count, which is also the
        # divisor used for the per-estimator timing below.
        n_estimators = 20

        test_dataset = Dataset(_dataset_name = 'uci_blood', _dataset_file_path = './datasets/uci_blood.csv')
        test_dataset.load_dataset(verbose=False)

        test_random_forest = RandomForest(n_estimators = n_estimators,n_samples=400)
        test_random_forest.set_dataset(test_dataset)

        # time.clock() was removed in Python 3.8; time.perf_counter() is the
        # documented replacement for measuring an elapsed interval.
        start = time.perf_counter()
        test_random_forest.generate_random_forest()
        end = time.perf_counter()
        print((end - start) / float(n_estimators))

        # Disabled: render every generated tree.
        # i = 0
        # for tree in test_random_forest.forest:
        #     test_vis_tree = VisTree(tree,test_dataset.feature2number_mapping,\
        #         test_dataset.feature_name_list,_tree_name="test_3_random_decision_tree_%d" %(i))
        #     test_vis_tree.vis_tree(mode=1)
        #     i += 1

        print(test_random_forest.calculate_out_of_bag_error())
コード例 #12
0
 def test_decision_tree_predict(self):
     """Check that a tree built on watermelon 3.0 reproduces every sample's label.

     Each row of the dataset's samples is split into everything but its
     last entry (passed to predict) and its last entry (the expected
     prediction).
     """
     dataset = Dataset(_dataset_name='watermelon_3.0',
                       _dataset_file_path='./datasets/watermelon2.csv')
     dataset.load_dataset(verbose=False)

     decision_tree = DecisionTree()
     decision_tree.set_training_samples_root(dataset.samples)
     decision_tree.set_attributes_list(dataset.feature_category_list)

     root = decision_tree.generate_decision_tree(
         decision_tree.training_samples_root,
         decision_tree.attributes_list)
     # predict() is only called after the generated root has been attached.
     decision_tree.set_root(root)

     for row_index in range(dataset.num_samples):
         row = dataset.samples[row_index, :]
         features, expected_label = row[:-1], row[-1]
         predicted_label = decision_tree.predict(test_sample=features)
         self.assertEqual(predicted_label, expected_label)
コード例 #13
0
    def test_random_forest_predict(self):
        """Run a RandomForest(10) prediction for every watermelon 3.0 sample.

        Each row's entries except the last are passed to predict(). The
        predictions are not compared with anything, so the test passes as
        long as every call completes without raising.
        """
        dataset = Dataset(_dataset_name='watermelon_3.0',
                          _dataset_file_path='./datasets/watermelon2.csv')
        dataset.load_dataset(verbose=False)

        forest = RandomForest(10)
        forest.set_dataset(dataset)
        forest.generate_random_forest()

        # Disabled: render every tree of the forest.
        # i = 0
        # for tree in test_random_forest.forest:
        #     test_vis_tree = VisTree(tree,test_dataset.feature2number_mapping,\
        #         test_dataset.feature_name_list,_tree_name="test_2_random_decision_tree_%d" %(i))
        #     test_vis_tree.vis_tree(mode=1)
        #     i += 1

        for row_index in range(dataset.num_samples):
            row = dataset.samples[row_index, :]
            # The row's last entry is read but never compared with the prediction.
            features, _last_entry = row[:-1], row[-1]
            forest.predict(test_sample=features)
コード例 #14
0
# tree.plot_tree(clf.fit(X, y))

import graphviz
# dot_data = tree.export_graphviz(clf, out_file="./random_forests_sklearn/vis/iris")
# graph = graphviz.Source(dot_data)
# graphviz.render(engine="dot",format="pdf",filepath="./random_forests_sklearn/vis/iris")

from random_forests.utils import Dataset
# test_dataset = Dataset(_dataset_name = 'watermelon_2.0', _dataset_file_path = './datasets/watermelon.csv')
# test_dataset.load_dataset(verbose=False)
# X = test_dataset.samples[:,:-1].astype(int)
# y = test_dataset.labels.astype(int)
# clf = tree.DecisionTreeClassifier(criterion="entropy")
# clf = clf.fit(X,y)

# Compare three classifiers on the uci_blood dataset using 5-fold
# cross-validated accuracy.
# NOTE(review): `tree`, `DummyClassifier`, `RandomForestClassifier` and
# `cross_val_score` are not imported in the visible part of this file;
# presumably they come from scikit-learn -- confirm.
test_dataset = Dataset(_dataset_name='uci_blood',
                       _dataset_file_path='./datasets/uci_blood.csv')
test_dataset.load_dataset(verbose=False)
# Inputs: every column of `samples` except the last, cast to int.
X = test_dataset.samples[:, :-1].astype(int)
# Targets: the dataset's `labels`, cast to int.
y = test_dataset.labels.astype(int)
# Entropy-criterion decision tree.
clf = tree.DecisionTreeClassifier(criterion="entropy")
# Baseline built with strategy='uniform'.
stupid_clf = DummyClassifier(strategy='uniform')
# Random forest with 200 estimators.
RF_clf = RandomForestClassifier(n_estimators=200)

# NOTE(review): if these are scikit-learn estimators, cross_val_score fits
# its own clones per fold, so these full-data fits would not influence the
# scores below -- confirm whether they are needed for something else.
clf = clf.fit(X, y)
RF_clf = RF_clf.fit(X, y)

# Accuracy scores with cv=5 for each classifier.
aver_list = cross_val_score(clf, X, y, cv=5, scoring='accuracy')
aver_list2 = cross_val_score(stupid_clf, X, y, cv=5, scoring='accuracy')
aver_list3 = cross_val_score(RF_clf, X, y, cv=5, scoring='accuracy')

# Only the decision tree's scores are printed here.
print(aver_list)
コード例 #15
0
def main():
    test_dataset = Dataset(_dataset_name='watermelon_2.0',
                           _dataset_file_path='./datasets/watermelon.csv')
    test_dataset.load_dataset()
    myDecisionTree = DecisionTree()