__author__ = 'fabian'

import split_gini_cython
import numpy as np
import DecisionTree_optimized

# Toy single-feature problem: ten samples, two classes, five samples per class.
feat = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 3], dtype=np.float32)
labels = np.array([0, 0, 0, 1, 1, 1, 1, 0, 0, 1], dtype=np.int32)
classes = np.array([0, 1], dtype=np.int32)
class_distrib = np.array([5, 5], dtype=np.int32)

# Print the result of both split implementations on the same input so they can
# be compared by eye. print(...) with a single argument produces the same
# output on Python 2 and Python 3, unlike the Python-2-only print statement.
print(split_gini_cython.split_gini(feat, labels, classes, class_distrib))

print(DecisionTree_optimized.split_gini_new(feat, labels, class_distrib))


from sklearn import datasets

# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20. Prefer its replacement and fall back only on old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

iris = datasets.load_iris()

# No random_state is passed, so the split (and therefore the printed accuracy)
# can differ between runs.
x_tr, x_te, y_tr, y_te = train_test_split(iris.data, iris.target)
dt = DecisionTree_optimized.DecisionTree()
dt.train(x_tr, y_tr)
pred_y = dt.predict(x_te)
# Percentage of test samples whose predicted label equals the true label.
accur = np.sum(pred_y == y_te) / float(len(y_te)) * 100.
print(accur)
__author__ = 'fabian'

import split_gini_cython
import numpy as np
import DecisionTree_optimized

# Toy single-feature problem: ten samples, two classes, five samples per class.
feat = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 3], dtype=np.float32)
labels = np.array([0, 0, 0, 1, 1, 1, 1, 0, 0, 1], dtype=np.int32)
classes = np.array([0, 1], dtype=np.int32)
class_distrib = np.array([5, 5], dtype=np.int32)

# Print the result of both split implementations on the same input so they can
# be compared by eye. print(...) with a single argument produces the same
# output on Python 2 and Python 3, unlike the Python-2-only print statement.
print(split_gini_cython.split_gini(feat, labels, classes, class_distrib))

print(DecisionTree_optimized.split_gini_new(feat, labels, class_distrib))

from sklearn import datasets

# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20. Prefer its replacement and fall back only on old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

iris = datasets.load_iris()

# No random_state is passed, so the split (and therefore the printed accuracy)
# can differ between runs.
x_tr, x_te, y_tr, y_te = train_test_split(iris.data, iris.target)
dt = DecisionTree_optimized.DecisionTree()
dt.train(x_tr, y_tr)
pred_y = dt.predict(x_te)
# Percentage of test samples whose predicted label equals the true label.
accur = np.sum(pred_y == y_te) / float(len(y_te)) * 100.
print(accur)
    def split_node(self, max_depth=10, min_instances=10):
        """Split this node on the feature and threshold with the lowest Gini score.

        Every feature index returned by ``_get_features_for_split`` is scored
        with ``split_gini_cython.split_gini``. The feature with the strictly
        lowest score wins; on ties the earlier feature is kept. Rows whose
        value in that feature is ``<=`` the threshold go to the left child,
        rows with a value ``>`` the threshold go to the right child.

        The node is turned into a leaf via ``_make_leaf`` and an empty list is
        returned when no feature scores below infinity, or when the chosen
        split would leave one side without any rows.

        :param max_depth: forwarded unchanged to each child's
            ``check_constraints_violated``.
        :param min_instances: forwarded unchanged to each child's
            ``check_constraints_violated``.
        :return: list holding those of the two new children for which
            ``check_constraints_violated`` returned a falsy value, left child
            first (presumably the nodes the caller should split next --
            confirm against the caller).
        """
        # The Gini score is minimised, so start from the worst possible value.
        best_split_score = float("inf")
        best_split_feature = None
        best_split_threshold = None

        for j in self._get_features_for_split():
            curr_score, curr_threshold = split_gini_cython.split_gini(
                self.data[:, j], self.labels, self._classes, self.class_distrib)

            # Strict comparison: a later feature must be better, not equal.
            if best_split_score > curr_score:
                best_split_score = curr_score
                best_split_threshold = curr_threshold
                best_split_feature = j

        if best_split_feature is None:
            self._make_leaf()
            return []

        # Slice the winning column once and reuse it for both sides.
        split_column = self.data[:, best_split_feature]
        rows_left = np.where(split_column <= best_split_threshold)[0]
        rows_right = np.where(split_column > best_split_threshold)[0]
        if rows_left.size == 0 or rows_right.size == 0:
            # A one-sided split separates nothing, so stop here.
            self._make_leaf()
            return []

        self._threshold = best_split_threshold
        self._splitFeatureID = best_split_feature

        def make_child(rows):
            # Child node over the given row subset, one level deeper.
            return DecisionTreeNode(self.data[rows, :],
                                    self.labels[rows],
                                    self._classes,
                                    use_features=self._use_features,
                                    depth=self._depth + 1)

        self._child_left = make_child(rows_left)
        self._child_right = make_child(rows_right)

        # Hand back only the children that pass the constraint check.
        return [child for child in (self._child_left, self._child_right)
                if not child.check_constraints_violated(max_depth,
                                                        min_instances)]
    def split_node(self, max_depth=10, min_instances=10):
        """Split this node on the feature and threshold with the lowest Gini score.

        Every feature index returned by ``_get_features_for_split`` is scored
        with ``split_gini_cython.split_gini``. The feature with the strictly
        lowest score wins; on ties the earlier feature is kept. Rows whose
        value in that feature is ``<=`` the threshold go to the left child,
        rows with a value ``>`` the threshold go to the right child.

        The node is turned into a leaf via ``_make_leaf`` and an empty list is
        returned when no feature scores below infinity, or when the chosen
        split would leave one side without any rows.

        :param max_depth: forwarded unchanged to each child's
            ``check_constraints_violated``.
        :param min_instances: forwarded unchanged to each child's
            ``check_constraints_violated``.
        :return: list holding those of the two new children for which
            ``check_constraints_violated`` returned a falsy value, left child
            first (presumably the nodes the caller should split next --
            confirm against the caller).
        """
        # The Gini score is minimised, so start from the worst possible value.
        best_split_score = float("inf")
        best_split_feature = None
        best_split_threshold = None

        for j in self._get_features_for_split():
            curr_score, curr_threshold = split_gini_cython.split_gini(
                self.data[:, j], self.labels, self._classes, self.class_distrib)

            # Strict comparison: a later feature must be better, not equal.
            if best_split_score > curr_score:
                best_split_score = curr_score
                best_split_threshold = curr_threshold
                best_split_feature = j

        if best_split_feature is None:
            self._make_leaf()
            return []

        # Slice the winning column once and reuse it for both sides.
        split_column = self.data[:, best_split_feature]
        rows_left = np.where(split_column <= best_split_threshold)[0]
        rows_right = np.where(split_column > best_split_threshold)[0]
        if rows_left.size == 0 or rows_right.size == 0:
            # A one-sided split separates nothing, so stop here.
            self._make_leaf()
            return []

        self._threshold = best_split_threshold
        self._splitFeatureID = best_split_feature

        def make_child(rows):
            # Child node over the given row subset, one level deeper.
            return DecisionTreeNode(self.data[rows, :],
                                    self.labels[rows],
                                    self._classes,
                                    use_features=self._use_features,
                                    depth=self._depth + 1)

        self._child_left = make_child(rows_left)
        self._child_right = make_child(rows_right)

        # Hand back only the children that pass the constraint check.
        return [child for child in (self._child_left, self._child_right)
                if not child.check_constraints_violated(max_depth,
                                                        min_instances)]