def CreateFullNewTree(dtree):
    """Return a fresh DecisionTreeClassifier whose internal tree is a single
    zeroed root node, sized after *dtree*'s value array.

    The class metadata (n_features, n_classes, n_outputs) is recovered from
    dtree.tree_.__reduce__() and copied onto the new estimator.
    NOTE(review): relies on sklearn's private Tree state layout — verify
    against the installed sklearn version.
    """
    # sklearn's internal node record layout (one record per tree node).
    node_dtype = [('left_child', '<i8'), ('right_child', '<i8'),
                  ('feature', '<i8'), ('threshold', '<f8'),
                  ('impurity', '<f8'), ('n_node_samples', '<i8'),
                  ('weighted_n_node_samples', '<f8')]
    # Single-node state: one zeroed node and a matching zeroed value slab.
    state = {
        'node_count': 1,
        'max_depth': 0,
        'nodes': np.zeros(1, dtype=node_dtype),
        'values': np.zeros((1,
                            dtree.tree_.value.shape[1],
                            dtree.tree_.value.shape[2])),
    }
    # Rebuild an empty Tree with the same constructor args as dtree's tree.
    tree_cls, (n_f, n_c, n_o), _ = dtree.tree_.__reduce__()
    fresh_tree = tree_cls(n_f, n_c, n_o)
    fresh_tree.__setstate__(state)

    clf = DecisionTreeClassifier()
    clf.n_features_ = n_f
    clf.n_classes_ = n_c[0]
    clf.classes_ = np.linspace(0, n_c[0] - 1, n_c[0]).astype(int)
    clf.n_outputs_ = n_o
    clf.tree_ = fresh_tree
    return clf
def build_decision_tree(t):
    """Wrap a pre-built sklearn Tree *t* in a DecisionTreeClassifier shell.

    Copies the tree's n_features / n_outputs / n_classes metadata onto the
    estimator and attaches the tree itself, so the classifier can predict
    without ever being fitted.

    Parameters
    ----------
    t : sklearn Tree-like object exposing n_features, n_outputs, n_classes.

    Returns
    -------
    DecisionTreeClassifier with tree_ set to *t*.
    """
    dt = DecisionTreeClassifier(random_state=0)
    dt.n_features_ = t.n_features
    dt.n_outputs_ = t.n_outputs
    dt.n_classes_ = t.n_classes[0]
    # np.arange is the idiomatic equivalent of np.array([x for x in range(n)])
    # and yields the same integer label array 0..n_classes_-1.
    dt.classes_ = np.arange(dt.n_classes_)
    dt.tree_ = t
    return dt
def equivalent_random(dtree):
    """Build a new DecisionTreeClassifier by reusing *dtree*'s pool of splits.

    Collects every internal (feature, threshold) split of dtree, seeds a
    single-node tree with one split drawn uniformly at random, then delegates
    to rec_split() to grow the rest.  Prints diagnostics along the way.
    NOTE(review): depends on sklearn's private Tree state layout and on the
    module-level helper rec_split — verify both before reuse.
    """
    original_state = dtree.tree_.__getstate__().copy()
    # Leaves are marked with feature == -2 in sklearn's Tree arrays.
    leaf_ids = np.where(dtree.tree_.feature == -2)[0]
    n_internal = dtree.tree_.node_count - leaf_ids.size
    all_splits = np.zeros(n_internal, dtype=object)
    pos = 0
    for node in range(dtree.tree_.node_count):
        if node not in leaf_ids:
            all_splits[pos] = (dtree.tree_.feature[node],
                               dtree.tree_.threshold[node])
            pos += 1
    print(all_splits)

    node_dtype = [('left_child', '<i8'), ('right_child', '<i8'),
                  ('feature', '<i8'), ('threshold', '<f8'),
                  ('impurity', '<f8'), ('n_node_samples', '<i8'),
                  ('weighted_n_node_samples', '<f8')]
    d = {
        'node_count': 1,
        'max_depth': 0,
        'nodes': np.zeros(1, dtype=node_dtype),
        'values': np.zeros((1,
                            original_state['values'].shape[1],
                            original_state['values'].shape[2])),
    }
    # Initial node: pick one split at random from the pool for the root.
    k = np.random.randint(all_splits.size)
    phi_init, th_init = all_splits[k]
    d['nodes'][0]['feature'] = phi_init
    d['nodes'][0]['threshold'] = th_init
    d['nodes'][0]['left_child'] = -1
    d['nodes'][0]['right_child'] = -1
    # Grow the remaining structure recursively from the root.
    d = rec_split(original_state, 0, d, all_splits)
    print('nb noeuds :', d['node_count'])

    tree_cls, (n_f, n_c, n_o), _ = dtree.tree_.__reduce__()
    new_tree = tree_cls(n_f, n_c, n_o)
    new_tree.__setstate__(d)
    print('nb noeuds :', new_tree.__getstate__()['nodes'].size)

    new_dtree = DecisionTreeClassifier()
    new_dtree.n_features_ = n_f
    new_dtree.n_classes_ = n_c[0]
    new_dtree.classes_ = np.linspace(0, n_c[0] - 1, n_c[0]).astype(int)
    new_dtree.n_outputs_ = n_o
    new_dtree.tree_ = new_tree
    new_dtree.max_depth = new_tree.max_depth
    return new_dtree
def deserialize_decision_tree(model_dict):
    """Rebuild a DecisionTreeClassifier from its serialized dict form.

    Expects model_dict to carry 'params' (constructor kwargs), the fitted
    attributes ('classes_', 'max_features_', 'n_classes_', 'n_features_',
    'n_outputs_') and a 'tree_' payload consumed by deserialize_tree().
    """
    clf = DecisionTreeClassifier(**model_dict['params'])
    clf.classes_ = np.array(model_dict['classes_'])
    # Restore the scalar fitted attributes verbatim from the dict.
    for attr in ('max_features_', 'n_classes_', 'n_features_', 'n_outputs_'):
        setattr(clf, attr, model_dict[attr])
    clf.tree_ = deserialize_tree(model_dict['tree_'],
                                 model_dict['n_features_'],
                                 model_dict['n_classes_'],
                                 model_dict['n_outputs_'])
    return clf
def train_decision_tree_classifer(training_data, depth=50):
    """Cross-validate a class-balanced decision tree on *training_data*.

    Columns given by the module-level `selected_features` are the inputs;
    the last column is the label.  Runs 5-fold CV scored by ROC AUC, prints
    the per-fold scores, and returns the (unfitted) estimator together with
    the mean score.
    NOTE(review): the returned estimator is never fitted here —
    cross_val_score clones it internally; confirm callers expect that.
    """
    dtree = DecisionTreeClassifier(max_depth=depth,
                                   min_samples_split=2,
                                   class_weight="balanced")
    dtree.classes_ = [0, 1]
    features = training_data[:, selected_features].astype('float')
    labels = training_data[:, -1].astype('float')
    scores = cross_val_score(dtree, features, labels,
                             cv=5, scoring='roc_auc')
    mean_score = np.mean(scores)
    print("Scores gotten using Decision Tree (max depth=" + str(depth) + ")")
    print(scores)
    print(mean_score)
    return dtree, mean_score
def fed_integrate_model_cart(model1, model2):
    """Average two models' linear parameters into a fresh classifier.

    Produces a new DecisionTreeClassifier carrying the element-wise mean of
    the two models' coef_ and intercept_ and model1's classes_.

    Parameters
    ----------
    model1, model2 : fitted models exposing coef_, intercept_ and classes_.

    Returns
    -------
    DecisionTreeClassifier with averaged coef_/intercept_ attached.
    """
    model = DecisionTreeClassifier()
    model.coef_ = mulkeys_add_2Darray(model1.coef_, model2.coef_) / 2
    # BUG FIX: the original averaged model1.intercept_ with ITSELF
    # (mulkeys_add_array(model1.intercept_, model1.intercept_)), which just
    # reproduces model1's intercept.  Use model2's intercept as intended.
    model.intercept_ = mulkeys_add_array(model1.intercept_,
                                         model2.intercept_) / 2
    model.classes_ = model1.classes_
    return model