def test_depth_dist():
    """
    test for depth_dist function

    Builds a tiny 1-tree random forest on two Gaussian clusters, derives the
    kernel matrix, and checks that depth_dist returns a matrix of the same
    shape with a zero diagonal and only non-negative entries.
    """
    # two well-separated 2-d Gaussian clusters, labeled 0 and 1
    X_train = np.concatenate(
        (np.random.normal(loc=(1, 2), scale=.6, size=(100, 2)),
         np.random.normal(loc=(-1.2, -.5), scale=.6, size=(100, 2))),
        axis=0)
    # np.int was deprecated in NumPy 1.20 and removed in 1.24 -- use builtin int
    y_train = np.concatenate((np.zeros(100, dtype=int),
                              np.ones(100, dtype=int)))
    amount = 200

    # creating a random forest
    rf_class_known = sklearn.ensemble.RandomForestClassifier(
        n_estimators=1,
        min_samples_leaf=1)
    random_forest = rf_class_known.fit(X=np.array(X_train)[:amount, :],
                                       y=y_train[:amount].ravel())

    data = np.array(X_train[:amount, :])

    depth_dict, max_depth = smooth_rf.calc_depth_for_forest(random_forest,
                                                            verbose=False)
    Vt_dict = smooth_rf.make_Vt_mat(random_forest, data,
                                    depth_dict=depth_dict,
                                    verbose=False)
    Ut_prime_dict = smooth_rf.make_Ut_prime_mat_no_sym(Vt_dict, Vt_dict,
                                                       max_depth=max_depth,
                                                       verbose=False)
    Ut_prime_dict = smooth_rf.remove_0_from_Ut_prime(Ut_prime_dict)

    if len(Ut_prime_dict) > 0:
        K_mat = smooth_rf.make_kernel(Ut_prime_dict)
        DD_mat = smooth_rf.depth_dist(K_mat)

        assert K_mat.shape == DD_mat.shape, \
            "dimensions between K_mat and DD_mat should be the same"

        # isinstance is the idiomatic type check; sparse.COO is the public
        # name for the class (sparse.coo.core.COO is an internal path)
        if isinstance(DD_mat, sparse.COO):
            assert np.all(np.diag(DD_mat.todense()) == 0), \
                "diagonal should be naturally 0 (has error)"
        else:
            assert np.all(np.diag(DD_mat) == 0), \
                "diagonal should be naturally 0 (has error)"

        assert np.all(DD_mat >= 0), \
            "all entries should be positive in DD (has error)"
    else:
        # empty Ut_prime_dict is a degenerate case: make_kernel returns 0
        K_mat = smooth_rf.make_kernel(Ut_prime_dict)
        assert K_mat == 0, \
            "when you provide an empty Ut_prime_dict you should get a 0"
def test_make_kernel():
    """
    test for make_kernel

    Builds a tiny 1-tree random forest on two Gaussian clusters and checks
    that make_kernel returns a square 2-d matrix (or 0 when the supplied
    Ut_prime_dict is empty).
    """
    # two well-separated 2-d Gaussian clusters, labeled 0 and 1
    X_train = np.concatenate(
        (np.random.normal(loc=(1, 2), scale=.6, size=(100, 2)),
         np.random.normal(loc=(-1.2, -.5), scale=.6, size=(100, 2))),
        axis=0)
    # np.int was deprecated in NumPy 1.20 and removed in 1.24 -- use builtin int
    y_train = np.concatenate((np.zeros(100, dtype=int),
                              np.ones(100, dtype=int)))
    amount = 200

    # creating a random forest
    rf_class_known = sklearn.ensemble.RandomForestClassifier(
        n_estimators=1,
        min_samples_leaf=1)
    random_forest = rf_class_known.fit(X=np.array(X_train)[:amount, :],
                                       y=y_train[:amount].ravel())

    data = np.array(X_train[:amount, :])

    depth_dict, max_depth = smooth_rf.calc_depth_for_forest(random_forest,
                                                            verbose=False)
    Vt_dict = smooth_rf.make_Vt_mat(random_forest, data,
                                    depth_dict=depth_dict,
                                    verbose=False)
    Ut_prime_dict = smooth_rf.make_Ut_prime_mat_no_sym(Vt_dict, Vt_dict,
                                                       max_depth=max_depth,
                                                       verbose=False)
    Ut_prime_dict = smooth_rf.remove_0_from_Ut_prime(Ut_prime_dict)

    if len(Ut_prime_dict) > 0:
        K_mat = smooth_rf.make_kernel(Ut_prime_dict)

        assert K_mat.shape[0:2] == (K_mat.shape[0], K_mat.shape[0]), \
            "returned K matrix is not symmetric when the inputs were."
        assert len(K_mat.shape) == 2, \
            "returned K matrix is not 2d as expected."
    else:
        # empty Ut_prime_dict is a degenerate case: make_kernel returns 0
        K_mat = smooth_rf.make_kernel(Ut_prime_dict)
        assert K_mat == 0, \
            "when you provide an empty Ut_prime_dict you should get a 0"
def test_make_Vt_mat():
    """
    tests for make_Vt_mat

    Fits a 100-tree random forest on two Gaussian clusters and checks that
    make_Vt_mat returns one dict per tree, with per-level matrices whose
    row count matches the data and whose total column count matches the
    number of nodes in that tree.
    """
    # two well-separated 2-d Gaussian clusters, labeled 0 and 1
    X_train = np.concatenate(
        (np.random.normal(loc=(1, 2), scale=.6, size=(100, 2)),
         np.random.normal(loc=(-1.2, -.5), scale=.6, size=(100, 2))),
        axis=0)
    # np.int was deprecated in NumPy 1.20 and removed in 1.24 -- use builtin int
    y_train = np.concatenate((np.zeros(100, dtype=int),
                              np.ones(100, dtype=int)))
    amount = 200
    # (removed unused locals `s` and `c` from an earlier draft of this test)

    # creating a random forest
    rf_class_known = sklearn.ensemble.RandomForestClassifier(
        n_estimators=100,
        min_samples_leaf=1)
    fit_rf_known = rf_class_known.fit(X=np.array(X_train)[:amount, :],
                                      y=y_train[:amount].ravel())
    random_forest = fit_rf_known

    data = np.array(X_train[:amount, :])

    Vt_dict = smooth_rf.make_Vt_mat(random_forest, data, verbose=False)

    assert len(Vt_dict) == 100, \
        "incorrect number of trees suggested in the full Vt output"

    # spot-check 10 randomly chosen trees
    for _ in range(10):
        r_idx = np.random.randint(100)
        random_Vt_dict = Vt_dict[r_idx]

        assert type(random_Vt_dict) == dict, \
            "output of _make_Vt_mat_tree is not a dictionary"

        assert np.all([x.shape[0] == data.shape[0] \
                       for x in random_Vt_dict.values()]), \
            "output of _make_Vt_mat_tree elements have incorrect number of rows"

        # columns across all depth levels must add up to the tree's node count
        assert np.sum([x.shape[1] for x in random_Vt_dict.values()]) == \
            len(random_forest.estimators_[r_idx].tree_.children_left), \
            "output of split of Vt matrices have more columns than a" + \
            " full Vt mat would"