def test_fit_defaultclassifier(data):
    """Check a ``CTClassifier`` built with only ``random_state=10``.

    The classifier is fitted on ``data['random_data']`` with
    ``data['random_labels']`` and then asked for labels and class
    probabilities on ``data['random_test']``.  Both outputs are compared
    against hard-coded reference values for the first ``data['N_test']``
    samples; probabilities must agree to within an absolute error of 1e-8.
    """
    # NOTE(review): a test with this exact name but different reference
    # probabilities also appears later in the visible source. If both live in
    # the same module, the later definition replaces this one and this test is
    # never collected -- confirm which reference values are current.
    model = CTClassifier(random_state=10)
    model.fit(data['random_data'], data['random_labels'])
    predicted_labels = model.predict(data['random_test'])
    predicted_proba = model.predict_proba(data['random_test'])

    expected_labels = [2.0, 3.0, 2.0, 2.0, 2.0]
    expected_proba = [
        [0.88555037, 0.11444963],
        [0.05650123, 0.94349877],
        [0.50057741, 0.49942259],
        [0.89236186, 0.10763814],
        [0.95357416, 0.04642584],
    ]
    tolerance = 1e-8
    n_test = data['N_test']

    # Labels first, so a wrong class is reported before any probability drift.
    for row in range(n_test):
        assert predicted_labels[row] == expected_labels[row]

    for row in range(n_test):
        for col in range(2):
            gap = abs(predicted_proba[row, col] - expected_proba[row][col])
            assert gap < tolerance
def test_fit_defaultclassifier(data):
    """Check a ``CTClassifier`` built with only ``random_state=10``.

    The classifier is fitted on ``data['random_data']`` with
    ``data['random_labels']`` and then asked for labels and class
    probabilities on ``data['random_test']``.  Both outputs are compared
    against hard-coded reference values for the first ``data['N_test']``
    samples; probabilities must agree to within an absolute error of 1e-8.
    """
    # NOTE(review): a test with this exact name but different reference
    # probabilities also appears earlier in the visible source. If both live
    # in the same module, this definition replaces the earlier one -- confirm
    # the earlier copy is stale and can be removed.
    model = CTClassifier(random_state=10)
    model.fit(data['random_data'], data['random_labels'])
    predicted_labels = model.predict(data['random_test'])
    predicted_proba = model.predict_proba(data['random_test'])

    expected_labels = [2.0, 3.0, 2.0, 2.0, 2.0]
    expected_proba = [
        [0.90643161, 0.09356839],
        [0.35361377, 0.64638623],
        [0.50968072, 0.49031928],
        [0.58446856, 0.41553144],
        [0.87548943, 0.12451057],
    ]
    tolerance = 1e-8
    n_test = data['N_test']

    # Labels first, so a wrong class is reported before any probability drift.
    for row in range(n_test):
        assert predicted_labels[row] == expected_labels[row]

    for row in range(n_test):
        for col in range(2):
            gap = abs(predicted_proba[row, col] - expected_proba[row][col])
            assert gap < tolerance
def test_predict_num_iter(data):
    """Check ``CTClassifier`` predictions when ``num_iter=9`` is passed.

    Two fresh ``GaussianNB`` estimators are handed to the classifier
    positionally together with ``num_iter=9`` and ``random_state=10``.  After
    fitting on ``data['random_data']`` / ``data['random_labels']``, the
    predicted labels and class probabilities for ``data['random_test']`` are
    compared with hard-coded reference values over the first
    ``data['N_test']`` samples (probabilities within 1e-8 absolute error).
    """
    # NOTE(review): a test with this exact name but different reference
    # probabilities also appears later in the visible source. If both live in
    # the same module, the later definition replaces this one and this test is
    # never collected -- confirm which reference values are current.
    seed = 10
    first_estimator = GaussianNB()
    second_estimator = GaussianNB()
    model = CTClassifier(
        first_estimator,
        second_estimator,
        num_iter=9,
        random_state=seed,
    )
    model.fit(data['random_data'], data['random_labels'])
    predicted_labels = model.predict(data['random_test'])
    predicted_proba = model.predict_proba(data['random_test'])

    expected_labels = [2.0, 3.0, 2.0, 2.0, 2.0]
    expected_proba = [
        [0.88555037, 0.11444963],
        [0.05650123, 0.94349877],
        [0.50057741, 0.49942259],
        [0.89236186, 0.10763814],
        [0.95357416, 0.04642584],
    ]
    tolerance = 1e-8
    n_test = data['N_test']

    for row in range(n_test):
        assert predicted_labels[row] == expected_labels[row]

    for row in range(n_test):
        for col in range(2):
            gap = abs(predicted_proba[row, col] - expected_proba[row][col])
            assert gap < tolerance
def test_predict_set_n(data):
    """Check ``CTClassifier`` predictions when ``n=9`` is passed.

    Two fresh ``GaussianNB`` estimators are handed to the classifier
    positionally together with ``n=9`` and ``random_state=10``.  After
    fitting on ``data['random_data']`` / ``data['random_labels']``, the
    predicted labels and class probabilities for ``data['random_test']`` are
    compared with hard-coded reference values over the first
    ``data['N_test']`` samples (probabilities within 1e-8 absolute error).
    """
    seed = 10
    first_estimator = GaussianNB()
    second_estimator = GaussianNB()
    model = CTClassifier(
        first_estimator,
        second_estimator,
        n=9,
        random_state=seed,
    )
    model.fit(data['random_data'], data['random_labels'])
    predicted_labels = model.predict(data['random_test'])
    predicted_proba = model.predict_proba(data['random_test'])

    expected_labels = [3.0, 3.0, 2.0, 3.0, 3.0]
    expected_proba = [
        [0.29020704, 0.70979296],
        [0.44024614, 0.55975386],
        [0.5710383, 0.4289617],
        [0.37366059, 0.62633941],
        [0.22157484, 0.77842516],
    ]
    tolerance = 1e-8
    n_test = data['N_test']

    for row in range(n_test):
        assert predicted_labels[row] == expected_labels[row]

    for row in range(n_test):
        for col in range(2):
            gap = abs(predicted_proba[row, col] - expected_proba[row][col])
            assert gap < tolerance
def test_predict_set_p(data):
    """Check ``CTClassifier`` predictions when ``p=12`` is passed.

    Two fresh ``GaussianNB`` estimators are handed to the classifier
    positionally together with ``p=12`` and ``random_state=10``.  After
    fitting on ``data['random_data']`` / ``data['random_labels']``, the
    predicted labels and class probabilities for ``data['random_test']`` are
    compared with hard-coded reference values over the first
    ``data['N_test']`` samples (probabilities within 1e-8 absolute error).
    """
    seed = 10
    first_estimator = GaussianNB()
    second_estimator = GaussianNB()
    model = CTClassifier(
        first_estimator,
        second_estimator,
        p=12,
        random_state=seed,
    )
    model.fit(data['random_data'], data['random_labels'])
    predicted_labels = model.predict(data['random_test'])
    predicted_proba = model.predict_proba(data['random_test'])

    expected_labels = [3.0, 3.0, 3.0, 3.0, 3.0]
    expected_proba = [
        [0.31422418, 0.68577582],
        [0.40938282, 0.59061718],
        [0.48448605, 0.51551395],
        [0.38853225, 0.61146775],
        [0.22972488, 0.77027512],
    ]
    tolerance = 1e-8
    n_test = data['N_test']

    for row in range(n_test):
        assert predicted_labels[row] == expected_labels[row]

    for row in range(n_test):
        for col in range(2):
            gap = abs(predicted_proba[row, col] - expected_proba[row][col])
            assert gap < tolerance
def test_predict_num_iter(data):
    """Check ``CTClassifier`` predictions when ``num_iter=9`` is passed.

    Two fresh ``GaussianNB`` estimators are handed to the classifier
    positionally together with ``num_iter=9`` and ``random_state=10``.  After
    fitting on ``data['random_data']`` / ``data['random_labels']``, the
    predicted labels and class probabilities for ``data['random_test']`` are
    compared with hard-coded reference values over the first
    ``data['N_test']`` samples (probabilities within 1e-8 absolute error).
    """
    # NOTE(review): a test with this exact name but different reference
    # probabilities also appears earlier in the visible source. If both live
    # in the same module, this definition replaces the earlier one -- confirm
    # the earlier copy is stale and can be removed.
    seed = 10
    first_estimator = GaussianNB()
    second_estimator = GaussianNB()
    model = CTClassifier(
        first_estimator,
        second_estimator,
        num_iter=9,
        random_state=seed,
    )
    model.fit(data['random_data'], data['random_labels'])
    predicted_labels = model.predict(data['random_test'])
    predicted_proba = model.predict_proba(data['random_test'])

    expected_labels = [2.0, 3.0, 2.0, 2.0, 2.0]
    expected_proba = [
        [0.90643161, 0.09356839],
        [0.35361377, 0.64638623],
        [0.50968072, 0.49031928],
        [0.58446856, 0.41553144],
        [0.87548943, 0.12451057],
    ]
    tolerance = 1e-8
    n_test = data['N_test']

    for row in range(n_test):
        assert predicted_labels[row] == expected_labels[row]

    for row in range(n_test):
        for col in range(2):
            gap = abs(predicted_proba[row, col] - expected_proba[row][col])
            assert gap < tolerance
def test_predict_unlabeled_pool_size(data):
    """Check ``CTClassifier`` predictions when ``unlabeled_pool_size=20``.

    Two fresh ``GaussianNB`` estimators are handed to the classifier
    positionally together with ``unlabeled_pool_size=20`` and
    ``random_state=10``.  After fitting on ``data['random_data']`` /
    ``data['random_labels']``, the predicted labels and class probabilities
    for ``data['random_test']`` are compared with hard-coded reference values
    over the first ``data['N_test']`` samples (probabilities within 1e-8
    absolute error).
    """
    # NOTE(review): a test with this exact name but different reference labels
    # and probabilities also appears later in the visible source. If both live
    # in the same module, the later definition replaces this one and this test
    # is never collected -- confirm which reference values are current.
    seed = 10
    first_estimator = GaussianNB()
    second_estimator = GaussianNB()
    model = CTClassifier(
        first_estimator,
        second_estimator,
        unlabeled_pool_size=20,
        random_state=seed,
    )
    model.fit(data['random_data'], data['random_labels'])
    predicted_labels = model.predict(data['random_test'])
    predicted_proba = model.predict_proba(data['random_test'])

    expected_labels = [2.0, 3.0, 2.0, 2.0, 2.0]
    expected_proba = [
        [0.55708013, 0.44291987],
        [0.29591617, 0.70408383],
        [0.50441055, 0.49558945],
        [0.99276393, 0.00723607],
        [0.95221514, 0.04778486],
    ]
    tolerance = 1e-8
    n_test = data['N_test']

    for row in range(n_test):
        assert predicted_labels[row] == expected_labels[row]

    for row in range(n_test):
        for col in range(2):
            gap = abs(predicted_proba[row, col] - expected_proba[row][col])
            assert gap < tolerance
def test_predict_unlabeled_pool_size(data):
    """Check ``CTClassifier`` predictions when ``unlabeled_pool_size=20``.

    Two fresh ``GaussianNB`` estimators are handed to the classifier
    positionally together with ``unlabeled_pool_size=20`` and
    ``random_state=10``.  After fitting on ``data['random_data']`` /
    ``data['random_labels']``, the predicted labels and class probabilities
    for ``data['random_test']`` are compared with hard-coded reference values
    over the first ``data['N_test']`` samples (probabilities within 1e-8
    absolute error).
    """
    # NOTE(review): a test with this exact name but different reference labels
    # and probabilities also appears earlier in the visible source. If both
    # live in the same module, this definition replaces the earlier one --
    # confirm the earlier copy is stale and can be removed.
    seed = 10
    first_estimator = GaussianNB()
    second_estimator = GaussianNB()
    model = CTClassifier(
        first_estimator,
        second_estimator,
        unlabeled_pool_size=20,
        random_state=seed,
    )
    model.fit(data['random_data'], data['random_labels'])
    predicted_labels = model.predict(data['random_test'])
    predicted_proba = model.predict_proba(data['random_test'])

    expected_labels = [2.0, 3.0, 3.0, 2.0, 2.0]
    expected_proba = [
        [0.75269031, 0.24730969],
        [0.37316567, 0.62683433],
        [0.45283102, 0.54716898],
        [0.59666021, 0.40333979],
        [0.81196019, 0.18803981],
    ]
    tolerance = 1e-8
    n_test = data['N_test']

    for row in range(n_test):
        assert predicted_labels[row] == expected_labels[row]

    for row in range(n_test):
        for col in range(2):
            gap = abs(predicted_proba[row, col] - expected_proba[row][col])
            assert gap < tolerance