def test_fit_with_inm(
    prune_count_method='inverse_nm_dot_s',
    seed=0,
    used_by_another_test=False,
):
    """Compare fitting with a supplied inverse noise matrix against fitting without.

    Two ``LearningWithNoisyLabels`` models are created with the same ``seed``
    and ``prune_count_method``. The first is fit with an inverse noise matrix
    computed from ``data["py"]``, ``data["noise_matrix"]`` and ``data["ps"]``;
    the second is fit on the same training inputs without it.

    Returns:
        ``(score, score_inm)`` when ``used_by_another_test`` is true.
        Otherwise nothing is returned and the function asserts that
        ``score < score_inm + 1e-4``.
    """
    model_kwargs = dict(seed=seed, prune_count_method=prune_count_method)

    # Learn with noisy labels, passing the inverse noise matrix explicitly.
    given_model = LearningWithNoisyLabels(**model_kwargs)
    inverse_nm = compute_inv_noise_matrix(
        data["py"],
        data["noise_matrix"],
        data["ps"],
    )
    given_model.fit(data['X_train'], data['s'], inverse_noise_matrix=inverse_nm)
    score_inm = given_model.score(data['X_test'], data['y_test'])

    # Learn with noisy labels and let the model estimate the inverse noise
    # matrix on its own.
    estimating_model = LearningWithNoisyLabels(**model_kwargs)
    estimating_model.fit(data['X_train'], data['s'])
    score = estimating_model.score(data['X_test'], data['y_test'])

    if used_by_another_test:
        return score, score_inm

    # 1e-4 is the slack allowed before the estimated fit counts as "better".
    assert score_inm + 1e-4 > score
def test_clf_fit_nm_inm(sparse):
    """Fit with both the noise matrix and its inverse, then compare to a plain fit.

    Args:
        sparse: When truthy the test reads from ``SPARSE_DATA``; otherwise
            from ``DATA``.

    Asserts that the score of a model fit without any matrices is below the
    score of a model fit with both matrices plus a 1e-4 tolerance.
    """
    if sparse:
        dataset = SPARSE_DATA
    else:
        dataset = DATA

    informed_model = LearningWithNoisyLabels(seed=SEED)
    noise_matrix = dataset['noise_matrix']
    inverse_nm = compute_inv_noise_matrix(
        dataset["py"],
        noise_matrix,
        dataset["ps"],
    )
    informed_model.fit(
        X=dataset['X_train'],
        s=dataset['s'],
        noise_matrix=noise_matrix,
        inverse_noise_matrix=inverse_nm,
    )
    score_nm_inm = informed_model.score(dataset['X_test'], dataset['y_test'])

    # Learn with noisy labels and let the model estimate the inverse noise
    # matrix on its own.
    estimating_model = LearningWithNoisyLabels(seed=SEED)
    estimating_model.fit(dataset['X_train'], dataset['s'])
    score = estimating_model.score(dataset['X_test'], dataset['y_test'])

    assert score_nm_inm + 1e-4 > score
def test_fit_with_inm(
    sparse,
    seed=SEED,
    used_by_another_test=False,
):
    """Compare fitting with a supplied inverse noise matrix against fitting without.

    Args:
        sparse: When truthy the test reads from ``SPARSE_DATA``; otherwise
            from ``DATA``.
        seed: Seed passed to both ``LearningWithNoisyLabels`` instances.
        used_by_another_test: When true, return the two scores instead of
            asserting on them.

    Returns:
        ``(score, score_inm)`` when ``used_by_another_test`` is true.
        Otherwise nothing is returned and the function asserts that
        ``score < score_inm + 1e-4``.
    """
    if sparse:
        dataset = SPARSE_DATA
    else:
        dataset = DATA

    # Learn with noisy labels, passing the inverse noise matrix explicitly.
    given_model = LearningWithNoisyLabels(seed=seed)
    inverse_nm = compute_inv_noise_matrix(
        dataset["py"],
        dataset["noise_matrix"],
        dataset["ps"],
    )
    given_model.fit(
        dataset['X_train'],
        dataset['s'],
        inverse_noise_matrix=inverse_nm,
    )
    score_inm = given_model.score(dataset['X_test'], dataset['y_test'])

    # Learn with noisy labels and let the model estimate the inverse noise
    # matrix on its own.
    estimating_model = LearningWithNoisyLabels(seed=seed)
    estimating_model.fit(dataset['X_train'], dataset['s'])
    score = estimating_model.score(dataset['X_test'], dataset['y_test'])

    if used_by_another_test:
        return score, score_inm

    assert score_inm + 1e-4 > score
def test_fit_psx():
    """Check that supplying precomputed ``psx`` does not change the score.

    One model is fit with ``psx`` obtained from
    ``estimate_cv_predicted_probabilities`` on the training data, another is
    fit on the same data without ``psx``. The two test scores must differ by
    less than 1e-6.
    """
    from cleanlab.latent_estimation import estimate_cv_predicted_probabilities

    features = data['X_train']
    labels = data['y_train']

    # Fit with out-of-sample predicted probabilities passed in by the caller.
    model_with_psx = LearningWithNoisyLabels()
    precomputed_psx = estimate_cv_predicted_probabilities(
        X=features,
        labels=labels,
    )
    model_with_psx.fit(X=features, s=labels, psx=precomputed_psx)
    score_with_psx = model_with_psx.score(data['X_test'], data['y_test'])

    # Fit again without psx.
    model_no_psx = LearningWithNoisyLabels()
    model_no_psx.fit(X=features, s=labels)
    score_no_psx = model_no_psx.score(data['X_test'], data['y_test'])

    score_gap = abs(score_with_psx - score_no_psx)
    assert score_gap < 1e-6
def test_rp():
    """Smoke-test ``LearningWithNoisyLabels`` wrapping a ``LogisticRegression``.

    Fits on ``data["X_train"]`` / ``data["s"]`` and scores on the test split.
    The main purpose is to confirm that fit and score run without raising;
    the final assertion additionally sanity-checks the returned score.
    """
    rp = LearningWithNoisyLabels(
        clf=LogisticRegression(
            multi_class='auto',
            solver='lbfgs',
            random_state=seed,
        )
    )
    rp.fit(data["X_train"], data["s"])
    score = rp.score(data["X_test"], data["y_test"])
    print(score)
    # Previously this was `assert (True)`, which can never fail. The score is
    # expected to be a mean accuracy (scikit-learn classifier convention), so
    # it must lie in [0, 1]; this still passes whenever the run succeeds.
    assert 0.0 <= score <= 1.0
def test_fit_with_nm(
    seed=0,
    used_by_another_test=False,
):
    """Compare fitting with a supplied noise matrix against fitting without.

    Both ``LearningWithNoisyLabels`` instances are created with ``seed``. The
    first is fit with ``data['noise_matrix']``; the second is fit on the same
    inputs without it.

    Returns:
        ``(score, score_nm)`` when ``used_by_another_test`` is true.
        Otherwise nothing is returned and the function asserts that
        ``score < score_nm + 1e-4``.
    """
    # Learn with noisy labels, passing the noise matrix explicitly.
    given_model = LearningWithNoisyLabels(seed=seed)
    noise_matrix = data['noise_matrix']
    given_model.fit(data['X_train'], data['s'], noise_matrix=noise_matrix)
    score_nm = given_model.score(data['X_test'], data['y_test'])

    # Learn with noisy labels and let the model estimate the noise matrix.
    estimating_model = LearningWithNoisyLabels(seed=seed)
    estimating_model.fit(data['X_train'], data['s'])
    score = estimating_model.score(data['X_test'], data['y_test'])

    if used_by_another_test:
        return score, score_nm

    assert score_nm + 1e-4 > score
def test_no_score():
    """Score through a wrapped classifier that defines no ``score`` method.

    The stub's ``predict`` always returns ``data['y_test']``, and the test
    asserts the resulting score is within 1e-6 of 1. Presumably
    ``LearningWithNoisyLabels.score`` falls back to comparing predictions
    with the given labels when the classifier has no ``score`` -- confirm
    against the library.
    """

    class ScorelessClassifier:
        # Stub exposing only fit / predict_proba / predict.
        def fit(self):
            pass

        def predict_proba(self):
            pass

        def predict(self, X):
            # Ignore X and hand back the true test labels.
            return data['y_test']

    wrapper = LearningWithNoisyLabels(clf=ScorelessClassifier())
    accuracy = wrapper.score(data['X_test'], data['y_test'])
    assert abs(accuracy - 1) < 1e-6
def test_score():
    """Check that ``score`` returns what the wrapped classifier's ``score`` returns.

    The stub classifier's ``score`` returns a fixed sentinel string; the test
    asserts that ``LearningWithNoisyLabels.score`` yields that same value.
    """
    sentinel = 'cleanlab is dope'

    class ScoringClassifier:
        # Stub whose score() ignores its inputs and returns the sentinel.
        def fit(self):
            pass

        def predict_proba(self):
            pass

        def predict(self):
            pass

        def score(self, X, y):
            return sentinel

    wrapper = LearningWithNoisyLabels(clf=ScoringClassifier())
    result = wrapper.score(data['X_test'], data['y_test'])
    assert result == sentinel
def train_without_noisy_labels(X_train, y_train, X_test, y_test, clf=None):
    """Fit a classifier on the training split and return its test score.

    Args:
        X_train: Training features passed to ``clf.fit``.
        y_train: Training labels passed to ``clf.fit``.
        X_test: Test features passed to ``clf.score``.
        y_test: Test labels passed to ``clf.score``.
        clf: Object with ``fit(X, y)`` and ``score(X, y)``. When ``None``, a
            ``LearningWithNoisyLabels`` wrapping ``baseclf(**params)`` is
            built with the module-level ``seed`` and ``n_jobs=cpu_count()``.

    Returns:
        Whatever ``clf.score(X_test, y_test)`` returns.
    """
    if clf is None:
        # No classifier supplied: build the default wrapped model.
        clf = LearningWithNoisyLabels(
            clf=baseclf(**params),
            seed=seed,
            n_jobs=cpu_count(),
        )

    clf.fit(X_train, y_train)
    test_score = clf.score(X_test, y_test)
    return test_score