def test_clf_fit_nm_inm(sparse):
    """Fitting with both noise matrices supplied must score at least as well as
    fitting from the noisy labels alone (within a 1e-4 tolerance).

    ``sparse`` selects the ``SPARSE_DATA`` fixture instead of ``DATA``.
    """
    if sparse:
        dataset = SPARSE_DATA
    else:
        dataset = DATA

    informed = LearningWithNoisyLabels(seed=SEED)
    noise_matrix = dataset['noise_matrix']
    inverse_noise_matrix = compute_inv_noise_matrix(
        dataset["py"], noise_matrix, dataset["ps"])

    # Fit with the noise matrix and its inverse both given up front.
    informed.fit(
        X=dataset['X_train'],
        s=dataset['s'],
        noise_matrix=noise_matrix,
        inverse_noise_matrix=inverse_noise_matrix,
    )
    informed_score = informed.score(dataset['X_test'], dataset['y_test'])

    # Fit a second wrapper without passing either matrix.
    uninformed = LearningWithNoisyLabels(seed=SEED)
    uninformed.fit(dataset['X_train'], dataset['s'])
    uninformed_score = uninformed.score(dataset['X_test'], dataset['y_test'])

    assert uninformed_score < informed_score + 1e-4
def __model_build_noisy(X_train, y_train, X_test, alg, seed):
    """Fit ``LearningWithNoisyLabels`` around the chosen base model and return
    its predictions for ``X_test``.

    ``alg == 'Logistic'`` selects ``LogisticRegression``; any other value
    falls back to ``GaussianNB``.
    """
    if alg == 'Logistic':
        base_model = LogisticRegression(multi_class='auto')
    else:
        base_model = GaussianNB()

    wrapper = LearningWithNoisyLabels(
        clf=base_model,
        seed=seed,
        n_jobs=cpu_count(),
    )
    wrapper.fit(X_train, y_train)
    return wrapper.predict(X_test)
def test_fit_with_inm(
    prune_count_method='inverse_nm_dot_s',
    seed=0,
    used_by_another_test=False,
):
    """Compare fitting with a supplied inverse noise matrix against fitting
    without one.

    Returns ``(score, score_inm)`` when ``used_by_another_test`` is true;
    otherwise asserts that the score without the matrix does not beat the
    score with it by more than 1e-4.
    """
    wrapper_kwargs = dict(seed=seed, prune_count_method=prune_count_method)

    # Wrapper that receives the inverse noise matrix explicitly.
    with_inm = LearningWithNoisyLabels(**wrapper_kwargs)
    inverse_noise_matrix = compute_inv_noise_matrix(
        data["py"], data["noise_matrix"], data["ps"])
    with_inm.fit(
        data['X_train'],
        data['s'],
        inverse_noise_matrix=inverse_noise_matrix,
    )
    score_inm = with_inm.score(data['X_test'], data['y_test'])

    # Wrapper that is fitted from the noisy labels alone.
    without_inm = LearningWithNoisyLabels(**wrapper_kwargs)
    without_inm.fit(data['X_train'], data['s'])
    score = without_inm.score(data['X_test'], data['y_test'])

    if used_by_another_test:
        return score, score_inm
    assert score < score_inm + 1e-4
def test_fit_with_inm(
    sparse,
    seed=SEED,
    used_by_another_test=False,
):
    """Compare fitting with a supplied inverse noise matrix against fitting
    without one, on the sparse or dense fixture.

    Returns ``(score, score_inm)`` when ``used_by_another_test`` is true;
    otherwise asserts that the score without the matrix does not beat the
    score with it by more than 1e-4.
    """
    if sparse:
        dataset = SPARSE_DATA
    else:
        dataset = DATA

    # Wrapper that receives the inverse noise matrix explicitly.
    with_inm = LearningWithNoisyLabels(seed=seed)
    inverse_noise_matrix = compute_inv_noise_matrix(
        dataset["py"], dataset["noise_matrix"], dataset["ps"])
    with_inm.fit(
        dataset['X_train'],
        dataset['s'],
        inverse_noise_matrix=inverse_noise_matrix,
    )
    score_inm = with_inm.score(dataset['X_test'], dataset['y_test'])

    # Wrapper that is fitted from the noisy labels alone.
    without_inm = LearningWithNoisyLabels(seed=seed)
    without_inm.fit(dataset['X_train'], dataset['s'])
    score = without_inm.score(dataset['X_test'], dataset['y_test'])

    if used_by_another_test:
        return score, score_inm
    assert score < score_inm + 1e-4
def test_rp():
    """Smoke test: fit and score with an explicit ``LogisticRegression`` base
    classifier; the test passes as long as nothing raises.
    """
    base_clf = LogisticRegression(
        multi_class='auto',
        solver='lbfgs',
        random_state=seed,
    )
    rp = LearningWithNoisyLabels(clf=base_clf)
    rp.fit(data["X_train"], data["s"])
    print(rp.score(data["X_test"], data["y_test"]))
    # Reaching this line without an exception is the success criterion.
    assert True
def test_raise_error_no_clf_predict():
    """A classifier without ``predict`` must be rejected with a ``ValueError``
    whose message mentions ``predict``.

    The exception type and the message are checked on a single construction
    attempt, so the test cannot pass silently when nothing is raised.
    """

    class struct(object):
        # Deliberately defines fit and predict_proba but NOT predict.
        def fit(self):
            pass

        def predict_proba(self):
            pass

    with pytest.raises(ValueError) as excinfo:
        LearningWithNoisyLabels(clf=struct())
    assert 'predict' in str(excinfo.value)
def test_no_score():
    """A wrapped classifier that defines no ``score`` method must still give a
    score of 1 when its ``predict`` returns the true test labels.
    """

    class Struct():
        def fit(self):
            pass

        def predict_proba(self):
            pass

        def predict(self, X):
            # Always answer with the ground-truth test labels.
            return data['y_test']

    wrapper = LearningWithNoisyLabels(clf=Struct())
    result = wrapper.score(data['X_test'], data['y_test'])
    assert abs(result - 1) < 1e-6
def train_noisy_to_pseudo(X_train, y_train, X_test, y_test, clf=None):
    """Relabel the training samples flagged as noisy, retrain the base model on
    the relabelled training set, and return its score on the test set.

    Args:
        X_train: training features; must support boolean-mask indexing.
        y_train: (possibly corrupted) training labels; must provide ``.copy()``
            and boolean-mask assignment.
        X_test: test features.
        y_test: test labels used for scoring.
        clf: optional wrapper exposing ``fit``, ``predict`` and ``noise_mask``.
            When ``None``, a ``LearningWithNoisyLabels`` wrapping a fresh
            ``baseclf(**params)`` is created.

    Returns:
        ``model.score(X_test, y_test)`` for the retrained base model.
    """
    model = baseclf(**params)
    if clf is None:
        clf = LearningWithNoisyLabels(clf=model, seed=seed, n_jobs=cpu_count())
    clf.fit(X_train, y_train)

    # Pseudo-label only the training samples flagged as corrupted.
    # The copy is taken from the y_train argument (not a module-level global)
    # so the labels line up with the data that noise_mask was computed on.
    X_with_noise = X_train[clf.noise_mask]
    y_train_pseudo = y_train.copy()
    y_train_pseudo[clf.noise_mask] = clf.predict(X_with_noise)

    # Train the base model on the cleaned-up training data.
    model.fit(X_train, y_train_pseudo)
    return model.score(X_test, y_test)
def test_fit_psx():
    """Fitting with a precomputed ``psx`` and fitting without one must give
    the same test score (within 1e-6).
    """
    from cleanlab.latent_estimation import estimate_cv_predicted_probabilities

    # First run: psx is computed beforehand and handed to fit().
    with_psx = LearningWithNoisyLabels()
    precomputed_psx = estimate_cv_predicted_probabilities(
        X=data['X_train'],
        labels=data['y_train'],
    )
    with_psx.fit(X=data['X_train'], s=data['y_train'], psx=precomputed_psx)
    score_with_psx = with_psx.score(data['X_test'], data['y_test'])

    # Second run: no psx argument at all.
    without_psx = LearningWithNoisyLabels()
    without_psx.fit(X=data['X_train'], s=data['y_train'])
    score_no_psx = without_psx.score(data['X_test'], data['y_test'])

    assert abs(score_with_psx - score_no_psx) < 1e-6
def test_score():
    """``score`` must return whatever the wrapped classifier's own ``score``
    method returns.
    """
    sentinel = 'cleanlab is dope'

    class Struct():
        def fit(self):
            pass

        def predict_proba(self):
            pass

        def predict(self):
            pass

        def score(self, X, y):
            # Ignores its inputs; the sentinel makes delegation observable.
            return sentinel

    wrapper = LearningWithNoisyLabels(clf=Struct())
    result = wrapper.score(data['X_test'], data['y_test'])
    assert result == sentinel
def test_fit_with_nm(
    seed=0,
    used_by_another_test=False,
):
    """Compare fitting with a supplied noise matrix against fitting without
    one.

    Returns ``(score, score_nm)`` when ``used_by_another_test`` is true;
    otherwise asserts that the score without the matrix does not beat the
    score with it by more than 1e-4.
    """
    # Wrapper that receives the noise matrix explicitly.
    with_nm = LearningWithNoisyLabels(seed=seed)
    noise_matrix = data['noise_matrix']
    with_nm.fit(data['X_train'], data['s'], noise_matrix=noise_matrix)
    score_nm = with_nm.score(data['X_test'], data['y_test'])

    # Wrapper that is fitted from the noisy labels alone.
    without_nm = LearningWithNoisyLabels(seed=seed)
    without_nm.fit(data['X_train'], data['s'])
    score = without_nm.score(data['X_test'], data['y_test'])

    if used_by_another_test:
        return score, score_nm
    assert score < score_nm + 1e-4
def test_no_fit_sample_weight():
    """``fit`` must run without error when the wrapped classifier's ``fit``
    takes no ``sample_weight`` argument.
    """

    class Struct():
        def fit(self, X, y):
            pass

        def predict_proba(self):
            pass

        def predict(self, X):
            return data['y_test']

    n_samples = np.shape(data['y_test'])[0]
    n_classes = len(np.unique(data['y_test']))
    zero_psx = np.zeros((n_samples, n_classes))

    wrapper = LearningWithNoisyLabels(clf=Struct())
    wrapper.fit(
        data['X_train'],
        data['y_train'],
        psx=zero_psx,
        noise_matrix=data['noise_matrix'],
    )
    # Getting here without an exception is the success criterion.
    assert True
def test_clf_fit_nm():
    """``fit`` must reject an unlearnable noise matrix with a ``ValueError``
    whose message mentions ``Trace(noise_matrix)``.

    The exception type and the message are checked on a single ``fit`` call,
    so the test cannot pass silently when nothing is raised.
    """
    lnl = LearningWithNoisyLabels()
    # Example of a bad noise matrix (impossible to learn from).
    nm = np.array([[0, 1], [1, 0]])
    with pytest.raises(ValueError) as excinfo:
        lnl.fit(X=np.arange(3), s=np.array([0, 0, 1]), noise_matrix=nm)
    assert 'Trace(noise_matrix)' in str(excinfo.value)
def test_pred_and_pred_proba():
    """``predict`` must return one entry per test sample, and
    ``predict_proba`` an array of shape (samples, classes).
    """
    wrapper = LearningWithNoisyLabels()
    wrapper.fit(data['X_train'], data['s'])

    expected_rows = np.shape(data['y_test'])[0]
    expected_cols = len(np.unique(data['y_test']))

    predictions = wrapper.predict(data['X_test'])
    probabilities = wrapper.predict_proba(data['X_test'])

    # Only the output shapes are checked, not the values.
    assert np.shape(predictions)[0] == expected_rows
    assert np.shape(probabilities) == (expected_rows, expected_cols)
def __model_build_noisy_pseudo(X_train, y_train, X_test, alg, seed):
    """Pseudo-label the noisy training rows and the whole test set, retrain the
    base model on train + test, and return its predictions for ``X_test``.

    ``alg == 'Logistic'`` selects ``LogisticRegression``; any other value
    falls back to ``GaussianNB``.
    """
    if alg == 'Logistic':
        base_model = LogisticRegression(multi_class='auto')
    else:
        base_model = GaussianNB()

    cleaner = LearningWithNoisyLabels(
        clf=base_model,
        seed=seed,
        n_jobs=cpu_count(),
    )
    cleaner.fit(X_train, y_train)

    # Overwrite the labels of the rows flagged as noisy with predictions.
    flagged = cleaner.noise_mask
    relabelled_train = y_train.copy()
    relabelled_train[flagged] = cleaner.predict(X_train[flagged])

    # Treat the test-set predictions as labels too, then stack everything.
    predicted_test = cleaner.predict(X_test)
    all_labels = np.hstack([relabelled_train, predicted_test])
    all_features = np.vstack([X_train, X_test])

    base_model.fit(all_features, all_labels)
    return base_model.predict(X_test)
def train_test_and_noisy_to_pseudo(X_train, y_train, X_test, y_test, clf=None):
    """Relabel the noisy training samples, pseudo-label the test set, retrain
    the base model on both, and return its score on the test set.

    Args:
        X_train: training features; must support boolean-mask indexing and
            ``sp.vstack``.
        y_train: (possibly corrupted) training labels; must provide ``.copy()``
            and boolean-mask assignment.
        X_test: test features.
        y_test: test labels used for scoring only.
        clf: optional wrapper exposing ``fit``, ``predict`` and ``noise_mask``.
            When ``None``, a ``LearningWithNoisyLabels`` wrapping a fresh
            ``baseclf(**params)`` is created.

    Returns:
        ``model.score(X_test, y_test)`` for the retrained base model.
    """
    model = baseclf(**params)
    if clf is None:
        clf = LearningWithNoisyLabels(clf=model, seed=seed, n_jobs=cpu_count())
    clf.fit(X_train, y_train)

    # Pseudo-label both the corrupted training samples and the test set.
    # The copy is taken from the y_train argument (not a module-level global)
    # so the labels line up with the data that noise_mask was computed on.
    X_with_noise = X_train[clf.noise_mask]
    y_train_pseudo = y_train.copy()
    y_train_pseudo[clf.noise_mask] = clf.predict(X_with_noise)
    y_test_pseudo = clf.predict(X_test)
    y_pseudo = np.hstack([y_train_pseudo, y_test_pseudo])
    X_for_pseudo = sp.vstack([X_train, X_test])

    # Train on all the data, pseudo-labels included.
    model.fit(X_for_pseudo, y_pseudo)
    return model.score(X_test, y_test)
# Fragment: corrupt the training labels with a generated noise matrix, run a
# parameter search over LearningWithNoisyLabels, and score it against a copy of
# the bare classifier fitted on the same noisy labels.
np.random.seed(seed=0)
# Untouched copy of clf; it is fitted directly on the noisy labels further down
# and provides score_default.
clf_copy = copy.deepcopy(clf)
# Compute p(y=k), the ground truth class prior on the labels.
py = np.bincount(y_train) / float(len(y_train))
# Generate the noisy channel to characterize the label errors.
noise_matrix = generate_noise_matrix_from_trace(
    K=num_classes,
    trace=num_classes * avg_trace,
    py=py,
    frac_zero_noise_rates=frac_zero_noise_rates,
)
print_noise_matrix(noise_matrix)
# Create the noisy labels. This method is exact w.r.t. the noise_matrix.
y_train_with_errors = generate_noisy_labels(y_train, noise_matrix)
# NOTE(review): GridSearch is defined outside this fragment; presumably it picks
# the param_grid entry that scores best on (X_val, y_val) - confirm.
lnl_cv = GridSearch(
    model=LearningWithNoisyLabels(clf),
    param_grid=param_grid,
    num_threads=4,
    seed=0,
)
lnl_cv.fit(
    X_train=X_train,
    y_train=y_train_with_errors,
    X_val=X_val,
    y_val=y_val,
    verbose=False,
)
# Also compute the test score with default parameters
clf_copy.fit(X_train, y_train_with_errors)
score_opt = lnl_cv.model.score(X_test, y_test)
score_default = clf_copy.score(X_test, y_test)
def clean_labels(X: pd.DataFrame, y, count_start, pulearning=None, strategy="cut", round=0, early_stop=False):
    """Repeatedly drop the rows that ``LearningWithNoisyLabels`` flags as noisy.

    One round is run for every seed in ``count_start .. count_start + round``.
    Each round tunes LightGBM hyper-parameters on a sample, fits the noisy-label
    wrapper on the selected feature columns, and removes the flagged rows from
    ``X`` and ``y``. A round that raises is reported with ``print`` and skipped.

    Args:
        X: feature frame. Only columns named ``c_<x>`` or ``n_<x>`` (exactly one
            underscore) are used for fitting; all columns are kept in the output.
        y: labels aligned with ``X``; ``y.values == 1`` marks the positive class.
        count_start: first seed / round counter.
        pulearning: forwarded to ``LearningWithNoisyLabels``.
        strategy: accepted for backward compatibility. Both original branches
            ("cut" and anything else) removed the flagged rows, so it currently
            has no effect.
        round: number of extra rounds after the first. The name shadows the
            builtin but is kept because callers may pass it by keyword.
        early_stop: when true, stop once ``rou_0 + rou_1 <= 0.9``.

    Returns:
        ``(X, y, rou_0 + rou_1, rou_0, rou_1)`` where ``rou_0`` and ``rou_1``
        are ``noise_matrix[1, 0]`` and ``noise_matrix[0, 1]`` from the first
        round. They are NaN if the first round failed before estimating them.
    """
    from preprocess import sample

    count = count_start
    # Bound up front so the early-stop check and the return statement stay
    # valid even when the first round raises before the rates are estimated.
    rou_0 = rou_1 = float("nan")
    cols = [
        c for c in X
        if len(c.split("_")) == 2 and c.startswith(("c_", "n_"))
    ]
    print(cols)
    while count <= count_start + round:
        try:
            params = {
                "objective": "binary",
                "metric": "auc",
                "verbosity": -1,
                "seed": count,
                "num_threads": 4,
                "num_boost_round": 50
            }
            X_sample, y_sample = sample(X[cols], y, 30000, random_state=count)
            hyperparams = _hyperopt(X_sample, y_sample, params, random_state=count)
            model = LearningWithNoisyLabels(
                lgb.LGBMClassifier(**hyperparams, **params),
                seed=count,
                cv_n_folds=5,
                prune_method="both",
                converge_latent_estimates=True,
                pulearning=pulearning)
            print(X.shape, len(y))
            # fit() here returns five values; only the first two are used.
            noisy, noise_matrix, _inv_noise_matrix, _confident_joint, _psx = model.fit(
                X[cols].values, 1 * (y.values == 1), thresholds=None)
            if count == count_start:
                # Noise rates are recorded from the first round only.
                rou_0 = noise_matrix[1, 0]
                rou_1 = noise_matrix[0, 1]
                print(rou_0, rou_1)
            if early_stop and rou_0 + rou_1 <= 0.9:
                break
            if len(noisy) <= 0:
                break
            print(sum(1 for flagged in noisy if flagged))
            # Drop the flagged rows before the next round.
            X = X[~noisy]
            y = y[~noisy]
        except Exception as exp:
            # Best effort: report the failure and move on to the next seed.
            print("error:", exp)
        finally:
            count += 1
    return X, y, rou_0 + rou_1, rou_0, rou_1
def fit(self, X_full, y_full, time_remain):
    """Train one noisy-label model per seed until the time budget runs low.

    Each round (seeds 2019, 2020, ...) downsamples the data, tunes LightGBM
    hyper-parameters, finds the best iteration with early stopping, estimates
    the noise matrices, fits a ``LearningWithNoisyLabels`` wrapper and appends
    it to ``self.models``. The loop stops once the remaining budget is at most
    five times the duration of the first round.

    A round that raises an ordinary exception is reported with ``print`` and
    still counts against the budget. ``KeyboardInterrupt`` and ``SystemExit``
    are no longer swallowed.

    Args:
        X_full: full feature frame passed to ``downsampling``.
        y_full: full label vector; ``sum(y_full) * self.sample_rto`` is passed
            to ``downsampling``.
        time_remain: time budget in seconds.

    Returns:
        ``self``.
    """
    start_fit = time.time()
    first_seed = 2019
    SEED = first_seed
    budget = time_remain - (time.time() - start_fit)
    best_iter = []
    while True:
        print(SEED, budget)
        round_start = time.time()
        try:
            self.hyper_seed = SEED
            params = {
                "objective": "binary",
                "metric": "auc",
                "verbosity": -1,
                "seed": self.hyper_seed,
                "num_threads": 4,
                "num_boost_round": 500
            }
            X, y = downsampling(X_full, y_full, sum(y_full) * self.sample_rto, seed=self.hyper_seed)
            hyperparams = self._hyperopt(X, y, params, random_state=self.hyper_seed)

            # Use a held-out 10% split to find the early-stopping iteration.
            X_train, X_val, y_train, y_val = train_test_split(
                X, y, test_size=0.1, random_state=self.hyper_seed)
            watchlist = [(X_train, y_train), (X_val, y_val)]
            _model = lgb.LGBMClassifier(**hyperparams, **params)
            _model.fit(X_train, y_train, early_stopping_rounds=30, eval_set=watchlist, verbose=100)
            # Classifiers built below reuse the early-stopped iteration count.
            params["num_boost_round"] = _model.best_iteration_
            best_iter.append(_model.best_iteration_)

            confident_joint, psx = estimate_confident_joint_and_cv_pred_proba(
                X=X.values,
                s=1 * (y.values == 1),
                clf=lgb.LGBMClassifier(**hyperparams, **params),
                seed=SEED,
            )
            est_py, est_nm, est_inv = estimate_latent(confident_joint, s=1 * (y.values == 1))

            self.model = LearningWithNoisyLabels(
                lgb.LGBMClassifier(**hyperparams, **params),
                seed=1,
                cv_n_folds=5,
                prune_method="both",
                converge_latent_estimates=True,
                pulearning=1)
            self.model.fit(
                X.values,
                1 * (y.values == 1),
                psx=psx,
                thresholds=None,
                noise_matrix=est_nm,
                inverse_noise_matrix=est_inv,
            )
            self.models.append(self.model)
        except Exception as exc:
            # Best effort: a failed round is reported and skipped, not fatal.
            print("error:", exc)

        # Budget bookkeeping is the same whether the round succeeded or failed.
        elapsed = time.time() - round_start
        if SEED == first_seed:
            single_round = elapsed
        budget -= elapsed
        if budget <= single_round * 5:
            break
        SEED += 1
    print(best_iter)
    return self
from cleanlab.classification import LearningWithNoisyLabels
from cleanlab.noise_generation import generate_noisy_labels
from cleanlab.util import value_counts
from cleanlab.latent_algebra import compute_inv_noise_matrix

# ## **rankpruning** is the first practical *(works for any classifier, runs
# fast, robust to poor probability estimation)* algorithm for multiclass
# learning with noisy labels. It's comprised of components from the theory and
# algorithms of **confident learning**. It's a Python class that wraps around
# any classifier as long as .fit(X, y, sample_weight), .predict(X),
# .predict_proba(X) are defined. Inspect the **cleanlab** package for
# documentation.
#
# ## Here we show the performance of multiclass rankpruning wrapped around a
# sklearn LogisticRegression classifier versus LogisticRegression without any
# help from confident learning on the Iris dataset.

# In[16]:

# Seed for reproducibility
seed = 2
rp = LearningWithNoisyLabels(clf = logreg(), seed = seed)
np.random.seed(seed = seed)

# Get iris dataset
iris = datasets.load_iris()
X = iris.data  # all feature columns are kept; the scatter plot below uses columns 1 and 3
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# The plotting code relies on get_ipython(); the matching except clause lies
# outside this fragment.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
    from matplotlib import pyplot as plt
    _ = plt.figure(figsize=(12,8))
    color_list = plt.cm.tab10(np.linspace(0, 1, 6))
    # Scatter feature column 1 against column 3, coloured by training label.
    _ = plt.scatter(X_train[:,1], X_train[:,3], color = [color_list[z] for z in y_train], s = 50)
# clf = LearningWithNoisyLabels(clf=GaussianProcessClassifier(kernel= kernel, max_iter_predict=1000, multi_class='one_vs_rest')) # clf = GaussianProcessClassifier(kernel= kernel, multi_class='one_vs_rest') # clf = LearningWithNoisyLabels(GaussianProcessClassifier(kernel= kernel, multi_class='one_vs_rest')) # clf = LearningWithNoisyLabels(clf = RandomForestClassifier()) # clf = LogisticRegression(penalty="l1", solver="liblinear") # clf = GradientBoostingClassifier() # clf = SVC(probability=True) # clf = GaussianProcessClassifier(kernel=kernel, multi_class='one_vs_rest') ratioList = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6] scoreList = [] noiseScoreist = [] for ratio in ratioList: clf = RandomForestClassifier() clfNoise = LearningWithNoisyLabels(clf=RandomForestClassifier()) newTrainX = trainX newTrainY = copy.deepcopy(trainY) for i in range(len(newTrainX)): if (random.random() < ratio): while True: noiseLabel = random.randint(1, 4) - 1 # print('trainY[i] : ', newTrainY[i], 'noiseLabel :',noiseLabel) if newTrainY[i] != noiseLabel: newTrainY[i] = noiseLabel break clf.fit(newTrainX, newTrainY) clfNoise.fit(newTrainX, newTrainY) # importances = clf.feature_importances_ # indices = np.argsort(importances)[::-1]
'% confident learning errors that are actual errors: {:.0%}'.format(score))
# Fragment: the line above closes a call that starts outside this view.
#
# Baseline: the original classifier fitted directly on the noisy labels s.
# NOTE(review): f1_score is called as (pred, y_test) throughout this fragment;
# sklearn documents the order as (y_true, y_pred). This should not matter for
# average='micro' - confirm.
print('WITHOUT confident learning,', end=" ")
clf.fit(X_train, s)
pred = clf.predict(X_test)
print("dataset test f1:", round(f1_score(pred, y_test, average='micro'), 4))
print("\nNow we show improvement using cleanlab to characterize the noise")
print(
    "and learn on the data that is (with high confidence) labeled correctly.")
print()
# Same classifier wrapped in LearningWithNoisyLabels, fitted without psx.
print('WITH confident learning (psx not given),', end=" ")
rp = LearningWithNoisyLabels(clf=clf)
rp.fit(X_train, s)
pred = rp.predict(X_test)
print("dataset test f1:", round(f1_score(pred, y_test, average='micro'), 4))
# Refit the same wrapper, this time passing the precomputed psx.
print('WITH confident learning (psx given),', end=" ")
rp.fit(X=X_train, s=s, psx=psx)
pred = rp.predict(X_test)
print("dataset test f1:", round(f1_score(pred, y_test, average='micro'), 4))
# Reference point: the classifier fitted on y_train instead of s.
print('WITH all label right,', end=" ")
clf.fit(X_train, y_train)
pred = clf.predict(X_test)
print("dataset test f1:", round(f1_score(pred, y_test, average='micro'), 4))
print("-------------------")
def denoiseA(data_cor, rho, mode):
    """Denoise the corrupted sensitive attribute using RankPrune.

    ``data_cor[0]`` holds the features and ``data_cor[2]`` the corrupted
    attribute; both are read through ``.values``. ``rho`` unpacks into
    ``(rho_a_plus, rho_a_minus)``, from which the noise matrix is built.

    Returns ``(data_denoised, data_denoised_est, rho_est)``:

    * ``data_denoised`` - deep copy of ``data_cor`` whose element 2 is replaced
      by the predictions of a model fitted with the given noise matrix.
    * ``data_denoised_est`` / ``rho_est`` - the same using a model fitted
      without a noise matrix, plus ``[noise_matrix[0][1], noise_matrix[1][0]]``
      read from that model. Both are ``None`` unless ``mode == 'six'``
      (marked "under development" in the original).
    """
    rho_a_plus, rho_a_minus = rho
    features = data_cor[0].values
    noisy_attr = data_cor[2].values

    known_noise = np.array([
        [1 - rho_a_minus, rho_a_plus],
        [rho_a_minus, 1 - rho_a_plus],
    ])

    # Model that is told the noise rates explicitly.
    informed_clf = LogisticRegression(
        random_state=0, solver='lbfgs', multi_class='auto')
    informed = LearningWithNoisyLabels(clf=informed_clf)
    informed.fit(X=features, s=noisy_attr, noise_matrix=known_noise)

    data_denoised = copy.deepcopy(data_cor)
    data_denoised[2] = pd.Series(informed.predict(features))

    if mode != 'six':
        return data_denoised, None, None

    # Model that gets no noise matrix; its own noise_matrix is read back below.
    estimating = LearningWithNoisyLabels(
        LogisticRegression(random_state=0, solver='lbfgs', multi_class='auto'))
    estimating.fit(X=features, s=noisy_attr)

    data_denoised_est = copy.deepcopy(data_cor)
    data_denoised_est[2] = pd.Series(estimating.predict(features))

    rho_est = [estimating.noise_matrix[0][1], estimating.noise_matrix[1][0]]
    return data_denoised, data_denoised_est, rho_est
# Fragment: for every named classifier, compare training on clean labels, on
# noisy labels, and on noisy labels through LearningWithNoisyLabels.
for name, clf in zip(names, classifiers):
    # Create one copy of the classifier per experiment; the unpacking below
    # requires exactly three.
    # perf_label_clf - Will be trained on the hidden, noise-free labels
    # noisy_clf - Will be trained on the noisy labels
    # noisy_clf_w_rp - Will be trained on the noisy labels using LearningWithNoisyLabels
    clfs = [copy.deepcopy(clf) for i in range(len(experiments))]
    perf_label_clf, noisy_clf, noisy_clf_w_rp = clfs
    # Classifier (trained without label errors)
    perf_label_clf.fit(X_train, y_train)
    perf_label_score = perf_label_clf.score(X_test, y_test)
    # Classifier (trained with label errors)
    noisy_clf.fit(X_train, y_train_w_errors)
    noisy_score = noisy_clf.score(X_test, y_test)
    # Classifier + RP (trained with label errors)
    rp = LearningWithNoisyLabels(noisy_clf_w_rp)
    rp.fit(X_train, y_train_w_errors)
    # The score is taken from the wrapped classifier (rp.clf), not from rp.
    noisy_score_w_rp = rp.clf.score(X_test, y_test)
    # Store results for each classifier in a dict with key = clf_name.
    clf_results[name] = {
        'clfs': clfs,
        "perf_label_score": perf_label_score,
        "noisy_score": noisy_score,
        "noisy_score_w_rp": noisy_score_w_rp,
    }
# The rest of this dict literal lies outside the fragment.
results.append({
    "X": X,
    "X_train": X_train,
    "y_train": y_train,
# Fragment: print the true and estimated noise / joint matrices, then compare
# the accuracy of three ways of training on the noisy labels s.
print('The actual, latent, underlying noise matrix.')
print_noise_matrix(noise_matrix)
print('Our estimate of the noise matrix.')
print_noise_matrix(est_noise_matrix)
print()
print('The actual, latent, underlying joint distribution matrix.')
cleanlab.util.print_joint_matrix(true_joint_distribution_of_label_errors)
print('Our estimate of the joint distribution matrix.')
cleanlab.util.print_joint_matrix(est_joint)
print("Accuracy Comparison")
print("-------------------")
# 1) Plain logistic regression fitted on the noisy labels.
clf = LogisticRegression(solver='lbfgs', multi_class='auto')
baseline_score = accuracy_score(y_test, clf.fit(X_train, s).predict(X_test))
print("Logistic regression:", baseline_score)
# 2) LearningWithNoisyLabels with its default classifier and precomputed psx.
rp = LearningWithNoisyLabels(seed=seed)
rp_score = accuracy_score(y_test, rp.fit(X_train, s, psx=psx).predict(X_test))
print("Logistic regression (+rankpruning):", rp_score)
# Accuracy gain of the wrapped model over the baseline.
diff = rp_score - baseline_score
# 3) Fresh logistic regression fitted only on rows not flagged in idx_errors.
clf = LogisticRegression(solver='lbfgs', multi_class='auto')
print(
    'Fit on denoised data without re-weighting:',
    accuracy_score(
        y_test,
        clf.fit(X_train[~idx_errors], s[~idx_errors]).predict(X_test)))
# The plotting code relies on get_ipython(); the rest of the try block and its
# except clause lie outside this fragment.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
    from matplotlib import pyplot as plt
    print("\n\n\n\n\n\n")
def test_default_clf():
    """The default-constructed wrapper must carry a classifier exposing
    ``fit``, ``predict`` and ``predict_proba``.

    The conditions are asserted rather than returned: a test runner ignores a
    test function's return value, so returning the boolean could never fail.
    """
    lnl = LearningWithNoisyLabels()
    assert lnl.clf is not None
    assert hasattr(lnl.clf, 'fit')
    assert hasattr(lnl.clf, 'predict')
    assert hasattr(lnl.clf, 'predict_proba')
def test_seed():
    """A wrapper constructed with ``seed=0`` must expose a non-None ``seed``."""
    seeded = LearningWithNoisyLabels(seed=0)
    stored_seed = seeded.seed
    assert stored_seed is not None
def test_default_clf():
    """The default-constructed wrapper must carry a classifier exposing
    ``fit``, ``predict`` and ``predict_proba``.
    """
    default_clf = LearningWithNoisyLabels().clf
    required_methods = ('fit', 'predict', 'predict_proba')
    has_all_methods = all(
        hasattr(default_clf, method) for method in required_methods)
    assert default_clf is not None and has_all_methods
# Work around indexing bug X_train = X_train.reset_index(drop=True) A_train = A_train.reset_index(drop=True) X_test = X_test.reset_index(drop=True) A_test = A_test.reset_index(drop=True) # A_test = A_test.map({ 0:"female", 1:"male"}) # flip across different groups Y_noised = flip(Y_train, A_train, error_rate=error_rate) noise_matrix = generate_noise_matrix(Y_noised, Y_train) est_error_rate = estimation(X_train.values, Y_noised, A_train.values, ngroups=2**args.ngroups) print(f"True error rate is {error_rate}.\nEstimated error rate is {est_error_rate}.") # Learning with Noisy Labels lnl = LearningWithNoisyLabels(clf=LogisticRegression()) lnl.fit(X=X_train.values, s=Y_noised, noise_matrix=noise_matrix) Y_lnlt = lnl.predict(X_train.values).astype(int) lnl.fit(X=X_train.values, s=Y_noised) Y_lnle = lnl.predict(X_train.values).astype(int) def run_corrupt(fairness_constraints): all_results = {} all_results['eps'] = fairness_constraints all_results['accuracy'] = { 'train': [], 'test': [] } all_results['violation'] = {
# learning with noisy labels. Its comprised of components from the theory and # algorithms of **confident learning**. It's a Python class that wraps around # any classifier as long as .fit(X, y, sample_weight), # .predict(X), .predict_proba(X) are defined. # See https://l7.curtisnorthcutt.com/cleanlab-python-package for docs. # # # ## Here we show the performance with LogisiticRegression classifier # ## versus LogisticRegression \*without\* cleanlab on the Iris dataset. # In[2]: # Seed for reproducibility seed = 2 clf = LogisticRegression(solver='lbfgs', multi_class='auto', max_iter=1000) rp = LearningWithNoisyLabels(clf=clf, seed=seed) np.random.seed(seed=seed) # Get iris dataset iris = datasets.load_iris() X = iris.data # we only take the first two features. y = iris.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) try: get_ipython().run_line_magic('matplotlib', 'inline') from matplotlib import pyplot as plt _ = plt.figure(figsize=(12, 8)) color_list = plt.cm.tab10(np.linspace(0, 1, 6)) _ = plt.scatter(X_train[:, 1],