SVM_plot = plt.figure()
i = 0
epochs = 50

# The search space and the inner cross-validation scheme do not change between
# epochs, so they are declared once up front.
c_values = [0.000001, 0.00001, 0.0001, 0.001, 0.01]
gamma = [0.00000001, 0.0000001, 0.000001]
param_grid = {'C': c_values, 'gamma': gamma}

# Folds used for hyper-parameter optimisation on the training set.
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=50, random_state=200889)

# Every epoch draws a fresh 80/20 split, standardises it and tunes an
# RBF-kernel SVM on the training portion.
for i, epoch in enumerate(range(epochs), start=1):
    print(i)

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, shuffle=True)

    # The scaler is fitted on the training split only and then re-used,
    # unchanged, on the held-out split.
    sc = StandardScaler()
    x_train = sc.fit_transform(x_train)
    x_test = sc.transform(x_test)
    y_train = y_train.values

    # RBF-kernel support vector classifier; C and gamma are tuned below.
    model = SVC(kernel='rbf')

    # Grid search on the training set, scored by ROC AUC.
    grid = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        cv=cv,
        scoring='roc_auc',
        n_jobs=-1,
    )
    grid_result = grid.fit(x_train, y_train)
    print('Best model:', grid_result.best_estimator_)
]] studies = [ AcharyaStudy, HosseinZahdeStudy, FergusStudy, Fergus2013Study, IdowuStudy, HussainStudy, AhmedStudy, RenStudy, KhanStudy, PengStudy, JagerLibensekStudy ] Xs = [ X_acharya, X_hosseinzahde, X_fergus, X_fergus2013, X_idowu, X_husain, X_ahmed, X_ren, X_khan, X_peng, X_jagerlibensek ] y = LabelEncoder().fit_transform(y) validator = RepeatedStratifiedKFold(n_repeats=2, n_splits=10) results = {} tests = {} models = {} for i, (train, test) in enumerate(validator.split(X, y)): print("fold: %d" % i) models[i] = {} results[i] = {} tests[i] = {} for j in range(len(studies)): print("study: %s" % studies[j].__name__) models[i][j] = studies[j]().fit(Xs[j].iloc[train].values, y[train]) results[i][j] = models[i][j].predict_proba(Xs[j].iloc[test].values)[:, 1]
# Candidate values for the regularisation parameter C of the classifier:
# start at 1e-05 and advance in steps of 0.1 while staying below 5.5.
C = np.arange(1e-05, 5.5, 0.1)

# Metrics recorded for every candidate; 'Accuracy' is the one used for refit.
scoring = {
    'Accuracy': 'accuracy',
    'AUC': 'roc_auc',
    'Log_loss': 'neg_log_loss',
}
log_reg = LogisticRegression()

# Simple pre-processing estimators
###############################################################################
# Both centring and scaling are switched off here; swap in the commented
# variant below to actually standardise the features.
std_scale = StandardScaler(with_mean=False, with_std=False)
#std_scale = StandardScaler()

# Defining the CV method: Repeated Stratified K Fold
###############################################################################
n_folds = 5
n_repeats = 5
rskfold = RepeatedStratifiedKFold(
    n_splits=n_folds,
    n_repeats=n_repeats,
    random_state=2,
)

# Creating the simple pipeline and defining the grid search
###############################################################################
log_clf_pipe = Pipeline(steps=[
    ('scale', std_scale),
    ('clf', log_reg),
])
log_clf = GridSearchCV(
    estimator=log_clf_pipe,
    param_grid={'clf__C': C},
    scoring=scoring,
    refit='Accuracy',
    cv=rskfold,
    return_train_score=True,
)
log_clf.fit(X, y)
results = log_clf.cv_results_

print('=' * 20)
# NOTE: the label says "best params" but the value printed is the refitted
# best estimator (pipeline), not ``best_params_``.
print(f"best params: {log_clf.best_estimator_}")
tam = len(class_names) * figprop[len(class_names)] results = {} models = {} nrounds = 1 if debug else 5 """##kfold Experiments""" X, y = None, None for load in range(4): X, y = concatenate_datasets(X, y, eval('xn_' + str(load)), eval('yn_' + str(load))) for severity in severities: X, y = concatenate_datasets(X, y, eval('x' + str(severity)), eval('y' + str(severity))) rskf = RepeatedStratifiedKFold(n_splits=len(severities), n_repeats=nrounds, random_state=36851234) fold = 0 count_round = 0 results['kfold'] = {} models['kfold'] = {} y_test_round = None y_pred_round = {} print("k-Fold") for train_index, test_index in rskf.split(X, y): print("{}/{}".format(fold + 1, rskf.get_n_splits() // nrounds), end=" x ") x_train, x_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] if y_test_round is None:
# Spot-check algorithms: (short label, untrained estimator) pairs.
models = [
    ('LR', LogisticRegression(solver='liblinear')),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
]

# Evaluate each model in turn with repeated stratified cross-validation,
# keeping the per-fold ROC AUC scores alongside the model labels.
num_folds = 5
scoring = 'roc_auc'
results = []
names = []
for name, model in models:
    kfold = RepeatedStratifiedKFold(n_splits=num_folds, random_state=seed)
    cv_results = cross_val_score(
        model, X_train, y_train, scoring=scoring, cv=kfold)
    names.append(name)
    results.append(cv_results)
    msg = "{} {} +/- ({})".format(name, cv_results.mean(), cv_results.std())
    print(msg)

# -

# Compare performance: set up the figure that will hold the comparison.
fig1 = pyplot.figure()
fig1.suptitle('Algorithm comparison')
ax = fig1.add_subplot(111)
def _append_metric_row(frame, column_names, values):
    """Return *frame* with one extra row pairing *column_names* with *values*.

    ``DataFrame.append`` was removed in pandas 2.0, so the row is attached
    with ``pd.concat``. Names and values are paired positionally.
    """
    row = pd.DataFrame([dict(zip(column_names, values))])
    if frame.empty:
        # Nothing recorded yet: the new row is the whole table.
        return row
    return pd.concat([frame, row], ignore_index=True)


def run_repeatedCV():
    """Run repeated stratified 10-fold CV (3 repeats) for ten window sizes.

    For every window size and every fold, each k-mer representation returned
    by ``cv_tf_transformation`` is reduced with
    ``feature_reduction_using_trees``, a random forest is tuned with a 3-fold
    grid search, and the probability cut-off that maximises ROC AUC of the
    thresholded labels is used to derive class labels (2 = positive,
    1 = negative). Sensitivity, specificity, AUC and MCC are collected as one
    row per fold, with one column per representation, in ``sen_df``,
    ``spe_df``, ``auc_df`` and ``mcc_df``.

    NOTE(review): the cut-off is selected on the same held-out fold that is
    then scored, so the reported metrics are optimistic - confirm this is
    intended.
    """
    sen_df = pd.DataFrame()
    spe_df = pd.DataFrame()
    auc_df = pd.DataFrame()
    mcc_df = pd.DataFrame()
    k = 0  # running count of folds processed over all window sizes
    pos_data, kmer_data = pd_read_pattern()
    new_kmer = shuffle_data(kmer_data)

    # Candidate probability cut-offs; identical for every model.
    thresholds = arange(0, 1, 0.001)

    # KDE kmer using top 30 percentile features with tfidf scores for test
    # derived from train using transform()
    for i in range(0, 10):
        ctr = 0
        print("Window size: ", i + 1, "\n")
        # The first window has no 4-mer representations.
        if i == 0:
            names2 = ['df_2_cv', 'df_3_cv', 'df_2_tf', 'df_3_tf']
        else:
            names2 = [
                'df_2_cv', 'df_3_cv', 'df_4_cv',
                'df_2_tf', 'df_3_tf', 'df_4_tf',
            ]
        X = new_kmer[i]
        y = new_kmer[i]['Label']
        print("kmer size", names2[ctr])
        ctr = ctr + 1

        rskf = RepeatedStratifiedKFold(n_splits=10, n_repeats=3)
        for train_index, test_index in rskf.split(X, y):
            # Per-fold metric lists, one entry per representation, in the
            # same order as ``names2``.
            sen_kde = []
            spe_kde = []
            acc_kde = []
            auc_kde = []
            m_kde = []
            c = []
            k = k + 1
            print(k, end=",")

            X_train, X_test = X.iloc[train_index], X.iloc[test_index]
            dat_train, dat_test, names = cv_tf_transformation(X_train, X_test)

            for j in range(len(dat_train)):
                fold_train = dat_train[j]
                fold_test = dat_test[j]
                # Re-code chromosome 'X' as '21'.
                fold_train['Chr'] = fold_train['Chr'].replace(['X'], '21')
                fold_test['Chr'] = fold_test['Chr'].replace(['X'], '21')

                train_x = fold_train.drop('Label', axis=1)
                train_y = fold_train['Label']
                test_x = fold_test.drop('Label', axis=1)
                test_y = fold_test['Label']

                X_red = feature_reduction_using_trees(train_x, train_y)

                rf = RandomForestClassifier()
                param_grid = {
                    'n_estimators': [50, 100, 200, 300, 400],
                    # 'auto' was an alias of 'sqrt' for classifiers and was
                    # removed in scikit-learn 1.3, so it is not listed.
                    'max_features': ['sqrt', 'log2'],
                    'max_depth': [2, 3, 5, 7],
                    'min_samples_leaf': [1, 3],
                    'min_samples_split': [2, 5, 10],
                }
                grid = GridSearchCV(rf, param_grid, cv=3)
                # np.ravel accepts a Series or an array; Series.ravel is
                # deprecated in recent pandas.
                grid.fit(X_red, np.ravel(train_y))
                # GridSearchCV refits the winning configuration on the full
                # training data by default, so no second fit is needed.
                best_model = grid.best_estimator_

                # Positive-class probabilities, computed once and re-used for
                # both the cut-off search and the final labels.
                y_probs = best_model.predict_proba(test_x[X_red.columns])[:, 1]
                scores = [
                    roc_auc_score(test_y, convert_to_labels(y_probs, t))
                    for t in thresholds
                ]
                ix = argmax(scores)
                y_test_predictions = np.where(y_probs > thresholds[ix], 2, 1)

                sensi = sensitivity_score(test_y, y_test_predictions, pos_label=2)
                speci = specificity_score(test_y, y_test_predictions, pos_label=2)
                accu = accuracy_score(test_y, y_test_predictions)
                auro = roc_auc_score(test_y, y_test_predictions)
                mcc = metrics.matthews_corrcoef(test_y, y_test_predictions)

                c.append(X_red.columns)
                sen_kde.append(sensi)
                spe_kde.append(speci)
                acc_kde.append(accu)
                auc_kde.append(auro)
                m_kde.append(mcc)

            # One row per fold; ``names2`` already lists the columns in the
            # order the representations were evaluated, for both the 4-column
            # (first window) and the 6-column layout.
            sen_df = _append_metric_row(sen_df, names2, sen_kde)
            spe_df = _append_metric_row(spe_df, names2, spe_kde)
            auc_df = _append_metric_row(auc_df, names2, auc_kde)
            mcc_df = _append_metric_row(mcc_df, names2, m_kde)
query={}, project={'key_id':1, 'recognized':1, 'word':1}, host='localhost', port=27017, username=None, password=None, no_id=True, num_sample=10000 ) df = df.loc[df['recognized'] == True].reset_index() ids = df.index.values word_class = df.word.values rskf = RepeatedStratifiedKFold(n_splits=3, n_repeats=1,random_state=999) cv_idx = 0 for train_idx, test_idx in rskf.split(ids, word_class): train_cv = df.loc[train_idx] test_cv = df.loc[test_idx] sys.exit() train_cv.to_csv(f'{path_CV}/train_df_{cv_idx}.csv') test_cv.to_csv(f'{path_CV}/val_df_{cv_idx}.csv') cv_idx+=1 # In[8]: train_cv
RANDOM_STATE_CV = 124213 group_labels = [ "[0 - 5)", "[5 - 10)", "[10 - 15)", "[15 - 20)", "[20 - 25)", "[25 - 30)", "[30 - 35)", "[35 - 40)", "[40 - 45)", "[45 - 50)", "[50 - 55)", "[55 - 60)", "[60 - 65)", "[65 - 70)", "[70 - 75)", "75+" ] X = otu_df.loc[age_cohort.index, :].astype(float).values y = age_cohort["target"].astype(float) print y.value_counts() X = np.log(X + 1.0) X, y = shuffle(X, y) cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=10, random_state=RANDOM_STATE_CV) results = [] C_dist = [0.001, 0.01, 0.1, 1.0, 5.0] C_dist = [0.001] confusion_mats = [] for c in C_dist: print c alg_rf = SVC(C=c, kernel='linear', class_weight='balanced', random_state=RANDOM_STATE_SVM) alg_rf = RandomForestClassifier(n_estimators=256, class_weight='balanced',
def objective(trial, train, test, raw_features):
    """Optuna objective: mean validation log-loss of XGBoost under repeated CV.

    Parameters
    ----------
    trial
        Optuna trial used to sample hyper-parameters and to drive pruning.
    train
        DataFrame holding the feature columns plus a ``'target'`` column.
    test
        DataFrame holding the feature columns only; it is stacked under
        ``train`` so both are encoded consistently.
    raw_features
        Column names considered for low-frequency grouping (only read when
        ``GROUP_LOW_FREQUENCY`` is switched on).

    Returns
    -------
    The mean multi-class log-loss over all validation folds.

    Notes
    -----
    Reads the module-level settings ``N_SPLITS``, ``N_REPEATS``,
    ``RANDOM_SEED`` and ``EARLY_STOPPING_ROUNDS``.
    """
    CLIP_FEATURES = False  # trial.suggest_categorical("clip", ["True", "False"])
    GROUP_LOW_FREQUENCY = False
    GROUP_LOW_FREQUENCY_THRESHOLD = 0  # trial.suggest_discrete_uniform("threshold", 0, 50, 1)
    # Upper bound on boosting rounds; early stopping normally ends training
    # sooner. ``xgb.train`` takes this as ``num_boost_round`` (an
    # ``n_estimators`` entry in ``params`` is not used by the learning API).
    NUM_BOOST_ROUND = 10000

    df_all_X = pd.concat([train.drop('target', axis=1), test], axis=0)
    if not CLIP_FEATURES:
        # Encode every column on train+test jointly so that test-only values
        # receive a code as well.
        df_all_X = df_all_X.apply(LabelEncoder().fit_transform)
    else:
        # Work on copies so the caller's frames are not altered between trials.
        train, test = train.copy(), test.copy()
        # Any test value never seen in train is replaced with this dummy.
        missing_integer = df_all_X.max().max() + 15
        for col in list(test.columns):
            unseen = ~test[col].isin(train[col].unique())
            if not unseen.any():
                continue
            test[col] = test[col].mask(unseen, missing_integer)
            # Indicator column marking the rows that held an unseen value.
            flag = f'{col}_{missing_integer}'
            train[flag] = 0
            test[flag] = unseen.astype(int)
        df_all_X = pd.concat([train.drop('target', axis=1), test], axis=0)

    # Group low-frequency values into a single indicator per column.
    if GROUP_LOW_FREQUENCY:
        for col in raw_features:
            counts = df_all_X[col].value_counts()
            low_freq_values = counts.index[counts < GROUP_LOW_FREQUENCY_THRESHOLD]
            if len(low_freq_values) > 0:
                df_all_X[f'{col}_low_freq'] = (
                    df_all_X[col].isin(low_freq_values).astype(int))

    Xtrn = df_all_X.iloc[:len(train)]
    y = pd.Series(LabelEncoder().fit_transform(train['target']))
    n_classes = len(np.unique(y))

    # Sample weighting is currently disabled. Class weights used in earlier
    # experiments, for classes 0-3 in order:
    #   0.8023844111083878, 0.4973760913650416,
    #   0.8940055025348296, 0.8641162667601383

    losses = []
    # Out-of-fold probabilities averaged over the repeats; kept for optional
    # diagnostics and not part of the returned value.
    y_oof = np.zeros((Xtrn.shape[0], n_classes))

    pruning_callback = optuna.integration.XGBoostPruningCallback(
        trial, "val-mlogloss")
    rskf = RepeatedStratifiedKFold(n_splits=N_SPLITS,
                                   n_repeats=N_REPEATS,
                                   random_state=RANDOM_SEED)

    params = {
        'objective': 'multi:softprob',
        'eval_metric': 'mlogloss',
        'booster': 'gbtree',
        'verbosity': 0,
        'tree_method': 'gpu_hist',
        'num_class': n_classes,
        # "learning_rate": trial.suggest_loguniform("learning_rate", 0.005, 0.05),
        "colsample_bytree": trial.suggest_loguniform("colsample_bytree", 0.1, 0.8),
        "subsample": trial.suggest_loguniform("subsample", 0.1, 0.8),
        "alpha": trial.suggest_loguniform("alpha", 0.01, 10.0),
        "lambda": trial.suggest_loguniform("lambda", 1e-8, 1.0),
        # Sampled under its own name: a trial returns the same value for a
        # repeated name, so re-using "lambda" tied gamma to lambda.
        "gamma": trial.suggest_loguniform("gamma", 1e-8, 1.0),
        "min_child_weight": trial.suggest_loguniform("min_child_weight", 3, 100),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
    }

    for train_index, valid_index in rskf.split(Xtrn, y):
        X_A, X_B = Xtrn.iloc[train_index, :], Xtrn.iloc[valid_index, :]
        y_A, y_B = y.iloc[train_index], y.iloc[valid_index]

        dtrain = xgb.DMatrix(X_A, label=y_A)
        dvalid = xgb.DMatrix(X_B, label=y_B)

        # learning api https://tinyurl.com/yz8bqyfd
        # Early stopping watches the LAST entry of ``evals``, so the
        # validation set has to come last.
        xgb_model = xgb.train(
            params,
            dtrain=dtrain,
            num_boost_round=NUM_BOOST_ROUND,
            evals=[(dtrain, 'train'), (dvalid, 'val')],
            early_stopping_rounds=EARLY_STOPPING_ROUNDS,
            callbacks=[pruning_callback],
            verbose_eval=False)

        tmp = xgb_model.predict(dvalid)
        # Each sample is validated once per repeat; accumulate its average.
        y_oof[valid_index, :] += tmp / N_REPEATS
        losses.append(log_loss(y_B, tmp, labels=np.arange(n_classes)))

        # Drop the booster before the next fold to keep memory use down
        # https://tinyurl.com/ydw9nebm
        del xgb_model

    return np.mean(losses)
le = LabelEncoder() df_all_X = df_all_X.apply(le.fit_transform) Xtrn, Xtst = df_all_X.iloc[:len(train)], df_all_X.iloc[len(train):] le = LabelEncoder() y = pd.Series(le.fit_transform(train['target'])) losses = [] y_oof = np.zeros((Xtst.shape[0], len(np.unique(y)))) y_val = np.zeros((Xtrn.shape[0], len(np.unique(y)))) N_SPLITS = 7 N_REPEATS = 3 EARLY_STOPPING_ROUNDS = 10 RANDOM_SEED = 2021 rskf = RepeatedStratifiedKFold(n_splits=N_SPLITS, n_repeats=N_REPEATS, random_state=RANDOM_SEED) dtestX = xgb.DMatrix(Xtst) for i, (train_index, valid_index) in enumerate(rskf.split(Xtrn, y)): X_A, X_B = Xtrn.iloc[train_index, :], Xtrn.iloc[valid_index, :] y_A, y_B = y.iloc[train_index], y.iloc[valid_index] dtrain = xgb.DMatrix(X_A, label=y_A) dval = xgb.DMatrix(X_B, label=y_B) #, weight=[class_weights[j] for j in y_B]) dvalX = xgb.DMatrix(X_B) # learning api https://tinyurl.com/yz8bqyfd xgb_model = xgb.train(params, dtrain=dtrain, evals=[(dval, 'val'), (dtrain, 'train')], early_stopping_rounds=EARLY_STOPPING_ROUNDS,
# Create a ColumnTransformer for the scaling step: one scaler for the first
# eight columns and another for the remaining ones.
scaler = ColumnTransformer(transformers=[
    ('scaler_media', scaler_media, slice(0, 8)),
    ('scaler_moda', scaler_moda, slice(8, len(X.columns))),
])

# Create the Pipeline, combining the imputer, the ColumnTransformer and the
# classifier.
pipeline = Pipeline(steps=[
    ('imputer', imputer),
    ('scaler', scaler),
    ('svm', SVC(probability=True,
                class_weight=CLASS_WEIGHT,
                random_state=RANDOM_STATE)),
])

# Inner CV (GridSearchCV with stratified 2-fold, repeated 5 times, to obtain
# the best parameters)
rskf = RepeatedStratifiedKFold(n_repeats=5, n_splits=2,
                               random_state=RANDOM_STATE)  # inner
grid_search = GridSearchCV(pipeline, PARAM_GRID, scoring=SCORING, cv=rskf)

# # Outer CV (stratified 5-fold cross-validation to estimate accuracy)
# scores = cross_validate(estimator=grid_search, X=X, y=y, cv=5, error_score='raise', return_estimator=True, scoring=SCORING) # outer
# print('Scores: {}' .format(scores['test_score']))
# print('Mean score: {}' .format(np.mean(scores['test_score'])))

# # Create a 'dummy' classifier and evaluate it with cross-validation as well (CV=5) to get more realistic results
# dummy_clf = DummyClassifier(strategy='most_frequent', random_state=RANDOM_STATE)
# dummy_scores = cross_validate(estimator=dummy_clf, X=X, y=y, cv=5, error_score='raise', return_estimator=True, scoring=SCORING)
# print('Dummy scores: {}' .format(dummy_scores['test_score']))
'n_jobs': [-1], 'random_state': [seed], 'verbose': [0], #'class_weight': [], }) rf_gridCV_2 = param_search(X_train, y_train, RandomForestClassifier, rf_params) print(rf_gridCV_2.best_score_) pd.DataFrame(rf_gridCV_2.cv_results_).to_csv("last_results.csv") # Repeated cross validation print("Running model...") start = time.time() model = rf_gridCV_2.best_estimator_ kfold = RepeatedStratifiedKFold(n_splits=5, n_repeats=10, random_state=seed) cv_results = cross_validate(model, X_train, y_train, scoring=['accuracy', 'roc_auc'], cv=kfold, n_jobs=-1, verbose=2, return_train_score=False) print("Test accuracy:{}".format(cv_results['test_accuracy'].mean())) print("Test ROC AUC:{}".format(cv_results['test_roc_auc'].mean())) end = time.time() print("Duration:{}".format(end - start)) pd.DataFrame(cv_results).to_csv("rep_cv_res.csv")
def roc(self, idx: int = None, estimator=None, fitness_idx: int = 0, cv: int = 5, reps: int = 1,
        positive_class: int = None, random_state: int = 0):
    """
    Function that allows to represent the ROC curve on the solutions found in the non-dominated front using
    cross validation with repetitions.

    One ROC curve is drawn per cross-validation fold and per solution; the legend reports the mean and standard
    deviation of the per-fold AUC of every solution. The estimator is fitted in place on every fold.

    Parameters
    ------------
    :param idx: int
        Index of the solution to represent. If None, every solution of the non-dominated front is represented.
    :param fitness_idx: int
        Index of the fitness function to represent.
    :param estimator: <optional> sklearn.base.BaseEstimator
        If none is provided the algorithm estimator will be used. This must support predictions with
        probabilities, otherwise it will throw an exception.
    :param cv: <optional> int
        Number of folds. Default 5
    :param reps: <optional> int
        Number of repetitions. Default 1
    :param positive_class: <optional> int
        By default the class selected as positive in the algorithm. In the case that the algorithm does not have
        a positive class and one is not provided, an exception will be thrown.
    :param random_state: <optional> int

    :raises UnsuitableClassifier: if the estimator cannot produce class probabilities.
    """
    import numpy as np
    import matplotlib.pyplot as plt
    import matplotlib.lines as mlines
    from sklearn.model_selection import RepeatedStratifiedKFold
    from sklearn.metrics import roc_curve, auc

    # Get positive class
    if positive_class is None:
        positive_class = self.algorithm.positive_class

    # Check if the estimator can be used to compute roc curves
    if estimator is None:
        estimator = self.algorithm.fitness[fitness_idx].estimator

    unsuitable_msg = (
        "The classifier does not support probabilities, therefore the ROC curve cannot be computed. "
        "Run the algorithm with a classifier that supports probabilities or provide a valid classifier "
        "that support probabilities using the argument \"estimator\"."
    )
    try:
        # Some estimators (e.g. SVC) only expose probabilities when this flag is set.
        estimator.probability = True
    except (AttributeError, TypeError) as exc:
        raise UnsuitableClassifier(unsuitable_msg) from exc
    # Setting the flag almost never fails, so also verify that the method is really there.
    if not hasattr(estimator, 'predict_proba'):
        raise UnsuitableClassifier(unsuitable_msg)

    # Get dataset
    x_data, y_data = self.algorithm.get_dataset()

    # Get non-dominated solutions
    best_solutions = self._get_pareto_front()

    # if the user has selected a certain solution use only that solution
    indexes = [idx] if idx is not None else list(range(len(best_solutions)))

    fig, ax = plt.subplots(figsize=(10, 5))
    # plt.cm.get_cmap was removed in Matplotlib 3.9; pyplot.get_cmap is available in old and new releases.
    viridis = plt.get_cmap('viridis', len(indexes))
    solutions_legend = []

    # Cross-validation scheme shared by all solutions
    splitter = RepeatedStratifiedKFold(n_splits=cv, n_repeats=reps, random_state=random_state)

    for position, index in enumerate(indexes):
        features = best_solutions[index]
        # Colour by position in the list of plotted solutions so a single requested solution gets a valid colour.
        color = viridis(position)
        roc_auc = []

        # Compute the ROC curve for each fold of each solution
        for train, test in splitter.split(x_data[:, features], y_data):
            fitted = estimator.fit(x_data[np.ix_(train, features)], y_data[train])
            probs = fitted.predict_proba(x_data[np.ix_(test, features)])

            # Use the probability column that belongs to the positive class; fall back to the second column
            # when the class cannot be located among the fitted classes.
            classes = list(getattr(fitted, 'classes_', []))
            pos_col = classes.index(positive_class) if positive_class in classes else 1

            fp, tp, _ = roc_curve(y_data[test], probs[:, pos_col], pos_label=positive_class)

            # Compute AUC
            roc_auc.append(auc(fp, tp))

            ax.plot(fp, tp, color=color, alpha=0.3)

        solutions_legend.append(
            mlines.Line2D([], [], color=color, marker='.', markersize=5,
                          label='Solution (%d) AUC = %.3f +/- %.3f' % (index, np.mean(roc_auc), np.std(roc_auc))))

    # Diagonal of a random classifier; its handle is added to the legend so the label is actually shown.
    diagonal, = ax.plot([0, 1], [0, 1], linestyle='-.', color='black', label="Random Classifier")
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver operating characteristic')
    plt.legend(handles=solutions_legend + [diagonal], loc="lower right")
    plt.show()