# matplotlib/sklearn imports used below; `cfg` is the surrounding project's
# config module and is assumed to be importable next to this function.
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve, roc_curve, auc as sklearn_auc


def eval(model, x_test, y_test, batch_size=32):
    """
    function : eval
        evaluate the model on a test set (consisting of pulses)

    Args:
        model : keras.models.Model
            the model to evaluate
        x_test : dict
            a dictionary mapping input names to actual data
        y_test : np.ndarray
            the targets of the test data
        batch_size : int [optional, default: 32]
            the size of the batches to be fed into the network

    Returns:
        r : list
            list of the loss and metrics specified by the model after
            running the model on the test data, extended with the ROC AUC,
            the PR AUC and the F1 score
    """
    r = model.evaluate(x_test, y_test, batch_size=batch_size,
                       verbose=int(cfg.verbosity))
    predictions = model.predict(x_test)
    precision, recall, thresholds = precision_recall_curve(
        y_test, predictions.ravel())
    fpr, tpr, thresholds_keras = roc_curve(y_test, predictions.ravel())
    # F1 is derived from the precision (r[2]) and recall (r[3]) reported by
    # model.evaluate rather than from sklearn's f1_score, which expects hard
    # labels instead of probabilities
    f1 = (2 * r[2] * r[3]) / (r[2] + r[3])
    fpr_tpr_auc = sklearn_auc(fpr, tpr)
    pr_auc = sklearn_auc(recall, precision)
    r += [fpr_tpr_auc, pr_auc, f1]

    if cfg.save_images:
        plt.figure(1)
        plt.plot([0, 1], [0, 1], 'k--')
        plt.plot(fpr, tpr, label='area = {:.3f}'.format(fpr_tpr_auc))
        plt.xlabel('False positive rate')
        plt.ylabel('True positive rate')
        plt.title('ROC curve (lead {})'.format(cfg.current_lead))
        plt.legend(loc='best')
        plt.savefig("images/roc_aoc_lead_{}_{}.png".format(
            cfg.current_lead, cfg.t))
        plt.clf()

        plt.figure(1)
        plt.plot(recall, precision, label='area = {:.3f}'.format(pr_auc))
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title('PR curve (lead {})'.format(cfg.current_lead))
        plt.legend(loc='best')
        plt.savefig("images/pr_aoc_lead_{}_{}.png".format(
            cfg.current_lead, cfg.t))
        plt.clf()
    return r
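# Hedged sketch of the F1 computation in `eval`: r[2] and r[3] are taken to
# be the precision and recall returned by model.evaluate, which assumes the
# model was compiled with precision and recall among its metrics; that index
# assignment is an assumption, not confirmed by the snippet itself.
precision_metric, recall_metric = 0.80, 0.60  # illustrative values
f1 = (2 * precision_metric * recall_metric) / (precision_metric + recall_metric)
print(round(f1, 4))  # 0.6857, the harmonic mean of precision and recall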
def testRocCurve(self):
    import numpy as np
    import pandas as pd
    import mars.dataframe as md
    from mars.learn.metrics import roc_curve, auc
    from sklearn.metrics import roc_curve as sklearn_roc_curve, auc as sklearn_auc

    client = self.odps.create_mars_cluster(1, 4, 8, name=str(uuid.uuid4()))
    try:
        rs = np.random.RandomState(0)
        raw = pd.DataFrame({'a': rs.randint(0, 10, (10,)),
                            'b': rs.rand(10)})
        df = md.DataFrame(raw)

        y = df['a'].to_tensor().astype('int')
        pred = df['b'].to_tensor().astype('float')

        fpr, tpr, thresholds = roc_curve(y, pred, pos_label=2)
        m = auc(fpr, tpr)

        sk_fpr, sk_tpr, sk_thresholds = sklearn_roc_curve(
            raw['a'].to_numpy().astype('int'),
            raw['b'].to_numpy().astype('float'), pos_label=2)
        expect_m = sklearn_auc(sk_fpr, sk_tpr)
        self.assertAlmostEqual(m.fetch(), expect_m)
    finally:
        client.stop_server()
def precision_recall_curve(y_true, y_pred_proba):
    """
    Given labels and binary classifier predicted probabilities, compute and
    return the data representing a precision-recall curve.

    Arguments:
        y_true (ww.DataColumn, pd.Series or np.ndarray): True binary labels.
        y_pred_proba (ww.DataColumn, pd.Series or np.ndarray): Predictions
            from a binary classifier, before thresholding has been applied.
            Note this should be the predicted probability for the "true" label.

    Returns:
        dict: Dictionary containing metrics used to generate a
            precision-recall plot, with the following keys:

            * `precision`: Precision values.
            * `recall`: Recall values.
            * `thresholds`: Threshold values used to produce the precision
              and recall.
            * `auc_score`: The area under the precision-recall curve.
    """
    y_true = _convert_to_woodwork_structure(y_true)
    y_pred_proba = _convert_to_woodwork_structure(y_pred_proba)
    y_true = _convert_woodwork_types_wrapper(y_true.to_series())
    y_pred_proba = _convert_woodwork_types_wrapper(y_pred_proba.to_series())
    precision, recall, thresholds = sklearn_precision_recall_curve(
        y_true, y_pred_proba)
    auc_score = sklearn_auc(recall, precision)
    return {'precision': precision,
            'recall': recall,
            'thresholds': thresholds,
            'auc_score': auc_score}
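# Minimal self-contained sketch of the same PR-AUC computation using sklearn
# directly (the woodwork conversion helpers above are project-internal); the
# toy labels and probabilities below are made up for illustration.
import numpy as np
from sklearn.metrics import precision_recall_curve as sklearn_precision_recall_curve
from sklearn.metrics import auc as sklearn_auc

y_true = np.array([0, 0, 1, 1])
y_pred_proba = np.array([0.1, 0.4, 0.35, 0.8])
precision, recall, thresholds = sklearn_precision_recall_curve(y_true, y_pred_proba)
print(sklearn_auc(recall, precision))  # area under the precision-recall curve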
def pre_rec_auc(target, preds):
    # data to plot the precision-recall curve
    precision, recall, thresholds = precision_recall_curve(target, preds)
    # use the AUC function to compute the area under the precision-recall curve
    auc_precision_recall = sklearn_auc(recall, precision)
    return auc_precision_recall
def negative_auc_function(target_values, class_probability_matrix):
    """Computes negative AUC (area under the ROC curve).

    This function works only for binary classification!

    :param target_values: length-E numpy array of target values (integers in
        0...1).
    :param class_probability_matrix: E-by-2 numpy array of predicted
        probabilities.
    :return: negative_auc: Negative AUC.
    :raises: TypeError: if `class_probability_matrix` contains more than 2
        classes.
    """
    num_classes = class_probability_matrix.shape[-1]
    if num_classes != 2:
        error_string = (
            'This function works only for binary classification, not '
            '{0:d}-class classification.'
        ).format(num_classes)
        raise TypeError(error_string)

    # the y_true/y_score keywords imply that `sklearn_auc` is bound to
    # sklearn.metrics.roc_auc_score here (sklearn.metrics.auc takes
    # positional x and y arrays instead)
    return -1 * sklearn_auc(y_true=target_values,
                            y_score=class_probability_matrix[:, -1])
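# Hedged usage sketch for negative_auc_function: the import below is an
# assumption inferred from the y_true/y_score keywords above, and the toy
# data is made up for illustration.
import numpy as np
from sklearn.metrics import roc_auc_score as sklearn_auc

target_values = np.array([0, 1, 1, 0, 1])
class_probability_matrix = np.array([[0.9, 0.1], [0.2, 0.8], [0.3, 0.7],
                                     [0.6, 0.4], [0.1, 0.9]])
# every positive example outscores every negative one, so AUC = 1.0
print(negative_auc_function(target_values, class_probability_matrix))  # -1.0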
def testRocCurveAuc(self):
    service_ep = 'http://127.0.0.1:' + self.web_port
    timeout = 120 if 'CI' in os.environ else -1
    with new_session(service_ep) as sess:
        run_kwargs = {'timeout': timeout}

        rs = np.random.RandomState(0)
        raw = pd.DataFrame({'a': rs.randint(0, 10, (10,)),
                            'b': rs.rand(10)})
        df = md.DataFrame(raw)

        y = df['a'].to_tensor().astype('int')
        pred = df['b'].to_tensor().astype('float')

        fpr, tpr, thresholds = roc_curve(y, pred, pos_label=2,
                                         session=sess, run_kwargs=run_kwargs)
        m = auc(fpr, tpr, session=sess, run_kwargs=run_kwargs)

        sk_fpr, sk_tpr, sk_thresholds = sklearn_roc_curve(
            raw['a'].to_numpy().astype('int'),
            raw['b'].to_numpy().astype('float'), pos_label=2)
        expect_m = sklearn_auc(sk_fpr, sk_tpr)
        self.assertAlmostEqual(m.fetch(session=sess), expect_m)
def roc_curve(y_true, y_pred_proba):
    """
    Given labels and classifier predicted probabilities, compute and return
    the data representing a Receiver Operating Characteristic (ROC) curve.
    Works with binary or multiclass problems.

    Arguments:
        y_true (ww.DataColumn, pd.Series or np.ndarray): True labels.
        y_pred_proba (ww.DataColumn, pd.Series or np.ndarray): Predictions
            from a classifier, before thresholding has been applied.

    Returns:
        list(dict): A list of dictionaries (one per class) is returned.
            Binary classification problems return a list with one dictionary.
            Each dictionary contains metrics used to generate an ROC plot,
            with the following keys:

            * `fpr_rates`: False positive rates.
            * `tpr_rates`: True positive rates.
            * `thresholds`: Threshold values used to produce each pair of
              true/false positive rates.
            * `auc_score`: The area under the ROC curve.
    """
    y_true = _convert_to_woodwork_structure(y_true)
    y_pred_proba = _convert_to_woodwork_structure(y_pred_proba)
    if isinstance(y_pred_proba, ww.DataTable):
        y_pred_proba = _convert_woodwork_types_wrapper(
            y_pred_proba.to_dataframe()).to_numpy()
    else:
        y_pred_proba = _convert_woodwork_types_wrapper(
            y_pred_proba.to_series()).to_numpy()
    y_true = _convert_woodwork_types_wrapper(y_true.to_series()).to_numpy()

    if len(y_pred_proba.shape) == 1:
        y_pred_proba = y_pred_proba.reshape(-1, 1)
    if y_pred_proba.shape[1] == 2:
        # for binary problems, keep only the positive-class probability
        y_pred_proba = y_pred_proba[:, 1].reshape(-1, 1)
    # drop rows where either the label or any probability is NaN
    nan_indices = np.logical_or(pd.isna(y_true),
                                np.isnan(y_pred_proba).any(axis=1))
    y_true = y_true[~nan_indices]
    y_pred_proba = y_pred_proba[~nan_indices]

    lb = LabelBinarizer()
    lb.fit(np.unique(y_true))
    y_one_hot_true = lb.transform(y_true)
    n_classes = y_one_hot_true.shape[1]

    curve_data = []
    for i in range(n_classes):
        fpr_rates, tpr_rates, thresholds = sklearn_roc_curve(
            y_one_hot_true[:, i], y_pred_proba[:, i])
        auc_score = sklearn_auc(fpr_rates, tpr_rates)
        curve_data.append({'fpr_rates': fpr_rates,
                           'tpr_rates': tpr_rates,
                           'thresholds': thresholds,
                           'auc_score': auc_score})
    return curve_data
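# Self-contained sketch of the per-class (one-vs-rest) ROC loop used above,
# without the woodwork helpers; the labels and probabilities are made up.
import numpy as np
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import roc_curve as sklearn_roc_curve, auc as sklearn_auc

y_true = np.array([0, 1, 2, 2, 1, 0])
y_pred_proba = np.array([[0.7, 0.2, 0.1],
                         [0.1, 0.6, 0.3],
                         [0.2, 0.2, 0.6],
                         [0.3, 0.3, 0.4],
                         [0.2, 0.5, 0.3],
                         [0.6, 0.3, 0.1]])
lb = LabelBinarizer().fit(np.unique(y_true))
y_one_hot_true = lb.transform(y_true)
for i in range(y_one_hot_true.shape[1]):
    fpr_rates, tpr_rates, thresholds = sklearn_roc_curve(
        y_one_hot_true[:, i], y_pred_proba[:, i])
    print(i, sklearn_auc(fpr_rates, tpr_rates))  # one AUC per class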
def test_dataframe_roc_curve_auc(setup):
    rs = np.random.RandomState(0)
    raw = pd.DataFrame({'a': rs.randint(0, 10, (10,)),
                        'b': rs.rand(10)})
    df = md.DataFrame(raw)

    y = df['a'].to_tensor().astype('int')
    pred = df['b'].to_tensor().astype('float')

    fpr, tpr, thresholds = roc_curve(y, pred, pos_label=2)
    m = auc(fpr, tpr)

    sk_fpr, sk_tpr, sk_thresholds = sklearn_roc_curve(
        raw['a'].to_numpy().astype('int'),
        raw['b'].to_numpy().astype('float'), pos_label=2)
    expect_m = sklearn_auc(sk_fpr, sk_tpr)
    assert pytest.approx(m.fetch()) == expect_m
def testLearnInLocalCluster(self, *_):
    from mars.learn.neighbors import NearestNeighbors
    from sklearn.neighbors import NearestNeighbors as SkNearestNeighbors
    from mars.learn.metrics import roc_curve, auc
    from sklearn.metrics import roc_curve as sklearn_roc_curve, auc as sklearn_auc

    with new_cluster(scheduler_n_process=2, worker_n_process=3,
                     shared_memory='20M') as cluster:
        rs = np.random.RandomState(0)
        raw_X = rs.rand(10, 5)
        raw_Y = rs.rand(8, 5)
        X = mt.tensor(raw_X, chunk_size=7)
        Y = mt.tensor(raw_Y, chunk_size=(5, 3))

        nn = NearestNeighbors(n_neighbors=3)
        nn.fit(X)
        ret = nn.kneighbors(Y, session=cluster.session)

        snn = SkNearestNeighbors(n_neighbors=3)
        snn.fit(raw_X)
        expected = snn.kneighbors(raw_Y)

        result = [r.fetch() for r in ret]
        np.testing.assert_almost_equal(result[0], expected[0])
        np.testing.assert_almost_equal(result[1], expected[1])

        rs = np.random.RandomState(0)
        raw = pd.DataFrame({'a': rs.randint(0, 10, (10,)),
                            'b': rs.rand(10)})
        df = md.DataFrame(raw)

        y = df['a'].to_tensor().astype('int')
        pred = df['b'].to_tensor().astype('float')

        fpr, tpr, thresholds = roc_curve(y, pred, pos_label=2)
        m = auc(fpr, tpr)

        sk_fpr, sk_tpr, sk_thresholds = sklearn_roc_curve(
            raw['a'].to_numpy().astype('int'),
            raw['b'].to_numpy().astype('float'), pos_label=2)
        expect_m = sklearn_auc(sk_fpr, sk_tpr)
        self.assertAlmostEqual(m.fetch(), expect_m)
def negative_auc_function(target_values, class_probability_matrix):
    """Computes negative AUC (area under the ROC curve).

    This function works only for binary classification!

    :param target_values: See doc for `run_permutation_test`.
    :param class_probability_matrix: Same.
    :return: negative_auc: Negative AUC.
    :raises: TypeError: if `class_probability_matrix` contains more than 2
        classes.
    """
    num_classes = class_probability_matrix.shape[-1]
    if num_classes != 2:
        error_string = (
            'This function works only for binary classification, not '
            '{0:d}-class classification.'
        ).format(num_classes)
        raise TypeError(error_string)

    return -1 * sklearn_auc(y_true=target_values,
                            y_score=class_probability_matrix[:, -1])
def hyperparameter_tuning(train_data: pd.DataFrame,
                          train_data_target: pd.DataFrame,
                          val_data: pd.DataFrame,
                          val_data_target: pd.DataFrame,
                          test1_data: pd.DataFrame,
                          test1_data_target: pd.DataFrame,
                          test2_data: pd.DataFrame,
                          test2_data_target: pd.DataFrame,
                          n_iteration: int,
                          target: str,
                          weight_balance: np.ndarray):
    """
    Choose hyper-parameters through a randomized search. Results are reported
    on the validation dataset as well as on both test datasets, which makes
    it easy to select the model that performs best on the test data.
    """
    max_depth_dist = np.random.randint(3, 8, n_iteration)
    learning_rate_dist = np.random.uniform(0.01, 0.5, n_iteration)
    n_estimators_dist = np.random.randint(30, 120, n_iteration)
    gamma_dist = np.random.randint(1, 5, n_iteration)
    min_child_weight_dist = np.random.uniform(0.5, 4, n_iteration)
    subsample_dist = np.random.uniform(0.4, 1, n_iteration)
    colsample_bytree_dist = np.random.uniform(0.6, 1, n_iteration)
    reg_lambda_dist = np.random.uniform(1, 6, n_iteration)

    train_pr_auc = list()
    val_pr_auc = list()
    test1_pr_auc = list()
    test2_pr_auc = list()

    for i in range(n_iteration):
        print("Iteration {} running ...".format(i))
        xgb_model = xgb.XGBClassifier(
            max_depth=max_depth_dist[i],
            learning_rate=learning_rate_dist[i],
            n_estimators=n_estimators_dist[i],
            verbosity=1,
            objective='binary:logistic',
            booster='gbtree',
            tree_method='auto',
            n_jobs=3,
            gamma=gamma_dist[i],
            min_child_weight=min_child_weight_dist[i],
            max_delta_step=0,
            subsample=subsample_dist[i],
            colsample_bytree=colsample_bytree_dist[i],
            colsample_bylevel=1,
            colsample_bynode=1,
            reg_alpha=0,
            reg_lambda=reg_lambda_dist[i],
            scale_pos_weight=1,
            base_score=0.5,
            random_state=123)
        xgb_model.fit(train_data.values, train_data_target[target].values,
                      eval_set=[(train_data.values,
                                 train_data_target[target].values),
                                (val_data.values,
                                 val_data_target[target].values)],
                      sample_weight=weight_balance,
                      verbose=False)

        # PR AUC for the train dataset
        xgb_predict = xgb_model.predict_proba(train_data.values)[:, 1]
        precision, recall, thresholds = precision_recall_curve(
            train_data_target[target].values, xgb_predict)
        train_pr_auc.append(sklearn_auc(recall, precision))

        # PR AUC for the validation dataset
        xgb_predict = xgb_model.predict_proba(val_data.values)[:, 1]
        precision, recall, thresholds = precision_recall_curve(
            val_data_target[target].values, xgb_predict)
        val_pr_auc.append(sklearn_auc(recall, precision))

        # PR AUC for the test1 dataset
        xgb_predict = xgb_model.predict_proba(test1_data.values)[:, 1]
        precision, recall, thresholds = precision_recall_curve(
            test1_data_target[target].values, xgb_predict)
        test1_pr_auc.append(sklearn_auc(recall, precision))

        # PR AUC for the test2 dataset
        xgb_predict = xgb_model.predict_proba(test2_data.values)[:, 1]
        precision, recall, thresholds = precision_recall_curve(
            test2_data_target[target].values, xgb_predict)
        test2_pr_auc.append(sklearn_auc(recall, precision))
        print("Iteration {} completed.".format(i))

    df = pd.DataFrame({'train_pr_auc': train_pr_auc,
                       'val_pr_auc': val_pr_auc,
                       'test1_pr_auc': test1_pr_auc,
                       'test2_pr_auc': test2_pr_auc})
    return (df, max_depth_dist, learning_rate_dist, n_estimators_dist,
            gamma_dist, min_child_weight_dist, subsample_dist,
            colsample_bytree_dist, reg_lambda_dist)
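# Hedged follow-up sketch: one way to pick a run from the dataframe returned
# by hyperparameter_tuning is the highest mean PR AUC across both test sets.
# Both the selection rule and the helper name are assumptions for
# illustration, not part of the original function.
import pandas as pd

def best_iteration(result_df: pd.DataFrame) -> int:
    # average the two test-set PR AUC columns and return the winning row index
    mean_test_auc = (result_df['test1_pr_auc'] + result_df['test2_pr_auc']) / 2
    return int(mean_test_auc.idxmax())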
# time/numpy/keras/sklearn imports used below; cfg, dgen, dprep, progress_bar
# and the custom `precision`/`recall` metric functions are project-internal
# and assumed to be importable next to this function.
import time
import numpy as np
from keras.models import load_model
from sklearn.metrics import precision_recall_curve, roc_curve, auc as sklearn_auc


def evaluate_model(data_x=[], targets=[], fnames=[], model=None):
    """
    function : evaluate_model
        create an evaluation (accuracy) for classification based on a
        threshold: at least a configured ratio of the pulses in an ecg must
        be classified as unhealthy for the whole ecg to be labeled unhealthy

    Args:
        data_x : np.ndarray [optional, default: []]
            the ecg's to evaluate on; loaded via dgen.get_data when empty
        targets : np.ndarray [optional, default: []]
            the target labels belonging to data_x
        fnames : list [optional, default: []]
            the filenames belonging to data_x
        model : keras.models.Model or Nonetype [optional, default: None]
            the model to evaluate; loaded from cfg.model_save_name when None

    Returns:
        metrics : list
            mse, accuracy, precision, recall, ROC AUC, PR AUC and F1 of the
            model tested on ecg's
    """
    if len(data_x) == 0:
        data_x, targets, fnames = dgen.get_data(return_fnames=True,
                                                channels=np.array([0]),
                                                norm=True,
                                                exclude_targets=[2, 3, 4])
    if model is None:
        model = load_model(cfg.model_save_name,
                           custom_objects={'precision': precision,
                                           'recall': recall})
    tp = 0
    tn = 0
    fp = 0
    fn = 0
    predictions = []
    mse = 0
    if cfg.verbosity:
        print("Evaluating model with ECG's")
        start = time.time()
    for i, ecg in enumerate(data_x):
        pulse_data_x, pulse_data_y = dprep.extract_windows(
            np.expand_dims(ecg, axis=0), np.array([targets[i]]),
            cfg.nn_input_size, exclude_first_channel=True)
        nn_pulse_data_x = {"ecg_inp": np.squeeze(pulse_data_x)}
        preds = [int(round(pred[0]))
                 for pred in model.predict(nn_pulse_data_x)]
        # an ecg counts as positive when enough of its pulses are positive
        pred = 1 if sum(preds) >= (
            len(preds) * cfg.min_af_ratio_for_positive_prediction) else 0
        mse += (targets[i] - pred)**2
        predictions.append(pred)
        if pred == 1 and targets[i] == 1:
            tp += 1
        elif pred == 0 and targets[i] == 0:
            tn += 1
        elif pred == 1 and targets[i] == 0:
            fp += 1
        elif pred == 0 and targets[i] == 1:
            # false negative: predicted healthy, target unhealthy
            fn += 1
        progress_bar("Evaluating ECG", i, data_x.shape[0])
    if cfg.verbosity:
        print('Done, took ' + str(round(time.time() - start, 1)) + ' seconds')
    mse /= len(targets)
    # keep the PR curve's recall under its own name so the roc_curve call
    # below does not overwrite it before the PR AUC is computed
    ppv, rec, thresholds_pr = precision_recall_curve(targets, predictions)
    fpr, tpr, thresholds_roc = roc_curve(targets, predictions)
    fpr_tpr_auc = sklearn_auc(fpr, tpr)
    tpr_ppv_auc = sklearn_auc(rec, ppv)
    accuracy = (tp + tn) / len(targets)
    # distinct names so the imported precision/recall metric functions used
    # in custom_objects above are not shadowed by local assignments (which
    # would raise UnboundLocalError when loading the model)
    precision_value = tp / (tp + fp)
    recall_value = tp / (tp + fn)
    f1 = (2 * tp) / (2 * tp + fp + fn)
    metrics = [mse, accuracy, precision_value, recall_value,
               fpr_tpr_auc, tpr_ppv_auc, f1]
    print(metrics)
    return metrics
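# Minimal sketch of the per-ECG voting rule in evaluate_model: an ECG is
# labeled positive when the fraction of positively classified pulses reaches
# cfg.min_af_ratio_for_positive_prediction (the 0.5 below is an illustrative
# stand-in, not the project's actual config value).
pulse_preds = [1, 0, 1, 1, 0]  # per-pulse binary predictions for one ECG
min_af_ratio = 0.5             # assumed stand-in for the config value
ecg_pred = 1 if sum(pulse_preds) >= len(pulse_preds) * min_af_ratio else 0
print(ecg_pred)  # 1, since 3 of 5 pulses (0.6) meet the 0.5 ratio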