from sklearn.pipeline import Pipeline # project-specific helper libraries import icu_config import tests ###################################################################### # globals ###################################################################### RANDOM_SEED = 42 # seed for RepeatedStratifiedKFold EPS = 10 * sys.float_info.epsilon NRECORDS = 100 # number of patient records FEATURES_TRAIN_FILENAME, LABELS_TRAIN_FILENAME = \ icu_config.get_filenames(nrecords=NRECORDS) METRICS = [ "accuracy", "auroc", "f1_score", "sensitivity", "specificity", "precision" ] # sensitivity = recall ###################################################################### # functions ###################################################################### def score(y_true, y_score, metric='accuracy'): """ Calculates the performance metric based on the agreement between the true labels and the predicted labels.
from sklearn.utils import resample from sklearn.pipeline import Pipeline # project-specific helper libraries import icu_config from icu_practice import score, METRICS import classifiers ###################################################################### # globals ###################################################################### NRECORDS = 2500 # number of patient records FEATURES_TRAIN_FILENAME, LABELS_TRAIN_FILENAME, \ FEATURES_TEST_FILENAME, LABELS_TEST_FILENAME = \ icu_config.get_filenames(nrecords=NRECORDS, test_data=True) ###################################################################### # functions ###################################################################### def get_test_scores(clf, X, y, n_bootstraps=1, metrics=['accuracy']): """ Estimates the performance of the classifier using the 95% CI. Parameters -------------------- clf : estimator object This is assumed to implement the scikit-learn estimator interface. The estimator must already be fitted to data.