def persist(cls, training_configuration, metrics, data_statistics, algorithm, sampler):
    """Save evaluation outputs to disk and commit a summary record.

    The ``y_true``, ``y_pred`` and ``y_probs`` columns of ``metrics`` are
    serialized with ``dill`` to ``{BASE_PATH}/evaluation/<uuid4>.evaluation``
    (the directory is created if missing). The record stores the mean and
    standard deviation of each score column in ``metrics`` and of each
    column in ``data_statistics``, together with the path of that file.

    Args:
        training_configuration: Object whose ``id`` is stored as
            ``training_configuration_id``.
        metrics: Supports ``metrics[[...]]`` column selection and exposes the
            score columns as attributes offering ``mean()`` / ``std()``
            (presumably a pandas DataFrame -- confirm against the caller).
        data_statistics: Exposes the data-set statistic columns as attributes
            offering ``mean()`` / ``std()``.
        algorithm: Object whose ``__name__`` is stored as ``algorithm``.
        sampler: Object whose ``__name__`` is stored as ``sampler``.

    Returns:
        The new ``cls`` instance after it was added to the session and
        committed.
    """
    # Columns summarised as ``<name>_mean`` / ``<name>_std`` on the record.
    metric_columns = (
        "auc_roc",
        "auc_prc",
        "f1",
        "accuracy",
        "precision",
        "recall",
        "average_precision",
    )
    statistic_columns = (
        "num_features",
        "num_test_rows",
        "num_train_rows",
        "num_sampled_train_rows",
        "incidence_rate_train",
        "incidence_rate_test",
        "incidence_rate_sampled_train",
    )

    identifier = str(uuid.uuid4())
    base_path = f"{BASE_PATH}/evaluation"
    os.makedirs(base_path, exist_ok=True)
    evaluation_path = f"{base_path}/{identifier}.evaluation"

    # Select the columns before opening the file so a missing column does not
    # leave an empty file behind.
    predictions = metrics[["y_true", "y_pred", "y_probs"]]
    # The context manager closes the handle even when serialization fails;
    # the bare ``open(...)`` used previously was never closed explicitly.
    with open(evaluation_path, "wb") as evaluation_file:
        dill.dump(predictions, evaluation_file)

    summary = {}
    for source, columns in (
        (metrics, metric_columns),
        (data_statistics, statistic_columns),
    ):
        for column in columns:
            values = getattr(source, column)
            summary[f"{column}_mean"] = values.mean()
            summary[f"{column}_std"] = values.std()

    obj = cls(
        training_configuration_id=training_configuration.id,
        algorithm=algorithm.__name__,
        sampler=sampler.__name__,
        evaluation_path=evaluation_path,
        **summary,
    )
    session.add(obj)
    session.commit()
    return obj
def persist(cls, comment, version, cohort):
    """Create a record stamped with the current time and commit it.

    ``start_time`` is set to ``time.time()`` and ``end_time`` to ``math.inf``
    (presumably meaning "not finished yet" -- confirm with the code that
    later updates ``end_time``).

    Args:
        comment: Stored as ``comment``.
        version: Stored as ``version``.
        cohort: Stored as ``cohort``.

    Returns:
        The new ``cls`` instance after it was added to the session and
        committed.
    """
    fields = {
        "comment": comment,
        "version": version,
        "start_time": time.time(),
        "end_time": math.inf,
        "cohort": cohort,
    }
    record = cls(**fields)
    session.add(record)
    session.commit()
    return record
def persist(cls, training_configuration, data):
    """Write ``data`` as CSV and commit a record pointing at the file.

    The file is written with ``data.to_csv(path, index=False)`` to
    ``{BASE_PATH}/data/<uuid4>.data.gz``; the directory is created if it
    does not exist. (If ``data`` is a pandas DataFrame the ``.gz`` suffix
    makes ``to_csv`` gzip the output -- confirm the type passed by callers.)

    Args:
        training_configuration: Object whose ``id`` is stored as
            ``training_configuration_id``.
        data: Object providing ``to_csv(path, index=False)``.

    Returns:
        The new ``cls`` instance after it was added to the session and
        committed.
    """
    file_id = str(uuid.uuid4())
    target_dir = f"{BASE_PATH}/data"
    os.makedirs(target_dir, exist_ok=True)

    csv_path = f"{target_dir}/{file_id}.data.gz"
    data.to_csv(csv_path, index=False)

    record = cls(
        training_configuration_id=training_configuration.id,
        path=csv_path,
    )
    session.add(record)
    session.commit()
    return record
def persist(cls, df, comment, version, cohort):
    """Write ``df`` as CSV and commit a record describing it.

    The file is written with ``df.to_csv(path, index=False)`` to
    ``{BASE_PATH}/onset-df/<uuid4>.df.gz``; the directory is created if it
    does not exist. ``created_at`` is taken from ``time.time()`` after the
    file has been written.

    Args:
        df: Object providing ``to_csv(path, index=False)``.
        comment: Stored as ``comment``.
        version: Stored as ``version``.
        cohort: Stored as ``cohort``.

    Returns:
        The new ``cls`` instance after it was added to the session and
        committed.
    """
    # Store the frame on disk under a fresh random file name.
    file_id = str(uuid.uuid4())
    storage_dir = f"{BASE_PATH}/onset-df"
    os.makedirs(storage_dir, exist_ok=True)
    csv_path = f"{storage_dir}/{file_id}.df.gz"
    df.to_csv(csv_path, index=False)

    record = cls(
        comment=comment,
        version=version,
        created_at=time.time(),
        path=csv_path,
        cohort=cohort,
    )
    session.add(record)
    session.commit()
    return record
def persist(cls, cohort, **kwargs):
    """Serialize ``cohort`` to disk and commit a record built from its metadata.

    ``cohort.to_json(path, **kwargs)`` is called with the target path
    ``{BASE_PATH}/cohorts/<uuid4>.cohort`` (directory created if missing).
    Its return value is indexed with the keys ``"comment"``, ``"version"``,
    ``"createdAt"`` and ``"fiberVersion"`` to populate the record.

    Args:
        cohort: Object providing ``to_json(path, **kwargs)`` that returns a
            mapping with the keys listed above.
        **kwargs: Forwarded unchanged to ``cohort.to_json``.

    Returns:
        The new ``cls`` instance after it was added to the session and
        committed.
    """
    # Store the cohort on disk under a fresh random file name.
    file_id = str(uuid.uuid4())
    cohort_dir = f"{BASE_PATH}/cohorts"
    os.makedirs(cohort_dir, exist_ok=True)
    cohort_file = f"{cohort_dir}/{file_id}.cohort"
    metadata = cohort.to_json(cohort_file, **kwargs)

    # Record field -> key in the mapping returned by ``to_json``.
    field_keys = (
        ("comment", "comment"),
        ("version", "version"),
        ("created_at", "createdAt"),
        ("fiber_version", "fiberVersion"),
    )
    fields = {field: metadata[key] for field, key in field_keys}

    record = cls(path=cohort_file, **fields)
    session.add(record)
    session.commit()
    return record
def persist(cls, training_pipeline, threshold_occurring, window_start_occurring,
            window_end_occurring, feature_type_occurring, threshold_numeric,
            window_start_numeric, window_end_numeric, feature_type_numeric,
            target):
    """Commit a record holding the given occurring/numeric settings.

    Every argument is stored on the record under the same name, except
    ``training_pipeline``, of which only ``id`` is stored (as
    ``training_pipeline_id``).

    Args:
        training_pipeline: Object whose ``id`` is stored.
        threshold_occurring: Stored as ``threshold_occurring``.
        window_start_occurring: Stored as ``window_start_occurring``.
        window_end_occurring: Stored as ``window_end_occurring``.
        feature_type_occurring: Stored as ``feature_type_occurring``.
        threshold_numeric: Stored as ``threshold_numeric``.
        window_start_numeric: Stored as ``window_start_numeric``.
        window_end_numeric: Stored as ``window_end_numeric``.
        feature_type_numeric: Stored as ``feature_type_numeric``.
        target: Stored as ``target``.

    Returns:
        The new ``cls`` instance after it was added to the session and
        committed.
    """
    occurring_settings = {
        "threshold_occurring": threshold_occurring,
        "window_start_occurring": window_start_occurring,
        "window_end_occurring": window_end_occurring,
        "feature_type_occurring": feature_type_occurring,
    }
    numeric_settings = {
        "threshold_numeric": threshold_numeric,
        "window_start_numeric": window_start_numeric,
        "window_end_numeric": window_end_numeric,
        "feature_type_numeric": feature_type_numeric,
    }
    record = cls(
        training_pipeline_id=training_pipeline.id,
        target=target,
        **occurring_settings,
        **numeric_settings,
    )
    session.add(record)
    session.commit()
    return record
def create(cls, comment, version, cohort, onset_dataframe, data_loader, feature_pipeline):
    """Dump ``data_loader`` to disk and commit a record referencing it.

    ``data_loader.dump(path)`` is called with
    ``{BASE_PATH}/data-loader/<uuid4>.data-loader.gz`` (directory created if
    missing). The record gets ``start_time=time.time()`` and
    ``end_time=math.inf`` (presumably meaning "not finished yet" -- confirm
    with the code that later updates ``end_time``).

    Args:
        comment: Stored as ``comment``.
        version: Stored as ``version``.
        cohort: Stored as ``cohort``.
        onset_dataframe: Stored as ``onset_dataframe``.
        data_loader: Object providing ``dump(path)``; only the resulting path
            is stored, as ``data_loader_path``.
        feature_pipeline: Stored as ``feature_pipeline``.

    Returns:
        The new ``cls`` instance after it was added to the session and
        committed.
    """
    file_id = str(uuid.uuid4())
    loader_dir = f"{BASE_PATH}/data-loader"
    os.makedirs(loader_dir, exist_ok=True)
    loader_path = f"{loader_dir}/{file_id}.data-loader.gz"
    data_loader.dump(loader_path)

    fields = {
        "comment": comment,
        "version": version,
        "start_time": time.time(),
        "end_time": math.inf,
        "cohort": cohort,
        "onset_dataframe": onset_dataframe,
        "data_loader_path": loader_path,
        "feature_pipeline": feature_pipeline,
    }
    record = cls(**fields)
    session.add(record)
    session.commit()
    return record
def persist(cls, df, window, min_threshold, feature_type, condition, feature_pipeline, **kwargs):
    """Write a feature frame as CSV and commit a record describing it.

    The file is written with ``df.to_csv(path, index=False)`` to
    ``{BASE_PATH}/features/<uuid4>.df.gz``; the directory is created if it
    does not exist.

    Args:
        df: Object providing ``to_csv(path, index=False)``.
        window: Indexable; ``window[0]`` is stored as ``window_start`` and
            ``window[1]`` as ``window_end``.
        min_threshold: Stored as ``min_threshold``.
        feature_type: Stored as ``feature_type``.
        condition: Only its class name (``condition.__class__.__name__``) is
            stored, as ``condition``.
        feature_pipeline: Stored as ``feature_pipeline``.
        **kwargs: Accepted but not used by this method.

    Returns:
        The new ``cls`` instance after it was added to the session and
        committed.
    """
    # Store the frame on disk under a fresh random file name.
    file_id = str(uuid.uuid4())
    feature_dir = f"{BASE_PATH}/features"
    os.makedirs(feature_dir, exist_ok=True)
    csv_path = f"{feature_dir}/{file_id}.df.gz"
    df.to_csv(csv_path, index=False)

    fields = {
        "feature_pipeline": feature_pipeline,
        "feature_type": feature_type,
        "path": csv_path,
        "condition": condition.__class__.__name__,
        "window_start": window[0],
        "window_end": window[1],
        "min_threshold": min_threshold,
    }
    record = cls(**fields)
    session.add(record)
    session.commit()
    return record
def persist(cls, df, clf, training_configuration, comment, version, explanations=None):
    """Store prediction data, the classifier and an optional plot, then commit.

    One random identifier names all artifacts written by a call:

    * ``{BASE_PATH}/prediction-data/<uuid4>.df.gz`` -- ``df`` written with
      ``df.to_csv(path, index=False)``.
    * ``{BASE_PATH}/predictors/<uuid4>.clf`` -- ``clf`` serialized with
      ``dill``.
    * ``{BASE_PATH}/interpretation/<uuid4>.png`` -- only when
      ``explanations`` is truthy; produced by ``plot_explanation_heatmap``
      with ``top_features=20`` and saved at 300 dpi.

    Args:
        df: Object providing ``to_csv(path, index=False)``.
        clf: Object to serialize with ``dill``.
        training_configuration: Object whose ``id`` is stored as
            ``training_configuration_id``.
        comment: Stored as ``comment``.
        version: Stored as ``version``.
        explanations: Optional input for ``plot_explanation_heatmap``. When
            falsy, no plot is written and ``interpretation_path`` is ``''``.

    Returns:
        The new ``cls`` instance after it was added to the session and
        committed.
    """
    identifier = str(uuid.uuid4())

    # persist df on disk
    base_path = f"{BASE_PATH}/prediction-data"
    os.makedirs(base_path, exist_ok=True)
    data_path = f"{base_path}/{identifier}.df.gz"
    df.to_csv(data_path, index=False)

    # persist clf on disk
    base_path = f"{BASE_PATH}/predictors"
    os.makedirs(base_path, exist_ok=True)
    predictor_path = f"{base_path}/{identifier}.clf"
    # The context manager closes the handle even when serialization fails;
    # the bare ``open(...)`` used previously was never closed explicitly.
    with open(predictor_path, "wb") as predictor_file:
        dill.dump(clf, predictor_file)

    # NOTE(review): this is a truthiness test; an array-like with an ambiguous
    # truth value would raise here -- confirm what callers pass.
    if explanations:
        # NOTE(review): ``plt.gca()`` draws on the current figure, which is
        # neither created fresh nor closed here -- confirm that is intended.
        ax = plt.gca()
        plot_explanation_heatmap(explanations, top_features=20, ax=ax)
        base_path = f"{BASE_PATH}/interpretation"
        os.makedirs(base_path, exist_ok=True)
        interpretation_path = f"{base_path}/{identifier}.png"
        ax.figure.savefig(interpretation_path, dpi=300, bbox_inches="tight")
    else:
        interpretation_path = ''

    obj = cls(
        comment=comment,
        version=version,
        predictor_path=predictor_path,
        data_path=data_path,
        created_at=time.time(),
        training_configuration_id=training_configuration.id,
        interpretation_path=interpretation_path,
    )
    session.add(obj)
    session.commit()
    return obj