Esempio n. 1
0
    def persist(cls, training_configuration, metrics, data_statistics,
                algorithm, sampler):
        """Store evaluation results on disk and in the database.

        The ``y_true``, ``y_pred`` and ``y_probs`` columns of ``metrics`` are
        serialised with ``dill`` to
        ``{BASE_PATH}/evaluation/<uuid4>.evaluation``. A new ``cls`` instance
        holding the mean and standard deviation of every summarised metric
        and data statistic is then added to ``session`` and committed.

        Args:
            training_configuration: object whose ``id`` is stored as
                ``training_configuration_id``.
            metrics: object indexable by a list of column names and exposing
                the metric columns as attributes (presumably a pandas
                DataFrame -- confirm against the caller).
            data_statistics: object exposing the statistic columns listed
                below as attributes providing ``mean()`` and ``std()``.
            algorithm: object whose ``__name__`` is stored as ``algorithm``.
            sampler: object whose ``__name__`` is stored as ``sampler``.

        Returns:
            The committed ``cls`` instance.

        NOTE(review): takes ``cls`` first, so presumably decorated as a
        classmethod outside this view -- confirm.
        """
        identifier = str(uuid.uuid4())
        base_path = f"{BASE_PATH}/evaluation"
        os.makedirs(base_path, exist_ok=True)
        evaluation_path = f"{base_path}/{identifier}.evaluation"
        # Use a context manager so the file handle is flushed and closed
        # deterministically instead of being left to the garbage collector.
        with open(evaluation_path, "wb") as evaluation_file:
            dill.dump(metrics[["y_true", "y_pred", "y_probs"]],
                      evaluation_file)

        metric_names = (
            "auc_roc",
            "auc_prc",
            "f1",
            "accuracy",
            "precision",
            "recall",
            "average_precision",
        )
        statistic_names = (
            "num_features",
            "num_test_rows",
            "num_train_rows",
            "num_sampled_train_rows",
            "incidence_rate_train",
            "incidence_rate_test",
            "incidence_rate_sampled_train",
        )

        # Every summarised column is stored as a ``<name>_mean`` /
        # ``<name>_std`` pair; build the pairs in one place to keep the
        # keyword names and the source attributes in sync.
        summary = {}
        for source, names in ((metrics, metric_names),
                              (data_statistics, statistic_names)):
            for name in names:
                summary[f"{name}_mean"] = getattr(source, name).mean()
                summary[f"{name}_std"] = getattr(source, name).std()

        obj = cls(
            training_configuration_id=training_configuration.id,
            algorithm=algorithm.__name__,
            sampler=sampler.__name__,
            evaluation_path=evaluation_path,
            **summary
        )

        session.add(obj)
        session.commit()
        return obj
Esempio n. 2
0
 def persist(cls, comment, version, cohort):
     """Create a record for ``comment``, ``version`` and ``cohort`` and commit it.

     ``start_time`` is set to the current ``time.time()`` value and
     ``end_time`` is initialised to ``math.inf``. The new instance is added
     to ``session`` (defined outside this method), committed, and returned.

     NOTE(review): takes ``cls`` first, so presumably decorated as a
     classmethod outside this view -- confirm.
     """
     fields = {
         "comment": comment,
         "version": version,
         "start_time": time.time(),
         "end_time": math.inf,
         "cohort": cohort,
     }
     record = cls(**fields)
     session.add(record)
     session.commit()
     return record
Esempio n. 3
0
    def persist(cls, training_configuration, data):
        """Write ``data`` to disk as CSV and commit a record pointing at it.

        The file is written without the index column to
        ``{BASE_PATH}/data/<uuid4>.data.gz``; the directory is created when
        missing. The new ``cls`` instance stores that path together with
        ``training_configuration.id`` and is returned after the commit.

        NOTE(review): ``data`` is presumably a pandas DataFrame (only
        ``to_csv`` is used here) -- confirm against the caller.
        """
        target_dir = f"{BASE_PATH}/data"
        file_id = str(uuid.uuid4())
        os.makedirs(target_dir, exist_ok=True)

        path = f"{target_dir}/{file_id}.data.gz"
        data.to_csv(path, index=False)

        record = cls(
            training_configuration_id=training_configuration.id,
            path=path,
        )
        session.add(record)
        session.commit()
        return record
Esempio n. 4
0
    def persist(cls, df, comment, version, cohort):
        """Write ``df`` to disk as CSV and commit a record describing it.

        The frame is written without the index column to
        ``{BASE_PATH}/onset-df/<uuid4>.df.gz``; the directory is created when
        missing. The new ``cls`` instance stores the path, ``comment``,
        ``version``, ``cohort`` and the current ``time.time()`` as
        ``created_at``, and is returned after the commit.
        """
        # Dump the frame first so the stored path always refers to a file
        # that has already been written.
        target_dir = f"{BASE_PATH}/onset-df"
        file_id = str(uuid.uuid4())
        os.makedirs(target_dir, exist_ok=True)
        path = f"{target_dir}/{file_id}.df.gz"
        df.to_csv(path, index=False)

        fields = {
            "comment": comment,
            "version": version,
            "created_at": time.time(),
            "path": path,
            "cohort": cohort,
        }
        record = cls(**fields)
        session.add(record)
        session.commit()
        return record
Esempio n. 5
0
    def persist(cls, cohort, **kwargs):
        """Serialise ``cohort`` to disk and commit a record describing it.

        ``cohort.to_json`` is called with the target path
        ``{BASE_PATH}/cohorts/<uuid4>.cohort`` plus any extra ``kwargs``; the
        mapping it returns supplies ``comment``, ``version``, ``createdAt``
        and ``fiberVersion`` for the new ``cls`` instance, which is returned
        after the commit.
        """
        # Serialise the cohort before creating the database row.
        target_dir = f"{BASE_PATH}/cohorts"
        file_id = str(uuid.uuid4())
        os.makedirs(target_dir, exist_ok=True)
        path = f"{target_dir}/{file_id}.cohort"
        exported = cohort.to_json(path, **kwargs)

        record = cls(
            comment=exported["comment"],
            version=exported["version"],
            created_at=exported["createdAt"],
            fiber_version=exported["fiberVersion"],
            path=path,
        )
        session.add(record)
        session.commit()
        return record
Esempio n. 6
0
 def persist(cls, training_pipeline, threshold_occurring,
             window_start_occurring, window_end_occurring,
             feature_type_occurring, threshold_numeric,
             window_start_numeric, window_end_numeric, feature_type_numeric,
             target):
     """Create and commit a record holding the given feature settings.

     Every argument is stored under the keyword of the same name, except
     ``training_pipeline``, of which only ``id`` is kept (as
     ``training_pipeline_id``). The new instance is added to ``session``,
     committed, and returned.
     """
     fields = {
         "training_pipeline_id": training_pipeline.id,
         "threshold_occurring": threshold_occurring,
         "window_start_occurring": window_start_occurring,
         "window_end_occurring": window_end_occurring,
         "feature_type_occurring": feature_type_occurring,
         "threshold_numeric": threshold_numeric,
         "window_start_numeric": window_start_numeric,
         "window_end_numeric": window_end_numeric,
         "feature_type_numeric": feature_type_numeric,
         "target": target,
     }
     record = cls(**fields)
     session.add(record)
     session.commit()
     return record
Esempio n. 7
0
    def create(cls, comment, version, cohort, onset_dataframe, data_loader,
               feature_pipeline):
        """Dump ``data_loader`` to disk and commit a record referencing it.

        ``data_loader.dump`` is called with
        ``{BASE_PATH}/data-loader/<uuid4>.data-loader.gz``; the directory is
        created when missing. The new ``cls`` instance stores that path as
        ``data_loader_path`` alongside the remaining arguments, with
        ``start_time`` set to the current ``time.time()`` and ``end_time``
        initialised to ``math.inf``. It is returned after the commit.
        """
        target_dir = f"{BASE_PATH}/data-loader"
        file_id = str(uuid.uuid4())
        os.makedirs(target_dir, exist_ok=True)
        loader_path = f"{target_dir}/{file_id}.data-loader.gz"
        data_loader.dump(loader_path)

        fields = {
            "comment": comment,
            "version": version,
            "start_time": time.time(),
            "end_time": math.inf,
            "cohort": cohort,
            "onset_dataframe": onset_dataframe,
            "data_loader_path": loader_path,
            "feature_pipeline": feature_pipeline,
        }
        record = cls(**fields)
        session.add(record)
        session.commit()
        return record
Esempio n. 8
0
    def persist(cls, df, window, min_threshold, feature_type, condition,
                feature_pipeline, **kwargs):
        """Write a feature frame to disk as CSV and commit a record for it.

        ``df`` is written without the index column to
        ``{BASE_PATH}/features/<uuid4>.df.gz``. The new ``cls`` instance
        stores the path, ``feature_pipeline``, ``feature_type``,
        ``min_threshold``, the class name of ``condition``, and the first two
        items of ``window`` as ``window_start`` / ``window_end``. Extra
        ``kwargs`` are accepted but not used in this method.
        """
        # Dump the frame first so the stored path always refers to a file
        # that has already been written.
        target_dir = f"{BASE_PATH}/features"
        file_id = str(uuid.uuid4())
        os.makedirs(target_dir, exist_ok=True)
        path = f"{target_dir}/{file_id}.df.gz"
        df.to_csv(path, index=False)

        fields = {
            "feature_pipeline": feature_pipeline,
            "feature_type": feature_type,
            "path": path,
            "condition": condition.__class__.__name__,
            "window_start": window[0],
            "window_end": window[1],
            "min_threshold": min_threshold,
        }
        record = cls(**fields)
        session.add(record)
        session.commit()
        return record
Esempio n. 9
0
    def persist(cls, df, clf, training_configuration, comment, version, explanations=None):
        """Store prediction data, the classifier and an optional plot, then commit.

        Three artefacts share one uuid4 identifier:

        * ``df`` is written as CSV (no index column) to
          ``{BASE_PATH}/prediction-data/<id>.df.gz``.
        * ``clf`` is serialised with ``dill`` to
          ``{BASE_PATH}/predictors/<id>.clf``.
        * When ``explanations`` is truthy, a heatmap of the top 20 features
          is drawn on the current matplotlib axes and saved to
          ``{BASE_PATH}/interpretation/<id>.png``; otherwise the stored
          ``interpretation_path`` is the empty string.

        Args:
            df: object providing ``to_csv`` (presumably a pandas DataFrame
                -- confirm against the caller).
            clf: the predictor object to serialise.
            training_configuration: object whose ``id`` is stored as
                ``training_configuration_id``.
            comment: stored unchanged on the record.
            version: stored unchanged on the record.
            explanations: optional input for ``plot_explanation_heatmap``.

        Returns:
            The committed ``cls`` instance.
        """
        # persist df on disk
        identifier = str(uuid.uuid4())
        base_path = f"{BASE_PATH}/prediction-data"
        os.makedirs(base_path, exist_ok=True)
        data_path = f"{base_path}/{identifier}.df.gz"
        df.to_csv(data_path, index=False)

        # persist clf on disk
        base_path = f"{BASE_PATH}/predictors"
        os.makedirs(base_path, exist_ok=True)
        predictor_path = f"{base_path}/{identifier}.clf"
        # Use a context manager so the file handle is flushed and closed
        # deterministically instead of being left to the garbage collector.
        with open(predictor_path, 'wb') as predictor_file:
            dill.dump(clf, predictor_file)

        if explanations:
            # persist the explanation heatmap on disk
            ax = plt.gca()
            plot_explanation_heatmap(explanations, top_features=20, ax=ax)
            base_path = f"{BASE_PATH}/interpretation"
            os.makedirs(base_path, exist_ok=True)
            interpretation_path = f"{base_path}/{identifier}.png"
            ax.figure.savefig(interpretation_path, dpi=300, bbox_inches="tight")
        else:
            interpretation_path = ''

        obj = cls(
            comment=comment,
            version=version,
            predictor_path=predictor_path,
            data_path=data_path,
            created_at=time.time(),
            training_configuration_id=training_configuration.id,
            interpretation_path=interpretation_path
        )
        session.add(obj)
        session.commit()
        return obj