Example #1
File: main.py Project: dotXem/GLYFE
def main(dataset, subject, model, params, exp, mode, log, ph, plot):
    printd(dataset, subject, model, params, exp, mode, log, ph, plot)

    # retrieve model's parameters
    search = locate_search(params)
    params = locate_params(params)
    model_class = locate_model(model)

    # scale variables in minutes to the benchmark sampling frequency
    ph_f = ph // cs.freq
    hist_f = params["hist"] // cs.freq
    day_len_f = cs.day_len // cs.freq

    """ PREPROCESSING """
    train, valid, test, scalers = preprocessing(dataset, subject, ph_f, hist_f, day_len_f)
    """ MODEL TRAINING & TUNING """
    if search:
        params = find_best_hyperparameters(subject, model_class, params, search, ph_f, train, valid, test)

    raw_results = make_predictions(subject, model_class, params, ph_f, train, valid, test, mode=mode)
    """ POST-PROCESSING """
    raw_results = postprocessing(raw_results, scalers, dataset)

    """ EVALUATION """
    results = ResultsSubject(model, exp, ph, dataset, subject, params=params, results=raw_results)
    printd(results.compute_results())
    if plot:
        results.plot(0)
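
For context, main is presumably invoked from the command line; the following is a minimal sketch assuming an argparse wrapper whose flag names simply mirror the function's parameters (the flags and defaults are assumptions, not taken from the project):

import argparse

if __name__ == "__main__":
    # hypothetical CLI wiring; flag names assumed to mirror main(...)'s parameters
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", type=str)            # e.g., "ohio"
    parser.add_argument("--subject", type=str)            # e.g., "575"
    parser.add_argument("--model", type=str)              # e.g., "svr"
    parser.add_argument("--params", type=str)             # name of the parameters file
    parser.add_argument("--exp", type=str)                # experiment name
    parser.add_argument("--mode", type=str, default="valid")
    parser.add_argument("--log", type=str, default=None)
    parser.add_argument("--ph", type=int, default=30)     # prediction horizon in minutes
    parser.add_argument("--plot", action="store_true")
    args = parser.parse_args()
    main(args.dataset, args.subject, args.model, args.params, args.exp,
         args.mode, args.log, args.ph, args.plot)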
Example #2
def study(dataset, model, params, mode, ph, all_feat, patients, combs):
    # retrieve model's parameters
    params = locate_params(params)
    model_class = locate_model(model)

    # scale variables in minutes to the benchmark sampling frequency
    ph_f = ph // cs.freq
    hist_f = params["hist"] // cs.freq
    day_len_f = cs.day_len // cs.freq

    # full processing
    for i in patients:
        dir_path = os.path.join(cs.path, "study", dataset, model, mode,
                                "patient " + str(i))
        """ PREPROCESSING ALL FEATURES"""
        printd("Preprocessing patient " + str(i))
        data = preprocessing_full(dataset, str(i), ph_f, hist_f, day_len_f,
                                  all_feat)

        for ele in combs:
            printd("Preprocessing patient", str(i),
                   "with features glucose " + " + ".join(ele))
            train, valid, test, scalers = preprocessing_select(
                data, dataset, day_len_f, all_feat, ele)

            for j in range(5):
                torch.manual_seed(j)
                """ MODEL TRAINING & TUNING """
                if not ele:
                    file = os.path.join(dir_path, "reference",
                                        "seed " + str(j), "weights", "weights")
                else:
                    file = os.path.join(dir_path, " + ".join(ele),
                                        "seed " + str(j), "weights", "weights")
                raw_results = make_predictions(str(i),
                                               model_class,
                                               params,
                                               ph_f,
                                               train,
                                               valid,
                                               test,
                                               mode=mode,
                                               save_model_file=file)
                """ POST-PROCESSING """
                raw_results = postprocessing(raw_results, scalers, dataset)
                """ EVALUATION """
                if not ele:
                    file_save = os.path.join("reference", "seed " + str(j))
                else:
                    file_save = os.path.join(" + ".join(ele), "seed " + str(j))
                results = ResultsSubject(model,
                                         file_save,
                                         ph,
                                         dataset,
                                         str(i),
                                         params=params,
                                         results=raw_results,
                                         study=True,
                                         mode=mode)
                printd(results.compute_mean_std_results())
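
The combs argument is an iterable of feature combinations, where the empty combination maps to the "reference" branch above. One plausible way to build it (an assumption, this is not shown in the example) is with itertools.combinations:

from itertools import combinations

all_feat = ["CHO", "insulin", "calories"]   # illustrative feature names
combs = [c for r in range(len(all_feat) + 1)
         for c in combinations(all_feat, r)]
# [(), ("CHO",), ("insulin",), ..., ("CHO", "insulin", "calories")]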
Example #3
def evaluation(raw_results, scalers, source_dataset, target_dataset, target_subject, model, params, exp, plot, tl_mode):
    raw_results = postprocessing(raw_results, scalers, target_dataset)

    exp += "_" + tl_mode.split("_")[1]
    exp = os.path.join(source_dataset + "_2_" + target_dataset, exp)
    # ph is not a parameter here; it is taken from the enclosing module scope
    results = ResultsSubject(model.__name__, exp, ph, target_dataset, target_subject, params=params,
                             results=raw_results)

    res_mean = results.compute_mean_std_results()
    printd(res_mean)
    if plot:
        results.plot(0)

    return res_mean
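
A small worked example of how the experiment name is assembled, with made-up inputs:

import os

exp, tl_mode = "base", "tl_finetuning"            # hypothetical values
source_dataset, target_dataset = "idiab", "ohio"

exp += "_" + tl_mode.split("_")[1]                # "base_finetuning"
exp = os.path.join(source_dataset + "_2_" + target_dataset, exp)
print(exp)                                        # idiab_2_ohio/base_finetuning (POSIX)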
    def mse_impact_per_pega_zone_per_subject(self, dataset, subject):
        """
        Return the impact (ratio of total MSE) of each P-EGA zones for a given subject of given dataset
        :param dataset: name of dataset (e.g. "ohio")
        :param subject: name of subject (e.g., "575")
        :return: impact ratio of A, B, C, D, E regions of P-EGA
        """
        res = ResultsSubject(self.model, self.exp, 30, dataset,
                             subject).results
        cg_ega_arr = [
            CG_EGA(res_split, misc.datasets.datasets[dataset]
                   ["glucose_freq"]).per_sample().dropna() for res_split in res
        ]

        a_impact, b_impact, c_impact, d_impact, e_impact = [], [], [], [], []
        for cg_ega_split in cg_ega_arr:
            total_mse = ((cg_ega_split.y_true - cg_ega_split.y_pred)**2).sum()
            for impact_arr, zone in zip(
                [a_impact, b_impact, c_impact, d_impact, e_impact],
                ["A", "B", "C", "D", "E"]):
                subset = cg_ega_split.loc[cg_ega_split.P_EGA == zone]
                impact_arr.append(
                    ((subset.y_true - subset.y_pred)**2).sum() / total_mse)

        a_impact, b_impact, c_impact, d_impact, e_impact = [
            np.nanmean(impact)
            for impact in [a_impact, b_impact, c_impact, d_impact, e_impact]
        ]

        return a_impact, b_impact, c_impact, d_impact, e_impact
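
A minimal usage sketch, assuming analysis is an instance of the class defining this method (the variable name is illustrative):

impacts = analysis.mse_impact_per_pega_zone_per_subject("ohio", "575")
for zone, impact in zip("ABCDE", impacts):
    print("P-EGA zone {}: {:.1%} of total MSE".format(zone, impact))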
def smooth_resultssubject(results_sbj, smoothing_params):
    res = results_sbj.results.copy()
    res = [smooth_results(res_split, smoothing_params) for res_split in res]
    return ResultsSubject(results_sbj.model,
                          results_sbj.experiment + "_smooth", results_sbj.ph,
                          results_sbj.dataset, results_sbj.subject,
                          results_sbj.params, res)
def main_standard(dataset, subject, model, params, exp, eval_set, ph):
    printd(dataset, subject, model, params, exp, eval_set, ph)

    # retrieve model's parameters
    params = locate_params(params)
    model_class = locate_model(model)

    # scale variables in minutes to the benchmark sampling frequency
    ph_f = ph // cs.freq
    hist_f = params["hist"] // cs.freq
    day_len_f = cs.day_len // cs.freq
    """ PREPROCESSING """
    train, valid, test, scalers = preprocessing(dataset, subject, ph_f, hist_f,
                                                day_len_f)
    """ MODEL TRAINING """
    raw_results = make_predictions_pclstm(subject,
                                          model_class,
                                          params,
                                          ph_f,
                                          train,
                                          valid,
                                          test,
                                          scalers,
                                          mode=eval_set)
    """ POST-PROCESSING """
    raw_results = postprocessing(raw_results, scalers, dataset)
    """ EVALUATION """
    ResultsSubject(model,
                   exp,
                   ph,
                   dataset,
                   subject,
                   params=params,
                   results=raw_results).save_raw_results()
def smooth_dataset_experiment(model, exp, dataset):
    smoothing = {
        "func": exponential_smoothing,
        "params": [0.85] if dataset == "idiab" else [0.85]
    }
    for sbj in misc.datasets.datasets[dataset]["subjects"]:
        smooth_resultssubject(ResultsSubject(model, exp, 30, dataset, sbj),
                              smoothing).save_raw_results()
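
exponential_smoothing itself is not shown in these examples; a minimal sketch of simple exponential smoothing consistent with the single [0.85] parameter could look like this (an assumption, not the project's actual implementation):

import numpy as np

def exponential_smoothing(y, alpha=0.85):
    # s[0] = y[0]; s[t] = alpha * y[t] + (1 - alpha) * s[t - 1]
    y = np.asarray(y, dtype=float)
    smoothed = np.empty_like(y)
    smoothed[0] = y[0]
    for t in range(1, len(y)):
        smoothed[t] = alpha * y[t] + (1 - alpha) * smoothed[t - 1]
    return smoothed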
def main_cgega_iterative_training(dataset,
                                  subject,
                                  model,
                                  params1,
                                  params2,
                                  exp,
                                  eval_set,
                                  ph,
                                  save_iter=False):
    printd(dataset, subject, model, params1, params2, exp, eval_set, ph)

    # retrieve model's parameters
    params1 = locate_params(params1)
    params2 = locate_params(params2)
    model_class = locate_model(model)

    # scale variables in minutes to the benchmark sampling frequency
    ph_f = ph // cs.freq
    hist_f = params1["hist"] // cs.freq
    day_len_f = cs.day_len // cs.freq
    freq_ds = misc.datasets.datasets[dataset]["glucose_freq"]
    """ PREPROCESSING """
    train, valid, test, scalers = preprocessing(dataset, subject, ph_f, hist_f,
                                                day_len_f)
    """ MODEL TRAINING """
    dir_path = join(cs.path, "processing", "models", "weights", "cg_ega")
    file = join(dir_path, exp, model_class.__name__ + "_" + dataset + subject)

    results_test, results_valid_iter = progressive_improvement_clinical_acceptability(
        subject, model_class, params1, params2, ph, freq_ds, train, valid,
        test, scalers, file, eval_set)

    results_test = postprocessing(results_test, scalers, dataset)
    results_valid_iter = postprocessing_all_iter(results_valid_iter, scalers,
                                                 dataset)

    ResultsSubject(model,
                   exp,
                   ph,
                   dataset,
                   subject,
                   params=[params1, params2],
                   results=results_test).save_raw_results()
    if save_iter:
        ResultsSubjectPICA(model,
                           exp,
                           ph,
                           dataset,
                           subject,
                           params=[params1, params2],
                           results=results_valid_iter).save_raw_results()
    def save_all_p_r_ega_points(self, dataset):
        """
        Save P-EGA and R-EGA points into files
        :param dataset: name of dataset (e.g., "ohio")
        :return:
        """
        ap_p, be_p, ep_p, ap_r, be_r, ep_r = [], [], [], [], [], []
        for sbj in misc.datasets.datasets[dataset]["subjects"]:
            for split_res in ResultsSubject(self.model, self.exp, 30, dataset,
                                            sbj).results:
                cg_ega = CG_EGA(
                    split_res, misc.datasets.datasets[dataset]
                    ["glucose_freq"]).per_sample()
                ap_p.append(cg_ega.loc[cg_ega.CG_EGA == "AP",
                                       ["y_true", "y_pred"]].values)
                be_p.append(cg_ega.loc[cg_ega.CG_EGA == "BE",
                                       ["y_true", "y_pred"]].values)
                ep_p.append(cg_ega.loc[cg_ega.CG_EGA == "EP",
                                       ["y_true", "y_pred"]].values)
                ap_r.append(cg_ega.loc[cg_ega.CG_EGA == "AP",
                                       ["dy_true", "dy_pred"]].values)
                be_r.append(cg_ega.loc[cg_ega.CG_EGA == "BE",
                                       ["dy_true", "dy_pred"]].values)
                ep_r.append(cg_ega.loc[cg_ega.CG_EGA == "EP",
                                       ["dy_true", "dy_pred"]].values)

        ap_p = np.concatenate(ap_p, axis=0).reshape(-1, 2)
        be_p = np.concatenate(be_p, axis=0).reshape(-1, 2)
        ep_p = np.concatenate(ep_p, axis=0).reshape(-1, 2)
        ap_r = np.concatenate(ap_r, axis=0).reshape(-1, 2)
        be_r = np.concatenate(be_r, axis=0).reshape(-1, 2)
        ep_r = np.concatenate(ep_r, axis=0).reshape(-1, 2)

        for name, arr in zip(
            ["P-EGA_AP", "P-EGA_BE", "P-EGA_EP",
             "R-EGA_AP", "R-EGA_BE", "R-EGA_EP"],
            [ap_p, be_p, ep_p, ap_r, be_r, ep_r]):
            np.savetxt(
                os.path.join(path, "tmp", "figures_data",
                             name + "_" + dataset + ".dat"), arr)
Example #10
    def compute_average_params(self):
        params = []
        for subject in self.subjects:
            res_subject = ResultsSubject(self.model,
                                         self.experiment,
                                         self.ph,
                                         self.dataset,
                                         subject,
                                         legacy=self.legacy)
            params.append(res_subject.params)

        return dict(
            zip(params[0].keys(),
                np.nanmean([list(_.values()) for _ in params], axis=0)))
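
The closing dict(zip(...)) averages the hyperparameters element-wise across subjects, assuming every subject's params dict shares the same keys in the same order; a small worked example with made-up values:

import numpy as np

params = [{"hist": 180, "l2": 1e-4}, {"hist": 60, "l2": 3e-4}]
avg = dict(zip(params[0].keys(),
               np.nanmean([list(p.values()) for p in params], axis=0)))
print(avg)   # {'hist': 120.0, 'l2': 0.0002}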
    def _get_res(self, model, exp, dataset, subject):
        """
        Retrieve results (split 0) of the given model, exp, dataset and subject
        :param model: name of model
        :param exp: name of experiment
        :param dataset: name of dataset
        :param subject: name of subject
        :return: dataframe of raw results with a squared "error" column
        """
        res = ResultsSubject(model, exp, 30, dataset, subject)
        res_raw = res.results[0]
        res_raw.loc[:, "error"] = (res_raw.y_true - res_raw.y_pred)**2
        res_raw = res_raw.dropna()
        return res_raw
    def r_ega_analysis_per_subject(self, dataset, subject):
        """
        Retrieve R-EGA of given subject
        :param dataset: name of dataset (e.g., "ohio")
        :param subject: name of subject (e.g., "575")
        :return:
        """
        res = ResultsSubject(self.model, self.exp, 30, dataset,
                             subject).results
        r_ega_arr = np.array([
            R_EGA(res_split,
                  misc.datasets.datasets[dataset]["glucose_freq"]).mean()
            for res_split in res
        ])

        r_ega_arr = np.c_[np.sum(r_ega_arr[:, :2], axis=1).reshape(-1, 1),
                          r_ega_arr]
        r_ega_arr = np.nanmean(r_ega_arr, axis=0)

        return r_ega_arr
Example #13
    def compute_results(self, details=False):
        """
        Loop through the subjects of the dataset, and compute the mean performances
        :return: mean of metrics, std of metrics
        """
        res = []
        for subject in self.subjects:
            res_subject = ResultsSubject(
                self.model,
                self.experiment,
                self.ph,
                self.dataset,
                subject,
                legacy=self.legacy).compute_mean_std_results()
            if details:
                print(self.dataset, subject, res_subject)

            res.append(res_subject[0])  # only the mean

        keys = list(res[0].keys())
        res = [list(res_.values()) for res_ in res]
        mean, std = np.nanmean(res, axis=0), np.nanstd(res, axis=0)
        return dict(zip(keys, mean)), dict(zip(keys, std))
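
Hypothetical usage, assuming results_dataset is an instance of the class defining this method:

mean, std = results_dataset.compute_results(details=True)
for metric in mean:
    print("{}: {:.2f} +/- {:.2f}".format(metric, mean[metric], std[metric]))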
Example #14
def main(dataset,
         subject,
         model,
         params,
         exp,
         mode,
         log,
         ph,
         plot,
         save=False):
    printd(dataset, subject, model, params, exp, mode, log, ph, plot)

    # retrieve model's parameters
    search = locate_search(params)
    params = locate_params(params)
    model_class = locate_model(model)

    # scale variables in minutes to the benchmark sampling frequency
    ph_f = ph // cs.freq
    hist_f = params["hist"] // cs.freq
    day_len_f = cs.day_len // cs.freq
    """ PREPROCESSING """
    train, valid, test, scalers = preprocessing(dataset, subject, ph_f, hist_f,
                                                day_len_f)
    start = time.time()
    """ MODEL TRAINING & TUNING """
    if search:
        params = find_best_hyperparameters(subject, model_class, params,
                                           search, ph_f, train, valid, test)

    if save:
        dir_path = os.path.join(cs.path, "processing", "models", "weights",
                                model_class.__name__, exp)
        file = os.path.join(dir_path,
                            model_class.__name__ + "_" + dataset + subject)
    else:
        file = None

    raw_results = make_predictions(subject,
                                   model_class,
                                   params,
                                   ph_f,
                                   train,
                                   valid,
                                   test,
                                   mode=mode,
                                   save_model_file=file)
    """ POST-PROCESSING """
    raw_results = postprocessing(raw_results, scalers, dataset)
    """ EVALUATION """
    results = ResultsSubject(model,
                             exp,
                             ph,
                             dataset,
                             subject,
                             params=params,
                             results=raw_results)
    printd(results.compute_mean_std_results())
    end = time.time()
    printd("Time elapsed: " + str(end - start) + " seconds")
    if plot:
        results.plot(0)
Example #15
    def compute_results_iter_split(self, iter=0, split=0):
        res = [self.results[split][iter]]
        results_subject = ResultsSubject(self.model, self.experiment, self.ph,
                                         self.dataset, self.subject,
                                         self.params, res)
        return results_subject.compute_mean_std_results()
    def erroneous_prediction_analysis_per_subject(self, dataset, subject):
        """
        Compute responsability of P-EGA and R-EGA (or both of them) on EP prediction forn given dataset and subject
        :param dataset: name of dataset (e.g., "dataset")
        :param subject: name of subject (e.g., "subject")
        :return: array of shape (3 - hypo/eu/hyper, 3 - P/R/P-R) of mean responsability,
                array of shape (3 - hypo/eu/hyper, 3 - P/R/P-R) of std responsability
        """
        res = ResultsSubject(self.model, self.exp, 30, dataset,
                             subject).results
        cg_ega_arr = [
            CG_EGA(
                res_split,
                misc.datasets.datasets[dataset]["glucose_freq"]).per_sample()
            for res_split in res
        ]

        hypo, eu, hyper = [], [], []
        for cg_ega_split in cg_ega_arr:
            cg_ega_split["reason"] = ""
            cg_ega_split = cg_ega_split.loc[cg_ega_split.CG_EGA ==
                                            "EP"].dropna()
            hypo_split = cg_ega_split[cg_ega_split.y_true <= 70]
            eu_split = cg_ega_split[(cg_ega_split.y_true > 70)
                                    & (cg_ega_split.y_true <= 180)]
            hyper_split = cg_ega_split[cg_ega_split.y_true > 180]

            if not hypo_split.empty:
                hypo_split.loc[(hypo_split.P_EGA == "A"), "reason"] = "R"
                hypo_split.loc[(~(hypo_split.P_EGA == "A")), "reason"] = "B"
                hypo_split.loc[((hypo_split.R_EGA == "A") |
                                (hypo_split.R_EGA == "B")), "reason"] = "P"
            hypo.append(hypo_split)

            if not eu_split.empty:
                eu_split.loc[((eu_split.P_EGA == "A") |
                              (eu_split.P_EGA == "B")), "reason"] = "R"
                eu_split.loc[(eu_split.P_EGA == "C"), "reason"] = "B"
                eu_split.loc[(eu_split.P_EGA == "C") &
                             ((eu_split.R_EGA == "A") |
                              (eu_split.R_EGA == "B")), "reason"] = "P"
            eu.append(eu_split)

            if not hyper_split.empty:
                hyper_split.loc[((hyper_split.P_EGA == "A") |
                                 (hyper_split.P_EGA == "B")), "reason"] = "R"
                hyper_split.loc[(hyper_split.P_EGA == "C") |
                                (hyper_split.P_EGA == "D") |
                                (hyper_split.P_EGA == "E"), "reason"] = "B"
                hyper_split.loc[((hyper_split.R_EGA == "A") |
                                 (hyper_split.R_EGA == "B")) &
                                ((hyper_split.P_EGA == "C") |
                                 (hyper_split.P_EGA == "D") |
                                 (hyper_split.P_EGA == "E")), "reason"] = "P"
            hyper.append(hyper_split)

        def stats_region_reason(region):
            # empty splits give 0/0 -> NaN ratios, which np.nanmean / np.nanstd ignore
            total_arr = np.array([len(split) for split in region])
            p_arr = np.array([len(split.loc[split.reason == "P"])
                              for split in region]) / total_arr
            r_arr = np.array([len(split.loc[split.reason == "R"])
                              for split in region]) / total_arr
            b_arr = np.array([len(split.loc[split.reason == "B"])
                              for split in region]) / total_arr
            return [np.nanmean(p_arr), np.nanmean(r_arr), np.nanmean(b_arr)], \
                   [np.nanstd(p_arr), np.nanstd(r_arr), np.nanstd(b_arr)]

        hypo_ep_mean, hypo_ep_std = stats_region_reason(hypo)
        eu_ep_mean, eu_ep_std = stats_region_reason(eu)
        hyper_ep_mean, hyper_ep_std = stats_region_reason(hyper)

        return np.r_[hypo_ep_mean, eu_ep_mean,
                     hyper_ep_mean], np.r_[hypo_ep_std, eu_ep_std,
                                           hyper_ep_std]
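
Since the two returned arrays are flat, a caller can reshape them back into a (glycemia region) x (reason) table; a hypothetical usage sketch, again assuming an analysis instance:

import numpy as np

mean, std = analysis.erroneous_prediction_analysis_per_subject("ohio", "575")
mean_table = np.asarray(mean).reshape(3, 3)   # rows: hypo/eu/hyper; cols: P/R/both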
    def plot_importance_bad_pega_in_loss(self, c=None):
        """
        Plot relative importance of P-C/D/E (and R) on loss depending on P-A/B scaling factor
        :param c: coherence factor weighing MSE of predicted variations compared to MSE of predictions
        :return:
        """
        scaling_arr = np.logspace(0, 10, num=21, base=2)

        cde_impact_ds, r_impact_ds = [], []
        for dataset in ["idiab", "ohio"]:
            cde_impact_sbj, r_impact_sbj = [], []
            for subject in misc.datasets.datasets[dataset]["subjects"]:
                res = ResultsSubject(self.model, self.exp, 30, dataset,
                                     subject).results
                cg_ega_arr = [
                    CG_EGA(res_split, misc.datasets.datasets[dataset]
                           ["glucose_freq"]).per_sample() for res_split in res
                ]

                cde_impact_arr, r_impact_arr = [], []
                for scaling in scaling_arr:
                    cde_impact_tmp, r_impact_tmp = [], []
                    for cg_ega_split in cg_ega_arr:
                        ab_ind = (cg_ega_split["P_EGA"]
                                  == "A") | (cg_ega_split["P_EGA"] == "B")
                        cg_ega_ab, cg_ega_cde = cg_ega_split.loc[
                            ab_ind], cg_ega_split.loc[~ab_ind]
                        ab_loss = ((cg_ega_ab.y_true - cg_ega_ab.y_pred)**
                                   2).sum() / scaling
                        cde_loss = ((cg_ega_cde.y_true -
                                     cg_ega_cde.y_pred)**2).sum()
                        if c is not None:
                            r_ega = cg_ega_split.loc[~(
                                (cg_ega_split["R_EGA"] == "A") |
                                (cg_ega_split["R_EGA"] == "B"))]
                            r_loss = (
                                (r_ega.dy_true - r_ega.dy_pred)**2).sum() * c
                            total_loss = ab_loss + cde_loss + r_loss
                            r_impact_in_loss = r_loss / total_loss
                            r_impact_tmp.append(r_impact_in_loss)
                        else:
                            total_loss = ab_loss + cde_loss
                        cde_impact_in_loss = cde_loss / total_loss
                        cde_impact_tmp.append(cde_impact_in_loss)

                    cde_impact_arr.append(np.mean(cde_impact_tmp))
                    if c is not None:
                        r_impact_arr.append(np.mean(r_impact_tmp))

                cde_impact_sbj.append(cde_impact_arr)
                if c is not None:
                    r_impact_sbj.append(r_impact_arr)

            cde_impact_ds.append([
                np.mean(cde_impact_sbj, axis=0),
                np.std(cde_impact_sbj, axis=0)
            ])
            if c is not None:
                r_impact_ds.append([
                    np.mean(r_impact_sbj, axis=0),
                    np.std(r_impact_sbj, axis=0)
                ])

        for ds, edgecolor, facecolor in zip(cde_impact_ds,
                                            ["#25416d", "#004e11"],
                                            ["#afccff", "#b7ffc5"]):
            plt.fill_between(scaling_arr,
                             ds[0] - ds[1],
                             ds[0] + ds[1],
                             alpha=0.5,
                             edgecolor=edgecolor,
                             facecolor=facecolor)

        if c is not None:
            for ds, edgecolor, facecolor in zip(r_impact_ds,
                                                ["#8e0606", '#CC4F1B'],
                                                ["#ffb7b7", '#FF9848']):
                plt.fill_between(scaling_arr,
                                 ds[0] - ds[1],
                                 ds[0] + ds[1],
                                 alpha=0.5,
                                 edgecolor=edgecolor,
                                 facecolor=facecolor)

        if c is not None:
            plt.legend([
                "idiab_p_cde_impact", "ohio_p_cde_impact", "idiab_r_impact",
                "ohio_r_impact"
            ])
        else:
            plt.legend(["idiab_p_cde_impact", "ohio_p_cde_impact"])

        for ds, edgecolor, facecolor in zip(cde_impact_ds,
                                            ["#25416d", "#004e11"],
                                            ["#afccff", "#b7ffc5"]):
            plt.plot(scaling_arr, ds[0], color=edgecolor)

        if c is not None:
            for ds, edgecolor, facecolor in zip(r_impact_ds,
                                                ["#8e0606", '#CC4F1B'],
                                                ["#ffb7b7", '#FF9848']):
                plt.plot(scaling_arr, ds[0], color=edgecolor)

        plt.xlabel("P-A/B scaling factor")
        plt.ylabel("relative importance of P-C/D/E samples on loss")
        plt.title(
            "relative importance of P-C/D/E (and R) on loss depending on P-A/B scaling factor"
        )
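
The method draws on the current matplotlib figure without calling plt.show(), so displaying it is left to the caller; a hypothetical usage:

import matplotlib.pyplot as plt

analysis.plot_importance_bad_pega_in_loss(c=2.0)   # c weighs the R-EGA MSE term
plt.show()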