Code Example #1
File: training.py Project: benSlash2/stage_MSV
def evaluate(epoch, early_stopping, model, loss_func, dls):
    """ evaluate the data loaders after 1 epoch of training_old """
    dls_names = ["[train]", "[valid]"]
    with torch.no_grad():
        loss = []
        for dl, name in zip(dls, dls_names):
            if loss_func.__class__.__name__ == "DALoss":
                losses, mse, nll, nums = zip(
                    *[loss_batch(model, loss_func, xb, yb) for xb, yb in dl])
                sum_ = np.sum(nums)
                loss_dl = [
                    np.sum(np.multiply(losses, nums)) / sum_,
                    np.sum(np.multiply(mse, nums)) / sum_,
                    np.sum(np.multiply(nll, nums)) / sum_
                ]
                es_loss = loss_dl[1]
            else:
                losses, nums = zip(
                    *[loss_batch(model, loss_func, xb, yb) for xb, yb in dl])
                loss_dl = np.sum(np.multiply(losses, nums)) / np.sum(nums)
                es_loss = loss_dl

            loss.append(loss_dl)

            if name == "[valid]":
                early_stopping(es_loss, model, epoch)

    res = np.r_[epoch, np.c_[dls_names, loss].ravel()]
    printd(*res)

    return early_stopping, res
Code Example #2
File: training.py Project: benSlash2/stage_MSV
def fit(epochs, batch_size, model, loss_func, opt, train_ds, valid_ds,
        patience, checkpoint_file):
    """ fit the model on the training_old data given the loss, optimizer, batch size, epochs,
        and early_stopping patience """
    train_dl, valid_dl = create_dataloaders_from_datasets(
        train_ds, valid_ds, batch_size)

    early_stopping = EarlyStopping(patience=patience, path_=checkpoint_file)

    model.eval()
    early_stopping, res = evaluate(0, early_stopping, model, loss_func,
                                   [train_dl, valid_dl])

    for epoch in range(epochs):
        model.train()
        # one training pass over all batches; the per-batch losses are discarded here
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()
        early_stopping, res = evaluate(epoch, early_stopping, model, loss_func,
                                       [train_dl, valid_dl])

        if early_stopping.early_stop:
            printd("Early Stopped.")
            break

    early_stopping.save()
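
Both examples above unpack the return value of a loss_batch helper that is not shown on this page. Below is a minimal sketch of what such a helper could look like, inferred only from how evaluate and fit use it; the body is an assumption, not the projects' actual implementation.

def loss_batch(model, loss_func, xb, yb, opt=None):
    # Hypothetical sketch: compute the loss for one batch and, when an optimizer
    # is passed (training), apply one optimization step.
    loss = loss_func(model(xb), yb)

    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()

    # evaluate() above expects a (loss, batch_size) pair; the DALoss branch would
    # additionally return the mse and nll components of the loss.
    return loss.item(), len(xb)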
Code Example #3
def main_standard(dataset, subject, model, params, exp, eval_set, ph):
    printd(dataset, subject, model, params, exp, eval_set, ph)

    # retrieve model's parameters
    params = locate_params(params)
    model_class = locate_model(model)

    # scale variables in minutes to the benchmark sampling frequency
    ph_f = ph // cs.freq
    hist_f = params["hist"] // cs.freq
    day_len_f = cs.day_len // cs.freq
    """ PREPROCESSING """
    train, valid, test, scalers = preprocessing(dataset, subject, ph_f, hist_f,
                                                day_len_f)
    """ MODEL TRAINING """
    raw_results = make_predictions_pclstm(subject,
                                          model_class,
                                          params,
                                          ph_f,
                                          train,
                                          valid,
                                          test,
                                          scalers,
                                          mode=eval_set)
    """ POST-PROCESSING """
    raw_results = postprocessing(raw_results, scalers, dataset)
    """ EVALUATION """
    ResultsSubject(model,
                   exp,
                   ph,
                   dataset,
                   subject,
                   params=params,
                   results=raw_results).save_raw_results()
Code Example #4
File: main.py Project: dotXem/GLYFE
def main(dataset, subject, model, params, exp, mode, log, ph, plot):
    printd(dataset, subject, model, params, exp, mode, log, ph, plot)

    # retrieve model's parameters
    search = locate_search(params)
    params = locate_params(params)
    model_class = locate_model(model)

    # scale variables in minutes to the benchmark sampling frequency
    ph_f = ph // cs.freq
    hist_f = params["hist"] // cs.freq
    day_len_f = cs.day_len // cs.freq

    """ PREPROCESSING """
    train, valid, test, scalers = preprocessing(dataset, subject, ph_f, hist_f, day_len_f)
    """ MODEL TRAINING & TUNING """
    if search:
        params = find_best_hyperparameters(subject, model_class, params, search, ph_f, train, valid, test)

    raw_results = make_predictions(subject, model_class, params, ph_f, train, valid, test, mode=mode)
    """ POST-PROCESSING """
    raw_results = postprocessing(raw_results, scalers, dataset)

    """ EVALUATION """
    results = ResultsSubject(model, exp, ph, dataset, subject, params=params, results=raw_results)
    printd(results.compute_results())
    if plot:
        results.plot(0)
Code Example #5
File: visualization.py Project: benSlash2/stage_MSV
def comparison_all(mode, variables, metrics, patients=None):
    compare_dict = {}
    compare_mean = {}
    if patients is None:
        printd(
            "-------------------------------- Global -------------------------------"
        )
        file = os.path.join(cs.path, "study", "idiab", "lstm", mode,
                            "metrics.npy")
        param, results = np.load(file, allow_pickle=True)
        compare_dict["global"] = {}
        compare_mean["global"] = {}
        for variable in variables:
            compare_dict["global"][variable], compare_mean["global"][
                variable] = comparison(results, variable, metrics)
            print_dict_stats_physio(compare_dict["global"][variable], variable)
        print_dict_latex_physio(compare_mean["global"])

    else:
        for i in patients:
            printd("-------------------------------- Patient", str(i),
                   "--------------------------------")
            file = os.path.join(cs.path, "study", "idiab", "lstm", mode,
                                "patient " + str(i), "results.npy")
            param, results = np.load(file, allow_pickle=True)
            compare_dict["patient " + str(i)] = {}
            compare_mean["patient " + str(i)] = {}
            for variable in variables:
                compare_dict["patient " + str(i)][variable], compare_mean["patient " + str(i)][variable] = \
                    comparison(results, variable, metrics)
                print_dict_stats_physio(
                    compare_dict["patient " + str(i)][variable], variable, i)
            print_dict_latex_physio(compare_mean["patient " + str(i)], i)
    return compare_dict, compare_mean
Code Example #6
    def fit(self):
        # get training data
        x_train, y_train, t_train = self._str2dataset("train")
        x_valid, y_valid, t_valid = self._str2dataset("valid")

        # save model
        rnd = np.random.randint(1e7)
        self.checkpoint_file = os.path.join(
            cs.path, "tmp", "checkpoints",
            self.__class__.__name__ + "_" + str(rnd) + ".pt")
        printd("Saved model's file:", self.checkpoint_file)

        self.model = self.FFNN_Module(x_train.shape[1], self.params["hidden"],
                                      self.params["cell_type"],
                                      self.params["dropout"])
        self.model.cuda()
        self.loss_func = nn.MSELoss()
        self.opt = torch.optim.Adam(self.model.parameters(),
                                    lr=self.params["lr"],
                                    weight_decay=self.params["l2"])

        train_ds = self.to_dataset(x_train, y_train)
        valid_ds = self.to_dataset(x_valid, y_valid)

        fit(self.params["epochs"], self.params["batch_size"], self.model,
            self.loss_func, self.opt, train_ds, valid_ds,
            self.params["patience"], self.checkpoint_file)
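
The fit method above converts arrays with self.to_dataset before handing them to the module-level fit of Example #2, which builds the data loaders. A minimal sketch of such a conversion, assuming the usual TensorDataset pattern; this is an illustration, not the project's actual method.

import torch
from torch.utils.data import TensorDataset

def to_dataset(x, y):
    # Hypothetical sketch: wrap the arrays into float tensors so that
    # create_dataloaders_from_datasets can build standard DataLoaders from them.
    return TensorDataset(torch.Tensor(x), torch.Tensor(y))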
Code Example #7
File: early_stopping.py Project: dotXem/GLYFE
 def save_checkpoint(self, val_loss, model):
     '''Saves the model when the validation loss decreases.'''
     if self.verbose:
         printd(
             f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...'
         )
     torch.save(model.state_dict(), self.path)
     self.val_loss_min = val_loss
Code Example #8
 def _str2dataset(self, dataset_name):
     if dataset_name in ["train", "training"]:
         return self.train_x, self.train_y, self.train_t
     elif dataset_name in ["valid", "validation"]:
         return self.valid_x, self.valid_y, self.valid_t
     elif dataset_name in ["test", "testing"]:
         return self.test_x, self.test_y, self.test_t
     else:
         printd("Dataset name not known.")
         sys.exit(-1)
Code Example #9
 def save_raw_results(self):
     """
     Save the results and params
     :return:
     """
     dir = os.path.join(cs.path, "study", self.dataset, self.model,
                        self.mode, "patient " + self.subject)
     Path(dir).mkdir(parents=True, exist_ok=True)
     savable_results = self.compute_results()
     printd("Global results for patient", self.subject,
            " with all experiments saved at", dir)
     np.save(os.path.join(dir, "results.npy"),
             [self.compute_params(), savable_results])
Code Example #10
def main_cgega_iterative_training(dataset,
                                  subject,
                                  model,
                                  params1,
                                  params2,
                                  exp,
                                  eval_set,
                                  ph,
                                  save_iter=False):
    printd(dataset, subject, model, params1, params2, exp, eval_set, ph)

    # retrieve model's parameters
    params1 = locate_params(params1)
    params2 = locate_params(params2)
    model_class = locate_model(model)

    # scale variables in minutes to the benchmark sampling frequency
    ph_f = ph // cs.freq
    hist_f = params1["hist"] // cs.freq
    day_len_f = cs.day_len // cs.freq
    freq_ds = misc.datasets.datasets[dataset]["glucose_freq"]
    """ PREPROCESSING """
    train, valid, test, scalers = preprocessing(dataset, subject, ph_f, hist_f,
                                                day_len_f)
    """ MODEL TRAINING """
    dir = join(cs.path, "processing", "models", "weights", "cg_ega")
    file = join(dir, exp, model_class.__name__ + "_" + dataset + subject)

    results_test, results_valid_iter = progressive_improvement_clinical_acceptability(
        subject, model_class, params1, params2, ph, freq_ds, train, valid,
        test, scalers, file, eval_set)

    results_test = postprocessing(results_test, scalers, dataset)
    results_valid_iter = postprocessing_all_iter(results_valid_iter, scalers,
                                                 dataset)

    ResultsSubject(model,
                   exp,
                   ph,
                   dataset,
                   subject,
                   params=[params1, params2],
                   results=results_test).save_raw_results()
    if save_iter:
        ResultsSubjectPICA(model,
                           exp,
                           ph,
                           dataset,
                           subject,
                           params=[params1, params2],
                           results=results_valid_iter).save_raw_results()
Code Example #11
    def predict(self, dataset):
        # get the data for which we make the predictions
        [endog, exog, exog_oos, y_true, t] = self.data_dict[dataset]
        ph = self.ph

        y_pred = []
        for endog_i, exog_i, exog_oos_i in zip(endog, exog, exog_oos):
            model = self.model.apply(endog_i, exog_i)
            preds = model.forecast(steps=ph, exog=exog_oos_i)
            y_pred.append(preds[-1])

        printd("end predict")

        return self._format_results(y_true, y_pred, t)
Code Example #12
 def params_search(grid):
     results = []
     for params_tmp in grid:
         res = make_predictions(subject,
                                model_class,
                                params_tmp,
                                ph,
                                train,
                                valid,
                                test,
                                mode="valid")
         results.append([rmse(res_) for res_ in res])
         printd(params_tmp, results[-1])
     return grid[np.argmin(np.mean(np.transpose(results), axis=0))]
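
params_search above iterates over a grid of complete parameter dictionaries and keeps the one with the lowest mean RMSE. A hypothetical example of how such a grid could be built; the parameter names are taken from the other examples on this page, but the values are made up.

from sklearn.model_selection import ParameterGrid

# Each entry of the grid is a full params dictionary passed to make_predictions.
grid = list(ParameterGrid({"hist": [60, 120], "lr": [1e-3, 1e-4], "l2": [0.0, 1e-4]}))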
Code Example #13
def preprocessing_source_multi(source_datasets, target_dataset, target_subject,
                               ph, hist, day_len):
    """
    Preprocessing for multi-source training:
    - preprocess all the subjects from the source datasets, excluding the target subject if it comes from the same dataset;
    - assign a class number to every subject;
    - merge the training and validation sets, and use the testing set as validation;
    - merge the sets from all the patients.
    :param source_datasets: names of the source datasets, separated by a "+" if several (e.g., "idiab+ohio")
    :param target_dataset: target dataset (i.e., "idiab" or "ohio")
    :param target_subject: target subject within the target dataset (e.g., "559" if target_dataset is "ohio")
    :param ph: prediction horizon
    :param hist: history length
    :param day_len: length of a day
    :return:
    """
    train_ds, valid_ds, test_ds, scalers_ds = [], [], [], []
    subject_domain = 0
    for source_dataset in source_datasets.split("+"):
        for source_subject in misc.datasets.datasets[source_dataset][
                "subjects"]:
            if target_dataset == source_dataset and target_subject == source_subject:
                continue

            printd("Preprocessing " + source_dataset + source_subject + "...")

            n_days_test = misc.datasets.datasets[source_dataset]["n_days_test"]
            train_sbj, valid_sbj, test_sbj, scalers_sbj = preprocessing_per_dataset[
                source_dataset](source_dataset, source_subject, ph, hist,
                                day_len, n_days_test)

            # no cross-validation during source training: train and valid are concatenated, and we evaluate on the test set
            train, valid, test = pd.concat([
                train_sbj[0], valid_sbj[0]
            ]).sort_values("datetime"), test_sbj[0], test_sbj[0]

            # add subject domain
            train["domain"], valid["domain"], test[
                "domain"] = subject_domain, subject_domain, subject_domain
            subject_domain += 1

            for ds, set in zip([train_ds, valid_ds, test_ds, scalers_ds],
                               [train, valid, test, scalers_sbj[0]]):
                ds.append(set)

    train_ds, valid_ds, test_ds = [
        pd.concat(ds) for ds in [train_ds, valid_ds, test_ds]
    ]

    return [train_ds], [valid_ds], [test_ds], scalers_ds
Code Example #14
File: main_tl.py Project: benSlash2/stage_MSV
def evaluation(raw_results, scalers, source_dataset, target_dataset, target_subject, model, params, exp, plot, tl_mode):
    raw_results = postprocessing(raw_results, scalers, target_dataset)

    exp += "_" + tl_mode.split("_")[1]
    exp = os.path.join(source_dataset + "_2_" + target_dataset, exp)
    results = ResultsSubject(model.__name__, exp, ph, target_dataset, target_subject, params=params,
                             results=raw_results)

    res_mean = results.compute_mean_std_results()
    printd(res_mean)
    if plot:
        results.plot(0)

    return res_mean
Code Example #15
def combinations(dataset, model, params, mode, ph, features_comb, number_comb,
                 patients):
    """
    Return the set of feature combinations that will be used during the processing phase.
    :param dataset: name of the dataset
    :param model: name of the model
    :param params: name of the parameters file
    :param mode: evaluation mode
    :param ph: prediction horizon, in minutes
    :param features_comb: comma-separated features to combine; if None, all the features of the dataset are used
    :param number_comb: comma-separated combination sizes; if None, all sizes from 0 to the number of features are used
    :param patients: comma-separated patient ids; if None, patients 1 to 6 are used
    :return: list of features, list of combinations, list of patients
    """
    if features_comb is None:
        all_feat = all_features(dataset)
    else:
        all_feat = features_comb.split(',')

    combs = []
    if number_comb is None:
        number_comb = range(0, len(all_feat) + 1)
    else:
        number_comb = list(map(int, number_comb.split(',')))

    for i in number_comb:
        els = [list(x) for x in itertools.combinations(all_feat, i)]
        combs.extend(els)

    combs = [
        ele for ele in combs if ("CPB" not in ele or "CHO" not in ele) and (
            "IOB" not in ele or "insulin" not in ele) and (
                "AOB" not in ele or "steps" not in ele)
    ]
    # 107 combinations * 6 patients * 5 seeds * 5 sets = 32100 models to train !!

    if patients is None:
        patients = range(1, 7)
    else:
        patients = list(map(int, patients.split(',')))

    printd("Dataset:", dataset, "-------- Patients:",
           ", ".join(str(patient) for patient in patients),
           "-------- Features:", "glucose,", ", ".join(all_feat),
           "-------- Model:", model, "-------- Params:", params,
           "-------- Mode:", mode, "-------- Horizon:", ph, "minutes")
    return all_feat, combs, patients
Code Example #16
File: main.py Project: dotXem/GlucosePredictionATL
def process_main_args(args):
    Model = locate_model(args.model)
    params = locate_params(args.params)

    # redirect the logs to a file if specified
    if args.log is not None:
        log_file = args.log
        log_path = os.path.join(path, "logs", log_file)
        sys.stdout = open(log_path, "w")

    sbj_msg = args.source_dataset + "_2_" + args.target_dataset, " " + args.target_subject
    if args.tl_mode == "source_training":
        printd("source_training", sbj_msg)
        main_source_training(args.source_dataset, args.target_dataset,
                             args.target_subject, Model, params, args.weights,
                             args.eval_mode)
    elif args.tl_mode == "target_training":
        printd("target_training", sbj_msg)
        main_target_training(args.source_dataset, args.target_dataset,
                             args.target_subject, Model, params,
                             args.eval_mode, args.exp, args.plot)
    elif args.tl_mode == "target_global":
        printd("target_global", sbj_msg)
        main_target_global(args.source_dataset, args.target_dataset,
                           args.target_subject, Model, params, args.weights,
                           args.eval_mode, args.exp, args.plot)
    elif args.tl_mode == "target_finetuning":
        printd("target_finetuning", sbj_msg)
        main_target_finetuning(args.source_dataset, args.target_dataset,
                               args.target_subject, Model, params,
                               args.weights, args.eval_mode, args.exp,
                               args.plot)
    elif args.tl_mode == "end_to_end" and args.params_ft is not None:
        printd("end_to_end", sbj_msg)

        params_ft = locate_params(args.params_ft)

        main_source_training(args.source_dataset, args.target_dataset,
                             args.target_subject, Model, params, args.weights,
                             args.eval_mode)
        main_target_global(args.source_dataset, args.target_dataset,
                           args.target_subject, Model, params_ft, args.weights,
                           args.eval_mode, args.exp, args.plot)
        main_target_finetuning(args.source_dataset, args.target_dataset,
                               args.target_subject, Model, params_ft,
                               args.weights, args.eval_mode, args.exp,
                               args.plot)
Code Example #17
    def __init__(self, subject, ph, params, train, valid, test):
        super().__init__(subject, ph, params, train, valid, test)
        x_train, y_train, t_train = self._str2dataset("train")
        # save model
        rnd = np.random.randint(int(1e7))
        self.checkpoint_file = os.path.join(cs.path, "tmp", "checkpoints",
                                            "lstm_" + str(rnd) + ".pt")
        printd("Saved model's file:", self.checkpoint_file)

        self.model = self.LstmModule(x_train.shape[2], self.params["hidden"],
                                     self.params["dropout_weights"],
                                     self.params["dropout_layer"])
        self.model.cuda()
        self.loss_func = nn.MSELoss()
        self.opt = torch.optim.Adam(self.model.parameters(),
                                    lr=self.params["lr"],
                                    weight_decay=self.params["l2"])
Code Example #18
File: visualization.py Project: benSlash2/stage_MSV
def top_model_all(mode, metrics, patients=None):
    best = {}
    if patients is None:
        printd(
            "-------------------------------- Global -------------------------------"
        )
        file = os.path.join(cs.path, "study", "idiab", "lstm", mode,
                            "metrics.npy")
        param, results = np.load(file, allow_pickle=True)
        best["global"] = top_model(results, metrics)
    else:
        for i in patients:
            printd("-------------------------------- Patient", str(i),
                   "--------------------------------")
            file = os.path.join(cs.path, "study", "idiab", "lstm", mode,
                                "patient " + str(i), "results.npy")
            param, results = np.load(file, allow_pickle=True)
            best["patient " + str(i)] = top_model(results, metrics, i)
    return best
Code Example #19
    def __call__(self, val_loss, model, epoch):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.best_model = copy.deepcopy(model.state_dict())
            self.val_loss_min = val_loss
            # self.save_checkpoint(val_loss, model)
        elif score < self.best_score:
            self.counter += 1
            printd(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.best_model = copy.deepcopy(model.state_dict())
            self.val_loss_min = val_loss
            self.counter = 0
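
Examples #7 and #19 show methods of an EarlyStopping class whose constructor is not listed; Example #2 builds it with EarlyStopping(patience=patience, path_=checkpoint_file) and later calls its save method. A minimal sketch of the state those methods rely on, written purely as an assumption from the attributes they access.

class EarlyStopping:
    # Hypothetical sketch of the constructor implied by the methods shown above;
    # not the projects' actual implementation.
    def __init__(self, patience=7, path_="checkpoint.pt", verbose=False):
        self.patience = patience        # epochs to wait after the last improvement
        self.path = path_               # where save_checkpoint writes the weights
        self.verbose = verbose
        self.counter = 0                # epochs elapsed since the last improvement
        self.best_score = None
        self.best_model = None          # best state_dict seen so far (see __call__)
        self.early_stop = False
        self.val_loss_min = float("inf")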
Code Example #20
File: study.py Project: benSlash2/stage_MSV
def study(dataset, model, params, mode, ph, all_feat, patients, combs):
    # retrieve model's parameters
    params = locate_params(params)
    model_class = locate_model(model)

    # scale variables in minutes to the benchmark sampling frequency
    ph_f = ph // cs.freq
    hist_f = params["hist"] // cs.freq
    day_len_f = cs.day_len // cs.freq

    # full processing
    for i in patients:
        dir = os.path.join(cs.path, "study", dataset, model, mode,
                           "patient " + str(i))
        """ PREPROCESSING ALL FEATURES"""
        printd("Preprocessing patient " + str(i))
        data = preprocessing_full(dataset, str(i), ph_f, hist_f, day_len_f,
                                  all_feat)

        for ele in combs:
            printd("Preprocessing patient", str(i),
                   "with features glucose " + " + ".join(ele))
            train, valid, test, scalers = preprocessing_select(
                data, dataset, day_len_f, all_feat, ele)

            for j in range(5):
                torch.manual_seed(j)
                """ MODEL TRAINING & TUNING """
                if not ele:
                    file = os.path.join(dir, "reference", "seed " + str(j),
                                        "weights", "weights")
                else:
                    file = os.path.join(dir, " + ".join(ele), "seed " + str(j),
                                        "weights", "weights")
                raw_results = make_predictions(str(i),
                                               model_class,
                                               params,
                                               ph_f,
                                               train,
                                               valid,
                                               test,
                                               mode=mode,
                                               save_model_file=file)
                """ POST-PROCESSING """
                raw_results = postprocessing(raw_results, scalers, dataset)
                """ EVALUATION """
                if not ele:
                    file_save = os.path.join("reference", "seed " + str(j))
                else:
                    file_save = os.path.join(" + ".join(ele), "seed " + str(j))
                results = ResultsSubject(model,
                                         file_save,
                                         ph,
                                         dataset,
                                         str(i),
                                         params=params,
                                         results=raw_results,
                                         study=True,
                                         mode=mode)
                printd(results.compute_mean_std_results())
Code Example #21
def preprocessing_idiab(dataset, subject, ph, hist, day_len, n_days_test):
    """
    Idiab dataset preprocessing pipeline:
    loading -> remove anomalies -> resample -> remove last day -> samples creation -> cleaning (1st) -> features
    selection -> splitting -> cleaning (2nd) -> standardization

    The first cleaning is done before splitting to speed up the preprocessing.

    :param dataset: name of the dataset, e.g. "idiab"
    :param subject: id of the subject, e.g. "1"
    :param ph: prediction horizon, e.g. 30
    :param hist: history length, e.g. 60
    :param day_len: length of a day normalized by the sampling frequency, e.g. 288 (1440/5)
    :param n_days_test: number of days kept for the testing set
    :return: training folds, validation folds, testing folds, list of scaler (one per fold)
    """
    printd("Preprocessing " + dataset + subject + "...")
    data = load(dataset, subject)
    data = remove_anomalies(data)
    data = resample(data, cs.freq)
    data = remove_last_day(data)
    # data["CHO"] = CPB(data, cs.C_bio, cs.t_max)
    # data["insulin"] = IOB(data, cs.K_DIA)
    # data["steps"] = AOB(data, cs.k_s)
    data = create_samples(data, ph, hist, day_len)
    data = fill_nans(data, day_len, n_days_test)
    to_drop = ["calories", "heartrate", "mets", "steps"]
    for col in data.columns:
        for ele in to_drop:
            if ele in col:
                data = data.drop(col, axis=1)
                break

    train, valid, test = split(data, day_len, n_days_test, cs.cv)
    [train, valid, test] = [remove_nans(set_) for set_ in [train, valid, test]]
    train, valid, test, scalers = standardize(train, valid, test)
    print(test[0].shape)
    return train, valid, test, scalers
Code Example #22
    def local_domain_perplexity(self, n_neighbours, reduce_tsne=False, save_file=None):
        """
        Compute the local domain perplexity (LDP) metric for every target subject and split.
        :param n_neighbours: size of the neighbourhood
        :param reduce_tsne: whether the features should first be reduced to 2D with t-SNE
        :param save_file: file to which the results are saved; if None, nothing is saved
        :return: mean and std of the LDP metric
        """
        ldp_arr = []
        for target_subject in self.target_subjects:
            printd("Perplexity " + self.target_dataset + target_subject)

            for split in range(misc.constants.cv):
                features, domains = self._compute_features(target_subject, split)
                if reduce_tsne:
                    features = self._compute_tsne_features(features)

                ldp_arr.append(local_domain_perplexity(features, domains, n_neighbours))

        if save_file is not None:
            np.save(save_file, ldp_arr)

        return np.mean(ldp_arr, axis=0), np.std(ldp_arr, axis=0)
Code Example #23
def remove_anomalies(data, anomalies_threshold=2.5, n_run=5, disp=False):
    """
    Remove glucose anomalies within the signals.
    :param data: time-series Dataframe
    :param anomalies_threshold: anomaly detection threshold
    :param n_run: number of times to run the algorithm
    :param disp: whether the results of the removal should be plotted and printed
    :return: Dataframe with the anomalies removed
    """
    data_no_anomaly = data.copy()
    for i in range(n_run):
        anomalies_indexes = detect_glucose_readings_anomalies(
            data_no_anomaly, threshold=anomalies_threshold)
        data_no_anomaly = data_no_anomaly.drop(anomalies_indexes, axis=0)
        data_no_anomaly = data_no_anomaly.reset_index(drop=True)
        if disp:
            printd("[iter {}] Number of anomalies removed : {}".format(
                i, len(anomalies_indexes)))

    if disp:
        plot(data, data_no_anomaly)

    return data_no_anomaly
Code Example #24
File: visualization.py Project: benSlash2/stage_MSV
def visualization_old(patients, mode):
    for i in patients:
        printd("-------------------------------- Patient", str(i),
               "--------------------------------")
        file = os.path.join(cs.path, "study", "idiab", "lstm", mode,
                            "patient " + str(i), "results.npy")
        param, results = np.load(file, allow_pickle=True)
        mean_rmse = {key: results[key][0]["RMSE"] for key in results.keys()}
        min_rmse = min(mean_rmse, key=lambda k: mean_rmse[k])
        printd("Ref", results["reference"][0]["RMSE"])
        printd(results[min_rmse][0]["RMSE"] / results["reference"][0]["RMSE"])
        printd("The best RMSE model for patient", str(i), "is", min_rmse,
               "with ", results[min_rmse])
        mean_mape = {key: results[key][0]["MAPE"] for key in results.keys()}
        min_mape = min(mean_mape, key=lambda k: mean_mape[k])
        printd("The best MAPE model for patient", str(i), "is", min_mape,
               "with ", results[min_mape])
        mean_mase = {key: results[key][0]["MASE"] for key in results.keys()}
        min_mase = min(mean_mase, key=lambda k: mean_mase[k])
        printd("The best MASE model for patient", str(i), "is", min_mase,
               "with ", results[min_mase])

    printd(
        "-------------------------------- Global -------------------------------"
    )
    file = os.path.join(cs.path, "study", "idiab", "lstm", mode, "metrics.npy")
    param, results = np.load(file, allow_pickle=True)
    mean_rmse = {key: results[key][0]["RMSE"] for key in results.keys()}
    min_rmse = min(mean_rmse, key=lambda k: mean_rmse[k])
    printd("The best global RMSE model is", min_rmse, "with ",
           results[min_rmse])
    mean_mape = {key: results[key][0]["MAPE"] for key in results.keys()}
    min_mape = min(mean_mape, key=lambda k: mean_mape[k])
    printd("The best global MAPE model is", min_mape, "with ",
           results[min_mape])
    mean_mase = {key: results[key][0]["MASE"] for key in results.keys()}
    min_mase = min(mean_mase, key=lambda k: mean_mase[k])
    printd("The best global MASE model is", min_mase, "with ",
           results[min_mase])
Code Example #25
File: main_glyfe.py Project: benSlash2/stage_MSV
def main(dataset,
         subject,
         model,
         params,
         exp,
         mode,
         log,
         ph,
         plot,
         save=False):
    printd(dataset, subject, model, params, exp, mode, log, ph, plot)

    # retrieve model's parameters
    search = locate_search(params)
    params = locate_params(params)
    model_class = locate_model(model)

    # scale variables in minutes to the benchmark sampling frequency
    ph_f = ph // cs.freq
    hist_f = params["hist"] // cs.freq
    day_len_f = cs.day_len // cs.freq
    """ PREPROCESSING """
    train, valid, test, scalers = preprocessing(dataset, subject, ph_f, hist_f,
                                                day_len_f)
    start = time.time()
    """ MODEL TRAINING & TUNING """
    if search:
        params = find_best_hyperparameters(subject, model_class, params,
                                           search, ph_f, train, valid, test)

    if save:
        dir = os.path.join(cs.path, "processing", "models", "weights",
                           model_class.__name__, exp)
        file = os.path.join(dir,
                            model_class.__name__ + "_" + dataset + subject)
    else:
        file = None

    raw_results = make_predictions(subject,
                                   model_class,
                                   params,
                                   ph_f,
                                   train,
                                   valid,
                                   test,
                                   mode=mode,
                                   save_model_file=file)
    """ POST-PROCESSING """
    raw_results = postprocessing(raw_results, scalers, dataset)
    """ EVALUATION """
    results = ResultsSubject(model,
                             exp,
                             ph,
                             dataset,
                             subject,
                             params=params,
                             results=raw_results)
    printd(results.compute_mean_std_results())
    end = time.time()
    printd("Time elapsed : " + str(end - start) + " seconds")
    if plot:
        results.plot(0)
Code Example #26
File: deep_predictor.py Project: benSlash2/stage_MSV
 def _compute_checkpoint_file(self, model_name):
     rnd = np.random.randint(int(1e7))
     checkpoint_file = os.path.join(cs.path, "tmp", "checkpoints", model_name + "_" + str(rnd) + ".pt")
     printd("Saved model's file:", checkpoint_file)
     return checkpoint_file
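
Every snippet on this page logs through printd rather than the built-in print. The helper itself is not listed here; below is a minimal sketch of what such a logging wrapper might look like, assuming it simply prefixes each message with the current datetime (an assumption, not the projects' actual code).

from datetime import datetime

def printd(*args, **kwargs):
    # Hypothetical sketch: behave like print, but prefix every message with a
    # timestamp, which is handy when stdout is redirected to a log file (Example #16).
    print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), *args, **kwargs)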