Example #1
0
                    "name": "eeg",
                    "processing": []
                }],
                "split": {
                    "train": train_split,
                    "val": validation_split,
                    "test": None,
                    "parameters_init": train_split,
                },
                "hypnogram_filename": f"{scorer}.mm"
            },
            "save_folder": EXPERIMENT_OUTPUT_FOLDER,
        }

        folder = log_experiment(**experiment_to_log,
                                num_workers=num_workers,
                                generate_memmaps=False)
        checkpoint = {"directory": folder, "net_to_load": "training/best_net"}
        trainer_eval = {
            "type": "base",
            "args": {
                "epochs": 0,
                "patience": 0,
                "num_workers": 0,
                "optimizer": {
                    "type": "adam",
                    "args": {
                        "lr": 1e-3,
                        "amsgrad": True
                    }
                },
Example #2
0
        for dataset in datasets:
            train_split += datasets_split[dataset]["train"]
            validation_split += datasets_split[dataset]["validation"]
            test_split += datasets_split[dataset]["test"]

        experiment_to_log = {
            "memmap_description": memmaps_description[target_dataset],
            "dataset_settings": settings[target_dataset],
            "trainer_parameters": trainer_parameters,
            "normalization_parameters": model_normalization,
            "net_parameters": model_description,
            "dataset_parameters": {
                "temporal_context": TEMPORAL_CONTEXT,
                "transform_parameters": [{
                    "name": "eeg",
                    "processing": []
                }],
                "split": {
                    "train": train_split,
                    "val": validation_split,
                    "test": test_split,
                    "parameters_init": train_split,
                },
            },
            "save_folder": EXPERIMENT_OUTPUT_FOLDER,
        }

        folder = log_experiment(**experiment_to_log,
                                num_workers=num_workers,
                                generate_memmaps=False)
def run_experiment(settings,
                   memmaps_description,
                   temporal_context,
                   trainer,
                   normalization,
                   model,
                   split,
                   transform,
                   save_folder,
                   hypnogram_filename_train='hypno.mm',
                   hypnogram_filename_test='hypno.mm',
                   fold_to_run=None,
                   force=False,
                   error_tolerant=False,
                   checkpoint=None,
                   num_workers=0,
                   SEED=2019,
                   validate_with_ablation_modalities=None,
                   max_training_records=None):
    """Run a cross-validated training experiment over memmapped records.

    Records are first materialized as memmaps from the h5 directory, then
    partitioned into folds according to ``split["type"]``:

    * ``"kfolds"`` — record-wise by default, or subject-wise when
      ``split["args"]["subjects"]`` is provided;
    * ``"loov"`` — leave-one-out: each record is its own test fold.

    For every requested fold, the remaining folds are shuffled
    deterministically (``SEED + fold_index``) and split 80/20 into
    train/validation record lists; one experiment per fold is then logged
    via ``log_experiment``.

    Args:
        settings: dict with at least ``"h5_directory"`` and
            ``"memmap_directory"`` keys.
        memmaps_description: memmap layout description, hashed to locate
            the dataset directory.
        temporal_context, trainer, normalization, model, transform:
            forwarded verbatim into the experiment description.
        split: ``{"type": "kfolds"|"loov", "args": {...}}``.
        save_folder: output root; removed first when ``force`` is True.
        hypnogram_filename_train, hypnogram_filename_test: hypnogram file
            names used for train/test records respectively.
        fold_to_run: optional list of fold indices to run (default: all).
        force: wipe ``save_folder`` before running.
        error_tolerant: forwarded to ``h5_to_memmaps``.
        checkpoint: optional checkpoint spec forwarded to ``log_experiment``.
        num_workers: data-loading workers forwarded to ``log_experiment``.
        SEED: base seed for all record/fold shuffles (reproducibility).
        validate_with_ablation_modalities: forwarded to ``log_experiment``.
        max_training_records: if set, caps train+val records to this total
            using a 70/30 split (at least one validation record kept).

    Returns:
        list: the output folder returned by ``log_experiment`` for each
        fold that was run.

    Raises:
        ValueError: if ``split["type"]`` is neither ``"kfolds"`` nor
        ``"loov"``.
    """
    # Optionally wipe any previous run of this experiment.
    if os.path.exists(save_folder) and force:
        shutil.rmtree(save_folder)

    description_hash = memmap_hash(memmaps_description)
    print(settings["memmap_directory"])
    # Make sure every record in the h5 directory has its memmaps built.
    h5_to_memmaps(
        records=[
            settings["h5_directory"] + record
            for record in os.listdir(settings["h5_directory"])
        ],
        memmap_description=memmaps_description,
        memmap_directory=settings["memmap_directory"],
        error_tolerant=error_tolerant,
    )
    dataset_dir = settings["memmap_directory"] + description_hash + "/"
    available_dreem_records = [
        dataset_dir + record + "/" for record in os.listdir(dataset_dir)
        if ".json" not in record
    ]

    # Deterministic shuffle so fold assignment is reproducible per SEED.
    rd.seed(SEED)
    rd.shuffle(available_dreem_records)

    if split["type"] == "kfolds":
        n_folds = split["args"]["n_folds"]
        if "subjects" not in split["args"]:  # assume record-wise split
            folds = split_list(available_dreem_records, n_folds)
        else:  # multiple records per subject -> subject-wise split
            # Hoisted out of the loop: one listdir + O(1) membership tests
            # instead of a directory scan per (subject, record) pair.
            records_on_disk = set(os.listdir(dataset_dir))
            # Keep only subjects with at least one record on disk.
            subjects_with_data = [
                subject for subject in split["args"]["subjects"]
                if any(record in records_on_disk
                       for record in subject["records"])
            ]

            subjects_per_fold = split_list(subjects_with_data, n_folds)

            # NOTE(review): every record of a kept subject is included,
            # even records absent from dataset_dir — confirm intended.
            folds = []
            for fold_subjects in subjects_per_fold:
                folds.append([
                    dataset_dir + record + "/"
                    for subject in fold_subjects
                    for record in subject["records"]
                ])
    elif split["type"] == "loov":
        # Leave-one-out: each record forms its own (singleton) test fold.
        folds = [[record] for record in available_dreem_records]
    else:
        # Explicit validation instead of `assert`, which is stripped
        # under `python -O` (and made the old bare `raise` unreachable).
        raise ValueError(f"Unknown split type: {split['type']!r}")

    if fold_to_run is None:
        fold_to_run = list(range(len(folds)))

    outfolders = []
    for i, test_fold in enumerate(folds):
        if i not in fold_to_run:
            continue

        other_folds = [f for k, f in enumerate(folds) if k != i]
        # Per-fold deterministic shuffle of the non-test folds.
        rd.seed(SEED + i)
        rd.shuffle(other_folds)
        # ~20% of the remaining folds (at least one) go to validation.
        n_val = max(1, int(len(other_folds) * 0.2))

        train_folds, val_folds = other_folds[n_val:], other_folds[:n_val]
        train_records = [
            record for train_fold in train_folds for record in train_fold
        ]
        val_records = [
            record for val_fold in val_folds for record in val_fold
        ]
        if max_training_records is not None:
            rd.shuffle(train_records)
            rd.shuffle(val_records)
            # 70/30 train/val budget, keeping at least one validation record.
            n_train = min(
                int(max_training_records * 0.7), max_training_records - 1)
            n_validation = max_training_records - n_train
            train_records = train_records[:n_train]
            val_records = val_records[:n_validation]

        experiment_description = {
            "memmap_description": memmaps_description,
            "dataset_settings": settings,
            "trainer_parameters": trainer,
            "normalization_parameters": normalization,
            "net_parameters": model,
            "dataset_parameters": {
                "split": {
                    "train": train_records,
                    "val": val_records,
                    "test": test_fold
                },
                "temporal_context": temporal_context,
                "transform_parameters": transform,
                "hypnogram_filename": hypnogram_filename_train,
                "hypnogram_filename_test": hypnogram_filename_test
            },
            "save_folder": f"{save_folder}",
        }

        outfolders += [
            log_experiment(**experiment_description,
                           num_workers=num_workers,
                           generate_memmaps=False,
                           checkpoint=checkpoint,
                           validate_with_ablation_modalities=
                           validate_with_ablation_modalities)
        ]

    return outfolders