"MIMIC": percentage_sigs_mimic,
        "eICU": percentage_sigs_eicu
    }
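    # Persist the per-dataset significance percentages for the domain-adaptation (DA) analysis.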
    stats_dir = args.stats_dir

    os.makedirs(f"{stats_dir}/DA/", exist_ok=True)

    with open(f"{stats_dir}/DA/percentage_sigs.pkl", "wb") as f:
        pickle.dump(percentage_sigs, f)

    # all_eicu and all_mimic are no longer needed from here on; free the memory.
    del all_eicu, all_mimic

    # Iterate over matched pairs of models, one initialised for MIMIC and one for eICU.
    for mimic_model_info, eicu_model_info in zip(
            init_models(input_dim=len(feature_names_eicu),
                        selection=args.models,
                        origin="MIMIC"),
            init_models(input_dim=len(feature_names_eicu),
                        selection=args.models,
                        origin="eICU"),
    ):
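        # Each *_model_info tuple is (novelty_estimator, scoring_functions, method_name),
        # so index 2 is the human-readable method name.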
        print(mimic_model_info[2])
        # Result containers: OOD-detection AUCs and recall, plus additional evaluation metrics.
        ood_detect_aucs, ood_recall = (
            defaultdict(lambda: defaultdict(list)),
            defaultdict(lambda: defaultdict(list)),
        )
        metrics = defaultdict(lambda: defaultdict(list))

        ood_detect_aucs, ood_recall, metrics = ood_utils.run_ood_experiment_on_group(
            id_data=mimic_data,
            ood_data=eicu_data,
    y_name = dh.load_target_name()

    # Preprocess: standard-scale the features, then mean-impute remaining missing values.
    pipe = pipeline.Pipeline([("scaler", StandardScaler()),
                              ("imputer", SimpleImputer())])

    # Fit the preprocessing pipeline on the training split only, then apply it to all splits.
    pipe.fit(train_data[feature_names])
    X_train = pipe.transform(train_data[feature_names])
    X_test = pipe.transform(test_data[feature_names])
    X_val = pipe.transform(val_data[feature_names])

    # Novelty scores on the test set, keyed by scoring function, with one entry per seed.
    uncertainties = defaultdict(list)

    for ne, scoring_funcs, method_name in init_models(
            input_dim=len(feature_names),
            selection=args.models,
            origin=args.data_origin,
    ):
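        # init_models yields the novelty estimator, the scoring functions it supports,
        # and a human-readable method name.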
        print(method_name)
        predictions = []

        for i in tqdm(range(N_SEEDS)):
            # Train the novelty estimator for this seed, validating on the validation split.
            ne.train(X_train, train_data[y_name].values, X_val,
                     val_data[y_name].values)

            for scoring_func in scoring_funcs:
                uncertainties[scoring_func] += [
                    ne.get_novelty_score(X_test, scoring_func=scoring_func)
                ]
                print(len(uncertainties[scoring_func][0]))
    # Loading the data
    data_loader = load_data_from_origin(args.data_origin)
    dh = DataHandler(**data_loader)
    feature_names = dh.load_feature_names()

    train_data, test_data, val_data = dh.load_data_splits()
    y_name = dh.load_target_name()

    # Newborns are only available in the MIMIC datasets and serve as an additional OOD group.
    if args.data_origin in MIMIC_ORIGINS:
        train_newborns, test_newborns, val_newborns = dh.load_other_groups("newborns")

    ood_mappings = dh.load_ood_mappings()

    # Loop over the different methods
    for model_info in init_models(
        input_dim=len(feature_names), selection=args.models, origin=args.data_origin
    ):
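        # model_info is a (novelty_estimator, scoring_functions, method_name) tuple;
        # index 2 is the method name used for logging.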
        print(model_info[2])
        ood_detect_aucs, ood_recall = (
            defaultdict(lambda: defaultdict(list)),
            defaultdict(lambda: defaultdict(list)),
        )
        metrics = defaultdict(lambda: defaultdict(list))

        # Experiments on Newborns, only on MIMIC for now
        if args.data_origin in MIMIC_ORIGINS:
            id_data = DomainData(
                train_data, test_data, val_data, feature_names, y_name, "in-domain"
            )
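            # Newborn patients form the out-of-domain group for this comparison.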
            ood_data = DomainData(
                train_newborns,