Esempio n. 1
0
        def apply_aec(data_type, with_noise):  # auto-encode cluster
            """Run ``run_cluster_latents`` on the selected settings of ``DATA``.

            Names read from the enclosing scope (not parameters): ``setting_``,
            ``DATA``, ``type_of_data``, ``name``, ``data_name``, ``n_epochs``,
            ``latent_dim_ratio``, ``ds``, ``np``, ``OneHotEncoder`` and
            ``run_cluster_latents``.

            Parameters
            ----------
            data_type : str
                Lower-case pre-processing code: ``"np"`` or one of
                ``"z-u"``, ``"z-m"``, ``"z-l"``, ``"rng-u"``, ``"rng-m"``,
                ``"rng-l"``.  The part before the dash selects which output of
                ``ds.preprocess_Y`` is clustered, the part after it selects
                which output of ``ds.preprocess_P`` accompanies it.
            with_noise : int
                ``0`` to cluster ``matrices['Y']``, ``1`` to cluster
                ``matrices['Yn']``.

            Returns
            -------
            tuple of dict
                ``(kmeans_ms, agg_ms, gmm_ms, GT_ms)``; each is indexed as
                ``[setting][repeat]`` and holds the corresponding value
                returned by ``run_cluster_latents``.

            Raises
            ------
            ValueError
                If ``data_type`` / ``with_noise`` is not one of the supported
                values, or if the requested scaling was not produced for the
                current ``type_of_data`` (previously these cases ended in an
                ``UnboundLocalError`` after the pre-processing had already run).
            """

            # Pre-processing code -> (feature scaling key, P variant key).
            recipes = {
                "np": ("np", "p"),
                "z-u": ("z", "u"),
                "z-m": ("z", "m"),
                "z-l": ("z", "l"),
                "rng-u": ("rng", "u"),
                "rng-m": ("rng", "m"),
                "rng-l": ("rng", "l"),
            }
            # Fail fast, before any data is touched.
            if data_type not in recipes:
                raise ValueError(
                    "unknown data_type %r; expected one of %s"
                    % (data_type, sorted(recipes)))
            if with_noise not in (0, 1):
                raise ValueError(
                    "with_noise must be 0 or 1, got %r" % (with_noise,))
            scaling, p_kind = recipes[data_type]

            run_all = setting_ == 'all'
            noisy_run = with_noise == 1

            def _feature_variants(Y, Yn):
                """Build the feature matrices for one repeat.

                Returns ``(clean, noisy)``: two dicts keyed by ``"np"``,
                ``"z"`` and ``"rng"``.  ``"z"`` / ``"rng"`` are present only
                when the active ``type_of_data`` branch computed them.
                """
                clean, noisy = {}, {}
                _, V = Y.shape

                # Quantitative case
                if type_of_data == 'Q' or name.split('(')[-1] == 'r':
                    _, _, clean["z"], _, clean["rng"], _ = ds.preprocess_Y(
                        Yin=Y, data_type='Q')
                    if noisy_run:
                        Yn, _, noisy["z"], _, noisy["rng"], _ = ds.preprocess_Y(
                            Yin=Yn, data_type='Q')

                # Categorical case: no noisy variants are derived here.
                if type_of_data == 'C':
                    if run_all:
                        one_hot = OneHotEncoder().fit_transform(Y).toarray()
                    else:
                        one_hot = OneHotEncoder(
                            sparse=False, categories='auto').fit_transform(Y)
                    Y, _, clean["z"], _, clean["rng"], _ = ds.preprocess_Y(
                        Yin=one_hot, data_type='C')

                # Mixed case: first Vq columns quantitative, the rest categorical.
                if type_of_data == 'M':
                    Vq = int(np.ceil(V / 2))  # number of quantitative features
                    # Column index at which the appended noise features start.
                    Vqn = Vq + int(np.floor(V / 2))

                    # NOTE(review): the two modes below differ (preprocess_Y
                    # data_type 'Q' vs 'M', column order, whether the one-hot
                    # part is rescaled).  Both are kept exactly as they were;
                    # confirm which one is intended before unifying them.
                    if run_all:
                        _, _, z_q, _, rng_q, _ = ds.preprocess_Y(
                            Yin=Y[:, :Vq], data_type='M')
                        one_hot = OneHotEncoder(
                            sparse=False).fit_transform(Y[:, Vq:])
                        clean["rng"] = np.concatenate([one_hot, rng_q], axis=1)
                        clean["z"] = np.concatenate([one_hot, z_q], axis=1)
                        Y = np.concatenate([one_hot, Y[:, :Vq]], axis=1)

                        if noisy_run:
                            _, _, nz_q, _, nrng_q, _ = ds.preprocess_Y(
                                Yin=Yn[:, :Vq], data_type='M')
                            noisy_one_hot = OneHotEncoder(
                                sparse=False).fit_transform(Yn[:, Vq:Vqn])
                            _, _, nz_x, _, nrng_x, _ = ds.preprocess_Y(
                                Yin=Yn[:, Vqn:], data_type='M')
                            noisy["rng"] = np.concatenate(
                                [noisy_one_hot, nrng_q, nrng_x], axis=1)
                            noisy["z"] = np.concatenate(
                                [noisy_one_hot, nz_q, nz_x], axis=1)
                    else:
                        _, _, z_q, _, rng_q, _ = ds.preprocess_Y(
                            Yin=Y[:, :Vq], data_type='Q')
                        one_hot = OneHotEncoder(
                            sparse=False, categories='auto',
                        ).fit_transform(Y[:, Vq:])
                        _, _, z_c, _, rng_c, _ = ds.preprocess_Y(
                            Yin=one_hot, data_type='C')
                        clean["rng"] = np.concatenate([rng_q, rng_c], axis=1)
                        clean["z"] = np.concatenate([z_q, z_c], axis=1)
                        Y = np.concatenate([Y[:, :Vq], one_hot], axis=1)

                        if noisy_run:
                            _, _, nz_q, _, nrng_q, _ = ds.preprocess_Y(
                                Yin=Yn[:, :Vq], data_type='Q')
                            noisy_one_hot = OneHotEncoder(
                                sparse=False, categories='auto',
                            ).fit_transform(Yn[:, Vq:Vqn])
                            _, _, nz_c, _, nrng_c, _ = ds.preprocess_Y(
                                Yin=noisy_one_hot, data_type='C')
                            _, _, nz_x, _, nrng_x, _ = ds.preprocess_Y(
                                Yin=Yn[:, Vqn:], data_type='Q')
                            noisy["rng"] = np.concatenate(
                                [nrng_q, nrng_c, nrng_x], axis=1)
                            noisy["z"] = np.concatenate(
                                [nz_q, nz_c, nz_x], axis=1)

                    # NOTE(review): for mixed data the noisy "np" input stays
                    # the un-encoded Yn (the former code assembled a one-hot
                    # version but never used it) -- confirm this is intended.

                clean["np"] = Y
                noisy["np"] = Yn
                return clean, noisy

            # Global initialization
            kmeans_ms = {}  # K-means results
            agg_ms = {}  # Agglomerative results
            gmm_ms = {}  # Gaussian Mixture Model
            GT_ms = {}  # Ground Truth

            for setting, repeats in DATA.items():

                # Single-setting mode: skip everything but the requested setting.
                if not run_all and str(setting) != setting_:
                    continue

                print("setting:", setting, )

                kmeans_ms[setting] = {}
                agg_ms[setting] = {}
                gmm_ms[setting] = {}
                GT_ms[setting] = {}

                for repeat, matrices in repeats.items():
                    print("repeat:", repeat)
                    GT = matrices['GT']
                    Y = matrices['Y'].astype('float32')
                    P = matrices['P'].astype('float32')
                    Yn = matrices['Yn']
                    if len(Yn) != 0:
                        Yn = Yn.astype('float32')

                    clean, noisy = _feature_variants(Y, Yn)

                    P, _, _, Pu, _, _, Pm, _, _, Pl, _, _ = ds.preprocess_P(P=P)
                    links = {"p": P, "u": Pu, "m": Pm, "l": Pl}

                    features = noisy if noisy_run else clean
                    if scaling not in features:
                        raise ValueError(
                            "data_type %r with with_noise=%r is not available "
                            "for type_of_data %r"
                            % (data_type, with_noise, type_of_data))

                    # Progress banners kept as they were: only these two
                    # noise-free codes print one, and only in single-setting mode.
                    if not run_all and not noisy_run and data_type in ("np", "z-u"):
                        print("NP" if data_type == "np" else "z-u")

                    kmeans_labels, agg_labels, gmm_labels, y_test = run_cluster_latents(
                        Y=features[scaling], P=links[p_kind], GT=GT,
                        n_epochs=n_epochs, latent_dim_ratio=latent_dim_ratio,
                        repeat=repeat, name=data_name, setting=setting)

                    kmeans_ms[setting][repeat] = kmeans_labels
                    agg_ms[setting][repeat] = agg_labels
                    gmm_ms[setting][repeat] = gmm_labels
                    GT_ms[setting][repeat] = y_test

                if not run_all:
                    # Announced once, for the setting that was actually processed.
                    print("Algorithm is applied on the " + setting_ + " data set!")

            if run_all:
                print("Algorithm is applied on the entire data set!")

            return kmeans_ms, agg_ms, gmm_ms, GT_ms
Esempio n. 2
0
        def apply_the_algorithm(data_type, ):  # auto-encode cluster
            """Run ``run_the_algorithm`` on every setting/repeat of ``DATA``.

            Names read from the enclosing scope (not parameters): ``DATA``,
            ``ds``, ``run_the_algorithm``, ``n_epochs`` and ``name``.

            Parameters
            ----------
            data_type : str
                Lower-case pre-processing code: ``"np"`` feeds the float32
                splits unchanged, ``"z"`` feeds the third output of
                ``ds.preprocess_Y`` and ``"rng"`` its fifth output.

            Returns
            -------
            tuple of dict
                ``(alg_ms, gt_ms)``; both are indexed as ``[setting][repeat]``
                and hold the two values returned by ``run_the_algorithm``.

            Raises
            ------
            ValueError
                If ``data_type`` is not one of the supported codes (previously
                this surfaced as an ``UnboundLocalError`` when the result of
                the first repeat was stored).
            """

            # Pre-processing code -> banner printed before each run.
            banners = {"np": "No Pre-Proc.", "z": "Z-score", "rng": "Rng"}
            if data_type not in banners:
                raise ValueError(
                    "unknown data_type %r; expected one of %s"
                    % (data_type, sorted(banners)))

            # Global initialization
            alg_ms = {}  # the algorithm results
            gt_ms = {}  # Ground Truth

            for setting, repeats in DATA.items():

                print("setting:", setting)

                alg_ms[setting] = {}
                gt_ms[setting] = {}

                for repeat, matrices in repeats.items():
                    print("repeat:", repeat)

                    X = {}
                    y = {}
                    for split in ("tr", "vl", "ts"):
                        raw = matrices['X_' + split].astype('float32')
                        y[split] = matrices['y_' + split].astype('float32')

                        # NOTE(review): every split is handed to preprocess_Y on
                        # its own; if that helper derives its statistics from
                        # its input, validation/test are not scaled with the
                        # training statistics -- confirm this is intended.
                        _, _, z_scored, _, rng_scaled, _ = ds.preprocess_Y(
                            Yin=raw, data_type='Q')
                        X[split] = {
                            "np": raw,
                            "z": z_scored,
                            "rng": rng_scaled,
                        }[data_type]

                    print(banners[data_type])

                    alg_x_test_labels, y_test = run_the_algorithm(
                        X_train=X["tr"],
                        y_train=y["tr"],
                        X_val=X["vl"],
                        y_val=y["vl"],
                        X_test=X["ts"],
                        y_test=y["ts"],
                        n_epochs=n_epochs,
                        repeat=repeat,
                        ds_name=name,
                        setting=setting)

                    alg_ms[setting][repeat] = alg_x_test_labels
                    gt_ms[setting][repeat] = y_test

                print("Algorithm is applied on the " + str(setting) +
                      " data set!")

            return alg_ms, gt_ms
        def apply_aec(data_type, with_noise):  # auto-encode cluster
            """Run ``run_ae`` on the selected settings of ``DATA``.

            Names read from the enclosing scope (not parameters): ``setting_``,
            ``DATA``, ``ds``, ``run_ae``, ``n_epochs``, ``latent_dim_ratio``
            and ``name``.  When ``setting_`` equals ``'all'`` every setting is
            processed, otherwise only the one whose ``str()`` equals
            ``setting_``.

            Parameters
            ----------
            data_type : str
                Lower-case pre-processing code: ``"np"`` feeds the float32
                splits unchanged, ``"z"`` feeds the third output of
                ``ds.preprocess_Y`` and ``"rng"`` its fifth output.
            with_noise : int
                Must be ``0``; no noisy variant is implemented here.

            Returns
            -------
            tuple of dict
                ``(AE_ms, GT_ms)``; both are indexed as ``[setting][repeat]``
                and hold the two values returned by ``run_ae``.

            Raises
            ------
            ValueError
                If ``data_type`` is not a supported code or ``with_noise`` is
                not ``0`` (previously both cases ended in an
                ``UnboundLocalError`` when the first result was stored).
            """

            # Pre-processing code -> banner printed before each run.
            banners = {"np": "No Pre-Proc.", "z": "Z-score", "rng": "Rng"}
            if data_type not in banners:
                raise ValueError(
                    "unknown data_type %r; expected one of %s"
                    % (data_type, sorted(banners)))
            if with_noise != 0:
                raise ValueError(
                    "with_noise=%r is not supported; only with_noise == 0 "
                    "is implemented" % (with_noise,))

            run_all = setting_ == 'all'

            # Global initialization
            AE_ms = {}  # auto-encoder results
            GT_ms = {}  # Ground Truth

            for setting, repeats in DATA.items():

                # Single-setting mode: skip everything but the requested setting.
                if not run_all and str(setting) != setting_:
                    continue

                print("setting:", setting)

                AE_ms[setting] = {}
                GT_ms[setting] = {}

                for repeat, matrices in repeats.items():
                    print("repeat:", repeat)

                    X = {}
                    y = {}
                    for split in ("tr", "vl", "ts"):
                        raw = matrices['X_' + split].astype('float32')
                        y[split] = matrices['y_' + split].astype('float32')

                        # NOTE(review): every split is handed to preprocess_Y on
                        # its own; if that helper derives its statistics from
                        # its input, validation/test are not scaled with the
                        # training statistics -- confirm this is intended.
                        _, _, z_scored, _, rng_scaled, _ = ds.preprocess_Y(
                            Yin=raw, data_type='Q')
                        X[split] = {
                            "np": raw,
                            "z": z_scored,
                            "rng": rng_scaled,
                        }[data_type]

                    print(banners[data_type])

                    AE_X_test_labels, y_test = run_ae(
                        X_train=X["tr"],
                        y_train=y["tr"],
                        X_val=X["vl"],
                        y_val=y["vl"],
                        X_test=X["ts"],
                        y_test=y["ts"],
                        n_epochs=n_epochs,
                        latent_dim_ratio=latent_dim_ratio,
                        repeat=repeat,
                        name=name,
                        setting=setting)

                    AE_ms[setting][repeat] = AE_X_test_labels
                    GT_ms[setting][repeat] = y_test

                if not run_all:
                    # Announced once, for the setting that was actually processed.
                    print("Algorithm is applied on the " + setting_ +
                          " data set!")

            if run_all:
                print("Algorithm is applied on the entire data set!")

            return AE_ms, GT_ms