##################################
            # on small runs
            # ytrain[5:8] = 1
            # ytest[5:8] = 1
            ##################################

            #### balancing data ####
            # now resample majority down to minority to achieve equal
            # - new name in order to not interfere with hyperopt

            if Smote == False:  # TODO: Hvis kun undersampling
                Xtrain_new, ytrain_new = rand_undersample(Xtrain, ytrain, arg='majority',
                                                          state=random_state_val, multi=False)
            else: # TODO: Hvis SMOTE + undersampling
                # Using mix of undersampling and smote
                Xtrain_new, ytrain_new = balanceData(Xtrain, ytrain, ratio = 2, random_state_val=random_state_val)


            #plot Normal
            NoAug = Xtrain_new[np.where(ytrain_new == 1)]
            MeanData['NoAug'][artifact_name][fold] = NoAug

            #plotSpectro(NoAug, min_val, max_val, artifact_names[artifact], AugMethod = "None")


            #plot GAN
            GAN_X, GAN_y = useGAN(Xtrain_new, ytrain_new, aug_ratio, GAN_epochs = GAN_epochs, experiment_name = "Spectrograms")
            GAN_y = GAN_y[len(Xtrain_new):]
            GAN_X = GAN_X[len(Xtrain_new):][np.where(GAN_y == 1)]
            MeanData['GAN'][artifact_name][fold] = GAN_X
            #plotSpectro(GAN_X, min_val, max_val, artifact_names[artifact], AugMethod = "GAN")
Exemple #2
0
    def runActivePipeline(self,
                          model,
                          HO_evals,
                          active_ratio,
                          smote_ratios,
                          aug_ratios,
                          experiment,
                          experiment_name,
                          init_amount_percent,
                          n_pr_query_percent,
                          artifact_names=None,
                          GAN_epochs=100,
                          noise_experiment=None,
                          randomSampler=False,
                          DelNan_noiseFiles=False,
                          fast_run=False,
                          K=5,
                          random_state=0,
                          save_y_true=False):
        """
        Run the K-fold cross-validated active-learning pipeline for one model and
        pickle the resulting dictionaries.

        For every augmentation ratio, SMOTE ratio, CV fold and artifact, the method
        starts from a random initial training set drawn from the fold's training
        pool and repeatedly (1) balances / augments the current training set,
        (2) tunes the model with Hyperopt, (3) evaluates it on the fold's test set
        and (4) moves `n_pr_query` samples from the pool to the training set.

        Parameters:
            :param model (str): Model name. Should follow the naming of the defined models in the
                                models.models.py script, i.e. 'LR'.
            :param HO_evals (int): Number of queries to call in the optimization using Bayesian Optimization with Hyperopt.
            :param active_ratio (float): the query loop keeps running while the size of the training set is
                                         <= active_ratio * (size of the fold's training pool).
            :param smote_ratios (np.array(float)): array of floats. For creating 100% additional samples of the minority
                                                    class, the float-value should be set to 1. Similarly 50% upsampling
                                                    means a float-value of 0.5. Compatible with multiple values.
            :param aug_ratios (np.array(float)): array of floats. For creating 100% augmented data compared to original
                                                    data, the float-value should be set to 1. Similarly 50% extra
                                                    augmented data means a float-value of 0.5. Compatible with multiple values.
            :param experiment (str): name of the wider experiment, i.e. MixUp_experiment_No5
            :param experiment_name (str): name of the pickles created.
                                            When doing Augmentation it should end with either _GAN / _Noise / _MixUp,
                                            such that the following command can work properly:

                                            if experiment_name.split("_")[-1] == 'GAN':

                                            When running the pipeline on a single artifact, one should manually write this in the
                                            'experiment_name', i.e. experiment + model + 'Null' + aug_method
            :param init_amount_percent (float): fraction of the fold's training pool used as the initial training set.
            :param n_pr_query_percent (float): fraction of the fold's training pool that is added to the training set
                                               in every query round.
            :param artifact_names (list(str)): a list of the artifacts that are to be investigated. Default artifact are
                                                ['eyem', 'chew', 'shiv', 'elpp', 'musc', 'null'] as the study is carried
                                                out using the TUH EEG Artifact corpus.
            :param GAN_epochs (int): number of epochs to use if the GAN augmentation technique is used.
            :param noise_experiment (str): directory containing the noise pickles to be used. Should be None when
                                            not experimenting with Noise Addition augmentation technique.
            :param randomSampler (bool): if True the queried samples are drawn at random from the remaining pool,
                                         otherwise the n_pr_query pool samples with the largest norm_grad_x_LR
                                         score are queried.
            :param DelNan_noiseFiles (bool): False per default. Should be True if using data that contains NaN-values.
            :param fast_run (bool): True if a quick run-through of the pipeline is needed in order to detect bugs or new
                                    functions.
            :param K (int): number of folds for the cross-validation (CV) applied. Default value is 5.
            :param random_state (int): random seed for the used functions such as the train_test_split in KFold CV.
            :param save_y_true (bool): True if a pickle of the true y-values in each CV-fold should be saved.

        Returns:
            :return: Saves pickles of dictionaries of results from an experiment.
                        The "results_x_.npy" file is a nested dictionary holding information about accuracy, F2-score,
                        sensitivity score and the predicted labels on the test data for the model throughout the specified
                        augmentation ratios, SMOTE-ratios and CV-folds for the specified artifact(s).

                        The "ho_trials_x_.npy" file is a nested dictionary holding information about Hyperopt calls for the
                        model throughout the specified augmentation ratios, SMOTE-ratios and CV-folds for the specified artifact(s).

                        The "resultsActive_x_" file is a nested dictionary holding, per query round, tuples of
                        (training-set size, score) for accuracy, F2, sensitivity and class balance.

                        The "y_true_x_.npy" file is a nested dictionary holding information about the true y-labels
                        throughout CV-folds for the specified artifact(s).
        """

        X = LoadNumpyPickles(pickle_path=self.pickle_path,
                             file_name=self.X_file,
                             windowsOS=self.windowsOS)
        y = LoadNumpyPickles(pickle_path=self.pickle_path,
                             file_name=self.y_file,
                             windowsOS=self.windowsOS)
        ID_frame = LoadNumpyPickles(pickle_path=self.pickle_path,
                                    file_name=self.ID_file,
                                    windowsOS=self.windowsOS)

        # extract a subset for faster running time
        # X, y, ID_frame = subset(X, y, ID_frame, no_indiv=30)

        # apply the inclusion principle
        X, y, ID_frame = binary(X, y, ID_frame)

        # The KFold is split by individuals, not by samples
        individuals = np.unique(ID_frame)

        # Choose model
        model_dict = self.full_model_dict[model]

        # NOTE(review): backslash separator is used regardless of self.windowsOS.
        pickle_path_aug = self.pickle_path + r"\augmentation_pickles"

        if noise_experiment is not None:
            X_noise, y_noise, ID_frame_noise = prepareNoiseAddition(
                pickle_path_aug,
                noise_experiment,
                self.X_file,
                self.y_file,
                self.ID_file,
                windowsOS=self.windowsOS)

            if DelNan_noiseFiles:
                X, y, ID_frame, X_noise, y_noise, ID_frame_noise = DeleteNanNoise(
                    X,
                    y,
                    ID_frame,
                    X_noise,
                    y_noise,
                    ID_frame_noise,
                    save_path=pickle_path_aug + noise_experiment,
                    windowsOS=self.windowsOS)

        # Dictionary holding keys and values for all functions from the models.py file. Used to "look up" functions in the CV
        # and hyperoptimization part
        function_dict = ActiveModels.__dict__

        random_state_val = random_state

        #### define classes ####
        if artifact_names is None:
            artifact_names = ['eyem', 'chew', 'shiv', 'elpp', 'musc', 'null']

        classes = len(artifact_names)

        # The augmentation technique is encoded as the last "_"-separated token
        # of the experiment name (GAN / MixUp / Noise).
        aug_method = experiment_name.split("_")[-1]

        # for hyperopt data to save
        def unpack(x):
            if x:
                return x[0]
            return np.nan

        ho_trials = {}  # fold, artifact, model, hyperopt iterations
        results = {}  # fold, artifact, model, scores
        y_true_dict = {}
        active_results = {}

        # setting fold details
        kf = KFold(
            n_splits=K, random_state=random_state_val, shuffle=True
        )  # random state + shuffle ensured same repeated experiments

        for aug_ratio in aug_ratios:
            if aug_ratio != 0:
                print("\n####---------------------------------------####")
                print("Running a", aug_ratio,
                      "ratio of (augmented / real) using", aug_method)
                print("####---------------------------------------####")
            #### Initializing dict for this Augmenation ratio:
            ho_trials[aug_ratio] = {}
            results[aug_ratio] = {}
            active_results[aug_ratio] = {}

            for ratio in smote_ratios:
                # internally a ratio of 1 means "no SMOTE"
                ratio += 1

                if ratio != 1:
                    print("\n####---------------------------------------####")
                    print("Running a", ratio - 1, "ratio of SMOTE")
                    print("####---------------------------------------####")

                i = 0  # CV fold index
                cross_val_time_start = time()

                #### initializing dict for this ratio ####
                ho_trials[aug_ratio][ratio] = {}  # for this fold
                results[aug_ratio][ratio] = {}
                active_results[aug_ratio][ratio] = {}

                for train_idx, test_idx in kf.split(individuals):
                    # single loop
                    # while i < 1:
                    #    trainindv, testindv = train_test_split(individuals, test_size=0.20, random_state=random_state_val, shuffle = True)
                    #   REMEMBER to # the other below
                    print("\n-----------------------------------------------")
                    print("Running {:d}-fold CV - fold {:d}/{:d}".format(
                        K, i + 1, K))
                    print("-----------------------------------------------")

                    #### initializing data ####
                    # their IDs
                    trainindv = individuals[train_idx]
                    testindv = individuals[test_idx]
                    # their indexes in train and test
                    train_indices = [
                        idx for idx, ID in enumerate(ID_frame)
                        if ID in trainindv
                    ]
                    test_indices = [
                        idx for idx, ID in enumerate(ID_frame)
                        if ID in testindv
                    ]
                    testID_list = [ID for ID in ID_frame if ID in testindv]

                    X_test, y_test = X[test_indices, :], y[test_indices]

                    # Preparing for active learning
                    Xpool_orig, ypool_orig = X[
                        train_indices, :], y[train_indices]

                    init_amount = int(
                        np.round(init_amount_percent * len(Xpool_orig), 0))
                    n_pr_query = int(
                        np.round(n_pr_query_percent * len(Xpool_orig), 0))

                    # Initial training set: a random subset of the pool.
                    # Note that this uses the global NumPy RNG, not random_state.
                    order = np.random.permutation(range(len(Xpool_orig)))
                    trainset_orig = order[:init_amount]

                    X_train_orig = np.take(Xpool_orig, trainset_orig, axis=0)
                    y_train_orig = np.take(ypool_orig, trainset_orig, axis=0)

                    # `np.int` was removed in NumPy 1.24; the builtin is equivalent.
                    poolidx_orig = np.arange(len(Xpool_orig), dtype=int)
                    poolidx_orig = np.setdiff1d(poolidx_orig, trainset_orig)

                    #### initializing dict for this fold ####
                    ho_trials[aug_ratio][ratio][i] = {}  # for this fold
                    results[aug_ratio][ratio][i] = {}
                    active_results[aug_ratio][ratio][i] = {}

                    # y_true is only written when aug_ratio == 0; do not wipe
                    # it again when a later augmentation ratio is processed.
                    y_true_dict.setdefault(i, {})

                    #### for each artifact ####
                    for artifact in range(classes):
                        artifact_name = artifact_names[artifact]
                        print("\nTraining on the class: " + artifact_name +
                              "\n")

                        trainset = trainset_orig
                        Xpool, ypool = Xpool_orig, ypool_orig
                        X_train, y_train = X_train_orig, y_train_orig
                        poolidx = poolidx_orig

                        # Per-artifact learning curves, one tuple per query
                        # round. Created here so artifacts do not share (and
                        # keep appending to) the same list objects.
                        trainData_balance = []
                        testacc_al = []
                        testF2_al = []
                        testSens_al = []

                        #### initializing dict for this artifact ####
                        ho_trials[aug_ratio][ratio][i][artifact_name] = {}
                        results[aug_ratio][ratio][i][artifact_name] = {}
                        y_true_dict[i].setdefault(artifact_name, {})
                        active_results[aug_ratio][ratio][i][
                            artifact_name] = {}

                        query_round = 0
                        while len(X_train) <= active_ratio * len(Xpool_orig):

                            #### initializing hyperopt split ####
                            # TODO: This is redundant; we can just change
                            # HO_individuals to trainindv (I think).
                            # NOTE(review): `trainset` holds positions within
                            # the pool, but is used here to index the full
                            # ID_frame, and it is never extended as X_train
                            # grows - verify that this is intended.
                            train_ID_frame = ID_frame[trainset]
                            HO_individuals = np.unique(train_ID_frame)

                            #### initializing data ####
                            # only include the artifact of interest
                            # new name for the ones with current artifact
                            Xtrain = X_train  # only to keep things similar
                            Xtest = X_test  # only to keep things similar
                            ytrain = y_train[:, artifact]
                            ytest = y_test[:, artifact]

                            ##################################
                            # on small runs
                            # ytrain[5:8] = 1
                            # ytest[5:8] = 1
                            ##################################

                            if fast_run:
                                # force a few positives so both classes exist
                                ytrain[:3] = 1
                                ytest[:3] = 1

                            #### balancing data ####
                            # now resample majority down to minority to achieve equal
                            # - new name in order to not interfere with hyperopt
                            if ratio == 1:  # if no smote
                                Xtrain_new, ytrain_new = rand_undersample(
                                    Xtrain,
                                    ytrain,
                                    arg='majority',
                                    state=random_state_val,
                                    multi=False)
                            else:
                                # Using mix of undersampling and smote
                                Xtrain_new, ytrain_new = balanceData(
                                    Xtrain,
                                    ytrain,
                                    ratio,
                                    random_state_val=random_state_val)

                            # %% Data Augmentation step:
                            if aug_ratio != 0:

                                if aug_method == 'GAN':
                                    Xtrain_new, ytrain_new = useGAN(
                                        Xtrain_new, ytrain_new, aug_ratio,
                                        GAN_epochs, experiment_name)

                                if aug_method == "MixUp":
                                    Xtrain_new, ytrain_new = useMixUp(
                                        Xtrain_new, ytrain_new, aug_ratio)

                                if aug_method == "Noise":
                                    X_noise_new = X_noise[train_indices, :]
                                    y_noise_new = y_noise[train_indices, :]
                                    y_noise_new = y_noise_new[:, artifact]

                                    Xtrain_new, ytrain_new = useNoiseAddition(
                                        X_noise_new, y_noise_new, Xtrain_new,
                                        ytrain_new, aug_ratio,
                                        random_state_val)

                            #### creating test environment ####
                            Xtrain_new, ytrain_new = shuffle(
                                Xtrain_new,
                                ytrain_new,
                                random_state=random_state_val)
                            # Shuffle into a separate name: Xtest / ytest are
                            # re-derived from the unshuffled fold data every
                            # round, so the ID list must be as well, otherwise
                            # it is permuted repeatedly and no longer lines up
                            # with the labels.
                            Xtest, ytest, testID_shuffled = shuffle(
                                Xtest,
                                ytest,
                                testID_list,
                                random_state=random_state_val)

                            env = models(Xtrain_new,
                                         ytrain_new,
                                         Xtest,
                                         ytest,
                                         state=random_state_val)

                            #### initializing validation data for hyperopt ####
                            trainindv, testindv = train_test_split(
                                HO_individuals,
                                test_size=0.20,
                                random_state=random_state_val,
                                shuffle=True)

                            # indices of these individuals from ID_frame
                            HO_train_indices = [
                                idx for idx, ID in enumerate(train_ID_frame)
                                if ID in trainindv
                            ]
                            HO_test_indices = [
                                idx for idx, ID in enumerate(train_ID_frame)
                                if ID in testindv
                            ]

                            # constructing sets
                            # we keep the original and balance new later
                            HO_Xtrain, HO_ytrain = Xtrain[
                                HO_train_indices, :], ytrain[HO_train_indices]
                            HO_Xtest, HO_ytest = Xtrain[
                                HO_test_indices, :], ytrain[HO_test_indices]

                            if fast_run:
                                HO_ytrain[:2] = 1
                                HO_ytest[:2] = 1

                            #### initializing validation data for hyperopt ####

                            if ratio == 1:  # if no smote
                                HO_Xtrain_new, HO_ytrain_new = rand_undersample(
                                    HO_Xtrain,
                                    HO_ytrain,
                                    arg='majority',
                                    state=random_state_val,
                                    multi=False)
                            else:
                                # Using mix of undersampling and smote
                                HO_Xtrain_new, HO_ytrain_new = balanceData(
                                    HO_Xtrain,
                                    HO_ytrain,
                                    ratio,
                                    random_state_val=random_state_val)

                            if aug_ratio != 0:

                                if aug_method == 'GAN':
                                    HO_Xtrain_new, HO_ytrain_new = useGAN(
                                        HO_Xtrain_new, HO_ytrain_new,
                                        aug_ratio, GAN_epochs, experiment_name)

                                if aug_method == "MixUp":
                                    HO_Xtrain_new, HO_ytrain_new = useMixUp(
                                        HO_Xtrain_new, HO_ytrain_new,
                                        aug_ratio)

                                if aug_method == "Noise":
                                    # NOTE(review): HO_train_indices are
                                    # positions within train_ID_frame but are
                                    # applied to the full noise arrays here.
                                    X_noise_new = X_noise[HO_train_indices, :]
                                    y_noise_new = y_noise[HO_train_indices, :]
                                    y_noise_new = y_noise_new[:, artifact]

                                    HO_Xtrain_new, HO_ytrain_new = useNoiseAddition(
                                        X_noise_new, y_noise_new,
                                        HO_Xtrain_new, HO_ytrain_new,
                                        aug_ratio, random_state_val)

                            # Hyperopt environment
                            HO_Xtrain_new, HO_ytrain_new = shuffle(
                                HO_Xtrain_new,
                                HO_ytrain_new,
                                random_state=random_state_val)
                            # NOTE(review): this replaces the validation split
                            # built above with the fold's *test* set, so
                            # Hyperopt is scored on the test data - confirm
                            # whether shuffle(HO_Xtest, HO_ytest) was meant.
                            HO_Xtest, HO_ytest = shuffle(
                                Xtest, ytest, random_state=random_state_val)

                            HO_env = models(HO_Xtrain_new,
                                            HO_ytrain_new,
                                            HO_Xtest,
                                            HO_ytest,
                                            state=random_state_val)

                            # https://medium.com/district-data-labs/parameter-tuning-with-hyperopt-faa86acdfdce
                            # https://towardsdatascience.com/hyperparameter-optimization-in-python-part-2-hyperopt-5f661db91324
                            # http://hyperopt.github.io/hyperopt/getting-started/search_spaces/
                            start_time = time()

                            name, space = model_dict[model]

                            #### HyperOpt ####
                            if space is not None:  # if hyperopt is defined

                                print('\nHyperOpt on: ',
                                      model)  # print model name

                                trials = Trials()

                                def objective(params):
                                    accuracy, f2_s, sensitivity, y_pred, weights = function_dict[
                                        name](HO_env,
                                              **params)  # hyperopt environment
                                    # fmin minimizes, so negate the score
                                    return -sensitivity

                                best = fmin(fn=objective,
                                            space=space,
                                            algo=tpe.suggest,
                                            max_evals=HO_evals,
                                            trials=trials)

                                #### saving evaluations ####
                                # (overwritten every query round, so the
                                # trials of the last round are the ones kept)
                                ho_frame = pd.DataFrame([
                                    pd.Series(t["misc"]["vals"]).apply(unpack)
                                    for t in trials
                                ])
                                ho_frame["sensitivity"] = [
                                    -t["result"]["loss"] for t in trials
                                ]
                                ho_trials[aug_ratio][ratio][i][artifact_name][
                                    model] = ho_frame
                                print('best parameter/s:', best)

                                # define best found function
                                f = function_dict[name](
                                    env, **best)  # now test environment

                            # without hyperopt
                            else:  # space is none
                                f = function_dict[name](env)
                            end_time = time()
                            took_time = (end_time - start_time)

                            print(model + ": \t" + str(f[:3]) +
                                  ". Time: {:f} seconds".format(took_time))

                            # This part using the modAL library is currently broken.
                            #
                            # AL_env = ActiveModels(X_train=Xtrain, y_train=ytrain,
                            #                       X_test=Xtest, y_test=ytest,
                            #                       state=random_state_val)
                            #
                            # f = AL_env.LR(**best)
                            #
                            # modAL_learner = ActiveLearner(estimator=AL_env.model,
                            #                               #query_strategy=expected_error_reduction,
                            #                               X_training=AL_env.X_train, y_training=AL_env.y_train)
                            #
                            # query_idx, query_inst = modAL_learner.query(X_pool=Xpool, n_instances=1)
                            # modAL_learner.teach(Xpool[query_idx], ypool[query_idx])

                            acc, F2, sensitivity, y_pred, weights = f

                            if randomSampler:
                                # baseline: query random pool samples
                                order_sub = np.random.permutation(poolidx)
                                newIdxsPool = order_sub[:n_pr_query]

                                X_train = np.concatenate(
                                    (X_train, Xpool[newIdxsPool]))
                                y_train = np.concatenate(
                                    (y_train, ypool[newIdxsPool]))
                                poolidx = np.setdiff1d(poolidx, newIdxsPool)
                                print('Model: LR, %i samples (Random)' %
                                      (init_amount + query_round * n_pr_query))

                            else:
                                # query the pool samples with the largest score
                                emc = norm_grad_x_LR(weights, Xpool[poolidx])

                                ypool_p_sort_idx = np.argsort(emc)
                                # argsort yields positions *within* poolidx;
                                # map them back to pool indices before using
                                # them, including when shrinking the pool.
                                queried = poolidx[
                                    ypool_p_sort_idx[-n_pr_query:]]
                                X_train = np.concatenate(
                                    (X_train, Xpool[queried]))
                                y_train = np.concatenate(
                                    (y_train, ypool[queried]))
                                poolidx = np.setdiff1d(poolidx, queried)
                                print('Model: LR, %i samples (EMC)' %
                                      (init_amount + query_round * n_pr_query))

                            # Xtrain / ytrain still refer to the training set
                            # this round's model was fitted on.
                            counts = np.unique(ytrain, return_counts=True)[1]

                            trainData_balance.append(
                                (len(Xtrain), counts[1] / counts[0]))
                            testacc_al.append((len(Xtrain), acc))
                            testF2_al.append((len(Xtrain), F2))
                            testSens_al.append((len(Xtrain), sensitivity))

                            query_round += 1

                        #### learning curves for this artifact ####
                        active_results[aug_ratio][ratio][i][artifact_name][
                            model] = {
                                'accuracy': testacc_al,
                                'F2': testF2_al,
                                'sensitivity': testSens_al,
                                'balance': trainData_balance,
                            }

                        #### saving results[aug_ratio] (last query round) ####
                        results[aug_ratio][ratio][i][artifact_name][model] = {
                            'y_pred': y_pred,
                            'accuracy': acc,
                            'weighted_F2': F2,
                            'sensitivity': sensitivity,
                        }

                        if aug_ratio == 0:
                            y_true_dict[i][artifact_name][
                                "y_true"] = env.y_test
                            y_true_dict[i][artifact_name][
                                "ID_list"] = testID_shuffled

                    # new fold
                    i += 1

                cross_val_time_end = time()
                cross_val_time = cross_val_time_end - cross_val_time_start
                print("The cross-validation for ratio" + str(ratio - 1) +
                      " took " + str(np.round(cross_val_time, 3)) +
                      " seconds = " + str(np.round(cross_val_time / 60, 3)) +
                      " minutes")
                print('\n\n')
                results[aug_ratio][ratio]['time'] = cross_val_time

        #### saving data ####
        # Remember to change name of pickle when doing a new first_pilot

        if self.windowsOS:
            os.makedirs(self.pickle_path + r"\results\performance" + "\\" +
                        experiment,
                        exist_ok=True)
            os.makedirs(self.pickle_path + r"\results\hyperopt" + "\\" +
                        experiment,
                        exist_ok=True)
            os.makedirs(self.pickle_path + r"\results\y_true", exist_ok=True)

            SaveNumpyPickles(
                self.pickle_path + r"\results\performance" + "\\" + experiment,
                r"\results" + experiment_name, results, self.windowsOS)
            SaveNumpyPickles(
                self.pickle_path + r"\results\hyperopt" + "\\" + experiment,
                r"\ho_trials" + experiment_name, ho_trials, self.windowsOS)

            # Active pickles
            os.makedirs(self.pickle_path + r"\results\performance" +
                        r"\active" + experiment,
                        exist_ok=True)
            os.makedirs(self.pickle_path + r"\results\hyperopt" + r"\active" +
                        experiment,
                        exist_ok=True)

            SaveNumpyPickles(
                self.pickle_path + r"\results\performance" + r"\active" +
                experiment, r"\resultsActive" + experiment_name,
                active_results, self.windowsOS)

            if save_y_true:
                SaveNumpyPickles(
                    self.pickle_path + r"\results\y_true", r"\y_true_" +
                    str(K) + "fold_randomstate_" + str(random_state_val),
                    y_true_dict, self.windowsOS)

        else:
            # NOTE(review): some paths below join with "results/..." and others
            # with "/results/...", so the outcome depends on whether
            # self.pickle_path ends with a separator - left as in the original.
            os.makedirs(self.pickle_path + r"results/performance" + "/" +
                        experiment,
                        exist_ok=True)
            os.makedirs(self.pickle_path + r"results/hyperopt" + "/" +
                        experiment,
                        exist_ok=True)
            os.makedirs(self.pickle_path + r"results/y_true", exist_ok=True)

            SaveNumpyPickles(self.pickle_path + r"results/performance" + "/" +
                             experiment,
                             r"/results" + experiment_name,
                             results,
                             windowsOS=self.windowsOS)
            SaveNumpyPickles(self.pickle_path + r"results/hyperopt" + "/" +
                             experiment,
                             r"/ho_trials" + experiment_name,
                             ho_trials,
                             windowsOS=self.windowsOS)

            #Active results
            os.makedirs(self.pickle_path + r"/results/performance" +
                        "/active" + experiment,
                        exist_ok=True)
            os.makedirs(self.pickle_path + r"/results/hyperopt" + "/active" +
                        experiment,
                        exist_ok=True)

            SaveNumpyPickles(
                self.pickle_path + r"/results/performance" + r"/active" +
                experiment, r"/resultsActive" + experiment_name,
                active_results, self.windowsOS)

            if save_y_true:
                # The dictionary is the data argument, not part of the file
                # name (the original concatenated it onto the name string).
                SaveNumpyPickles(self.pickle_path + r"results/y_true",
                                 r"/y_true_" + str(K) + "fold_randomstate_" +
                                 str(random_state_val),
                                 y_true_dict,
                                 windowsOS=self.windowsOS)
        X_under, y_under = rand_undersample(X, y_arti, arg='majority', state=random_state_val, multi=False)
        #extracting samples
        X_under_test, y_under_test, indices = subsample(X_under, y_under, bal_n)
        X_under = np.delete(X_under, indices, axis=0)
        y_under = np.delete(y_under, indices, axis=0)
        #running pca
        pca_under, Xstd_under = runPCA(X_under, y_under)
        #finding standardizing values
        Xmean_under, Xerr_under = np.mean(X_under, axis=0), np.std(X_under, axis=0)
        #standardizing sample
        X_under_test = (X_under_test - Xmean_under) / Xerr_under
        #extracting samples
        X_under_sample, y_under_sample, indices = subsample(X_under_test, y_under_test, subsample_n)

        #SMOTE for this artifact
        X_smote, y_smote = balanceData(X, y_arti, ratio=aug_smote_ratio , random_state_val=random_state_val)
        #extracting samples
        X_smote_test, y_smote_test, indices = subsample(X_smote, y_smote, bal_n)
        X_smote = np.delete(X_smote, indices, axis=0)
        y_smote = np.delete(y_smote, indices, axis=0)
        #running pca
        pca_smote, Xstd_smote = runPCA(X_smote, y_smote)
        #finding standardizing values
        Xmean_smote, Xerr_smote = np.mean(X_smote, axis=0), np.std(X_smote, axis=0)
        #standardizing sample
        X_smote_test = (X_smote_test - Xmean_smote) / Xerr_smote
        #extracting samples
        X_smote_sample, y_smote_sample, indices = subsample(X_smote_test, y_smote_test, subsample_n)


        ### Explained variance ###