コード例 #1
0
ファイル: main.py プロジェクト: isukrit/LEAN_CLIP
# Append the column-header row of the k-fold training log to the CSV file
# named after the CNN model, the mode and the class subset.
# NOTE(review): the file is opened in append mode, so this header row is
# written again every time this statement runs -- confirm that is intended.
# NOTE(review): 'Accuacy' in the seventh column is misspelled; it is kept
# unchanged here because readers of the CSV may key on the exact name.
log_columns = (
    'Seed',
    'Threshold',
    'Fold',
    'Best Epoch',
    'Training Accuracy',
    'Test Accuracy',
    'Training Accuracy - Test Accuacy',
    'Train Loss',
    'Test Loss',
    'Training Loss - Test Loss',
    'MAE',
    'AUC',
    'Trainable Parameters',
)
log_path = ''.join((TARGET_DIRECTORY, cnn_model, '_', mode,
                    '_kfold_training_logs_', class_subset, '.csv'))
with open(log_path, 'a') as out_stream:
    out_stream.write(','.join(log_columns) + '\n')

# For each seed: shuffle the dataset with a seeded permutation, build the
# fold splits, and skip every fold whose best-model file is already present.
for SEED in seeds:

    # Seed NumPy's global RNG so the permutation below repeats per seed.
    np.random.seed(SEED)
    idx = np.arange(len(X__))
    np.random.shuffle(idx)

    # Apply the same permutation to the samples, targets and subject groups.
    X = X__[idx]
    Y = Y__[idx]
    subject_groups = np.array(subject_groups_)[idx].tolist()

    # The splitter receives the argmax of Y along axis 1, the subject
    # groups, the number of folds and the seed.
    folds_indices = split_kfoldcv_sbj(Y.argmax(axis=1), subject_groups,
                                      folds, SEED)

    fold_count = 0

    for train_index, val_index in folds_indices:

        # File name under which this (seed, fold) model is looked up.
        checkpoint_name = ''.join((
            'best_model_seed_', str(SEED),
            '_', str(class_subset),
            '_', cnn_model,
            '_', mode,
            '_', str(threshold),
            '_fold_', str(fold_count),
            '.h5',
        ))

        if checkpoint_name in os.listdir(TARGET_DIRECTORY + 'best_model/'):
            print('SEED_{}_fold_{} done, skipping it....'.format(
                SEED, fold_count))
            fold_count += 1
            continue
コード例 #2
0
def generate_corr_matrix(X__, seeds, folds):
    """
    Generate and store one thresholded correlation matrix per (seed, fold).

    For every seed the dataset is shuffled with a seeded permutation and
    split into folds by ``split_kfoldcv_sbj``. For each fold whose output
    file does not exist yet, the correlation matrix of the flattened
    training samples is computed with ``np.corrcoef``, replaced by its
    absolute value, and every entry below ``SPARSE_THRESHOLD_CORR`` is set
    to 0. The result is written as dataset ``corr_matrix`` to
    ``../data/corr_matrix/<class_subset>/corr_matrix_seed<S>_fold_<K>.hdf5``.

    All seeds and folds are done in this function (multiprocessing takes up
    too much memory). Only the training indices of each fold are used to
    generate the matrices.

    Besides its arguments, the function reads the module-level names
    ``Y__``, ``subject_groups_``, ``class_subset`` and
    ``SPARSE_THRESHOLD_CORR``, and calls the helpers ``mkdir``,
    ``split_kfoldcv_sbj`` and ``corr_mx_flatten``.

    Inputs:
    - X__: Numpy array of matrices containing the dataset (training set)
    - seeds: list of seed numbers to use
    - folds: number of folds (int)

    Returns:
    - None; the matrices are written to disk.
    """

    target_dir = '../data/corr_matrix/' + class_subset + '/'
    mkdir(target_dir)

    for seed in seeds:

        # Seed NumPy's global RNG so the permutation repeats per seed.
        np.random.seed(seed)
        idx = np.arange(len(X__))
        np.random.shuffle(idx)

        # Apply the same permutation to samples, targets and subject groups.
        X = X__[idx]
        Y = Y__[idx]
        subject_groups = np.array(subject_groups_)[idx].tolist()

        folds_indices = split_kfoldcv_sbj(Y.argmax(1), subject_groups, folds,
                                          seed)

        # X does not change across folds, so it is flattened at most once per
        # seed, and only if at least one fold still has to be generated.
        # NOTE(review): assumes corr_mx_flatten depends only on its
        # argument -- confirm against its definition.
        X_flat = None

        for fold_count, (train_index, _val_index) in enumerate(folds_indices):
            tag = "corr_matrix_seed" + str(seed) + "_fold_" + str(fold_count)
            out_path = target_dir + tag + ".hdf5"

            if os.path.exists(out_path):
                print(tag + " has already been generated, skipping it...")
                continue

            print(tag + ".hdf5 not found!")

            if X_flat is None:
                X_flat = corr_mx_flatten(X)

            # Transpose so that np.corrcoef treats each column of the
            # training slice as one variable.
            corr_matrix = np.corrcoef(X_flat[train_index].T)
            print('Correlation matrix generated for seed ' + str(seed) +
                  ' fold ' + str(fold_count))

            # Keep only the magnitude and zero out weak correlations.
            corr_matrix = np.absolute(corr_matrix)
            corr_matrix[corr_matrix < SPARSE_THRESHOLD_CORR] = 0
            print("Number of non-zero elements in corr_matrix: " +
                  str(np.count_nonzero(corr_matrix)))

            # The context manager closes the file even if the write fails.
            with h5py.File(out_path, "w") as g:
                g.create_dataset('corr_matrix', data=corr_matrix)
            print("Wrote corr_matrix " + tag + " to " + target_dir)