# Imports reconstructed for this snippet (assumption: Keras on the
# TensorFlow 1.x backend plus the project-local helper modules named below;
# create_mask and model_reduced are assumed to be defined elsewhere in this
# module).
import os
from datetime import date

import numpy as np
from keras import backend as K
from keras.models import load_model
from sklearn.model_selection import train_test_split

import data_augmentation
import data_preprocessing
import generate_result
import load_data


def features_saved_model(train_data, test_data, model, intermediate_layer,
                         results):

    # The model may be passed either as a loaded Keras model or as the file
    # name of a saved model; load it from disk in the latter case.
    if isinstance(model, str):
        results['model_is'] = model
        model_path = load_data.find_path(model)
        model = load_model(model_path)

    # Backend function mapping the network input (plus the learning-phase
    # flag) to the activations of the chosen intermediate layer.
    get_layer_output = K.function(
        [model.layers[0].input, K.learning_phase()],
        [model.layers[intermediate_layer].output])

    # Extract features in test mode (learning_phase = 0) for both sets, so
    # dropout and batch normalisation behave deterministically.
    features_train = get_layer_output([train_data, 0])[0]
    features_test = get_layer_output([test_data, 0])[0]

    # Flatten the spatial dimensions so each sample becomes a single
    # feature vector.
    length, width, depth = features_train.shape[1:4]
    features_train = features_train.reshape(features_train.shape[0],
                                            length * width * depth)
    features_test = features_test.reshape(features_test.shape[0],
                                          length * width * depth)

    return features_train, features_test, results
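
# A minimal usage sketch for features_saved_model (hypothetical names: a saved
# Keras model 'model.h5' and 4-D image batches; layer 4 is assumed to be a
# convolutional layer, so its output has spatial dimensions to flatten):
#
#     results = {}
#     features_train, features_test, results = features_saved_model(
#         train_data, test_data, 'model.h5', intermediate_layer=4,
#         results=results)
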
def create_results_dir(results_directory):
    # Results are grouped in a directory named after today's date, with one
    # numbered subdirectory per run.
    today = str(date.today())

    results_dir_path = load_data.find_path(results_directory)
    if not os.path.exists(os.path.join(results_dir_path, today)):
        os.mkdir(os.path.join(results_dir_path, today))

    # Start numbering at 1, or continue from the highest existing run number.
    dir_list = os.listdir(os.path.join(results_dir_path, today))
    if len(dir_list) == 0:
        file_number = 1
    else:
        file_number = max(map(int, dir_list)) + 1

    result_path = os.path.join(results_dir_path, today, str(file_number))
    os.mkdir(result_path)
    return result_path
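
# Example: with 'Output_results_directory', successive runs on the same day
# create numbered subdirectories <path>/<YYYY-MM-DD>/1, /2, ...
#
#     result_path = create_results_dir('Output_results_directory')
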
def converting_nii_to_npz(file_name):
    # Re-save a .nii.gz volume as .npz, stripping the 7-character '.nii.gz'
    # suffix before appending '.npz'.
    file_path = load_data.find_path(file_name)
    nii_file = data_augmentation.load_obj(file_path)
    np.savez(file_path[:-7] + '.npz', masked_voxels=nii_file)
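
# Example (hypothetical file name; assumes load_data.find_path can resolve it):
#
#     converting_nii_to_npz('my_volume.nii.gz')
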
def preprocessing(train_data, test_data, train_labels, test_labels, method,
                  save, file_name, output_dir):
    # Cache the 4-D shapes so the data can be flattened for the individual
    # preprocessing routines and restored afterwards.
    dim0_train, dim1_train, dim2_train, dim3_train = train_data.shape
    dim0_test, dim1_test, dim2_test, dim3_test = test_data.shape

    if method == 0:
        # No preprocessing: hand the data back unchanged.
        return train_data, test_data, train_labels, test_labels
    elif method == 1:
        train_data = train_data.reshape(dim0_train,
                                        dim1_train * dim2_train * dim3_train)
        test_data = test_data.reshape(dim0_test,
                                      dim1_test * dim2_test * dim3_test)
        train_data = data_preprocessing.MinMax_scaler(train_data)
        test_data = data_preprocessing.MinMax_scaler(test_data)
        train_data, test_data = data_preprocessing.standarization(
            train_data, test_data)
        train_data, test_data = data_preprocessing.KSTest(
            train_data, test_data, 800)
        train_data = train_data.reshape(dim0_train, dim1_train, dim2_train,
                                        dim3_train)
        test_data = test_data.reshape(dim0_test, dim1_test, dim2_test,
                                      dim3_test)

    elif method == 2:
        for i in range(train_data.shape[0]):
            for j in range(train_data.shape[3]):
                train_data[i, :, :, j] = data_preprocessing.standarization(
                    train_data[i, :, :, j])
                train_data[i, :, :, j] = data_preprocessing.MinMax_scaler(
                    train_data[i, :, :, j])

                if i < test_data.shape[0]:
                    test_data[i, :, :, j] = data_preprocessing.standarization(
                        test_data[i, :, :, j])

                    test_data[i, :, :, j] = data_preprocessing.MinMax_scaler(
                        test_data[i, :, :, j])

    elif method == 3:
        train_data = train_data.reshape(dim0_train,
                                        dim1_train * dim2_train * dim3_train)
        test_data = test_data.reshape(dim0_test,
                                      dim1_test * dim2_test * dim3_test)
        train_data, test_data = data_preprocessing.KSTest(
            train_data, test_data, 500)
        train_data = train_data.reshape(dim0_train, dim1_train, dim2_train,
                                        dim3_train)
        test_data = test_data.reshape(dim0_test, dim1_test, dim2_test,
                                      dim3_test)

    elif method == 4:
        train_data = train_data.reshape(dim0_train, dim1_train * dim2_train,
                                        dim3_train)
        test_data = test_data.reshape(dim0_test, dim1_test * dim2_test,
                                      dim3_test)
        for i in range(dim3_train):
            train_data[:, :, i] = data_preprocessing.MinMax_scaler(
                train_data[:, :, i])
            train_data[:, :, i] = data_preprocessing.standarization(
                train_data[:, :, i])
            test_data[:, :, i] = data_preprocessing.MinMax_scaler(
                test_data[:, :, i])
            test_data[:, :, i] = data_preprocessing.standarization(
                test_data[:, :, i])
            train_data[:, :, i], test_data[:, :, i] = data_preprocessing.KSTest(
                train_data[:, :, i], test_data[:, :, i], 800)
        train_data = train_data.reshape(dim0_train, dim1_train, dim2_train,
                                        dim3_train)
        test_data = test_data.reshape(dim0_test, dim1_test, dim2_test,
                                      dim3_test)

    elif method == 5:
        train_data = train_data.reshape(dim0_train,
                                        dim1_train * dim2_train * dim3_train)
        train_data, train_labels, index = data_preprocessing.outliers(
            train_data, train_labels, 1)
        train_data = train_data.reshape(dim0_train - np.size(index),
                                        dim1_train, dim2_train, dim3_train)

    if save == 0:
        return train_data, test_data, train_labels, test_labels
    else:
        # Reorder the axes before saving (presumably to match the layout the
        # data loaders expect).
        transposing_order = [1, 3, 2, 0]
        train_data = data_preprocessing.transposnig(train_data,
                                                    transposing_order)
        test_data = data_preprocessing.transposnig(test_data,
                                                   transposing_order)
        output_path = load_data.find_path(output_dir)
        np.savez(os.path.join(output_path, file_name + 'train_data.npz'),
                 masked_voxels=train_data)
        np.savez(os.path.join(output_path, file_name + 'test_data.npz'),
                 masked_voxels=test_data)
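
# A minimal usage sketch (hypothetical arrays and names; with save=0 the
# preprocessed arrays are returned, with save=1 they are written to .npz
# files instead):
#
#     train_data, test_data, train_labels, test_labels = preprocessing(
#         train_data, test_data, train_labels, test_labels, method=1,
#         save=0, file_name='run1_', output_dir='Output_results_directory')
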
def main():
    # Define input file names, directories, and parameters.
    train_Con_file_name = 'CV_con.npz'
    train_AD_file_name = 'CV_pat.npz'
    #test_Con_file_name = 'CV_ADNI_CON.npz'
    #test_AD_file_name = 'CV_ADNI_AD.npz'
    mask_name = '4mm_brain_mask_bin_epl.nii.gz'
    results_directory = 'Output_results_directory'
    results_path = load_data.find_path(results_directory)
    number_of_cv = 5
    feature_selection_type = 'L2_penality'
    Hyperparameter_model__1 = 1000
    Hyperparameter_model__3 = 1000
    number_of_neighbours = 1
    model_name = 'gaussian_process'

    # loading input data and mask
    train_data, train_labels = load_data.train_data_3d(train_Con_file_name,
                                                       train_AD_file_name)
    #test_data, test_labels = load_data.test_data_3d(test_Con_file_name, test_AD_file_name)
    mask_4mm = load_data.mask(mask_name)
    original_mask = mask_4mm.get_fdata()

    # data preprocessing
    train_data = np.moveaxis(train_data.copy(), 3, 0)
    #test_data = np.moveaxis(test_data.copy(), 3, 0)
    train_data = train_data * original_mask
    #test_data = test_data * original_mask
    shape = np.shape(train_data)
    train_data_flattened = data_preprocessing.flatten(train_data.copy())
    #test_data_flattened = data_preprocessing.flatten(test_data.copy())
    orignal_mask_flatten = data_preprocessing.flatten(
        original_mask[np.newaxis, :, :, :].copy())
    orignal_mask_flatten = np.reshape(orignal_mask_flatten, (-1))
    train_data_flattened = data_preprocessing.MinMax_scaler(
        train_data_flattened.copy())
    #test_data_flattened = data_preprocessing.MinMax_scaler(test_data_flattened.copy())
    # train_data_flattened, test_data_flattened=data_preprocessing.MinMax_scaler_correct(train_data_flattened, test_data_flattened)
    train_data_flattened, test_data_flattened, train_labels, test_labels = train_test_split(
        train_data_flattened, train_labels, test_size=.2, random_state=42)
    train_data_inlier, train_labels_inlier, outlier_indices_train = data_preprocessing.outliers(
        train_data_flattened, train_labels, number_of_neighbours)
    test_data_inlier, test_labels_inlier, outlier_indices_test = data_preprocessing.novelty(
        train_data_inlier, train_labels_inlier, test_data_flattened,
        test_labels, number_of_neighbours)
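    # At this point the data are cleaned: outliers() drops anomalous training
    # samples, and novelty() flags test samples that do not fit the inlier
    # training distribution (assumption: both wrap neighbour-based detectors,
    # given the number_of_neighbours argument).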

    train_data_inlier_unflattened = data_preprocessing.deflatten(
        train_data_inlier, shape)
    train_data_outlier_unflattened = data_preprocessing.deflatten(
        train_data_flattened[outlier_indices_train], shape)
    train_data_inlier_unflattened = np.moveaxis(
        train_data_inlier_unflattened.copy(), 0, 3)
    train_data_outlier_unflattened = np.moveaxis(
        train_data_outlier_unflattened.copy(), 0, 3)
    train_labels_outliers = train_labels[outlier_indices_train]
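    # Synthesise extra training samples: apply several noise types and several
    # filters to the inlier and outlier volumes, then concatenate the results.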
    train_data_inlier_noised = data_preprocessing.apply_noise_manytypes(
        train_data_inlier_unflattened.copy())
    train_data_inlier_filtered = data_preprocessing.apply_filter_manytypes(
        train_data_inlier_unflattened.copy())
    train_data_inlier_more = data_preprocessing.concatination(
        train_data_inlier_noised, train_data_inlier_filtered)
    #train_labels_inlier_more = data_preprocessing.dublicate(train_labels_inlier.copy(), 29) #to match length of data
    train_data_outlier_noised = data_preprocessing.apply_noise_manytypes(
        train_data_outlier_unflattened.copy())
    train_data_outlier_filtered = data_preprocessing.apply_filter_manytypes(
        train_data_outlier_unflattened.copy())
    train_data_outlier_more = data_preprocessing.concatination(
        train_data_outlier_noised, train_data_outlier_filtered)
    train_labels_outlier_more = data_preprocessing.dublicate(
        train_labels_outliers[:, np.newaxis].copy(),
        29)  # to match the length of the augmented data
    train_data_inlier_more = np.moveaxis(train_data_inlier_more.copy(), 3, 0)
    train_data_outlier_more = np.moveaxis(train_data_outlier_more.copy(), 3, 0)
    train_data_outlier_more_flattened = data_preprocessing.flatten(
        train_data_outlier_more.copy())
    # train_data_inlier_more_flattened = data_preprocessing.flatten(train_data_inlier_more.copy()) #uncomment to use noised inliers
    # train_data_inlier_inlier, train_labels_inlier_inlier, inlier_outlier_indices_train = data_preprocessing.novelty(
    #     train_data_inlier, train_labels_inlier,
    #     train_data_inlier_more_flattened,
    #     train_labels_inlier_more,
    #     number_of_neighbours)
    train_data_outlier_inlier, train_labels_outlier_inlier, outlier_outlier_indices_train = data_preprocessing.novelty(
        train_data_flattened[outlier_indices_train],
        train_labels[outlier_indices_train], train_data_outlier_more_flattened,
        train_labels_outlier_more, number_of_neighbours)
    train_data_inlier, train_labels_inlier = data_preprocessing.upsampling(
        train_data_inlier, train_labels_inlier[:, np.newaxis])

    train_data_inlier, train_labels_inlier = data_preprocessing.shuffling(
        train_data_inlier, train_labels_inlier)
    train_data_outlier_inlier, train_labels_outlier_inlier = data_preprocessing.upsampling(
        train_data_outlier_inlier, train_labels_outlier_inlier)

    train_data_outlier_inlier, train_labels_outlier_inlier = data_preprocessing.shuffling(
        train_data_outlier_inlier, train_labels_outlier_inlier)

    # Brain extraction: keep only the voxels inside the binary brain mask.
    brain_idx = np.squeeze(np.where(orignal_mask_flatten > 0), axis=0)
    train_data_inlier_brain = train_data_inlier[:, brain_idx]
    test_data_inlier_brain = test_data_inlier[:, brain_idx]
    train_data_outlier_inlier_brain = train_data_outlier_inlier[:, brain_idx]
    test_data_outlier_brain = (
        test_data_flattened[outlier_indices_test])[:, brain_idx]
    concated_data = data_preprocessing.concat(train_data_inlier,
                                              train_data_outlier_inlier)
    concated_labels = data_preprocessing.concat(
        train_labels_inlier[:, np.newaxis],
        train_labels_outlier_inlier[:, np.newaxis])
    # Model stage 1: predictions with high certainty.
    model1_created_mask, model1_, model1_name, model1_weights = create_mask(
        train_data_inlier_brain,
        train_labels_inlier,
        number_of_cv,
        feature_selection_type,
        Hyperparameter_model__1,
        mask_threshold=4,
        model_type=model_name)
    #train_data_inlier_CVspace = data_preprocessing.coefficient_of_variance(train_data_inlier_brain * model1_created_mask)[:,np.newaxis]
    #test_data_inlier_CVspace = data_preprocessing.coefficient_of_variance(test_data_inlier_brain * model1_created_mask)[:,np.newaxis]
    #train_data_inlier_CVspace = np.sum(train_data_inlier_brain * model1_created_mask, axis=1)[:,np.newaxis]
    #test_data_inlier_CVspace = np.sum(test_data_inlier_brain * model1_created_mask, axis=1)[:,np.newaxis]
    train_data_inlier_CVspace = (train_data_inlier_brain * model1_created_mask)
    test_data_inlier_CVspace = (test_data_inlier_brain * model1_created_mask)

    model1_, model1_name = model_reduced(train_data_inlier_CVspace,
                                         train_labels_inlier,
                                         model1_created_mask,
                                         data_validation=None,
                                         labels_validation=None,
                                         model_type='gaussian_process')
    model1_test_accuracy, model1_F1_score, model1_auc, low_confidence_indices = generate_result.out_result_highprob(
        test_data_inlier_CVspace, test_labels_inlier, original_mask,
        model1_created_mask, model1_)
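    # out_result_highprob (judging by its name) scores stage 1 only on the
    # confident predictions and returns the indices of low-confidence test
    # samples, which are routed to the stage-2 model below.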
    # Model stage 2: predictions with low certainty.
    model2_, model2_name = model_reduced(train_data_inlier_CVspace,
                                         train_labels_inlier,
                                         model1_created_mask,
                                         data_validation=None,
                                         labels_validation=None,
                                         model_type=model_name)
    model2_test_accuracy, model2_F1_score, model2_auc = generate_result.out_result(
        test_data_inlier_CVspace[low_confidence_indices],
        test_labels_inlier[low_confidence_indices], original_mask,
        model1_created_mask, model2_)
    # Model stage 3: outlier samples.
    model3_created_mask, model3_, model3_name, model3_weights = create_mask(
        concated_data,
        concated_labels,
        number_of_cv,
        feature_selection_type,
        Hyperparameter_model__3,
        mask_threshold=3,
        model_type=model_name)
    #concated_data_cv = data_preprocessing.coefficient_of_variance(
    #   concated_data[:,np.squeeze(np.where(orignal_mask_flatten>0),axis=0)].copy() * model3_created_mask[np.squeeze(np.where(orignal_mask_flatten > 0), axis=0)])[:, np.newaxis]
    #test_data_outlier_cv = data_preprocessing.coefficient_of_variance(
    #    test_data_outlier_brain *model3_created_mask[np.squeeze(np.where(orignal_mask_flatten > 0), axis=0)])[:, np.newaxis]
    #concated_data_cv = np.sum(
    #                          concated_data[:,np.squeeze(np.where(orignal_mask_flatten>0),axis=0)].copy() * model3_created_mask[np.squeeze(np.where(orignal_mask_flatten > 0), axis=0)], axis=1)[:, np.newaxis]
    #test_data_outlier_cv = np.sum(
    #                              test_data_outlier_brain *model3_created_mask[np.squeeze(np.where(orignal_mask_flatten > 0), axis=0)], axis=1)[:, np.newaxis]
    concated_data_cv = (concated_data[:, brain_idx].copy() *
                        model3_created_mask[brain_idx])
    test_data_outlier_cv = (test_data_outlier_brain *
                            model3_created_mask[brain_idx])

    model3_, model3_name = model_reduced(concated_data_cv,
                                         concated_labels,
                                         model3_created_mask,
                                         data_validation=None,
                                         labels_validation=None,
                                         model_type=model_name)
    model3_test_accuracy, model3_F1_score, model3_auc = generate_result.out_result(
        np.nan_to_num(test_data_outlier_cv),
        np.nan_to_num(test_labels[outlier_indices_test]),
        np.nan_to_num(original_mask),
        np.nan_to_num(model3_created_mask[brain_idx]), model3_)
    # Fractions of the test set routed to each of the three stages.
    testnum = len(test_labels)
    highcernum = (len(test_labels_inlier) -
                  len(test_labels_inlier[low_confidence_indices])) / testnum
    lowcernum = len(test_labels_inlier[low_confidence_indices]) / testnum
    outnum = len(test_labels[outlier_indices_test]) / testnum

    data_preprocessing_method = "Separate the outliers of the training and test sets, then synthesise more data from the training outliers and apply probability predictions. The high-certainty " \
                                "model is used for predictions with high probability, then the low-certainty model is applied. Finally, noise is added to the outliers, which are concatenated with the inlier data " \
                                "used for the outlier model."
    generate_result.print_result_3models(
        mask_4mm, results_path, model3_created_mask[brain_idx], model3_,
        model3_name, model3_weights[brain_idx], model3_test_accuracy,
        model3_auc, model3_F1_score, Hyperparameter_model__3, model2_,
        model2_name, model2_test_accuracy, model2_auc, model2_F1_score,
        model1_, model1_created_mask, model1_name, model1_weights,
        model1_test_accuracy, model1_auc, model1_F1_score,
        Hyperparameter_model__1, feature_selection_type,
        data_preprocessing_method, highcernum, lowcernum, outnum)
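

# A minimal entry point, assuming this module is meant to be run directly.
if __name__ == '__main__':
    main()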