Exemple #1
0
def load_residualized_bmi_data(cache):
    """Return a 50-row subset of the image array and of the BMI values.

    When ``cache`` is falsy the data are rebuilt from their sources: BMI is
    read from ``BMI.csv`` under ``DATA_PATH`` and the image array from the
    ``/standard_mask/residualized_images_gender_center_TIV_pds`` node of
    ``IMAGES_FILE``. The first 50 rows of each are then written to
    ``X.npy`` and ``z.npy`` in ``SHARED_DIR``.

    When ``cache`` is truthy those two ``.npy`` files are loaded instead.

    Parameters
    ----------
    cache : bool
        True to read the previously saved arrays, False to rebuild them.

    Returns
    -------
    X : array
        First 50 rows of the image array.
    z : array
        First 50 rows of the BMI table.
    """
    x_path = os.path.join(SHARED_DIR, "X.npy")
    z_path = os.path.join(SHARED_DIR, "z.npy")

    if cache:
        X = np.load(x_path)
        z = np.load(z_path)
        print("Data read from cache")
        return X, z

    # .values is the long-standing equivalent of the deprecated .as_matrix()
    BMI = pd.read_csv(os.path.join(DATA_PATH, "BMI.csv"), index_col=0).values

    h5file = tables.openFile(IMAGES_FILE)
    try:
        # Images stored under this node are already masked
        masked_images = bmi_utils.read_array(
            h5file, "/standard_mask/residualized_images_gender_center_TIV_pds")
        print("Data loaded, processing")

        X = masked_images[0:50, :]
        z = BMI[0:50]

        np.save(x_path, X)
        np.save(z_path, z)
    finally:
        # Release the HDF5 handle even if reading or saving fails
        h5file.close()

    print("Data saved")
    return X, z
Exemple #2
0
def load_data(cache):
    """Return the image array, the SNP matrix and the BMI values.

    When ``cache`` is falsy the data are rebuilt from their sources:
    ``SNPs.csv`` (read as float64) and ``BMI.csv`` under ``DATA_PATH``, and
    the ``/standard_mask/residualized_images_gender_center_TIV_pds`` node of
    ``IMAGES_FILE``. The three arrays are then written to ``X.npy``,
    ``Y.npy`` and ``z.npy`` in ``SHARED_DIR``.

    When ``cache`` is truthy those three ``.npy`` files are loaded instead.

    Parameters
    ----------
    cache : bool
        True to read the previously saved arrays, False to rebuild them.

    Returns
    -------
    X : array
        Image array (already masked).
    Y : array
        SNP matrix.
    z : array
        BMI values.
    """
    x_path = os.path.join(SHARED_DIR, 'X.npy')
    y_path = os.path.join(SHARED_DIR, 'Y.npy')
    z_path = os.path.join(SHARED_DIR, 'z.npy')

    if cache:
        X = np.load(x_path)
        Y = np.load(y_path)
        z = np.load(z_path)
        print("Data read from cache")
        return X, Y, z

    # .values is the long-standing equivalent of the deprecated .as_matrix()
    Y = pd.read_csv(os.path.join(DATA_PATH, 'SNPs.csv'),
                    dtype='float64',
                    index_col=0).values
    z = pd.read_csv(os.path.join(DATA_PATH, 'BMI.csv'), index_col=0).values

    h5file = tables.openFile(IMAGES_FILE)
    try:
        # Load images that have already been masked
        X = bmi_utils.read_array(
            h5file, '/standard_mask/residualized_images_gender_center_TIV_pds')
        print("Data loaded")

        np.save(x_path, X)
        np.save(y_path, Y)
        np.save(z_path, z)
    finally:
        # Release the HDF5 handle even if reading or saving fails
        h5file.close()

    print("Data saved")
    return X, Y, z
Exemple #3
0
def load_data(cache):
    """Return images augmented with covariates, and the BMI values.

    When ``cache`` is falsy the data are rebuilt from their sources:

    * BMI from ``BMI.csv`` under ``DATA_PATH``;
    * the covariate columns listed in ``COFOUND`` from
      ``1534bmi-vincent2.csv`` under ``CLINIC_DATA_PATH``, turned into a
      design matrix by ``utils.make_design_matrix``;
    * the image array from the
      ``/standard_mask/residualized_images_gender_center_TIV_pds`` node of
      ``IMAGES_FILE``.

    Only design-matrix rows whose first column (the subject id) is listed in
    ``subjects_id.csv`` are kept, and that id column is then dropped. The
    result is concatenated column-wise with the images and saved, together
    with BMI, as ``X.npy`` / ``z.npy`` in ``SHARED_DIR``.

    When ``cache`` is truthy those two ``.npy`` files are loaded instead.

    Parameters
    ----------
    cache : bool
        True to read the previously saved arrays, False to rebuild them.

    Returns
    -------
    X : array
        Covariate columns followed by the image columns.
    z : array
        BMI values.
    """
    x_path = os.path.join(SHARED_DIR, "X.npy")
    z_path = os.path.join(SHARED_DIR, "z.npy")

    if cache:
        X = np.load(x_path)
        z = np.load(z_path)
        print("Data read from cache")
        return X, z

    # .values is the long-standing equivalent of the deprecated .as_matrix()
    z = pd.read_csv(os.path.join(DATA_PATH, "BMI.csv"), index_col=0).values

    # Clinical covariates; "Subject" is kept so rows can be filtered by id
    COFOUND = [
        "Subject", "Gender de Feuil2", "ImagingCentreCity", "tiv_gaser",
        "mean_pds"
    ]
    df = pd.read_csv(os.path.join(CLINIC_DATA_PATH, "1534bmi-vincent2.csv"),
                     index_col=0)
    df = df[COFOUND]

    # Conversion to dummy coding
    design_mat = utils.make_design_matrix(df, regressors=COFOUND).values

    # Keep only subjects for which we have all data, then drop the first
    # column (subject id). The mask is built from the id column alone: the
    # previous nested np.delete/np.in1d form compared ids against *every*
    # value of the excluded rows, so an id that happened to equal one of
    # their covariate values was wrongly discarded.
    subjects_id = np.genfromtxt(os.path.join(DATA_PATH, "subjects_id.csv"),
                                dtype=None,
                                delimiter=',',
                                skip_header=1)
    known_subject = np.in1d(design_mat[:, 0], subjects_id)
    design_mat = design_mat[known_subject, 1:]

    h5file = tables.openFile(IMAGES_FILE)
    try:
        # Images stored under this node are already masked
        masked_images = bmi_utils.read_array(
            h5file, "/standard_mask/residualized_images_gender_center_TIV_pds")
        print("Data loaded")

        # Concatenate images with covariates (gender, imaging city centre,
        # tiv_gaser and mean pds status) in order to do as though BMI had
        # been residualized
        X = np.concatenate((design_mat, masked_images), axis=1)

        np.save(x_path, X)
        np.save(z_path, z)
    finally:
        # Release the HDF5 handle even if reading or saving fails
        h5file.close()

    print("Data saved")
    return X, z
def load_residualized_bmi_data(cache):
    """Return a standardised [covariates, BMI] matrix and the image array.

    When ``cache`` is falsy the data are rebuilt from their sources:

    * BMI from ``BMI.csv`` under ``DATA_PATH``;
    * the covariate columns listed in ``COFOUND`` from ``population.csv``
      under ``CLINIC_DATA_PATH``, restricted to the ids found in
      ``subjects_id.csv`` and turned into a design matrix by
      ``utils.make_design_matrix``;
    * the image array from the
      ``/standard_mask/residualized_images_gender_center_TIV_pds`` node of
      ``IMAGES_FILE``.

    Covariates and BMI are stacked column-wise, passed through
    ``StandardScaler().fit_transform`` and saved as ``X.npy``; the images
    are saved as ``Y.npy`` (both in ``SHARED_DIR``).

    When ``cache`` is truthy those two ``.npy`` files are loaded instead.

    Parameters
    ----------
    cache : bool
        True to read the previously saved arrays, False to rebuild them.

    Returns
    -------
    X : array
        Scaled design matrix (covariate columns followed by BMI).
    Y : array
        Image array (already masked).
    """
    x_path = os.path.join(SHARED_DIR, 'X.npy')
    y_path = os.path.join(SHARED_DIR, 'Y.npy')

    if cache:
        X = np.load(x_path)
        Y = np.load(y_path)
        print("Data read from cache")
        return X, Y

    # .values is the long-standing equivalent of the deprecated .as_matrix()
    BMI = pd.read_csv(os.path.join(DATA_PATH, 'BMI.csv'), index_col=0).values

    # Clinical covariates
    COFOUND = [
        'Gender de Feuil2', 'ImagingCentreCity', 'tiv_gaser', 'mean_pds'
    ]
    df = pd.read_csv(os.path.join(CLINIC_DATA_PATH, 'population.csv'),
                     index_col=0)
    df = df[COFOUND]

    # Keep only subjects for which we have all data
    subjects_id = np.genfromtxt(os.path.join(DATA_PATH, 'subjects_id.csv'),
                                dtype=None,
                                delimiter=',',
                                skip_header=1)
    clinic_data = df.loc[subjects_id]

    # Conversion to dummy coding
    covar = utils.make_design_matrix(clinic_data, regressors=COFOUND).values

    # Concatenate BMI and covariates
    # (gender, imaging city centre, tiv_gaser and mean pds status),
    # then center & scale the result
    design_mat = np.hstack((covar, BMI))
    X = StandardScaler().fit_transform(design_mat)

    h5file = tables.openFile(IMAGES_FILE)
    try:
        Y = bmi_utils.read_array(
            h5file, '/standard_mask/residualized_images_gender_center_TIV_pds')
        print("Images loaded")

        np.save(x_path, X)
        np.save(y_path, Y)
    finally:
        # Release the HDF5 handle even if reading or saving fails
        h5file.close()

    print("Data saved")
    return X, Y
Exemple #5
0
def load_residualized_bmi_data(cache):
    """Return images augmented with covariates, and the BMI values.

    When ``cache`` is falsy the data are rebuilt from their sources:

    * BMI from ``BMI.csv`` under ``DATA_PATH``;
    * the covariate columns listed in ``COFOUNDS`` from
      ``1534bmi-vincent2.csv`` under ``SHFJ_DATA_PATH``, restricted to the
      ids found in ``subjects_id.csv`` and turned into a design matrix by
      ``utils.make_design_matrix``;
    * the image array from the
      ``/standard_mask/residualized_images_gender_center_TIV_pds`` node of
      ``IMAGES_FILE``.

    Covariates and images are stacked column-wise and saved as ``X.npy``;
    BMI is saved as ``z.npy`` (both in ``SHARED_DIR``).

    When ``cache`` is truthy those two ``.npy`` files are loaded instead.

    Parameters
    ----------
    cache : bool
        True to read the previously saved arrays, False to rebuild them.

    Returns
    -------
    X : array
        Covariate columns followed by the image columns.
    z : array
        BMI values.
    """
    x_path = os.path.join(SHARED_DIR, 'X.npy')
    z_path = os.path.join(SHARED_DIR, 'z.npy')

    if cache:
        X = np.load(x_path)
        z = np.load(z_path)
        print("Data read from cache")
        return X, z

    # .values is the long-standing equivalent of the deprecated .as_matrix()
    z = pd.read_csv(os.path.join(DATA_PATH, 'BMI.csv'), index_col=0).values

    # Clinical covariates
    COFOUNDS = [
        'Gender de Feuil2', 'ImagingCentreCity', 'tiv_gaser', 'mean_pds'
    ]
    df = pd.read_csv(os.path.join(SHFJ_DATA_PATH, '1534bmi-vincent2.csv'),
                     index_col=0)
    df = df[COFOUNDS]

    # Keep only subjects for whom we have all neuroimaging and genetic data
    subjects_id = np.genfromtxt(os.path.join(DATA_PATH, 'subjects_id.csv'),
                                dtype=None,
                                delimiter=',',
                                skip_header=1)
    clinic_data = df.loc[subjects_id]

    # Conversion to dummy coding
    covar = utils.make_design_matrix(clinic_data, regressors=COFOUNDS).values

    h5file = tables.openFile(IMAGES_FILE)
    try:
        # Load images that have already been masked
        masked_images = bmi_utils.read_array(
            h5file, '/standard_mask/residualized_images_gender_center_TIV_pds')
        print("Data loaded - Processing")

        # Concatenate images and covariates
        # (gender, imaging city centre, tiv_gaser and mean pds status)
        # in order to do as though BMI had been residualized
        X = np.hstack((covar, masked_images))

        np.save(x_path, X)
        np.save(z_path, z)
    finally:
        # Release the HDF5 handle even if reading or saving fails
        h5file.close()

    print("Data saved")
    return X, z
Exemple #6
0
def load_residualized_bmi_data(cache):
    # Return (X_res, z): covariates stacked column-wise with a row subset of
    # the image array, and the BMI values.
    #
    # cache falsy  -> rebuild from BMI.csv, normal_group.csv and IMAGES_FILE,
    #                 then save X_res.npy / z.npy in SHARED_DIR.
    # cache truthy -> load X_res.npy / z.npy from SHARED_DIR.
    #
    # FIXME: this function is unfinished. The image row selector below is a
    # literal "???????????" placeholder, which is a syntax error, so the
    # module cannot be imported until it is replaced.
    if not(cache):
        # SNP loading is currently disabled:
#        SNPs = pd.io.parsers.read_csv(os.path.join(DATA_PATH, 'SNPs.csv'),
#                                      dtype='float64',
#                                      index_col=0).as_matrix()
        BMI = pd.io.parsers.read_csv(os.path.join(DATA_PATH, 'BMI.csv'),
                                     index_col=0).as_matrix()

        # Clinical dataframe, indexed by its first CSV column
        df = pd.io.parsers.read_csv(os.path.join(CLINIC_DATA_PATH,
                                                 'normal_group.csv'),
                                    index_col=0)

        # Covariate columns to keep
        COFOUND = ['Gender de Feuil2',
                   'ImagingCentreCity',
                   'tiv_gaser',
                   'mean_pds']

        df = df[COFOUND]

        # Conversion to dummy coding
        covar = utils.make_design_matrix(df, regressors=COFOUND).as_matrix()

        # Images
        h5file = tables.openFile(IMAGES_FILE)
        images_file = bmi_utils.read_array(h5file,
                "/standard_mask/residualized_images_gender_center_TIV_pds")
                 # images already masked

        # FIXME: placeholder row index. Presumably this should select the
        # image rows matching the subjects of normal_group.csv so that the
        # row counts agree with covar in the hstack below -- to be confirmed
        # by the author.
        masked_images = images_file[???????????, :]

        print "Data loaded - Processing"

        z = BMI
        # Concatenate images and covariates
        # (gender, imaging city centre, tiv_gaser and mean pds status)
        # in order to do as though BMI had been residualized.
        X_res = np.hstack((covar, masked_images))

        np.save(os.path.join(SHARED_DIR, "X_res.npy"), X_res)
        np.save(os.path.join(SHARED_DIR, "z.npy"), z)

        # NOTE(review): not reached if anything above raises, so the HDF5
        # handle is left open on error.
        h5file.close()
        print "Data saved"
    else:
        X_res = np.load(os.path.join(SHARED_DIR, "X_res.npy"))
        z = np.load(os.path.join(SHARED_DIR, "z.npy"))
        print "Data read from cache"
    return X_res, z
    return cmd


##############################################
# Read the raw data and dump it as .npy files #
##############################################
# Genotypes (SNPs, read as float64) and phenotype (BMI); the first CSV
# column is used as the index in both cases.
SNPs = pd.read_csv(os.path.join(DATA_PATH, "SNPs.csv"),
                   dtype='float64', index_col=0).as_matrix()
BMI = pd.read_csv(os.path.join(DATA_PATH, "BMI.csv"), index_col=0).as_matrix()

# Image array read from the HDF5 file
h5file = tables.openFile(IMAGES_FILE)
masked_images = bmi_utils.read_array(
    h5file, "/standard_mask/residualized_images_gender_center_TIV_pds")
print("Data loaded")

# Block naming used by the rest of the script: X = images, Y = SNPs, Z = BMI
X, Y, Z = masked_images, SNPs, BMI

# Persist each block in the shared directory
np.save(os.path.join(SHARED_DIR, "X.npy"), X)
np.save(os.path.join(SHARED_DIR, "Y.npy"), Y)
np.save(os.path.join(SHARED_DIR, "Z.npy"), Z)

####################################
# Create cross-validation workflow #
#  & data                          #
####################################
jobs = []