def load_residualized_bmi_data(cache):
    """Return the image matrix X and the BMI array z, first 50 rows only.

    When ``cache`` is falsy, BMI.csv (under DATA_PATH) and the residualized,
    already-masked images (from IMAGES_FILE) are read, truncated to their
    first 50 rows and written to X.npy / z.npy in SHARED_DIR.  When ``cache``
    is truthy, those two .npy files are loaded instead.

    Returns:
        tuple: ``(X, z)``.
    """
    x_path = os.path.join(SHARED_DIR, "X.npy")
    z_path = os.path.join(SHARED_DIR, "z.npy")

    if cache:
        X = np.load(x_path)
        z = np.load(z_path)
        # Single-argument print(...) prints the same text on Python 2.
        print("Data read from cache")
        return X, z

    BMI = pd.io.parsers.read_csv(os.path.join(DATA_PATH, "BMI.csv"),
                                 index_col=0).as_matrix()

    # Images (already masked).
    h5file = tables.openFile(IMAGES_FILE)
    try:
        masked_images = bmi_utils.read_array(
            h5file,
            "/standard_mask/residualized_images_gender_center_TIV_pds")
        print("Data loaded, processing")
        # Only the first 50 rows of each array are kept.
        X = masked_images[0:50, :]
        z = BMI[0:50]
        np.save(x_path, X)
        np.save(z_path, z)
    finally:
        # Close the HDF5 file even if reading or saving fails; it stays open
        # until the arrays are written because read_array may return data
        # that is still backed by the file.
        h5file.close()
    print("Data saved")
    return X, z
def load_data(cache):
    """Return the images X, the SNPs Y and the BMI z.

    When ``cache`` is falsy, SNPs.csv and BMI.csv (under DATA_PATH) and the
    residualized, already-masked images (from IMAGES_FILE) are read and
    written to X.npy / Y.npy / z.npy in SHARED_DIR.  When ``cache`` is
    truthy, those three .npy files are loaded instead.

    Returns:
        tuple: ``(X, Y, z)``.
    """
    x_path = os.path.join(SHARED_DIR, 'X.npy')
    y_path = os.path.join(SHARED_DIR, 'Y.npy')
    z_path = os.path.join(SHARED_DIR, 'z.npy')

    if cache:
        X = np.load(x_path)
        Y = np.load(y_path)
        z = np.load(z_path)
        # Single-argument print(...) prints the same text on Python 2.
        print("Data read from cache")
        return X, Y, z

    # SNPs
    Y = pd.io.parsers.read_csv(os.path.join(DATA_PATH, 'SNPs.csv'),
                               dtype='float64',
                               index_col=0).as_matrix()
    # BMI
    z = pd.io.parsers.read_csv(os.path.join(DATA_PATH, 'BMI.csv'),
                               index_col=0).as_matrix()

    # Load images that have already been masked.
    h5file = tables.openFile(IMAGES_FILE)
    try:
        X = bmi_utils.read_array(
            h5file,
            '/standard_mask/residualized_images_gender_center_TIV_pds')
        print("Data loaded")
        np.save(x_path, X)
        np.save(y_path, Y)
        np.save(z_path, z)
    finally:
        # Release the HDF5 handle even when reading or saving raises.
        h5file.close()
    print("Data saved")
    return X, Y, z
def load_data(cache):
    """Return X (covariates followed by image columns) and the BMI z.

    When ``cache`` is falsy, the clinical covariates, the BMI and the
    residualized, already-masked images are read, the covariate design
    matrix is restricted to the subjects listed in subjects_id.csv, and the
    results are written to X.npy / z.npy in SHARED_DIR.  When ``cache`` is
    truthy, those two .npy files are loaded instead.

    Returns:
        tuple: ``(X, z)``.
    """
    x_path = os.path.join(SHARED_DIR, "X.npy")
    z_path = os.path.join(SHARED_DIR, "z.npy")

    if cache:
        X = np.load(x_path)
        z = np.load(z_path)
        # Single-argument print(...) prints the same text on Python 2.
        print("Data read from cache")
        return X, z

    BMI = pd.io.parsers.read_csv(os.path.join(DATA_PATH, "BMI.csv"),
                                 index_col=0).as_matrix()

    # Dataframe
    COFOUND = ["Subject", "Gender de Feuil2", "ImagingCentreCity",
               "tiv_gaser", "mean_pds"]
    df = pd.io.parsers.read_csv(os.path.join(CLINIC_DATA_PATH,
                                             "1534bmi-vincent2.csv"),
                                index_col=0)
    df = df[COFOUND]

    # Design matrix from utils.make_design_matrix (dummy coding, per the
    # original author's note).
    design_mat = utils.make_design_matrix(df, regressors=COFOUND).as_matrix()

    # Keep only the subjects for which we have all data, then drop the first
    # column (the subject id).  The previous nested np.delete/np.in1d
    # expression tested ids against *every* value of the excluded rows
    # (np.in1d flattens its arguments), so an id equal to some covariate
    # value could be dropped by accident; a mask on the id column avoids that.
    subjects_id = np.genfromtxt(os.path.join(DATA_PATH, "subjects_id.csv"),
                                dtype=None, delimiter=',', skip_header=1)
    keep = np.in1d(design_mat[:, 0], subjects_id)
    # NOTE(review): rows stay in the clinical CSV's order and are paired
    # row-by-row with the images below -- confirm the two orders agree.
    design_mat = design_mat[keep][:, 1:]

    # Images (already masked).
    h5file = tables.openFile(IMAGES_FILE)
    try:
        masked_images = bmi_utils.read_array(
            h5file,
            "/standard_mask/residualized_images_gender_center_TIV_pds")
        print("Data loaded")
        # Concatenate the covariates (gender, imaging centre city, tiv_gaser
        # and mean pds status) with the images, in order to do as though BMI
        # had been residualized.
        X = np.concatenate((design_mat, masked_images), axis=1)
        z = BMI
        np.save(x_path, X)
        np.save(z_path, z)
    finally:
        # Release the HDF5 handle even when reading or saving raises.
        h5file.close()
    print("Data saved")
    return X, z
def load_residualized_bmi_data(cache):
    """Return the scaled design matrix X and the masked images Y.

    When ``cache`` is falsy, the clinical covariates of the subjects listed
    in subjects_id.csv are turned into a design matrix, the BMI column(s)
    are appended, the result is centered and scaled with StandardScaler, and
    it is saved together with the residualized, already-masked images as
    X.npy / Y.npy in SHARED_DIR.  When ``cache`` is truthy, those two .npy
    files are loaded instead.

    Returns:
        tuple: ``(X, Y)``.
    """
    x_path = os.path.join(SHARED_DIR, 'X.npy')
    y_path = os.path.join(SHARED_DIR, 'Y.npy')

    if cache:
        X = np.load(x_path)
        Y = np.load(y_path)
        # Single-argument print(...) prints the same text on Python 2.
        print("Data read from cache")
        return X, Y

    # BMI
    BMI = pd.io.parsers.read_csv(os.path.join(DATA_PATH, 'BMI.csv'),
                                 index_col=0).as_matrix()

    # Dataframe
    COFOUND = ['Gender de Feuil2', 'ImagingCentreCity', 'tiv_gaser',
               'mean_pds']
    df = pd.io.parsers.read_csv(os.path.join(CLINIC_DATA_PATH,
                                             'population.csv'),
                                index_col=0)
    df = df[COFOUND]

    # Keep only subjects for which we have all data.
    subjects_id = np.genfromtxt(os.path.join(DATA_PATH, 'subjects_id.csv'),
                                dtype=None, delimiter=',', skip_header=1)
    clinic_data = df.loc[subjects_id]

    # Design matrix from utils.make_design_matrix (dummy coding, per the
    # original author's note).
    covar = utils.make_design_matrix(clinic_data,
                                     regressors=COFOUND).as_matrix()

    # Concatenate the covariates (gender, imaging centre city, tiv_gaser and
    # mean pds status) and BMI.
    design_mat = np.hstack((covar, BMI))

    # Images (already masked).
    h5file = tables.openFile(IMAGES_FILE)
    try:
        Y = bmi_utils.read_array(
            h5file,
            '/standard_mask/residualized_images_gender_center_TIV_pds')
        print("Images loaded")
        # Center & scale X.
        skl = StandardScaler()
        X = skl.fit_transform(design_mat)
        np.save(x_path, X)
        np.save(y_path, Y)
    finally:
        # Release the HDF5 handle even when scaling or saving raises.
        h5file.close()
    print("Data saved")
    return X, Y
def load_residualized_bmi_data(cache):
    """Return X (covariates followed by image columns) and the BMI z.

    When ``cache`` is falsy, the clinical covariates of the subjects listed
    in subjects_id.csv are turned into a design matrix, stacked in front of
    the residualized, already-masked images, and saved with the BMI as
    X.npy / z.npy in SHARED_DIR.  When ``cache`` is truthy, those two .npy
    files are loaded instead.

    Returns:
        tuple: ``(X, z)``.
    """
    x_path = os.path.join(SHARED_DIR, 'X.npy')
    z_path = os.path.join(SHARED_DIR, 'z.npy')

    if cache:
        X = np.load(x_path)
        z = np.load(z_path)
        # Single-argument print(...) prints the same text on Python 2.
        print("Data read from cache")
        return X, z

    z = pd.io.parsers.read_csv(os.path.join(DATA_PATH, 'BMI.csv'),
                               index_col=0).as_matrix()

    # Dataframe
    COFOUNDS = ['Gender de Feuil2', 'ImagingCentreCity', 'tiv_gaser',
                'mean_pds']
    df = pd.io.parsers.read_csv(os.path.join(SHFJ_DATA_PATH,
                                             '1534bmi-vincent2.csv'),
                                index_col=0)
    df = df[COFOUNDS]

    # Keep only subjects for whom we have all neuroimaging and genetic data.
    subjects_id = np.genfromtxt(os.path.join(DATA_PATH, 'subjects_id.csv'),
                                dtype=None, delimiter=',', skip_header=1)
    clinic_data = df.loc[subjects_id]

    # Design matrix from utils.make_design_matrix (dummy coding, per the
    # original author's note).
    covar = utils.make_design_matrix(clinic_data,
                                     regressors=COFOUNDS).as_matrix()

    # Load images that have already been masked.
    h5file = tables.openFile(IMAGES_FILE)
    try:
        masked_images = bmi_utils.read_array(
            h5file,
            '/standard_mask/residualized_images_gender_center_TIV_pds')
        print("Data loaded - Processing")
        # Concatenate the covariates (gender, imaging centre city, tiv_gaser
        # and mean pds status) and the images, in order to do as though BMI
        # had been residualized.
        X = np.hstack((covar, masked_images))
        np.save(x_path, X)
        np.save(z_path, z)
    finally:
        # Release the HDF5 handle even when stacking or saving raises.
        h5file.close()
    print("Data saved")
    return X, z
def load_residualized_bmi_data(cache):
    """Return X_res (covariates followed by image columns) and the BMI z.

    When ``cache`` is falsy, the covariates of the subjects in
    normal_group.csv are turned into a design matrix, stacked in front of
    the matching rows of the residualized, already-masked images, and saved
    with the BMI as X_res.npy / z.npy in SHARED_DIR.  When ``cache`` is
    truthy, those two .npy files are loaded instead.

    Returns:
        tuple: ``(X_res, z)``.
    """
    x_res_path = os.path.join(SHARED_DIR, "X_res.npy")
    z_path = os.path.join(SHARED_DIR, "z.npy")

    if cache:
        X_res = np.load(x_res_path)
        z = np.load(z_path)
        # Single-argument print(...) prints the same text on Python 2.
        print("Data read from cache")
        return X_res, z

    BMI = pd.io.parsers.read_csv(os.path.join(DATA_PATH, 'BMI.csv'),
                                 index_col=0).as_matrix()

    # Dataframe
    COFOUND = ['Gender de Feuil2', 'ImagingCentreCity', 'tiv_gaser',
               'mean_pds']
    df = pd.io.parsers.read_csv(os.path.join(CLINIC_DATA_PATH,
                                             'normal_group.csv'),
                                index_col=0)
    df = df[COFOUND]

    # The image row selector used to be an unfinished placeholder (a syntax
    # error).  Rows are now picked by subject id.
    # NOTE(review): assumes subjects_id.csv under DATA_PATH lists the image
    # rows in order and that normal_group.csv is indexed by the same ids --
    # TODO confirm.
    subjects_id = np.genfromtxt(os.path.join(DATA_PATH, 'subjects_id.csv'),
                                dtype=None, delimiter=',', skip_header=1)
    in_group = np.in1d(subjects_id, df.index.values)
    # Reorder the covariates like the selected image rows so that the
    # row-wise stacking below pairs each subject with its own image.
    df = df.loc[subjects_id[in_group]]

    # Design matrix from utils.make_design_matrix (dummy coding, per the
    # original author's note).
    covar = utils.make_design_matrix(df, regressors=COFOUND).as_matrix()

    # Images (already masked).
    h5file = tables.openFile(IMAGES_FILE)
    try:
        images_file = bmi_utils.read_array(
            h5file,
            "/standard_mask/residualized_images_gender_center_TIV_pds")
        masked_images = images_file[np.where(in_group)[0], :]
        print("Data loaded - Processing")
        # NOTE(review): z is the whole BMI table, not restricted to the
        # group -- confirm BMI.csv already matches the selected subjects.
        z = BMI
        # Concatenate the covariates (gender, imaging centre city, tiv_gaser
        # and mean pds status) and the images, in order to do as though BMI
        # had been residualized.
        X_res = np.hstack((covar, masked_images))
        np.save(x_res_path, X_res)
        np.save(z_path, z)
    finally:
        # Release the HDF5 handle even when selection or saving raises.
        h5file.close()
    print("Data saved")
    return X_res, z
return cmd ############# # Read data # ############# # SNPs and BMI SNPs = pd.io.parsers.read_csv(os.path.join(DATA_PATH, "SNPs.csv"), dtype='float64', index_col=0).as_matrix() BMI = pd.io.parsers.read_csv(os.path.join(DATA_PATH, "BMI.csv"), index_col=0).as_matrix() # Images h5file = tables.openFile(IMAGES_FILE) masked_images = bmi_utils.read_array( h5file, "/standard_mask/residualized_images_gender_center_TIV_pds") print "Data loaded" X = masked_images Y = SNPs Z = BMI np.save(os.path.join(SHARED_DIR, "X.npy"), X) np.save(os.path.join(SHARED_DIR, "Y.npy"), Y) np.save(os.path.join(SHARED_DIR, "Z.npy"), Z) #################################### # Create cross-validation workflow # # & data # #################################### jobs = []