def dump_in_hdf5(db_path, output, title):
    """Mask, flatten and store all subject images of a database in an HDF5 file.

    Reads the clinic CSV of the database to get one image filename per
    subject, loads each 'smwc1'-prefixed image, keeps only the voxels
    selected by the database mask, and writes the resulting
    (n_subjects, n_useful_voxels) array into a new HDF5 file.

    Parameters
    ----------
    db_path : str
        Path of the database; image/clinic/mask locations are resolved
        through the `data_api` helpers.
    output : str
        Path of the HDF5 file to create (opened in "w" mode, so an
        existing file is overwritten).
    title : str
        Title stored in the HDF5 file.
    """
    # TODO: use the number of subjects as the number of records of the table
    images_dir = data_api.get_images_dir_path(db_path)
    clinic_filename = data_api.get_clinic_file_path(db_path)
    # Open the output file
    h5file = tables.openFile(output, mode="w", title=title)
    try:
        # Context manager guarantees the CSV descriptor is released
        # (the original opened it and never closed it).
        with open(clinic_filename) as csv_fd:
            data = pandas.io.parsers.read_csv(csv_fd)
        n_subjects = data.shape[0]
        # Load mask
        mask_filename = data_api.get_mask_file_path(db_path)
        print("Loading mask {mask_filename}".format(
            mask_filename=mask_filename))
        babel_mask = nibabel.load(mask_filename)
        mask = babel_mask.get_data()
        binary_mask = mask != 0
        useful_voxels = numpy.ravel_multi_index(numpy.where(binary_mask),
                                                mask.shape)
        n_useful_voxels = len(useful_voxels)
        print("Mask loaded ({n_useful_voxels} useful voxels per image)".format(
            n_useful_voxels=n_useful_voxels))
        # Load grey matter images (X), apply mask and concatenate them
        print("Loading {n_images} images, apply mask and flatten".format(
            n_images=n_subjects))
        image_filenames = [os.path.join(images_dir, 'smwc1' + filename)
                           for filename in data.Images]
        masked_images = numpy.zeros((n_subjects, n_useful_voxels))
        for index, filename in enumerate(image_filenames):
            # Load (as numpy array)
            image = nibabel.load(filename).get_data()
            # Apply mask (returns a flat image) and store in X
            masked_images[index, :] = image[binary_mask]
        # Store data
        data_api.write_images(h5file, masked_images)
    finally:
        # Always close the HDF5 file, even if an intermediate step fails
        # (the original left the handle open on any error).
        h5file.close()
N_FOLDS_NESTED = 5 N_FOLDS_EVAL = 10 OUT_DIR = os.path.join(DB_PATH, 'results', 'svm_feature_selection/') if not os.path.exists(OUT_DIR): os.makedirs(OUT_DIR) WF_NAME_PATTERN = "svm_feature_selection_{images}" ######################### # Oth step: access data # ######################### csv_file_name = data_api.get_clinic_file_path(DB_PATH) df = data_api.read_clinic_file(csv_file_name) babel_mask = nibabel.load(data_api.get_mask_file_path(DB_PATH)) mask = babel_mask.get_data() binary_mask = mask != 0 h5file = tables.openFile(LOCAL_PATH) #################### # Create workflows # #################### # Base workflow: SVM + feature selection svms = pipelines = epac.Methods(*[ epac.Pipe( sklearn.feature_selection.SelectKBest( k=k), sklearn.preprocessing.StandardScaler(), epac.Methods(*[