Example #1
# Imports needed by this snippet (data_api is the project-local I/O helper
# module; the exact import path may differ in the original project):
import os

import numpy
import pandas
import tables
import nibabel
import epac

import data_api


def dump_in_hdf5(db_path, output, title):
    # TODO: use the number of subjects as the number of records of the table
    images_dir = data_api.get_images_dir_path(db_path)
    clinic_filename = data_api.get_clinic_file_path(db_path)
    # Open the output file
    h5file = tables.openFile(output, mode="w", title=title)

    # Open the clinic file
    csv_fd = open(clinic_filename)
    data = pandas.io.parsers.read_csv(csv_fd)
    n_subjects = data.shape[0]

    # Load mask
    mask_filename = data_api.get_mask_file_path(db_path)
    print "Loading mask {mask_filename}".format(mask_filename=mask_filename)
    babel_mask = nibabel.load(mask_filename)
    mask = babel_mask.get_data()
    binary_mask = mask != 0
    useful_voxels = numpy.ravel_multi_index(numpy.where(binary_mask),
                                            mask.shape)
    n_useful_voxels = len(useful_voxels)
    print "Mask loaded ({n_useful_voxels} useful voxels per image)".format(
        n_useful_voxels=n_useful_voxels)

    # Load grey matter images (X), apply mask and concatenate them
    print "Loading {n_images} images, apply mask and flatten".format(
        n_images=n_subjects)
    image_filenames = [
        os.path.join(images_dir, 'smwc1' + filename)
        for filename in data.Images
    ]
    masked_images = numpy.zeros((n_subjects, n_useful_voxels))
    for (index, filename) in enumerate(image_filenames):
        # Load (as numpy array)
        image = nibabel.load(filename).get_data()
        # Apply mask (returns a flat image)
        masked_image = image[binary_mask]
        # Store in X
        masked_images[index, :] = masked_image

    # Store data
    data_api.write_images(h5file, masked_images)

    h5file.close()
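
# Usage sketch: how dump_in_hdf5 might be called (the paths and title below
# are hypothetical placeholders, shown only for illustration):
#
#     dump_in_hdf5(db_path='/path/to/db',
#                  output='/path/to/cache.hdf5',
#                  title='masked grey-matter images')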

# Cross-validation fold counts.  TEST_MODE is assumed to be defined earlier
# in this script (the same switch used to pick DB_PATH in Example #3).
if TEST_MODE:
    N_FOLDS_NESTED = 3
    N_FOLDS_EVAL = 3
else:
    N_FOLDS_NESTED = 5
    N_FOLDS_EVAL = 10

OUT_DIR = os.path.join(DB_PATH, 'results', 'svm_feature_selection/')
if not os.path.exists(OUT_DIR):
    os.makedirs(OUT_DIR)
WF_NAME_PATTERN = "svm_feature_selection_{images}"

#########################
# 0th step: access data #
#########################

csv_file_name = data_api.get_clinic_file_path(DB_PATH)
df = data_api.read_clinic_file(csv_file_name)

babel_mask = nibabel.load(data_api.get_mask_file_path(DB_PATH))
mask = babel_mask.get_data()
binary_mask = mask != 0

h5file = tables.openFile(LOCAL_PATH)
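
# Sketch: relation between a flattened masked row and the 3-D volume it came
# from.  This is a toy illustration with a synthetic mask and made-up values,
# not part of the workflow itself.
_toy_mask = numpy.zeros((2, 2, 2), dtype=bool)
_toy_mask[0, 0, 0] = _toy_mask[1, 1, 1] = True
_toy_row = numpy.array([3.0, 7.0])       # one "masked image" (2 useful voxels)
_toy_volume = numpy.zeros(_toy_mask.shape)
_toy_volume[_toy_mask] = _toy_row        # inverse of image[binary_mask] above
assert _toy_volume[1, 1, 1] == 7.0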

####################
# Create workflows #
####################

# Base workflow: SVM + feature selection
svms = pipelines = epac.Methods(*[
    epac.Pipe(
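
# The epac.Methods/epac.Pipe expression above is cut off in this listing.
# Purely as an illustration, a grid of "feature selection + SVM" pipelines
# could be built along these lines (the scikit-learn estimators and the k/C
# values here are assumptions, not the original parameter grid):
#
#     from sklearn.feature_selection import SelectKBest
#     from sklearn.svm import LinearSVC
#
#     svms = epac.Methods(*[
#         epac.Pipe(SelectKBest(k=k), LinearSVC(C=C))
#         for k in [100, 1000]
#         for C in [0.1, 1.0]
#     ])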
Example #3
                    type=str,
                    default=DEFAULT_WF_NAME,
                    help='Name of the workflow (default: %s)' %
                    (DEFAULT_WF_NAME))

args = parser.parse_args()

if TEST_MODE:
    DB_PATH = '/volatile/DB/micro_subdepression/'
    LOCAL_PATH = '/volatile/DB/cache/micro_subdepression.hdf5'
else:
    DB_PATH = '/neurospin/brainomics/2013_imagen_subdepression'
    LOCAL_PATH = '/volatile/DB/cache/imagen_subdepression.hdf5'

# Compute a priori probabilities
clinic_file_path = data_api.get_clinic_file_path(DB_PATH)
df = data_api.read_clinic_file(clinic_file_path)
N_SUBJECTS = float(df.shape[0])
counts = df['group_sub_ctl'].value_counts()
N_CONTROL = float(counts['control'])
P_CONTROL = N_CONTROL / N_SUBJECTS
N_SUBDEP = float(counts['sub'])
P_SUBDEP = N_SUBDEP / N_SUBJECTS
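
# Toy check of the prior computation above, with made-up group counts
# (illustration only; not the real group sizes in this dataset):
import pandas
_toy_df = pandas.DataFrame({'group_sub_ctl': ['control'] * 3 + ['sub']})
_toy_counts = _toy_df['group_sub_ctl'].value_counts()
assert _toy_counts['control'] / float(_toy_df.shape[0]) == 0.75
assert _toy_counts['sub'] / float(_toy_df.shape[0]) == 0.25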

OUT_DIR = os.path.join(DB_PATH, 'results', 'svm')
WORKFLOW_PATH = os.path.join(OUT_DIR, args.wf_name)
if not os.path.exists(WORKFLOW_PATH):
    raise Exception('{path} not found'.format(path=WORKFLOW_PATH))

svms_auto_cv = epac.map_reduce.engine.SomaWorkflowEngine.load_from_gui(
    WORKFLOW_PATH)