N_FOLDS_EVAL = 3
else:
    N_FOLDS_NESTED = 5
    N_FOLDS_EVAL = 10

OUT_DIR = os.path.join(DB_PATH, 'results', 'svm_feature_selection')
if not os.path.exists(OUT_DIR):
    os.makedirs(OUT_DIR)
WF_NAME_PATTERN = "svm_feature_selection_{images}"

#########################
# 0th step: access data #
#########################

csv_file_name = data_api.get_clinic_file_path(DB_PATH)
df = data_api.read_clinic_file(csv_file_name)

babel_mask = nibabel.load(data_api.get_mask_file_path(DB_PATH))
mask = babel_mask.get_data()
binary_mask = mask != 0

h5file = tables.openFile(LOCAL_PATH)
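
# Hedged sketch (not in the truncated original): build the subjects-by-voxels
# matrix X and the label vector y from the cached data. The HDF5 node name
# 'images' and the label encoding ('sub' -> 1) are assumptions.
X = h5file.getNode('/', 'images')[:][:, binary_mask.ravel()]
y = (df['group_sub_ctl'] == 'sub').astype(int).values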

####################
# Create workflows #
####################

# Base workflow: SVM + feature selection
# The pipeline is truncated in the original and completed minimally here:
# the LinearSVC classifier and K_VALUES (the candidate numbers of selected
# features) are assumptions, not shown in the source.
svms = pipelines = epac.Methods(*[
    epac.Pipe(
        sklearn.feature_selection.SelectKBest(k=k),
        sklearn.svm.LinearSVC(class_weight='auto'))
    for k in K_VALUES])
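
# Hedged sketch (assumed continuation): wrap the methods in a nested CV for
# model selection and an outer CV for evaluation, reusing the fold counts
# defined above. The exact calls follow EPAC's usual CVBestSearchRefit / CV
# API but are an assumption here.
svms_auto = epac.CVBestSearchRefit(svms, n_folds=N_FOLDS_NESTED)
svms_auto_cv = epac.CV(svms_auto, n_folds=N_FOLDS_EVAL)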

# Example #2: compute class priors and load a previously saved workflow

# The head of this example (imports and parser setup) is truncated; it is
# reconstructed minimally here. The '--wf_name' option name is inferred from
# the use of args.wf_name below.
parser = argparse.ArgumentParser()
parser.add_argument('--wf_name',
                    default=DEFAULT_WF_NAME,
                    help='Name of the workflow (default: %s)' %
                    (DEFAULT_WF_NAME))

args = parser.parse_args()

if TEST_MODE:
    DB_PATH = '/volatile/DB/micro_subdepression/'
    LOCAL_PATH = '/volatile/DB/cache/micro_subdepression.hdf5'
else:
    DB_PATH = '/neurospin/brainomics/2013_imagen_subdepression'
    LOCAL_PATH = '/volatile/DB/cache/imagen_subdepression.hdf5'

# Compute a priori probabilities
clinic_file_path = data_api.get_clinic_file_path(DB_PATH)
df = data_api.read_clinic_file(clinic_file_path)
N_SUBJECTS = float(df.shape[0])
counts = df['group_sub_ctl'].value_counts()
N_CONTROL = float(counts['control'])
P_CONTROL = N_CONTROL / N_SUBJECTS
N_SUBDEP = float(counts['sub'])
P_SUBDEP = N_SUBDEP / N_SUBJECTS
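
# Added note (not in the original): the larger prior is the chance-level
# accuracy a classifier must beat; P_CHANCE is a hypothetical name.
P_CHANCE = max(P_CONTROL, P_SUBDEP)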

OUT_DIR = os.path.join(DB_PATH, 'results', 'svm')
WORKFLOW_PATH = os.path.join(OUT_DIR, args.wf_name)
if not os.path.exists(WORKFLOW_PATH):
    raise Exception('{path} not found'.format(path=WORKFLOW_PATH))

svms_auto_cv = epac.map_reduce.engine.SomaWorkflowEngine.load_from_gui(
    WORKFLOW_PATH)
print "Workflow loaded"