def clf_gen(para_idx, subj_list, zstat_data, roi_data, working_dir, roi_header,
            iter_num=5):
    """
    Cross-validate one (C, gamma) setting of the ROI classifier.

    The subjects in `subj_list` are split into `iter_num` folds. For each
    fold, a prior mask/probability map and a classifier are built from the
    remaining folds, the held-out subjects are labelled parcel by parcel,
    and the predicted label volumes are saved to
    `<working_dir>/parameter_<para_idx>/test_CV_iter_<fold>.nii.gz`.

    Parameters
    ----------
    para_idx : int
        Index into a 10 x 10 parameter grid: `para_idx // 10` selects C and
        `para_idx % 10` selects gamma, both from `np.logspace(-3, 3, 10)`.
    subj_list : sequence of int
        Subject indices along the last axis of `zstat_data` / `roi_data`.
    zstat_data, roi_data : array
        Per-subject volumes, indexed as `data[..., subj_idx]`.
    working_dir : str
        Directory under which the per-parameter output directory is created.
    roi_header : header object
        Passed to `mybase.save2nifti`; its data shape is overwritten in
        place through `set_data_shape` before every save.
    iter_num : int, optional
        Number of cross-validation folds (default 5).

    """
    # parameter grid: 10 candidate values for C times 10 for gamma
    c_value = np.logspace(-3, 3, 10)
    gamma_value = np.logspace(-3, 3, 10)
    # floor division keeps the index an integer under Python 2 and 3 alike
    c = c_value[para_idx // 10]
    gamma = gamma_value[para_idx % 10]

    # Create the output directory once, before the expensive training, and
    # without going through a shell (paths with spaces or metacharacters
    # would break a 'mkdir <path>' command line).
    out_file_dir = os.path.join(working_dir, 'parameter_' + str(para_idx))
    try:
        os.makedirs(out_file_dir)
    except OSError:
        # an already existing directory is fine; anything else is an error
        if not os.path.isdir(out_file_dir):
            raise

    # Shape of one subject's volume (every axis but the last).
    # NOTE(review): assumes autoroi.segment returns label volumes on the
    # same grid as its input; this used to be hard-coded to 91 x 109 x 91.
    vol_shape = tuple(zstat_data.shape[:-1])

    cv_idx = np.array_split(subj_list, iter_num)
    for i in range(iter_num):
        # divide subjects into train-set and test-set
        subj_train = list(cv_idx)
        subj_test = subj_train.pop(i)
        subj_train = np.concatenate(subj_train)
        # generate ROI mask and probability map from the training subjects
        mask_data, prob_data = autoroi.make_priori(subj_train, zstat_data,
                                                   roi_data)
        # generate training samples
        train_samples = []
        train_labels = []
        for subj_idx in subj_train:
            print('Subject ' + str(subj_idx))
            marker, result = autoroi.segment(zstat_data[..., subj_idx])
            subj_samples, subj_labels, parcel_id = autoroi.sample_extract(
                                                    zstat_data[..., subj_idx],
                                                    roi_data[..., subj_idx],
                                                    mask_data,
                                                    prob_data,
                                                    marker,
                                                    result)
            train_samples += subj_samples
            train_labels += subj_labels
        # train classifier
        scaler, svc = autoroi.roi_svc(train_samples, train_labels,
                                      kernel='rbf', c=c, gamma=gamma)
        # test classifier performance on the held-out subjects
        auto_roi_data = np.zeros(vol_shape + (len(subj_test),))
        for subj_count, subj_idx in enumerate(subj_test):
            marker, result = autoroi.segment(zstat_data[..., subj_idx])
            subj_samples, subj_labels, parcel_id = autoroi.sample_extract(
                                                    zstat_data[..., subj_idx],
                                                    roi_data[..., subj_idx],
                                                    mask_data,
                                                    prob_data,
                                                    marker,
                                                    result)
            # nothing to classify: leave this subject's volume all zeros
            # instead of handing an empty sample set to the scaler
            if len(subj_samples) == 0:
                continue
            # Remove all parcels except samples. np.unique returns sorted
            # values, so the first (smallest) label is skipped here --
            # presumably the background label 0; TODO confirm.
            for idx in np.unique(marker).tolist()[1:]:
                if idx not in parcel_id:
                    result[result == idx] = 0
            subj_samples = scaler.transform(subj_samples)
            predict_labels = svc.predict(subj_samples)
            # view into auto_roi_data, so the writes below land in it
            auto_res = auto_roi_data[..., subj_count]
            for sample_pos, pid in enumerate(parcel_id):
                auto_res[result == pid] = predict_labels[sample_pos]
        # save predicted ROI
        roi_header.set_data_shape(auto_roi_data.shape)
        out_file_name = os.path.join(out_file_dir,
                                     'test_CV_iter_' + str(i) + '.nii.gz')
        mybase.save2nifti(auto_roi_data, roi_header, out_file_name)
def clf_validate(cv_idx, workingdir, zstat_data, roi_data, roi_header,
                 subj_num=202, fold_num=5):
    """
    Train a classifier with the tuned parameters and label one outer fold.

    Subjects `0 .. subj_num - 1` are split into `fold_num` folds; fold
    `cv_idx` is held out and the rest is used for training. The tuned
    parameter index is read through `get_tuned_parameter` from
    `<workingdir>/Outer_CV_<cv_idx>`, and the predicted label volumes of
    the held-out subjects are saved there as `auto_test_data.nii.gz`.

    Parameters
    ----------
    cv_idx : int
        Index of the held-out fold.
    workingdir : str
        Directory containing the `Outer_CV_<cv_idx>` directories.
    zstat_data, roi_data : array
        Per-subject volumes, indexed as `data[..., subj_idx]`.
    roi_header : header object
        Passed to `mybase.save2nifti`; its data shape is overwritten in
        place through `set_data_shape` before saving.
    subj_num : int, optional
        Total number of subjects (default 202).
    fold_num : int, optional
        Number of outer cross-validation folds (default 5).

    """
    # get train- and test-dataset
    subj_list = np.arange(subj_num)
    cv_subj_list = np.array_split(subj_list, fold_num)
    train_subj = list(cv_subj_list)
    test_subj = train_subj.pop(cv_idx)
    train_subj = np.concatenate(train_subj)
    # get tuned SVC parameters
    cv_dir = os.path.join(workingdir, 'Outer_CV_' + str(cv_idx))
    tuned_para_idx = get_tuned_parameter(cv_dir)
    # parameter grid: 10 candidate values for C times 10 for gamma
    c_value = np.logspace(-3, 3, 10)
    gamma_value = np.logspace(-3, 3, 10)
    # floor division keeps the index an integer under Python 2 and 3 alike
    c = c_value[tuned_para_idx // 10]
    gamma = gamma_value[tuned_para_idx % 10]
    # single call that prints the same line as the former three
    # trailing-comma print statements (soft spaces included)
    print('Tuned parameters:  C - ' + str(c) + '  Gamma - ' + str(gamma))
    # -- start validation
    # generate ROI mask and probability map from the training subjects
    mask_data, prob_data = autoroi.make_priori(train_subj, zstat_data,
                                               roi_data)
    # generate training samples
    train_samples = []
    train_labels = []
    for subj_idx in train_subj:
        print('Subject ' + str(subj_idx))
        marker, result = autoroi.segment(zstat_data[..., subj_idx])
        subj_samples, subj_labels, parcel_id = autoroi.sample_extract(
                                                    zstat_data[..., subj_idx],
                                                    roi_data[..., subj_idx],
                                                    mask_data,
                                                    prob_data,
                                                    marker,
                                                    result)
        train_samples += subj_samples
        train_labels += subj_labels
    # train classifier
    scaler, svc = autoroi.roi_svc(train_samples, train_labels,
                                  kernel='rbf', c=c, gamma=gamma)
    # test classifier performance on the held-out subjects
    # NOTE(review): the volume shape is taken from zstat_data (every axis but
    # the last), assuming autoroi.segment returns label volumes on the same
    # grid as its input; this used to be hard-coded to 91 x 109 x 91.
    vol_shape = tuple(zstat_data.shape[:-1])
    auto_roi_data = np.zeros(vol_shape + (len(test_subj),))
    for subj_count, subj_idx in enumerate(test_subj):
        marker, result = autoroi.segment(zstat_data[..., subj_idx])
        subj_samples, subj_labels, parcel_id = autoroi.sample_extract(
                                                    zstat_data[..., subj_idx],
                                                    roi_data[..., subj_idx],
                                                    mask_data,
                                                    prob_data,
                                                    marker,
                                                    result)
        # nothing to classify: leave this subject's volume all zeros
        # instead of handing an empty sample set to the scaler
        if len(subj_samples) == 0:
            continue
        # Remove all parcels except samples. np.unique returns sorted
        # values, so the first (smallest) label is skipped here --
        # presumably the background label 0; TODO confirm.
        for idx in np.unique(marker).tolist()[1:]:
            if idx not in parcel_id:
                result[result == idx] = 0
        subj_samples = scaler.transform(subj_samples)
        predict_labels = svc.predict(subj_samples)
        # view into auto_roi_data, so the writes below land in it
        auto_res = auto_roi_data[..., subj_count]
        for sample_pos, pid in enumerate(parcel_id):
            auto_res[result == pid] = predict_labels[sample_pos]
    # save predicted ROI
    roi_header.set_data_shape(auto_roi_data.shape)
    out_file_name = os.path.join(cv_dir, 'auto_test_data.nii.gz')
    mybase.save2nifti(auto_roi_data, roi_header, out_file_name)