def make_prior(subj_list, output_dir):
    """
    Create label probability maps and a whole-fROI mask based on the
    subject IDs.

    """
    print 'Create a whole-fROI mask and a probabilistic map for each fROI.'
    db_dir = r'/nfs/t2/atlas/database'
    subj_num = len(subj_list)
    for i in range(subj_num):
        sid = subj_list[i]
        subj_dir = os.path.join(db_dir, sid, 'face-object')
        label_file = get_label_file(subj_dir)
        img = nib.load(label_file)
        label_data = img.get_data()
        img_header = img.get_header()
        if not i:
            # allocate the accumulator on the first subject
            data_size = label_data.shape
            prob_data = np.zeros((data_size[0], data_size[1],
                                  data_size[2], 2))
        # accumulate a binary mask of label 1
        temp = label_data.copy()
        temp[temp != 1] = 0
        prob_data[..., 0] += temp
        # accumulate a binary mask of label 3
        temp = label_data.copy()
        temp[temp != 3] = 0
        temp[temp != 0] = 1
        prob_data[..., 1] += temp
    # convert the accumulated counts into probability maps
    prob_data = prob_data / subj_num
    mask_data = prob_data.sum(axis=3)
    mask_data[mask_data != 0] = 1
    # save to file
    prob_file = os.path.join(output_dir, 'prob.nii.gz')
    mybase.save2nifti(prob_data, img_header, prob_file)
    mask_file = os.path.join(output_dir, 'mask.nii.gz')
    mybase.save2nifti(mask_data, img_header, mask_file)
    return prob_data, mask_data
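# A minimal, self-contained sketch of the computation make_prior performs,
# on synthetic data (the volume shape, label values and subject count below
# are illustrative assumptions, not project data):
import numpy as np

def _prior_sketch(subj_num=5, shape=(4, 4, 4)):
    rng = np.random.RandomState(0)
    # fake per-subject label volumes with values in {0, 1, 3}
    labels = rng.choice([0, 1, 3], size=shape + (subj_num,))
    prob = np.zeros(shape + (2,))
    for i in range(subj_num):
        prob[..., 0] += (labels[..., i] == 1)
        prob[..., 1] += (labels[..., i] == 3)
    # across-subject frequency of each label, plus the union mask
    prob /= subj_num
    mask = (prob.sum(axis=3) > 0).astype(np.int8)
    return prob, mask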
def clf_gen(para_idx, subj_list, zstat_data, roi_data, working_dir,
            roi_header):
    """
    Train a new classifier based on the subject list and the specified
    parameters.

    """
    # configuration
    iter_num = 5
    c_value = np.logspace(-3, 3, 10)
    gamma_value = np.logspace(-3, 3, 10)
    # set C and gamma
    c = c_value[para_idx // 10]
    gamma = gamma_value[np.mod(para_idx, 10)]
    cv_idx = np.array_split(subj_list, iter_num)
    for i in range(iter_num):
        # divide subjects into a train-set and a test-set
        subj_train = list(cv_idx)
        subj_test = subj_train.pop(i)
        subj_train = np.concatenate(subj_train)
        # generate the ROI mask and probability maps
        mask_data, prob_data = autoroi.make_priori(subj_train, zstat_data,
                                                   roi_data)
        # generate training samples
        train_samples = []
        train_labels = []
        for subj_idx in subj_train:
            print 'Subject ' + str(subj_idx)
            marker, result = autoroi.segment(zstat_data[..., subj_idx])
            subj_samples, subj_labels, parcel_id = autoroi.sample_extract(
                zstat_data[..., subj_idx], roi_data[..., subj_idx],
                mask_data, prob_data, marker, result)
            train_samples += subj_samples
            train_labels += subj_labels
        # train the classifier
        scaler, svc = autoroi.roi_svc(train_samples, train_labels,
                                      kernel='rbf', c=c, gamma=gamma)
        # test the classifier performance on the test dataset
        test_subj_num = len(subj_test)
        auto_roi_data = np.zeros([91, 109, 91, test_subj_num])
        subj_count = 0
        for subj_idx in subj_test:
            marker, result = autoroi.segment(zstat_data[..., subj_idx])
            subj_samples, subj_labels, parcel_id = autoroi.sample_extract(
                zstat_data[..., subj_idx], roi_data[..., subj_idx],
                mask_data, prob_data, marker, result)
            # remove all parcels except the sampled ones
            all_parcel_id = np.unique(marker).tolist()
            # drop the background parcel (id 0)
            all_parcel_id.pop(0)
            for idx in all_parcel_id:
                if idx not in parcel_id:
                    result[result == idx] = 0
            subj_samples = scaler.transform(subj_samples)
            predict_labels = svc.predict(subj_samples)
            parcel_num = len(parcel_id)
            # a view into auto_roi_data: label each parcel with its
            # predicted class
            auto_res = auto_roi_data[..., subj_count]
            for idx in range(parcel_num):
                auto_res[result == parcel_id[idx]] = predict_labels[idx]
            subj_count += 1
        # save the predicted ROIs
        roi_header.set_data_shape(auto_roi_data.shape)
        out_file_dir = os.path.join(working_dir, 'parameter_' + str(para_idx))
        if not os.path.exists(out_file_dir):
            os.makedirs(out_file_dir)
        out_file_name = os.path.join(out_file_dir,
                                     'test_CV_iter_' + str(i) + '.nii.gz')
        mybase.save2nifti(auto_roi_data, roi_header, out_file_name)
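# clf_gen packs the 10x10 (C, gamma) grid into a single integer: para_idx
# // 10 indexes C and para_idx mod 10 indexes gamma, so indices 0-99 cover
# the whole grid. A small sketch of the decoding (the sample indices below
# are arbitrary):
import numpy as np

c_grid = np.logspace(-3, 3, 10)
gamma_grid = np.logspace(-3, 3, 10)
for idx in (0, 37, 99):
    c_i, gamma_i = divmod(idx, 10)
    print 'para_idx %d -> C=%g, gamma=%g' % (idx, c_grid[c_i],
                                             gamma_grid[gamma_i])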
zstat_data = nib.load(zstat_file).get_data()
roi_list = [1, 2, 3, 4]
#out_dir = os.path.join(data_dir, 'peak_mask_1')
#out_dir = os.path.join(gcss_dir, 'peak_mask_1')
#out_dir = os.path.join(ma_dir, 'peak_mask_1')
out_dir = os.path.join(group08_dir, 'predicted_files', 'peak_mask_1')
for i in range(len(sessid)):
    ind_label = label_data[..., i].copy()
    ind_label = np.around(ind_label)
    ind_zstat = zstat_data[..., i].copy()
    peak_label = np.zeros(ind_label.shape)
    for roi in roi_list:
        # binary mask of the current ROI
        tmp = ind_label.copy()
        tmp[tmp != roi] = 0
        tmp[tmp == roi] = 1
        if not tmp.sum():
            continue
        # locate the peak voxel of the z-stat map within the ROI
        tmp_peak = np.zeros(tmp.shape)
        tmp_zstat = tmp * ind_zstat
        peak_coord = np.unravel_index(tmp_zstat.argmax(), tmp_zstat.shape)
        # draw a cube around the peak and restrict it to the ROI
        tmp_peak = imtool.cube_roi(tmp_peak, peak_coord[0], peak_coord[1],
                                   peak_coord[2], 1, roi)
        peak_label += tmp_peak * tmp
    out_file = os.path.join(out_dir, sessid[i] + '_atlas.nii.gz')
    mybase.save2nifti(peak_label, header, out_file)
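# The loop above reduces each ROI to a small cube centred on its z-stat
# peak. A NumPy-only sketch of that step (assuming imtool.cube_roi draws a
# cube of half-width `radius` filled with `label` -- an inference from the
# call site, not a documented API):
import numpy as np

def cube_at_peak(stat, roi_mask, radius=1, label=1):
    # mirror the `if not tmp.sum(): continue` guard above
    if roi_mask.sum() == 0:
        return np.zeros(stat.shape)
    # zero out statistics outside the ROI, then locate the peak voxel
    masked = stat * roi_mask
    x, y, z = np.unravel_index(masked.argmax(), masked.shape)
    cube = np.zeros(stat.shape)
    cube[max(x - radius, 0):x + radius + 1,
         max(y - radius, 0):y + radius + 1,
         max(z - radius, 0):z + radius + 1] = label
    # keep only cube voxels that fall inside the ROI
    return cube * roi_mask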
def clf_validate(cv_idx, workingdir, zstat_data, roi_data, roi_header):
    """
    Train a new classifier based on the tuned parameters.

    """
    # get the train- and test-dataset
    subj_num = 202
    subj_list = np.arange(subj_num)
    cv_subj_list = np.array_split(subj_list, 5)
    train_subj = list(cv_subj_list)
    test_subj = train_subj.pop(cv_idx)
    train_subj = np.concatenate(train_subj)
    # get the tuned SVC parameters
    cv_dir = os.path.join(workingdir, 'Outer_CV_' + str(cv_idx))
    tuned_para_idx = get_tuned_parameter(cv_dir)
    # parameter configuration
    c_value = np.logspace(-3, 3, 10)
    gamma_value = np.logspace(-3, 3, 10)
    # set C and gamma
    c = c_value[tuned_para_idx // 10]
    gamma = gamma_value[np.mod(tuned_para_idx, 10)]
    print 'Tuned parameters: ',
    print 'C - ' + str(c),
    print ' Gamma - ' + str(gamma)
    # -- start validation
    # generate the ROI mask and probability maps
    mask_data, prob_data = autoroi.make_priori(train_subj, zstat_data,
                                               roi_data)
    # generate training samples
    train_samples = []
    train_labels = []
    for subj_idx in train_subj:
        print 'Subject ' + str(subj_idx)
        marker, result = autoroi.segment(zstat_data[..., subj_idx])
        subj_samples, subj_labels, parcel_id = autoroi.sample_extract(
            zstat_data[..., subj_idx], roi_data[..., subj_idx],
            mask_data, prob_data, marker, result)
        train_samples += subj_samples
        train_labels += subj_labels
    # train the classifier
    scaler, svc = autoroi.roi_svc(train_samples, train_labels,
                                  kernel='rbf', c=c, gamma=gamma)
    # test the classifier performance on the test dataset
    test_subj_num = len(test_subj)
    auto_roi_data = np.zeros([91, 109, 91, test_subj_num])
    subj_count = 0
    for subj_idx in test_subj:
        marker, result = autoroi.segment(zstat_data[..., subj_idx])
        subj_samples, subj_labels, parcel_id = autoroi.sample_extract(
            zstat_data[..., subj_idx], roi_data[..., subj_idx],
            mask_data, prob_data, marker, result)
        # remove all parcels except the sampled ones
        all_parcel_id = np.unique(marker).tolist()
        # drop the background parcel (id 0)
        all_parcel_id.pop(0)
        for idx in all_parcel_id:
            if idx not in parcel_id:
                result[result == idx] = 0
        subj_samples = scaler.transform(subj_samples)
        predict_labels = svc.predict(subj_samples)
        parcel_num = len(parcel_id)
        # a view into auto_roi_data: label each parcel with its
        # predicted class
        auto_res = auto_roi_data[..., subj_count]
        for idx in range(parcel_num):
            auto_res[result == parcel_id[idx]] = predict_labels[idx]
        subj_count += 1
    # save the predicted ROIs
    roi_header.set_data_shape(auto_roi_data.shape)
    out_file_name = os.path.join(cv_dir, 'auto_test_data.nii.gz')
    mybase.save2nifti(auto_roi_data, roi_header, out_file_name)
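# A sketch of the outer-CV driver: clf_validate handles a single fold, and
# the five-fold split is hard-coded inside it, so the full validation is a
# loop over fold indices (the data arguments are assumed to be loaded
# elsewhere in this module):

def run_outer_cv(workingdir, zstat_data, roi_data, roi_header):
    for cv_idx in range(5):
        clf_validate(cv_idx, workingdir, zstat_data, roi_data, roi_header)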
#mybase.save2nifti(pred_data, header, out_file)
#start_num += sample_num
# probability map smoothing, then save to nifti files
prob_data = np.zeros((91, 109, 91, len(clf_classes)))
for k in range(len(clf_classes)):
    prob_val = pred_prob[start_num:end_num, k]
    prob_data[..., k] = arlib.write2array(coords, prob_val)
# class probabilities sum to one at sampled voxels and are zero elsewhere,
# so their sum serves as a mask of the sampled region
mask = np.sum(prob_data, axis=3)
sm_prob_data = arlib.smooth_data(prob_data, 1)
sm_pred_data = np.argmax(sm_prob_data, axis=3)
# remap class index 2 to label value 3
sm_pred_data[sm_pred_data == 2] = 3
sm_pred_data = sm_pred_data * mask
out_file = os.path.join(pred_dir, test_sessid[subj_idx] + '_pred.nii.gz')
mybase.save2nifti(sm_pred_data, header, out_file)
start_num += sample_num

print 'Mean CV score is %s' % (cv_score.mean())
print 'Mean FFA Dice is %s' % (ffa_dice.mean())
print 'Mean OFA Dice is %s' % (ofa_dice.mean())

##-- effect of sample number (subject number)
#out_file = os.path.join(data_dir, 'subj_size_effect.txt')
#f = open(out_file, 'wb')
#f.write('subj_size,samples,label_1,label_3,score,ofa_dice,ffa_dice\n')
## subject size range
#subj_num = range(20, 160, 10)
## repeat number
#repeat_num = 10
#
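# The FFA/OFA Dice values printed above quantify overlap between predicted
# and manual labels. A minimal sketch of that metric for a single label
# value (the helper name and its application to sm_pred_data versus the
# manual ROI volume are illustrative assumptions):
import numpy as np

def dice_coef(pred, truth, label):
    # Dice = 2|A & B| / (|A| + |B|) on the binary masks of one label
    a = (pred == label)
    b = (truth == label)
    denom = a.sum() + b.sum()
    if denom == 0:
        return np.nan
    return 2.0 * np.logical_and(a, b).sum() / denom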