def fit_files_st(self, files_path_st, subjects_id_list_st, confounds_st, files_path, subjects_id_list, confounds, y, n_seeds, extra_var=[]):
    '''
    Use a list of subject IDs and search for them in the path, grab the results per network.
    Same as fit_files() except that you can train and test on different set of data.

    Trains one [ConfoundsRm, stage-1 subtypes, stage-2 subtypes] triplet per seed
    and stores them in self.st_crm.
    NOTE(review): files_path, subjects_id_list, confounds, y and extra_var are
    accepted but never used in this variant — presumably consumed by a later
    step elsewhere; confirm against callers.
    NOTE(review): mutable default `extra_var=[]` is only read here, so it is
    harmless, but a tuple/None default would be safer.
    '''
    if self.verbose:
        start = time.time()
    ### train subtypes
    self.st_crm = []
    # for ii in [5,13]:#range(x.shape[1]):
    xw = []  # NOTE(review): never filled in this method — appears vestigial
    for ii in range(n_seeds):
        print('Train seed ' + str(ii + 1))
        if self.dynamic:
            # Dynamic mode: grab_rmap returns per-subject stacks of dynamic maps
            # plus a reference map; replicate each subject's confound row once
            # per dynamic volume so rows stay aligned after the vstack below.
            [x_dyn, x_ref] = sbp_util.grab_rmap(subjects_id_list_st, files_path_st, ii, dynamic=self.dynamic)
            confounds_dyn = []
            for jj in range(len(x_dyn)):
                confounds_dyn.append(
                    (confounds_st[jj], ) * x_dyn[jj].shape[0])
            confounds_dyn = np.vstack(confounds_dyn)
            x_dyn = np.vstack(x_dyn)
        else:
            # Static mode: one map per subject, confounds used as-is.
            x_ref = sbp_util.grab_rmap(subjects_id_list_st, files_path_st, ii, dynamic=self.dynamic)
            x_dyn = x_ref
            confounds_dyn = confounds_st
        del x_ref  # release reference maps early (memory)
        ## regress confounds
        crm = prediction.ConfoundsRm(confounds_dyn, x_dyn)
        ## extract subtypes on the confound-regressed residuals
        st = subtypes.clusteringST()
        st.fit_network(crm.transform(confounds_dyn, x_dyn),
                       nSubtypes=self.nSubtypes)
        # stage 2
        st_s2 = subtypes.clusteringST()
        st_s2.fit_network(crm.transform(confounds_dyn, x_dyn),
                          nSubtypes=self.nSubtypes_stage2)
        self.st_crm.append([crm, st, st_s2])
        del x_dyn  # release dynamic maps before the next seed (memory)
    if self.verbose:
        print("Subtype extraction, Time elapsed: {}s)".format(
            int(time.time() - start)))
def fit(self, x_dyn, confounds_dyn, x, confounds, y, extra_var=[]):
    """Train per-network subtype bases on dynamic maps, then fit the
    two-stage prediction model on the resulting subtype weights.

    x_dyn is indexed as (samples, networks, features); one
    [ConfoundsRm, stage-1 clustering, stage-2 clustering] triplet is
    trained per network and stored in self.st_crm. extra_var, when
    non-empty, is appended to both stages' weight matrices.
    """
    if self.verbose:
        start = time.time()

    # Subtype extraction, one triplet per network.
    self.st_crm = []
    for net_idx in range(x.shape[1]):
        net_maps = x_dyn[:, net_idx, :]
        crm = prediction.ConfoundsRm(confounds_dyn, net_maps)
        stage1 = subtypes.clusteringST()
        stage1.fit_network(crm.transform(confounds_dyn, net_maps),
                           nSubtypes=self.nSubtypes)
        stage2 = subtypes.clusteringST()
        stage2.fit_network(crm.transform(confounds_dyn, net_maps),
                           nSubtypes=self.nSubtypes_stage2)
        self.st_crm.append([crm, stage1, stage2])

    # Subtype weights for both stages.
    xw, xw2 = self.get_w(x, confounds)
    print('xw sub data', xw[0, :])
    if self.verbose:
        print("Subtype extraction, Time elapsed: {}s)".format(
            int(time.time() - start)))

    # Optionally append the extra covariates to each stage's design matrix.
    if len(extra_var) != 0:
        all_var = np.hstack((xw, extra_var))
        all_var_s2 = np.hstack((xw2, extra_var))
    else:
        all_var, all_var_s2 = xw, xw2

    # Two-stage prediction model.
    if self.verbose:
        start = time.time()
    self.tlp = TwoStagesPrediction(
        self.verbose,
        thresh_ratio=self.thresh_ratio,
        min_gamma=self.min_gamma,
        shuffle_test_split=self.shuffle_test_split,
        n_iter=self.n_iter,
        gamma_auto_adjust=self.gamma_auto_adjust,
        recurrent_modes=self.recurrent_modes)
    self.tlp.fit(all_var, all_var_s2, y)
    if self.verbose:
        print("Two Stages prediction, Time elapsed: {}s)".format(
            int(time.time() - start)))
def fit(self, x_dyn, confounds_dyn, x, confounds, y, extra_var=[]):
    """Train per-network subtypes on dynamic maps and fit a two-level
    prediction model on the resulting subtype weights.

    x_dyn is indexed as (samples, networks, features); one [ConfoundsRm,
    clusteringST] pair is trained per network. extra_var, when non-empty,
    is appended to the weight matrix before prediction.
    """
    if self.verbose:
        start = time.time()
    ### train subtypes
    self.st_crm = []
    for ii in range(x.shape[1]):
        crm = prediction.ConfoundsRm(confounds_dyn, x_dyn[:, ii, :])
        # subtype clustering on confound-regressed residuals
        st = subtypes.clusteringST()
        st.fit_network(crm.transform(confounds_dyn, x_dyn[:, ii, :]),
                       nSubtypes=self.nSubtypes)
        # BUG FIX: previously appended to self.st_crm_s2, which is never
        # initialized in this method, while the freshly initialized
        # self.st_crm stayed empty; the sibling fit() variants append to
        # self.st_crm.
        self.st_crm.append([crm, st])
    ### extract w values
    xw = self.get_w(x, confounds)
    # BUG FIX: was a Python 2 print statement (syntax error under Python 3);
    # the rest of the file uses the print() function.
    print('xw sub data', xw[0, :])
    if self.verbose:
        print("Subtype extraction, Time elapsed: {}s)".format(
            int(time.time() - start)))
    ### Include extra covariates
    if len(extra_var) != 0:
        all_var = np.hstack((xw, extra_var))
    else:
        all_var = xw
    ### prediction model
    if self.verbose:
        start = time.time()
    tlp = TwoLevelsPrediction(self.verbose,
                              stage1_model_type=self.stage1_model_type,
                              gamma=self.gamma)
    # Same design matrix feeds both levels in this variant.
    tlp.fit(all_var, all_var, y)
    if self.verbose:
        print("Two Levels prediction, Time elapsed: {}s)".format(
            int(time.time() - start)))
    ### save parameters
    self.tlp = tlp
def fit_files_st(self,files_path_st,subjects_id_list_st,confounds_st,files_path,subjects_id_list,confounds,y,n_seeds,extra_var=[]):
    '''
    Use a list of subject IDs and search for them in the path, grab the results per network.
    Same as fit_files() except that you can train and test on different set of data.

    Trains one [ConfoundsRm, clusteringST] pair per seed (stored in
    self.st_crm), computes subtype weights on the *test* set
    (files_path / subjects_id_list / confounds) and fits a
    TwoLevelsPrediction model on them.
    NOTE(review): mutable default `extra_var=[]` is only read here, so it
    is harmless, but a tuple/None default would be safer.
    '''
    if self.verbose:
        start = time.time()
    ### train subtypes
    self.st_crm = []
    #for ii in [5,13]:#range(x.shape[1]):
    xw = []  # NOTE(review): overwritten by get_w_files() below — vestigial
    for ii in range(n_seeds):
        print('Train seed '+str(ii+1))
        if self.dynamic:
            # Dynamic mode: replicate each subject's confound row once per
            # dynamic volume so rows stay aligned after the vstack below.
            [x_dyn,x_ref] = sbp_util.grab_rmap(subjects_id_list_st,files_path_st,ii,dynamic=self.dynamic)
            confounds_dyn = []
            for jj in range(len(x_dyn)):
                confounds_dyn.append((confounds_st[jj],)*x_dyn[jj].shape[0])
            confounds_dyn = np.vstack(confounds_dyn)
            x_dyn = np.vstack(x_dyn)
        else:
            # Static mode: one map per subject, confounds used as-is.
            x_ref = sbp_util.grab_rmap(subjects_id_list_st,files_path_st,ii,dynamic=self.dynamic)
            x_dyn = x_ref
            confounds_dyn = confounds_st
        del x_ref  # release reference maps early (memory)
        ## regress confounds
        crm = prediction.ConfoundsRm(confounds_dyn,x_dyn)
        ## extract subtypes on the confound-regressed residuals
        st=subtypes.clusteringST()
        st.fit_network(crm.transform(confounds_dyn,x_dyn),nSubtypes=self.nSubtypes)
        self.st_crm.append([crm,st])
        del x_dyn  # release dynamic maps before the next seed (memory)
    # compute the W (subtype weights) on the test-set files
    xw = self.get_w_files(files_path,subjects_id_list,confounds)
    if self.verbose:
        print("Subtype extraction, Time elapsed: {}s)".format(int(time.time() - start)))
    ### Include extra covariates
    if len(extra_var)!=0:
        all_var = np.hstack((xw,extra_var))
    else:
        all_var = xw
    ### prediction model
    if self.verbose:
        start = time.time()
    self.tlp = TwoLevelsPrediction(self.verbose,stage1_model_type=self.stage1_model_type,gamma=self.gamma)
    # Same design matrix feeds both levels in this variant.
    self.tlp.fit(all_var,all_var,y)
    if self.verbose:
        print("Two Levels prediction, Time elapsed: {}s)".format(int(time.time() - start)))
def fit(self, net_data_low_main, y, confounds, n_subtypes, n_subtypes_l1=3, flag_feature_select=True, extra_var=[], verbose=True):
    """Regress confounds from low-resolution connectomes, extract two levels
    of subtypes, optionally select discriminative subtype weights via a GLM
    contrast on y, and fit a two-level SVM prediction model.

    Parameters
    ----------
    net_data_low_main : array, per-subject connectome data (subjects first axis).
    y : array, labels used both for feature selection and prediction.
    confounds : 2D array, nuisance variables regressed out of the data.
    n_subtypes : int, number of level-2 subtypes.
    n_subtypes_l1 : int, number of level-1 subtypes.
    flag_feature_select : bool, run GLM/FDR feature selection when True.
    extra_var : optional extra covariates appended to the selected weights.
    verbose : bool, print timing information.
    """
    self.verbose = verbose
    ### regress confounds from the connectomes
    self.scale_ref = net_data_low_main.mean(0).mean(1)
    self.cf_rm = prediction.ConfoundsRm(confounds, net_data_low_main)
    net_data_low = self.cf_rm.transform(confounds, net_data_low_main)
    ### compute the subtypes (level 1 and level 2)
    if self.verbose:
        start = time.time()
    st_ = subtypes.clusteringST()
    st_.fit(net_data_low, n_subtypes_l1)
    xw = st_.transform(net_data_low)
    xw = np.nan_to_num(xw)  # clustering weights can contain NaNs
    # BUG FIX: was a Python 2 print statement (syntax error under Python 3);
    # the rest of the file uses the print() function.
    print('xw sub data', xw[0, :])
    self.st_l2 = subtypes.clusteringST()
    self.st_l2.fit(net_data_low, n_subtypes)
    xwl2 = self.st_l2.transform(net_data_low)
    xwl2 = np.nan_to_num(xwl2)
    if self.verbose:
        print("Compute subtypes, Time elapsed: {}s)".format(
            int(time.time() - start)))
    ### feature selection
    if flag_feature_select:
        if verbose:
            start = time.time()
        # t-contrast on the y column of the design [intercept, y, confounds].
        contrast = np.hstack(([0, 1], np.repeat(0, confounds.shape[1])))
        x_ = np.vstack((np.ones_like(y), y, confounds.T)).T
        labels, regression_result = nsglm.session_glm(np.array(xw), x_)
        cont_results = nsglm.compute_contrast(labels, regression_result,
                                              contrast, contrast_type='t')
        pval = cont_results.p_value()
        # FDR (Benjamini-Hochberg) correction at alpha=0.01.
        results = smm.multipletests(pval, alpha=0.01, method='fdr_bh')
        w_select = np.where(results[0])[0]
        if len(w_select) < 10:
            # Too few survivors: fall back to the 10 smallest p-values.
            w_select = np.argsort(pval)[:10]
        else:
            # Order the surviving features by increasing p-value.
            w_select = w_select[np.argsort(pval[np.where(results[0])])]
    else:
        # Cancel the selection: keep every column (weights are never == 2).
        w_select = np.where(xw[0, :] != 2)[0]
    ### Include extra covariates
    if len(extra_var) != 0:
        all_var = np.hstack((xw[:, w_select], extra_var))
    else:
        all_var = xw[:, w_select]
    if self.verbose:
        print("Feature selection, Time elapsed: {}s)".format(
            int(time.time() - start)))
    ### prediction model
    if self.verbose:
        start = time.time()
    tlp = TwoLevelsPrediction()
    tlp.fit(all_var, xwl2, y, model_type='svm', verbose=self.verbose)
    if self.verbose:
        print("Two Levels prediction, Time elapsed: {}s)".format(
            int(time.time() - start)))
    ### save parameters
    self.median_template = np.median(net_data_low, axis=0)
    self.st = st_
    self.w_select = w_select
    self.tlp = tlp
def fit(self,net_data_low_main,y,confounds,n_subtypes,n_subtypes_l1=3,flag_feature_select=True,extra_var=[],verbose=True):
    '''
    Regress confounds from the connectomes, extract two levels of subtypes
    with the robust clustering variant (fit_robust), optionally select
    discriminative subtype weights via a GLM t-contrast on y with FDR
    correction, then fit a two-level SVM prediction model.
    NOTE(review): near-duplicate of the other fit() variant but uses
    fit_robust() instead of fit() for the clustering — confirm which one is
    the maintained code path.
    '''
    self.verbose = verbose
    ### regress confounds from the connectomes
    #net_data_low = net_data_low_main.copy()
    #cf_rm = prediction.ConfoundsRm(confounds,net_data_low.reshape((net_data_low.shape[0],net_data_low.shape[1]*net_data_low.shape[2])))
    #net_data_low_tmp = cf_rm.transform(confounds,net_data_low.reshape((net_data_low.shape[0],net_data_low.shape[1]*net_data_low.shape[2])))
    #net_data_low = net_data_low_tmp.reshape((net_data_low_tmp.shape[0],net_data_low.shape[1],net_data_low.shape[2]))
    self.scale_ref = net_data_low_main.mean(0).mean(1)
    #net_data_low = self.norm_subjects(net_data_low_main)
    self.cf_rm = prediction.ConfoundsRm(confounds,net_data_low_main)
    net_data_low = self.cf_rm.transform(confounds,net_data_low_main)
    #net_data_low += self.cf_rm.intercept()
    ### compute the subtypes (level 1 and level 2, robust clustering)
    if self.verbose:
        start = time.time()
    st_ = subtypes.clusteringST()
    st_.fit_robust(net_data_low,n_subtypes_l1)
    xw = st_.transform(net_data_low)
    xw = np.nan_to_num(xw)  # clustering weights can contain NaNs
    self.st_l2 = subtypes.clusteringST()
    self.st_l2.fit_robust(net_data_low,n_subtypes)
    xwl2 = self.st_l2.transform(net_data_low)
    xwl2 = np.nan_to_num(xwl2)
    #xw = np.hstack((age_var,xw))
    if self.verbose:
        print("Compute subtypes, Time elapsed: {}s)".format(int(time.time() - start)))
    ### feature selection
    if flag_feature_select:
        if verbose:
            start = time.time()
        # t-contrast on the y column of the design [intercept, y, confounds]
        contrast = np.hstack(([0,1],np.repeat(0,confounds.shape[1])))#[0,1,0,0,0]
        x_ = np.vstack((np.ones_like(y),y,confounds.T)).T
        labels, regression_result = nsglm.session_glm(np.array(xw),x_)
        cont_results = nsglm.compute_contrast(labels,regression_result, contrast,contrast_type='t')
        pval = cont_results.p_value()
        # FDR (Benjamini-Hochberg) correction at alpha=0.01
        results = smm.multipletests(pval, alpha=0.01, method='fdr_bh')
        w_select = np.where(results[0])[0]
        #w_select = w_select[np.argsort(pval[np.where(results[0])])]
        if len(w_select)<10:
            # too few survivors: fall back to the 10 smallest p-values
            w_select = np.argsort(pval)[:10]
        else:
            # order surviving features by increasing p-value
            w_select = w_select[np.argsort(pval[np.where(results[0])])]
    else:
        # Cancel the selection (keeps every column; weights are never == 2)
        w_select = np.where(xw[0,:]!=2)[0]
        #w_select = get_stable_w(xw[train_index,:],y_tmp[train_index],confounds[train_index,:],6)
        # Cancel the selection
        #w_select = np.where(results[0]!=-1)[0]
        #print("Feature selected: {})".format(w_select))
    ### Include extra covariates
    if len(extra_var)!=0:
        all_var = np.hstack((xw[:,w_select],extra_var))
    else:
        all_var = xw[:,w_select]
    if self.verbose:
        print("Feature selection, Time elapsed: {}s)".format(int(time.time() - start)))
    ### prediction model
    if self.verbose:
        start = time.time()
    tlp = TwoLevelsPrediction()
    tlp.fit(all_var,xwl2,y,model_type='svm',verbose=self.verbose)
    if self.verbose:
        print("Two Levels prediction, Time elapsed: {}s)".format(int(time.time() - start)))
    ### save parameters
    self.median_template = np.median(net_data_low,axis=0)
    self.st = st_
    self.w_select = w_select
    self.tlp = tlp