Example #1
    def fit_files_st(self,
                     files_path_st,
                     subjects_id_list_st,
                     confounds_st,
                     files_path,
                     subjects_id_list,
                     confounds,
                     y,
                     n_seeds,
                     extra_var=[]):
        '''
        Search the given path for each subject ID and grab the results per
        network. Same as fit_files(), except that you can train and test on
        different sets of data.
        '''
        if self.verbose: start = time.time()
        ### train subtypes
        self.st_crm = []
        xw = []
        for ii in range(n_seeds):
            print('Train seed ' + str(ii + 1))
            if self.dynamic:
                [x_dyn, x_ref] = sbp_util.grab_rmap(subjects_id_list_st,
                                                    files_path_st,
                                                    ii,
                                                    dynamic=self.dynamic)
                confounds_dyn = []
                for jj in range(len(x_dyn)):
                    confounds_dyn.append(
                        (confounds_st[jj], ) * x_dyn[jj].shape[0])
                confounds_dyn = np.vstack(confounds_dyn)
                x_dyn = np.vstack(x_dyn)
            else:
                x_ref = sbp_util.grab_rmap(subjects_id_list_st,
                                           files_path_st,
                                           ii,
                                           dynamic=self.dynamic)
                x_dyn = x_ref
                confounds_dyn = confounds_st

            del x_ref
            ## regress confounds
            crm = prediction.ConfoundsRm(confounds_dyn, x_dyn)
            ## extract subtypes
            st = subtypes.clusteringST()
            st.fit_network(crm.transform(confounds_dyn, x_dyn),
                           nSubtypes=self.nSubtypes)
            # stage 2
            st_s2 = subtypes.clusteringST()
            st_s2.fit_network(crm.transform(confounds_dyn, x_dyn),
                              nSubtypes=self.nSubtypes_stage2)
            self.st_crm.append([crm, st, st_s2])
            del x_dyn

        if self.verbose:
            print("Subtype extraction, Time elapsed: {}s)".format(
                int(time.time() - start)))
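
The ConfoundsRm class itself does not appear in these examples. As a rough,
hedged sketch of what a confound-regression step of this kind typically
computes (an assumption about its behavior, not the library's code), the maps
can be residualized on the confounds with ordinary least squares:

    import numpy as np

    def regress_confounds(confounds, data):
        """Remove the least-squares contribution of the confounds from data."""
        design = np.column_stack([np.ones(len(confounds)), confounds])  # intercept + confounds
        beta, _, _, _ = np.linalg.lstsq(design, data, rcond=None)       # OLS fit
        return data - design @ beta                                     # residuals

    rng = np.random.default_rng(0)
    confounds = rng.normal(size=(50, 2))  # e.g. age and motion for 50 subjects
    maps = rng.normal(size=(50, 100))     # 50 subjects x 100 connectivity values
    residuals = regress_confounds(confounds, maps)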
Example #2
    def fit(self, x_dyn, confounds_dyn, x, confounds, y, extra_var=[]):

        if self.verbose: start = time.time()
        ### train subtypes
        self.st_crm = []
        for ii in range(x.shape[1]):
            crm = prediction.ConfoundsRm(confounds_dyn, x_dyn[:, ii, :])
            # st
            st = subtypes.clusteringST()
            st.fit_network(crm.transform(confounds_dyn, x_dyn[:, ii, :]),
                           nSubtypes=self.nSubtypes)
            # stage 2
            st_s2 = subtypes.clusteringST()
            st_s2.fit_network(crm.transform(confounds_dyn, x_dyn[:, ii, :]),
                              nSubtypes=self.nSubtypes_stage2)
            self.st_crm.append([crm, st, st_s2])

        ### extract w values
        xw, xw2 = self.get_w(x, confounds)
        print('xw sub data', xw[0, :])
        if self.verbose:
            print("Subtype extraction, Time elapsed: {}s)".format(
                int(time.time() - start)))

        ### Include extra covariates
        if len(extra_var) != 0:
            all_var = np.hstack((xw, extra_var))
            all_var_s2 = np.hstack((xw2, extra_var))
        else:
            all_var = xw
            all_var_s2 = xw2

        ### prediction model
        if self.verbose: start = time.time()
        self.tlp = TwoStagesPrediction(
            self.verbose,
            thresh_ratio=self.thresh_ratio,
            min_gamma=self.min_gamma,
            shuffle_test_split=self.shuffle_test_split,
            n_iter=self.n_iter,
            gamma_auto_adjust=self.gamma_auto_adjust,
            recurrent_modes=self.recurrent_modes)
        self.tlp.fit(all_var, all_var_s2, y)
        if self.verbose:
            print("Two Stages prediction, Time elapsed: {}s)".format(
                int(time.time() - start)))
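
The internals of TwoStagesPrediction are not part of these examples. As a
heavily hedged sketch of the general two-stage idea (a first model screens all
subjects on the stage-1 features, and a second model then learns to recognize
the cases the first stage classifies as true positives), with hypothetical
shapes and scikit-learn standing in for the library:

    import numpy as np
    from sklearn.svm import SVC

    rng = np.random.default_rng(1)
    all_var = rng.normal(size=(60, 10))     # stage-1 features
    all_var_s2 = rng.normal(size=(60, 20))  # stage-2 features
    y = rng.integers(0, 2, 60)              # binary labels

    stage1 = SVC().fit(all_var, y)
    # train stage 2 on the hits of stage 1 (its true positives on the training set)
    hits = (stage1.predict(all_var) == y) & (y == 1)
    stage2 = SVC().fit(all_var_s2, hits.astype(int))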
Example #3
    def fit(self, x_dyn, confounds_dyn, x, confounds, y, extra_var=[]):

        if self.verbose: start = time.time()
        ### train subtypes
        self.st_crm = []
        for ii in range(x.shape[1]):
            crm = prediction.ConfoundsRm(confounds_dyn, x_dyn[:, ii, :])
            # st
            st = subtypes.clusteringST()
            st.fit_network(crm.transform(confounds_dyn, x_dyn[:, ii, :]),
                           nSubtypes=self.nSubtypes)
            self.st_crm.append([crm, st])

        ### extract w values
        xw = self.get_w(x, confounds)
        print('xw sub data', xw[0, :])
        if self.verbose:
            print("Subtype extraction, Time elapsed: {}s".format(
                int(time.time() - start)))

        ### Include extra covariates
        if len(extra_var) != 0:
            all_var = np.hstack((xw, extra_var))
        else:
            all_var = xw

        ### prediction model
        if self.verbose: start = time.time()
        tlp = TwoLevelsPrediction(self.verbose,
                                  stage1_model_type=self.stage1_model_type,
                                  gamma=self.gamma)
        tlp.fit(all_var, all_var, y)
        if self.verbose:
            print("Two Levels prediction, Time elapsed: {}s".format(
                int(time.time() - start)))

        ### save parameters
        self.tlp = tlp
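
clusteringST is likewise not shown. A minimal, self-contained sketch of one
plausible subtype-weight extraction (illustrative only; the library's actual
algorithm may differ): cluster the subjects' confound-regressed maps, average
each cluster into a subtype template, and use each subject's correlation with
each template as its weight.

    import numpy as np
    from sklearn.cluster import KMeans

    def subtype_weights(maps, n_subtypes=3):
        """Cluster subject maps; return subject-by-subtype correlation weights."""
        labels = KMeans(n_clusters=n_subtypes, n_init=10,
                        random_state=0).fit_predict(maps)
        templates = np.vstack([maps[labels == k].mean(0)
                               for k in range(n_subtypes)])
        m = maps - maps.mean(1, keepdims=True)            # center each map
        t = templates - templates.mean(1, keepdims=True)  # center each template
        return (m @ t.T) / np.outer(np.linalg.norm(m, axis=1),
                                    np.linalg.norm(t, axis=1))

    maps = np.random.rand(40, 200)  # 40 subjects x 200 connectivity values
    w = subtype_weights(maps)       # shape (40, 3), one weight per subtype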
Example #4
    def fit_files_st(self,
                     files_path_st,
                     subjects_id_list_st,
                     confounds_st,
                     files_path,
                     subjects_id_list,
                     confounds,
                     y,
                     n_seeds,
                     extra_var=[]):
        '''
        Search the given path for each subject ID and grab the results per
        network. Same as fit_files(), except that you can train and test on
        different sets of data.
        '''
        if self.verbose: start = time.time()
        ### train subtypes
        self.st_crm = []
        xw = []
        for ii in range(n_seeds):
            print('Train seed ' + str(ii + 1))
            if self.dynamic:
                [x_dyn, x_ref] = sbp_util.grab_rmap(subjects_id_list_st,
                                                    files_path_st,
                                                    ii,
                                                    dynamic=self.dynamic)
                confounds_dyn = []
                for jj in range(len(x_dyn)):
                    confounds_dyn.append(
                        (confounds_st[jj], ) * x_dyn[jj].shape[0])
                confounds_dyn = np.vstack(confounds_dyn)
                x_dyn = np.vstack(x_dyn)
            else:
                x_ref = sbp_util.grab_rmap(subjects_id_list_st,
                                           files_path_st,
                                           ii,
                                           dynamic=self.dynamic)
                x_dyn = x_ref
                confounds_dyn = confounds_st

            del x_ref
            ## regress confounds
            crm = prediction.ConfoundsRm(confounds_dyn, x_dyn)
            ## extract subtypes
            st = subtypes.clusteringST()
            st.fit_network(crm.transform(confounds_dyn, x_dyn),
                           nSubtypes=self.nSubtypes)
            self.st_crm.append([crm, st])
            del x_dyn

        # compute the W
        xw = self.get_w_files(files_path, subjects_id_list, confounds)
        if self.verbose:
            print("Subtype extraction, Time elapsed: {}s".format(
                int(time.time() - start)))

        ### Include extra covariates
        if len(extra_var) != 0:
            all_var = np.hstack((xw, extra_var))
        else:
            all_var = xw

        ### prediction model
        if self.verbose: start = time.time()
        self.tlp = TwoLevelsPrediction(self.verbose,
                                       stage1_model_type=self.stage1_model_type,
                                       gamma=self.gamma)
        self.tlp.fit(all_var, all_var, y)
        if self.verbose:
            print("Two Levels prediction, Time elapsed: {}s".format(
                int(time.time() - start)))
Example #5
    def fit(self,
            net_data_low_main,
            y,
            confounds,
            n_subtypes,
            n_subtypes_l1=3,
            flag_feature_select=True,
            extra_var=[],
            verbose=True):
        self.verbose = verbose
        ### regress confounds from the connectomes
        self.scale_ref = net_data_low_main.mean(0).mean(1)
        self.cf_rm = prediction.ConfoundsRm(confounds, net_data_low_main)
        net_data_low = self.cf_rm.transform(confounds, net_data_low_main)

        ### compute the subtypes
        if self.verbose: start = time.time()
        st_ = subtypes.clusteringST()
        st_.fit(net_data_low, n_subtypes_l1)
        xw = st_.transform(net_data_low)
        xw = np.nan_to_num(xw)

        print('xw sub data', xw[0, :])
        self.st_l2 = subtypes.clusteringST()
        self.st_l2.fit(net_data_low, n_subtypes)
        xwl2 = self.st_l2.transform(net_data_low)
        xwl2 = np.nan_to_num(xwl2)
        if self.verbose:
            print("Compute subtypes, Time elapsed: {}s".format(
                int(time.time() - start)))

        ### feature selection
        if flag_feature_select:
            if verbose: start = time.time()
            # contrast on the y regressor, e.g. [0, 1, 0, 0, 0] with three confounds
            contrast = np.hstack(
                ([0, 1], np.repeat(0, confounds.shape[1])))
            x_ = np.vstack((np.ones_like(y), y, confounds.T)).T

            labels, regression_result = nsglm.session_glm(np.array(xw), x_)
            cont_results = nsglm.compute_contrast(labels,
                                                  regression_result,
                                                  contrast,
                                                  contrast_type='t')
            pval = cont_results.p_value()
            results = smm.multipletests(pval, alpha=0.01, method='fdr_bh')
            w_select = np.where(results[0])[0]
            if len(w_select) < 10:
                w_select = np.argsort(pval)[:10]
            else:
                w_select = w_select[np.argsort(pval[np.where(results[0])])]
        else:
            # cancel the selection: keep every feature (the weights never equal 2)
            w_select = np.where(xw[0, :] != 2)[0]


        ### Include extra covariates
        if len(extra_var) != 0:
            all_var = np.hstack((xw[:, w_select], extra_var))
        else:
            all_var = xw[:, w_select]
        if self.verbose:
            print("Feature selection, Time elapsed: {}s)".format(
                int(time.time() - start)))

        ### prediction model
        if self.verbose: start = time.time()
        tlp = TwoLevelsPrediction()
        tlp.fit(all_var, xwl2, y, model_type='svm', verbose=self.verbose)
        if self.verbose:
            print("Two Levels prediction, Time elapsed: {}s)".format(
                int(time.time() - start)))

        ### save parameters
        self.median_template = np.median(net_data_low, axis=0)
        self.st = st_
        self.w_select = w_select
        self.tlp = tlp
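
The feature-selection block relies on a nistats-style GLM (nsglm.session_glm
and compute_contrast). As a simpler runnable stand-in for producing the
per-feature p-values, the sketch below uses one two-sample t-test per column,
then applies the same statsmodels FDR correction and the same fallback to the
ten smallest p-values:

    import numpy as np
    from scipy import stats
    import statsmodels.stats.multitest as smm

    rng = np.random.default_rng(0)
    xw = rng.normal(size=(50, 30))  # subjects x subtype weights
    y = rng.integers(0, 2, 50)      # binary outcome

    # hypothetical stand-in for the GLM contrast: one t-test per feature
    pval = np.array([stats.ttest_ind(xw[y == 1, i], xw[y == 0, i]).pvalue
                     for i in range(xw.shape[1])])
    reject = smm.multipletests(pval, alpha=0.01, method='fdr_bh')[0]
    w_select = np.where(reject)[0]
    if len(w_select) < 10:          # fall back to the 10 smallest p-values
        w_select = np.argsort(pval)[:10]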
Example #6
    def fit(self,
            net_data_low_main,
            y,
            confounds,
            n_subtypes,
            n_subtypes_l1=3,
            flag_feature_select=True,
            extra_var=[],
            verbose=True):
        self.verbose = verbose
        ### regress confounds from the connectomes
        self.scale_ref = net_data_low_main.mean(0).mean(1)
        self.cf_rm = prediction.ConfoundsRm(confounds, net_data_low_main)
        net_data_low = self.cf_rm.transform(confounds, net_data_low_main)

        ### compute the subtypes
        if self.verbose: start = time.time()
        st_ = subtypes.clusteringST()
        st_.fit_robust(net_data_low, n_subtypes_l1)
        xw = st_.transform(net_data_low)
        xw = np.nan_to_num(xw)

        self.st_l2 = subtypes.clusteringST()
        self.st_l2.fit_robust(net_data_low, n_subtypes)
        xwl2 = self.st_l2.transform(net_data_low)
        xwl2 = np.nan_to_num(xwl2)
        if self.verbose:
            print("Compute subtypes, Time elapsed: {}s".format(
                int(time.time() - start)))

        ### feature selection
        if flag_feature_select:
            if verbose: start = time.time()
            # contrast on the y regressor, e.g. [0, 1, 0, 0, 0] with three confounds
            contrast = np.hstack(([0, 1], np.repeat(0, confounds.shape[1])))
            x_ = np.vstack((np.ones_like(y), y, confounds.T)).T

            labels, regression_result = nsglm.session_glm(np.array(xw), x_)
            cont_results = nsglm.compute_contrast(labels,
                                                  regression_result,
                                                  contrast,
                                                  contrast_type='t')
            pval = cont_results.p_value()
            results = smm.multipletests(pval, alpha=0.01, method='fdr_bh')
            w_select = np.where(results[0])[0]
            if len(w_select) < 10:
                w_select = np.argsort(pval)[:10]
            else:
                w_select = w_select[np.argsort(pval[np.where(results[0])])]
        else:
            # cancel the selection: keep every feature (the weights never equal 2)
            w_select = np.where(xw[0, :] != 2)[0]

        ### Include extra covariates
        if len(extra_var) != 0:
            all_var = np.hstack((xw[:, w_select], extra_var))
        else:
            all_var = xw[:, w_select]
        if self.verbose:
            print("Feature selection, Time elapsed: {}s".format(
                int(time.time() - start)))

        ### prediction model
        if self.verbose: start = time.time()
        tlp = TwoLevelsPrediction()
        tlp.fit(all_var, xwl2, y, model_type='svm', verbose=self.verbose)
        if self.verbose:
            print("Two Levels prediction, Time elapsed: {}s".format(
                int(time.time() - start)))

        ### save parameters
        self.median_template = np.median(net_data_low, axis=0)
        self.st = st_
        self.w_select = w_select
        self.tlp = tlp
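
Both weight matrices above pass through np.nan_to_num before prediction. A
tiny illustration of why: a degenerate (for instance zero-variance) map yields
NaN correlation weights, and np.nan_to_num turns those into 0.0 so the
downstream models can still fit.

    import numpy as np

    xw = np.array([[0.8, np.nan],
                   [np.nan, -0.2]])
    print(np.nan_to_num(xw))  # NaNs become 0.0; finite weights are unchanged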