def time_segment_matching_accuracy(data, win_size=6): 
    nsubjs = len(data)
    (ndim, nsample) = data[0].shape
    accu = np.zeros(shape=nsubjs)
    nseg = nsample - win_size 
    # mysseg prediction
    trn_data = np.zeros((ndim*win_size, nseg),order='f')
    # the trn data also includes the tst data, which is subtracted out when
    # calculating A
    for m in range(nsubjs):
        for w in range(win_size):
            trn_data[w*ndim:(w+1)*ndim,:] += data[m][:,w:(w+nseg)]
    for tst_subj in range(nsubjs):
        tst_data = np.zeros((ndim*win_size, nseg),order='f')
        for w in range(win_size):
            tst_data[w*ndim:(w+1)*ndim,:] = data[tst_subj][:,w:(w+nseg)]

        A =  np.nan_to_num(stats.zscore((trn_data - tst_data),axis=0, ddof=1))
        B =  np.nan_to_num(stats.zscore(tst_data,axis=0, ddof=1))

        # compute correlation matrix
        corr_mtx = compute_correlation(B.T,A.T)

        for i in range(nseg):
            for j in range(nseg):
                if abs(i-j)<win_size and i != j :
                    corr_mtx[i,j] = -np.inf
        max_idx =  np.argmax(corr_mtx, axis=1)
        accu[tst_subj] = sum(max_idx == range(nseg)) / float(nseg)

    return accu
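# --- Illustrative usage sketch (not part of the original source) ---
# Synthetic data stands in for z-scored, SRM-aligned subject time series; in the real
# pipeline the inputs come from srm.transform(...) as in sfn() further down this page.
# compute_correlation is assumed to be a row-wise Pearson correlation helper (the
# original likely imports one, e.g. from brainiak.fcma.util); a simple stand-in is
# defined here so the sketch is self-contained.
import numpy as np
from scipy import stats

def compute_correlation(m1, m2):
    # Pearson correlation between every row of m1 and every row of m2
    m1z = stats.zscore(m1, axis=1, ddof=1)
    m2z = stats.zscore(m2, axis=1, ddof=1)
    return m1z @ m2z.T / (m1.shape[1] - 1)

rng = np.random.RandomState(0)
n_subjects, n_features, n_trs = 4, 10, 100
demo_data = [np.nan_to_num(stats.zscore(rng.randn(n_features, n_trs), axis=1, ddof=1))
             for _ in range(n_subjects)]
print(time_segment_matching_accuracy(demo_data, win_size=6))  # one accuracy per subject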
Example No. 2
def calculate_gene_expression_similarity(reduced_stat_map_data, mask="full"):
    store_file = "/ahba_data/store_max1_reduced.h5"
    subcortex_mask = "/ahba_data/subcortex_mask.npy"

    results_dfs = []
    with pd.HDFStore(store_file, 'r') as store:
        for donor_id in store.keys():
            print "Loading expression data (%s)" % donor_id
            expression_data = store.get(donor_id.replace(".", "_"))

            print "Getting statmap values (%s)" % donor_id
            nifti_values = reduced_stat_map_data[expression_data.columns]

            print "Removing missing values (%s)" % donor_id
            na_mask = np.isnan(nifti_values)
            if mask == "subcortex":
                na_mask = np.logical_or(na_mask,
                    np.isnan(np.load(subcortex_mask)[expression_data.columns]))
            elif mask == "cortex":
                na_mask = np.logical_or(na_mask, np.logical_not(np.isnan(
                    np.load(subcortex_mask)[expression_data.columns])))
            else:
                assert mask == "full"

            nifti_values = np.array(nifti_values)[np.logical_not(na_mask)]
            expression_data.drop(expression_data.columns[na_mask], axis=1, inplace=True)

            print "z scoring (%s)" % donor_id
            expression_data = pd.DataFrame(zscore(expression_data, axis=1), columns=expression_data.columns,
                                           index=expression_data.index)
            nifti_values = zscore(nifti_values)

            print "Calculating linear regressions (%s)" % donor_id
            regression_results = np.linalg.lstsq(np.c_[nifti_values, np.ones_like(nifti_values)], expression_data.T)
            results_df = pd.DataFrame({"slope": regression_results[0][0]}, index=expression_data.index)

            results_df.columns = pd.MultiIndex.from_tuples([(donor_id[1:], c,) for c in results_df.columns],
                                                           names=['donor_id', 'parameter'])

            results_dfs.append(results_df)

        print "Concatenating results"
        results_df = pd.concat(results_dfs, axis=1)
        del results_dfs

    t, p = ttest_1samp(results_df, 0.0, axis=1)
    group_results_df = pd.DataFrame({"t": t, "p": p}, columns=['t', 'p'], index=expression_data.index)
    _, group_results_df["p (FDR corrected)"], _, _ = multipletests(group_results_df.p, method='fdr_bh')
    group_results_df["variance explained (mean)"] = (results_df.xs('slope', axis=1, level=1) ** 2 * 100).mean(axis=1)
    group_results_df["variance explained (std)"] = (results_df.xs('slope', axis=1, level=1) ** 2 * 100).std(axis=1)
    del results_df
    probe_info = pd.read_csv("/ahba_data/probe_info_max1.csv", index_col=0).drop(['chromosome', "gene_id"], axis=1)
    group_results_df = group_results_df.join(probe_info)
    group_results_df = group_results_df[["gene_symbol", "entrez_id.1", "gene_name","t", "p", "p (FDR corrected)",
                                         "variance explained (mean)", "variance explained (std)"]]

    return group_results_df
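# --- Illustrative sketch (not from the original source) of the regression step above ---
# With both the stat-map values and each gene's expression z-scored, the lstsq slope
# equals the Pearson correlation, which is why slope**2 * 100 is reported as variance
# explained. The names below are made up for the demo.
import numpy as np
from scipy.stats import zscore, pearsonr

rng = np.random.RandomState(0)
x = zscore(rng.randn(200))                    # stands in for nifti_values
y = zscore(0.5 * x + rng.randn(200))          # stands in for one gene's expression profile
design = np.c_[x, np.ones_like(x)]            # predictor plus intercept column
slope, intercept = np.linalg.lstsq(design, y, rcond=None)[0]
print(slope, pearsonr(x, y)[0])               # the two agree up to floating point error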
Example No. 3
    def run(self):
        
        self.results = []
        self.loader = ConnectivityDataLoader()
        self.X, self.y = self.loader.setup_analysis(self.path, 
                              self.roi_list, 
                              self.directory, 
                              self.condition_list, 
                              self.subjects).filter(self.filter_).get_data()

        
        X = self.X
        y = self.y
        
        X = zscore(X, axis=1) # Sample-wise
        y = zscore(np.float_(y))
        
        self.fs = FeatureSelectionIterator()
        self.fs.setup_analysis(self.fs_algorithm, self.fs_ranking_fx).run(X, y).select_first(80)
        
        self.reg = RegressionAnalysis().setup_analysis(self.cv_schema, 
                                                  self.learner, 
                                                  self.error_fx)
        #Speedup stuff
        schema = ShuffleSplit(12, n_iter=1, test_size=0.25)
        self.perm_reg = RegressionAnalysis().setup_analysis(schema, 
                                                  self.learner, 
                                                  self.error_fx)
        
        self.perm = PermutationAnalysis().setup_analysis(self.reg, 
                                                    n_permutation=self.n_permutations,
                                                    dimension='features')
        

        
        for i,set_ in enumerate(self.fs):
            
            if i > 78:
                X_ = X[:,set_]
                y_ = y
                            
                reg_res = self.reg.run(X_, y_) # To be selected
                n_dist = self.perm.run(X_, y_)
                
                p_res = self.perm.pvalues(reg_res)
                
                self.results.append([reg_res, n_dist, p_res])

        #self.save()
        return self.results
def sfn(l, msk, myrad, bcast_var):
    # Arguments:
    # l -- a list of 4D arrays, containing data from a single searchlight
    # msk -- a 3D binary array, mask of this searchlight
    # myrad -- an integer, sl_rad
    # bcast_var -- whatever is broadcasted 

    # extract training and testing data
    train_data = []
    test_data = []
    d1,d2,d3,ntr = l[0].shape
    nvx = d1*d2*d3
    for s in l:
        train_data.append(np.reshape(s[:,:,:,:int(ntr/2)],(nvx,int(ntr/2))))
        test_data.append(np.reshape(s[:,:,:,int(ntr/2):],(nvx,ntr-int(ntr/2))))
    # train an srm model 
    srm = SRM(bcast_var[0],bcast_var[1])
    srm.fit(train_data)
    # transform test data
    shared_data = srm.transform(test_data)
    for s in range(len(l)):
        shared_data[s] = np.nan_to_num(stats.zscore(shared_data[s],axis=1,ddof=1))
    # run experiment
    accu = time_segment_matching_accuracy(shared_data)

    # return: can also return several values. In that case, the final output will be 
    # a 3D array of tuples
    return np.mean(accu) 
Example No. 5
    def run(self):
        
        self.loader = ConnectivityDataLoader()
        self.X, self.y = self.loader.setup_analysis(self.path, 
                              self.roi_list, 
                              self.directory, 
                              self.condition_list, 
                              self.subjects).filter(self.filter_).get_data()
        
        X = self.X
        y = self.y
                             
        X = zscore(X, axis=1) # Sample-wise
        y = zscore(np.float_(y))       
        
        self.fs = FeatureSelectionIterator().setup_analysis(self.fs_algorithm, 
                          self.fs_ranking_fx)
        
        
        self.reg = RegressionAnalysis().setup_analysis(self.cv_schema, 
                                                  self.learner, 
                                                  self.error_fx,
                                                  feature_selection=self.fs)
        
        self.perm = PermutationAnalysis().setup_analysis(self.reg, 
                                                    n_permutation=self.n_permutations,
                                                    dimension='labels')
        
        self.results = []

                
        reg_res = self.reg.run(X, y) # To be selected
        
        perm_res = self.perm.run(X, y)
                       
        self.results.append([reg_res, perm_res])
        
        #self.save()
        
        return self.results
Example No. 6
def zscore_function(rep):
    """
    This function applies the zscore() transform to every value in the replications.

    Args:
        rep : LIST[rep1, rep2, rep3, ...]
            A repN is a biological data used to calc the likelihood result

    Returns:
         LIST[rep1, rep2, rep3, ...]
              The new transformed replications
    """
    rep = map(lambda x: np.asmatrix(np.transpose(np.array([sss.zscore(item) for item in np.transpose(np.asarray(x))]))),
              rep)
    return list(rep)
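# --- Illustrative usage sketch (not part of the original source) ---
# Two fake replications (rows x columns); zscore_function standardizes every column
# of each replication independently and returns numpy matrices.
import numpy as np
import scipy.stats as sss

rng = np.random.RandomState(0)
demo_reps = [rng.rand(5, 3) * 10, rng.rand(5, 3) * 10]
demo_out = zscore_function(demo_reps)
print(demo_out[0].mean(axis=0))  # column means are ~0 after the transform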
Example No. 7
    def run(self):

        self.loader = ConnectivityDataLoader()
        self.X, self.y = self.loader.setup_analysis(self.path,
                              self.roi_list,
                              self.directory,
                              self.condition_list,
                              self.subjects).filter(self.filter_).get_data()

        X = self.X
        y = self.y

        X = zscore(X, axis=1) # Sample-wise
        y = zscore(np.float_(y))

        self.fs = FeatureSelectionIterator()
        self.fs.setup_analysis(self.fs_algorithm,
                               self.fs_ranking_fx).run(X, y).select_first(80)

        self.reg = RegressionAnalysis().setup_analysis(self.cv_schema,
                                                       self.learner,
                                                       self.error_fx)
        self.results = []
        for set_ in self.fs:
            X_ = X[:, set_]
            y_ = y

            reg_res = self.reg.run(X_, y_)  # To be selected

            self.results.append([reg_res])

        self.save()

        return self.results
Example No. 8
    def add_normalized_te(self, normed_prefix="norm"):
        """
        z-score normalize the TE values.

        Creates new columns corresponding to normed TEs,
        beginning with 'normed_prefix'.

        Normed TEs are first logged (base 2) and then z-score
        normalized.
        """
        print "Normalizing TE..."
        te_cols = [c for c in self.table.columns \
                   if c.startswith("TE_")]
        for col in te_cols:
            normed_col = "%s_%s" %(normed_prefix, col)
            self.table[normed_col] = zscore(self.table[col].apply(log2).dropna())
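# --- Standalone sketch (illustrative only, not the original class) ---
# The same normalization the method above performs, on a toy DataFrame with TE_*
# columns: log2-transform each TE column, then z-score it into a new 'norm_' column.
import pandas as pd
from numpy import log2
from scipy.stats import zscore

demo_table = pd.DataFrame({"TE_sample1": [0.5, 1.0, 2.0, 4.0],
                           "TE_sample2": [1.0, 2.0, 8.0, 16.0]})
for col in [c for c in demo_table.columns if c.startswith("TE_")]:
    demo_table["norm_%s" % col] = zscore(demo_table[col].apply(log2).dropna())
print(demo_table)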
Example No. 9
def process_input(subjects_files, mask):
    """Process input to obtain data suitable for SRM"""

    mask_suffix = "_" + mask + ".nii.gz"
    srm_input = []
    for subject_files in subjects_files:
        srm_input_subject = []
        for path in subject_files:
            if path.name.endswith(mask_suffix):
                continue
            img = nib.load(str(path))
            mask_path = \
                str(path.with_suffix("").with_suffix("")) + mask_suffix
            mask = nib.load(str(mask_path))
            srm_input_subject.append(nilearn.masking.apply_mask(img, mask))
        srm_input.append(stats.zscore(np.concatenate(srm_input_subject),
                                      axis=0, ddof=1).T)
    return srm_input
Example No. 10
def get_feature_weights_matrix(weights, sets, mask, indices):
    """
    Function used to compute the average weight matrix in case of
    several cross-validation folds and feature selection for each
    fold.
    
    Parameters
    ----------
    weights : ndarray shape n_folds x n_selected_features
        The weights matrix with the shape specified in the signature
    sets : ndarray shape n_folds x n_selected_features
        This represents the index in the square matrix of the feature selected 
        by the algorithm in each cross-validation fold
    mask : ndarray shape n_roi x n_roi 
        The mask matrix of the valid ROIs selected. Important: this matrix
        should be triangular with the lower part set to zero.
    indices : tuple
        This is equal to np.nonzero(mask)
        
    Returns
    -------
    matrix: ndarray n_roi x n_roi
        It returns the average weights across cross-validation fold in
        square form.
    
    """
    
    
    weights = weights.squeeze()
    filling_vector = np.zeros(np.count_nonzero(mask))
    counting_vector = np.zeros(np.count_nonzero(mask))
    
    for s, w in zip(sets, weights):
        filling_vector[s] += zscore(w)
        counting_vector[s] += 1
        
    avg_weigths = np.nan_to_num(filling_vector/counting_vector)
    mask[indices] = avg_weigths    
    matrix = np.nan_to_num(copy_matrix(mask, diagonal_filler=0))
    
    return matrix
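# --- Illustrative usage sketch (not part of the original source) ---
# copy_matrix is external to this snippet; a plausible stand-in that mirrors its
# apparent behaviour (symmetrize an upper-triangular matrix and fill the diagonal)
# is defined here for the demo only.
import numpy as np
from scipy.stats import zscore

def copy_matrix(matrix, diagonal_filler=0):
    out = matrix + matrix.T
    np.fill_diagonal(out, diagonal_filler)
    return out

rng = np.random.RandomState(0)
n_roi, n_folds, n_selected = 6, 4, 5
demo_mask = np.triu(np.ones((n_roi, n_roi)), k=1)   # upper triangle, lower part zero
demo_indices = np.nonzero(demo_mask)
n_conn = np.count_nonzero(demo_mask)                # number of candidate connections
demo_sets = np.array([rng.choice(n_conn, n_selected, replace=False)
                      for _ in range(n_folds)])
demo_weights = rng.randn(n_folds, n_selected)
# connections never selected end up as 0/0 and are set to 0 by nan_to_num
# (a RuntimeWarning may be printed)
avg_matrix = get_feature_weights_matrix(demo_weights, demo_sets, demo_mask, demo_indices)
print(avg_matrix.shape)  # (n_roi, n_roi): symmetric average of z-scored fold weights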
def searchlight(coords, human_bounds, mask, subjs, song_idx, song_bounds,
                srm_k, hrf):
    """run searchlight 

       Create searchlight object and perform voxel function at each searchlight location
    
       Parameters
       ----------
       coords : voxel by xyz ndarray (2D, Vx3)
       human_bounds : ndarray of human-annotated event boundaries for the song
       mask   : 3D binary ndarray (e.g. 91 x 109 x 91)
       subjs  : list of subject IDs
       song_idx : song index (scalar)
       song_bounds : ndarray of song boundary TRs
       srm_k  : # of SRM features (scalar)
       hrf    : hemodynamic response offset in TRs (scalar)
       
       Returns
       -------
       3D data: brain (or ROI) filled with searchlight function scores (3D)

    """

    stride = 5
    radius = 5
    min_vox = srm_k
    nPerm = 1000
    SL_allvox = []
    SL_results = []
    datadir = '/jukebox/norman/jamalw/MES/prototype/link/scripts/data/searchlight_input/'
    for x in range(0, np.max(coords, axis=0)[0] + stride, stride):
        for y in range(0, np.max(coords, axis=0)[1] + stride, stride):
            for z in range(0, np.max(coords, axis=0)[2] + stride, stride):
                if not os.path.isfile(datadir + subjs[0] + '/' + str(x) + '_' +
                                      str(y) + '_' + str(z) + '.npy'):
                    continue
                D = distance.cdist(coords,
                                   np.array([x, y, z]).reshape((1, 3)))[:, 0]
                SL_vox = D <= radius
                data = []
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) +
                                        '_' + str(y) + '_' + str(z) + '.npy')
                    subj_regs = np.genfromtxt(datadir + subjs[i] +
                                              '/EPI_mcf1.par')
                    motion = subj_regs.T
                    regr = linear_model.LinearRegression()
                    regr.fit(motion[:, 0:2511].T, subj_data[:, :, 0].T)
                    subj_data1 = subj_data[:, :, 0] - np.dot(
                        regr.coef_,
                        motion[:, 0:2511]) - regr.intercept_[:, np.newaxis]
                    data.append(
                        np.nan_to_num(stats.zscore(subj_data1, axis=1,
                                                   ddof=1)))
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) +
                                        '_' + str(y) + '_' + str(z) + '.npy')
                    subj_regs = np.genfromtxt(datadir + subjs[i] +
                                              '/EPI_mcf2.par')
                    motion = subj_regs.T
                    regr = linear_model.LinearRegression()
                    regr.fit(motion[:, 0:2511].T, subj_data[:, :, 1].T)
                    subj_data2 = subj_data[:, :, 1] - np.dot(
                        regr.coef_,
                        motion[:, 0:2511]) - regr.intercept_[:, np.newaxis]
                    data.append(
                        np.nan_to_num(stats.zscore(subj_data2, axis=1,
                                                   ddof=1)))
                print("Running Searchlight")
                # only run function on searchlights with voxels greater than or equal to min_vox
                if data[0].shape[0] >= min_vox:
                    SL_match = HMM(data, human_bounds, song_idx, song_bounds,
                                   srm_k, hrf)
                    SL_results.append(SL_match)
                    SL_allvox.append(np.array(np.nonzero(SL_vox)[0]))
    voxmean = np.zeros((coords.shape[0], nPerm + 1))
    vox_SLcount = np.zeros(coords.shape[0])
    for sl in range(len(SL_results)):
        voxmean[SL_allvox[sl], :] += SL_results[sl]
        vox_SLcount[SL_allvox[sl]] += 1
    voxmean = voxmean / vox_SLcount[:, np.newaxis]
    vox_z = np.zeros((coords.shape[0], nPerm + 1))
    for p in range(nPerm + 1):
        vox_z[:,
              p] = (voxmean[:, p] - np.mean(voxmean[:, 1:], axis=1)) / np.std(
                  voxmean[:, 1:], axis=1)
    return vox_z, voxmean
def searchlight(coords, mask, subjs, set_srm):
    """run searchlight 

       Create searchlight object and perform voxel function at each searchlight location
    
       Parameters
       ----------
       coords : voxel by xyz ndarray (2D, Vx3)
       mask   : x x y x z (e.g. 91,109,91)
       subjs  : list of subject IDs
       set_srm : option forwarded to isc_srm
 
       Returns
       -------
       3D data: brain (or ROI) filled with searchlight function scores (3D)

    """

    stride = 5
    radius = 5
    min_vox = 10
    nPerm = 1000
    SL_allvox = []
    SL_results = []
    voxISC = np.zeros(coords.shape[0])
    datadir = '/jukebox/norman/jamalw/MES/prototype/link/scripts/data/searchlight_input/'
    for x in range(0, np.max(coords, axis=0)[0] + stride, stride):
        for y in range(0, np.max(coords, axis=0)[1] + stride, stride):
            for z in range(0, np.max(coords, axis=0)[2] + stride, stride):
                if not os.path.isfile(datadir + subjs[0] + '/' + str(x) + '_' +
                                      str(y) + '_' + str(z) + '.npy'):
                    continue
                D = distance.cdist(coords,
                                   np.array([x, y, z]).reshape((1, 3)))[:, 0]
                SL_vox = D <= radius
                data = []
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) +
                                        '_' + str(y) + '_' + str(z) + '.npy')
                    data.append(
                        np.nan_to_num(
                            stats.zscore(subj_data[:, :, 0], axis=1, ddof=1)))
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) +
                                        '_' + str(y) + '_' + str(z) + '.npy')
                    data.append(
                        np.nan_to_num(
                            stats.zscore(subj_data[:, :, 1], axis=1, ddof=1)))
                print("Running Searchlight")
                SL_isc_mean_results = isc_srm(data, set_srm)
                SL_results.append(SL_isc_mean_results)
                SL_allvox.append(np.array(np.nonzero(SL_vox)[0]))
    voxmean = np.zeros((coords.shape[0]))
    vox_SLcount = np.zeros(coords.shape[0])
    for sl in range(len(SL_results)):
        voxmean[SL_allvox[sl]] += SL_results[sl]
        vox_SLcount[SL_allvox[sl]] += 1
    print("Voxmean: ", voxmean.shape)
    print("vox_SLcount: ", vox_SLcount)
    voxmean = voxmean / vox_SLcount

    return voxmean
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.patches as patches
import numpy as np
import brainiak.eventseg.event
from scipy.stats import norm, zscore, pearsonr, stats
from scipy.signal import gaussian, convolve
from sklearn import decomposition
from brainiak.funcalign.srm import SRM

datadir = '/tigress/jamalw/MES/prototype/link/scripts/chris_dartmouth/data/'

songs = ['St Pauls Suite', 'I Love Music', 'Moonlight Sonata',
         'Change of the Guard', 'Waltz of Flowers', 'The Bird', 'Island',
         'Allegro Moderato', 'Finlandia', 'Early Summer', 'Capriccio Espagnole',
         'Symphony Fantastique', 'Boogie Stop Shuffle', 'My Favorite Things',
         'Blue Monk', 'All Blues']

# Load in data
train = np.nan_to_num(stats.zscore(np.load(datadir + 'A1_run1_n25.npy'),axis=1,ddof=1))
test = np.nan_to_num(stats.zscore(np.load(datadir + 'A1_run2_n25.npy'),axis=1,ddof=1))

# Convert data into lists where each element is voxels by samples
train_list = []
test_list = []
for i in range(0,train.shape[2]):
    train_list.append(train[:,:,i])
    test_list.append(test[:,:,i])

# Initialize model
print('Building Model')
srm = SRM(n_iter=10, features=10)

# Fit model to training data (run 1)
print('Training Model')
Example No. 14
def sigmoid(x, k, c):
    y = 1 / (1 + np.exp(-k*(x))) + c
    return y


def logarithm(x, a, b):
    y = a + np.log(x + b)
    return y

def exponential(x, k):
    y = np.exp(k * x)
    return y

X = X/X.std(axis=0)
y = y/y.std()

X = zscore(X, axis=0)
y = zscore(y, axis=0)

x_ = np.linspace(-1.5, 1.5, 100)
error_conn = []
mse_ = []
func = [exponential, sigmoid]
shift = 200
for i in range(100):
    mse__ = []
    pl.figure()
    pl.scatter(X[:,shift+i], y)
    for f in func:
        try:
            popt, pcov = curve_fit(f, X[:,shift + i], y)
        except RuntimeError:
Example No. 15
def standardize_values(df, prop_name):
    std_dev = 3
    z_scores = stats.zscore(df.loc[:, prop_name])
    return df[np.abs(z_scores) < std_dev]
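# --- Usage sketch (illustrative only) ---
# Rows whose value in the chosen column lies 3 or more standard deviations from the
# mean are dropped.
import numpy as np
import pandas as pd
from scipy import stats

demo_df = pd.DataFrame({"height": [170, 171, 169, 172, 170, 168, 171,
                                   170, 169, 172, 171, 170, 500]})
print(standardize_values(demo_df, "height"))  # the extreme 500 row is removed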
    ])

    human_bounds = np.load(ann_dirs + songs1[song_number] + '/' +
                           songs1[song_number] + '_beh_seg.npy') + hrf

    human_bounds = np.append(0, np.append(human_bounds, durs1[song_number]))

    start_run1 = song_bounds1[song_number]
    end_run1 = song_bounds1[song_number + 1]

    start_run2 = song_bounds2[songs2.index(songs1[song_number])]
    end_run2 = song_bounds2[songs2.index(songs1[song_number]) + 1]

    # Load in data
    run1 = stats.zscore(np.load(datadir + 'fdr_01_' + roi +
                                '_split_merge_no_srm_run1_n25.npy'),
                        axis=1,
                        ddof=1)
    run2 = stats.zscore(np.load(datadir + 'fdr_01_' + roi +
                                '_split_merge_no_srm_run2_n25.npy'),
                        axis=1,
                        ddof=1)

    #if do_srm == 0:
    run1DataAvg = np.mean(run1, axis=2)
    run2DataAvg = np.mean(run2, axis=2)

    song1 = run1DataAvg[:, start_run1:end_run1]
    song2 = run2DataAvg[:, start_run2:end_run2]
    #elif do_srm == 1:
    # Convert data into lists where each element is voxels by samples
    #run1_list = []
             features_orb_4, features_orb_5, features_orb_6, features_orb_7,
             features_orb_8, features_orb_9),
            axis=0)
    else:
        features = np.load("features.npy")
if load_features_flat:
    print("Loading flattened feature data")
    features_flatten = np.load("features_flatten.npy")
data_y = np.load("data_y.npy")
if limit_data:
    data_y = data_y[:image_count]

if remove_outliers:
    #Remove outliers
    print("Removing Outliers")
    z = np.abs(stats.zscore(data[:, 6].astype(int)))

    # print(np.where(z > threshold))
    print(data.shape)
    print(data_y.shape)

    data = data[(z < threshold)]
    data_y = data_y[(z < threshold)]

    print("Outliers Removed")
    print(data.shape)
    print(data_y.shape)

#used to reduce the image pool to run faster tests
np.random.seed(42)
if limit_data:
Example No. 18
def compute_mesh_weights(mesh, weight_type='conformal', cot_threshold=None,
                         z_threshold=None):
    """
    Compute a weight matrix.
    W is a sparse weight matrix and W(i,j) = 0 if vertex i and vertex j are not
    connected in the mesh.

    details are presented in:
    Desbrun, M., Meyer, M., & Alliez, P. (2002).
    Intrinsic parameterizations of surface meshes.
    Computer Graphics Forum, 21(3), 209–218.
    https://doi.org/10.1111/1467-8659.00580

    and
    Reuter, M., Biasotti, S., & Giorgi, D. (2009).
    Discrete Laplace–Beltrami operators for shape analysis and segmentation.
    Computers & …, 33(3), 381–390.
    https://doi.org/10.1016/j.cag.2009.03.005

    additional checks and thresholds are applied to ensure finite values

    :param mesh:
    :param weight_type: choice across conformal, fem, meanvalue, authalic
    :param cot_threshold:
    :param z_threshold:
    :return:
    """
#    cot_threshold=0.00001
#   print('angle threshold')
    print('    Computing mesh weights of type ' + weight_type)
    vert = mesh.vertices
    poly = mesh.faces

    Nbv = vert.shape[0]
    W = sparse.lil_matrix((Nbv, Nbv))
    femB = sparse.lil_matrix((Nbv, Nbv))
    if weight_type == 'conformal' or weight_type == 'fem':
        threshold = 0.0001  # np.spacing(1)??
        threshold_needed = 0
        for i in range(3):
            i1 = np.mod(i, 3)
            i2 = np.mod(i + 1, 3)
            i3 = np.mod(i + 2, 3)
            pp = vert[poly[:, i2], :] - vert[poly[:, i1], :]
            qq = vert[poly[:, i3], :] - vert[poly[:, i1], :]
            cr = np.cross(pp, qq)
            area = np.sqrt(np.sum(np.power(cr, 2), 1)) / 2
#             nopp = np.apply_along_axis(np.linalg.norm, 1, pp)
#             noqq = np.apply_along_axis(np.linalg.norm, 1, qq)
            noqq = np.sqrt(np.sum(qq * qq, 1))
            nopp = np.sqrt(np.sum(pp * pp, 1))
            thersh_nopp = np.where(nopp < threshold)[0]
            thersh_noqq = np.where(noqq < threshold)[0]
            if len(thersh_nopp) > 0:
                nopp[thersh_nopp] = threshold
                threshold_needed += len(thersh_nopp)
            if len(thersh_noqq) > 0:
                noqq[thersh_noqq] = threshold
                threshold_needed += len(thersh_noqq)
    #        print(np.min(noqq))
            pp = pp / np.vstack((nopp, np.vstack((nopp, nopp)))).transpose()
            qq = qq / np.vstack((noqq, np.vstack((noqq, noqq)))).transpose()
            ang = np.arccos(np.sum(pp * qq, 1))
            # ############## preventing infs in weights
            inds_zeros = np.where(ang == 0)[0]
            ang[inds_zeros] = threshold
            threshold_needed_angle = len(inds_zeros)
            ################################
            cot = 1 / np.tan(ang)
            if cot_threshold is not None:
                thresh_inds = cot < 0
                cot[thresh_inds] = cot_threshold
                threshold_needed_angle += np.count_nonzero(thresh_inds)
            W = W + sparse.coo_matrix((cot, (poly[:, i2], poly[:, i3])),
                                      shape=(Nbv, Nbv))
            W = W + sparse.coo_matrix((cot, (poly[:, i3], poly[:, i2])),
                                      shape=(Nbv, Nbv))
            femB = femB + sparse.coo_matrix((area / 12,
                                             (poly[:, i2], poly[:, i3])),
                                            shape=(Nbv, Nbv))
            femB = femB + sparse.coo_matrix((area / 12,
                                             (poly[:, i3], poly[:, i2])),
                                            shape=(Nbv, Nbv))

        # if weight_type == 'fem' :
        #     W.data = W.data/2

        nnz = W.nnz
        if z_threshold is not None:
            z_weights = sss.zscore(W.data)
            inds_out = np.where(np.abs(z_weights) > z_threshold)[0]
            W.data[inds_out] = np.mean(W.data)
            print('    -Zscore threshold needed for ', len(inds_out),
                  ' values = ', 100 * len(inds_out) / nnz, ' %')
            # inds_out_inf = np.where(z_weights < -z_thresh)[0]
            # inds_out_sup = np.where(z_weights > z_thresh)[0]
            # val_inf = np.max(W.data[inds_out_inf])
            # W.data[inds_out_inf] = val_inf
            # val_sup = np.min(W.data[inds_out_sup])
            # W.data[inds_out_sup] = val_sup
            # print('    -Zscore threshold needed for ',
            # len(inds_out_inf)+len(inds_out_sup),' values-')
        print('    -edge length threshold needed for ', threshold_needed,
              ' values = ', 100 * threshold_needed / nnz, ' %')
        if cot_threshold is not None:
            print('    -cot threshold needed for ', threshold_needed_angle,
                  ' values = ', 100 * threshold_needed_angle / nnz, ' %')

    if weight_type == 'meanvalue':
        for i in range(3):
            i1 = np.mod(i, 3)
            i2 = np.mod(i + 1, 3)
            i3 = np.mod(i + 2, 3)
            pp = vert[poly[:, i2], :] - vert[poly[:, i1], :]
            qq = vert[poly[:, i3], :] - vert[poly[:, i1], :]
            rr = vert[poly[:, i2], :] - vert[poly[:, i3], :]
            # normalize the vectors
            noqq = np.sqrt(np.sum(qq * qq, 1))
            nopp = np.sqrt(np.sum(pp * pp, 1))
            norr = np.sqrt(np.sum(rr * rr, 1))
            pp = pp / np.vstack((nopp, np.vstack((nopp, nopp)))).transpose()
            qq = qq / np.vstack((noqq, np.vstack((noqq, noqq)))).transpose()
            rr = rr / np.vstack((norr, np.vstack((norr, norr)))).transpose()
            # compute angles
            angi1 = np.arccos(np.sum(pp * qq, 1)) / 2
            qq = -qq
            angi2 = np.arccos(np.sum(rr * qq, 1)) / 2
            W = W + sparse.coo_matrix((np.tan(angi1) / norr,
                                       (poly[:, i1], poly[:, i3])),
                                      shape=(Nbv, Nbv))
            W = W + sparse.coo_matrix((np.tan(angi2) / norr,
                                       (poly[:, i3], poly[:, i1])),
                                      shape=(Nbv, Nbv))
        nnz = W.nnz
    if weight_type == 'authalic':
        for i in range(3):
            i1 = np.mod(i, 3)
            i2 = np.mod(i + 1, 3)
            i3 = np.mod(i + 2, 3)
            pp = vert[poly[:, i2], :] - vert[poly[:, i1], :]
            qq = vert[poly[:, i3], :] - vert[poly[:, i1], :]
            rr = vert[poly[:, i2], :] - vert[poly[:, i3], :]
            # normalize the vectors
            noqq = np.sqrt(np.sum(qq * qq, 1))
            nopp = np.sqrt(np.sum(pp * pp, 1))
            norr = np.sqrt(np.sum(rr * rr, 1))
            pp = pp / np.vstack((nopp, np.vstack((nopp, nopp)))).transpose()
            qq = qq / np.vstack((noqq, np.vstack((noqq, noqq)))).transpose()
            rr = rr / np.vstack((norr, np.vstack((norr, norr)))).transpose()
            # compute angles
            angi1 = np.arccos(np.sum(pp * qq, 1)) / 2
            cot1 = 1 / np.tan(angi1)
            qq = -qq
            angi2 = np.arccos(np.sum(rr * qq, 1)) / 2
            cot2 = 1 / np.tan(angi2)
            W = W + sparse.coo_matrix((cot1 / norr ** 2,
                                       (poly[:, i3], poly[:, i1])),
                                      shape=(Nbv, Nbv))
            W = W + sparse.coo_matrix((cot2 / norr ** 2,
                                       (poly[:, i1], poly[:, i3])),
                                      shape=(Nbv, Nbv))
        nnz = W.nnz
    li = np.hstack(W.data)
    nb_Nan = len(np.where(np.isnan(li))[0])
    nb_neg = len(np.where(li < 0)[0])
    print('    -number of Nan in weights: ',
          nb_Nan, ' = ', 100 * nb_Nan / nnz, ' %')
    print('    -number of Negative values in weights: ',
          nb_neg, ' = ', 100 * nb_neg / nnz, ' %')

    return W.tocsr(), femB.tocsr()
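# --- Usage sketch (illustrative only) ---
# A toy one-triangle "mesh" exposing the vertices/faces attributes the function
# expects (the original likely passes a trimesh-style mesh). The imports below are
# the ones compute_mesh_weights itself relies on.
from collections import namedtuple
import numpy as np
import scipy.stats as sss
from scipy import sparse

ToyMesh = namedtuple("ToyMesh", ["vertices", "faces"])
demo_mesh = ToyMesh(vertices=np.array([[0., 0., 0.],
                                       [1., 0., 0.],
                                       [0., 1., 0.]]),
                    faces=np.array([[0, 1, 2]]))
W_demo, femB_demo = compute_mesh_weights(demo_mesh, weight_type='conformal')
print(W_demo.toarray())  # symmetric cotangent weights for the single triangle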
run1_masked = []
run2_masked = []
indices = np.where((mask_img > 0) & (parcels == 77))

for s in range(len(subjs)):
    # Load subjects nifti and motion data then clean (run1)
    print("Loading Run1 BOLD subj num: " + str(s + 1))
    run1 = nib.load(
        datadir + 'subjects/' + subjs[s] +
        '/analysis/run1.feat/trans_filtered_func_data.nii').get_data()[:, :, :,
                                                                       0:2511]
    print("Loading Run1 Motion Regressors")
    motion_run1 = np.genfromtxt(motion_dir + subjs[s] + '/EPI_mcf1.par')
    print("Cleaning Run1 BOLD Data")
    clean_run1 = stats.zscore(clean_data(run1[indices][:], motion_run1),
                              axis=1,
                              ddof=1)
    run1_masked.append(run1[indices][:])

    # Load subjects nifti and motion data then clean (run2)
    print("Loading Run2 BOLD subj num: " + str(s + 1))
    run2 = nib.load(
        datadir + 'subjects/' + subjs[s] +
        '/analysis/run2.feat/trans_filtered_func_data.nii').get_data()[:, :, :,
                                                                       0:2511]
    print("Loading Run2 Motion Regressors")
    motion_run2 = np.genfromtxt(motion_dir + subjs[s] + '/EPI_mcf2.par')
    print("Cleaning Run2 BOLD Data")
    clean_run2 = stats.zscore(clean_data(run2[indices][:], motion_run2),
                              axis=1,
                              ddof=1)
Example No. 20
# Convert data to a list of arrays matching SRM input.
# Each element is a matrix of voxels by TRs.
# Also, concatenate data from both hemispheres in the brain.
movie_data = []
for s in range(subjects):
    if s % size == rank:
        movie_data.append(
            np.concatenate(
                [movie_data_left[:, :, s], movie_data_right[:, :, s]], axis=0))
    else:
        movie_data.append(None)

# Z-score the data
for subject in range(subjects):
    if movie_data[subject] is not None:
        movie_data[subject] = stats.zscore(movie_data[subject], axis=1, ddof=1)

# Run SRM with the movie data
srm = brainiak.funcalign.srm.SRM(n_iter=10, features=50, comm=comm)
srm.fit(movie_data)


# We define a function to present the output of the experiment.
def plot_confusion_matrix(cm, title="Confusion Matrix"):
    """Plots a confusion matrix for each subject
    """
    import matplotlib.pyplot as plt
    import math
    plt.figure()
    subjects = len(cm)
    root_subjects = math.sqrt(subjects)
Example No. 21
def calculate_gene_expression_similarity(reduced_stat_map_data):
    store_file = "/ahba_data/store_max1_reduced.h5"

    results_dfs = []
    with pd.HDFStore(store_file, 'r') as store:
        for donor_id in store.keys():
            print "Loading expression data (%s)" % donor_id
            expression_data = store.get(donor_id.replace(".", "_"))

            print "Getting statmap values (%s)" % donor_id
            nifti_values = reduced_stat_map_data[expression_data.columns]

            print "Removing missing values (%s)" % donor_id
            na_mask = np.isnan(nifti_values)
            nifti_values = np.array(nifti_values)[np.logical_not(na_mask)]
            expression_data.drop(expression_data.columns[na_mask],
                                 axis=1,
                                 inplace=True)

            print "z scoring (%s)" % donor_id
            expression_data = pd.DataFrame(zscore(expression_data, axis=1),
                                           columns=expression_data.columns,
                                           index=expression_data.index)
            nifti_values = zscore(nifti_values)

            print "Calculating linear regressions (%s)" % donor_id
            regression_results = np.linalg.lstsq(
                np.c_[nifti_values, np.ones_like(nifti_values)],
                expression_data.T)
            results_df = pd.DataFrame({"slope": regression_results[0][0]},
                                      index=expression_data.index)

            results_df.columns = pd.MultiIndex.from_tuples(
                [(
                    donor_id[1:],
                    c,
                ) for c in results_df.columns],
                names=['donor_id', 'parameter'])

            results_dfs.append(results_df)

        print "Concatenating results"
        results_df = pd.concat(results_dfs, axis=1)
        del results_dfs

    t, p = ttest_1samp(results_df, 0.0, axis=1)
    group_results_df = pd.DataFrame({
        "t": t,
        "p": p
    },
                                    columns=['t', 'p'],
                                    index=expression_data.index)
    _, group_results_df["p (FDR corrected)"], _, _ = multipletests(
        group_results_df.p, method='fdr_bh')
    group_results_df["variance explained (mean)"] = (
        results_df.xs('slope', axis=1, level=1)**2 * 100).mean(axis=1)
    group_results_df["variance explained (std)"] = (
        results_df.xs('slope', axis=1, level=1)**2 * 100).std(axis=1)
    del results_df
    probe_info = pd.read_csv("/ahba_data/probe_info_max1.csv",
                             index_col=0).drop(['chromosome', "gene_id"],
                                               axis=1)
    group_results_df = group_results_df.join(probe_info)
    group_results_df = group_results_df[[
        "gene_symbol", "entrez_id.1", "gene_name", "t", "p",
        "p (FDR corrected)", "variance explained (mean)",
        "variance explained (std)"
    ]]

    return group_results_df
Example No. 22
movie_file = sio.loadmat('data/sl_movie_data.mat')
movie_data = movie_file['data']

# Dataset size parameters
dim1,dim2,dim3,ntr,nsubj = movie_data.shape

# preprocess data, zscore and set NaN to 0
all_data = [] # first half train, second half test
for s in range(nsubj):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # To ignore "RuntimeWarning: invalid value encountered in true_divide"
        # There are some all-zero voxels in the data which have to be kept, so there will be a
        # warning when z-scoring the data. It is safe to ignore this warning. If your data does
        # not contain zero voxels, you can remove the two lines above.
        train_tmp = np.nan_to_num(stats.zscore(movie_data[:,:,:,:int(ntr/2),s],axis=3,ddof=1))
        test_tmp = np.nan_to_num(stats.zscore(movie_data[:,:,:,int(ntr/2):,s],axis=3,ddof=1))
    all_data.append(np.concatenate((train_tmp,test_tmp),axis=3))

# print information
if rank == 0:
    print ('searchlight length is {}'.format(sl_rad))
    print ('number of features in SRM: {}'.format(nfeature))
    print ('number of subjects is: {}'.format(len(all_data)))
    print ('number of TR is: {}'.format(ntr))
    print ('brain data dimension is {}-by-{}-by-{}'.format(dim1,dim2,dim3))

# Generate mask: mask is a 3D binary array, with active voxels being 1. I simply set 
# all voxels to be active in this example, but you should set the mask to fit your ROI
# in practice.
mask = np.ones((dim1,dim2,dim3), dtype=bool)
    #        srm_k = initial_srm_k

    # run SRM on masked data
    if runNum == 0:
        shared_data = SRM_V1(run2, run1, srm_k, n_iter)
    elif runNum == 1:
        shared_data = SRM_V1(run1, run2, srm_k, n_iter)

    # perform cross-validation style HMM for n_folds
    for n in range(n_folds):
        np.random.seed(n)
        subj_list_shuffle = np.random.permutation(shared_data)

        # convert data from list to numpy array and z-score in time
        shared_data_stack = stats.zscore(np.dstack(subj_list_shuffle),
                                         axis=1,
                                         ddof=1)

        # split subjects into two groups
        others = np.mean(shared_data_stack[:, start_idx:end_idx, :13], axis=2)
        loo = np.mean(shared_data_stack[:, start_idx:end_idx, 13:], axis=2)

        # fit HMM to song data and return match data where first entry is true match score and all others are permutation scores
        print("Fitting HMM")
        WvA[n, :], bounds[n, :] = HMM(others, loo, human_bounds)

    # take average of WvA scores and bounds over folds
    avgWvA = fisher_mean(WvA, axis=0)
    avgBounds = np.mean(bounds, axis=0)

    # compute z-score
for i in range(int(np.max(parcels))):
    print("Parcel Num: ", str(i + 1))
    # get indices where mask and parcels overlap
    indices = np.where((mask_img.get_data() > 0) & (parcels == i + 1))

    # initialize list for storing masked data across subjects
    run1 = np.load(parcel_dir + "parcel" + str(i + 1) + "_run1.npy")
    run2 = np.load(parcel_dir + "parcel" + str(i + 1) + "_run2.npy")

    # run SRM on masked data
    if runNum == 0:
        shared_data = SRM_V1(run2, run1, srm_k, n_iter)
    elif runNum == 1:
        shared_data = SRM_V1(run1, run2, srm_k, n_iter)

    data = np.mean(stats.zscore(np.dstack(shared_data), axis=1, ddof=1),
                   axis=2)[:, start_idx:end_idx]

    # fit HMM to song data and return match data where first entry is true match score and all others are permutation scores
    print("Fitting HMM")
    SL_match = HMM(data, human_bounds)

    # compute z-score
    match_z = (SL_match[0] - np.mean(SL_match[1:])) / (np.std(SL_match[1:]))

    # compute z-score for euclid by flipping sign after z-scoring
    #match_z = ((SL_match[0] - np.mean(SL_match[1:])) / (np.std(SL_match[1:]))) * -1

    # convert z-score to p-value
    match_p = st.norm.sf(match_z)
songs_run2 = [
    'St_Pauls_Suite', 'I_Love_Music', 'Moonlight_Sonata',
    'Change_of_the_Guard', 'Waltz_of_Flowers', 'The_Bird', 'Island',
    'Allegro_Moderato', 'Finlandia', 'Early_Summer', 'Capriccio_Espagnole',
    'Symphony_Fantastique', 'Boogie_Stop_Shuffle', 'My_Favorite_Things',
    'Blue_Monk', 'All_Blues'
]

durs_run2 = np.array([
    90, 180, 180, 90, 135, 180, 180, 225, 225, 135, 90, 135, 225, 225, 90, 135
])

# Load in data
run1 = np.nan_to_num(
    stats.zscore(np.load(datadir +
                         'fdr_01_lprec_full_split_merge_run1_n25.npy'),
                 axis=1,
                 ddof=1))
run2 = np.nan_to_num(
    stats.zscore(np.load(datadir +
                         'fdr_01_lprec_full_split_merge_run2_n25.npy'),
                 axis=1,
                 ddof=1))

nSubj = run1.shape[2]

# Convert data into lists where each element is voxels by samples
run1_list = []
run2_list = []
for i in range(0, nSubj):
    run1_list.append(run1[:, :, i])
    run2_list.append(run2[:, :, i])
Example No. 26
# Original series.
X0, X1 = gen_series()
fig, (ax0, ax1) = plt.subplots(nrows=2, ncols=1, figsize=(8, 6))
ax0.plot(X0, '-r.')
ax0.set_title('Time series X0(t) vs t')
ax1.plot(X1, '-r.')
ax1.set_title('Time series X1(t) vs t')
plt.show()


"""An essential and necessary step for MSSA is to normalize both time series.
That means to remove the mean value and to divide it by the standard deviation
(for each series separately)."""

X0_zs, X1_zs = stats.zscore(X0), stats.zscore(X1)
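# Equivalent manual normalization (shown only to make the formula explicit; assumes
# numpy is imported as np, as it is used below in shift()):
# z = (x - mean(x)) / std(x), which is what stats.zscore computes for each series.
X0_manual = (np.asarray(X0) - np.mean(X0)) / np.std(X0)
assert np.allclose(X0_manual, X0_zs)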


# Using shifted time series.
def shift(arr, n, order='forward'):
    if isinstance(arr, np.ndarray):
        arr = arr.tolist()
    if order == 'forward':
        shifted = arr[n:] + [0] * n
    elif order == 'reversed':
        shifted = [0] * n + arr[:-n]
    else:
        print("Order %s not recognized.  Try forward or reversed" % order)

    return shifted
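# Usage sketch (illustrative only): a forward shift drops the first n samples and pads
# the end with zeros; 'reversed' pads the front and drops the last n samples.
print(shift([1, 2, 3, 4, 5], 2, order='forward'))    # [3, 4, 5, 0, 0]
print(shift([1, 2, 3, 4, 5], 2, order='reversed'))   # [0, 0, 1, 2, 3]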
Example No. 27
def analyze_results(directory, 
                    conditions, 
                    n_permutations=1000.):
    
    
    """Write the results of the regression analysis

    Parameters
    ----------
    directory : string or list of strings
        Path or list of paths where the results are stored.
    
    conditions : string or list of strings
        Conditions to be analyzed.


    Returns
    -------
    None
        Figures and summary files are written under res_path.

    """
    
    res_path = '/media/robbis/DATA/fmri/monks/0_results/'
    subjects = np.loadtxt('/media/robbis/DATA/fmri/monks/attributes_struct.txt',
                      dtype=str)

    path = '/media/robbis/DATA/fmri/monks/'
    roi_list = []
    roi_list = np.loadtxt('/media/robbis/DATA/fmri/templates_fcmri/findlab_rois.txt', 
                          delimiter=',',
                          dtype=str)
    
    if isinstance(directory, str):
        directory = [directory]
        
    if isinstance(conditions, str):
        conditions = [conditions]
        
    
    for dir_ in directory:
        for cond_ in conditions:
            
            fname_ = os.path.join(res_path, dir_, cond_+'_values_1000_50.npz')
            
            results_ = np.load(fname_)
            values_ = results_['arr_0'].tolist()
            errors_ = values_['error']      #values_['errors_']
            sets_ = values_['features']     #values_['sets_']
            weights_ = values_['weights']   #values_['weights_']
            samples_ = values_['subjects']  #values_['samples_']
            
            fname_ = os.path.join(res_path, dir_, cond_+'_permutation_1000_50.npz')
            
            results_ = np.load(fname_)
            values_p = results_['arr_0'].tolist()
            errors_p = values_p['error']        #values_p['errors_p']
            sets_p = values_p['features']       #values_p['sets_p']
            weights_p = values_p['weights']     #values_p['weights_p']
            samples_p = values_p['subjects']    #values_p['samples_p']
            
            errors_p = np.nanmean(errors_p, axis=1)
                        
            print('-----------'+dir_+'-------------')
            print(cond_)
            print ('MSE = '+str(errors_[:,0].mean())+' -- p '+ \
                str(np.count_nonzero(errors_p[:,0] < errors_[:,0].mean())/n_permutations))
            print('COR = '+str(np.nanmean(errors_[:,1]))+' -- p '+ \
                str(np.count_nonzero(errors_p[:,1] > np.nanmean(errors_[:,1]))/n_permutations))
                
            directory_ = dir_
            learner_ = "SVR_C_1" 
        
            prename = "%s_%s" %(cond_, learner_)
            
            ######## Get matrix infos ###############
            
            conn_test = ConnectivityLoader(res_path, 
                                         subjects, 
                                         directory_, 
                                         roi_list)
            
            # Get nan mask to correctly fill matrix
            nan_mask = conn_test.get_results(['Samatha', 'Vipassana'])
            # Transform matrix into float of ones
            mask_ = np.float_(~np.bool_(nan_mask))
            # Get the upper part of the matrix
            mask_ = np.triu(mask_, k=1)
            mask_indices = np.nonzero(mask_)
            n_bins = np.count_nonzero(mask_)
            
            
            ###### Plot of distributions of errors and permutations #########
            #errors_p = np.nanmean(errors_p, axis=1)
            
            fig_ = pl.figure()
            bpp = pl.boxplot(errors_p, showfliers=False, showmeans=True, patch_artist=True)
            bpv = pl.boxplot(errors_, showfliers=False, showmeans=True, patch_artist=True)
            fname = "%s_perm_1000_boxplot.png" %(prename)
           
            
            for box_, boxp_ in zip(bpv['boxes'], bpp['boxes']):
                box_.set_facecolor('lightgreen')
                boxp_.set_facecolor('lightslategrey')
              
              
            pl.xticks(np.array([1,2]), ['MSE', 'COR'])
            
            pl.savefig(os.path.join(res_path, directory_, fname))
            pl.close()
            
            n_permutations = float(errors_p[:,0].shape[0])
            
            
            ##### Plot of connection distributions ########
            
            pl.figure()
            h_values_p, _ = np.histogram(sets_p.flatten(), bins=np.arange(0, n_bins+1))
            #pl.plot(zscore(h_values_p))
            
            pl.hist(zscore(h_values_p), bins=25)
            
            fname = "%s_features_set_dist.png" %(prename)
            pl.savefig(os.path.join(res_path, directory_, fname))
            
            pl.figure()
            h_values_, _ = np.histogram(sets_.flatten(), bins=np.arange(0, n_bins+1))
            pl.plot(zscore(h_values_))
                
            
            fname = "%s_features_set_cross_validation.png" %(prename)
            pl.savefig(os.path.join(res_path, directory_, fname))
            
            pl.close('all')
            
            
            ######## Plot connectivity stuff ###########
            
            weights_ = weights_.squeeze()
            filling_vector = np.zeros(np.count_nonzero(mask_))
            counting_vector = np.zeros(np.count_nonzero(mask_))
            
            for s, w in zip(sets_, weights_):
                filling_vector[s] += zscore(w)
                counting_vector[s] += 1
            
            # Calculate the average weights and then zscore
            avg_weigths = np.nan_to_num(filling_vector/counting_vector)
            
            mask_[mask_indices] = avg_weigths
            
            matrix_ = np.nan_to_num(copy_matrix(mask_, diagonal_filler=0))
        
            names_lr, colors_lr, index_, coords, _ = get_atlas_info(dir_)
            
            '''
            matrix_[matrix_ == 0] = np.nan
            matrix_[np.abs(matrix_) < 1] = np.nan
            '''
            size_w = np.zeros_like(matrix_)
            size_w[mask_indices] = np.abs(avg_weigths)
            size_w = np.nan_to_num(copy_matrix(size_w, diagonal_filler=0))
            size_w = np.sum(size_w, axis=0)
            
            f, _ = plot_connectivity_circle_edited(matrix_[index_][:,index_], 
                                            names_lr[index_], 
                                            node_colors=colors_lr[index_],
                                            node_size=2*size_w[index_]**2,
                                            con_thresh = 1.4,
                                            title=cond_,
                                            node_angles=circular_layout(names_lr, 
                                                                        list(names_lr),
                                                                        ),
                                            fontsize_title=19,
                                            fontsize_names=13,
                                            fontsize_colorbar=13,
                                            colorbar_size=0.3,
                                            colormap='bwr',
                                            #colormap=cm_,
                                            vmin=-3.,
                                            vmax=3.,
                                            fig=pl.figure(figsize=(16,16))
                                            )
            
            
            fname = "%s_features_weight.png" %(prename)
            f.savefig(os.path.join(res_path, directory_, fname),
                      facecolor='black',
                      dpi=150)
            for d_ in ['x', 'y', 'z']:
                fname = "%s_connectome_feature_weight_%s.png" %(prename, d_)
                fname = os.path.join(res_path, directory_, fname)
                plot_connectome(matrix_, 
                                coords, 
                                colors_lr, 
                                2*size_w**2,
                                1.4,
                                fname,
                                #cmap=pl.cm.bwr,
                                title=None,
                                display_=d_,
                                #max_=3.,
                                #min_=3. 
                                )
            fname = "%s_connections_list_feature_weights.txt" %(prename)
            fname = os.path.join(res_path, directory_, fname)
            #print_connections(matrix_, names_lr, fname)
            
            #########
            mask_ = np.float_(~np.bool_(nan_mask))
            mask_ = np.triu(mask_, k=1)
            mask_indices = np.nonzero(mask_)
            mask_[mask_indices] = h_values_
            matrix_ = np.nan_to_num(copy_matrix(mask_, diagonal_filler=0))
            
            size_ = np.zeros_like(matrix_)
            size_[mask_indices] = counting_vector
            size_ = np.nan_to_num(copy_matrix(size_, diagonal_filler=0))
            size_ = np.sum(size_, axis=0)
            
            f, _ = plot_connectivity_circle_edited(matrix_[index_][:,index_], 
                                            names_lr[index_], 
                                            node_colors=colors_lr[index_],
                                            node_size=size_[index_]*5,
                                            con_thresh = 15.,
                                            title=cond_,
                                            node_angles=circular_layout(names_lr, 
                                                                        list(names_lr),
                                                                        ),
                                            fontsize_title=19,
                                            fontsize_names=13,
                                            fontsize_colorbar=13,
                                            colorbar_size=0.3,
                                            #colormap='bwr',
                                            #colormap='terrain',
                                            #vmin=40,
                                            fig=pl.figure(figsize=(16,16))
                                            )
            
            fname = "%s_features_choices.png" %(prename)
            f.savefig(os.path.join(res_path, directory_, fname),
                      facecolor='black',
                      dpi=150)
            
            for d_ in ['x', 'y', 'z']:
                fname = "%s_connectome_feature_choices_%s.png" %(prename, d_)
                fname = os.path.join(res_path, directory_, fname)
                
                plot_connectome(matrix_, 
                                coords, 
                                colors_lr, 
                                4.*size_,
                                15.,
                                fname,
                                title=None,
                                max_=50.,
                                min_=0.,
                                display_=d_
                                )
                
            fname = "%s_connections_list_feature_choices.txt" %(prename)
            fname = os.path.join(res_path, directory_, fname)
            #print_connections(matrix_, names_lr,fname)
            
            pl.close('all')
Example No. 28
def removeOutliers(data, max=4):
    # Remove outliers
    z_scores = stats.zscore(data)
    abs_z_scores = np.abs(z_scores)
    filtered_entries = (abs_z_scores < max).all(axis=1)
    return data[filtered_entries]
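# --- Usage sketch (illustrative only) ---
# Rows where any column has an absolute z-score >= 4 (the default max) are dropped.
import numpy as np
from scipy import stats

demo = np.column_stack([np.arange(21, dtype=float), np.arange(21, dtype=float)])
demo[0, 0] = 1000.                    # plant one extreme value in the first row
print(removeOutliers(demo).shape)     # (20, 2): the extreme row is removed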
Example No. 29
import matplotlib.patches as patches
import numpy as np
import brainiak.eventseg.event
from scipy.stats import zscore, pearsonr, stats
from scipy.signal import gaussian, convolve
from sklearn import decomposition
from brainiak.funcalign.srm import SRM
import nibabel as nib

datadir = '/jukebox/norman/jamalw/MES/prototype/link/scripts/chris_dartmouth/data/'

# Load in data
train = np.nan_to_num(
    stats.zscore(np.load(datadir + 'precuneus_k12ish_run1_n25.npy'),
                 axis=1,
                 ddof=1))
test = np.nan_to_num(
    stats.zscore(np.load(datadir + 'precuneus_k12ish_run2_n25.npy'),
                 axis=1,
                 ddof=1))

# Convert data into lists where each element is voxels by samples
train_list = []
test_list = []
for i in range(0, train.shape[2]):
    train_list.append(train[:, :, i])
    test_list.append(test[:, :, i])

# Initialize model
print('Building Model')
subjects = movie_data_left.shape[2]

# Convert data to a list of arrays matching SRM input.
# Each element is a matrix of voxels by TRs.
# Also, concatenate data from both hemispheres in the brain.
movie_data = []
for s in range(subjects):
    if s % size == rank:
        movie_data.append(np.concatenate([movie_data_left[:, :, s], movie_data_right[:, :, s]], axis=0))
    else:
        movie_data.append(None)

# Z-score the data
for subject in range(subjects):
    if movie_data[subject] is not None:
        movie_data[subject] = stats.zscore(movie_data[subject], axis=1, ddof=1)

# Run SRM with the movie data
srm = brainiak.funcalign.srm.SRM(n_iter=10, features=50, comm=comm)
srm.fit(movie_data)

# We define a function to present the output of the experiment.
def plot_confusion_matrix(cm, title="Confusion Matrix"):
    """Plots a confusion matrix for each subject
    """
    import matplotlib.pyplot as plt
    import math
    plt.figure()
    subjects = len(cm)
    root_subjects = math.sqrt(subjects)
    cols = math.ceil(root_subjects)
Example No. 31
def HMM(X, K, loo_idx, song_idx, song_bounds):
    """Fit a hidden Markov model.

       Fit the HMM to the average of all-but-one subject and cross-validate on
       the left-out subject using within-song vs. between-song average
       correlations.

       Parameters
       ----------
       X: list of voxel by time ndarrays (2D); run 1 for every subject followed
           by run 2 for every subject
       K: # of events for HMM (scalar)
       loo_idx: index of the left-out subject (scalar)
       song_idx: song index (scalar)
       song_bounds: array of song boundary TRs (1D)

       Returns
       -------
       within_across: within- minus across-boundary correlation for the real
           event boundaries (element 0) and each permutation (1D, nPerm + 1)

    """

    w = 6
    srm_k = 45
    nPerm = 1000
    within_across = np.zeros(nPerm + 1)
    run1 = [X[i] for i in np.arange(0, int(len(X) / 2))]
    run2 = [X[i] for i in np.arange(int(len(X) / 2), len(X))]
    print('Building Model')
    srm = SRM(n_iter=10, features=srm_k)
    print('Training Model')
    srm.fit(run1)
    print('Testing Model')
    shared_data = srm.transform(run2)
    shared_data = stats.zscore(np.dstack(shared_data), axis=1, ddof=1)
    others = np.mean(shared_data[:, :,
                                 np.arange(shared_data.shape[-1]) != loo_idx],
                     axis=2)
    loo = shared_data[:, song_bounds[song_idx]:song_bounds[song_idx + 1],
                      loo_idx]
    nTR = loo.shape[1]

    # Fit to all but one subject
    ev = brainiak.eventseg.event.EventSegment(K)
    ev.fit(others[:, song_bounds[song_idx]:song_bounds[song_idx + 1]].T)
    events = np.argmax(ev.segments_[0], axis=1)

    ####
    # plot searchlights
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches
    shared_data = srm.transform(run2)
    avg_response = sum(shared_data) / len(shared_data)
    plt.figure(figsize=(10, 10))
    plt.imshow(np.corrcoef(avg_response[:, 0:89].T))
    bounds = np.where(np.diff(np.argmax(ev.segments_[0], axis=1)))[0]
    ax = plt.gca()
    bounds_aug = np.concatenate(([0], bounds, [nTR]))
    for i in range(len(bounds_aug) - 1):
        rect1 = patches.Rectangle((bounds_aug[i], bounds_aug[i]),
                                  bounds_aug[i + 1] - bounds_aug[i],
                                  bounds_aug[i + 1] - bounds_aug[i],
                                  linewidth=3,
                                  edgecolor='w',
                                  facecolor='none',
                                  label='Model Fit')
        ax.add_patch(rect1)
    plt.title('HMM Fit to A1 SRM K = ' + str(srm_k),
              fontsize=18,
              fontweight='bold')
    plt.savefig('plots/St_Pauls SRM K = ' + str(srm_k))
    ####

    # Compute correlations separated by w in time
    corrs = np.zeros(nTR - w)
    for t in range(nTR - w):
        corrs[t] = pearsonr(loo[:, t], loo[:, t + w])[0]

    # Compute within vs across boundary correlations, for real and permuted bounds
    for p in range(nPerm + 1):
        within = corrs[events[:-w] == events[w:]].mean()
        across = corrs[events[:-w] != events[w:]].mean()
        within_across[p] = within - across

        np.random.seed(p)
        events = np.zeros(nTR, dtype=int)
        events[np.random.choice(nTR, K - 1, replace=False)] = 1
        events = np.cumsum(events)

    return within_across
def HMM(X, K, song_idx, song_bounds):
    """Fit a hidden Markov model.

       Fit the HMM to the average data of a training group of subjects and
       cross-validate on the held-out subjects using within-song vs.
       between-song average correlations.

       Parameters
       ----------
       X: list of 50 voxel by full-time-course ndarrays (2D); 2 runs per subject
       K: # of events for HMM (scalar)
       song_idx: song index (scalar)
       song_bounds: array of song boundaries (1D)

       Returns
       -------
       within_across: within- minus across-boundary (wVa) score for the real
           event boundaries and each permutation, computed on the held-out
           subjects (1D, nPerm + 1)

    """

    w = 6
    nPerm = 1000
    hrf = 5
    within_across = np.zeros(nPerm + 1)
    run1 = [X[i] for i in np.arange(0, int(len(X) / 2))]
    run2 = [X[i] for i in np.arange(int(len(X) / 2), len(X))]
    print('Building Model')
    srm = SRM(n_iter=10, features=30)
    print('Training Model')
    srm.fit(run1)
    print('Testing Model')
    shared_data = srm.transform(run2)
    shared_data = stats.zscore(np.dstack(shared_data), axis=1, ddof=1)
    others = np.mean(shared_data[:, song_bounds[song_idx] +
                                 hrf:song_bounds[song_idx + 1] + hrf, :13],
                     axis=2)
    loo = np.mean(shared_data[:, song_bounds[song_idx] +
                              hrf:song_bounds[song_idx + 1] + hrf, 13:],
                  axis=2)
    nTR = loo.shape[1]

    # Fit to all but one subject
    ev = brainiak.eventseg.event.EventSegment(K)
    ev.fit(others.T)
    events = np.argmax(ev.segments_[0], axis=1)

    # Compute correlations separated by w in time
    corrs = np.zeros(nTR - w)
    for t in range(nTR - w):
        corrs[t] = pearsonr(loo[:, t], loo[:, t + w])[0]

    # Compute within vs across boundary correlations, for real and permuted bounds
    for p in range(nPerm + 1):
        within = corrs[events[:-w] == events[w:]].mean()
        across = corrs[events[:-w] != events[w:]].mean()
        within_across[p] = within - across

        np.random.seed(p)
        events = np.zeros(nTR, dtype=int)
        events[np.random.choice(nTR, K - 1, replace=False)] = 1
        events = np.cumsum(events)
    print((within_across[0] - np.mean(within_across[1:])) /
          np.std(within_across[1:]))
    return within_across
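# Hedged post-processing sketch (synthetic numbers, not from the original script):
# the vector returned by HMM holds the true within-vs-across score first and the
# nPerm permuted scores after it, so a z-score and a one-sided permutation
# p-value can be derived directly from it.
wva_demo = np.concatenate(([0.12], np.random.RandomState(0).normal(0.0, 0.03, 1000)))
z_demo = (wva_demo[0] - wva_demo[1:].mean()) / wva_demo[1:].std()
p_demo = (np.sum(wva_demo[1:] >= wva_demo[0]) + 1) / (wva_demo[1:].size + 1)
print('z =', z_demo, 'p =', p_demo)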
from subprocess import call
import sys
import scipy.io

# cmd and file_name are assumed to be built earlier in the original download example
try:
    retcode = call(cmd, shell=True)
    if retcode < 0:
        print("File download was terminated by signal", -retcode, file=sys.stderr)
    else:
        print("File download returned", retcode, file=sys.stderr)
except OSError as e:
    print("File download failed:", e, file=sys.stderr)

#get fMRI data and scanner RAS coordinates
all_data = scipy.io.loadmat(file_name)
data = all_data['data']
R = all_data['R']

# Z-score the data
data = stats.zscore(data, axis=1, ddof=1)
n_voxel, n_tr = data.shape

# Run TFA with downloaded data
from brainiak.factoranalysis.tfa import TFA
# uncomment below line to get help message on TFA
#help(TFA)

tfa = TFA(K=5,
        max_num_voxel=int(n_voxel*0.5),
        max_num_tr=int(n_tr*0.5),
        verbose=True)
tfa.fit(data, R)

print("\n centers of latent factors are:")
print(tfa.get_centers(tfa.local_posterior_))
Ejemplo n.º 34
def searchlight(coords, K, mask, loo_idx, subjs, song_idx, song_bounds):
    """Run searchlight

       Create a searchlight at each grid location and run the HMM voxel
       function on the subject data falling within that searchlight sphere.

       Parameters
       ----------
       coords      : voxel by xyz ndarray (2D, Vx3)
       K           : # of events for HMM (scalar)
       mask        : brain (or ROI) mask
       loo_idx     : index of the left-out subject (scalar)
       subjs       : list of subject IDs
       song_idx    : song index (scalar)
       song_bounds : array of song boundary TRs (1D)

       Returns
       -------
       vox_z   : voxel by permutation ndarray of searchlight z-scores (2D)
       voxmean : voxel by permutation ndarray of mean searchlight scores (2D)

    """

    stride = 5
    radius = 5
    min_vox = 10
    nPerm = 1000
    SL_allvox = []
    SL_results = []
    datadir = '/tigress/jamalw/MES/prototype/link/scripts/data/searchlight_input/'
    for x in range(0, np.max(coords, axis=0)[0] + stride, stride):
        for y in range(0, np.max(coords, axis=0)[1] + stride, stride):
            for z in range(0, np.max(coords, axis=0)[2] + stride, stride):
                if not os.path.isfile(datadir + subjs[0] + '/' + str(x) + '_' +
                                      str(y) + '_' + str(z) + '.npy'):
                    continue
                D = distance.cdist(coords,
                                   np.array([x, y, z]).reshape((1, 3)))[:, 0]
                SL_vox = D <= radius
                data = []
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) +
                                        '_' + str(y) + '_' + str(z) + '.npy')
                    data.append(
                        np.nan_to_num(
                            stats.zscore(subj_data[:, :, 0], axis=1, ddof=1)))
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) +
                                        '_' + str(y) + '_' + str(z) + '.npy')
                    data.append(
                        np.nan_to_num(
                            stats.zscore(subj_data[:, :, 1], axis=1, ddof=1)))
                print("Running Searchlight")
                SL_within_across = HMM(data, K, loo_idx, song_idx, song_bounds)
                SL_results.append(SL_within_across)
                SL_allvox.append(np.array(np.nonzero(SL_vox)[0]))
    voxmean = np.zeros((coords.shape[0], nPerm + 1))
    vox_SLcount = np.zeros(coords.shape[0])
    for sl in range(len(SL_results)):
        voxmean[SL_allvox[sl], :] += SL_results[sl]
        vox_SLcount[SL_allvox[sl]] += 1
    voxmean = voxmean / vox_SLcount[:, np.newaxis]
    vox_z = np.zeros((coords.shape[0], nPerm + 1))
    for p in range(nPerm + 1):
        vox_z[:,
              p] = (voxmean[:, p] - np.mean(voxmean[:, 1:], axis=1)) / np.std(
                  voxmean[:, 1:], axis=1)
    return vox_z, voxmean
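# Hedged follow-up sketch with synthetic values (not from the original example):
# per-voxel searchlight scores are scattered back into a 3D volume by coordinate,
# which is how the returned vox_z map would typically become a brain image.
demo_coords = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
demo_scores = np.array([0.5, 1.2, -0.3])
demo_vol = np.full((10, 10, 10), np.nan)
demo_vol[demo_coords[:, 0], demo_coords[:, 1], demo_coords[:, 2]] = demo_scores
print(np.nansum(demo_vol))                # sums to 1.4, ignoring the NaN background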
Ejemplo n.º 35
    #nn = n.loadtxt('notas_compositores.txt')
    #nn = n.loadtxt('notas_filosofos.txt')
    #nn = n.loadtxt('notas_aleatorias.txt')
    nn = n.array([[r.uniform(1, 9) for x in range(_nc)] for y in range(_na)])
    #nn_lista = nn.tolist()
    #print('NUMBER OF GRADES')
    #print([sum([[int(round(z)) for z in x].count(y) for x in nn_lista]) for y in range(1, 10)])

    print('\nGRADES')
    for i in range(len(nn)):
        print('%s & %s \\' % (agentes[i], ' & '.join([str(x) for x in nn[i]])))

    print('MEAN', n.mean(nn))

    print('\nZ-SCORES OF THE GRADES')
    _zs1 = sss.zscore(nn)
    for i in range(len(_zs1)):
        print([round(x, ndigits=2) for x in _zs1[i]])
    print('MEAN', n.mean(n.abs(_zs1)))

    # correlation matrix computation
    # preprocessing: standardize each column
    for i in range(nn.shape[1]):
        nn[:, i] = (nn[:, i] - nn[:, i].mean()) / nn[:, i].std()

    # pearson
    print('\nCOVARIANCE MATRIX')
    covm = n.cov(nn.T, bias=1)
    for i in range(len(covm)):
        print([round(x, ndigits=2) for x in covm[i]])
    print('MEAN', n.mean(n.abs(covm)))
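    # Hedged check (not in the original): because each column was standardized
    # with the population std (ddof=0), the biased covariance matrix above
    # equals the Pearson correlation matrix.
    corrm = n.corrcoef(nn.T)
    print('MAX |cov - corr| =', n.max(n.abs(covm - corrm)))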
# run 1 times
song_bounds_run1 = np.array([0,225,314,494,628,718,898,1032,1122,1301,1436,1660,1749,1973, 2198,2377,2511])

songs_run1 = ['Finlandia', 'Blue_Monk', 'I_Love_Music','Waltz_of_Flowers','Capriccio_Espagnole','Island','All_Blues','St_Pauls_Suite','Moonlight_Sonata','Symphony_Fantastique','Allegro_Moderato','Change_of_the_Guard','Boogie_Stop_Shuffle','My_Favorite_Things','The_Bird','Early_Summer']

durs_run1 = np.array([225,90,180,135,90,180,135,90,180,135,225,90,225,225,180,135])

# run 2 times
song_bounds_run2 = np.array([0,90,270,449,538,672,851,1031,1255,1480,1614,1704,1839,2063,2288,2377,2511])

songs_run2 = ['St_Pauls_Suite', 'I_Love_Music', 'Moonlight_Sonata', 'Change_of_the_Guard','Waltz_of_Flowers','The_Bird', 'Island', 'Allegro_Moderato', 'Finlandia', 'Early_Summer', 'Capriccio_Espagnole', 'Symphony_Fantastique', 'Boogie_Stop_Shuffle', 'My_Favorite_Things', 'Blue_Monk','All_Blues']

durs_run2 = np.array([90,180,180,90,135,180,180,225,225,135,90,135,225,225,90,135])

# Load in data
run1 = np.nan_to_num(stats.zscore(np.load(datadir + 'fdr_01_bil_A1_split_merge_no_srm_run1_n25.npy'),axis=1,ddof=1))
run2 = np.nan_to_num(stats.zscore(np.load(datadir + 'fdr_01_bil_A1_split_merge_no_srm_run2_n25.npy'),axis=1,ddof=1))

nSubj = run1.shape[2]

nboot = 50

# K_set and bootNum are assumed to be defined earlier in the original script
wVa_results = np.zeros((16, len(K_set), nboot))

np.random.seed(bootNum)

for b in range(nboot):
    resamp_subjs = np.random.choice(nSubj, size=nSubj, replace=True)
    run1_resample = run1[:, :, resamp_subjs]
    run2_resample = run2[:, :, resamp_subjs]
Ejemplo n.º 37
    #print('NUMBER OF GRADES')
    #print([sum([[int(round(z)) for z in x].count(y) for x in nn_lista]) for y in range(1, 10)])

    print('\nGRADES')
    for i in range(len(nn)):
        print('%s & %s \\' % (agentes[i], ' & '.join([str(x) for x in nn[i]])))

    print('MEAN', n.mean(nn))

    # correlation matrix computation
    # preprocessing
    #for i in range(nn.shape[1]):
    #    nn[:, i] = (nn[:, i] - nn[:, i].mean()) / nn[:, i].std()

    print('\nZ-SCORES')
    _zscores = sss.zscore(nn)
    for i in range(len(_zscores)):
        print([round(x, ndigits=2) for x in _zscores[i]])

    # now use the standardized grades
    nn = _zscores

    # pearson
    print('\nCOVARIANCE MATRIX')
    covm = n.cov(nn.T, bias=1)
    for i in range(len(covm)):
        print([round(x, ndigits=2) for x in covm[i]])
    print('MEAN', n.mean(n.abs(covm)))


    def _cov(x, y):
        ax.set_xticks([])
        ax.text(0.35, 1, f'Cluster {cluster}',
                 fontsize=32, va='bottom',transform=ax.transAxes)
    fig.show()

from pandas import DataFrame

dataframe = DataFrame(model._means,
                      columns=vectorizer.get_feature_names())
wordcloud(dataframe)

# Z-score charts

from scipy import stats

zscores = stats.zscore(model._means, axis=0)
zscore_df = DataFrame(zscores, columns=vectorizer.get_feature_names())

import matplotlib.pyplot as pl
from pandas import concat

def zscore(dataframe, term_count=10):

    fig = pl.figure(figsize=(15, 20))
    for i in range(dataframe.shape[0]):
        cluster_df = dataframe.iloc[[i]].T
        cluster_df = cluster_df.rename(index=str, columns={i: 'Z-score'})
        cluster_df = cluster_df.sort_values(by=['Z-score'])
        if term_count:
            half_term_count = term_count // 2
            sliced_df = concat([cluster_df[:half_term_count],
# url, data_dir, n_subj and the MPI variables (size, rank) are assumed to be
# defined earlier in the original HTFA example.
data = []
R = []
for idx in range(n_subj):
    if idx % size == rank:
        #download data
        file_name = os.path.join(data_dir, 's' + str(idx) + '.mat')
        cmd = 'curl --location -o ' + file_name + url[idx]
        try:
            retcode = call(cmd, shell=True)
            if retcode < 0:
                print("File download was terminated by signal", -retcode, file=sys.stderr)
            else:
                print("File download returned", retcode, file=sys.stderr)
        except OSError as e:
            print("File download failed:", e, file=sys.stderr)
        all_data = scipy.io.loadmat(file_name)
        bold = all_data['data']
        # z-score the data
        bold = stats.zscore(bold, axis=1, ddof=1)
        data.append(bold)
        R.append(all_data['R'])

n_voxel, n_tr = data[0].shape

# Run HTFA with downloaded data
from brainiak.factoranalysis.htfa import HTFA
# uncomment below line to get help message on HTFA
#help(HTFA)

K = 5
htfa = HTFA(K=K,
        n_subj=n_subj,
        max_global_iter=5,
        max_local_iter=2,
Ejemplo n.º 40
    def __init__(
        self,
        data: pd.DataFrame,
        outcome_variable: str,
        regression_variables: List[str],
        covariates: Optional[List[str]] = None,
        min_n: int = 200,
        report_categorical_betas: bool = False,
        standardize_data: bool = False,
        encoding: str = "additive",
        edge_encoding_info: Optional[pd.DataFrame] = None,
        process_num: Optional[int] = None,
    ):
        # base class init
        # This takes in minimal regression params (data, outcome_variable, covariates) and
        # initializes additional parameters (outcome dtype, regression variables, error, and warnings)
        super().__init__(
            data=data,
            outcome_variable=outcome_variable,
            regression_variables=regression_variables,
            covariates=covariates,
        )

        # Custom init involving kwargs passed to this regression
        self.min_n = min_n
        self.report_categorical_betas = report_categorical_betas
        self.standardize_data = standardize_data
        if process_num is None:
            process_num = multiprocessing.cpu_count()
        self.process_num = process_num
        if encoding not in self.KNOWN_ENCODINGS:
            raise ValueError(f"Genotypes provided with unknown 'encoding': {encoding}")
        elif encoding == "edge" and edge_encoding_info is None:
            raise ValueError(
                "'edge_encoding_info' must be provided when using edge encoding"
            )
        else:
            self.encoding = encoding
            self.edge_encoding_info = edge_encoding_info

        # Ensure the data output type is compatible
        # Set 'self.family' and 'self.use_t' which are dependent on the outcome dtype
        if self.outcome_dtype == "categorical":
            raise NotImplementedError(
                "Categorical Outcomes are not yet supported for this type of regression."
            )
        elif self.outcome_dtype == "continuous":
            self.description += (
                f"Continuous Outcome (family = Gaussian): '{self.outcome_variable}'"
            )
            self.family = sm.families.Gaussian(link=sm.families.links.identity())
            self.use_t = True
        elif self.outcome_dtype == "binary":
            # Use the order according to the categorical
            counts = self.data[self.outcome_variable].value_counts().to_dict()
            categories = self.data[self.outcome_variable].cat.categories
            codes, categories = zip(*enumerate(categories))
            self.data[self.outcome_variable].replace(categories, codes, inplace=True)
            self.description += (
                f"Binary Outcome (family = Binomial): '{self.outcome_variable}'\n"
                f"\t{counts[categories[0]]:,} occurrences of '{categories[0]}' coded as 0\n"
                f"\t{counts[categories[1]]:,} occurrences of '{categories[1]}' coded as 1"
            )
            self.family = sm.families.Binomial(link=sm.families.links.logit())
            self.use_t = False
        else:
            raise ValueError(
                "The outcome variable's type could not be determined.  Please report this error."
            )

        # Log missing outcome values
        na_outcome_count = self.data[self.outcome_variable].isna().sum()
        self.description += f"\nUsing {len(self.data) - na_outcome_count:,} of {len(self.data):,} observations"
        if na_outcome_count > 0:
            self.description += (
                f"\n\t{na_outcome_count:,} are missing a value for the outcome variable"
            )

        # Standardize continuous variables in the data if needed
        # Use ddof=1 in the zscore calculation (used for StdErr) to match R
        if self.standardize_data:
            if self.outcome_dtype == "continuous":
                self.data[self.outcome_variable] = stats.zscore(
                    self.data[self.outcome_variable], nan_policy="omit", ddof=1
                )
            continuous_rvs = self.regression_variables["continuous"]
            self.data[continuous_rvs] = stats.zscore(
                self.data[continuous_rvs], nan_policy="omit", ddof=1
            )
            continuous_covars = [
                rv
                for rv, rv_type in self.covariate_types.items()
                if rv_type == "continuous"
            ]
            self.data[continuous_covars] = stats.zscore(
                self.data[continuous_covars], nan_policy="omit", ddof=1
            )

        # Finish updating description
        self.description += f"\nRegressing {sum([len(v) for v in self.regression_variables.values()]):,} variables"
        for k, v in self.regression_variables.items():
            self.description += f"\n\t{len(v):,} {k} variables"