def time_segment_matching_accuracy(data, win_size=6): 
    nsubjs = len(data)
    (ndim, nsample) = data[0].shape
    accu = np.zeros(shape=nsubjs)
    nseg = nsample - win_size 
    # mysseg prediction
    trn_data = np.zeros((ndim*win_size, nseg),order='f')
    # the trn data also includes the tst data, which is subtracted out when
    # calculating A
    for m in range(nsubjs):
        for w in range(win_size):
            trn_data[w*ndim:(w+1)*ndim,:] += data[m][:,w:(w+nseg)]
    for tst_subj in range(nsubjs):
        tst_data = np.zeros((ndim*win_size, nseg),order='f')
        for w in range(win_size):
            tst_data[w*ndim:(w+1)*ndim,:] = data[tst_subj][:,w:(w+nseg)]

        A =  np.nan_to_num(stats.zscore((trn_data - tst_data),axis=0, ddof=1))
        B =  np.nan_to_num(stats.zscore(tst_data,axis=0, ddof=1))

        # compute correlation matrix
        corr_mtx = compute_correlation(B.T,A.T)

        for i in range(nseg):
            for j in range(nseg):
                if abs(i-j)<win_size and i != j :
                    corr_mtx[i,j] = -np.inf
        max_idx =  np.argmax(corr_mtx, axis=1)
        accu[tst_subj] = sum(max_idx == range(nseg)) / float(nseg)

    return accu
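# --- Illustrative usage sketch (not part of the original source) ---
# Synthetic data stands in for z-scored, SRM-aligned subject time series; in the real
# pipeline the inputs come from srm.transform(...) as in sfn() further down this page.
# compute_correlation is assumed to be a row-wise Pearson correlation helper (the
# original likely imports one, e.g. from brainiak.fcma.util); a simple stand-in is
# defined here so the sketch is self-contained.
import numpy as np
from scipy import stats

def compute_correlation(m1, m2):
    # Pearson correlation between every row of m1 and every row of m2
    m1z = stats.zscore(m1, axis=1, ddof=1)
    m2z = stats.zscore(m2, axis=1, ddof=1)
    return m1z @ m2z.T / (m1.shape[1] - 1)

rng = np.random.RandomState(0)
n_subjects, n_features, n_trs = 4, 10, 100
demo_data = [np.nan_to_num(stats.zscore(rng.randn(n_features, n_trs), axis=1, ddof=1))
             for _ in range(n_subjects)]
print(time_segment_matching_accuracy(demo_data, win_size=6))  # one accuracy per subject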
Example No. 2
def calculate_gene_expression_similarity(reduced_stat_map_data, mask="full"):
    store_file = "/ahba_data/store_max1_reduced.h5"
    subcortex_mask = "/ahba_data/subcortex_mask.npy"

    results_dfs = []
    with pd.HDFStore(store_file, 'r') as store:
        for donor_id in store.keys():
            print "Loading expression data (%s)" % donor_id
            expression_data = store.get(donor_id.replace(".", "_"))

            print "Getting statmap values (%s)" % donor_id
            nifti_values = reduced_stat_map_data[expression_data.columns]

            print "Removing missing values (%s)" % donor_id
            na_mask = np.isnan(nifti_values)
            if mask == "subcortex":
                na_mask = np.logical_or(na_mask,
                    np.isnan(np.load(subcortex_mask)[expression_data.columns]))
            elif mask == "cortex":
                na_mask = np.logical_or(na_mask, np.logical_not(np.isnan(
                    np.load(subcortex_mask)[expression_data.columns])))
            else:
                assert mask == "full"

            nifti_values = np.array(nifti_values)[np.logical_not(na_mask)]
            expression_data.drop(expression_data.columns[na_mask], axis=1, inplace=True)

            print "z scoring (%s)" % donor_id
            expression_data = pd.DataFrame(zscore(expression_data, axis=1), columns=expression_data.columns,
                                           index=expression_data.index)
            nifti_values = zscore(nifti_values)

            print "Calculating linear regressions (%s)" % donor_id
            regression_results = np.linalg.lstsq(np.c_[nifti_values, np.ones_like(nifti_values)], expression_data.T)
            results_df = pd.DataFrame({"slope": regression_results[0][0]}, index=expression_data.index)

            results_df.columns = pd.MultiIndex.from_tuples([(donor_id[1:], c,) for c in results_df.columns],
                                                           names=['donor_id', 'parameter'])

            results_dfs.append(results_df)

        print "Concatenating results"
        results_df = pd.concat(results_dfs, axis=1)
        del results_dfs

    t, p = ttest_1samp(results_df, 0.0, axis=1)
    group_results_df = pd.DataFrame({"t": t, "p": p}, columns=['t', 'p'], index=expression_data.index)
    _, group_results_df["p (FDR corrected)"], _, _ = multipletests(group_results_df.p, method='fdr_bh')
    group_results_df["variance explained (mean)"] = (results_df.xs('slope', axis=1, level=1) ** 2 * 100).mean(axis=1)
    group_results_df["variance explained (std)"] = (results_df.xs('slope', axis=1, level=1) ** 2 * 100).std(axis=1)
    del results_df
    probe_info = pd.read_csv("/ahba_data/probe_info_max1.csv", index_col=0).drop(['chromosome', "gene_id"], axis=1)
    group_results_df = group_results_df.join(probe_info)
    group_results_df = group_results_df[["gene_symbol", "entrez_id.1", "gene_name","t", "p", "p (FDR corrected)",
                                         "variance explained (mean)", "variance explained (std)"]]

    return group_results_df
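# --- Illustrative sketch (not from the original source) of the regression step above ---
# With both the stat-map values and each gene's expression z-scored, the lstsq slope
# equals the Pearson correlation, which is why slope**2 * 100 is reported as variance
# explained. The names below are made up for the demo.
import numpy as np
from scipy.stats import zscore, pearsonr

rng = np.random.RandomState(0)
x = zscore(rng.randn(200))                    # stands in for nifti_values
y = zscore(0.5 * x + rng.randn(200))          # stands in for one gene's expression profile
design = np.c_[x, np.ones_like(x)]            # predictor plus intercept column
slope, intercept = np.linalg.lstsq(design, y, rcond=None)[0]
print(slope, pearsonr(x, y)[0])               # the two agree up to floating point error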
Example No. 3
    def run(self):
        
        self.results = []
        self.loader = ConnectivityDataLoader()
        self.X, self.y = self.loader.setup_analysis(self.path, 
                              self.roi_list, 
                              self.directory, 
                              self.condition_list, 
                              self.subjects).filter(self.filter_).get_data()

        
        X = self.X
        y = self.y
        
        X = zscore(X, axis=1) # Sample-wise
        y = zscore(np.float_(y))
        
        self.fs = FeatureSelectionIterator()
        self.fs.setup_analysis(self.fs_algorithm, self.fs_ranking_fx).run(X, y).select_first(80)
        
        self.reg = RegressionAnalysis().setup_analysis(self.cv_schema, 
                                                  self.learner, 
                                                  self.error_fx)
        #Speedup stuff
        schema = ShuffleSplit(12, n_iter=1, test_size=0.25)
        self.perm_reg = RegressionAnalysis().setup_analysis(schema, 
                                                  self.learner, 
                                                  self.error_fx)
        
        self.perm = PermutationAnalysis().setup_analysis(self.reg, 
                                                    n_permutation=self.n_permutations,
                                                    dimension='features')
        

        
        for i,set_ in enumerate(self.fs):
            
            if i > 78:
                X_ = X[:,set_]
                y_ = y
                            
                reg_res = self.reg.run(X_, y_) # To be selected
                n_dist = self.perm.run(X_, y_)
                
                p_res = self.perm.pvalues(reg_res)
                
                self.results.append([reg_res, n_dist, p_res])

        #self.save()
        return self.results
def sfn(l, msk, myrad, bcast_var):
    # Arguments:
    # l -- a list of 4D arrays, containing data from a single searchlight
    # msk -- a 3D binary array, mask of this searchlight
    # myrad -- an integer, sl_rad
    # bcast_var -- whatever is broadcasted 

    # extract training and testing data
    train_data = []
    test_data = []
    d1,d2,d3,ntr = l[0].shape
    nvx = d1*d2*d3
    for s in l:
        train_data.append(np.reshape(s[:,:,:,:int(ntr/2)],(nvx,int(ntr/2))))
        test_data.append(np.reshape(s[:,:,:,int(ntr/2):],(nvx,ntr-int(ntr/2))))
    # train an srm model 
    srm = SRM(bcast_var[0],bcast_var[1])
    srm.fit(train_data)
    # transform test data
    shared_data = srm.transform(test_data)
    for s in range(len(l)):
        shared_data[s] = np.nan_to_num(stats.zscore(shared_data[s],axis=1,ddof=1))
    # run experiment
    accu = time_segment_matching_accuracy(shared_data)

    # return: can also return several values. In that case, the final output will be 
    # a 3D array of tuples
    return np.mean(accu) 
Example No. 5
    def run(self):
        
        self.loader = ConnectivityDataLoader()
        self.X, self.y = self.loader.setup_analysis(self.path, 
                              self.roi_list, 
                              self.directory, 
                              self.condition_list, 
                              self.subjects).filter(self.filter_).get_data()
        
        X = self.X
        y = self.y
                             
        X = zscore(X, axis=1) # Sample-wise
        y = zscore(np.float_(y))       
        
        self.fs = FeatureSelectionIterator().setup_analysis(self.fs_algorithm, 
                          self.fs_ranking_fx)
        
        
        self.reg = RegressionAnalysis().setup_analysis(self.cv_schema, 
                                                  self.learner, 
                                                  self.error_fx,
                                                  feature_selection=self.fs)
        
        self.perm = PermutationAnalysis().setup_analysis(self.reg, 
                                                    n_permutation=self.n_permutations,
                                                    dimension='labels')
        
        self.results = []

                
        reg_res = self.reg.run(X, y) # To be selected
        
        perm_res = self.perm.run(X, y)
                       
        self.results.append([reg_res, perm_res])
        
        #self.save()
        
        return self.results
Example No. 6
def zscore_function(rep):
    """
    This function applies the zscore() transform to every value in the replications.

    Args:
        rep : LIST[rep1, rep2, rep3, ...]
            A repN is a biological data used to calc the likelihood result

    Returns:
         LIST[rep1, rep2, rep3, ...]
              The new transformed replications
    """
    rep = map(lambda x: np.asmatrix(np.transpose(np.array([sss.zscore(item) for item in np.transpose(np.asarray(x))]))),
              rep)
    return list(rep)
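# --- Illustrative usage sketch (not part of the original source) ---
# Two fake replications (rows x columns); zscore_function standardizes every column
# of each replication independently and returns numpy matrices.
import numpy as np
import scipy.stats as sss

rng = np.random.RandomState(0)
demo_reps = [rng.rand(5, 3) * 10, rng.rand(5, 3) * 10]
demo_out = zscore_function(demo_reps)
print(demo_out[0].mean(axis=0))  # column means are ~0 after the transform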
Example No. 7
    def run(self):

        self.loader = ConnectivityDataLoader()
        self.X, self.y = self.loader.setup_analysis(self.path,
                              self.roi_list,
                              self.directory,
                              self.condition_list,
                              self.subjects).filter(self.filter_).get_data()

        X = self.X
        y = self.y

        X = zscore(X, axis=1) # Sample-wise
        y = zscore(np.float_(y))

        self.fs = FeatureSelectionIterator()
        self.fs.setup_analysis(self.fs_algorithm,
                               self.fs_ranking_fx).run(X, y).select_first(80)

        self.reg = RegressionAnalysis().setup_analysis(self.cv_schema,
                                                       self.learner,
                                                       self.error_fx)
        self.results = []
        for set_ in self.fs:
            X_ = X[:, set_]
            y_ = y

            reg_res = self.reg.run(X_, y_)  # To be selected

            self.results.append([reg_res])

        self.save()

        return self.results
Example No. 8
    def add_normalized_te(self, normed_prefix="norm"):
        """
        z-score normalize the TE values.

        Creates new columns corresponding to normed TEs,
        beginning with 'normed_prefix'.

        Normed TEs are first logged (base 2) and then z-score
        normalized.
        """
        print "Normalizing TE..."
        te_cols = [c for c in self.table.columns \
                   if c.startswith("TE_")]
        for col in te_cols:
            normed_col = "%s_%s" %(normed_prefix, col)
            self.table[normed_col] = zscore(self.table[col].apply(log2).dropna())
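# --- Standalone sketch (illustrative only, not the original class) ---
# The same normalization the method above performs, on a toy DataFrame with TE_*
# columns: log2-transform each TE column, then z-score it into a new 'norm_' column.
import pandas as pd
from numpy import log2
from scipy.stats import zscore

demo_table = pd.DataFrame({"TE_sample1": [0.5, 1.0, 2.0, 4.0],
                           "TE_sample2": [1.0, 2.0, 8.0, 16.0]})
for col in [c for c in demo_table.columns if c.startswith("TE_")]:
    demo_table["norm_%s" % col] = zscore(demo_table[col].apply(log2).dropna())
print(demo_table)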
Example No. 9
def process_input(subjects_files, mask):
    """Process input to obtain data suitable for SRM"""

    mask_suffix = "_" + mask + ".nii.gz"
    srm_input = []
    for subject_files in subjects_files:
        srm_input_subject = []
        for path in subject_files:
            if path.name.endswith(mask_suffix):
                continue
            img = nib.load(str(path))
            mask_path = \
                str(path.with_suffix("").with_suffix("")) + mask_suffix
            mask = nib.load(str(mask_path))
            srm_input_subject.append(nilearn.masking.apply_mask(img, mask))
        srm_input.append(stats.zscore(np.concatenate(srm_input_subject),
                                      axis=0, ddof=1).T)
    return srm_input
Example No. 10
def get_feature_weights_matrix(weights, sets, mask, indices):
    """
    Function used to compute the average weight matrix in case of
    several cross-validation folds and feature selection for each
    fold.
    
    Parameters
    ----------
    weights : ndarray shape n_folds x n_selected_features
        The weights matrix with the shape specified in the signature
    sets : ndarray shape n_folds x n_selected_features
        This represents the index in the square matrix of the feature selected 
        by the algorithm in each cross-validation fold
    mask : ndarray shape n_roi x n_roi 
        The mask matrix of the valid ROIs selected. Important: this matrix
        should be triangular with the lower part set to zero.
    indices : tuple
        This is equal to np.nonzero(mask)
        
    Returns
    -------
    matrix: ndarray n_roi x n_roi
        It returns the average weights across cross-validation fold in
        square form.
    
    """
    
    
    weights = weights.squeeze()
    filling_vector = np.zeros(np.count_nonzero(mask))
    counting_vector = np.zeros(np.count_nonzero(mask))
    
    for s, w in zip(sets, weights):
        filling_vector[s] += zscore(w)
        counting_vector[s] += 1
        
    avg_weigths = np.nan_to_num(filling_vector/counting_vector)
    mask[indices] = avg_weigths    
    matrix = np.nan_to_num(copy_matrix(mask, diagonal_filler=0))
    
    return matrix
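# --- Illustrative usage sketch (not part of the original source) ---
# copy_matrix is external to this snippet; a plausible stand-in that mirrors its
# apparent behaviour (symmetrize an upper-triangular matrix and fill the diagonal)
# is defined here for the demo only.
import numpy as np
from scipy.stats import zscore

def copy_matrix(matrix, diagonal_filler=0):
    out = matrix + matrix.T
    np.fill_diagonal(out, diagonal_filler)
    return out

rng = np.random.RandomState(0)
n_roi, n_folds, n_selected = 6, 4, 5
demo_mask = np.triu(np.ones((n_roi, n_roi)), k=1)   # upper triangle, lower part zero
demo_indices = np.nonzero(demo_mask)
n_conn = np.count_nonzero(demo_mask)                # number of candidate connections
demo_sets = np.array([rng.choice(n_conn, n_selected, replace=False)
                      for _ in range(n_folds)])
demo_weights = rng.randn(n_folds, n_selected)
# connections never selected end up as 0/0 and are set to 0 by nan_to_num
# (a RuntimeWarning may be printed)
avg_matrix = get_feature_weights_matrix(demo_weights, demo_sets, demo_mask, demo_indices)
print(avg_matrix.shape)  # (n_roi, n_roi): symmetric average of z-scored fold weights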
def searchlight(coords, human_bounds, mask, subjs, song_idx, song_bounds,
                srm_k, hrf):
    """run searchlight 

       Create searchlight object and perform voxel function at each searchlight location
    
       Parameters
       ----------
       coords : voxel by xyz ndarray (2D, Vx3)
       human_bounds : ndarray of human-annotated event boundaries for the song
       mask   : 3D binary ndarray (e.g. 91 x 109 x 91)
       subjs  : list of subject IDs
       song_idx : song index (scalar)
       song_bounds : ndarray of song boundary TRs
       srm_k  : # of SRM features (scalar)
       hrf    : hemodynamic response offset in TRs (scalar)
       
       Returns
       -------
       3D data: brain (or ROI) filled with searchlight function scores (3D)

    """

    stride = 5
    radius = 5
    min_vox = srm_k
    nPerm = 1000
    SL_allvox = []
    SL_results = []
    datadir = '/jukebox/norman/jamalw/MES/prototype/link/scripts/data/searchlight_input/'
    for x in range(0, np.max(coords, axis=0)[0] + stride, stride):
        for y in range(0, np.max(coords, axis=0)[1] + stride, stride):
            for z in range(0, np.max(coords, axis=0)[2] + stride, stride):
                if not os.path.isfile(datadir + subjs[0] + '/' + str(x) + '_' +
                                      str(y) + '_' + str(z) + '.npy'):
                    continue
                D = distance.cdist(coords,
                                   np.array([x, y, z]).reshape((1, 3)))[:, 0]
                SL_vox = D <= radius
                data = []
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) +
                                        '_' + str(y) + '_' + str(z) + '.npy')
                    subj_regs = np.genfromtxt(datadir + subjs[i] +
                                              '/EPI_mcf1.par')
                    motion = subj_regs.T
                    regr = linear_model.LinearRegression()
                    regr.fit(motion[:, 0:2511].T, subj_data[:, :, 0].T)
                    subj_data1 = subj_data[:, :, 0] - np.dot(
                        regr.coef_,
                        motion[:, 0:2511]) - regr.intercept_[:, np.newaxis]
                    data.append(
                        np.nan_to_num(stats.zscore(subj_data1, axis=1,
                                                   ddof=1)))
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) +
                                        '_' + str(y) + '_' + str(z) + '.npy')
                    subj_regs = np.genfromtxt(datadir + subjs[i] +
                                              '/EPI_mcf2.par')
                    motion = subj_regs.T
                    regr = linear_model.LinearRegression()
                    regr.fit(motion[:, 0:2511].T, subj_data[:, :, 1].T)
                    subj_data2 = subj_data[:, :, 1] - np.dot(
                        regr.coef_,
                        motion[:, 0:2511]) - regr.intercept_[:, np.newaxis]
                    data.append(
                        np.nan_to_num(stats.zscore(subj_data2, axis=1,
                                                   ddof=1)))
                print("Running Searchlight")
                # only run function on searchlights with voxels greater than or equal to min_vox
                if data[0].shape[0] >= min_vox:
                    SL_match = HMM(data, human_bounds, song_idx, song_bounds,
                                   srm_k, hrf)
                    SL_results.append(SL_match)
                    SL_allvox.append(np.array(np.nonzero(SL_vox)[0]))
    voxmean = np.zeros((coords.shape[0], nPerm + 1))
    vox_SLcount = np.zeros(coords.shape[0])
    for sl in range(len(SL_results)):
        voxmean[SL_allvox[sl], :] += SL_results[sl]
        vox_SLcount[SL_allvox[sl]] += 1
    voxmean = voxmean / vox_SLcount[:, np.newaxis]
    vox_z = np.zeros((coords.shape[0], nPerm + 1))
    for p in range(nPerm + 1):
        vox_z[:,
              p] = (voxmean[:, p] - np.mean(voxmean[:, 1:], axis=1)) / np.std(
                  voxmean[:, 1:], axis=1)
    return vox_z, voxmean
def searchlight(coords, mask, subjs, set_srm):
    """run searchlight 

       Create searchlight object and perform voxel function at each searchlight location
    
       Parameters
       ----------
       coords : voxel by xyz ndarray (2D, Vx3)
       mask   : x x y x z (e.g. 91,109,91)
       subjs  : list of subject IDs
       set_srm : option forwarded to isc_srm
 
       Returns
       -------
       3D data: brain (or ROI) filled with searchlight function scores (3D)

    """

    stride = 5
    radius = 5
    min_vox = 10
    nPerm = 1000
    SL_allvox = []
    SL_results = []
    voxISC = np.zeros(coords.shape[0])
    datadir = '/jukebox/norman/jamalw/MES/prototype/link/scripts/data/searchlight_input/'
    for x in range(0, np.max(coords, axis=0)[0] + stride, stride):
        for y in range(0, np.max(coords, axis=0)[1] + stride, stride):
            for z in range(0, np.max(coords, axis=0)[2] + stride, stride):
                if not os.path.isfile(datadir + subjs[0] + '/' + str(x) + '_' +
                                      str(y) + '_' + str(z) + '.npy'):
                    continue
                D = distance.cdist(coords,
                                   np.array([x, y, z]).reshape((1, 3)))[:, 0]
                SL_vox = D <= radius
                data = []
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) +
                                        '_' + str(y) + '_' + str(z) + '.npy')
                    data.append(
                        np.nan_to_num(
                            stats.zscore(subj_data[:, :, 0], axis=1, ddof=1)))
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) +
                                        '_' + str(y) + '_' + str(z) + '.npy')
                    data.append(
                        np.nan_to_num(
                            stats.zscore(subj_data[:, :, 1], axis=1, ddof=1)))
                print("Running Searchlight")
                SL_isc_mean_results = isc_srm(data, set_srm)
                SL_results.append(SL_isc_mean_results)
                SL_allvox.append(np.array(np.nonzero(SL_vox)[0]))
    voxmean = np.zeros((coords.shape[0]))
    vox_SLcount = np.zeros(coords.shape[0])
    for sl in range(len(SL_results)):
        voxmean[SL_allvox[sl]] += SL_results[sl]
        vox_SLcount[SL_allvox[sl]] += 1
    print("Voxmean: ", voxmean.shape)
    print("vox_SLcount: ", vox_SLcount)
    voxmean = voxmean / vox_SLcount

    return voxmean
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.patches as patches
import numpy as np
import brainiak.eventseg.event
from scipy.stats import norm, zscore, pearsonr, stats
from scipy.signal import gaussian, convolve
from sklearn import decomposition
from brainiak.funcalign.srm import SRM

datadir = '/tigress/jamalw/MES/prototype/link/scripts/chris_dartmouth/data/'

songs = ['St Pauls Suite', 'I Love Music', 'Moonlight Sonata',
         'Change of the Guard', 'Waltz of Flowers', 'The Bird', 'Island',
         'Allegro Moderato', 'Finlandia', 'Early Summer', 'Capriccio Espagnole',
         'Symphony Fantastique', 'Boogie Stop Shuffle', 'My Favorite Things',
         'Blue Monk', 'All Blues']

# Load in data
train = np.nan_to_num(stats.zscore(np.load(datadir + 'A1_run1_n25.npy'),axis=1,ddof=1))
test = np.nan_to_num(stats.zscore(np.load(datadir + 'A1_run2_n25.npy'),axis=1,ddof=1))

# Convert data into lists where each element is voxels by samples
train_list = []
test_list = []
for i in range(0,train.shape[2]):
    train_list.append(train[:,:,i])
    test_list.append(test[:,:,i])

# Initialize model
print('Building Model')
srm = SRM(n_iter=10, features=10)

# Fit model to training data (run 1)
print('Training Model')
Example No. 14
def sigmoid(x, k, c):
    y = 1 / (1 + np.exp(-k*(x))) + c
    return y


def logarithm(x, a, b):
    y = a + np.log(x + b)
    return y

def exponential(x, k):
    y = np.exp(k * x)
    return y

X = X/X.std(axis=0)
y = y/y.std()

X = zscore(X, axis=0)
y = zscore(y, axis=0)

x_ = np.linspace(-1.5, 1.5, 100)
error_conn = []
mse_ = []
func = [exponential, sigmoid]
shift = 200
for i in range(100):
    mse__ = []
    pl.figure()
    pl.scatter(X[:,shift+i], y)
    for f in func:
        try:
            popt, pcov = curve_fit(f, X[:,shift + i], y)
        except RuntimeError:
Example No. 15
def standardize_values(df, prop_name):
    std_dev = 3
    z_scores = stats.zscore(df.loc[:, prop_name])
    return df[np.abs(z_scores) < std_dev]
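# --- Usage sketch (illustrative only) ---
# Rows whose value in the chosen column lies 3 or more standard deviations from the
# mean are dropped.
import numpy as np
import pandas as pd
from scipy import stats

demo_df = pd.DataFrame({"height": [170, 171, 169, 172, 170, 168, 171,
                                   170, 169, 172, 171, 170, 500]})
print(standardize_values(demo_df, "height"))  # the extreme 500 row is removed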
    ])

    human_bounds = np.load(ann_dirs + songs1[song_number] + '/' +
                           songs1[song_number] + '_beh_seg.npy') + hrf

    human_bounds = np.append(0, np.append(human_bounds, durs1[song_number]))

    start_run1 = song_bounds1[song_number]
    end_run1 = song_bounds1[song_number + 1]

    start_run2 = song_bounds2[songs2.index(songs1[song_number])]
    end_run2 = song_bounds2[songs2.index(songs1[song_number]) + 1]

    # Load in data
    run1 = stats.zscore(np.load(datadir + 'fdr_01_' + roi +
                                '_split_merge_no_srm_run1_n25.npy'),
                        axis=1,
                        ddof=1)
    run2 = stats.zscore(np.load(datadir + 'fdr_01_' + roi +
                                '_split_merge_no_srm_run2_n25.npy'),
                        axis=1,
                        ddof=1)

    #if do_srm == 0:
    run1DataAvg = np.mean(run1, axis=2)
    run2DataAvg = np.mean(run2, axis=2)

    song1 = run1DataAvg[:, start_run1:end_run1]
    song2 = run2DataAvg[:, start_run2:end_run2]
    #elif do_srm == 1:
    # Convert data into lists where each element is voxels by samples
    #run1_list = []
             features_orb_4, features_orb_5, features_orb_6, features_orb_7,
             features_orb_8, features_orb_9),
            axis=0)
    else:
        features = np.load("features.npy")
if load_features_flat:
    print("Loading flattened feature data")
    features_flatten = np.load("features_flatten.npy")
data_y = np.load("data_y.npy")
if limit_data:
    data_y = data_y[:image_count]

if remove_outliers:
    #Remove outliers
    print("Removing Outliers")
    z = np.abs(stats.zscore(data[:, 6].astype(int)))

    # print(np.where(z > threshold))
    print(data.shape)
    print(data_y.shape)

    data = data[(z < threshold)]
    data_y = data_y[(z < threshold)]

    print("Outliers Removed")
    print(data.shape)
    print(data_y.shape)

#used to reduce the image pool to run faster tests
np.random.seed(42)
if limit_data:
Example No. 18
def compute_mesh_weights(mesh, weight_type='conformal', cot_threshold=None,
                         z_threshold=None):
    """
    Compute a weight matrix.
    W is a sparse weight matrix and W(i,j) = 0 if vertex i and vertex j are not
    connected in the mesh.

    details are presented in:
    Desbrun, M., Meyer, M., & Alliez, P. (2002).
    Intrinsic parameterizations of surface meshes.
    Computer Graphics Forum, 21(3), 209–218.
    https://doi.org/10.1111/1467-8659.00580

    and
    Reuter, M., Biasotti, S., & Giorgi, D. (2009).
    Discrete Laplace–Beltrami operators for shape analysis and segmentation.
    Computers & …, 33(3), 381–390.
    https://doi.org/10.1016/j.cag.2009.03.005

    additional checks and thresholds are applied to ensure finite values

    :param mesh:
    :param weight_type: choice across conformal, fem, meanvalue, authalic
    :param cot_threshold:
    :param z_threshold:
    :return:
    """
#    cot_threshold=0.00001
#   print('angle threshold')
    print('    Computing mesh weights of type ' + weight_type)
    vert = mesh.vertices
    poly = mesh.faces

    Nbv = vert.shape[0]
    W = sparse.lil_matrix((Nbv, Nbv))
    femB = sparse.lil_matrix((Nbv, Nbv))
    if weight_type == 'conformal' or weight_type == 'fem':
        threshold = 0.0001  # np.spacing(1)??
        threshold_needed = 0
        for i in range(3):
            i1 = np.mod(i, 3)
            i2 = np.mod(i + 1, 3)
            i3 = np.mod(i + 2, 3)
            pp = vert[poly[:, i2], :] - vert[poly[:, i1], :]
            qq = vert[poly[:, i3], :] - vert[poly[:, i1], :]
            cr = np.cross(pp, qq)
            area = np.sqrt(np.sum(np.power(cr, 2), 1)) / 2
#             nopp = np.apply_along_axis(np.linalg.norm, 1, pp)
#             noqq = np.apply_along_axis(np.linalg.norm, 1, qq)
            noqq = np.sqrt(np.sum(qq * qq, 1))
            nopp = np.sqrt(np.sum(pp * pp, 1))
            thersh_nopp = np.where(nopp < threshold)[0]
            thersh_noqq = np.where(noqq < threshold)[0]
            if len(thersh_nopp) > 0:
                nopp[thersh_nopp] = threshold
                threshold_needed += len(thersh_nopp)
            if len(thersh_noqq) > 0:
                noqq[thersh_noqq] = threshold
                threshold_needed += len(thersh_noqq)
    #        print(np.min(noqq))
            pp = pp / np.vstack((nopp, np.vstack((nopp, nopp)))).transpose()
            qq = qq / np.vstack((noqq, np.vstack((noqq, noqq)))).transpose()
            ang = np.arccos(np.sum(pp * qq, 1))
            # ############## preventing infs in weights
            inds_zeros = np.where(ang == 0)[0]
            ang[inds_zeros] = threshold
            threshold_needed_angle = len(inds_zeros)
            ################################
            cot = 1 / np.tan(ang)
            if cot_threshold is not None:
                thresh_inds = cot < 0
                cot[thresh_inds] = cot_threshold
                threshold_needed_angle += np.count_nonzero(thresh_inds)
            W = W + sparse.coo_matrix((cot, (poly[:, i2], poly[:, i3])),
                                      shape=(Nbv, Nbv))
            W = W + sparse.coo_matrix((cot, (poly[:, i3], poly[:, i2])),
                                      shape=(Nbv, Nbv))
            femB = femB + sparse.coo_matrix((area / 12,
                                             (poly[:, i2], poly[:, i3])),
                                            shape=(Nbv, Nbv))
            femB = femB + sparse.coo_matrix((area / 12,
                                             (poly[:, i3], poly[:, i2])),
                                            shape=(Nbv, Nbv))

        # if weight_type == 'fem' :
        #     W.data = W.data/2

        nnz = W.nnz
        if z_threshold is not None:
            z_weights = sss.zscore(W.data)
            inds_out = np.where(np.abs(z_weights) > z_threshold)[0]
            W.data[inds_out] = np.mean(W.data)
            print('    -Zscore threshold needed for ', len(inds_out),
                  ' values = ', 100 * len(inds_out) / nnz, ' %')
            # inds_out_inf = np.where(z_weights < -z_thresh)[0]
            # inds_out_sup = np.where(z_weights > z_thresh)[0]
            # val_inf = np.max(W.data[inds_out_inf])
            # W.data[inds_out_inf] = val_inf
            # val_sup = np.min(W.data[inds_out_sup])
            # W.data[inds_out_sup] = val_sup
            # print('    -Zscore threshold needed for ',
            # len(inds_out_inf)+len(inds_out_sup),' values-')
        print('    -edge length threshold needed for ', threshold_needed,
              ' values = ', 100 * threshold_needed / nnz, ' %')
        if cot_threshold is not None:
            print('    -cot threshold needed for ', threshold_needed_angle,
                  ' values = ', 100 * threshold_needed_angle / nnz, ' %')

    if weight_type == 'meanvalue':
        for i in range(3):
            i1 = np.mod(i, 3)
            i2 = np.mod(i + 1, 3)
            i3 = np.mod(i + 2, 3)
            pp = vert[poly[:, i2], :] - vert[poly[:, i1], :]
            qq = vert[poly[:, i3], :] - vert[poly[:, i1], :]
            rr = vert[poly[:, i2], :] - vert[poly[:, i3], :]
            # normalize the vectors
            noqq = np.sqrt(np.sum(qq * qq, 1))
            nopp = np.sqrt(np.sum(pp * pp, 1))
            norr = np.sqrt(np.sum(rr * rr, 1))
            pp = pp / np.vstack((nopp, np.vstack((nopp, nopp)))).transpose()
            qq = qq / np.vstack((noqq, np.vstack((noqq, noqq)))).transpose()
            rr = rr / np.vstack((norr, np.vstack((norr, norr)))).transpose()
            # compute angles
            angi1 = np.arccos(np.sum(pp * qq, 1)) / 2
            qq = -qq
            angi2 = np.arccos(np.sum(rr * qq, 1)) / 2
            W = W + sparse.coo_matrix((np.tan(angi1) / norr,
                                       (poly[:, i1], poly[:, i3])),
                                      shape=(Nbv, Nbv))
            W = W + sparse.coo_matrix((np.tan(angi2) / norr,
                                       (poly[:, i3], poly[:, i1])),
                                      shape=(Nbv, Nbv))
        nnz = W.nnz
    if weight_type == 'authalic':
        for i in range(3):
            i1 = np.mod(i, 3)
            i2 = np.mod(i + 1, 3)
            i3 = np.mod(i + 2, 3)
            pp = vert[poly[:, i2], :] - vert[poly[:, i1], :]
            qq = vert[poly[:, i3], :] - vert[poly[:, i1], :]
            rr = vert[poly[:, i2], :] - vert[poly[:, i3], :]
            # normalize the vectors
            noqq = np.sqrt(np.sum(qq * qq, 1))
            nopp = np.sqrt(np.sum(pp * pp, 1))
            norr = np.sqrt(np.sum(rr * rr, 1))
            pp = pp / np.vstack((nopp, np.vstack((nopp, nopp)))).transpose()
            qq = qq / np.vstack((noqq, np.vstack((noqq, noqq)))).transpose()
            rr = rr / np.vstack((norr, np.vstack((norr, norr)))).transpose()
            # compute angles
            angi1 = np.arccos(np.sum(pp * qq, 1)) / 2
            cot1 = 1 / np.tan(angi1)
            qq = -qq
            angi2 = np.arccos(np.sum(rr * qq, 1)) / 2
            cot2 = 1 / np.tan(angi2)
            W = W + sparse.coo_matrix((cot1 / norr ** 2,
                                       (poly[:, i3], poly[:, i1])),
                                      shape=(Nbv, Nbv))
            W = W + sparse.coo_matrix((cot2 / norr ** 2,
                                       (poly[:, i1], poly[:, i3])),
                                      shape=(Nbv, Nbv))
        nnz = W.nnz
    li = np.hstack(W.data)
    nb_Nan = len(np.where(np.isnan(li))[0])
    nb_neg = len(np.where(li < 0)[0])
    print('    -number of Nan in weights: ',
          nb_Nan, ' = ', 100 * nb_Nan / nnz, ' %')
    print('    -number of Negative values in weights: ',
          nb_neg, ' = ', 100 * nb_neg / nnz, ' %')

    return W.tocsr(), femB.tocsr()
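# --- Usage sketch (illustrative only) ---
# A toy one-triangle "mesh" exposing the vertices/faces attributes the function
# expects (the original likely passes a trimesh-style mesh). The imports below are
# the ones compute_mesh_weights itself relies on.
from collections import namedtuple
import numpy as np
import scipy.stats as sss
from scipy import sparse

ToyMesh = namedtuple("ToyMesh", ["vertices", "faces"])
demo_mesh = ToyMesh(vertices=np.array([[0., 0., 0.],
                                       [1., 0., 0.],
                                       [0., 1., 0.]]),
                    faces=np.array([[0, 1, 2]]))
W_demo, femB_demo = compute_mesh_weights(demo_mesh, weight_type='conformal')
print(W_demo.toarray())  # symmetric cotangent weights for the single triangle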
run1_masked = []
run2_masked = []
indices = np.where((mask_img > 0) & (parcels == 77))

for s in range(len(subjs)):
    # Load subjects nifti and motion data then clean (run1)
    print("Loading Run1 BOLD subj num: " + str(s + 1))
    run1 = nib.load(
        datadir + 'subjects/' + subjs[s] +
        '/analysis/run1.feat/trans_filtered_func_data.nii').get_data()[:, :, :,
                                                                       0:2511]
    print("Loading Run1 Motion Regressors")
    motion_run1 = np.genfromtxt(motion_dir + subjs[s] + '/EPI_mcf1.par')
    print("Cleaning Run1 BOLD Data")
    clean_run1 = stats.zscore(clean_data(run1[indices][:], motion_run1),
                              axis=1,
                              ddof=1)
    run1_masked.append(run1[indices][:])

    # Load subjects nifti and motion data then clean (run2)
    print("Loading Run2 BOLD subj num: " + str(s + 1))
    run2 = nib.load(
        datadir + 'subjects/' + subjs[s] +
        '/analysis/run2.feat/trans_filtered_func_data.nii').get_data()[:, :, :,
                                                                       0:2511]
    print("Loading Run2 Motion Regressors")
    motion_run2 = np.genfromtxt(motion_dir + subjs[s] + '/EPI_mcf2.par')
    print("Cleaning Run2 BOLD Data")
    clean_run2 = stats.zscore(clean_data(run2[indices][:], motion_run2),
                              axis=1,
                              ddof=1)
Example No. 20
# Convert data to a list of arrays matching SRM input.
# Each element is a matrix of voxels by TRs.
# Also, concatenate data from both hemispheres in the brain.
movie_data = []
for s in range(subjects):
    if s % size == rank:
        movie_data.append(
            np.concatenate(
                [movie_data_left[:, :, s], movie_data_right[:, :, s]], axis=0))
    else:
        movie_data.append(None)

# Z-score the data
for subject in range(subjects):
    if movie_data[subject] is not None:
        movie_data[subject] = stats.zscore(movie_data[subject], axis=1, ddof=1)

# Run SRM with the movie data
srm = brainiak.funcalign.srm.SRM(n_iter=10, features=50, comm=comm)
srm.fit(movie_data)


# We define a function to present the output of the experiment.
def plot_confusion_matrix(cm, title="Confusion Matrix"):
    """Plots a confusion matrix for each subject
    """
    import matplotlib.pyplot as plt
    import math
    plt.figure()
    subjects = len(cm)
    root_subjects = math.sqrt(subjects)
Example No. 21
def calculate_gene_expression_similarity(reduced_stat_map_data):
    store_file = "/ahba_data/store_max1_reduced.h5"

    results_dfs = []
    with pd.HDFStore(store_file, 'r') as store:
        for donor_id in store.keys():
            print "Loading expression data (%s)" % donor_id
            expression_data = store.get(donor_id.replace(".", "_"))

            print "Getting statmap values (%s)" % donor_id
            nifti_values = reduced_stat_map_data[expression_data.columns]

            print "Removing missing values (%s)" % donor_id
            na_mask = np.isnan(nifti_values)
            nifti_values = np.array(nifti_values)[np.logical_not(na_mask)]
            expression_data.drop(expression_data.columns[na_mask],
                                 axis=1,
                                 inplace=True)

            print "z scoring (%s)" % donor_id
            expression_data = pd.DataFrame(zscore(expression_data, axis=1),
                                           columns=expression_data.columns,
                                           index=expression_data.index)
            nifti_values = zscore(nifti_values)

            print "Calculating linear regressions (%s)" % donor_id
            regression_results = np.linalg.lstsq(
                np.c_[nifti_values, np.ones_like(nifti_values)],
                expression_data.T)
            results_df = pd.DataFrame({"slope": regression_results[0][0]},
                                      index=expression_data.index)

            results_df.columns = pd.MultiIndex.from_tuples(
                [(
                    donor_id[1:],
                    c,
                ) for c in results_df.columns],
                names=['donor_id', 'parameter'])

            results_dfs.append(results_df)

        print "Concatenating results"
        results_df = pd.concat(results_dfs, axis=1)
        del results_dfs

    t, p = ttest_1samp(results_df, 0.0, axis=1)
    group_results_df = pd.DataFrame({
        "t": t,
        "p": p
    },
                                    columns=['t', 'p'],
                                    index=expression_data.index)
    _, group_results_df["p (FDR corrected)"], _, _ = multipletests(
        group_results_df.p, method='fdr_bh')
    group_results_df["variance explained (mean)"] = (
        results_df.xs('slope', axis=1, level=1)**2 * 100).mean(axis=1)
    group_results_df["variance explained (std)"] = (
        results_df.xs('slope', axis=1, level=1)**2 * 100).std(axis=1)
    del results_df
    probe_info = pd.read_csv("/ahba_data/probe_info_max1.csv",
                             index_col=0).drop(['chromosome', "gene_id"],
                                               axis=1)
    group_results_df = group_results_df.join(probe_info)
    group_results_df = group_results_df[[
        "gene_symbol", "entrez_id.1", "gene_name", "t", "p",
        "p (FDR corrected)", "variance explained (mean)",
        "variance explained (std)"
    ]]

    return group_results_df
Example No. 22
movie_file = sio.loadmat('data/sl_movie_data.mat')
movie_data = movie_file['data']

# Dataset size parameters
dim1,dim2,dim3,ntr,nsubj = movie_data.shape

# preprocess data, zscore and set NaN to 0
all_data = [] # first half train, second half test
for s in range(nsubj):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # To ignore "RuntimeWarning: invalid value encountered in true_divide"
        # There are some all-zero voxels in the data which have to be kept, so there will be a
        # warning when z-scoring the data. It is safe to ignore this warning. If your data does
        # not contain zero voxels, you can remove the two lines above.
        train_tmp = np.nan_to_num(stats.zscore(movie_data[:,:,:,:int(ntr/2),s],axis=3,ddof=1))
        test_tmp = np.nan_to_num(stats.zscore(movie_data[:,:,:,int(ntr/2):,s],axis=3,ddof=1))
    all_data.append(np.concatenate((train_tmp,test_tmp),axis=3))

# print information
if rank == 0:
    print ('searchlight length is {}'.format(sl_rad))
    print ('number of features in SRM: {}'.format(nfeature))
    print ('number of subjects is: {}'.format(len(all_data)))
    print ('number of TR is: {}'.format(ntr))
    print ('brain data dimension is {}-by-{}-by-{}'.format(dim1,dim2,dim3))

# Generate mask: mask is a 3D binary array, with active voxels being 1. I simply set 
# all voxels to be active in this example, but you should set the mask to fit your ROI
# in practice.
mask = np.ones((dim1,dim2,dim3), dtype=bool)
    #        srm_k = initial_srm_k

    # run SRM on masked data
    if runNum == 0:
        shared_data = SRM_V1(run2, run1, srm_k, n_iter)
    elif runNum == 1:
        shared_data = SRM_V1(run1, run2, srm_k, n_iter)

    # perform cross-validation style HMM for n_folds
    for n in range(n_folds):
        np.random.seed(n)
        subj_list_shuffle = np.random.permutation(shared_data)

        # convert data from list to numpy array and z-score in time
        shared_data_stack = stats.zscore(np.dstack(subj_list_shuffle),
                                         axis=1,
                                         ddof=1)

        # split subjects into two groups
        others = np.mean(shared_data_stack[:, start_idx:end_idx, :13], axis=2)
        loo = np.mean(shared_data_stack[:, start_idx:end_idx, 13:], axis=2)

        # fit HMM to song data and return match data where first entry is true match score and all others are permutation scores
        print("Fitting HMM")
        WvA[n, :], bounds[n, :] = HMM(others, loo, human_bounds)

    # take average of WvA scores and bounds over folds
    avgWvA = fisher_mean(WvA, axis=0)
    avgBounds = np.mean(bounds, axis=0)

    # compute z-score
for i in range(int(np.max(parcels))):
    print("Parcel Num: ", str(i + 1))
    # get indices where mask and parcels overlap
    indices = np.where((mask_img.get_data() > 0) & (parcels == i + 1))

    # initialize list for storing masked data across subjects
    run1 = np.load(parcel_dir + "parcel" + str(i + 1) + "_run1.npy")
    run2 = np.load(parcel_dir + "parcel" + str(i + 1) + "_run2.npy")

    # run SRM on masked data
    if runNum == 0:
        shared_data = SRM_V1(run2, run1, srm_k, n_iter)
    elif runNum == 1:
        shared_data = SRM_V1(run1, run2, srm_k, n_iter)

    data = np.mean(stats.zscore(np.dstack(shared_data), axis=1, ddof=1),
                   axis=2)[:, start_idx:end_idx]

    # fit HMM to song data and return match data where first entry is true match score and all others are permutation scores
    print("Fitting HMM")
    SL_match = HMM(data, human_bounds)

    # compute z-score
    match_z = (SL_match[0] - np.mean(SL_match[1:])) / (np.std(SL_match[1:]))

    # compute z-score for euclid by flipping sign after z-scoring
    #match_z = ((SL_match[0] - np.mean(SL_match[1:])) / (np.std(SL_match[1:]))) * -1

    # convert z-score to p-value
    match_p = st.norm.sf(match_z)
songs_run2 = [
    'St_Pauls_Suite', 'I_Love_Music', 'Moonlight_Sonata',
    'Change_of_the_Guard', 'Waltz_of_Flowers', 'The_Bird', 'Island',
    'Allegro_Moderato', 'Finlandia', 'Early_Summer', 'Capriccio_Espagnole',
    'Symphony_Fantastique', 'Boogie_Stop_Shuffle', 'My_Favorite_Things',
    'Blue_Monk', 'All_Blues'
]

durs_run2 = np.array([
    90, 180, 180, 90, 135, 180, 180, 225, 225, 135, 90, 135, 225, 225, 90, 135
])

# Load in data
run1 = np.nan_to_num(
    stats.zscore(np.load(datadir +
                         'fdr_01_lprec_full_split_merge_run1_n25.npy'),
                 axis=1,
                 ddof=1))
run2 = np.nan_to_num(
    stats.zscore(np.load(datadir +
                         'fdr_01_lprec_full_split_merge_run2_n25.npy'),
                 axis=1,
                 ddof=1))

nSubj = run1.shape[2]

# Convert data into lists where each element is voxels by samples
run1_list = []
run2_list = []
for i in range(0, nSubj):
    run1_list.append(run1[:, :, i])
    run2_list.append(run2[:, :, i])
Example No. 26
# Original series.
X0, X1 = gen_series()
fig, (ax0, ax1) = plt.subplots(nrows=2, ncols=1, figsize=(8, 6))
ax0.plot(X0, '-r.')
ax0.set_title('Time series X0(t) vs t')
ax1.plot(X1, '-r.')
ax1.set_title('Time series X1(t) vs t')
plt.show()


"""An essential and necessary step for MSSA is to normalize both time series.
That means to remove the mean value and to divide it by the standard deviation
(for each series separately)."""

X0_zs, X1_zs = stats.zscore(X0), stats.zscore(X1)
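# Equivalent manual normalization (shown only to make the formula explicit; assumes
# numpy is imported as np, as it is used below in shift()):
# z = (x - mean(x)) / std(x), which is what stats.zscore computes for each series.
X0_manual = (np.asarray(X0) - np.mean(X0)) / np.std(X0)
assert np.allclose(X0_manual, X0_zs)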


# Using shifted time series.
def shift(arr, n, order='forward'):
    if isinstance(arr, np.ndarray):
        arr = arr.tolist()
    if order == 'forward':
        shifted = arr[n:] + [0] * n
    elif order == 'reversed':
        shifted = [0] * n + arr[:-n]
    else:
        print("Order %s not recognized.  Try forward or reversed" % order)

    return shifted
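# Usage sketch (illustrative only): a forward shift drops the first n samples and pads
# the end with zeros; 'reversed' pads the front and drops the last n samples.
print(shift([1, 2, 3, 4, 5], 2, order='forward'))    # [3, 4, 5, 0, 0]
print(shift([1, 2, 3, 4, 5], 2, order='reversed'))   # [0, 0, 1, 2, 3]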
Example No. 27
def analyze_results(directory, 
                    conditions, 
                    n_permutations=1000.):
    
    
    """Write the results of the regression analysis

    Parameters
    ----------
    directory : string or list of strings
        Path or list of paths where the results are stored.
    
    conditions : string or list of strings
        Conditions to be analyzed.


    Returns
    -------
    None
        Figures and summary files are written under res_path.

    """
    
    res_path = '/media/robbis/DATA/fmri/monks/0_results/'
    subjects = np.loadtxt('/media/robbis/DATA/fmri/monks/attributes_struct.txt',
                      dtype=str)

    path = '/media/robbis/DATA/fmri/monks/'
    roi_list = []
    roi_list = np.loadtxt('/media/robbis/DATA/fmri/templates_fcmri/findlab_rois.txt', 
                          delimiter=',',
                          dtype=str)
    
    if isinstance(directory, str):
        directory = [directory]
        
    if isinstance(conditions, str):
        conditions = [conditions]
        
    
    for dir_ in directory:
        for cond_ in conditions:
            
            fname_ = os.path.join(res_path, dir_, cond_+'_values_1000_50.npz')
            
            results_ = np.load(fname_)
            values_ = results_['arr_0'].tolist()
            errors_ = values_['error']      #values_['errors_']
            sets_ = values_['features']     #values_['sets_']
            weights_ = values_['weights']   #values_['weights_']
            samples_ = values_['subjects']  #values_['samples_']
            
            fname_ = os.path.join(res_path, dir_, cond_+'_permutation_1000_50.npz')
            
            results_ = np.load(fname_)
            values_p = results_['arr_0'].tolist()
            errors_p = values_p['error']        #values_p['errors_p']
            sets_p = values_p['features']       #values_p['sets_p']
            weights_p = values_p['weights']     #values_p['weights_p']
            samples_p = values_p['subjects']    #values_p['samples_p']
            
            errors_p = np.nanmean(errors_p, axis=1)
                        
            print('-----------'+dir_+'-------------')
            print(cond_)
            print ('MSE = '+str(errors_[:,0].mean())+' -- p '+ \
                str(np.count_nonzero(errors_p[:,0] < errors_[:,0].mean())/n_permutations))
            print('COR = '+str(np.nanmean(errors_[:,1]))+' -- p '+ \
                str(np.count_nonzero(errors_p[:,1] > np.nanmean(errors_[:,1]))/n_permutations))
                
            directory_ = dir_
            learner_ = "SVR_C_1" 
        
            prename = "%s_%s" %(cond_, learner_)
            
            ######## Get matrix infos ###############
            
            conn_test = ConnectivityLoader(res_path, 
                                         subjects, 
                                         directory_, 
                                         roi_list)
            
            # Get nan mask to correctly fill matrix
            nan_mask = conn_test.get_results(['Samatha', 'Vipassana'])
            # Transform matrix into float of ones
            mask_ = np.float_(~np.bool_(nan_mask))
            # Get the upper part of the matrix
            mask_ = np.triu(mask_, k=1)
            mask_indices = np.nonzero(mask_)
            n_bins = np.count_nonzero(mask_)
            
            
            ###### Plot of distributions of errors and permutations #########
            #errors_p = np.nanmean(errors_p, axis=1)
            
            fig_ = pl.figure()
            bpp = pl.boxplot(errors_p, showfliers=False, showmeans=True, patch_artist=True)
            bpv = pl.boxplot(errors_, showfliers=False, showmeans=True, patch_artist=True)
            fname = "%s_perm_1000_boxplot.png" %(prename)
           
            
            for box_, boxp_ in zip(bpv['boxes'], bpp['boxes']):
                box_.set_facecolor('lightgreen')
                boxp_.set_facecolor('lightslategrey')
              
              
            pl.xticks(np.array([1,2]), ['MSE', 'COR'])
            
            pl.savefig(os.path.join(res_path, directory_, fname))
            pl.close()
            
            n_permutations = float(errors_p[:,0].shape[0])
            
            
            ##### Plot of connection distributions ########
            
            pl.figure()
            h_values_p, _ = np.histogram(sets_p.flatten(), bins=np.arange(0, n_bins+1))
            #pl.plot(zscore(h_values_p))
            
            pl.hist(zscore(h_values_p), bins=25)
            
            fname = "%s_features_set_dist.png" %(prename)
            pl.savefig(os.path.join(res_path, directory_, fname))
            
            pl.figure()
            h_values_, _ = np.histogram(sets_.flatten(), bins=np.arange(0, n_bins+1))
            pl.plot(zscore(h_values_))
                
            
            fname = "%s_features_set_cross_validation.png" %(prename)
            pl.savefig(os.path.join(res_path, directory_, fname))
            
            pl.close('all')
            
            
            ######## Plot connectivity stuff ###########
            
            weights_ = weights_.squeeze()
            filling_vector = np.zeros(np.count_nonzero(mask_))
            counting_vector = np.zeros(np.count_nonzero(mask_))
            
            for s, w in zip(sets_, weights_):
                filling_vector[s] += zscore(w)
                counting_vector[s] += 1
            
            # Calculate the average weights and then zscore
            avg_weigths = np.nan_to_num(filling_vector/counting_vector)
            
            mask_[mask_indices] = avg_weigths
            
            matrix_ = np.nan_to_num(copy_matrix(mask_, diagonal_filler=0))
        
            names_lr, colors_lr, index_, coords, _ = get_atlas_info(dir_)
            
            '''
            matrix_[matrix_ == 0] = np.nan
            matrix_[np.abs(matrix_) < 1] = np.nan
            '''
            size_w = np.zeros_like(matrix_)
            size_w[mask_indices] = np.abs(avg_weigths)
            size_w = np.nan_to_num(copy_matrix(size_w, diagonal_filler=0))
            size_w = np.sum(size_w, axis=0)
            
            f, _ = plot_connectivity_circle_edited(matrix_[index_][:,index_], 
                                            names_lr[index_], 
                                            node_colors=colors_lr[index_],
                                            node_size=2*size_w[index_]**2,
                                            con_thresh = 1.4,
                                            title=cond_,
                                            node_angles=circular_layout(names_lr, 
                                                                        list(names_lr),
                                                                        ),
                                            fontsize_title=19,
                                            fontsize_names=13,
                                            fontsize_colorbar=13,
                                            colorbar_size=0.3,
                                            colormap='bwr',
                                            #colormap=cm_,
                                            vmin=-3.,
                                            vmax=3.,
                                            fig=pl.figure(figsize=(16,16))
                                            )
            
            
            fname = "%s_features_weight.png" %(prename)
            f.savefig(os.path.join(res_path, directory_, fname),
                      facecolor='black',
                      dpi=150)
            for d_ in ['x', 'y', 'z']:
                fname = "%s_connectome_feature_weight_%s.png" %(prename, d_)
                fname = os.path.join(res_path, directory_, fname)
                plot_connectome(matrix_, 
                                coords, 
                                colors_lr, 
                                2*size_w**2,
                                1.4,
                                fname,
                                #cmap=pl.cm.bwr,
                                title=None,
                                display_=d_,
                                #max_=3.,
                                #min_=3. 
                                )
            fname = "%s_connections_list_feature_weights.txt" %(prename)
            fname = os.path.join(res_path, directory_, fname)
            #print_connections(matrix_, names_lr, fname)
            
            #########
            mask_ = np.float_(~np.bool_(nan_mask))
            mask_ = np.triu(mask_, k=1)
            mask_indices = np.nonzero(mask_)
            mask_[mask_indices] = h_values_
            matrix_ = np.nan_to_num(copy_matrix(mask_, diagonal_filler=0))
            
            size_ = np.zeros_like(matrix_)
            size_[mask_indices] = counting_vector
            size_ = np.nan_to_num(copy_matrix(size_, diagonal_filler=0))
            size_ = np.sum(size_, axis=0)
            
            f, _ = plot_connectivity_circle_edited(matrix_[index_][:,index_], 
                                            names_lr[index_], 
                                            node_colors=colors_lr[index_],
                                            node_size=size_[index_]*5,
                                            con_thresh = 15.,
                                            title=cond_,
                                            node_angles=circular_layout(names_lr, 
                                                                        list(names_lr),
                                                                        ),
                                            fontsize_title=19,
                                            fontsize_names=13,
                                            fontsize_colorbar=13,
                                            colorbar_size=0.3,
                                            #colormap='bwr',
                                            #colormap='terrain',
                                            #vmin=40,
                                            fig=pl.figure(figsize=(16,16))
                                            )
            
            fname = "%s_features_choices.png" %(prename)
            f.savefig(os.path.join(res_path, directory_, fname),
                      facecolor='black',
                      dpi=150)
            
            for d_ in ['x', 'y', 'z']:
                fname = "%s_connectome_feature_choices_%s.png" %(prename, d_)
                fname = os.path.join(res_path, directory_, fname)
                
                plot_connectome(matrix_, 
                                coords, 
                                colors_lr, 
                                4.*size_,
                                15.,
                                fname,
                                title=None,
                                max_=50.,
                                min_=0.,
                                display_=d_
                                )
                
            fname = "%s_connections_list_feature_choices.txt" %(prename)
            fname = os.path.join(res_path, directory_, fname)
            #print_connections(matrix_, names_lr,fname)
            
            pl.close('all')
Example No. 28
def removeOutliers(data, max=4):
    # Remove outliers
    z_scores = stats.zscore(data)
    abs_z_scores = np.abs(z_scores)
    filtered_entries = (abs_z_scores < max).all(axis=1)
    return data[filtered_entries]
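# --- Usage sketch (illustrative only) ---
# Rows where any column has an absolute z-score >= 4 (the default max) are dropped.
import numpy as np
from scipy import stats

demo = np.column_stack([np.arange(21, dtype=float), np.arange(21, dtype=float)])
demo[0, 0] = 1000.                    # plant one extreme value in the first row
print(removeOutliers(demo).shape)     # (20, 2): the extreme row is removed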
Example No. 29
import matplotlib.patches as patches
import numpy as np
import brainiak.eventseg.event
from scipy.stats import zscore, pearsonr, stats
from scipy.signal import gaussian, convolve
from sklearn import decomposition
from brainiak.funcalign.srm import SRM
import nibabel as nib

datadir = '/jukebox/norman/jamalw/MES/prototype/link/scripts/chris_dartmouth/data/'

# Load in data
train = np.nan_to_num(
    stats.zscore(np.load(datadir + 'precuneus_k12ish_run1_n25.npy'),
                 axis=1,
                 ddof=1))
test = np.nan_to_num(
    stats.zscore(np.load(datadir + 'precuneus_k12ish_run2_n25.npy'),
                 axis=1,
                 ddof=1))

# Convert data into lists where each element is voxels by samples
train_list = []
test_list = []
for i in range(0, train.shape[2]):
    train_list.append(train[:, :, i])
    test_list.append(test[:, :, i])

# Initialize model
print('Building Model')
subjects = movie_data_left.shape[2]

# Convert data to a list of arrays matching SRM input.
# Each element is a matrix of voxels by TRs.
# Also, concatenate data from both hemispheres in the brain.
movie_data = []
for s in range(subjects):
    if s % size == rank:
        movie_data.append(np.concatenate([movie_data_left[:, :, s], movie_data_right[:, :, s]], axis=0))
    else:
        movie_data.append(None)

# Z-score the data
for subject in range(subjects):
    if movie_data[subject] is not None:
        movie_data[subject] = stats.zscore(movie_data[subject], axis=1, ddof=1)

# Run SRM with the movie data
srm = brainiak.funcalign.srm.SRM(n_iter=10, features=50, comm=comm)
srm.fit(movie_data)

# We define a function to present the output of the experiment.
def plot_confusion_matrix(cm, title="Confusion Matrix"):
    """Plots a confusion matrix for each subject
    """
    import matplotlib.pyplot as plt
    import math
    plt.figure()
    subjects = len(cm)
    root_subjects = math.sqrt(subjects)
    cols = math.ceil(root_subjects)
Example No. 31
def HMM(X, K, loo_idx, song_idx, song_bounds):
    """Fit a hidden Markov model.

       Fit the HMM to the average of all-but-one subject and cross-validate on
       the left-out subject using within-song vs. between-song average
       correlations.

       Parameters
       ----------
       X: list of voxel by time ndarrays (2D); run 1 for every subject followed
           by run 2 for every subject
       K: # of events for HMM (scalar)
       loo_idx: index of the left-out subject (scalar)
       song_idx: song index (scalar)
       song_bounds: array of song boundary TRs (1D)

       Returns
       -------
       within_across: within- minus across-boundary correlation for the real
           event boundaries (element 0) and each permutation (1D, nPerm + 1)

    """

    w = 6
    srm_k = 45
    nPerm = 1000
    within_across = np.zeros(nPerm + 1)
    run1 = [X[i] for i in np.arange(0, int(len(X) / 2))]
    run2 = [X[i] for i in np.arange(int(len(X) / 2), len(X))]
    print('Building Model')
    srm = SRM(n_iter=10, features=srm_k)
    print('Training Model')
    srm.fit(run1)
    print('Testing Model')
    shared_data = srm.transform(run2)
    shared_data = stats.zscore(np.dstack(shared_data), axis=1, ddof=1)
    others = np.mean(shared_data[:, :,
                                 np.arange(shared_data.shape[-1]) != loo_idx],
                     axis=2)
    loo = shared_data[:, song_bounds[song_idx]:song_bounds[song_idx + 1],
                      loo_idx]
    nTR = loo.shape[1]

    # Fit to all but one subject
    ev = brainiak.eventseg.event.EventSegment(K)
    ev.fit(others[:, song_bounds[song_idx]:song_bounds[song_idx + 1]].T)
    events = np.argmax(ev.segments_[0], axis=1)

    ####
    # plot searchlights
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches
    shared_data = srm.transform(run2)
    avg_response = sum(shared_data) / len(shared_data)
    plt.figure(figsize=(10, 10))
    plt.imshow(np.corrcoef(avg_response[:, 0:89].T))
    bounds = np.where(np.diff(np.argmax(ev.segments_[0], axis=1)))[0]
    ax = plt.gca()
    bounds_aug = np.concatenate(([0], bounds, [nTR]))
    for i in range(len(bounds_aug) - 1):
        rect1 = patches.Rectangle((bounds_aug[i], bounds_aug[i]),
                                  bounds_aug[i + 1] - bounds_aug[i],
                                  bounds_aug[i + 1] - bounds_aug[i],
                                  linewidth=3,
                                  edgecolor='w',
                                  facecolor='none',
                                  label='Model Fit')
        ax.add_patch(rect1)
    plt.title('HMM Fit to A1 SRM K = ' + str(srm_k),
              fontsize=18,
              fontweight='bold')
    plt.savefig('plots/St_Pauls SRM K = ' + str(srm_k))
    ####

    # Compute correlations separated by w in time
    corrs = np.zeros(nTR - w)
    for t in range(nTR - w):
        corrs[t] = pearsonr(loo[:, t], loo[:, t + w])[0]

    # Compute within vs across boundary correlations, for real and permuted bounds
    for p in range(nPerm + 1):
        within = corrs[events[:-w] == events[w:]].mean()
        across = corrs[events[:-w] != events[w:]].mean()
        within_across[p] = within - across

        np.random.seed(p)
        events = np.zeros(nTR, dtype=int)
        events[np.random.choice(nTR, K - 1, replace=False)] = 1
        events = np.cumsum(events)

    return within_across
def HMM(X, K, song_idx, song_bounds):
    """Fit a hidden Markov model.

       Fit the HMM to the average data of a training group of subjects and
       cross-validate on the held-out subjects using within-song vs.
       between-song average correlations.

       Parameters
       ----------
       X: list of 50 voxel by full-time-course ndarrays (2D); 2 runs per subject
       K: # of events for HMM (scalar)
       song_idx: song index (scalar)
       song_bounds: array of song boundaries (1D)

       Returns
       -------
       within_across: within- minus across-boundary (wVa) score for the real
           event boundaries and each permutation, computed on the held-out
           subjects (1D, nPerm + 1)

    """

    w = 6
    nPerm = 1000
    hrf = 5
    within_across = np.zeros(nPerm + 1)
    run1 = [X[i] for i in np.arange(0, int(len(X) / 2))]
    run2 = [X[i] for i in np.arange(int(len(X) / 2), len(X))]
    print('Building Model')
    srm = SRM(n_iter=10, features=30)
    print('Training Model')
    srm.fit(run1)
    print('Testing Model')
    shared_data = srm.transform(run2)
    shared_data = stats.zscore(np.dstack(shared_data), axis=1, ddof=1)
    others = np.mean(shared_data[:, song_bounds[song_idx] +
                                 hrf:song_bounds[song_idx + 1] + hrf, :13],
                     axis=2)
    loo = np.mean(shared_data[:, song_bounds[song_idx] +
                              hrf:song_bounds[song_idx + 1] + hrf, 13:],
                  axis=2)
    nTR = loo.shape[1]

    # Fit to all but one subject
    ev = brainiak.eventseg.event.EventSegment(K)
    ev.fit(others.T)
    events = np.argmax(ev.segments_[0], axis=1)

    # Compute correlations separated by w in time
    corrs = np.zeros(nTR - w)
    for t in range(nTR - w):
        corrs[t] = pearsonr(loo[:, t], loo[:, t + w])[0]

    # Compute within vs across boundary correlations, for real and permuted bounds
    for p in range(nPerm + 1):
        within = corrs[events[:-w] == events[w:]].mean()
        across = corrs[events[:-w] != events[w:]].mean()
        within_across[p] = within - across

        np.random.seed(p)
        events = np.zeros(nTR, dtype=int)
        events[np.random.choice(nTR, K - 1, replace=False)] = 1
        events = np.cumsum(events)
    print((within_across[0] - np.mean(within_across[1:])) /
          np.std(within_across[1:]))
    return within_across
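# Hedged post-processing sketch (synthetic numbers, not from the original script):
# the vector returned by HMM holds the true within-vs-across score first and the
# nPerm permuted scores after it, so a z-score and a one-sided permutation
# p-value can be derived directly from it.
wva_demo = np.concatenate(([0.12], np.random.RandomState(0).normal(0.0, 0.03, 1000)))
z_demo = (wva_demo[0] - wva_demo[1:].mean()) / wva_demo[1:].std()
p_demo = (np.sum(wva_demo[1:] >= wva_demo[0]) + 1) / (wva_demo[1:].size + 1)
print('z =', z_demo, 'p =', p_demo)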
from subprocess import call
import sys
import scipy.io

# cmd and file_name are assumed to be built earlier in the original download example
try:
    retcode = call(cmd, shell=True)
    if retcode < 0:
        print("File download was terminated by signal", -retcode, file=sys.stderr)
    else:
        print("File download returned", retcode, file=sys.stderr)
except OSError as e:
    print("File download failed:", e, file=sys.stderr)

#get fMRI data and scanner RAS coordinates
all_data = scipy.io.loadmat(file_name)
data = all_data['data']
R = all_data['R']

# Z-score the data
data = stats.zscore(data, axis=1, ddof=1)
n_voxel, n_tr = data.shape

# Run TFA with downloaded data
from brainiak.factoranalysis.tfa import TFA
# uncomment below line to get help message on TFA
#help(TFA)

tfa = TFA(K=5,
        max_num_voxel=int(n_voxel*0.5),
        max_num_tr=int(n_tr*0.5),
        verbose=True)
tfa.fit(data, R)

print("\n centers of latent factors are:")
print(tfa.get_centers(tfa.local_posterior_))
Ejemplo n.º 34
def searchlight(coords, K, mask, loo_idx, subjs, song_idx, song_bounds):
    """Run searchlight

       Create a searchlight at each grid location and run the HMM voxel
       function on the subject data falling within that searchlight sphere.

       Parameters
       ----------
       coords      : voxel by xyz ndarray (2D, Vx3)
       K           : # of events for HMM (scalar)
       mask        : brain (or ROI) mask
       loo_idx     : index of the left-out subject (scalar)
       subjs       : list of subject IDs
       song_idx    : song index (scalar)
       song_bounds : array of song boundary TRs (1D)

       Returns
       -------
       vox_z   : voxel by permutation ndarray of searchlight z-scores (2D)
       voxmean : voxel by permutation ndarray of mean searchlight scores (2D)

    """

    stride = 5
    radius = 5
    min_vox = 10
    nPerm = 1000
    SL_allvox = []
    SL_results = []
    datadir = '/tigress/jamalw/MES/prototype/link/scripts/data/searchlight_input/'
    for x in range(0, np.max(coords, axis=0)[0] + stride, stride):
        for y in range(0, np.max(coords, axis=0)[1] + stride, stride):
            for z in range(0, np.max(coords, axis=0)[2] + stride, stride):
                if not os.path.isfile(datadir + subjs[0] + '/' + str(x) + '_' +
                                      str(y) + '_' + str(z) + '.npy'):
                    continue
                D = distance.cdist(coords,
                                   np.array([x, y, z]).reshape((1, 3)))[:, 0]
                SL_vox = D <= radius
                data = []
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) +
                                        '_' + str(y) + '_' + str(z) + '.npy')
                    data.append(
                        np.nan_to_num(
                            stats.zscore(subj_data[:, :, 0], axis=1, ddof=1)))
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) +
                                        '_' + str(y) + '_' + str(z) + '.npy')
                    data.append(
                        np.nan_to_num(
                            stats.zscore(subj_data[:, :, 1], axis=1, ddof=1)))
                print("Running Searchlight")
                SL_within_across = HMM(data, K, loo_idx, song_idx, song_bounds)
                SL_results.append(SL_within_across)
                SL_allvox.append(np.array(np.nonzero(SL_vox)[0]))
    voxmean = np.zeros((coords.shape[0], nPerm + 1))
    vox_SLcount = np.zeros(coords.shape[0])
    for sl in range(len(SL_results)):
        voxmean[SL_allvox[sl], :] += SL_results[sl]
        vox_SLcount[SL_allvox[sl]] += 1
    voxmean = voxmean / vox_SLcount[:, np.newaxis]
    vox_z = np.zeros((coords.shape[0], nPerm + 1))
    for p in range(nPerm + 1):
        vox_z[:,
              p] = (voxmean[:, p] - np.mean(voxmean[:, 1:], axis=1)) / np.std(
                  voxmean[:, 1:], axis=1)
    return vox_z, voxmean
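# Hedged follow-up sketch with synthetic values (not from the original example):
# per-voxel searchlight scores are scattered back into a 3D volume by coordinate,
# which is how the returned vox_z map would typically become a brain image.
demo_coords = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
demo_scores = np.array([0.5, 1.2, -0.3])
demo_vol = np.full((10, 10, 10), np.nan)
demo_vol[demo_coords[:, 0], demo_coords[:, 1], demo_coords[:, 2]] = demo_scores
print(np.nansum(demo_vol))                # sums to 1.4, ignoring the NaN background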
Ejemplo n.º 35
    #nn = n.loadtxt('notas_compositores.txt')
    #nn = n.loadtxt('notas_filosofos.txt')
    #nn = n.loadtxt('notas_aleatorias.txt')
    nn = n.array([[r.uniform(1, 9) for x in range(_nc)] for y in range(_na)])
    #nn_lista = nn.tolist()
    #print('NUMBER OF GRADES')
    #print([sum([[int(round(z)) for z in x].count(y) for x in nn_lista]) for y in range(1, 10)])

    print('\nGRADES')
    for i in range(len(nn)):
        print('%s & %s \\' % (agentes[i], ' & '.join([str(x) for x in nn[i]])))

    print('MEAN', n.mean(nn))

    print('\nZ-SCORES OF THE GRADES')
    _zs1 = sss.zscore(nn)
    for i in range(len(_zs1)):
        print([round(x, ndigits=2) for x in _zs1[i]])
    print('MEAN', n.mean(n.abs(_zs1)))

    # correlation matrix computation
    # preprocessing: standardize each column
    for i in range(nn.shape[1]):
        nn[:, i] = (nn[:, i] - nn[:, i].mean()) / nn[:, i].std()

    # pearson
    print('\nCOVARIANCE MATRIX')
    covm = n.cov(nn.T, bias=1)
    for i in range(len(covm)):
        print([round(x, ndigits=2) for x in covm[i]])
    print('MEAN', n.mean(n.abs(covm)))
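    # Hedged check (not in the original): because each column was standardized
    # with the population std (ddof=0), the biased covariance matrix above
    # equals the Pearson correlation matrix.
    corrm = n.corrcoef(nn.T)
    print('MAX |cov - corr| =', n.max(n.abs(covm - corrm)))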
# run 1 times
song_bounds_run1 = np.array([0,225,314,494,628,718,898,1032,1122,1301,1436,1660,1749,1973, 2198,2377,2511])

songs_run1 = ['Finlandia', 'Blue_Monk', 'I_Love_Music','Waltz_of_Flowers','Capriccio_Espagnole','Island','All_Blues','St_Pauls_Suite','Moonlight_Sonata','Symphony_Fantastique','Allegro_Moderato','Change_of_the_Guard','Boogie_Stop_Shuffle','My_Favorite_Things','The_Bird','Early_Summer']

durs_run1 = np.array([225,90,180,135,90,180,135,90,180,135,225,90,225,225,180,135])

# run 2 times
song_bounds_run2 = np.array([0,90,270,449,538,672,851,1031,1255,1480,1614,1704,1839,2063,2288,2377,2511])

songs_run2 = ['St_Pauls_Suite', 'I_Love_Music', 'Moonlight_Sonata', 'Change_of_the_Guard','Waltz_of_Flowers','The_Bird', 'Island', 'Allegro_Moderato', 'Finlandia', 'Early_Summer', 'Capriccio_Espagnole', 'Symphony_Fantastique', 'Boogie_Stop_Shuffle', 'My_Favorite_Things', 'Blue_Monk','All_Blues']

durs_run2 = np.array([90,180,180,90,135,180,180,225,225,135,90,135,225,225,90,135])

# Load in data
run1 = np.nan_to_num(stats.zscore(np.load(datadir + 'fdr_01_bil_A1_split_merge_no_srm_run1_n25.npy'),axis=1,ddof=1))
run2 = np.nan_to_num(stats.zscore(np.load(datadir + 'fdr_01_bil_A1_split_merge_no_srm_run2_n25.npy'),axis=1,ddof=1))

nSubj = run1.shape[2]

nboot = 50

# K_set and bootNum are assumed to be defined earlier in the original script
wVa_results = np.zeros((16, len(K_set), nboot))

np.random.seed(bootNum)

for b in range(nboot):
    resamp_subjs = np.random.choice(nSubj, size=nSubj, replace=True)
    run1_resample = run1[:, :, resamp_subjs]
    run2_resample = run2[:, :, resamp_subjs]
Ejemplo n.º 37
    #print('NUMBER OF GRADES')
    #print([sum([[int(round(z)) for z in x].count(y) for x in nn_lista]) for y in range(1, 10)])

    print('\nGRADES')
    for i in range(len(nn)):
        print('%s & %s \\' % (agentes[i], ' & '.join([str(x) for x in nn[i]])))

    print('MEAN', n.mean(nn))

    # correlation matrix computation
    # preprocessing
    #for i in range(nn.shape[1]):
    #    nn[:, i] = (nn[:, i] - nn[:, i].mean()) / nn[:, i].std()

    print('\nZ-SCORES')
    _zscores = sss.zscore(nn)
    for i in range(len(_zscores)):
        print([round(x, ndigits=2) for x in _zscores[i]])

    # now use the standardized grades
    nn = _zscores

    # pearson
    print('\nCOVARIANCE MATRIX')
    covm = n.cov(nn.T, bias=1)
    for i in range(len(covm)):
        print([round(x, ndigits=2) for x in covm[i]])
    print('MEAN', n.mean(n.abs(covm)))


    def _cov(x, y):
        ax.set_xticks([])
        ax.text(0.35, 1, f'Cluster {cluster}',
                 fontsize=32, va='bottom',transform=ax.transAxes)
    fig.show()

from pandas import DataFrame

dataframe = DataFrame(model._means,
                      columns=vectorizer.get_feature_names())
wordcloud(dataframe)

# Z-score charts

from scipy import stats

zscores = stats.zscore(model._means, axis=0)
zscore_df = DataFrame(zscores, columns=vectorizer.get_feature_names())

import matplotlib.pyplot as pl
from pandas import concat

def zscore(dataframe, term_count=10):

    fig = pl.figure(figsize=(15, 20))
    for i in range(dataframe.shape[0]):
        cluster_df = dataframe.iloc[[i]].T
        cluster_df = cluster_df.rename(index=str, columns={i: 'Z-score'})
        cluster_df = cluster_df.sort_values(by=['Z-score'])
        if term_count:
            half_term_count = term_count // 2
            sliced_df = concat([cluster_df[:half_term_count],
# url, data_dir, n_subj and the MPI variables (size, rank) are assumed to be
# defined earlier in the original HTFA example.
data = []
R = []
for idx in range(n_subj):
    if idx % size == rank:
        #download data
        file_name = os.path.join(data_dir, 's' + str(idx) + '.mat')
        cmd = 'curl --location -o ' + file_name + url[idx]
        try:
            retcode = call(cmd, shell=True)
            if retcode < 0:
                print("File download was terminated by signal", -retcode, file=sys.stderr)
            else:
                print("File download returned", retcode, file=sys.stderr)
        except OSError as e:
            print("File download failed:", e, file=sys.stderr)
        all_data = scipy.io.loadmat(file_name)
        bold = all_data['data']
        # z-score the data
        bold = stats.zscore(bold, axis=1, ddof=1)
        data.append(bold)
        R.append(all_data['R'])

n_voxel, n_tr = data[0].shape

# Run HTFA with downloaded data
from brainiak.factoranalysis.htfa import HTFA
# uncomment below line to get help message on HTFA
#help(HTFA)

K = 5
htfa = HTFA(K=K,
        n_subj=n_subj,
        max_global_iter=5,
        max_local_iter=2,
Ejemplo n.º 40
    def __init__(
        self,
        data: pd.DataFrame,
        outcome_variable: str,
        regression_variables: List[str],
        covariates: Optional[List[str]] = None,
        min_n: int = 200,
        report_categorical_betas: bool = False,
        standardize_data: bool = False,
        encoding: str = "additive",
        edge_encoding_info: Optional[pd.DataFrame] = None,
        process_num: Optional[int] = None,
    ):
        # base class init
        # This takes in minimal regression params (data, outcome_variable, covariates) and
        # initializes additional parameters (outcome dtype, regression variables, error, and warnings)
        super().__init__(
            data=data,
            outcome_variable=outcome_variable,
            regression_variables=regression_variables,
            covariates=covariates,
        )

        # Custom init involving kwargs passed to this regression
        self.min_n = min_n
        self.report_categorical_betas = report_categorical_betas
        self.standardize_data = standardize_data
        if process_num is None:
            process_num = multiprocessing.cpu_count()
        self.process_num = process_num
        if encoding not in self.KNOWN_ENCODINGS:
            raise ValueError(f"Genotypes provided with unknown 'encoding': {encoding}")
        elif encoding == "edge" and edge_encoding_info is None:
            raise ValueError(
                "'edge_encoding_info' must be provided when using edge encoding"
            )
        else:
            self.encoding = encoding
            self.edge_encoding_info = edge_encoding_info

        # Ensure the data output type is compatible
        # Set 'self.family' and 'self.use_t' which are dependent on the outcome dtype
        if self.outcome_dtype == "categorical":
            raise NotImplementedError(
                "Categorical Outcomes are not yet supported for this type of regression."
            )
        elif self.outcome_dtype == "continuous":
            self.description += (
                f"Continuous Outcome (family = Gaussian): '{self.outcome_variable}'"
            )
            self.family = sm.families.Gaussian(link=sm.families.links.identity())
            self.use_t = True
        elif self.outcome_dtype == "binary":
            # Use the order according to the categorical
            counts = self.data[self.outcome_variable].value_counts().to_dict()
            categories = self.data[self.outcome_variable].cat.categories
            codes, categories = zip(*enumerate(categories))
            self.data[self.outcome_variable].replace(categories, codes, inplace=True)
            self.description += (
                f"Binary Outcome (family = Binomial): '{self.outcome_variable}'\n"
                f"\t{counts[categories[0]]:,} occurrences of '{categories[0]}' coded as 0\n"
                f"\t{counts[categories[1]]:,} occurrences of '{categories[1]}' coded as 1"
            )
            self.family = sm.families.Binomial(link=sm.families.links.logit())
            self.use_t = False
        else:
            raise ValueError(
                "The outcome variable's type could not be determined.  Please report this error."
            )

        # Log missing outcome values
        na_outcome_count = self.data[self.outcome_variable].isna().sum()
        self.description += f"\nUsing {len(self.data) - na_outcome_count:,} of {len(self.data):,} observations"
        if na_outcome_count > 0:
            self.description += (
                f"\n\t{na_outcome_count:,} are missing a value for the outcome variable"
            )

        # Standardize continuous variables in the data if needed
        # Use ddof=1 in the zscore calculation (used for StdErr) to match R
        if self.standardize_data:
            if self.outcome_dtype == "continuous":
                self.data[self.outcome_variable] = stats.zscore(
                    self.data[self.outcome_variable], nan_policy="omit", ddof=1
                )
            continuous_rvs = self.regression_variables["continuous"]
            self.data[continuous_rvs] = stats.zscore(
                self.data[continuous_rvs], nan_policy="omit", ddof=1
            )
            continuous_covars = [
                rv
                for rv, rv_type in self.covariate_types.items()
                if rv_type == "continuous"
            ]
            self.data[continuous_covars] = stats.zscore(
                self.data[continuous_covars], nan_policy="omit", ddof=1
            )

        # Finish updating description
        self.description += f"\nRegressing {sum([len(v) for v in self.regression_variables.values()]):,} variables"
        for k, v in self.regression_variables.items():
            self.description += f"\n\t{len(v):,} {k} variables"