def choose_rep(self):
    """Chooses the representative subject to be used as the target for BrainSync."""
    nsub = len(self.subids)
    subs = range(nsub)
    dist_mat = np.zeros((nsub, nsub))
    for sub1no, sub2no in itertools.product(subs, subs):
        sub1 = self.fmri_data[sub1no]
        sub2 = self.fmri_data[sub2no]
        sub1 = StandardScaler().fit_transform(sub1.T)
        sub2 = StandardScaler().fit_transform(sub2.T)  # .T to make it TxV
        sub2s, _ = brainSync(sub1, sub2)
        dist_mat[sub1no, sub2no] = np.linalg.norm(sub1.flatten() - sub2s.flatten())
        print(sub1no, sub2no)

    # The representative subject is the one with the smallest total distance
    # to all other subjects after synchronization.
    self.ref_subno = np.argmin(np.sum(dist_mat, axis=1))
    self.ref_data = self.fmri_data[self.ref_subno]

    # Save the reference subject index and its data
    np.savez_compressed(
        self.data_dir_fmri + '/processed/Refdata.npz',
        ref_data=self.ref_data,
        ref_subno=self.ref_subno)

    print('The most representative subject is %d' % self.ref_subno)
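# A minimal, self-contained illustration of the "most representative subject"
# selection above, on synthetic data. brainSync itself is not reproduced here;
# the SVD-based orthogonal temporal rotation below is only a stand-in playing
# the same role, and all sizes and names are invented for the example.
import itertools
import numpy as np
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(0)
n_sub, T, V = 4, 20, 50
fmri_data = [rng.normal(size=(V, T)) for _ in range(n_sub)]  # V x T per subject

def _sync(x, y):
    """Rotate y in time so it best matches x (both T x V); stand-in for brainSync."""
    u, _, vt = np.linalg.svd(x @ y.T)
    return (u @ vt) @ y

dist_mat = np.zeros((n_sub, n_sub))
for i, j in itertools.product(range(n_sub), range(n_sub)):
    xi = StandardScaler().fit_transform(fmri_data[i].T)  # T x V, unit variance per vertex
    xj = StandardScaler().fit_transform(fmri_data[j].T)
    dist_mat[i, j] = np.linalg.norm(xi - _sync(xi, xj))

ref_subno = int(np.argmin(dist_mat.sum(axis=1)))  # closest, on average, to all others
print('representative subject:', ref_subno)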
def sample_data(country):
    # Daily new deaths for the given country from 2020-12-08 up to `today`,
    # z-scored and returned as a flat 1-D array.
    population = df.loc[(df.Country == country) & (df.New_deaths > 0),
                        ['Date_reported', 'New_deaths']]
    after = population.loc[(population.Date_reported >= '2020-12-08') &
                           (population.Date_reported <= today), ['New_deaths']]
    values = after.values
    values = values.reshape((len(values), 1))
    scaled_features = StandardScaler().fit_transform(values)
    scaled_features = scaled_features.flatten()
    return scaled_features
def population_data(country):
    from sklearn.preprocessing import StandardScaler

    # Full daily new-deaths series for the given country, z-scored and
    # returned as a flat 1-D array.
    population = df.loc[(df.Country == country) & (df.New_deaths > 0), ['New_deaths']]
    values = population.values
    values = values.reshape((len(values), 1))
    scaled_features = StandardScaler().fit_transform(values)
    scaled_features = scaled_features.flatten()
    return scaled_features
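# A self-contained sketch of the standardize-and-flatten pattern used by
# sample_data() and population_data(). The WHO dataframe `df` and the `today`
# variable assumed above are replaced here by a small toy frame; the column
# names follow the snippets, everything else is invented for illustration.
import pandas as pd
from sklearn.preprocessing import StandardScaler

toy = pd.DataFrame({
    'Date_reported': ['2020-12-07', '2020-12-08', '2020-12-09', '2020-12-10'],
    'Country': ['X'] * 4,
    'New_deaths': [3, 5, 7, 4],
})

subset = toy.loc[(toy.Country == 'X') & (toy.New_deaths > 0) &
                 (toy.Date_reported >= '2020-12-08'), ['New_deaths']]
scaled = StandardScaler().fit_transform(subset.values.reshape(-1, 1)).flatten()
print(scaled)  # zero-mean, unit-variance 1-D array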
normalizeFeatures = 'Mean'
if normalizeFeatures == 'MeanAndStd':
    X_std = StandardScaler().fit_transform(vecPosMatrix)
elif normalizeFeatures == 'Mean':
    X_std = vecPosMatrix - np.mean(vecPosMatrix, 0)

print "Making sklearn_pca"
nComponents = 10
sklearn_pca = sklearnPCA(n_components=nComponents)

print "Making Y_sklearn"
# An important note -- we get a hash value for the raw data here
# as a unique identifier of this dataset. Then we save the initial
# PCA object to disk to reduce time on subsequent runs.
sklearn_pca_hash = str(hash(tuple(X_std.flatten()[::100])))[-10:]
#pca_result_hash = 'y_sklearn_hash_%s.cpickle' %(sklearn_pca_hash)
pca_result_hash = 'pca_obj_hash_%s.cpickle' % (sklearn_pca_hash)

if not os.path.exists(pca_result_hash):
    #Y_sklearn = sklearn_pca.fit_transform(X_std)
    sklearn_pca.fit(X_std)
    with open(pca_result_hash, 'wb') as of:
        cPickle.dump(sklearn_pca, of)
    Y_sklearn = sklearn_pca.transform(X_std)
    #with open(pca_result_hash,'wb') as of:
    #    cPickle.dump(Y_sklearn, of)
else:
    #Y_sklearn = cPickle.load(open(pca_result_hash))
    sklearn_pca = cPickle.load(open(pca_result_hash))
    Y_sklearn = sklearn_pca.transform(X_std)
normalizeFeatures = 'Mean'
if normalizeFeatures == 'MeanAndStd':
    X_std = StandardScaler().fit_transform(vecPosMatrix)
elif normalizeFeatures == 'Mean':
    X_std = vecPosMatrix - np.mean(vecPosMatrix, 0)

print("Making sklearn_pca")
nComponents = 10
sklearn_pca = sklearnPCA(n_components=nComponents)

print("Making Y_sklearn")
# An important note -- we get a hash value for the raw data here
# as a unique identifier of this dataset. Then we save the initial
# PCA object to disk to reduce time on subsequent runs.
sklearn_pca_hash = str(hash(tuple(X_std.flatten()[::100])))[-10:]
#pca_result_hash = 'y_sklearn_hash_%s.cpickle' %(sklearn_pca_hash)
pca_result_hash = 'pca_obj_hash_%s.cpickle' % (sklearn_pca_hash)

if not os.path.exists(pca_result_hash):
    #Y_sklearn = sklearn_pca.fit_transform(X_std)
    sklearn_pca.fit(X_std)
    with open(pca_result_hash, 'wb') as of:
        pickle.dump(sklearn_pca, of)
    Y_sklearn = sklearn_pca.transform(X_std)
    #with open(pca_result_hash,'wb') as of:
    #    cPickle.dump(Y_sklearn, of)
else:
    #Y_sklearn = cPickle.load(open(pca_result_hash))
    sklearn_pca = pickle.load(open(pca_result_hash, 'rb'))  # binary mode is required for pickle
    Y_sklearn = sklearn_pca.transform(X_std)
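# A self-contained sketch of the hash-and-cache pattern above, on synthetic
# data: the fitted PCA object is pickled under a name derived from a cheap
# hash of the input, so repeated runs on the same data skip the fit. The file
# name, hash scheme and array sizes here are illustrative only.
import os
import pickle
import numpy as np
from sklearn.decomposition import PCA

X_std = np.random.default_rng(0).normal(size=(500, 50))
X_std = X_std - np.mean(X_std, 0)  # 'Mean' normalization, as above

cache_key = str(hash(tuple(X_std.flatten()[::100])))[-10:]
cache_file = 'pca_obj_hash_%s.cpickle' % cache_key

if not os.path.exists(cache_file):
    pca = PCA(n_components=10).fit(X_std)
    with open(cache_file, 'wb') as of:
        pickle.dump(pca, of)
else:
    with open(cache_file, 'rb') as f:
        pca = pickle.load(f)

Y = pca.transform(X_std)
print(Y.shape)  # (500, 10)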
def standardized(v_lst):
    import pandas as pd
    from sklearn.preprocessing import StandardScaler

    # Fill missing values with the column mean, then z-score the column.
    df = pd.DataFrame({'val': v_lst})
    df.fillna(df.mean(), inplace=True)
    v_lst = StandardScaler().fit_transform(df['val'].values.reshape(-1, 1))
    v_lst = v_lst.flatten()
    return list(v_lst)
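# Example call for standardized() (input values invented): None/NaN entries
# are replaced by the column mean before z-scoring, so the returned list has
# no missing values and has zero mean and unit variance.
print(standardized([1.0, 2.0, None, 4.0]))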
def TsExtractor(labels, labelmap, func, mask, global_signal=True, pca=False,
                outfile="reg_timeseries.tsv", outlabelmap="individual_gm_labelmap.nii.gz"):
    import nibabel as nib
    import pandas as pd
    import numpy as np

    func_data = nib.load(func).get_data()
    labelmap_data = nib.load(labelmap).get_data()
    mask_data = nib.load(mask).get_data()

    labelmap_data[mask_data == 0] = 0  # background
    outlab = nib.Nifti1Image(labelmap_data, nib.load(labelmap).affine)
    nib.save(outlab, outlabelmap)

    ret = []

    if global_signal:
        indices = np.argwhere(mask_data > 0)
        X = []
        for i in indices:
            x = func_data[i[0], i[1], i[2], :]
            if np.std(x) > 0.000001:
                X.append(x.tolist())
        if len(X) == 0:
            x = np.repeat(0, func_data.shape[3])
        elif pca:
            import sklearn.decomposition as decomp
            from sklearn.preprocessing import StandardScaler
            X = StandardScaler().fit_transform(np.transpose(X))
            PCA = decomp.PCA(n_components=1, svd_solver="arpack")
            x = PCA.fit_transform(X).flatten()
        else:
            #from sklearn.preprocessing import StandardScaler
            #X = StandardScaler().fit_transform(np.transpose(X))
            x = np.mean(X, axis=0)
        ret.append(x)

    for l in range(1, len(labels) + 1):
        indices = np.argwhere(labelmap_data == l)
        X = []
        for i in indices:
            x = func_data[i[0], i[1], i[2], :]
            if np.std(x) > 0.000001:
                X.append(x.tolist())
        X = np.array(X)
        if X.shape[0] == 0:
            x = np.repeat(0, func_data.shape[3])
        elif X.shape[0] == 1:
            x = X.flatten()
        elif pca:
            import sklearn.decomposition as decomp
            from sklearn.preprocessing import StandardScaler
            X = StandardScaler().fit_transform(np.transpose(X))
            PCA = decomp.PCA(n_components=1, svd_solver="arpack")
            x = PCA.fit_transform(X).flatten()
        else:
            #from sklearn.preprocessing import StandardScaler
            #X = StandardScaler().fit_transform(np.transpose(X))
            x = np.mean(X, axis=0)
        ret.append(x)

    ret = np.transpose(np.array(ret))

    if global_signal:
        labels = ["GlobSig"] + labels

    ret = pd.DataFrame(data=ret, columns=labels)
    ret.to_csv(outfile, sep="\t", index=False)

    import os
    return os.path.join(os.getcwd(), outfile), labels, os.path.join(os.getcwd(), outlabelmap)
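# A hypothetical end-to-end call of TsExtractor() on tiny synthetic volumes.
# All file names, shapes and label names are invented, and the sketch assumes
# a nibabel version that still provides get_data() (as used above).
import numpy as np
import nibabel as nib

rng = np.random.default_rng(0)
shape, n_vols = (4, 4, 4), 10
affine = np.eye(4)

nib.save(nib.Nifti1Image(rng.normal(size=shape + (n_vols,)), affine), 'func.nii.gz')
nib.save(nib.Nifti1Image(rng.integers(0, 3, size=shape).astype(np.int16), affine),
         'labelmap.nii.gz')
nib.save(nib.Nifti1Image(np.ones(shape, dtype=np.int16), affine), 'mask.nii.gz')

tsv, out_labels, out_labelmap = TsExtractor(labels=['ROI-1', 'ROI-2'],
                                            labelmap='labelmap.nii.gz',
                                            func='func.nii.gz',
                                            mask='mask.nii.gz',
                                            global_signal=True,
                                            pca=False)
print(tsv, out_labels)  # TSV path and ["GlobSig", "ROI-1", "ROI-2"]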