return k/c # import what we need import numpy as np import connectivity_utils as utils import scipy.linalg as la from scipy.spatial.distance import pdist, squareform from sklearn.preprocessing import normalize # set directories data_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/' kernel_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/kernels/' # read in connectivity data and labels connectivity_data = utils.load_connectivity_data(data_dir) labels = np.array([utils.load_labels(data_dir), ]) # set negative connectivities to 0 edge_data = np.apply_along_axis(lambda x: [0 if element < 0 else element for element in x], 1, connectivity_data) #edge_data = edge_data[:6, :] # calculate the kernel using pdist K = squareform(pdist(edge_data, rw_overlap_kernel)) K[np.diag_indices(140)] = 1 K = np.hstack((np.transpose(labels), K)) np.savetxt(kernel_dir + 'K_rw_overlap.csv', K, delimiter=',') #C1 = edge_data[0, :] #C2 = edge_data[1, :]
# import dataset 1 sparse inverse covariance files dataset_1_cov_files = pd.read_csv(dataset_1_dir + 'sparse_inverse_covariance_files.csv').T # put these in a df dataset_1_cov = pd.DataFrame(data=dataset_1_cov_data) dataset_1_cov['file'] = dataset_1_cov_files.index # convert format of file name so they can be matched dataset_1_cov['file'] = dataset_1_cov['file'].apply( lambda x: x.split('/')[-1].split('_')[-1].zfill(7)) # import and process full dataset 1 files list and metadata to get labels # import original dataset 1 data, files dataset_1_data, dataset_1_files = utils.load_connectivity_data( '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/') # import dataset 1 labels dataset_1_labels = utils.load_labels( '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/') # put labels alongside files in a DF dataset_1_metadata = pd.DataFrame(columns=['file', 'label']) dataset_1_metadata['file'] = dataset_1_files dataset_1_metadata['label'] = dataset_1_labels # convert format of file name so they can be matched dataset_1_metadata['file'] = dataset_1_metadata['file'].apply( lambda x: x.split('/')[-1].split('_')[-1]) # join the DFs to match labels with spare inverse cov data
import numpy as np
import connectivity_utils as utils
from scipy.spatial.distance import pdist, squareform
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import chi2_kernel, additive_chi2_kernel

# set directories
data_dir_1 = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/'
data_dir_2 = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC2/matrix_unsmooth/'
kernel_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/kernels/'

# number of subjects
# NOTE(review): n is not used within this chunk -- presumably referenced
# later in the file; confirm it matches the stacked data's row count.
n = 333

# read in connectivity data and labels for each of the two sites
connectivity_data_1 = utils.load_connectivity_data(data_dir_1)
labels_1 = np.array([ utils.load_labels(data_dir_1), ])
connectivity_data_2 = utils.load_connectivity_data(data_dir_2)
labels_2 = np.array([ utils.load_labels(data_dir_2), ])

# stack both sites: rows are subjects, columns are edges; labels form one row
connectivity_data = np.vstack((connectivity_data_1, connectivity_data_2))
labels = np.hstack((labels_1, labels_2))

# set negative connectivities to 0 (chi2 kernels below require non-negative features)
edge_data = np.apply_along_axis( lambda x: [0 if element < 0 else element for element in x], 1, connectivity_data)
import numpy as np
import connectivity_utils as utils

# set directories
data_dir_1 = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/'
data_dir_2 = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC2/matrix_unsmooth/'
kernel_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/kernels/'

# include negatively weighted edges or not
include_negative_weights = True
# standardise data with a z-transform
standardise_data = False

# read in connectivity data and labels for both sites
connectivity_data_1 = utils.load_connectivity_data(data_dir_1, standardise_data)
labels_1 = np.array([utils.load_labels(data_dir_1), ])
connectivity_data_2 = utils.load_connectivity_data(data_dir_2, standardise_data)
labels_2 = np.array([utils.load_labels(data_dir_2), ])
connectivity_data = np.vstack((connectivity_data_1, connectivity_data_2))
labels = np.hstack((labels_1, labels_2))

# save connectivity data
np.savetxt(kernel_dir + 'connectivity_data.csv', connectivity_data, delimiter = ',')

# map lower triangles of connectivities to an array:
# each row keeps only the strictly-lower-triangular entries (k=-1 excludes the
# diagonal) of that subject's flattened 90x90 matrix, selected via flat indices
edge_data = np.apply_along_axis(lambda x: x[np.ravel_multi_index(np.tril_indices(90, k=-1), (90, 90))], 1, connectivity_data)

# NOTE(review): the body of this conditional continues beyond this chunk
if not include_negative_weights :
return k / c # import what we need import numpy as np import connectivity_utils as utils import scipy.linalg as la from scipy.spatial.distance import pdist, squareform from sklearn.preprocessing import normalize # set directories data_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/' kernel_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/kernels/' # read in connectivity data and labels connectivity_data = utils.load_connectivity_data(data_dir) labels = np.array([ utils.load_labels(data_dir), ]) # set negative connectivities to 0 edge_data = np.apply_along_axis( lambda x: [0 if element < 0 else element for element in x], 1, connectivity_data) #edge_data = edge_data[:6, :] # calculate the kernel using pdist K = squareform(pdist(edge_data, rw_overlap_kernel)) K[np.diag_indices(140)] = 1 K = np.hstack((np.transpose(labels), K))
from sklearn.cross_decomposition import CCA, PLSCanonical
# NOTE(review): sklearn.cross_validation is the pre-0.18/0.20 module name;
# modern scikit-learn moved this functionality to sklearn.model_selection
from sklearn import svm, cross_validation, metrics, decomposition

# set directories
# NOTE(review): `np` and `utils` are not imported in this chunk -- presumably
# imported earlier in the file; verify there.
data_dir_1 = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/'
data_dir_2 = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC2/matrix_unsmooth/'
kernel_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/kernels/'

# include negatively weighted edges or not
include_negative_weights = True
# standardise data with a z-transform
standardise_data = False

# read in connectivity data and labels for both sites
connectivity_data_1 = utils.load_connectivity_data(data_dir_1, standardise_data)
labels_1 = np.array([ utils.load_labels(data_dir_1), ])
connectivity_data_2 = utils.load_connectivity_data(data_dir_2, standardise_data)
labels_2 = np.array([ utils.load_labels(data_dir_2), ])

# flatten the stacked label rows to a 1-D vector for cross-validation below
labels = np.squeeze(np.hstack((labels_1, labels_2)))
n_subjects = len(labels)
# number of cross-validation folds
n_folds = 10
connectivity_data = np.vstack((connectivity_data_1, connectivity_data_2))
# Load the sparse-inverse-covariance ("timecourse") connectivity data.
# NOTE(review): the result is bound to `connectivity_data`, but every later
# statement reads `timecourse_connectivity_data`, which is never assigned in
# this chunk, and `connectivity_data` is overwritten below before being read.
# Either `timecourse_connectivity_data` is assigned earlier in the file or
# this line was meant to assign it -- TODO confirm against the full file.
connectivity_data = np.genfromtxt(timecourse_dir + 'sparse_inverse_covariance_data.csv', delimiter=',')
timecourse_files = pd.read_csv(timecourse_dir + 'sparse_inverse_covariance_files.csv').T.index.values
# extract integer subject IDs from the file names; list comprehensions (rather
# than map) keep the result a list on Python 3 as well as Python 2
timecourse_IDs = [int(x.split('/')[-1].split('_')[1][0:-4]) for x in timecourse_files]
labels = np.array([utils.load_labels(data_dir), ])[0]
connectivity_files = glob.glob(data_dir + '*.txt')
# IDs are the first three characters of each connectivity file name
connectivity_IDs = [int(x.split('/')[-1][0:3]) for x in connectivity_files]
connectivity_IDs.sort()

# boolean masks selecting the subjects present in BOTH data sets
connectivity_in_timecourse = np.array([ID in timecourse_IDs for ID in connectivity_IDs])
timecourse_in_connectivity = np.array([ID in connectivity_IDs for ID in timecourse_IDs])

# restrict labels and both data sets to the common subjects
labels = labels[np.array(connectivity_in_timecourse)]
#labels = np.expand_dims(labels, axis=1)
timecourse_connectivity_data = timecourse_connectivity_data[timecourse_in_connectivity, :]
# reshape each flattened row back to a 90x90 matrix, one per subject
timecourse_connectivity_matrices = np.reshape(timecourse_connectivity_data, (100, 90, 90))

# original connectivity matrices
connectivity_data, connectivity_files = utils.load_connectivity_data('/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/')
connectivity_data = connectivity_data[connectivity_in_timecourse, :]
connectivity_matrices = np.reshape(connectivity_data, (100, 90, 90))
n_subjects = len(labels)

# parenthesised print is identical on Python 2 and also valid on Python 3
print(connectivity_matrices[0, :5, :5])
print(timecourse_connectivity_matrices[0, :5, :5])

# create kernel matrices
#sq_IG = np.zeros((n_subjects, n_subjects))
#sq_IG_timecourse = np.zeros((n_subjects, n_subjects))
#for i in range(n_subjects) :
#    print i
#    for j in range(i) :