def embed(self, DM):
    """Embed a precomputed distance matrix with MDS.

    Parameters
    ----------
    DM : distance-matrix object
        Must provide ``getMatrix()`` (the dissimilarities handed to MDS),
        ``D.index`` (reused as the row index of the embedding),
        ``DS.name`` and ``metric_name`` (both used to build the name of
        the result).

    Returns
    -------
    :obj:`lds.DataSet`
        A data set wrapping a :obj:`pandas.DataFrame` with one row per
        entry of ``DM.D.index`` and ``self.num_components`` columns,
        named ``"<DM.DS.name> <DM.metric_name> <self.embedding_name>"``.
    """
    model = MDS(n_components=self.num_components,
                dissimilarity="precomputed")
    model.fit(DM.getMatrix())

    # Carry the sample labels of the distance matrix over to the embedding.
    coords = pd.DataFrame(model.embedding_)
    coords.index = DM.D.index
    coords.index.name = DM.D.index.name

    label = " ".join([DM.DS.name, DM.metric_name, self.embedding_name])
    return lds.DataSet(coords, label)
def run_eeg(name):
    """Run the EEG pipeline for one dataset and pickle each intermediate.

    The steps are: parse ``data/<name>/eeg`` with the BIDS parser, build an
    EEG data set, compute its distance matrix, embed it with MDS, cluster
    the embedding (HGMM and adaptive K-means), and finally pickle the
    channel locations together with their spatial distance matrix.

    Parameters
    ----------
    name : str
        Name of the dataset directory below ``data/``.

    Returns
    -------
    None
        All results are written to disk as pickle files.
    """
    BASE = "data"
    DATASET = "%s/eeg" % (name)
    root = os.path.join(BASE, DATASET)
    out_dir = os.path.join(BASE, name)

    def _dump(obj, directory, filename):
        # Pickle ``obj`` to ``directory/filename``.
        with open(os.path.join(directory, filename), 'wb') as pkl_loc:
            pkl.dump(obj, pkl_loc)

    bp = lds.BIDSParser(root)
    # Only the first six rows of the descriptor frame are processed.
    dataset_descriptor = bp.getModalityFrame("preprocessed", ".pkl").iloc[:6]
    eds = lds.EEGDataSet(dataset_descriptor)
    _dump(eds, out_dir, 'eeg_ds.pkl')

    # Create a lemur distance matrix based on the EEG data
    DM = lds.DistanceMatrix(eds, lms.FroCorr)
    DM.name = "eeg-DistanceMatrix"
    _dump(DM, out_dir, 'eeg_dm.pkl')

    # Create an embedded distance matrix object under MDS
    MDSEmbedder = leb.MDSEmbedder(num_components=10)
    EEG_Embedded = MDSEmbedder.embed(DM)
    _dump(EEG_Embedded, out_dir, 'eeg_embed_dm.pkl')

    # NOTE(review): the clustering results are written below ``root``
    # (data/<name>/eeg) while every other artefact goes to data/<name>.
    # Preserved as found -- confirm this is intentional.
    hgmm = lcl.HGMMClustering(EEG_Embedded, 4)
    hgmm.cluster()
    _dump(hgmm, root, 'hgmm_clust_dm.pkl')

    clustered = lcl.AdaptiveKMeans(EEG_Embedded)
    clustered.cluster()
    _dump(clustered, root, 'km_clust_dm.pkl')

    chanlocs = pd.read_csv("data/%s/eeg/chanlocs.csv" % (name))
    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values``
    # yields the same array on both old and new pandas versions.
    _dump(chanlocs.values[:, 1:4], out_dir, 'eeg_chanlocs.pkl')

    spatial = lds.DataSet(chanlocs[["X", "Y", "Z"]], "Spatial")
    spatialDM = lds.DistanceMatrix(spatial, lms.VectorDifferenceNorm)
    _dump(spatialDM, out_dir, 'eeg_spatial_dm.pkl')
eds = lds.EEGDataSet(dataset_descriptor) # Create a lemur distance matrix based on the EEG data DM = lds.DistanceMatrix(eds, lms.FroCorr) DM.name = "eeg-DistanceMatrix" # In[3]: # Create an embedded distance matrix object under MDS MDSEmbedder = leb.MDSEmbedder(num_components=10) EEG_Embedded = MDSEmbedder.embed(DM) # In[4]: chanlocs = pd.read_csv("data/chanlocs.csv") spatial = lds.DataSet(chanlocs[["X", "Y", "Z"]], "Spatial") spatialDM = lds.DistanceMatrix(spatial, lms.VectorDifferenceNorm) # In[5]: for i in range(eds.n): single_ds = eds.getResourceDS(i) lpl.SparkLinePlotter(single_ds, mode="savediv", base_path=out_base).plot(sample_freq=500) # In[6]: for i in range(eds.n): single_ds = eds.getResourceDS(i) single_DM = lds.DataSet(single_ds.D.corr(), single_ds.name) lpl.SpatialConnectivity(single_DM, mode="savediv",
def run_modality(name, modality):
    """Run the analysis pipeline for one modality of a dataset.

    For every (datatype, file extension) pair configured for the modality
    the function builds the data set, its distance matrix, an MDS embedding
    and, for sufficiently large data sets, clusterings of the embedding.
    Each artefact is pickled into ``data/<name>/<modality>/<datatype>``.
    A datatype whose output directory already exists is skipped.

    Parameters
    ----------
    name : str
        Name of the dataset directory below ``data/``.
    modality : str
        One of ``'eeg'``, ``'fmri'`` or ``'graph'``.

    Returns
    -------
    tuple
        ``(bp, modality_list)``: the BIDS parser and the list of
        (datatype, file extension) pairs used for the modality.

    Raises
    ------
    ValueError
        If ``modality`` is not one of the supported values.
    """
    # Resolve the modality-specific settings first so that an unsupported
    # modality is rejected before the BIDS parser is constructed.
    if modality == 'eeg':
        modality_list = [('preprocessed', '.pkl')]
        DS_type = lds.EEGDataSet
        metric = lms.FroCorr
    elif modality == 'fmri':
        modality_list = [('func', 'nii.gz')]
        DS_type = lds.fMRIDataSet
        metric = lms.DiffAve
    elif modality == 'graph':
        modality_list = [('func', '.edgelist')]
        DS_type = lds.GraphDataSet
        metric = lms.FroCorr
    else:
        raise ValueError('Needs to be eeg, fmri, or graph')

    def _dump(obj, directory, filename):
        # Pickle ``obj`` to ``directory/filename``.
        with open(os.path.join(directory, filename), 'wb') as pkl_loc:
            pkl.dump(obj, pkl_loc)

    # Set root paths and parse data
    BASE = 'data'
    DATASET = '%s/%s' % (name, modality)
    root = os.path.join(BASE, DATASET)
    bp = lds.BIDSParser(root)

    # If EEG, save important metadata pkls
    if modality == 'eeg':
        eeg_dir = "data/%s/eeg" % (name)
        chanlocs = pd.read_csv("data/%s/eeg/chanlocs.csv" % (name))
        # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values``
        # yields the same array on both old and new pandas versions.
        _dump(chanlocs.values[:, 1:4], eeg_dir, 'chanlocs.pkl')
        spatial = lds.DataSet(chanlocs[["X", "Y", "Z"]], "Spatial")
        spatialDM = lds.DistanceMatrix(spatial, lms.VectorDifferenceNorm)
        _dump(spatialDM, eeg_dir, 'spatial_dm.pkl')

    # Iterate through potential files
    for datatype, f_ext in modality_list:
        dataset_descriptor = bp.getModalityFrame(datatype, f_ext)
        DS = DS_type(dataset_descriptor)
        print(DS)

        curr_dir = os.path.join(root, datatype)
        if os.path.exists(curr_dir):
            # An existing output directory means this datatype is skipped.
            continue
        os.makedirs(curr_dir)

        # Save the dataset
        _dump(DS, curr_dir, 'ds.pkl')

        # Create a lemur distance matrix
        if modality == 'fmri':
            # NOTE(review): fMRI passes an extra positional ``True`` whose
            # meaning is defined by lds.DistanceMatrix -- not visible here.
            DM = lds.DistanceMatrix(DS, metric, True)
        else:
            DM = lds.DistanceMatrix(DS, metric)
        DM.name = "%s-DistanceMatrix" % (modality)
        _dump(DM, curr_dir, 'dm.pkl')

        # Create an embedded distance matrix object under MDS
        MDSEmbedder = leb.MDSEmbedder(num_components=10)
        embedded = MDSEmbedder.embed(DM)
        _dump(embedded, curr_dir, 'embed_dm.pkl')

        ##### Clustering
        # NOTE(review): the original indentation was lost; both clusterings
        # are assumed to sit under the size guard -- confirm that K-means
        # should not also run for data sets with 10 or fewer entries.
        if DS.n > 10:
            clustered = lcl.HGMMClustering(embedded, 4)
            clustered.cluster()
            _dump(clustered, curr_dir, 'hgmm_clust_dm.pkl')

            clustered = lcl.AdaptiveKMeans(embedded)
            clustered.cluster()
            _dump(clustered, curr_dir, 'km_clust_dm.pkl')

    # Return modality list
    return bp, modality_list