def __init__(self, target, target_label, target_mids, target_surface):

    # Load input data and initialize data attributes
    self.ID = target

    # We load the surface as a source of control: we need to determine
    # whether the label file is the same size as the surface or whether
    # it needs to be adjusted.
    verts = ld.loadGii(target_surface, 0)

    # get correct number of vertices
    self.N = verts.shape[0]

    self.label = ld.loadGii(target_label, 0)
    # Subtract 1 for difference between Matlab and Python indexing
    self.mids = ld.loadMat(target_mids) - 1

    if self.N != len(self.label):
        print('Warning: Target surface has more vertices than '
              'label file. Adjusting label file.')
        self.label = ld.fixLabelSize(self.mids, self.label, self.N)

    # Initialize library attributes.
    # vertCounts contains the labels a vertex-of-interest maps to
    self.vertCounts = {}.fromkeys(list(np.arange(self.N)))

    # labCounts contains the labels a label-of-interest maps to
    self.labCounts = {}.fromkeys(list(self.label))

    self.matchedSubjects = set([])
def loadTest(self, y, yMatch):
    """
    Method to load the test data into the object.  We might be interested
    in loading new test data, so we have explicitly defined this as a
    method.

    Parameters:
    - - - - -
    y : SubjectFeatures object for a test brain
    yMatch : MatchingFeaturesTest object containing vertLib attribute
             detailing which labels each vertex in surface y maps to in
             the training data
    """

    load = self.load
    save = self.save
    features = self.features

    # load test subject data, save as attributes
    tObject = ld.loadH5(y, *['full'])
    ID = tObject.attrs['ID']

    parsedData = ld.parseH5(tObject, features)
    tObject.close()

    data = parsedData[ID]
    mtd = cu.mergeFeatures(data, features)
    print('Testing shape: {}'.format(mtd.shape))

    if self.scaled:
        scaler = self.scaler
        mtd = scaler.transform(mtd)

    threshed = ld.loadMat(yMatch)

    # Computing label-vertex memberships is time consuming.  If they have
    # already been precomputed for the given test data at the specified
    # threshold, a path to the saved file can be supplied via `load`.
    if load and os.path.isfile(load):
        ltvm = ld.loadPick(load)
    # Otherwise, compute the label-vertex memberships.
    else:
        ltvm = cu.vertexMemberships(threshed, 180)

    self.ltvm = ltvm

    # if save is provided, save label-vertex memberships to file
    if save:
        try:
            with open(save, "wb") as outFile:
                pickle.dump(ltvm, outFile, -1)
        except IOError:
            print('Cannot save label-vertex memberships to file.')

    return [threshed, mtd, ltvm]
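# Illustrative sketch (not part of the pipeline): the load/save logic in
# loadTest amounts to caching an expensive computation with pickle.  The
# helper below is a hypothetical, self-contained version of that pattern;
# `compute` stands in for a call such as cu.vertexMemberships.
import os
import pickle


def cached(path, compute, *args):
    """Load a pickled result from `path` if it exists, else compute and cache it."""
    if path and os.path.isfile(path):
        with open(path, 'rb') as inFile:
            return pickle.load(inFile)

    result = compute(*args)

    if path:
        try:
            with open(path, 'wb') as outFile:
                pickle.dump(result, outFile, -1)
        except IOError:
            print('Cannot write cache file {}'.format(path))

    return result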
def predict(self, y, yMatch, yMids):
    """
    Method to score the test data under the mixture model fit to the
    training data of each label, and to assign each vertex the
    highest-scoring label.

    Parameters:
    - - - - -
    y : SubjectFeatures object for a test brain
    yMatch : MatchingFeaturesTest object detailing which labels each
             vertex in surface y maps to in the training data
    yMids : path to midline indices for the test brain
    """

    R = 180
    labels = self.labels

    # load the testing data
    [mm, mtd, ltvm] = self.loadTest(y, yMatch)

    # Python is 0-indexed, while Matlab is not.
    # We adjust the Matlab coordinates by subtracting 1.
    midline = ld.loadMat(yMids) - 1

    mm[midline, :] = 0
    mtd[midline, :] = 0

    xTest, yTest = mtd.shape
    if yTest != self.input_dim:
        raise ValueError('Test data does not have the same number of '
                         'features as the training data.')

    # initialize prediction score matrix (one column per label, plus
    # column 0 for the unassigned / midline label)
    baseline = np.zeros((mtd.shape[0], R + 1))

    # for all labels in the training set
    for lab in labels:

        # get vertices that map to that label
        members = ltvm[lab]
        memberData = mtd[members, :]
        estimator = self.mixtures[lab]

        if len(members) > 0:
            scores = estimator.score_samples(memberData)

            # save the scores for these vertices in the label's column
            baseline[members, lab] = scores

    predicted = np.argmax(baseline, axis=1)

    self.baseline = baseline
    self.predicted = predicted
    self._classified = True
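# Illustrative sketch with toy data (assumption: the per-label estimators
# behave like sklearn GaussianMixture models).  It shows the scoring
# pattern used in predict(): fill an (N, R + 1) score matrix column by
# column, then take the argmax across columns to assign each vertex a
# label.  All sizes and data below are synthetic.
import numpy as np
from sklearn.mixture import GaussianMixture

rng = np.random.RandomState(0)
R = 3                                  # number of labels (180 in the real data)
N, D = 50, 4                           # vertices, features
mtd = rng.rand(N, D)                   # stand-in for the merged test features

# fit one mixture per label on synthetic "training" data
mixtures = {lab: GaussianMixture(n_components=2).fit(rng.rand(100, D))
            for lab in range(1, R + 1)}

# in this toy example every vertex is a candidate for every label
ltvm = {lab: np.arange(N) for lab in range(1, R + 1)}

baseline = np.zeros((N, R + 1))
for lab in range(1, R + 1):
    members = ltvm[lab]
    if len(members) > 0:
        baseline[members, lab] = mixtures[lab].score_samples(mtd[members, :])

# column 0 is never filled, so it only wins if every label scores below zero
predicted = np.argmax(baseline, axis=1)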
def processLabelResizing(subjectList, dataDir, hemi):
    """
    Resize HCP label files to the full 32k_fs_LR surface and convert them
    to label GIFTI files.
    """

    with open(subjectList, 'r') as inFile:
        subjects = inFile.readlines()
    subjects = [x.strip() for x in subjects]

    hcpDir = dataDir + 'Haynor/Connectome_4_With_MMP/Labels/'
    midDir = dataDir + 'parcellearning/Data/Midlines/'
    funDir = dataDir + 'HCP/Connectome_4/'
    outDir = dataDir + 'parcellearning/Data/Labels/HCP/'

    lExt = '.' + hemi + '.CorticalAreas_dil_NoTask_Final_Individual.32k_fs_LR.dlabel.nii'
    fExt = '.' + hemi + '.MyelinMap.32k_fs_LR.func.gii'
    mExt = '.' + hemi + '.Midline_Indices.mat'

    # number of vertices in a 32k_fs_LR hemisphere
    N = 32492

    inCMAP = dataDir + 'parcellearning/Data/Labels/' + 'Label_Lookup_300.txt'

    for s in subjects:

        inLabel = hcpDir + s + lExt
        inMid = midDir + s + mExt
        inFunc = funDir + s + '/Surface/MNI/' + s + fExt

        if os.path.isfile(inLabel) and os.path.isfile(
                inMid) and os.path.isfile(inFunc):

            outFunc = outDir + s + '.' + hemi + '.CorticalAreas.fixed.32k_fs_LR.func.gii'
            outLabel = outDir + s + '.' + hemi + '.CorticalAreas.fixed.32k_fs_LR.label.gii'

            label = ld.loadGii(inLabel)
            mids = ld.loadMat(inMid)

            # resize the label array to the full surface
            fixed = ld.fixLabelSize(mids, label, N)

            # write the resized labels into the myelin map GIFTI as a template
            M = nb.load(inFunc)
            M.darrays[0].data = np.asarray(fixed.astype(np.float32))
            nb.save(M, outFunc)

            # convert the metric file to a label file with Connectome Workbench
            cmd = '/usr/bin/wb_command -metric-label-import {} {} {}'.format(
                outFunc, inCMAP, outLabel)
            os.system(cmd)
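# Design note (sketch, not the pipeline's code): os.system works for the
# wb_command call above, but subprocess surfaces failures and avoids shell
# quoting issues.  The helper name below is hypothetical; the wb_command
# arguments mirror the call used in processLabelResizing.
import subprocess


def import_label(out_func, cmap, out_label):
    """Convert a metric file to a label file with Connectome Workbench."""
    # raises CalledProcessError if wb_command exits with a non-zero status
    subprocess.check_call(['wb_command', '-metric-label-import',
                           out_func, cmap, out_label])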
def __init__(self, source, source_label, source_mids, source_surface):

    self.ID = source

    verts = ld.loadGii(source_surface, 0)
    self.N = verts.shape[0]

    self.label = ld.loadGii(source_label, 0)
    # Subtract 1 for difference between Matlab and Python indexing
    self.mids = ld.loadMat(source_mids) - 1

    if self.N != len(self.label):
        print('Warning: Surface has more vertices than '
              'label file. Adjusting label file.')
        self.label = ld.fixLabelSize(self.mids, self.label, self.N)

    self.vertLib = {}
def loadTest(self, y, yMatch):
    """
    Method to load the test data into the object.  We might be interested
    in loading new test data, so we have explicitly defined this as a
    method.

    Parameters:
    - - - - -
    y : SubjectFeatures object for a test brain
    yMatch : MatchingFeaturesTest object containing vertLib attribute
             detailing which labels each vertex in surface y maps to in
             the training data
    """

    # load test subject data, save as attributes
    tObject = ld.loadH5(y, *['full'])
    ID = tObject.attrs['ID']

    parsedData = ld.parseH5(tObject, self.features)
    tObject.close()

    data = parsedData[ID]
    mtd = cu.mergeFeatures(data, self.features)
    print('Testing shape: {}'.format(mtd.shape))

    if self.scaled:
        scaler = self.scaler
        mtd = scaler.transform(mtd)

    threshed = ld.loadMat(yMatch)
    ltvm = cu.vertexMemberships(threshed, 180)

    return [threshed, mtd, ltvm]
def loadData(subjectList, dataMap, features, hemi):
    """
    Generates the training data from a list of subjects.

    Parameters:
    - - - - -
    subjectList : list of subjects to include in training set
    dataMap : dictionary mapping each data type ('object', 'midline',
              'matching') to a {directory: extension} pair
    features : list of features to include
    hemi : hemisphere to process
    """

    objDir, objExt = list(dataMap['object'].items())[0]
    midDir, midExt = list(dataMap['midline'].items())[0]
    matDir, matExt = list(dataMap['matching'].items())[0]

    data = {}
    matches = {}

    for s in subjectList:

        # Training data
        trainObject = '{}{}.{}.{}'.format(objDir, s, hemi, objExt)
        print(trainObject)
        midObject = '{}{}.{}.{}'.format(midDir, s, hemi, midExt)
        matObject = '{}{}.{}.{}'.format(matDir, s, hemi, matExt)

        # Check to make sure all 3 files exist
        if os.path.isfile(trainObject) and os.path.isfile(
                midObject) and os.path.isfile(matObject):

            # Load midline indices
            # Subtract 1 for difference between Matlab and Python indexing
            mids = ld.loadMat(midObject) - 1
            mids = set(mids)

            match = ld.loadMat(matObject)

            # Load training data and training labels
            trainH5 = h5py.File(trainObject, mode='r')

            # Get data corresponding to features of interest
            subjData = ld.parseH5(trainH5, features)
            trainH5.close()

            # Drop midline vertices from every feature matrix
            nSamples = set(np.arange(subjData[s][features[0]].shape[0]))
            coords = np.asarray(list(nSamples.difference(mids)))

            for f in subjData[s].keys():
                tempData = subjData[s][f]
                if tempData.ndim == 1:
                    tempData.shape += (1, )

                subjData[s][f] = np.squeeze(tempData[coords, :])

            match = match[coords, :]

            data[s] = subjData[s]
            matches[s] = match

    return [data, matches]
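# Illustrative sketch with toy data: the midline handling above removes
# midline rows from each feature matrix by taking the set difference of
# all row indices with the midline indices, then slicing.  (The example
# sorts the remaining indices for readability; the pipeline relies on the
# natural ordering of small integer sets.)
import numpy as np

nVerts = 10
featureMatrix = np.arange(nVerts * 3).reshape(nVerts, 3).astype(float)
mids = {2, 5, 7}                                   # 0-based midline indices

coords = np.asarray(sorted(set(np.arange(nVerts)).difference(mids)))
trimmed = featureMatrix[coords, :]                 # shape (7, 3), midline rows dropped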
def loadDataFromList(subjectList, dataDir, features, hemi):
    """
    Generates the training data for the neural network.

    Parameters:
    - - - - -
    subjectList : list of subjects to include in training set
    dataDir : main directory where data exists -- individual features
              will exist in sub-directories here
    features : list of features to include
    hemi : hemisphere to process
    """

    hemisphere = {'Left': 'L', 'Right': 'R'}
    H = hemisphere[hemi]

    # For now, we hardcode where the data is
    trainDir = '{}TrainingObjects/FreeSurfer/'.format(dataDir)
    trainExt = '.{}.TrainingObject.aparc.a2009s.h5'.format(H)

    midDir = '{}Midlines/'.format(dataDir)
    midExt = '.{}.Midline_Indices.mat'.format(H)

    data = {}

    for s in subjectList:

        # Training data
        trainObject = '{}{}{}'.format(trainDir, s, trainExt)
        midObject = '{}{}{}'.format(midDir, s, midExt)

        # Check to make sure both files exist
        if os.path.isfile(trainObject) and os.path.isfile(midObject):

            # Load midline indices
            # Subtract 1 for difference between Matlab and Python indexing
            mids = ld.loadMat(midObject) - 1
            mids = set(mids)

            # Load training data and training labels
            trainH5 = h5py.File(trainObject, mode='r')

            # Get data corresponding to features of interest
            subjData = ld.parseH5(trainH5, features)
            trainH5.close()

            # Drop midline vertices from every feature matrix
            nSamples = set(np.arange(subjData[s][features[0]].shape[0]))
            coords = np.asarray(list(nSamples.difference(mids)))

            for f in subjData[s].keys():
                tempData = subjData[s][f]
                if tempData.ndim == 1:
                    tempData.shape += (1, )

                subjData[s][f] = np.squeeze(tempData[coords, :])

            data[s] = subjData[s]

    return data
with open(args.subjectList, 'r') as inSubj:
    subjects = inSubj.readlines()
subjects = [x.strip() for x in subjects]

assert len(topoDir) == 2
assert len(topoExt) == 2

for d in topoDir:
    assert os.path.exists(d)

df = pd.DataFrame(columns=['tpd'])
tpd = []

for s in subjects:

    v1 = ''.join([topoDir[0], s, topoExt[0]])
    v2 = ''.join([topoDir[1], s, topoExt[1]])

    if os.path.exists(v1) and os.path.exists(v2):

        v1 = ld.loadMat(v1)
        v1 = tm.tpdVector(v1)

        v2 = ld.loadMat(v2)
        v2 = tm.tpdVector(v2)

        metric = tm.tpd(v1, v2)
        tpd.append(metric)

df['tpd'] = tpd
df.to_csv(outputName)
def regionalizeStructures(timeSeries, levelStructures, midlines, level, R,
                          measure='median'):
    """
    Method to regionalize the resting state connectivity, using only
    vertices included at a minimum level away from the border vertices.

    Parameters:
    - - - - -
    timeSeries : input resting state file
    levelStructures : levelStructures created by computeLabelLayers
                      (".RegionalLayers.p" file)
    midlines : path to midline indices
    level : depth at which to constrain the layers
    R : number of regions (columns of the output matrix)
    measure : measure to apply to correlation values ['mean','median']
    """

    assert measure in ['median', 'mean']
    assert level >= 1

    resting = ld.loadMat(timeSeries)
    midlines = ld.loadMat(midlines)
    levelSets = ld.loadPick(levelStructures)

    resting[midlines, :] = 0

    condensedLevels = layerCondensation(levelSets, level)

    regionalized = np.zeros((resting.shape[0], R))

    # get the central vertices for each region_id
    for region_id in condensedLevels.keys():

        print(region_id)

        subregion = condensedLevels[region_id]
        subregion = list(set(subregion).difference(set(midlines)))
        print('# subvertices: {}'.format(len(subregion)))

        # if subregion has at least 1 vertex
        if len(subregion):

            subrest = resting[subregion, :]

            if np.ndim(subrest) == 1:
                subrest.shape += (1, )

            if subrest.shape[1] != resting.shape[1]:
                subrest = subrest.T

            # correlation distance = 1 - Pearson correlation
            correlated = metrics.pairwise.pairwise_distances(
                resting, subrest, metric='correlation')

            if measure == 'median':
                regionalized[:, region_id - 1] = np.median(1 - correlated,
                                                           axis=1)
            else:
                regionalized[:, region_id - 1] = np.mean(1 - correlated,
                                                         axis=1)

    regionalized[midlines, :] = 0

    return regionalized
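# Illustrative sketch with toy data: pairwise_distances with
# metric='correlation' returns 1 minus the Pearson correlation, so
# 1 - distance recovers the correlation itself.  Taking the median across
# a region's columns gives one connectivity value per vertex, which is the
# pattern regionalizeStructures uses.  Sizes below are arbitrary.
import numpy as np
from sklearn import metrics

rng = np.random.RandomState(0)
resting = rng.rand(20, 100)            # 20 vertices, 100 time points
subregion = [3, 7, 11]                 # central vertices of one toy region

subrest = resting[subregion, :]
dist = metrics.pairwise.pairwise_distances(resting, subrest,
                                           metric='correlation')
regionValue = np.median(1 - dist, axis=1)     # one value per vertex

# sanity check against numpy's correlation for a single pair
assert np.isclose(1 - dist[0, 0],
                  np.corrcoef(resting[0], subrest[0])[0, 1])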
def addToLibraries(self, train, trainML, match):
    """
    Updates the testing subject libraries with the results of a specific
    matching.

    Parameters:
    - - - - -
    train : train subject ID
    trainML : train subject MatchingLibraryTrain
    match : test-to-train matching file
    """

    if not self.vertLib:
        r = np.arange(0, self.N)

        # VertexLibrary
        # Contains the unique labels that a given vertex maps to, and the
        # number of times the vertex maps to each label
        self.vertLib = {}.fromkeys(list(r))

    # load the MatchingLibraryTrain object for the training subject
    train = ld.loadPick(trainML)
    print(train.__dict__.keys())

    # load test-to-train matching
    # Subtract 1 for difference between Matlab and Python indexing
    match = np.asarray(np.squeeze(ld.loadMat(match) - 1).astype(int))

    gCoords = np.asarray(
        list(set(np.arange(train.N)).difference(set(train.mids))))
    cCoords = np.asarray(
        list(set(np.arange(self.N)).difference(set(self.mids))))

    fixed = np.squeeze(np.zeros((self.N, 1)))
    fixed[cCoords] = gCoords[match]
    fixed = fixed.astype(np.int32)

    # fixed matching
    # fixed = ld.fixMatching(match,self.N,self.mids,train.N,train.mids)
    # fixed = fixed.astype(int)

    # make sure matching is same length as source label
    if len(fixed) == self.N:

        # for each vertex in source brain
        for node in np.arange(0, len(fixed)):

            # get matched vertex in target brain
            vertex = fixed[node]

            # make sure the matched vertex and source label are not midline
            if vertex != -1 and self.label[node] > 0:

                # get target label
                tL = train.label[vertex]

                # update vertex library
                if not self.vertLib[node]:
                    self.vertLib[node] = {tL: 1}
                else:
                    if tL not in self.vertLib[node].keys():
                        self.vertLib[node][tL] = 1
                    else:
                        self.vertLib[node][tL] += 1
def buildLibraries(self, source, source_label, source_mids, source_surf,
                   matching):
    """
    Updates the vertex and label libraries with matching results.

    Parameters:
    - - - - -
    source : source subject ID
    source_label : source label file
    source_mids : source midline indices file
    source_surf : source surface file
    matching : source-to-target matching file produced by
               DiffeoSpectralMatching (corr12 or corr21)
    """

    # get vertices of source surface
    verts = ld.loadGii(source_surf, 0)

    # get number of vertices in source surface
    sN = verts.shape[0]

    # load source label and midline vertices
    sLab = ld.loadGii(source_label, 0)
    sMids = ld.loadMat(source_mids) - 1

    # check to make sure label file has same number of vertices as surface
    if len(sLab) != sN:
        sLab = ld.fixLabelSize(sMids, sLab, sN)

    # load source-to-target matching
    s2t = np.squeeze(ld.loadMat(matching) - 1).astype(int)

    # fix matching: target vertices in target space, source vertices at
    # the correct length.  s2t is of length (sN - mids), with indices in
    # that range; it becomes length sN with indices in the correct range.
    fixed = ld.fixMatching(s2t, sN, sMids, self.N, self.mids).astype(int)

    # check if source subject has already been added
    if source not in self.matchedSubjects:

        # check to make sure matching and source label file have the same length
        if len(fixed) == len(sLab):

            # add matched subject to the set of seen subjects
            self.matchedSubjects.add(source)

            for node in np.arange(len(fixed)):

                # get target vertex to which source vertex is mapped
                vertex = fixed[node]

                # get label of source vertex
                sl = sLab[node]

                # if neither the target vertex nor the source label is midline
                if vertex != -1 and sl > 0:

                    # get target vertex label
                    tl = self.label[vertex]

                    # update vertex library counts
                    if not self.vertCounts[vertex]:
                        self.vertCounts[vertex] = {sl: 1}
                    else:
                        if sl not in self.vertCounts[vertex].keys():
                            self.vertCounts[vertex].update({sl: 1})
                        else:
                            self.vertCounts[vertex][sl] += 1

                    # update label library counts
                    if not self.labCounts[tl]:
                        self.labCounts[tl] = {sl: 1}
                    else:
                        if sl not in self.labCounts[tl].keys():
                            self.labCounts[tl].update({sl: 1})
                        else:
                            self.labCounts[tl][sl] += 1
        else:
            print('Warning: Matching not the same length as '
                  'source label file.')
    else:
        print('Source has already been included in the libraries.')
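# Illustrative sketch with toy data: the vertCounts / labCounts updates in
# buildLibraries accumulate nested count dictionaries.  A
# collections.defaultdict of Counter objects produces the same structure
# with less branching; this is an alternative shown for clarity, not the
# library's implementation.
from collections import Counter, defaultdict

vertCounts = defaultdict(Counter)
labCounts = defaultdict(Counter)

# (target vertex, source label, target label) triples, as a matching might yield
observations = [(0, 5, 7), (0, 5, 7), (0, 3, 7), (2, 5, 4)]

for vertex, sl, tl in observations:
    vertCounts[vertex][sl] += 1
    labCounts[tl][sl] += 1

# vertCounts[0] == Counter({5: 2, 3: 1}); labCounts[7] == Counter({5: 2, 3: 1})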