import copy
import os
import pickle

import numpy as np
from sklearn import metrics

# NOTE: the calls below assume two project-local modules imported at module
# level: `ld` (file-loading utilities) and `cu` (classifier utilities).


def computeLabelLayers(labelFile, surfaceAdjacency, borderFile):
    """
    Method to find level structures of vertices, where each structure is a
    set of vertices that are a distance k away from the border vertices.
    """
    label = ld.loadGii(labelFile, 0)
    surfAdj = ld.loadPick(surfaceAdjacency)
    borders = ld.loadPick(borderFile)

    # get set of non-zero labels in label file
    L = set(label) - set([0])
    layers = {}.fromkeys(L)

    # Parallel version (disabled) -- assumes joblib's Parallel / delayed
    # and a NUM_CORES constant:
    # fullList = Parallel(n_jobs=NUM_CORES)(
    #     delayed(labelLayers)(lab, np.where(label == lab)[0],
    #                          surfAdj, borders[lab]) for lab in L)

    for labelValue in L:
        if labelValue in borders:
            inds = np.where(label == labelValue)[0]
            bm = borders[labelValue]
            layers[labelValue] = labelLayers(labelValue, inds, surfAdj, bm)

    return layers
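# Example usage (a hedged sketch -- the file names below are hypothetical):
#
#   layers = computeLabelLayers('sub01.L.CorticalAreas.label.gii',
#                               'sub01.L.adjacency.p',
#                               'sub01.L.borders.p')
#   # layers[lab] holds the level structures returned by labelLayers for
#   # label `lab`: vertices grouped by their distance from the border.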
def predict(self, y, yMatch, features):
    """
    Method to compute the Mahalanobis distance of test data from the
    distribution of all training data for each label.

    Parameters:
    - - - - -
        y : SubjectFeatures object for a test brain
        yMatch : MatchingFeaturesTest object containing a vertLib attribute
                 detailing which labels each vertex in surface y maps to
                 in the training data
        features : names of features to include in the distance calculations
    """
    # load SubjectFeatures object
    testObject = ld.loadPick(y)

    # load MatchingLibraryTest
    testMatch = ld.loadPick(yMatch)

    lMaps = testMatch.vertLib
    simplified = {n: list(lMaps[n].keys()) for n in lMaps.keys()}

    # merge the feature data for the test subject into a single array
    mergedData = cu.mergeFeatures(testObject.data, features)

    # computing label-vertex memberships is time consuming -- cache the
    # result so repeated predictions with different feature sets reuse it
    if not hasattr(self, '_labelVerts'):
        labelVerts = cu.vertexMemberships(simplified, self._labels)
        self._labelVerts = labelVerts
    else:
        labelVerts = self._labelVerts

    # initialize Mahalanobis prediction vector
    predict = {}.fromkeys(testMatch.vertLib.keys())

    # for each label in the training set
    for lab in self._labels:

        # find vertices that map to that label
        members = labelVerts[lab]

        if len(members) > 0:

            # compute Mahalanobis distance of each vertex's features
            # to the label's feature distribution
            scores = self._predictPoint(mergedData, lab, members)

            # save results
            predict = cu.updateScores(predict, lab, members, scores)

    self._predict = predict
def mappingConfusionMatrix(merged):
    """
    Method to build a confusion matrix from the merged library data. Note
    that this is not a symmetric matrix. Rather, we simply display the
    mapping frequencies in an array, where each row corresponds to a target
    label, and each index in a row corresponds to the frequency with which
    a source label maps to the target.

    Parameters:
    - - - - -
        merged : merged MatchingLibrary file

    Returns:
    - - - -
        confusion : array of size N labels by N labels
    """
    if isinstance(merged, str):
        merged = ld.loadPick(merged)

    labels = list(merged.keys())
    N = len(labels)

    # map each label to a row / column index in the confusion matrix
    mappings = dict(zip(labels, np.arange(N)))
    confusion = np.zeros((N, N))

    for lab in labels:
        if merged[lab]:
            c1 = mappings[lab]

            for maps in merged[lab].keys():
                c2 = mappings[maps]
                confusion[c1][c2] = merged[lab][maps]

    return confusion
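# Example usage (hedged sketch; 'merged.p' is a hypothetical merged
# MatchingLibrary file). Row-normalizing the counts gives per-label
# mapping frequencies suitable for plotting:
#
#   confusion = mappingConfusionMatrix('merged.p')
#   rowSums = confusion.sum(axis=1, keepdims=True)
#   rowSums[rowSums == 0] = 1          # avoid division by zero
#   normalized = confusion / rowSums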
def mappingFrequency(merged):
    """
    Convert matching library counts to frequencies. We will use these
    frequencies for thresholding during the classification step.

    Parameters:
    - - - - -
        merged : merged MatchingLibrary file

    Returns:
    - - - -
        mergedC : copy of the merged MatchingLibrary object with counts
                  normalized to frequencies
    """
    if isinstance(merged, str):
        merged = ld.loadPick(merged)

    mergedC = copy.deepcopy(merged)

    for lab in mergedC.keys():
        if mergedC[lab]:
            total = 1. * np.sum(list(mergedC[lab].values()))

            # normalize each count by the total number of mappings
            for m in mergedC[lab].keys():
                mergedC[lab][m] /= total

    return mergedC
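# Example usage (hedged sketch; 'merged.p' is a hypothetical file path):
#
#   freqs = mappingFrequency('merged.p')
#   # freqs[lab][m] is now the fraction of times label `lab` mapped to
#   # label `m`; mappings below a chosen threshold (e.g. 0.05) can be
#   # discarded during classification.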
def loadTest(self, y, yMatch):
    """
    Method to load the test data into the object. We might be interested
    in loading new test data, so we explicitly define this as a method.

    Parameters:
    - - - - -
        y : SubjectFeatures object for a test brain
        yMatch : MatchingFeaturesTest object containing a vertLib attribute
                 detailing which labels each vertex in surface y maps to
                 in the training data
    """
    load = self.load
    save = self.save
    features = self.features

    # load test subject data, save as attributes
    tObject = ld.loadH5(y, 'full')
    ID = tObject.attrs['ID']

    parsedData = ld.parseH5(tObject, features)
    tObject.close()

    data = parsedData[ID]
    mtd = cu.mergeFeatures(data, features)

    print('Testing shape: {}'.format(mtd.shape))

    if self.scaled:
        scaler = self.scaler
        mtd = scaler.transform(mtd)

    threshed = ld.loadMat(yMatch)

    # Computing label-vertex memberships is time consuming. If they were
    # already computed for this test data at the specified threshold, a
    # load path can be supplied; otherwise, compute them from scratch.
    if load and os.path.isfile(load):
        ltvm = ld.loadPick(load)
    else:
        ltvm = cu.vertexMemberships(threshed, 180)

    self.ltvm = ltvm

    # if save is provided, save label-vertex memberships to file
    if save:
        try:
            with open(save, "wb") as outFile:
                pickle.dump(ltvm, outFile, -1)
        except IOError:
            print('Cannot save label-vertex memberships to file.')

    return [threshed, mtd, ltvm]
def predict(self, y, yMatch):
    """
    Method to predict the labels based on the frequency with which each
    test vertex maps to each training label.

    Parameters:
    - - - - -
        y : SubjectFeatures object for a test brain
        yMatch : MatchingFeaturesTest object containing a vertLib attribute
                 detailing which labels each vertex in surface y maps to
                 in the training data
    """
    y = ld.loadPick(y)
    yMatch = ld.loadPick(yMatch)

    self._predict = cu.maximumLiklihood(y, yMatch)
def mergeMappings(subjects, inputDir, exten, normalize=False):
    """
    Method to merge MatchingLibraryTrain objects. For a given object, we
    have vertCounts and labCounts -- here we merge the labCounts attributes
    to return a dictionary of the aggregate maps. The keys are labels, and
    the values are lists of labels that each key label maps to.

    Parameters:
    - - - - -
        subjects : list of training subjects to include in the merged object
        inputDir : input directory where the MatchingLibraryTrain objects exist
        exten : file extension of the MatchingLibraryTrain objects
        normalize : if True, convert the aggregate counts to frequencies

    Returns:
    - - - -
        merged : dictionary containing the aggregated labCounts_ results for
                 each included MatchingLibraryTrain object. We do not keep
                 track of the counts.
    """
    cond = True
    merged = {}

    if not os.path.isdir(inputDir):
        cond = False
        print('Input directory does not exist.')

    if cond:
        for s in subjects:
            inMTL = inputDir + s + exten

            if not os.path.isfile(inMTL):
                print('MatchingLibrary for {} does not exist. '
                      'Skipping.'.format(s))
            else:
                mtl = ld.loadPick(inMTL)
                merged = addSingleLibrary(merged, mtl)

        if normalize:
            merged = mappingFrequency(merged)

    return merged
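# Example usage (hedged sketch; subject IDs, directory, and extension are
# hypothetical):
#
#   subjects = ['100307', '100408']
#   merged = mergeMappings(subjects, '/data/matchingLibraries/',
#                          '.MatchingLibrary.Train.p', normalize=True)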
def __init__(self, trainObj, feats):

    if isinstance(trainObj, str):
        self._trainData = ld.loadPick(trainObj)
    elif isinstance(trainObj, dict):
        self._trainData = trainObj
    else:
        raise ValueError('Training object must be a file path or dict.')

    if not self._trainData:
        raise ValueError('Training data cannot be empty.')

    if not feats:
        raise ValueError('Feature list cannot be empty.')
    else:
        self._features = feats

    # exclude the midline (0) and unknown (-1) labels
    self._labels = set(cu.getLabels(self._trainData)) - set([0, -1])
    self._labelData = cu.partitionData(self._trainData, feats=feats)

    self._mu = self._computeMeans()
def neighborhoodErrorMap(labVal, labelAdjacency, truthLabFile, predLabFile,
                         labelLookup, outputColorMap):
    """
    Method to visualize the results of a prediction map, focusing on a
    specific core label.

    Parameters:
    - - - - -
        labVal : core region of interest
        labelAdjacency : label adjacency list
        truthLabFile : ground truth label file
        predLabFile : predicted label file
        labelLookup : label color lookup table
        outputColorMap : new color map for label files
    """
    # load files
    labAdj = ld.loadPick(labelAdjacency)
    truth = ld.loadGii(truthLabFile, 0)
    pred = ld.loadGii(predLabFile, 0)

    # extract current colors from colormap
    parsedColors = parseColorLookUpFile(labelLookup)

    # initialize new color map file
    color_file = open(outputColorMap, "w")

    trueColors = ' '.join(map(str, [255, 255, 255]))
    trueName = 'Label {}'.format(labVal)
    trueRGBA = '{} {} {}\n'.format(labVal, trueColors, 255)

    trueStr = '\n'.join([trueName, trueRGBA])
    color_file.writelines(trueStr)

    # get labels that neighbor core
    neighbors = labAdj[labVal]

    # get indices of core label in true map
    truthInds = np.where(truth == labVal)[0]

    # initialize new map
    visualizeMap = np.zeros((truth.shape))
    visualizeMap[truthInds] = labVal

    # get predicted label values existing at truthInds
    predLabelsTruth = pred[truthInds]

    for n in neighbors:

        # get color code for label, adjust, and write text to file
        oriName = 'Label {}'.format(n)
        oriCode = parsedColors[n]
        oriColors = ' '.join(map(str, oriCode))
        oriRGBA = '{} {} {}\n'.format(n, oriColors, 255)
        oriStr = '\n'.join([oriName, oriRGBA])
        color_file.writelines(oriStr)

        # offset mispredicted labels by 180 so they get their own
        # shifted color entries in the new color map
        adjLabel = n + 180
        adjName = 'Label {}'.format(adjLabel)
        adjColors = shiftColor(oriCode, mag=30)
        adjColors = ' '.join(map(str, adjColors))
        adjRGBA = '{} {} {}\n'.format(adjLabel, adjColors, 255)
        adjStr = '\n'.join([adjName, adjRGBA])
        color_file.writelines(adjStr)

        # find where true map == n and set this value
        n_inds = np.where(truth == n)[0]
        visualizeMap[n_inds] = n

        # find where prediction(core) == n, and set to adjusted value
        n_inds = np.where(predLabelsTruth == n)[0]
        visualizeMap[truthInds[n_inds]] = adjLabel

    color_file.close()

    return visualizeMap
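# Example usage (hedged sketch; file names are hypothetical):
#
#   vmap = neighborhoodErrorMap(5, 'label_adjacency.p',
#                               'truth.label.gii', 'pred.label.gii',
#                               'colors.txt', 'error_colors.txt')
#   # vmap can then be saved as a label file and viewed with the generated
#   # color map to inspect mispredictions in the neighborhood of label 5.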
def regionalizeStructures(timeSeries, levelStructures, midlines, level, R,
                          measure='median'):
    """
    Method to regionalize the resting-state connectivity, using only
    vertices included at a minimum level away from the border vertices.

    Parameters:
    - - - - -
        timeSeries : input resting state file
        levelStructures : levelStructures created by computeLabelLayers
                          ".RegionalLayers.p" file
        midlines : path to midline indices
        level : depth at which to constrain the layers
        R : number of regions in the parcellation
        measure : measure to apply to correlation values ['mean','median']
    """
    assert measure in ['median', 'mean']
    assert level >= 1

    resting = ld.loadMat(timeSeries)
    midlines = ld.loadMat(midlines)
    levelSets = ld.loadPick(levelStructures)

    resting[midlines, :] = 0

    condensedLevels = layerCondensation(levelSets, level)

    regionalized = np.zeros((resting.shape[0], R))

    # get the central vertices for each region
    for region_id in condensedLevels.keys():
        print(region_id)

        subregion = condensedLevels[region_id]
        subregion = list(set(subregion).difference(set(midlines)))
        print('# subvertices: {}'.format(len(subregion)))

        # if subregion has at least 1 vertex
        if len(subregion):
            subrest = resting[subregion, :]

            if np.ndim(subrest) == 1:
                subrest.shape += (1,)

            if subrest.shape[1] != resting.shape[1]:
                subrest = subrest.T

            # correlation distance of every vertex to the subregion core;
            # (1 - distance) recovers the Pearson correlation
            correlated = metrics.pairwise.pairwise_distances(
                resting, subrest, metric='correlation')

            if measure == 'median':
                regionalized[:, region_id - 1] = np.median(1 - correlated,
                                                           axis=1)
            else:
                regionalized[:, region_id - 1] = np.mean(1 - correlated,
                                                         axis=1)

    regionalized[midlines, :] = 0

    return regionalized
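# Example usage (hedged sketch; the file names are hypothetical, and R=180
# follows the per-hemisphere label count used elsewhere in this module):
#
#   regionalized = regionalizeStructures('rest.mat',
#                                        'sub01.L.RegionalLayers.p',
#                                        'midlines.mat', level=2, R=180)
#   # regionalized[:, r] holds each vertex's median (or mean) correlation
#   # with the level-constrained core of region r + 1.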
def addToLibraries(self, train, trainML, match):
    """
    Updates the testing subject libraries with the results of a specific
    matching.

    Parameters:
    - - - - -
        train : train subject ID
        trainML : train subject MatchingLibraryTrain
        match : test-to-train matching file
    """
    if not self.vertLib:
        r = np.arange(0, self.N)

        # VertexLibrary
        # Contains the unique labels that a given vertex maps to, and the
        # number of times the vertex maps to this label
        self.vertLib = {}.fromkeys(list(r))

    # load MatchingLibraryTrain object (shadows the subject-ID argument)
    train = ld.loadPick(trainML)
    print(train.__dict__.keys())

    # load test-to-train matching, shifting from 1- to 0-based indexing
    match = np.asarray(np.squeeze(ld.loadMat(match) - 1).astype(int))

    # coordinates of non-midline vertices in train and test surfaces
    gCoords = np.asarray(
        list(set(np.arange(train.N)).difference(set(train.mids))))
    cCoords = np.asarray(
        list(set(np.arange(self.N)).difference(set(self.mids))))

    fixed = np.squeeze(np.zeros((self.N, 1)))
    fixed[cCoords] = gCoords[match]
    fixed = fixed.astype(np.int32)

    # fixed matching
    # fixed = ld.fixMatching(match, self.N, self.mids, train.N, train.mids)
    # fixed = fixed.astype(int)

    # make sure matching is same length as source label
    if len(fixed) == self.N:

        # for each vertex in source brain
        for node in np.arange(0, len(fixed)):

            # get matched vertex in target brain
            vertex = fixed[node]

            # make sure target coordinate is not in midline
            if vertex != -1 and self.label[node] > 0:

                # get target label
                tL = train.label[vertex]

                # update label library
                if not self.vertLib[node]:
                    self.vertLib[node] = {tL: 1}
                else:
                    if tL not in self.vertLib[node].keys():
                        self.vertLib[node][tL] = 1
                    else:
                        self.vertLib[node][tL] += 1