    def __init__(self, target, target_label, target_mids, target_surface):

        # Load input data and initialize data attributes
        self.ID = target

        # We load the surface as a reference: we need to determine whether
        # the label file has the same number of vertices as the surface, or
        # whether it needs to be adjusted.
        verts = ld.loadGii(target_surface, 0)

        # get correct number of vertices
        self.N = verts.shape[0]
        self.label = ld.loadGii(target_label, 0)

        self.mids = ld.loadMat(target_mids) - 1

        if self.N != len(self.label):

            print('Warning: target surface has more vertices than the '
                  'label file. Adjusting label file.')

            self.label = ld.fixLabelSize(self.mids, self.label, self.N)

        # Initialize library attributes.
        # vertCounts contains, for each vertex-of-interest, the labels it maps to
        self.vertCounts = dict.fromkeys(np.arange(self.N))

        # labCounts contains the labels a label-of-interest maps to
        self.labCounts = dict.fromkeys(self.label)

        self.matchedSubjects = set()
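
    # A minimal sketch (hypothetical values, not from the pipeline) of how the
    # count libraries fill up: fromkeys seeds every key with None, and
    # buildLibraries later replaces each entry with a {source_label: count}
    # dictionary, e.g.
    #
    #   self.vertCounts[3042] -> {17: 4, 18: 1}    # vertex 3042 matched label 17 four times
    #   self.labCounts[17]    -> {17: 212, 16: 9}  # source labels mapped onto target label 17
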
    def loadTest(self, y, yMatch):
        """
        Method to load the test data into the object.  We might be interested
        in loading new test data later, so we have explicitly defined this
        as a method.
        
        Parameters:
        - - - - -
            y : SubjectFeatures object for a test brain      
            
            yMatch : MatchingFeaturesTest object containing vertLib attribute 
                    detailing which labels each vertex in surface y maps to 
                    in the training data

        """

        load = self.load
        save = self.save

        features = self.features

        # load test subject data, save as attributes
        tObject = ld.loadH5(y, *['full'])
        ID = tObject.attrs['ID']

        parsedData = ld.parseH5(tObject, features)
        tObject.close()

        data = parsedData[ID]
        mtd = cu.mergeFeatures(data, features)
        print('Testing shape: {}'.format(mtd.shape))

        if self.scaled:
            scaler = self.scaler
            mtd = scaler.transform(mtd)

        threshed = ld.loadMat(yMatch)

        # Computing label-vertex memberships is time consuming.
        # If they were already precomputed for this test data at the
        # specified threshold, a path to the saved file can be supplied.
        if load and os.path.isfile(load):
            ltvm = ld.loadPick(load)
        # Otherwise, compute label-vertex memberships.
        else:
            ltvm = cu.vertexMemberships(threshed, 180)

        self.ltvm = ltvm

        # if save is provided, save label-vertex memberships to file
        if save:
            try:
                with open(save, "wb") as outFile:
                    pickle.dump(self.ltvm, outFile, -1)
            except IOError:
                print('Cannot save label-vertex memberships to file.')

        return [threshed, mtd, ltvm]
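
    # A hedged usage sketch (hypothetical file names; assumes the object also
    # carries `features`, `scaled`, `load`, and `save` attributes set during
    # training):
    #
    #   threshed, mtd, ltvm = model.loadTest('100307.L.h5',
    #                                        '100307.L.MatchingMatrix.mat')
    #   ltvm[17]   # -> indices of test vertices that map to training label 17
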
    def predict(self, y, yMatch, yMids):
        """
        Method to compute Mahalanobis distance of test data from the
        distribution of all training data for each label.
        
        Parameters:
        - - - - - 
        
        **kwargs : if power parameter is defined in kwargs, will perform
                    base classification and weighted classification of the
                    surface vertices
        """

        R = 180
        labels = self.labels
        # load the test data, matching matrix, and label-vertex memberships
        [mm, mtd, ltvm] = self.loadTest(y, yMatch)

        # Python is 0-indexed, while Matlab is not
        # We adjust the Matlab coordinates by subtracting 1
        midline = ld.loadMat(yMids) - 1

        mm[midline, :] = 0
        mtd[midline, :] = 0

        xTest, yTest = mtd.shape
        if yTest != self.input_dim:
            raise Warning('Test data does not have the same number of '
                          'features as the training data.')

        # initialize prediction score array
        baseline = np.zeros((mtd.shape[0], R + 1))

        # for all labels in the training set
        for lab in labels:
            # compute vertices that map to that label
            members = ltvm[lab]
            memberData = mtd[members, :]
            estimator = self.mixtures[lab]

            if len(members) > 0:

                scores = estimator.score_samples(memberData)

                # save scores for the member vertices of this label
                baseline[members, lab] = scores

        predicted = np.argmax(baseline, axis=1)

        self.baseline = baseline
        self.predicted = predicted
        self._classified = True
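
    # A short sketch of the scoring layout (illustrative values; assumes the
    # training labels run 1..R, so column 0 of `baseline` is left at zero):
    #
    #   baseline[v, 17]   # log-likelihood of test vertex v under label 17's mixture
    #   predicted[v]      # np.argmax(baseline[v, :]), the best-scoring label for v
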
        """
Exemple #4
0
def processLabelResizing(subjectList, dataDir, hemi):
    """
    Resize HCP dlabel files to the full 32k surface, and re-import them as
    label GIFTI files with wb_command.

    Parameters:
    - - - - -
        subjectList : text file of subject IDs, one per line
        dataDir : root data directory
        hemi : hemisphere to process
    """

    with open(subjectList, 'r') as inFile:
        subjects = inFile.readlines()
    subjects = [x.strip() for x in subjects]

    hcpDir = dataDir + 'Haynor/Connectome_4_With_MMP/Labels/'
    midDir = dataDir + 'parcellearning/Data/Midlines/'
    funDir = dataDir + 'HCP/Connectome_4/'
    outDir = dataDir + 'parcellearning/Data/Labels/HCP/'

    lExt = '.' + hemi + '.CorticalAreas_dil_NoTask_Final_Individual.32k_fs_LR.dlabel.nii'
    fExt = '.' + hemi + '.MyelinMap.32k_fs_LR.func.gii'
    mExt = '.' + hemi + '.Midline_Indices.mat'

    N = 32492

    inCMAP = dataDir + 'parcellearning/Data/Labels/' + 'Label_Lookup_300.txt'

    for s in subjects:

        inLabel = hcpDir + s + lExt
        inMid = midDir + s + mExt
        inFunc = funDir + s + '/Surface/MNI/' + s + fExt

        if os.path.isfile(inLabel) and os.path.isfile(
                inMid) and os.path.isfile(inFunc):

            outFunc = outDir + s + '.' + hemi + '.CorticalAreas.fixed.32k_fs_LR.func.gii'
            outLabel = outDir + s + '.' + hemi + '.CorticalAreas.fixed.32k_fs_LR.label.gii'

            label = ld.loadGii(inLabel)
            mids = ld.loadMat(inMid)

            fixed = ld.fixLabelSize(mids, label, N)

            M = nb.load(inFunc)
            M.darrays[0].data = np.asarray(fixed.astype(np.float32))

            nb.save(M, outFunc)

            cmd = '/usr/bin/wb_command -metric-label-import {} {} {}'.format(
                outFunc, inCMAP, outLabel)

            os.system(cmd)
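
# A hedged usage sketch (hypothetical directory layout): given a text file of
# subject IDs and a root data directory, this writes fixed-size .func.gii and
# .label.gii files for the chosen hemisphere, e.g.
#
#   processLabelResizing('subjects.txt', '/data/projects/', 'L')
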
    def __init__(self, source, source_label, source_mids, source_surface):

        self.ID = source

        verts = ld.loadGii(source_surface, 0)

        self.N = verts.shape[0]
        self.label = ld.loadGii(source_label, 0)

        self.mids = ld.loadMat(source_mids) - 1

        if self.N != len(self.label):

            print('Warning: surface has more vertices than the label file. '
                  'Adjusting label file.')

            self.label = ld.fixLabelSize(self.mids, self.label, self.N)

        self.vertLib = {}
    def loadTest(self, y, yMatch):
        """
        Method to load the test data into the object.  We might be interested
        in loading new test data later, so we have explicitly defined this
        as a method.
        
        Parameters:
        - - - - -
            y : SubjectFeatures object for a test brain      
            
            yMatch : MatchingFeaturesTest object containing vertLib attribute 
                    detailing which labels each vertex in surface y maps to 
                    in the training data

        """

        # load test subject data, save as attributes
        tObject = ld.loadH5(y, *['full'])
        ID = tObject.attrs['ID']

        parsedData = ld.parseH5(tObject, self.features)
        tObject.close()

        data = parsedData[ID]
        mtd = cu.mergeFeatures(data, self.features)
        print('Testing shape: {}'.format(mtd.shape))

        if self.scaled:
            scaler = self.scaler
            mtd = scaler.transform(mtd)

        threshed = ld.loadMat(yMatch)

        ltvm = cu.vertexMemberships(threshed, 180)

        return [threshed, mtd, ltvm]
def loadData(subjectList, dataMap, features, hemi):
    """
    Generates the training data from a list of subjects.
    
    Parameters:
    - - - - -
        subjectList : list of subjects to include in training set
        dataMap : dictionary mapping each data type ('object', 'midline',
                    'matching') to a single {directory: extension} entry
        features : list of features to include
        hemi : hemisphere to process
    """

    # Each dataMap entry is expected to hold a single {directory: extension} pair
    objDir, objExt = list(dataMap['object'].items())[0]
    midDir, midExt = list(dataMap['midline'].items())[0]
    matDir, matExt = list(dataMap['matching'].items())[0]

    data = {}
    matches = {}

    for s in subjectList:

        # Training data
        trainObject = '{}{}.{}.{}'.format(objDir, s, hemi, objExt)
        print(trainObject)
        midObject = '{}{}.{}.{}'.format(midDir, s, hemi, midExt)
        matObject = '{}{}.{}.{}'.format(matDir, s, hemi, matExt)

        # Check to make sure all 3 files exist
        if os.path.isfile(trainObject) and os.path.isfile(
                midObject) and os.path.isfile(matObject):

            # Load midline indices
            # Subtract 1 for the difference between Matlab and Python indexing
            mids = ld.loadMat(midObject) - 1
            mids = set(mids)

            match = ld.loadMat(matObject)

            # Load training data and training labels
            trainH5 = h5py.File(trainObject, mode='r')

            # Get data corresponding to features of interest
            subjData = ld.parseH5(trainH5, features)
            trainH5.close()

            nSamples = set(np.arange(subjData[s][features[0]].shape[0]))
            coords = np.asarray(list(nSamples.difference(mids)))

            for f in subjData[s].keys():
                tempData = subjData[s][f]
                if tempData.ndim == 1:
                    tempData.shape += (1, )

                subjData[s][f] = np.squeeze(tempData[coords, :])

            match = match[coords, :]

            data[s] = subjData[s]
            matches[s] = match

    return [data, matches]
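
# A hedged sketch (hypothetical paths and extensions) of the dataMap layout
# that loadData expects: each data type maps to a single
# {directory: extension} entry.
#
#   dataMap = {'object':   {'/data/TrainingObjects/': 'TrainingObject.h5'},
#              'midline':  {'/data/Midlines/': 'Midline_Indices.mat'},
#              'matching': {'/data/Matches/': 'MatchingMatrix.mat'}}
#   data, matches = loadData(['100307', '100408'], dataMap,
#                            ['curv', 'sulc'], 'L')
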
def loadDataFromList(subjectList, dataDir, features, hemi):
    """
    Generates the training data for the neural network.
    
    Parameters:
    - - - - -
        subjectList : list of subjects to include in training set
        dataDir : main directory where data exists -- individual features
                    will exist in sub-directories here
        features : list of features to include
        hemi : hemisphere to process
    """

    hemisphere = {'Left': 'L', 'Right': 'R'}

    H = hemisphere[hemi]

    # For now, we hardcode where the data is
    trainDir = '{}TrainingObjects/FreeSurfer/'.format(dataDir)
    trainExt = '.{}.TrainingObject.aparc.a2009s.h5'.format(H)

    midDir = '{}Midlines/'.format(dataDir)
    midExt = '.{}.Midline_Indices.mat'.format(H)

    data = {}

    for s in subjectList:

        # Training data
        trainObject = '{}{}{}'.format(trainDir, s, trainExt)
        midObject = '{}{}{}'.format(midDir, s, midExt)

        # Check to make sure all 3 files exist
        if os.path.isfile(trainObject) and os.path.isfile(midObject):

            # Load midline indices
            # Subtract 1 for the difference between Matlab and Python indexing
            mids = ld.loadMat(midObject) - 1
            mids = set(mids)

            # Load training data and training labels
            trainH5 = h5py.File(trainObject, mode='r')

            # Get data corresponding to features of interest
            subjData = ld.parseH5(trainH5, features)
            trainH5.close()

            nSamples = set(np.arange(subjData[s][features[0]].shape[0]))
            coords = np.asarray(list(nSamples.difference(mids)))

            for f in subjData[s].keys():
                tempData = subjData[s][f]
                if tempData.ndim == 1:
                    tempData.shape += (1, )

                subjData[s][f] = np.squeeze(tempData[coords, :])

            data[s] = subjData[s]

    return data
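
# A small, self-contained sketch (synthetic data; underscore-prefixed names
# are illustrative and not part of the pipeline) of the two indexing tricks
# used above: promoting a 1-D feature to a column with `shape += (1, )`, and
# dropping midline rows via a set difference.
import numpy as np

_demo_feature = np.random.rand(8)             # 1-D feature vector
_demo_feature.shape += (1, )                  # now shape (8, 1)

_demo_mids = set([2, 5])                      # midline indices (0-based)
_demo_coords = np.asarray(list(set(np.arange(8)).difference(_demo_mids)))
_demo_feature = np.squeeze(_demo_feature[_demo_coords, :])   # midline rows dropped
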
with open(args.subjectList, 'r') as inSubj:
    subjects = inSubj.readlines()
subjects = [x.strip() for x in subjects]

assert len(topoDir) == 2
assert len(topoExt) == 2

for d in topoDir:
    assert os.path.exists(d)

df = pd.DataFrame(columns=['tpd'])
tpd = []

for s in subjects:

    v1 = ''.join([topoDir[0], s, topoExt[0]])
    v2 = ''.join([topoDir[1], s, topoExt[1]])

    if os.path.exists(v1) and os.path.exists(v2):

        v1 = ld.loadMat(v1)
        v1 = tm.tpdVector(v1)

        v2 = ld.loadMat(v2)
        v2 = tm.tpdVector(v2)

        metric = tm.tpd(v1, v2)
        tpd.append(metric)

df['tpd'] = tpd
df.to_csv(outputName)
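
# Note (descriptive only): subjects with missing topology files are skipped,
# so the 'tpd' column can be shorter than the subject list and the default
# integer index does not identify which subject produced each value.
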
def regionalizeStructures(timeSeries,
                          levelStructures,
                          midlines,
                          level,
                          R,
                          measure='median'):
    """
    Method to regionalize the resting state connectivity, using only vertices
    included at a minimum level away from the border vertices.
    
    Parameters:
    - - - - -
        timeSeries : input resting state file
        levelStructures : level structures created by computeLabelLayers
                            (".RegionalLayers.p" file)
        midlines : path to midline indices
        level : depth at which to constrain the layers
        R : number of regions (columns of the output matrix)
        measure : measure to apply to correlation values ['mean','median']
    """

    assert measure in ['median', 'mean']
    assert level >= 1

    resting = ld.loadMat(timeSeries)
    midlines = ld.loadMat(midlines)
    levelSets = ld.loadPick(levelStructures)

    resting[midlines, :] = 0

    condensedLevels = layerCondensation(levelSets, level)

    regionalized = np.zeros((resting.shape[0], R))

    # get the central vertices for region_id
    for region_id in condensedLevels.keys():

        print(region_id)

        subregion = condensedLevels[region_id]
        subregion = list(set(subregion).difference(set(midlines)))
        print('# subvertices: {}'.format(len(subregion)))

        # if subregion has at least 1 vertex
        if len(subregion):

            subrest = resting[subregion, :]

            if np.ndim(subrest) == 1:
                subrest.shape += (1, )

            if subrest.shape[1] != resting.shape[1]:
                subrest = subrest.T

            correlated = metrics.pairwise.pairwise_distances(
                resting, subrest, metric='correlation')

            if measure == 'median':
                regionalized[:, region_id - 1] = np.median(1 - correlated,
                                                           axis=1)
            else:
                regionalized[:, region_id - 1] = np.mean(1 - correlated,
                                                         axis=1)

    regionalized[midlines, :] = 0

    return regionalized
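
# A minimal, self-contained sketch of the correlation step above (synthetic
# data; underscore-prefixed names are illustrative and not part of the
# pipeline). sklearn's 'correlation' metric returns 1 - Pearson r, so
# subtracting the distance from 1 recovers the correlation before the
# median/mean reduction across the subregion's vertices.
import numpy as np
from sklearn import metrics

_rest = np.random.rand(100, 50)          # 100 vertices x 50 time points
_subrest = _rest[:10, :]                 # a hypothetical 10-vertex subregion

_corr_dist = metrics.pairwise.pairwise_distances(_rest, _subrest,
                                                 metric='correlation')
_profile = np.median(1 - _corr_dist, axis=1)   # one connectivity value per vertex
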
    def addToLibraries(self, train, trainML, match):
        """
        Updates the testing subject libraries with the results of a specific
        matching.
        
        Parameters:
        - - - - -
            train : train subject ID
            trainML : train subject MatchingLibraryTrain
            match : test-to-train matching file
        """

        if not self.vertLib:

            r = np.arange(0, self.N)

            # VertexLibrary
            # Contains the unique labels that a given vertex maps to, and the
            # number of times the vertex maps to this label

            self.vertLib = {}
            self.vertLib = self.vertLib.fromkeys(list(r))

        # load the training subject's MatchingLibraryTrain object
        train = ld.loadPick(trainML)
        print(train.__dict__.keys())

        # load test to train matching
        match = np.asarray(np.squeeze(ld.loadMat(match) - 1).astype(int))

        gCoords = np.asarray(
            list(set(np.arange(train.N)).difference(set(train.mids))))
        cCoords = np.asarray(
            list(set(np.arange(self.N)).difference(set(self.mids))))

        fixed = np.squeeze(np.zeros((self.N, 1)))
        fixed[cCoords] = gCoords[match]
        fixed = fixed.astype(np.int32)

        # fixed matching
        # fixed = ld.fixMatching(match,self.N,self.mids,train.N,train.mids)
        # fixed = fixed.astype(int)

        # make sure matching is same length as source label
        if len(fixed) == self.N:
            # for each vertex in source brain
            for node in np.arange(0, len(fixed)):
                # get matched vertex in target brain
                vertex = fixed[node]

                # make sure the matched vertex is valid and the test
                # vertex is not midline / unlabeled
                if vertex != -1 and self.label[node] > 0:
                    # get target label
                    tL = train.label[vertex]

                    # update labelLibrary
                    if not self.vertLib[node]:
                        self.vertLib[node] = {tL: 1}
                    else:
                        if tL not in self.vertLib[node].keys():
                            self.vertLib[node][tL] = 1
                        else:
                            self.vertLib[node][tL] += 1
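
    # A hedged usage sketch (hypothetical file names): each training subject
    # is folded into the test library one matching at a time.
    #
    #   testLib.addToLibraries('100307',
    #                          '100307.L.MatchingLibraryTrain.p',
    #                          'test_to_100307.L.mat')
    #   testLib.vertLib[2048]   # -> e.g. {17: 3, 18: 1}
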
    def buildLibraries(self, source, source_label, source_mids, source_surf,
                       matching):
        """
        Updates the vertex and label libraries with matching results.
        
        Parameters:
        - - - - -
            
            source : source subject ID

            source_label : source label file

            source_mids : source midline indices file

            source_surf : source surface file

            matching : source-to-target matching file produced by
                        DiffeoSpectralMatching (corr12 or corr21)
        """

        # load vertices of source surface
        verts = ld.loadGii(source_surf, 0)

        # get number of vertices in source surface
        sN = verts.shape[0]

        # load source label and midline vertices
        sLab = ld.loadGii(source_label, 0)
        sMids = ld.loadMat(source_mids) - 1

        # check to make sure label file has same number of vertices as
        # surface
        if len(sLab) != sN:

            sLab = ld.fixLabelSize(sMids, sLab, sN)

        # load source-to-target matching
        s2t = np.squeeze(ld.loadMat(matching) - 1).astype(int)

        # fix matching
        # target vertices in target space, source vertices correct length
        # s2t is of length (sN-mids), with indices in that range
        # will become length sN with indices in correct range
        fixed = ld.fixMatching(s2t, sN, sMids, self.N, self.mids).astype(int)

        # check if source subject already added
        if source not in self.matchedSubjects:

            # check to make sure matching / source label file same length
            if len(fixed) == len(sLab):

                # add matched subject to list of seen
                self.matchedSubjects.add(source)

                for node in np.arange(len(fixed)):

                    # get target vertex to which source vertex is mapped
                    vertex = fixed[node]
                    # get label of source vertex
                    sl = sLab[node]

                    # if target vertex and source label not midline
                    if vertex != -1 and sl > 0:

                        # get target vertex label
                        tl = self.label[vertex]

                        # check if this target vertex already has an entry in vertCounts
                        if not self.vertCounts[vertex]:
                            self.vertCounts[vertex] = {sl: 1}
                        else:
                            if sl not in self.vertCounts[vertex].keys():
                                self.vertCounts[vertex].update({sl: 1})
                            else:
                                self.vertCounts[vertex][sl] += 1

                        if not self.labCounts[tl]:
                            self.labCounts[tl] = {sl: 1}
                        else:
                            if sl not in self.labCounts[tl].keys():
                                self.labCounts[tl].update({sl: 1})
                            else:
                                self.labCounts[tl][sl] += 1
            else:
                print('Warning:  Matching not the same length as '\
                                'source label file.')
        else:
            print('Source has already been included in the libraries.')
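
    # A hedged usage sketch (hypothetical paths): one call per matched source
    # subject; a repeated source ID is skipped with the message above.
    #
    #   targetLib.buildLibraries('100408',
    #                            '100408.L.label.gii',
    #                            '100408.L.Midline_Indices.mat',
    #                            '100408.L.surf.gii',
    #                            'corr12.100408_to_target.mat')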