Ejemplo n.º 1
0
    def getSubjects(self):
        '''
        Load every subject listed in self.subjectPaths and file it under each
        of the masks that are stored inside the subject.

        Each path is opened as a gzip-compressed pickle. For every mask of a
        loaded subject:
            - if no mask of that name is known yet, the mask is stored in
              self.masks
            - if a mask of that name is known and sameSame() reports a
              difference, a warning is printed, the subject is remembered as
              problematic and this mask is skipped (other masks of the same
              subject are still processed)
            - otherwise a deep copy of the subject, with its masks attribute
              set to None, is stored in
              self.maskedSubjects[maskName][subjectName]

        Progress is written to stdout and a summary of masks and problem
        subjects is printed at the end. Nothing is returned.
        '''
        problemList = []
        numPaths = len(self.subjectPaths)

        for run, subjectPath in enumerate(self.subjectPaths, 1):
            # NOTE(review): unpickling executes whatever the file tells it
            # to - only load subject files that come from a trusted source.
            # The with-block makes sure the file handle is closed again.
            with gzip.open(subjectPath, 'rb') as tempSubFile:
                tempSubject = cPickle.load(tempSubFile)
            tempSubName = tempSubject.name

            # TEMP: this is a temporary solution to change the type of the
            # pheno - everything that mm.isNumber accepts is turned into a
            # float. Iterate over a snapshot since we assign while looping.
            for phenoName, phenoValue in list(tempSubject.pheno.items()):
                if mm.isNumber(phenoValue):
                    tempSubject.pheno[phenoName] = float(phenoValue)

            # loop through the masks that are stored in the subject
            for tempSubMask in tempSubject.masks.values():
                tempMaskName = tempSubMask.name

                if tempMaskName in self.masks:
                    # it is already in, compare it to the saved mask
                    if not self.masks[tempMaskName].sameSame(tempSubMask):
                        # this mask is different from the one we already
                        # saved - alert and skip this mask for the subject
                        # (which mask wins depends on loading order)
                        print('The mask: ' + tempMaskName + ' of subject '
                              + tempSubName + ' is different from the saved'
                              + ' mask in our repository')

                        if tempSubName not in problemList:
                            problemList.append(tempSubName)
                        continue
                else:
                    # the mask is not saved yet, so do this now
                    self.masks[tempMaskName] = tempSubMask

                # only copy once we know the subject is kept for this mask;
                # the copy is per mask so the entries stay independent
                maskSubject = copy.deepcopy(tempSubject)
                # get rid of all the masks before adding the subject to the
                # correct mask key name
                maskSubject.masks = None
                maskEntry = self.maskedSubjects.setdefault(tempMaskName, {})
                maskEntry[tempSubName] = maskSubject

            # done with the subject, print a quick notice
            sys.stdout.write('\rDone loading subject ' + tempSubName
                             + ' : ' + str(run) + '/'
                             + str(numPaths) + '           ')
            sys.stdout.flush()

        # done with the processing, tell the world about it and give a summary
        print('\n\nDone with fetching subjects'
              + '\nwe have ' + str(len(self.maskedSubjects)) + ' masks')
        # show them what we have got
        maskLines = ['These are the masks we have:']
        for mask in self.maskedSubjects.keys():
            maskLines.append('    ' + mask + ' : '
                             + str(len(self.maskedSubjects[mask]))
                             + ' subjects')
        print('\n'.join(maskLines))

        if problemList:
            # report the number of subjects, not the length of the message
            print('There were ' + str(len(problemList)) + ' subjects with'
                  + ' problems')
            print('\n'.join(['These were the subjects that caused problems:']
                            + problemList))
        else:
            print('No subjects had any problems')
Ejemplo n.º 2
0
def Main():
    '''
    Correlate connectome connections with age and step through plots of the
    top ranked connections.

    The function reads a subject list and a phenotypic file from hard-coded
    paths, loads one connectome text file per subject and builds two stacks:
    a full stack with all subjects and a limited stack with the subjects
    whose age lies strictly between minAge and maxAge. Both stacks are
    correlated with age. The ranks returned by getTopCorrMask for the
    limited correlation matrix are then visited from the highest rank value
    downwards. For each one the connection is plotted against age for all
    subjects together with linear, quadratic and cubic fits of the limited
    data.

    After every plot the user can press Enter to go to the next lower rank
    or type a rank number to jump there (a number <= 0 ends the loop).

    Raises:
        Exception: if the n-th entry of the subject list does not match the
            n-th subject of the phenotypic file.
    '''
    # Define the inputs
    pathToConnectomeDir = '/home2/surchs/secondLine/connectomes'
    pathToPhenotypicFile = '/home2/surchs/secondLine/configs/pheno81_uniform.csv'
    pathToSubjectList = '/home2/surchs/secondLine/configs/subjectList.csv'
    pathToCorrelationMatrixAges = '/home2/surchs/secondLine/correlation/correlation_matrix.txt'

    connectomeSuffix = '_connectome.txt'

    # Define parameters
    minAge = 6
    maxAge = 18

    # Read subject list. Blank lines are dropped so that a trailing newline
    # does not shift the list against the rows of the phenotypic file, and
    # the with-block closes the file again.
    with open(pathToSubjectList, 'rb') as subjectListFile:
        subjectList = [line.strip() for line in subjectListFile
                       if line.strip()]

    # Read the phenotypic file
    pheno = loadPhenotypicFile(pathToPhenotypicFile)
    phenoSubjects = pheno['subject'].tolist()
    phenoAges = pheno['age'].tolist()

    # Read the correlation matrix with age
    # connAgeCorr = loadNumpyTextFile(pathToCorrelationMatrixAges)

    # Prepare container variables for the connectomes and the ages, once for
    # the age limited selection and once for all subjects
    limitConnectomeStack = np.array([])
    limitAgeStack = np.array([])
    fullconnectomeStack = np.array([])
    fullageStack = np.array([])

    # Loop through the subjects
    for subjectIndex, subject in enumerate(subjectList):
        # the subject list and the pheno file have to be in the same order
        phenoSubject = phenoSubjects[subjectIndex]

        if not subject == phenoSubject:
            raise Exception('The Phenofile returned a different subject name '
                            + 'than the subject list:\n'
                            + 'pheno: ' + str(phenoSubject) + ' subjectList '
                            + str(subject))

        # Get the age of the subject from the pheno file
        phenoAge = phenoAges[subjectIndex]

        # Construct the path to the connectome file of the subject
        pathToConnectomeFile = os.path.join(pathToConnectomeDir,
                                            (subject + connectomeSuffix))
        # Load the connectome for the subject
        connectome = loadNumpyTextFile(pathToConnectomeFile)

        # Make a selection of age here. The limits themselves are excluded
        # and subjects that sit exactly on a limit are reported as well
        # instead of being dropped silently.
        if minAge < phenoAge < maxAge:
            # include subject in the limited stacks
            limitConnectomeStack = stackConnectome(limitConnectomeStack,
                                                   connectome)
            limitAgeStack = stackAges(limitAgeStack, phenoAge)

        elif phenoAge >= maxAge:
            # drop subject
            print(subject + ' is too old with age = ' + str(phenoAge))

        elif phenoAge <= minAge:
            # drop subject
            print(subject + ' is too young with age = ' + str(phenoAge))

        # Every subject goes into the full stacks, regardless of age
        fullconnectomeStack = stackConnectome(fullconnectomeStack, connectome)
        fullageStack = stackAges(fullageStack, phenoAge)

    # FULL: Correlate age with connections
    fullCorrMat, fullPMat = correlateConnectomeWithAge(fullconnectomeStack,
                                                       fullageStack)
    # LIMITED: Correlate age with connections
    limCorrMat, limPMat = correlateConnectomeWithAge(limitConnectomeStack,
                                                     limitAgeStack)
    # Get mask for top 200
    # NOTE(review): presumably every selected connection carries its rank as
    # a positive integer in the mask and everything else is 0 - confirm
    # against getTopCorrMask
    top200mask = getTopCorrMask(limCorrMat)
    maxRank = np.max(top200mask)

    print('\n\nVisualizing')
    # Now start plotting:
    plt.ion()
    run = maxRank
    while run > 0:
        coord = np.argwhere(top200mask == run).flatten()
        if coord.size < 2:
            # no matrix element carries this rank, nothing to plot
            print('There is no connection with rank ' + str(run))
            run -= 1
            continue

        print(coord)
        print(limCorrMat.shape)
        # the first two entries are the row and column of the connection
        row, col = coord[0], coord[1]
        corrVal = limCorrMat[row, col]
        limPVal = limPMat[row, col]
        # the last axis of the stacks runs over the stacked subjects
        fullConnVector = fullconnectomeStack[row, col, :]
        limConnVector = limitConnectomeStack[row, col, :]

        # Fit the curves on the age limited data
        lin = np.polyfit(limitAgeStack, limConnVector, deg=1)
        quad = np.polyfit(limitAgeStack, limConnVector, deg=2)
        cube = np.polyfit(limitAgeStack, limConnVector, deg=3)
        # Make the curves
        plotX = np.arange(limitAgeStack.min(), limitAgeStack.max() + 1, 0.1)
        plotLIN = np.polyval(lin, plotX)
        plotQUAD = np.polyval(quad, plotX)
        plotCUBE = np.polyval(cube, plotX)

        print('\nYou are looking at this now')
        print('The rank is: ' + str(run))
        # show the matrix coordinates, not the leftover subject loop index
        print('coordinates are: ' + str(coord))
        print('correlation is: ' + str(corrVal))
        print('connvec' + str(fullConnVector.shape) + ' '
              + str(fullageStack.shape))
        plt.plot(fullageStack, fullConnVector, 'k.', label='connection')
        plt.plot(plotX, plotLIN, color='g', label='linear fit')
        plt.plot(plotX, plotQUAD, color='r', label='quadratic fit')
        plt.plot(plotX, plotCUBE, color='b', label='cubic fit')

        plt.xlabel('age')
        plt.ylabel('connection strength')
        # plt.legend()
        plt.title('Connection #' + str(run) + ' r =: '
                  + str(np.round(corrVal, 2))
                  + ' p = ' + str(np.round(limPVal, 5)))
        plt.draw()
        plt.show()
        inPut = raw_input('To Continue, give new connection number and '
                          + 'Enter or just Enter...\n')
        if mm.isNumber(inPut):
            print('Got a new number ' + inPut)
            try:
                # go through float so that input like '3.0' works too
                run = int(float(inPut))
            except (ValueError, OverflowError):
                print('Cannot use ' + inPut + ' as a connection number')
                run -= 1
            else:
                if run > maxRank:
                    # there is nothing above the highest rank in the mask
                    print('The highest connection number is '
                          + str(maxRank) + ', going there instead')
                    run = maxRank
        else:
            run -= 1

        plt.close()