def getSubjects(self):
    '''
    Method to load the subjects listed in self.subjectPaths and to sort them
    by the masks that are stored inside each subject.

    Every path is opened as a gzip file and unpickled. For each mask of a
    subject, a deep copy of the subject (with its masks attribute set to
    None) is stored in self.maskedSubjects[maskName][subjectName]. The first
    mask seen under a given name is kept in self.masks. If a later subject
    carries a mask of the same name for which sameSame() returns a false
    value, the subject is reported, added to the problem list and not stored
    for that mask.

    Progress and a final summary are written to stdout, nothing is returned.
    '''
    problemHeader = 'These were the subjects that caused problems:'
    problemList = []
    numPaths = len(self.subjectPaths)

    for run, subjectPath in enumerate(self.subjectPaths, 1):
        # NOTE(review): unpickling can execute arbitrary code, so the subject
        # paths must only ever point to trusted files
        tempSubFile = gzip.open(subjectPath, 'rb')
        try:
            tempSubject = cPickle.load(tempSubFile)
        finally:
            # release the file handle even if unpickling fails
            tempSubFile.close()
        tempSubName = tempSubject.name

        # TEMP: temporary solution to change the type of the pheno values,
        # everything mm.isNumber accepts is stored as a float
        for pheno in list(tempSubject.pheno.keys()):
            tempPheno = tempSubject.pheno[pheno]
            if mm.isNumber(tempPheno):
                tempSubject.pheno[pheno] = float(tempPheno)

        # loop through the masks that are stored in the subject
        for tempSubMask in tempSubject.masks.values():
            tempMaskName = tempSubMask.name

            if tempMaskName in self.masks:
                # the mask is already saved, compare it to the saved one
                if not self.masks[tempMaskName].sameSame(tempSubMask):
                    # this mask is different from the one we already saved:
                    # alert and skip the subject for this mask (which mask
                    # wins depends on the loading order)
                    print('The mask: ' + tempMaskName + ' of subject '
                          + tempSubName + ' is different from the saved'
                          + ' mask in our repository')
                    if tempSubName not in problemList:
                        problemList.append(tempSubName)
                    continue
            else:
                # the mask is not saved yet, so do this now
                self.masks[tempMaskName] = tempSubMask

            # make sure there is a dictionary of subjects for this mask
            if tempMaskName not in self.maskedSubjects:
                self.maskedSubjects[tempMaskName] = {}

            # copy the subject separately for each mask (only once we know
            # it will be kept) and get rid of all the masks in the copy
            maskSubject = copy.deepcopy(tempSubject)
            maskSubject.masks = None
            self.maskedSubjects[tempMaskName][tempSubName] = maskSubject

        # done with the subject, print a quick notice
        sys.stdout.write('\rDone loading subject ' + tempSubName + ' : '
                         + str(run) + '/' + str(numPaths) + ' ')
        sys.stdout.flush()

    # done with the processing, tell the world about it and give a summary
    print('\n\nDone with fetching subjects' + '\nwe have '
          + str(len(self.maskedSubjects)) + ' masks')

    # show them what we have got
    maskLines = ['These are the masks we have:']
    for mask in self.maskedSubjects.keys():
        maskLines.append(' ' + mask + ' : '
                         + str(len(self.maskedSubjects[mask]))
                         + ' subjects')
    print('\n'.join(maskLines))

    if problemList:
        # report the number of subjects, not the length of the report text
        print('There were ' + str(len(problemList)) + ' subjects with'
              + ' problems')
        print('\n'.join([problemHeader] + problemList))
    else:
        print('No subjects had any problems')
def Main():
    '''
    Correlate connectome connections with age and browse the top ones.

    Reads the subject list and the phenotypic file from hard-coded paths,
    loads one connectome per subject and stacks it (together with the
    subject's age) into a full stack and into an age-limited stack that only
    holds subjects with minAge < age < maxAge. Both stacks are correlated
    with age. The mask returned by getTopCorrMask for the limited
    correlations is then walked from its highest value down to 1; for each
    value the connection is plotted against age together with linear,
    quadratic and cubic fits of the limited data. After each plot the user
    can press Enter to step to the next value or type a number to jump.

    Raises an Exception if the subject list and the phenotypic file disagree
    on a subject name at the same position.
    '''
    # Define the inputs
    pathToConnectomeDir = '/home2/surchs/secondLine/connectomes'
    pathToPhenotypicFile = '/home2/surchs/secondLine/configs/pheno81_uniform.csv'
    pathToSubjectList = '/home2/surchs/secondLine/configs/subjectList.csv'
    connectomeSuffix = '_connectome.txt'

    # Define parameters
    minAge = 6
    maxAge = 18

    # Read subject list, closing the file as soon as it has been read
    with open(pathToSubjectList, 'rb') as subjectListFile:
        subjectList = subjectListFile.readlines()

    # Read the phenotypic file
    pheno = loadPhenotypicFile(pathToPhenotypicFile)
    phenoSubjects = pheno['subject'].tolist()
    phenoAges = pheno['age'].tolist()

    # Prepare container variables for the connectomes and the ages, once for
    # the age-limited group and once for all subjects
    limitConnectomeStack = np.array([])
    limitAgeStack = np.array([])
    fullconnectomeStack = np.array([])
    fullageStack = np.array([])

    # Loop through the subjects
    for i, subject in enumerate(subjectList):
        subject = subject.strip()
        phenoSubject = phenoSubjects[i]

        # Both sources have to list the subjects in the same order
        if not subject == phenoSubject:
            # str() keeps the message buildable for non-string pheno entries
            raise Exception('The Phenofile returned a different subject name '
                            + 'than the subject list:\n'
                            + 'pheno: ' + str(phenoSubject)
                            + ' subjectList ' + subject)

        # Get the age of the subject from the pheno file
        phenoAge = phenoAges[i]

        # Construct the path to the connectome file of the subject and load it
        pathToConnectomeFile = os.path.join(pathToConnectomeDir,
                                            (subject + connectomeSuffix))
        connectome = loadNumpyTextFile(pathToConnectomeFile)

        # Make a selection of age here
        # NOTE(review): an age exactly equal to minAge or maxAge matches none
        # of the branches, so that subject is left out of the limited stacks
        # without a message - confirm this is intended
        if minAge < phenoAge < maxAge:
            # include subject in the limited stacks
            limitConnectomeStack = stackConnectome(limitConnectomeStack,
                                                   connectome)
            limitAgeStack = stackAges(limitAgeStack, phenoAge)
        elif phenoAge > maxAge:
            # drop subject
            print(subject + ' is too old with age = ' + str(phenoAge))
        elif phenoAge < minAge:
            print(subject + ' is too young with age = ' + str(phenoAge))

        # Every subject goes into the full stacks
        fullconnectomeStack = stackConnectome(fullconnectomeStack, connectome)
        fullageStack = stackAges(fullageStack, phenoAge)

    # FULL: Correlate age with connections
    fullCorrMat, fullPMat = correlateConnectomeWithAge(fullconnectomeStack,
                                                       fullageStack)
    # LIMITED: Correlate age with connections
    limCorrMat, limPMat = correlateConnectomeWithAge(limitConnectomeStack,
                                                     limitAgeStack)

    # Get mask for top 200
    top200mask = getTopCorrMask(limCorrMat)

    print('\n\nVisualizing')
    # Now start plotting, beginning with the highest value in the mask
    plt.ion()
    run = np.max(top200mask)
    while run > 0:
        # Coordinates of the connection that carries the current mask value
        # NOTE(review): if no entry of the mask equals run (e.g. after a
        # typed-in number) coord is empty and the indexing below raises an
        # IndexError
        coord = np.argwhere(top200mask == run).flatten()
        print(coord)
        print(limCorrMat.shape)
        row, col = coord[0], coord[1]

        corrVal = limCorrMat[row, col]
        limPVal = limPMat[row, col]
        # The last axis of the stacks is indexed in full here, giving one
        # value per stacked subject for this connection
        fullConnVector = fullconnectomeStack[row, col, :]
        limConnVector = limitConnectomeStack[row, col, :]

        # Fit the curves on the age-limited data
        lin = np.polyfit(limitAgeStack, limConnVector, deg=1)
        quad = np.polyfit(limitAgeStack, limConnVector, deg=2)
        cube = np.polyfit(limitAgeStack, limConnVector, deg=3)

        # Make the curves
        plotX = np.arange(limitAgeStack.min(), limitAgeStack.max() + 1, 0.1)
        plotLIN = np.polyval(lin, plotX)
        plotQUAD = np.polyval(quad, plotX)
        plotCUBE = np.polyval(cube, plotX)

        print('\nYou are looking at this now')
        print('The rank is: ' + str(run))
        # report the coordinates of the connection, not the subject index
        print('coordinates are: ' + str(coord))
        print('correlation is: ' + str(corrVal))
        print('connvec' + str(fullConnVector.shape)
              + ' ' + str(fullageStack.shape))

        # Raw data of all subjects plus the three fitted curves
        plt.plot(fullageStack, fullConnVector, 'k.', label='connection')
        plt.plot(plotX, plotLIN, color='g', label='linear fit')
        plt.plot(plotX, plotQUAD, color='r', label='quadratic fit')
        plt.plot(plotX, plotCUBE, color='b', label='cubic fit')
        plt.xlabel('age')
        plt.ylabel('connection strength')
        plt.title('Connection #' + str(run)
                  + ' r =: ' + str(np.round(corrVal, 2))
                  + ' p = ' + str(np.round(limPVal, 5)))
        plt.draw()
        plt.show()

        # Wait for the user: a number jumps to that value (0 or less ends the
        # loop), anything else steps down by one
        inPut = raw_input('To Continue, give new connection number and '
                          + 'Enter or just Enter...\n')
        if mm.isNumber(inPut):
            print('Got a new number ' + inPut)
            run = int(inPut)
        else:
            run -= 1
        plt.close()