Esempio n. 1
0
	def createMultiGroupProfile(self, groupNames, parentHeading, profileHeading, metadata, unclassifiedTreatment):
		"""Build a MultiGroupProfile holding per-sample counts for several groups.

		Every leaf returned by self.getLeafNodes() is walked up to the root. The
		leaf's counts are added to the feature found at the profile depth and to
		the category found at the parent depth.

		groupNames -- names of the groups to profile; used as keys of self.groupDict.
		parentHeading -- heading in self.hierarchyHeadings whose nodes supply the
			parent counts, or 'Entire sample' to use depth 0.
		profileHeading -- heading in self.hierarchyHeadings whose nodes become
			the profile features.
		metadata -- object whose activeSamples attribute limits which samples
			of each group are used.
		unclassifiedTreatment -- 'Remove unclassified reads' stops processing a
			leaf as soon as its profile-level node is unclassified;
			'Use only for calculating frequency profiles' creates no feature for
			that node but keeps walking, so parent counts still include the leaf.
			Any other value treats unclassified nodes like any other feature.

		Returns the populated MultiGroupProfile.
		"""
		multiGroupProfile = MultiGroupProfile()
		multiGroupProfile.groupNames = sorted(groupNames)

		# get depth of hierarchical levels of interest (depth 0 is above the first heading)
		if parentHeading == 'Entire sample':
			parentDepth = 0
		else:
			parentDepth = self.hierarchyHeadings.index(parentHeading) + 1
		profileDepth = self.hierarchyHeadings.index(profileHeading) + 1

		multiGroupProfile.hierarchyHeadings = self.hierarchyHeadings[0:profileDepth]

		# get list of samples in each group for samples of interest;
		# 'samples' fixes the column order of every count vector built below
		multiGroupProfile.samplesInGroups = []
		samples = []
		# sys.maxint was removed in Python 3; fall back to sys.maxsize there
		multiGroupProfile.smallestGroup = getattr(sys, 'maxint', sys.maxsize)
		for groupName in multiGroupProfile.groupNames:
			sortedSampleNames = sorted(set(self.groupDict[groupName]).intersection(metadata.activeSamples))
			multiGroupProfile.samplesInGroups.append(sortedSampleNames)
			samples += sortedSampleNames

			if len(sortedSampleNames) < multiGroupProfile.smallestGroup:
				multiGroupProfile.smallestGroup = len(sortedSampleNames)

		# get counts for all samples
		leafNodes = self.getLeafNodes()

		# traverse up tree from each leaf node
		parentSeqDict = {}
		for leaf in leafNodes:
			curDepth = len(self.hierarchyHeadings)

			curNode = leaf
			hierarchy = []
			bRemoveUnclassified = False
			while curNode is not None:
				if not curNode.isRoot() and curDepth <= profileDepth:
					hierarchy.append(curNode.name)

				# add profile level information
				if curDepth == profileDepth:
					if 'unclassified' in curNode.name.lower():
						if unclassifiedTreatment == 'Remove unclassified reads':
							bRemoveUnclassified = True
							break
						elif unclassifiedTreatment == 'Use only for calculating frequency profiles':
							bRemoveUnclassified = True

					if not bRemoveUnclassified:
						name = curNode.name

						# remove ' - #' if feature is being calculated relative to the entire sample
						bTruncatedName = False
						if curNode.isLeaf() and parentDepth == 0:
							suffixPos = name.rfind(' - #')
							if suffixPos != -1:
								name = name[0:suffixPos]
								bTruncatedName = True

						profileEntry = multiGroupProfile.profileDict.get(name)
						if bTruncatedName and profileEntry is not None:
							# a leaf with the same truncated name was already recorded; skip this one
							bRemoveUnclassified = True
							break

						if profileEntry is None:
							profileEntry = GroupProfileEntry()
							profileEntry.featureCounts = [0]*len(samples)
							# store under the same (possibly truncated) key that was just
							# looked up, otherwise the duplicate check above can never match
							multiGroupProfile.profileDict[name] = profileEntry

						for col, sampleName in enumerate(samples):
							profileEntry.featureCounts[col] += leaf.countData[sampleName]

				# add parent level information
				if curDepth == parentDepth:
					sequences = parentSeqDict.get(curNode.name)
					if sequences is None:
						sequences = [0]*len(samples)
						parentSeqDict[curNode.name] = sequences

					for col, sampleName in enumerate(samples):
						sequences[col] += leaf.countData[sampleName]

					if not bRemoveUnclassified:
						# the entry shares the list object, so later leaves keep updating it
						profileEntry.parentCounts = sequences

				curDepth -= 1
				curNode = curNode.parent

			if not bRemoveUnclassified:
				# names were collected leaf-to-root; store them root-to-leaf
				hierarchy.reverse()
				profileEntry.hierarchy = hierarchy

		multiGroupProfile.numParentCategories = len(parentSeqDict)
		multiGroupProfile.setActiveGroups(self.groupActive)

		return multiGroupProfile
		
Esempio n. 2
0
    def createMultiGroupProfile(self, groupNames, parentHeading,
                                profileHeading, metadata,
                                unclassifiedTreatment):
        """Build a MultiGroupProfile holding per-sample counts for several groups.

        Every leaf returned by self.getLeafNodes() is walked up to the root.
        The leaf's counts are added to the feature found at the profile depth
        and to the category found at the parent depth.

        groupNames -- names of the groups to profile; used as keys of
            self.groupDict.
        parentHeading -- heading in self.hierarchyHeadings whose nodes supply
            the parent counts, or 'Entire sample' to use depth 0.
        profileHeading -- heading in self.hierarchyHeadings whose nodes become
            the profile features.
        metadata -- object whose activeSamples attribute limits which samples
            of each group are used.
        unclassifiedTreatment -- 'Remove unclassified reads' stops processing
            a leaf as soon as its profile-level node is unclassified;
            'Use only for calculating frequency profiles' creates no feature
            for that node but keeps walking, so parent counts still include
            the leaf. Any other value treats unclassified nodes like any
            other feature.

        Returns the populated MultiGroupProfile.
        """
        multiGroupProfile = MultiGroupProfile()
        multiGroupProfile.groupNames = sorted(groupNames)

        # get depth of hierarchical levels of interest
        # (depth 0 is above the first heading)
        if parentHeading == 'Entire sample':
            parentDepth = 0
        else:
            parentDepth = self.hierarchyHeadings.index(parentHeading) + 1
        profileDepth = self.hierarchyHeadings.index(profileHeading) + 1

        multiGroupProfile.hierarchyHeadings = self.hierarchyHeadings[
            0:profileDepth]

        # get list of samples in each group for samples of interest;
        # 'samples' fixes the column order of every count vector built below
        multiGroupProfile.samplesInGroups = []
        samples = []
        # sys.maxint was removed in Python 3; fall back to sys.maxsize there
        multiGroupProfile.smallestGroup = getattr(sys, 'maxint', sys.maxsize)
        for groupName in multiGroupProfile.groupNames:
            sortedSampleNames = sorted(
                set(self.groupDict[groupName]).intersection(
                    metadata.activeSamples))
            multiGroupProfile.samplesInGroups.append(sortedSampleNames)
            samples += sortedSampleNames

            if len(sortedSampleNames) < multiGroupProfile.smallestGroup:
                multiGroupProfile.smallestGroup = len(sortedSampleNames)

        # get counts for all samples
        leafNodes = self.getLeafNodes()

        # traverse up tree from each leaf node
        parentSeqDict = {}
        for leaf in leafNodes:
            curDepth = len(self.hierarchyHeadings)

            curNode = leaf
            hierarchy = []
            bRemoveUnclassified = False
            while curNode is not None:
                if not curNode.isRoot() and curDepth <= profileDepth:
                    hierarchy.append(curNode.name)

                # add profile level information
                if curDepth == profileDepth:
                    if 'unclassified' in curNode.name.lower():
                        if unclassifiedTreatment == 'Remove unclassified reads':
                            bRemoveUnclassified = True
                            break
                        elif unclassifiedTreatment == 'Use only for calculating frequency profiles':
                            bRemoveUnclassified = True

                    if not bRemoveUnclassified:
                        name = curNode.name

                        # remove ' - #' if feature is being calculated
                        # relative to the entire sample
                        bTruncatedName = False
                        if curNode.isLeaf() and parentDepth == 0:
                            suffixPos = name.rfind(' - #')
                            if suffixPos != -1:
                                name = name[0:suffixPos]
                                bTruncatedName = True

                        profileEntry = multiGroupProfile.profileDict.get(name)
                        if bTruncatedName and profileEntry is not None:
                            # a leaf with the same truncated name was already
                            # recorded; skip this one
                            bRemoveUnclassified = True
                            break

                        if profileEntry is None:
                            profileEntry = GroupProfileEntry()
                            profileEntry.featureCounts = [0] * len(samples)
                            # store under the same (possibly truncated) key
                            # that was just looked up, otherwise the duplicate
                            # check above can never match
                            multiGroupProfile.profileDict[name] = profileEntry

                        for col, sampleName in enumerate(samples):
                            profileEntry.featureCounts[col] += leaf.countData[
                                sampleName]

                # add parent level information
                if curDepth == parentDepth:
                    sequences = parentSeqDict.get(curNode.name)
                    if sequences is None:
                        sequences = [0] * len(samples)
                        parentSeqDict[curNode.name] = sequences

                    for col, sampleName in enumerate(samples):
                        sequences[col] += leaf.countData[sampleName]

                    if not bRemoveUnclassified:
                        # the entry shares the list object, so later leaves
                        # keep updating it
                        profileEntry.parentCounts = sequences

                curDepth -= 1
                curNode = curNode.parent

            if not bRemoveUnclassified:
                # names were collected leaf-to-root; store them root-to-leaf
                hierarchy.reverse()
                profileEntry.hierarchy = hierarchy

        multiGroupProfile.numParentCategories = len(parentSeqDict)
        multiGroupProfile.setActiveGroups(self.groupActive)

        return multiGroupProfile
Esempio n. 3
0
	def createGroupProfile(self, groupName1, groupName2, parentHeading, profileHeading, metadata, unclassifiedTreatment):
		"""Build a GroupProfile of per-sample counts for two groups of samples.

		An empty GroupProfile is returned when either group name is ''.
		groupName2 may be '<All other samples>'; the second group is then the
		union of the active samples of every group in self.groupDict except
		groupName1.

		parentHeading -- heading whose nodes supply the parent counts, or
			'Entire sample' to use depth 0.
		profileHeading -- heading whose nodes become the profile features.
		metadata -- object whose activeSamples attribute limits the samples used.
		unclassifiedTreatment -- 'Remove unclassified reads' stops processing a
			leaf once its profile-level node is unclassified; 'Use only for
			calculating frequency profiles' creates no feature for that node but
			still adds the leaf to the parent counts.
		"""
		groupProfile = GroupProfile()

		# nothing to compare unless both groups are named
		if '' in (groupName1, groupName2):
			return groupProfile

		groupProfile.groupName1 = groupName1
		groupProfile.groupName2 = groupName2

		# depth 0 sits above the first heading; headings are numbered from 1
		parentDepth = 0
		if parentHeading != 'Entire sample':
			parentDepth = self.hierarchyHeadings.index(parentHeading) + 1

		profileDepth = self.hierarchyHeadings.index(profileHeading) + 1

		groupProfile.hierarchyHeadings = self.hierarchyHeadings[0:profileDepth]

		# active samples belonging to each of the two groups
		firstGroup = set(self.groupDict[groupName1]).intersection(metadata.activeSamples)
		if groupName2 == '<All other samples>':
			secondGroup = set()
			for otherName in self.groupDict:
				if otherName != groupName1:
					secondGroup |= set(self.groupDict[otherName]).intersection(metadata.activeSamples)
		else:
			secondGroup = set(self.groupDict[groupName2]).intersection(metadata.activeSamples)

		groupProfile.samplesInGroup1 = sorted(firstGroup)
		groupProfile.samplesInGroup2 = sorted(secondGroup)
		# column order shared by every count vector below
		samples = groupProfile.samplesInGroup1 + groupProfile.samplesInGroup2

		# walk from each leaf towards the root, accumulating its counts
		parentTotals = {}
		for leaf in self.getLeafNodes():
			depth = len(self.hierarchyHeadings)
			node = leaf
			lineage = []
			skipEntry = False

			while node is not None:
				if depth <= profileDepth and not node.isRoot():
					lineage.append(node.name)

				# profile level: accumulate the feature counts
				if depth == profileDepth:
					if 'unclassified' in node.name.lower():
						if unclassifiedTreatment == 'Remove unclassified reads':
							skipEntry = True
							break
						if unclassifiedTreatment == 'Use only for calculating frequency profiles':
							skipEntry = True

					if not skipEntry:
						name = node.name

						# drop the ' - #' suffix when counts are relative to the entire sample
						truncated = False
						if parentDepth == 0 and node.isLeaf():
							cut = name.rfind(' - #')
							if cut != -1:
								name = name[0:cut]
								truncated = True

						profileEntry = groupProfile.profileDict.get(name)
						if truncated and profileEntry is not None:
							# this truncated name is already recorded; skip the leaf
							skipEntry = True
							break

						if profileEntry is None:
							profileEntry = GroupProfileEntry()
							profileEntry.featureCounts = [0]*len(samples)
							groupProfile.profileDict[name] = profileEntry

						for col, sampleName in enumerate(samples):
							profileEntry.featureCounts[col] += leaf.countData[sampleName]

				# parent level: accumulate the category totals
				if depth == parentDepth:
					totals = parentTotals.get(node.name)
					if totals is None:
						totals = [0]*len(samples)
						parentTotals[node.name] = totals

					for col, sampleName in enumerate(samples):
						totals[col] += leaf.countData[sampleName]

					if not skipEntry:
						profileEntry.parentCounts = totals

				depth -= 1
				node = node.parent

			if not skipEntry:
				# lineage was gathered leaf-first; store it root-first
				lineage.reverse()
				profileEntry.hierarchy = lineage

		groupProfile.numParentCategories = len(parentTotals)

		return groupProfile
Esempio n. 4
0
    def createGroupProfile(self, groupName1, groupName2, parentHeading,
                           profileHeading, metadata, unclassifiedTreatment):
        """Build a GroupProfile of per-sample counts for two groups of samples.

        An empty GroupProfile is returned when either group name is ''.
        groupName2 may be '<All other samples>'; the second group is then the
        union of the active samples of every group in self.groupDict except
        groupName1.

        parentHeading -- heading whose nodes supply the parent counts, or
            'Entire sample' to use depth 0.
        profileHeading -- heading whose nodes become the profile features.
        metadata -- object whose activeSamples attribute limits the samples
            used.
        unclassifiedTreatment -- 'Remove unclassified reads' stops processing
            a leaf once its profile-level node is unclassified; 'Use only for
            calculating frequency profiles' creates no feature for that node
            but still adds the leaf to the parent counts.
        """
        groupProfile = GroupProfile()

        # nothing to compare unless both groups are named
        if '' in (groupName1, groupName2):
            return groupProfile

        groupProfile.groupName1 = groupName1
        groupProfile.groupName2 = groupName2

        # depth 0 sits above the first heading; headings are numbered from 1
        parentDepth = 0
        if parentHeading != 'Entire sample':
            parentDepth = self.hierarchyHeadings.index(parentHeading) + 1

        profileDepth = self.hierarchyHeadings.index(profileHeading) + 1

        groupProfile.hierarchyHeadings = self.hierarchyHeadings[0:profileDepth]

        # active samples belonging to each of the two groups
        firstGroup = set(self.groupDict[groupName1]).intersection(
            metadata.activeSamples)
        if groupName2 == '<All other samples>':
            secondGroup = set()
            for otherName in self.groupDict:
                if otherName != groupName1:
                    secondGroup |= set(
                        self.groupDict[otherName]).intersection(
                            metadata.activeSamples)
        else:
            secondGroup = set(self.groupDict[groupName2]).intersection(
                metadata.activeSamples)

        groupProfile.samplesInGroup1 = sorted(firstGroup)
        groupProfile.samplesInGroup2 = sorted(secondGroup)
        # column order shared by every count vector below
        samples = groupProfile.samplesInGroup1 + groupProfile.samplesInGroup2

        # walk from each leaf towards the root, accumulating its counts
        parentTotals = {}
        for leaf in self.getLeafNodes():
            depth = len(self.hierarchyHeadings)
            node = leaf
            lineage = []
            skipEntry = False

            while node is not None:
                if depth <= profileDepth and not node.isRoot():
                    lineage.append(node.name)

                # profile level: accumulate the feature counts
                if depth == profileDepth:
                    if 'unclassified' in node.name.lower():
                        if unclassifiedTreatment == 'Remove unclassified reads':
                            skipEntry = True
                            break
                        if unclassifiedTreatment == 'Use only for calculating frequency profiles':
                            skipEntry = True

                    if not skipEntry:
                        name = node.name

                        # drop the ' - #' suffix when counts are relative to
                        # the entire sample
                        truncated = False
                        if parentDepth == 0 and node.isLeaf():
                            cut = name.rfind(' - #')
                            if cut != -1:
                                name = name[0:cut]
                                truncated = True

                        profileEntry = groupProfile.profileDict.get(name)
                        if truncated and profileEntry is not None:
                            # this truncated name is already recorded; skip
                            # the leaf
                            skipEntry = True
                            break

                        if profileEntry is None:
                            profileEntry = GroupProfileEntry()
                            profileEntry.featureCounts = [0] * len(samples)
                            groupProfile.profileDict[name] = profileEntry

                        for col, sampleName in enumerate(samples):
                            profileEntry.featureCounts[col] += leaf.countData[
                                sampleName]

                # parent level: accumulate the category totals
                if depth == parentDepth:
                    totals = parentTotals.get(node.name)
                    if totals is None:
                        totals = [0] * len(samples)
                        parentTotals[node.name] = totals

                    for col, sampleName in enumerate(samples):
                        totals[col] += leaf.countData[sampleName]

                    if not skipEntry:
                        profileEntry.parentCounts = totals

                depth -= 1
                node = node.parent

            if not skipEntry:
                # lineage was gathered leaf-first; store it root-first
                lineage.reverse()
                profileEntry.hierarchy = lineage

        groupProfile.numParentCategories = len(parentTotals)

        return groupProfile