def createMultiGroupProfile(self, groupNames, parentHeading, profileHeading, metadata, unclassifiedTreatment):
    """Build a MultiGroupProfile with per-sample counts for several groups.

    Every leaf returned by self.getLeafNodes() is walked up towards the
    root. The leaf's counts are added to the feature found at the profile
    level and to the running totals of the node found at the parent level.

    Args:
        groupNames: names of the groups to profile; each must be a key of
            self.groupDict. They are stored in sorted order.
        parentHeading: either 'Entire sample' (parent depth 0) or an entry
            of self.hierarchyHeadings.
        profileHeading: entry of self.hierarchyHeadings naming the level
            whose nodes become the profile features.
        metadata: object exposing `activeSamples`; only samples present in
            it are kept for each group.
        unclassifiedTreatment: 'Remove unclassified reads' skips leaves
            whose profile-level name contains 'unclassified' entirely;
            'Use only for calculating frequency profiles' keeps their
            counts in the parent totals but creates no feature for them.
            Any other value treats such leaves like every other leaf.

    Returns:
        The populated MultiGroupProfile.

    Raises:
        ValueError: if a heading is not in self.hierarchyHeadings
            (assuming it is a list or tuple, as the slicing suggests).
        KeyError: if a group name is missing from self.groupDict, or a
            leaf has no count for one of the selected samples.
    """
    multiGroupProfile = MultiGroupProfile()
    multiGroupProfile.groupNames = sorted(groupNames)

    # get depth of hierarchical levels of interest
    if parentHeading == 'Entire sample':
        parentDepth = 0
    else:
        parentDepth = self.hierarchyHeadings.index(parentHeading) + 1

    profileDepth = self.hierarchyHeadings.index(profileHeading) + 1
    multiGroupProfile.hierarchyHeadings = self.hierarchyHeadings[0:profileDepth]

    # get list of samples in each group for samples of interest
    multiGroupProfile.samplesInGroups = []
    samples = []

    # sys.maxint only exists on Python 2; fall back to sys.maxsize so the
    # same "larger than any group" sentinel is available on Python 3
    multiGroupProfile.smallestGroup = getattr(sys, 'maxint', sys.maxsize)
    for groupName in multiGroupProfile.groupNames:
        sortedSampleNames = sorted(set(self.groupDict[groupName]).intersection(metadata.activeSamples))
        multiGroupProfile.samplesInGroups.append(sortedSampleNames)
        samples.extend(sortedSampleNames)

        if len(sortedSampleNames) < multiGroupProfile.smallestGroup:
            multiGroupProfile.smallestGroup = len(sortedSampleNames)

    # traverse up tree from each leaf node, accumulating its counts
    parentSeqDict = {}
    for leaf in self.getLeafNodes():
        # leaves are taken to sit at the deepest heading; depth 0 is the
        # level used for 'Entire sample' (presumably the root)
        curDepth = len(self.hierarchyHeadings)
        curNode = leaf
        hierarchy = []
        bRemoveUnclassified = False
        while curNode is not None:
            if not curNode.isRoot() and curDepth <= profileDepth:
                hierarchy.append(curNode.name)

            # add profile level information
            if curDepth == profileDepth:
                if 'unclassified' in curNode.name.lower():
                    if unclassifiedTreatment == 'Remove unclassified reads':
                        bRemoveUnclassified = True
                        break
                    elif unclassifiedTreatment == 'Use only for calculating frequency profiles':
                        bRemoveUnclassified = True

                if not bRemoveUnclassified:
                    name = curNode.name

                    # remove ' - #' if feature is being calculated relative to the entire sample
                    bTruncatedName = False
                    if curNode.isLeaf() and parentDepth == 0:
                        suffixPos = name.rfind(' - #')
                        if suffixPos != -1:
                            name = name[0:suffixPos]
                            bTruncatedName = True

                    profileEntry = multiGroupProfile.profileDict.get(name)
                    if bTruncatedName and profileEntry is not None:
                        # a feature with this truncated name already
                        # exists, so this leaf is skipped altogether
                        bRemoveUnclassified = True
                        break

                    if profileEntry is None:
                        profileEntry = GroupProfileEntry()
                        profileEntry.featureCounts = [0] * len(samples)
                        # store under the same key used for the lookup
                        # above (as createGroupProfile does); keying on
                        # curNode.name left the ' - #' suffix in place
                        multiGroupProfile.profileDict[name] = profileEntry

                    for col, sampleName in enumerate(samples):
                        profileEntry.featureCounts[col] += leaf.countData[sampleName]

            # add parent level information
            if curDepth == parentDepth:
                sequences = parentSeqDict.get(curNode.name)
                if sequences is None:
                    sequences = [0] * len(samples)
                    parentSeqDict[curNode.name] = sequences

                for col, sampleName in enumerate(samples):
                    sequences[col] += leaf.countData[sampleName]

                # relies on the profile level having been visited earlier
                # in this walk (i.e. parentDepth <= profileDepth)
                if not bRemoveUnclassified:
                    profileEntry.parentCounts = sequences

            curDepth -= 1
            curNode = curNode.parent

        if not bRemoveUnclassified:
            # names were collected leaf-to-root; store them root-to-leaf
            hierarchy.reverse()
            profileEntry.hierarchy = hierarchy

    multiGroupProfile.numParentCategories = len(parentSeqDict)
    multiGroupProfile.setActiveGroups(self.groupActive)

    return multiGroupProfile
def createMultiGroupProfile(self, groupNames, parentHeading, profileHeading, metadata, unclassifiedTreatment):
    """Build a MultiGroupProfile with per-sample counts for several groups.

    Every leaf returned by self.getLeafNodes() is walked up towards the
    root. The leaf's counts are added to the feature found at the profile
    level and to the running totals of the node found at the parent level.

    Args:
        groupNames: names of the groups to profile; each must be a key of
            self.groupDict. They are stored in sorted order.
        parentHeading: either 'Entire sample' (parent depth 0) or an entry
            of self.hierarchyHeadings.
        profileHeading: entry of self.hierarchyHeadings naming the level
            whose nodes become the profile features.
        metadata: object exposing `activeSamples`; only samples present in
            it are kept for each group.
        unclassifiedTreatment: 'Remove unclassified reads' skips leaves
            whose profile-level name contains 'unclassified' entirely;
            'Use only for calculating frequency profiles' keeps their
            counts in the parent totals but creates no feature for them.
            Any other value treats such leaves like every other leaf.

    Returns:
        The populated MultiGroupProfile.

    Raises:
        ValueError: if a heading is not in self.hierarchyHeadings
            (assuming it is a list or tuple, as the slicing suggests).
        KeyError: if a group name is missing from self.groupDict, or a
            leaf has no count for one of the selected samples.
    """
    multiGroupProfile = MultiGroupProfile()
    multiGroupProfile.groupNames = sorted(groupNames)

    # get depth of hierarchical levels of interest
    if parentHeading == 'Entire sample':
        parentDepth = 0
    else:
        parentDepth = self.hierarchyHeadings.index(parentHeading) + 1

    profileDepth = self.hierarchyHeadings.index(profileHeading) + 1
    multiGroupProfile.hierarchyHeadings = self.hierarchyHeadings[0:profileDepth]

    # get list of samples in each group for samples of interest
    multiGroupProfile.samplesInGroups = []
    samples = []

    # sys.maxint only exists on Python 2; fall back to sys.maxsize so the
    # same "larger than any group" sentinel is available on Python 3
    multiGroupProfile.smallestGroup = getattr(sys, 'maxint', sys.maxsize)
    for groupName in multiGroupProfile.groupNames:
        sortedSampleNames = sorted(set(self.groupDict[groupName]).intersection(metadata.activeSamples))
        multiGroupProfile.samplesInGroups.append(sortedSampleNames)
        samples.extend(sortedSampleNames)

        if len(sortedSampleNames) < multiGroupProfile.smallestGroup:
            multiGroupProfile.smallestGroup = len(sortedSampleNames)

    # traverse up tree from each leaf node, accumulating its counts
    parentSeqDict = {}
    for leaf in self.getLeafNodes():
        # leaves are taken to sit at the deepest heading; depth 0 is the
        # level used for 'Entire sample' (presumably the root)
        curDepth = len(self.hierarchyHeadings)
        curNode = leaf
        hierarchy = []
        bRemoveUnclassified = False
        while curNode is not None:
            if not curNode.isRoot() and curDepth <= profileDepth:
                hierarchy.append(curNode.name)

            # add profile level information
            if curDepth == profileDepth:
                if 'unclassified' in curNode.name.lower():
                    if unclassifiedTreatment == 'Remove unclassified reads':
                        bRemoveUnclassified = True
                        break
                    elif unclassifiedTreatment == 'Use only for calculating frequency profiles':
                        bRemoveUnclassified = True

                if not bRemoveUnclassified:
                    name = curNode.name

                    # remove ' - #' if feature is being calculated relative to the entire sample
                    bTruncatedName = False
                    if curNode.isLeaf() and parentDepth == 0:
                        suffixPos = name.rfind(' - #')
                        if suffixPos != -1:
                            name = name[0:suffixPos]
                            bTruncatedName = True

                    profileEntry = multiGroupProfile.profileDict.get(name)
                    if bTruncatedName and profileEntry is not None:
                        # a feature with this truncated name already
                        # exists, so this leaf is skipped altogether
                        bRemoveUnclassified = True
                        break

                    if profileEntry is None:
                        profileEntry = GroupProfileEntry()
                        profileEntry.featureCounts = [0] * len(samples)
                        # store under the same key used for the lookup
                        # above (as createGroupProfile does); keying on
                        # curNode.name left the ' - #' suffix in place
                        multiGroupProfile.profileDict[name] = profileEntry

                    for col, sampleName in enumerate(samples):
                        profileEntry.featureCounts[col] += leaf.countData[sampleName]

            # add parent level information
            if curDepth == parentDepth:
                sequences = parentSeqDict.get(curNode.name)
                if sequences is None:
                    sequences = [0] * len(samples)
                    parentSeqDict[curNode.name] = sequences

                for col, sampleName in enumerate(samples):
                    sequences[col] += leaf.countData[sampleName]

                # relies on the profile level having been visited earlier
                # in this walk (i.e. parentDepth <= profileDepth)
                if not bRemoveUnclassified:
                    profileEntry.parentCounts = sequences

            curDepth -= 1
            curNode = curNode.parent

        if not bRemoveUnclassified:
            # names were collected leaf-to-root; store them root-to-leaf
            hierarchy.reverse()
            profileEntry.hierarchy = hierarchy

    multiGroupProfile.numParentCategories = len(parentSeqDict)
    multiGroupProfile.setActiveGroups(self.groupActive)

    return multiGroupProfile
def createGroupProfile(self, groupName1, groupName2, parentHeading, profileHeading, metadata, unclassifiedTreatment):
    """Build a GroupProfile comparing the samples of two groups.

    Every leaf returned by self.getLeafNodes() is walked up towards the
    root. The leaf's counts are added to the feature found at the profile
    level and to the running totals of the node found at the parent level.
    Count columns are ordered as the sorted samples of group 1 followed by
    the sorted samples of group 2.

    Args:
        groupName1: key of self.groupDict naming the first group.
        groupName2: key of self.groupDict naming the second group, or
            '<All other samples>' to use the active samples of every
            group other than groupName1.
        parentHeading: either 'Entire sample' (parent depth 0) or an entry
            of self.hierarchyHeadings.
        profileHeading: entry of self.hierarchyHeadings naming the level
            whose nodes become the profile features.
        metadata: object exposing `activeSamples`; only samples present in
            it are kept for each group.
        unclassifiedTreatment: 'Remove unclassified reads' skips leaves
            whose profile-level name contains 'unclassified' entirely;
            'Use only for calculating frequency profiles' keeps their
            counts in the parent totals but creates no feature for them.
            Any other value treats such leaves like every other leaf.

    Returns:
        The populated GroupProfile, or a freshly constructed one with
        nothing assigned when either group name is the empty string.

    Raises:
        ValueError: if a heading is not in self.hierarchyHeadings
            (assuming it is a list or tuple, as the slicing suggests).
        KeyError: if a group name is missing from self.groupDict, or a
            leaf has no count for one of the selected samples.
    """
    groupProfile = GroupProfile()

    # nothing to compare until both groups have been chosen
    if groupName1 == '' or groupName2 == '':
        return groupProfile

    groupProfile.groupName1 = groupName1
    groupProfile.groupName2 = groupName2

    # get depth of hierarchical levels of interest
    if parentHeading == 'Entire sample':
        parentDepth = 0
    else:
        parentDepth = self.hierarchyHeadings.index(parentHeading) + 1

    profileDepth = self.hierarchyHeadings.index(profileHeading) + 1
    groupProfile.hierarchyHeadings = self.hierarchyHeadings[0:profileDepth]

    # get list of samples in each group for samples of interest
    samplesInGroup1 = set(self.groupDict[groupName1]).intersection(metadata.activeSamples)

    if groupName2 != '<All other samples>':
        samplesInGroup2 = set(self.groupDict[groupName2]).intersection(metadata.activeSamples)
    else:
        # pool the active samples of every group except the first one
        samplesInGroup2 = set()
        for groupName in self.groupDict:
            if groupName != groupName1:
                samplesInGroup2.update(set(self.groupDict[groupName]).intersection(metadata.activeSamples))

    groupProfile.samplesInGroup1 = sorted(samplesInGroup1)
    groupProfile.samplesInGroup2 = sorted(samplesInGroup2)

    samples = groupProfile.samplesInGroup1 + groupProfile.samplesInGroup2

    # traverse up tree from each leaf node, accumulating its counts
    parentSeqDict = {}
    for leaf in self.getLeafNodes():
        # leaves are taken to sit at the deepest heading; depth 0 is the
        # level used for 'Entire sample' (presumably the root)
        curDepth = len(self.hierarchyHeadings)
        curNode = leaf
        hierarchy = []
        bRemoveUnclassified = False
        while curNode is not None:
            if not curNode.isRoot() and curDepth <= profileDepth:
                hierarchy.append(curNode.name)

            # add profile level information
            if curDepth == profileDepth:
                if 'unclassified' in curNode.name.lower():
                    if unclassifiedTreatment == 'Remove unclassified reads':
                        bRemoveUnclassified = True
                        break
                    elif unclassifiedTreatment == 'Use only for calculating frequency profiles':
                        bRemoveUnclassified = True

                if not bRemoveUnclassified:
                    name = curNode.name

                    # remove ' - #' if feature is being calculated relative to the entire sample
                    bTruncatedName = False
                    if curNode.isLeaf() and parentDepth == 0:
                        suffixPos = name.rfind(' - #')
                        if suffixPos != -1:
                            name = name[0:suffixPos]
                            bTruncatedName = True

                    profileEntry = groupProfile.profileDict.get(name)
                    if bTruncatedName and profileEntry is not None:
                        # a feature with this truncated name already
                        # exists, so this leaf is skipped altogether
                        bRemoveUnclassified = True
                        break

                    if profileEntry is None:
                        profileEntry = GroupProfileEntry()
                        profileEntry.featureCounts = [0] * len(samples)
                        groupProfile.profileDict[name] = profileEntry

                    for col, sampleName in enumerate(samples):
                        profileEntry.featureCounts[col] += leaf.countData[sampleName]

            # add parent level information
            if curDepth == parentDepth:
                sequences = parentSeqDict.get(curNode.name)
                if sequences is None:
                    sequences = [0] * len(samples)
                    parentSeqDict[curNode.name] = sequences

                for col, sampleName in enumerate(samples):
                    sequences[col] += leaf.countData[sampleName]

                # relies on the profile level having been visited earlier
                # in this walk (i.e. parentDepth <= profileDepth)
                if not bRemoveUnclassified:
                    profileEntry.parentCounts = sequences

            curDepth -= 1
            curNode = curNode.parent

        if not bRemoveUnclassified:
            # names were collected leaf-to-root; store them root-to-leaf
            hierarchy.reverse()
            profileEntry.hierarchy = hierarchy

    groupProfile.numParentCategories = len(parentSeqDict)

    return groupProfile
def createGroupProfile(self, groupName1, groupName2, parentHeading, profileHeading, metadata, unclassifiedTreatment):
    """Build a GroupProfile comparing the samples of two groups.

    Every leaf returned by self.getLeafNodes() is walked up towards the
    root. The leaf's counts are added to the feature found at the profile
    level and to the running totals of the node found at the parent level.
    Count columns are ordered as the sorted samples of group 1 followed by
    the sorted samples of group 2.

    Args:
        groupName1: key of self.groupDict naming the first group.
        groupName2: key of self.groupDict naming the second group, or
            '<All other samples>' to use the active samples of every
            group other than groupName1.
        parentHeading: either 'Entire sample' (parent depth 0) or an entry
            of self.hierarchyHeadings.
        profileHeading: entry of self.hierarchyHeadings naming the level
            whose nodes become the profile features.
        metadata: object exposing `activeSamples`; only samples present in
            it are kept for each group.
        unclassifiedTreatment: 'Remove unclassified reads' skips leaves
            whose profile-level name contains 'unclassified' entirely;
            'Use only for calculating frequency profiles' keeps their
            counts in the parent totals but creates no feature for them.
            Any other value treats such leaves like every other leaf.

    Returns:
        The populated GroupProfile, or a freshly constructed one with
        nothing assigned when either group name is the empty string.

    Raises:
        ValueError: if a heading is not in self.hierarchyHeadings
            (assuming it is a list or tuple, as the slicing suggests).
        KeyError: if a group name is missing from self.groupDict, or a
            leaf has no count for one of the selected samples.
    """
    groupProfile = GroupProfile()

    # nothing to compare until both groups have been chosen
    if groupName1 == '' or groupName2 == '':
        return groupProfile

    groupProfile.groupName1 = groupName1
    groupProfile.groupName2 = groupName2

    # get depth of hierarchical levels of interest
    if parentHeading == 'Entire sample':
        parentDepth = 0
    else:
        parentDepth = self.hierarchyHeadings.index(parentHeading) + 1

    profileDepth = self.hierarchyHeadings.index(profileHeading) + 1
    groupProfile.hierarchyHeadings = self.hierarchyHeadings[0:profileDepth]

    # get list of samples in each group for samples of interest
    samplesInGroup1 = set(self.groupDict[groupName1]).intersection(metadata.activeSamples)

    if groupName2 != '<All other samples>':
        samplesInGroup2 = set(self.groupDict[groupName2]).intersection(metadata.activeSamples)
    else:
        # pool the active samples of every group except the first one
        samplesInGroup2 = set()
        for groupName in self.groupDict:
            if groupName != groupName1:
                samplesInGroup2.update(set(self.groupDict[groupName]).intersection(metadata.activeSamples))

    groupProfile.samplesInGroup1 = sorted(samplesInGroup1)
    groupProfile.samplesInGroup2 = sorted(samplesInGroup2)

    samples = groupProfile.samplesInGroup1 + groupProfile.samplesInGroup2

    # traverse up tree from each leaf node, accumulating its counts
    parentSeqDict = {}
    for leaf in self.getLeafNodes():
        # leaves are taken to sit at the deepest heading; depth 0 is the
        # level used for 'Entire sample' (presumably the root)
        curDepth = len(self.hierarchyHeadings)
        curNode = leaf
        hierarchy = []
        bRemoveUnclassified = False
        while curNode is not None:
            if not curNode.isRoot() and curDepth <= profileDepth:
                hierarchy.append(curNode.name)

            # add profile level information
            if curDepth == profileDepth:
                if 'unclassified' in curNode.name.lower():
                    if unclassifiedTreatment == 'Remove unclassified reads':
                        bRemoveUnclassified = True
                        break
                    elif unclassifiedTreatment == 'Use only for calculating frequency profiles':
                        bRemoveUnclassified = True

                if not bRemoveUnclassified:
                    name = curNode.name

                    # remove ' - #' if feature is being calculated relative to the entire sample
                    bTruncatedName = False
                    if curNode.isLeaf() and parentDepth == 0:
                        suffixPos = name.rfind(' - #')
                        if suffixPos != -1:
                            name = name[0:suffixPos]
                            bTruncatedName = True

                    profileEntry = groupProfile.profileDict.get(name)
                    if bTruncatedName and profileEntry is not None:
                        # a feature with this truncated name already
                        # exists, so this leaf is skipped altogether
                        bRemoveUnclassified = True
                        break

                    if profileEntry is None:
                        profileEntry = GroupProfileEntry()
                        profileEntry.featureCounts = [0] * len(samples)
                        groupProfile.profileDict[name] = profileEntry

                    for col, sampleName in enumerate(samples):
                        profileEntry.featureCounts[col] += leaf.countData[sampleName]

            # add parent level information
            if curDepth == parentDepth:
                sequences = parentSeqDict.get(curNode.name)
                if sequences is None:
                    sequences = [0] * len(samples)
                    parentSeqDict[curNode.name] = sequences

                for col, sampleName in enumerate(samples):
                    sequences[col] += leaf.countData[sampleName]

                # relies on the profile level having been visited earlier
                # in this walk (i.e. parentDepth <= profileDepth)
                if not bRemoveUnclassified:
                    profileEntry.parentCounts = sequences

            curDepth -= 1
            curNode = curNode.parent

        if not bRemoveUnclassified:
            # names were collected leaf-to-root; store them root-to-leaf
            hierarchy.reverse()
            profileEntry.hierarchy = hierarchy

    groupProfile.numParentCategories = len(parentSeqDict)

    return groupProfile