Ejemplo n.º 1
0
    def createMultiGroupProfile(self, groupNames, parentHeading,
                                profileHeading, metadata,
                                unclassifiedTreatment):
        """Build a MultiGroupProfile of per-sample counts for several groups.

        Every leaf returned by self.getLeafNodes() is walked up through its
        parent links. The leaf's counts are added to the feature found at the
        profile level and to the running totals of the node found at the
        parent level.

        Args:
            groupNames: names of the groups to include; each one is looked up
                in self.groupDict. They are stored (and processed) sorted.
            parentHeading: heading of the hierarchy level used for parent
                totals, or 'Entire sample' to use depth 0.
            profileHeading: heading of the hierarchy level whose nodes become
                the profile features.
            metadata: object exposing `activeSamples`; only samples present
                in it are kept for each group.
            unclassifiedTreatment: controls features whose name contains
                'unclassified' (case-insensitive) at the profile level.
                'Remove unclassified reads' skips the leaf entirely, so it
                is left out of the parent totals as well (assuming the
                parent level is not deeper than the profile level).
                'Use only for calculating frequency profiles' leaves the
                feature out of the profile but still adds its counts to the
                parent totals. Any other value keeps the feature.

        Returns:
            The populated MultiGroupProfile: groupNames, hierarchyHeadings,
            samplesInGroups, smallestGroup, profileDict entries and
            numParentCategories are set, and setActiveGroups() has been
            called with self.groupActive.

        Raises:
            ValueError: if a heading is not in self.hierarchyHeadings
                (presumably a list; raised by its index() call).
            KeyError: if a group name is missing from self.groupDict
                (presumably a dict).
        """
        multiGroupProfile = MultiGroupProfile()

        multiGroupProfile.groupNames = sorted(groupNames)

        # get depth of hierarchical levels of interest; headings are
        # numbered from 1, 'Entire sample' maps to depth 0
        if parentHeading == 'Entire sample':
            parentDepth = 0
        else:
            parentDepth = self.hierarchyHeadings.index(parentHeading) + 1

        profileDepth = self.hierarchyHeadings.index(profileHeading) + 1

        multiGroupProfile.hierarchyHeadings = self.hierarchyHeadings[
            0:profileDepth]

        # get list of samples in each group for samples of interest
        multiGroupProfile.samplesInGroups = []
        samples = []
        # sys.maxsize instead of sys.maxint: the latter no longer exists in
        # Python 3, and the value only serves as a "larger than any group"
        # starting point for the minimum below
        multiGroupProfile.smallestGroup = sys.maxsize
        for groupName in multiGroupProfile.groupNames:
            sortedSampleNames = sorted(
                set(self.groupDict[groupName]).intersection(
                    metadata.activeSamples))
            multiGroupProfile.samplesInGroups.append(sortedSampleNames)
            samples += sortedSampleNames

            if len(sortedSampleNames) < multiGroupProfile.smallestGroup:
                multiGroupProfile.smallestGroup = len(sortedSampleNames)

        # get counts for all samples
        leafNodes = self.getLeafNodes()

        # traverse up tree from each leaf node
        parentSeqDict = {}
        for leaf in leafNodes:
            curDepth = len(self.hierarchyHeadings)

            curNode = leaf
            hierarchy = []
            # True when this leaf must not contribute a profile entry
            # (unclassified feature or duplicate of a truncated leaf name)
            bRemoveUnclassified = False
            while curNode is not None:
                if not curNode.isRoot() and curDepth <= profileDepth:
                    hierarchy.append(curNode.name)

                # add profile level information
                if curDepth == profileDepth:
                    if 'unclassified' in curNode.name.lower():
                        if unclassifiedTreatment == 'Remove unclassified reads':
                            bRemoveUnclassified = True
                            break
                        elif unclassifiedTreatment == 'Use only for calculating frequency profiles':
                            bRemoveUnclassified = True

                    if not bRemoveUnclassified:
                        name = curNode.name

                        # remove ' - #' if feature is being calculated relative to the entire sample
                        bTruncatedName = False
                        if curNode.isLeaf() and parentDepth == 0:
                            suffixPos = name.rfind(' - #')
                            if suffixPos != -1:
                                name = name[0:suffixPos]
                                bTruncatedName = True

                        profileEntry = multiGroupProfile.profileDict.get(name)
                        if bTruncatedName and profileEntry is not None:
                            # a leaf with the same truncated name has
                            # already been recorded; skip this duplicate
                            bRemoveUnclassified = True
                            break

                        if profileEntry is None:
                            profileEntry = GroupProfileEntry()
                            profileEntry.featureCounts = [0] * len(samples)
                            # store under the (possibly truncated) name so
                            # the lookup above can find it for later leaves,
                            # matching what createGroupProfile does
                            multiGroupProfile.profileDict[name] = profileEntry

                        for col, sampleName in enumerate(samples):
                            profileEntry.featureCounts[col] += leaf.countData[
                                sampleName]

                # add parent level information
                if curDepth == parentDepth:
                    sequences = parentSeqDict.get(curNode.name)
                    if sequences is None:
                        sequences = [0] * len(samples)
                        parentSeqDict[curNode.name] = sequences

                    for col, sampleName in enumerate(samples):
                        sequences[col] += leaf.countData[sampleName]

                    if not bRemoveUnclassified:
                        # the entry shares the parent's running totals list,
                        # so it sees counts added by later leaves as well
                        profileEntry.parentCounts = sequences

                curDepth -= 1
                curNode = curNode.parent

            if not bRemoveUnclassified:
                # names were collected leaf-to-root; store them root-to-leaf
                hierarchy.reverse()
                profileEntry.hierarchy = hierarchy

        multiGroupProfile.numParentCategories = len(parentSeqDict)
        multiGroupProfile.setActiveGroups(self.groupActive)

        return multiGroupProfile
Ejemplo n.º 2
0
    def createGroupProfile(self, groupName1, groupName2, parentHeading,
                           profileHeading, metadata, unclassifiedTreatment):
        """Build a GroupProfile comparing the samples of two groups.

        Every leaf returned by self.getLeafNodes() is walked up through its
        parent links; its counts are added to the feature found at the
        profile level and to the running totals of the node found at the
        parent level.

        Args:
            groupName1: name of the first group (looked up in
                self.groupDict).
            groupName2: name of the second group, or '<All other samples>'
                to pool the active samples of every group other than
                groupName1.
            parentHeading: heading of the hierarchy level used for parent
                totals, or 'Entire sample' to use depth 0.
            profileHeading: heading of the hierarchy level whose nodes become
                the profile features.
            metadata: object exposing `activeSamples`; only samples present
                in it are kept.
            unclassifiedTreatment: controls features whose name contains
                'unclassified' (case-insensitive) at the profile level.
                'Remove unclassified reads' skips the leaf entirely;
                'Use only for calculating frequency profiles' leaves the
                feature out of the profile but still adds its counts to the
                parent totals; any other value keeps the feature.

        Returns:
            The populated GroupProfile, or a freshly constructed one when
            either group name is the empty string.
        """
        profile = GroupProfile()

        # nothing to compare until both groups have been chosen
        if groupName1 == '' or groupName2 == '':
            return profile

        profile.groupName1 = groupName1
        profile.groupName2 = groupName2

        # translate the headings into depths; headings are numbered from 1
        # and 'Entire sample' maps to depth 0
        parentDepth = 0
        if parentHeading != 'Entire sample':
            parentDepth = self.hierarchyHeadings.index(parentHeading) + 1

        profileDepth = self.hierarchyHeadings.index(profileHeading) + 1

        profile.hierarchyHeadings = self.hierarchyHeadings[:profileDepth]

        def activeMembers(name):
            # samples of the named group that are currently active
            return set(self.groupDict[name]).intersection(
                metadata.activeSamples)

        # work out which samples belong to each side of the comparison
        firstGroup = activeMembers(groupName1)
        if groupName2 == '<All other samples>':
            secondGroup = set()
            for otherName in self.groupDict:
                if otherName != groupName1:
                    secondGroup |= activeMembers(otherName)
        else:
            secondGroup = activeMembers(groupName2)

        profile.samplesInGroup1 = sorted(firstGroup)
        profile.samplesInGroup2 = sorted(secondGroup)
        samples = profile.samplesInGroup1 + profile.samplesInGroup2

        # accumulate counts by climbing from every leaf towards the root
        parentTotals = {}
        for leaf in self.getLeafNodes():
            depth = len(self.hierarchyHeadings)
            node = leaf
            lineage = []
            # set when this leaf must not contribute a profile entry
            # (unclassified feature or duplicate of a truncated leaf name)
            skipEntry = False

            while node is not None:
                if not node.isRoot() and depth <= profileDepth:
                    lineage.append(node.name)

                # profile level: update the feature's per-sample counts
                if depth == profileDepth:
                    if 'unclassified' in node.name.lower():
                        if unclassifiedTreatment == 'Remove unclassified reads':
                            skipEntry = True
                            break
                        if unclassifiedTreatment == 'Use only for calculating frequency profiles':
                            skipEntry = True

                    if not skipEntry:
                        featureName = node.name

                        # drop the ' - #' suffix when the feature is
                        # measured relative to the entire sample
                        truncated = False
                        if node.isLeaf() and parentDepth == 0:
                            suffixPos = featureName.rfind(' - #')
                            if suffixPos != -1:
                                featureName = featureName[:suffixPos]
                                truncated = True

                        entry = profile.profileDict.get(featureName)
                        if truncated and entry is not None:
                            # a leaf with the same truncated name has
                            # already been recorded; skip this duplicate
                            skipEntry = True
                            break

                        if entry is None:
                            entry = GroupProfileEntry()
                            entry.featureCounts = [0] * len(samples)
                            profile.profileDict[featureName] = entry

                        for col, sampleName in enumerate(samples):
                            entry.featureCounts[col] += leaf.countData[
                                sampleName]

                # parent level: update the parent's per-sample totals
                if depth == parentDepth:
                    totals = parentTotals.setdefault(
                        node.name, [0] * len(samples))

                    for col, sampleName in enumerate(samples):
                        totals[col] += leaf.countData[sampleName]

                    if not skipEntry:
                        # the entry shares the parent's running totals list
                        entry.parentCounts = totals

                node = node.parent
                depth -= 1

            if not skipEntry:
                # names were gathered leaf-to-root; store them root-to-leaf
                lineage.reverse()
                entry.hierarchy = lineage

        profile.numParentCategories = len(parentTotals)

        return profile