Python PositiveControlEnricher Examples

Programming Language: Python

Namespace/Package Name: PositiveControlEnricher

Examples at hotexamples.com: 2

Python PositiveControlEnricher - 2 examples found. These are the top-rated real-world Python examples of PositiveControlEnricher.PositiveControlEnricher, extracted from open-source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

callEnricherFunction(2)

Example #1

Show file

File: KineticWorker.py Project: khaikhai/kineticsTools

    def onChunk(self, referenceWindow):
        """Summarize one reference window and return its per-site results.

        Args:
            referenceWindow: (reference, start, end) tuple. start and end
                delimit the part of the reference this call reports on; end
                is clipped to self.ipdModel.refLength(reference).

        Returns:
            A list of per-site result dicts sorted by 'tpl'. When
            self.options.identify is set, only sites with
            start <= tpl < end are returned, annotated with the decoded
            modification data and 'offTargetPeak' flags.

        Raises:
            Any Exception raised by self._decodePositiveControl; its
            traceback is printed before it propagates.
        """

        # Setup the object for a new window.
        self._prepForReferenceWindow(referenceWindow)

        # start and end are the windows of the reference that we are responsible for reporting data from.
        # We may elect to pull data from a wider window for use with positive control
        (reference, start, end) = referenceWindow

        # Trim end coordinate to length of current template
        end = min(end, self.ipdModel.refLength(reference))

        if self.options.identify:
            # If we are attempting to identify modifications, get the raw data for a slightly expanded window
            # then do the decoding, then weave the modification results back into the main results

            padStart = start - self.pad
            padEnd = end + self.pad
            perSiteResults = self._summarizeReferenceRegion((padStart, padEnd), self.options.methylFraction, self.options.identify)

            if self.options.useLDA:

                # FIXME: add on a column "Ca5C" containing LDA score for each C-residue site
                # Below is an example of how to use an alternative, the BasicLdaEnricher, which does not use the positive control model
                # PositiveControlEnricher currently uses a logistic regression model trained using SMRTportal job 65203 (native E. coli)

                # lda = BasicLdaEnricher( self.ipdModel.gbmModel, self.sequence, perSiteResults, self.options.identify, self.options.modsToCall )
                lda = PositiveControlEnricher(self.ipdModel.gbmModel, self.sequence, perSiteResults)
                perSiteResults = lda.callEnricherFunction(perSiteResults)

            try:
                # Handle different modes of 'extra analysis' here -- this one is for multi-site m5C detection
                # mods = self._multiSiteDetection(perSiteResults, (start, end))
                mods = self._decodePositiveControl(perSiteResults, (start, end))
            except Exception:
                # Report the failure and let it propagate. Continuing past this
                # point would leave 'mods' unbound and hide the real error
                # behind an UnboundLocalError in the weave loop below.
                traceback.print_exc()
                raise

            finalCalls = []

            # Weave together results
            for strand in [0, 1]:
                strandSign = 1 if strand == 0 else -1

                siteDict = dict((x['tpl'], x) for x in perSiteResults if start <= x['tpl'] < end and x['strand'] == strand)
                modDict = dict((x['tpl'], x) for x in mods if start <= x['tpl'] < end and x['strand'] == strand)

                # Go through the modifications - add tags for identified mods to per-site stats
                # add a 'offTarget' tag to the off target peaks.
                for mod in modDict.values():
                    tpl = mod['tpl']

                    # Only convert to positive control call if we actually have enough
                    # coverage on the cognate base!
                    if tpl in siteDict:
                        site = siteDict[tpl]

                        if self.options.methylFraction and FRAC in mod:
                            site[FRAC] = mod[FRAC]
                            site[FRAClow] = mod[FRAClow]
                            site[FRACup] = mod[FRACup]

                        # Copy any extra properties that were added
                        for nk in set(mod) - set(site):
                            site[nk] = mod[nk]

                    if 'Mask' in mod:
                        # The decoder should supply the off-target peak mask.
                        # The list is extended in place, so the decoder's own
                        # 'Mask' entry (and any copy of the reference made
                        # above) also gains the trailing 0.
                        mask = mod['Mask']
                        mask.append(0)  # make sure we always mask the cognate position
                    else:
                        # If the decoder doesn't supply a mask - use a hard-coded version
                        # FIXME - this branch is deprecated
                        mask = ModificationPeakMask[mod['modification']]

                    # Mask out neighbor peaks that may have been caused by this mod
                    for offset in mask:
                        shadowPos = tpl + strandSign * offset
                        if shadowPos in siteDict:
                            siteDict[shadowPos]['offTargetPeak'] = True

                finalCalls.extend(siteDict.values())

            # Sort by template position
            finalCalls.sort(key=lambda x: x['tpl'])
            return finalCalls

        else:
            result = self._summarizeReferenceRegion((start, end), self.options.methylFraction, self.options.identify)

            if self.options.useLDA and self.controlCmpH5 is None:

                # FIXME: add on a column "Ca5C" containing LDA score for each C-residue site
                # lda = BasicLdaEnricher(self.ipdModel.gbmModel, self.sequence, result, self.options.identify)
                lda = PositiveControlEnricher(self.ipdModel.gbmModel, self.sequence, result)
                # Keep the enricher's return value, as the identify branch does,
                # instead of dropping it into an unused local.
                # NOTE(review): assumes callEnricherFunction returns the list of
                # per-site results - confirm against PositiveControlEnricher.
                result = lda.callEnricherFunction(result)

            result.sort(key=lambda x: x['tpl'])
            return result

Example #2

Show file

File: smKineticWorker.py Project: khaikhai/kineticsTools

    def onChunk(self, referenceWindow):
        """Summarize one reference window (or one molecule) and return its results.

        Args:
            referenceWindow: (reference, smId, start, end) when
                self.options.smBaseMod is set, otherwise
                (reference, start, end). start and end delimit the part of
                the reference this call reports on.

        Returns:
            With self.options.identify set: a list of per-site result dicts
            with start <= tpl < end, annotated with decoded modification data
            and 'offTargetPeak' flags, sorted by 'tpl'.
            Otherwise: the result of self._summarizeMolecule (returned
            unsorted) when self.options.smBaseMod is set, or the result of
            self._summarizeReferenceRegion sorted by 'tpl'.

        Side effects:
            Sets self.meanIpdFunc, self.cognateBaseFunc, self.refId and
            self.sequence for the current reference.
        """

        # start and end are the windows of the reference that we are responsible for reporting data from.
        # We may elect to pull data from a wider window for use with positive control

        if self.options.smBaseMod:
            (reference, smId, start, end) = referenceWindow
        else:
            (reference, start, end) = referenceWindow

        # NOTE(review): captured before 'end' is trimmed below, so the
        # non-identify branch summarizes the untrimmed window - confirm this
        # is intended.
        targetBounds = (start, end)

        # Trim end coordinate to length of current template
        end = min(end, self.ipdModel.refLength(reference))

        # Each chunk is from a single reference -- fire up meanIpd func on the current reference
        self.meanIpdFunc = self.ipdModel.predictIpdFunc(reference)

        # Get the cognate base at a given position
        self.cognateBaseFunc = self.ipdModel.cognateBaseFunc(reference)

        self.refId = reference

        self.sequence = self.ipdModel.getReferenceWindow(self.refId, 0, start, end)

        # Compute the data for this chunk

        if self.options.identify:
            # If we are attempting to identify modifications, get the raw data for a slightly expanded window
            # then do the decoding, then weave the modification results back into the main results

            padStart = start - 8
            padEnd = end + 8
            perSiteResults = self._summarizeReferenceRegion((padStart, padEnd), self.options.methylFraction, self.options.identify)

            if self.options.useLDA:

                # FIXME: add on a column "Ca5C" containing LDA score for each C-residue site
                # Below is an example of how to use an alternative, the BasicLdaEnricher, which does not use the positive control model
                # PositiveControlEnricher currently uses a logistic regression model trained using SMRTportal job 65203 (native E. coli)

                # lda = BasicLdaEnricher( self.ipdModel.gbmModel, self.sequence, perSiteResults, self.options.identify, self.options.modsToCall )
                lda = PositiveControlEnricher(self.ipdModel.gbmModel, self.sequence, perSiteResults)
                perSiteResults = lda.callEnricherFunction(perSiteResults)

            mods = self._decodePositiveControl(perSiteResults, (start, end))

            finalCalls = []

            # Weave together results
            for strand in [0, 1]:
                strandSign = 1 if strand == 0 else -1

                siteDict = dict((x['tpl'], x) for x in perSiteResults if start <= x['tpl'] < end and x['strand'] == strand)
                modDict = dict((x['tpl'], x) for x in mods if start <= x['tpl'] < end and x['strand'] == strand)

                # Go through the modifications - add tags for identified mods to per-site stats
                # add a 'offTarget' tag to the off target peaks.
                for mod in modDict.values():
                    tpl = mod['tpl']

                    # Only convert to positive control call if we actually have enough
                    # coverage on the cognate base!
                    if tpl in siteDict:
                        site = siteDict[tpl]

                        # Copy mod identification data
                        site['modificationScore'] = mod['QMod']
                        site['modification'] = mod['modification']

                        if self.options.methylFraction and FRAC in mod:
                            site[FRAC] = mod[FRAC]
                            site[FRAClow] = mod[FRAClow]
                            site[FRACup] = mod[FRACup]

                    if 'Mask' in mod:
                        # The decoder should supply the off-target peak mask.
                        # The list is extended in place, so the decoder's own
                        # 'Mask' entry also gains the trailing 0.
                        mask = mod['Mask']
                        mask.append(0)  # make sure we always mask the cognate position
                    else:
                        # If the decoder doesn't supply a mask - use a hard-coded version
                        # FIXME - this branch is deprecated
                        mask = ModificationPeakMask[mod['modification']]

                    # Mask out neighbor peaks that may have been caused by this mod
                    for offset in mask:
                        shadowPos = tpl + strandSign * offset
                        if shadowPos in siteDict:
                            siteDict[shadowPos]['offTargetPeak'] = True

                finalCalls.extend(siteDict.values())

            # Sort by template position
            finalCalls.sort(key=lambda x: x['tpl'])
            return finalCalls

        else:
            if self.options.smBaseMod:
                result = self._summarizeMolecule(smId, targetBounds, self.options.methylFraction, self.options.identify)
            else:
                result = self._summarizeReferenceRegion(targetBounds, self.options.methylFraction, self.options.identify)

            if self.options.useLDA and self.controlCmpH5 is None:

                # FIXME: add on a column "Ca5C" containing LDA score for each C-residue site
                # lda = BasicLdaEnricher(self.ipdModel.gbmModel, self.sequence, result, self.options.identify)
                lda = PositiveControlEnricher(self.ipdModel.gbmModel, self.sequence, result)
                # Keep the enricher's return value, as the identify branch does,
                # instead of dropping it into an unused local.
                # NOTE(review): assumes callEnricherFunction returns the same
                # kind of result object it was given - confirm against
                # PositiveControlEnricher.
                result = lda.callEnricherFunction(result)

            # Single-molecule results are returned as produced; only
            # reference-region results are sorted by template position.
            if not self.options.smBaseMod:
                result.sort(key=lambda x: x['tpl'])

            return result