예제 #1
0
class ChIPWithControl():
    """Pair a ChIP dataset with a control dataset and compare their tag counts.

    Both files are loaded as ``BedFile`` objects (defined outside this class).
    """

    #Constructor
    def __init__(self, ChIPFilename, controlFilename, extend=None):
        """Load the ChIP and control files.

        ``extend`` is forwarded unchanged to ``BedFile`` for both files.
        """
        self.ChIPData = BedFile(ChIPFilename, extend=extend)
        self.ControlData = BedFile(controlFilename, extend=extend)

    #Gets the tag counts for both ChIP and control datasets
    def getTagsInRegion(self, chrm, start, stop):
        """Return a ``(ChIP, control)`` tuple of ``getIntervalsInRange`` results."""
        chipIntervals = self.ChIPData.getIntervalsInRange(chrm, start, stop)
        controlIntervals = self.ControlData.getIntervalsInRange(
            chrm, start, stop)
        return chipIntervals, controlIntervals

    #Gets the reads per million for the library:
    def getLibraryNormalizationFactor(self, data):
        """Return ``data.count`` divided by one million, as a float.

        The float divisor forces true division. With two integer operands
        Python 2 floors the result, so a library with fewer than one million
        tags would yield 0 and every later division by this factor would
        raise ZeroDivisionError.
        """
        return data.count / 1000000.0

    #Gets normalised tag counts for a region - using a variety of normalisation strategies
    def normaliseTags(self, chrm, start, end, chipTagsInRegion,
                      controlTagsInRegion):
        """Return raw and normalised tag totals for one region.

        ``chipTagsInRegion`` and ``controlTagsInRegion`` are handed to
        ``getFractionalTagsInRegion`` (defined outside this class) and the
        values it returns are summed with ``math.fsum``.

        Four variants are computed for each quantity, always in this order:
        raw, divided by region length (``end - start``), divided by the
        library factor, and divided by both.

        Returns a 16-tuple: the four ChIP values, the four control values,
        the four ChIP-minus-control differences, and the four ChIP/control
        ratios. All ratios are 0.0 when the control total is not positive.

        Raises AssertionError when ``end`` is not greater than ``start``.
        """
        assert end > start, str(end) + " > " + str(start)
        regionLength = float(end - start)

        chipFractionalTagsInRegion = getFractionalTagsInRegion(
            chrm, start, end, chipTagsInRegion)
        controlFractionalTagsInRegion = getFractionalTagsInRegion(
            chrm, start, end, controlTagsInRegion)

        # Each library factor is needed twice below; compute it once.
        chipLibraryFactor = self.getLibraryNormalizationFactor(self.ChIPData)
        controlLibraryFactor = self.getLibraryNormalizationFactor(
            self.ControlData)

        chipTags = math.fsum(chipFractionalTagsInRegion)
        chipTagsRegionNormalised = chipTags / regionLength
        chipTagsLibraryNormalised = chipTags / chipLibraryFactor
        chipTagsRegionLibraryNormalised = (chipTagsRegionNormalised /
                                           chipLibraryFactor)

        controlTags = math.fsum(controlFractionalTagsInRegion)
        controlTagsRegionNormalised = controlTags / regionLength
        controlTagsLibraryNormalised = controlTags / controlLibraryFactor
        controlTagsRegionLibraryNormalised = (controlTagsRegionNormalised /
                                              controlLibraryFactor)

        diffTags = chipTags - controlTags
        diffTagsRegionNormalised = (chipTagsRegionNormalised -
                                    controlTagsRegionNormalised)
        diffTagsLibraryNormalised = (chipTagsLibraryNormalised -
                                     controlTagsLibraryNormalised)
        diffTagsRegionLibraryNormalised = (
            chipTagsRegionLibraryNormalised -
            controlTagsRegionLibraryNormalised)

        if controlTags > 0:
            ratioTags = chipTags / controlTags
            ratioTagsRegionNormalised = (chipTagsRegionNormalised /
                                         controlTagsRegionNormalised)
            ratioTagsLibraryNormalised = (chipTagsLibraryNormalised /
                                          controlTagsLibraryNormalised)
            ratioTagsRegionLibraryNormalised = (
                chipTagsRegionLibraryNormalised /
                controlTagsRegionLibraryNormalised)
        else:
            # No control signal: report 0.0 rather than dividing by zero.
            ratioTags = 0.0
            ratioTagsRegionNormalised = 0.0
            ratioTagsLibraryNormalised = 0.0
            ratioTagsRegionLibraryNormalised = 0.0

        return (
            chipTags,
            chipTagsRegionNormalised,
            chipTagsLibraryNormalised,
            chipTagsRegionLibraryNormalised,
            controlTags,
            controlTagsRegionNormalised,
            controlTagsLibraryNormalised,
            controlTagsRegionLibraryNormalised,
            diffTags,
            diffTagsRegionNormalised,
            diffTagsLibraryNormalised,
            diffTagsRegionLibraryNormalised,
            ratioTags,
            ratioTagsRegionNormalised,
            ratioTagsLibraryNormalised,
            ratioTagsRegionLibraryNormalised,
        )
예제 #2
0
class BedWithControl(Bed):
    """A sample ``Bed`` paired with a control ``BedFile``.

    Values are reported as sample minus control.
    """

    # sample is a Bed from above
    def __init__(self,controlFilename,sample,extend=None,fractions=True,normalizeLibrarySize=True):
        """Store the sample and load the control file.

        ``sample`` is an already-constructed object; only the control is read
        here, from ``controlFilename``, with ``extend`` forwarded to
        ``BedFile``.

        Raises AssertionError when ``fractions`` or ``normalizeLibrarySize``
        differ from the same-named attributes on ``sample``.
        """
        # NOTE(review): Bed.__init__ is not invoked here - confirm that is intentional.
        self.sample = sample
        self.control = BedFile(controlFilename,extend=extend)

        self.fractions = fractions
        self.normalizeLibrarySize = normalizeLibrarySize # divide by 1mil to get a normalization factor

        assert self.fractions == self.sample.fractions, "Bed and BedWithControl in different formats"
        assert self.normalizeLibrarySize == self.sample.normalizeLibrarySize

    def getValues(self,chrm, start, stop):
        """Return a ``(sample values, control intervals)`` tuple for the range."""
        # store a tuple of sample/cntrl values
        return self.sample.getValues(chrm,start,stop),self.control.getIntervalsInRange(chrm, start, stop)

    def getLibraryNormalizationFactor(self):
        """Return the control's ``count`` divided by one million, as a float.

        The float divisor forces true division; with two integer operands
        Python 2 would floor the result (to 0 for libraries with fewer than
        one million entries).
        """
        return self.control.count / 1000000.0

    def valuesBehaviour(self,chrm,values,start,end):
        """Return a one-element list holding the sample count minus the control count.

        ``values`` is the ``(sample, control)`` pair produced by
        ``getValues``. When ``self.fractions`` is true both sides are counted
        with ``fractionallyCountByBP``, otherwise with ``count``. The sample
        side is counted by the sample object; the control side is counted by
        this object's own methods (not defined in this class, so presumably
        inherited from ``Bed``).
        """
        sampleValues, controlValues = values

        if self.fractions:
            return [self.sample.fractionallyCountByBP(chrm,sampleValues,start,end) - self.fractionallyCountByBP(chrm,controlValues,start,end)]
        else:
            return [self.sample.count(chrm,sampleValues,start,end) - self.count(chrm,controlValues,start,end)]
예제 #3
0
 def __init__(self, controlFilename, sample, extend=None, fractions=True, normalizeLibrarySize=True):
     """Keep the given sample and load the control file alongside it.

     The control is read from ``controlFilename`` via ``BedFile`` with
     ``extend`` passed through. The ``fractions`` and
     ``normalizeLibrarySize`` settings must equal the same-named attributes
     on ``sample``; otherwise an AssertionError is raised.
     """
     # The sample arrives already built; only the control is read here.
     self.sample = sample
     controlBed = BedFile(controlFilename, extend=extend)
     self.control = controlBed

     # Counting mode, and whether to divide by one million as a
     # library-size normalization factor.
     self.fractions = fractions
     self.normalizeLibrarySize = normalizeLibrarySize

     # Both settings have to agree with the sample's.
     pairedSample = self.sample
     assert self.fractions == pairedSample.fractions, "Bed and BedWithControl in different formats"
     assert self.normalizeLibrarySize == pairedSample.normalizeLibrarySize
예제 #4
0
 def __init__(self, ChIPFilename, controlFilename, extend=None):
     """Load the ChIP file and the control file as ``BedFile`` objects.

     ``extend`` is passed through unchanged to both ``BedFile`` calls.
     """
     chipBed = BedFile(ChIPFilename, extend=extend)
     self.ChIPData = chipBed

     controlBed = BedFile(controlFilename, extend=extend)
     self.ControlData = controlBed
예제 #5
0
        sys.exit(2)

    # add executing directory as part of path
    sys.path.append(sys.path[0])

    for o, a in opts:
        if (o == "-d"):
            debug = True

    for o, a in opts:
        if (o == "-d"):
            pass  # we dealt with this already
        elif o == "-b":
            treatmentFileName = a
            if debug:
                replaceTreatment = BedTreatment(treatmentFileName, 50000)
            else:
                replaceTreatment = BedTreatment(treatmentFileName)
        elif o == "-a":
            baseTreatmentFileName = a
        elif o == "-o":
            outputFile = a
        else:
            assert False, "Unhandled option"

    baseTreatment = csv.reader(open(baseTreatmentFileName, "r"),
                               delimiter='\t')
    outputFile = csv.writer(open(outputFile, "w"), delimiter='\t')

    for row in baseTreatment:
        if len(row) == 0 or row[0].startswith("#"):
예제 #6
0
        #        print str(err) # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    # add executing directory as part of path
    sys.path.append(sys.path[0])

    treatmentFileName = None
    baseTreatmentFileName = None
    outputFile = None
    pad = 0

    for o, a in opts:
        if o == "-b":
            treatmentFileName = a
            subtractTreatment = BedTreatment(treatmentFileName)
        elif o == "-a":
            baseTreatmentFileName = a
        elif o == "-o":
            outputFile = a
        elif o == "-p":
            pad = int(a)
        else:
            assert False, "Unhandled option"

    if treatmentFileName is None or baseTreatmentFileName is None or outputFile is None:
        usage()
        sys.exit(2)

    baseTreatment = csv.reader(open(baseTreatmentFileName, "r"),
                               delimiter='\t')