class ChIPWithControl():
    """Pairs a ChIP BED dataset with a control BED dataset.

    Both files are loaded as ``BedFile`` objects. The class exposes the
    intervals of both datasets for a region and computes tag counts for a
    region under several normalisation strategies.
    """

    def __init__(self, ChIPFilename, controlFilename, extend=None):
        """Load the ChIP and the control BED files.

        ``extend`` is forwarded unchanged to ``BedFile`` for both datasets.
        """
        self.ChIPData = BedFile(ChIPFilename, extend=extend)
        self.ControlData = BedFile(controlFilename, extend=extend)

    def getTagsInRegion(self, chrm, start, stop):
        """Return a ``(chipIntervals, controlIntervals)`` tuple for the region.

        Each element is whatever ``getIntervalsInRange(chrm, start, stop)``
        returns for the respective dataset.
        """
        return (self.ChIPData.getIntervalsInRange(chrm, start, stop),
                self.ControlData.getIntervalsInRange(chrm, start, stop))

    def getLibraryNormalizationFactor(self, data):
        """Return ``data.count`` expressed in millions, as a float.

        The division is done in floating point on purpose: with integer
        division (Python 2 semantics for ``int / int``) any library with
        fewer than one million entries would yield 0 and make every
        library-normalised value in ``normaliseTags`` divide by zero, and
        larger libraries would be truncated to whole millions.
        """
        return data.count / 1000000.0

    @staticmethod
    def _normalisedCounts(tags, regionLength, libraryFactor):
        """Return (raw, per-base, per-million, per-base-per-million) counts."""
        regionNormalised = tags / regionLength
        return (tags,
                regionNormalised,
                tags / libraryFactor,
                regionNormalised / libraryFactor)

    def normaliseTags(self, chrm, start, end, chipTagsInRegion, controlTagsInRegion):
        """Return tag counts for a region under several normalisations.

        The fractional tags of both datasets are obtained from
        ``getFractionalTagsInRegion`` and summed with ``math.fsum``.

        Returns a flat tuple of 16 floats, in this order:

        * ChIP:     raw, region-, library-, region+library-normalised
        * control:  raw, region-, library-, region+library-normalised
        * ChIP minus control, for the same four variants
        * ChIP divided by control, for the same four variants; all four
          ratios are 0.0 when the control has no tags in the region

        "Region-normalised" divides by ``end - start``; "library-normalised"
        divides by ``getLibraryNormalizationFactor`` of the dataset.

        Raises ``AssertionError`` if ``end`` is not greater than ``start``.
        """
        assert end > start, str(end) + " > " + str(start)

        chipFractionalTagsInRegion = getFractionalTagsInRegion(
            chrm, start, end, chipTagsInRegion)
        controlFractionalTagsInRegion = getFractionalTagsInRegion(
            chrm, start, end, controlTagsInRegion)

        regionLength = float(end - start)

        chipCounts = self._normalisedCounts(
            math.fsum(chipFractionalTagsInRegion),
            regionLength,
            self.getLibraryNormalizationFactor(self.ChIPData))
        controlCounts = self._normalisedCounts(
            math.fsum(controlFractionalTagsInRegion),
            regionLength,
            self.getLibraryNormalizationFactor(self.ControlData))

        diffCounts = tuple(
            chip - control for chip, control in zip(chipCounts, controlCounts))

        # Element 0 is the raw control tag count; without control tags the
        # ratios are undefined and are reported as 0.0.
        if controlCounts[0] > 0:
            ratioCounts = tuple(
                chip / control for chip, control in zip(chipCounts, controlCounts))
        else:
            ratioCounts = (0.0, 0.0, 0.0, 0.0)

        return chipCounts + controlCounts + diffCounts + ratioCounts
class BedWithControl(Bed):
    """A sample ``Bed`` combined with a control dataset that is subtracted from it.

    ``sample`` is an already constructed object providing ``getValues``,
    ``fractions`` and ``normalizeLibrarySize``; the control is loaded here
    from ``controlFilename`` as a ``BedFile``.
    """

    def __init__(self, controlFilename, sample, extend=None, fractions=True, normalizeLibrarySize=True):
        """Store the sample and load the control BED file.

        ``fractions`` and ``normalizeLibrarySize`` must equal the values the
        sample was configured with; an ``AssertionError`` is raised otherwise.
        """
        # The sample data is already loaded in the sample object; only the
        # control file is read here.
        self.sample = sample
        self.control = BedFile(controlFilename, extend=extend)
        self.fractions = fractions
        self.normalizeLibrarySize = normalizeLibrarySize
        assert self.fractions == self.sample.fractions, "Bed and BedWithControl in different formats"
        assert self.normalizeLibrarySize == self.sample.normalizeLibrarySize

    def getValues(self, chrm, start, stop):
        """Return a ``(sampleValues, controlIntervals)`` tuple for the region."""
        return (self.sample.getValues(chrm, start, stop),
                self.control.getIntervalsInRange(chrm, start, stop))

    def getLibraryNormalizationFactor(self):
        """Return the control's ``count`` expressed in millions, as a float.

        Floating-point division is used deliberately: integer division
        (Python 2 semantics for ``int / int``) would return 0 for a control
        with fewer than one million entries and truncate larger ones.
        """
        return self.control.count / 1000000.0

    def valuesBehaviour(self, chrm, values, start, end):
        """Return a one-element list: sample count minus control count.

        ``values`` is the ``(sampleValues, controlValues)`` pair produced by
        ``getValues``. When ``self.fractions`` is true both sides are counted
        with ``fractionallyCountByBP``, otherwise with ``count``.
        """
        sampleValues, controlValues = values
        if self.fractions:
            return [self.sample.fractionallyCountByBP(chrm, sampleValues, start, end)
                    - self.fractionallyCountByBP(chrm, controlValues, start, end)]
        else:
            return [self.sample.count(chrm, sampleValues, start, end)
                    - self.count(chrm, controlValues, start, end)]
def __init__(self, controlFilename, sample, extend=None, fractions=True, normalizeLibrarySize=True):
    """Attach a control dataset to an already loaded sample.

    The control BED file is read from ``controlFilename``. ``fractions`` and
    ``normalizeLibrarySize`` have to match the corresponding attributes of
    ``sample``; a mismatch trips an assertion.
    """
    # Only the control needs loading; the sample arrives ready to use.
    self.sample = sample
    self.control = BedFile(controlFilename, extend=extend)

    self.fractions, self.normalizeLibrarySize = fractions, normalizeLibrarySize

    # Guard against combining a sample and a control counted differently.
    assert self.fractions == self.sample.fractions, (
        "Bed and BedWithControl in different formats"
    )
    assert self.normalizeLibrarySize == self.sample.normalizeLibrarySize
def __init__(self, ChIPFilename, controlFilename, extend=None):
    """Read the ChIP file and then the control file into ``BedFile`` objects.

    The same ``extend`` value is handed to both ``BedFile`` constructors.
    """
    sharedOptions = {"extend": extend}
    self.ChIPData = BedFile(ChIPFilename, **sharedOptions)
    self.ControlData = BedFile(controlFilename, **sharedOptions)
sys.exit(2) # add executing directory as part of path sys.path.append(sys.path[0]) for o, a in opts: if (o == "-d"): debug = True for o, a in opts: if (o == "-d"): pass # we dealt with this already elif o == "-b": treatmentFileName = a if debug: replaceTreatment = BedTreatment(treatmentFileName, 50000) else: replaceTreatment = BedTreatment(treatmentFileName) elif o == "-a": baseTreatmentFileName = a elif o == "-o": outputFile = a else: assert False, "Unhandled option" baseTreatment = csv.reader(open(baseTreatmentFileName, "r"), delimiter='\t') outputFile = csv.writer(open(outputFile, "w"), delimiter='\t') for row in baseTreatment: if len(row) == 0 or row[0].startswith("#"):
# print str(err) # will print something like "option -a not recognized" usage() sys.exit(2) # add executing directory as part of path sys.path.append(sys.path[0]) treatmentFileName = None baseTreatmentFileName = None outputFile = None pad = 0 for o, a in opts: if o == "-b": treatmentFileName = a subtractTreatment = BedTreatment(treatmentFileName) elif o == "-a": baseTreatmentFileName = a elif o == "-o": outputFile = a elif o == "-p": pad = int(a) else: assert False, "Unhandled option" if treatmentFileName is None or baseTreatmentFileName is None or outputFile is None: usage() sys.exit(2) baseTreatment = csv.reader(open(baseTreatmentFileName, "r"), delimiter='\t')