def __init__(self, filename, sample=0):
    """Build the behaviour from a CpG data file and a sample selector.

    filename -- handed unchanged to AggregateTree.
    sample   -- case-insensitive selector; once lower-cased it must be one
                of "l", "r", "left", "right", "absdiff", "reldiff" or
                "folddiff".

    NOTE(review): the default value 0 is an int and has no ``lower``
    method, so in practice callers have to pass a string.
    """
    accepted = ["l", "r", "left", "right", "absdiff", "reldiff", "folddiff"]
    assert sample.lower() in accepted

    # Tree holding the CpG data (original note: "allows rapid searching").
    self.treatment = AggregateTree(filename)

    # Keep the selector lower-cased so later comparisons ignore case.
    self.sample = sample.lower()

    self.valuesBehaviour = missingValuesDontCount
class CpGMethPercentDifference(DataBehaviour):
    """Right-minus-left methylation fraction over a genomic range."""

    def __init__(self, filename):
        """Load the CpG data in ``filename`` into an AggregateTree."""
        self.treatment = AggregateTree(filename)
        self.valuesBehaviour = missingValuesDontCount

    def methPercentage(self, values):
        """Pool the per-site counts and return ``[right - left]``.

        ``values`` is an iterable of 4-tuples unpacked as
        (lmeth, lunmeth, rmeth, runmeth).  An empty list is returned when
        either side's pooled total (meth + unmeth) is 10 or less.
        """
        left_meth = left_unmeth = right_meth = right_unmeth = 0
        for site_lmeth, site_lunmeth, site_rmeth, site_runmeth in values:
            left_meth += site_lmeth
            left_unmeth += site_lunmeth
            right_meth += site_rmeth
            right_unmeth += site_runmeth

        # arbitrary cutoff for regions with very few reads (left side)
        left_meth = float(left_meth)
        left_depth = left_meth + float(left_unmeth)
        if left_depth <= 10.0:
            return []

        # arbitrary cutoff for regions with very few reads (right side)
        right_meth = float(right_meth)
        right_depth = right_meth + float(right_unmeth)
        if right_depth <= 10.0:
            return []

        left_fraction = left_meth / left_depth
        right_fraction = right_meth / right_depth
        return [right_fraction - left_fraction]

    def getValues(self, chrm, start, stop):
        """Return the methylation difference for the range (chrm, start, stop)."""
        in_range = self.treatment.getValuesInRange(chrm, start, stop)
        return self.methPercentage(in_range)
elif assembly == "hg19": cpgIslands = ExtendedBed( os.path.expanduser( "~/mount/publicdata/hg19/CpGIslands/cpgislands.bed")) lINEs = ExtendedBed( os.path.expanduser( "~/mount/publicdata/hg19/Repeats/UCSC_HG19_LINEs.bed"), defaultkeys=["chrom", "chromStart", "chromEnd", "name", "strand"]) sINEs = ExtendedBed( os.path.expanduser( "~/mount/publicdata/hg19/Repeats/UCSC_HG19_SINEs.bed"), defaultkeys=["chrom", "chromStart", "chromEnd", "name", "strand"]) else: assert False, "Unknown genome build for cpgislands / lines / sines" methdata = AggregateTree(methdatafile, reverse=reverse) def methPercentageDiff(values): assert values != None lmethTotal = 0 lunmethTotal = 0 rmethTotal = 0 runmethTotal = 0 for (lmeth, lunmeth, rmeth, runmeth) in values: lmethTotal += lmeth lunmethTotal += lunmeth rmethTotal += rmeth runmethTotal += runmeth
def __init__(self, filename):
    """Load ``filename`` into an AggregateTree and set the values behaviour.

    filename -- handed unchanged to AggregateTree.
    """
    # Tree that later range lookups are served from.
    self.treatment = AggregateTree(filename)

    # Policy object/function for missing values, taken from module scope.
    self.valuesBehaviour = missingValuesDontCount
class CpGMethPercent(DataBehaviour):
    """Methylation fraction, or a left/right comparison of it, over a range.

    Takes the CpG format input plus a selector saying which statistic to
    report: the left fraction, the right fraction, or the absolute,
    relative or log2-fold difference between them.
    """

    # Relative and fold differences are clamped to +/- this value.
    _DIFF_LIMIT = 5

    def __init__(self, filename, sample=0):
        """Load the CpG data and remember which statistic to report.

        filename -- handed unchanged to AggregateTree.
        sample   -- case-insensitive selector; once lower-cased it must be
                    one of "l", "r", "left", "right", "absdiff", "reldiff"
                    or "folddiff".

        NOTE(review): the default value 0 is an int and has no ``lower``
        method, so in practice callers have to pass a string.
        """
        assert sample.lower() in ["l", "r", "left", "right",
                                  "absdiff", "reldiff", "folddiff"]
        # Tree holding the CpG data (original note: "allows rapid searching").
        self.treatment = AggregateTree(filename)
        # Stored lower-cased so the comparisons below ignore case.
        self.sample = sample.lower()
        self.valuesBehaviour = missingValuesDontCount

    def methPercentage(self, values):
        """Pool the per-site counts and return the selected statistic.

        ``values`` is an iterable of 4-tuples unpacked as
        (lmeth, lunmeth, rmeth, runmeth).

        Returns a one-element list, or an empty list when either side's
        pooled total (meth + unmeth) is 10 or less.  The element is:

        * selector starting with "l"   -> left fraction
        * selector starting with "rel" -> (right - left) / left, clamped
        * selector starting with "a"   -> right - left
        * selector starting with "f"   -> log2(right) - log2(left), clamped
        * anything else                -> right fraction
        """
        lmethTotal = 0
        lunmethTotal = 0
        rmethTotal = 0
        runmethTotal = 0
        for (lmeth, lunmeth, rmeth, runmeth) in values:
            lmethTotal += lmeth
            lunmethTotal += lunmeth
            rmethTotal += rmeth
            runmethTotal += runmeth

        lmeth = float(lmethTotal)
        lunmeth = float(lunmethTotal)
        # arbitrary cutoff for regions with very few reads
        if lmeth + lunmeth <= 10.0:
            return []

        rmeth = float(rmethTotal)
        runmeth = float(runmethTotal)
        # arbitrary cutoff for regions with very few reads
        if rmeth + runmeth <= 10.0:
            return []

        # Add a ~0 value to each total so a fraction is never exactly zero;
        # this keeps the division and the logarithms below well-defined.
        lmeth += 0.0000000001
        lunmeth += 0.0000000001
        rmeth += 0.0000000001
        runmeth += 0.0000000001

        lpercentage = lmeth / (lmeth + lunmeth)
        rpercentage = rmeth / (rmeth + runmeth)

        if self.sample.startswith("l"):
            return [lpercentage]
        if self.sample.startswith("rel"):
            relDiff = (rpercentage - lpercentage) / lpercentage
            return [self._clamp(relDiff)]
        if self.sample.startswith("a"):
            return [rpercentage - lpercentage]
        if self.sample.startswith("f"):
            # Fix: this branch used to return the absolute difference, so
            # the fold difference was computed but never reported.
            foldDiff = math.log(rpercentage, 2) - math.log(lpercentage, 2)
            return [self._clamp(foldDiff)]
        return [rpercentage]

    def _clamp(self, value):
        """Limit ``value`` to the range [-_DIFF_LIMIT, _DIFF_LIMIT]."""
        if value > self._DIFF_LIMIT:
            return self._DIFF_LIMIT
        elif value < -self._DIFF_LIMIT:
            return -self._DIFF_LIMIT
        return value

    def getValues(self, chrm, start, stop):
        """Return the selected statistic for the range (chrm, start, stop)."""
        return self.methPercentage(
            self.treatment.getValuesInRange(chrm, start, stop))

    def heatmapHasNegativeValues(self):
        """Tell whether the selected statistic can be negative.

        True for the three difference statistics; "folddiff" is included
        because the log2 fold difference is signed.
        """
        return self.sample in ("absdiff", "reldiff", "folddiff")

    def heatmapUpperLowerBounds(self):
        """Return (lower, upper) plotting bounds for the selected statistic.

        "absdiff" is a difference of two fractions, hence (-1, 1).
        "reldiff" and "folddiff" are clamped in methPercentage, hence
        (-_DIFF_LIMIT, _DIFF_LIMIT).  Other selectors give (None, None).
        """
        if self.sample == "absdiff":
            return -1, 1
        elif self.sample in ("reldiff", "folddiff"):
            return -self._DIFF_LIMIT, self._DIFF_LIMIT
        else:
            return None, None
assert methdatafile != None assert regionsfile != None assert outputfile != None assert genomeBuild != None if printThreshold == None: printThreshold = 10 if isinstance(printThreshold, int) is False: printThreshold = 10 if genomeBuild not in ("hg19", "hg18", "mm9"): genomeBuild = "hg19" print "Genome build type unacceptable. Defaulting to genome hg19..." regions = SimpleBed(regionsfile) methdata = AggregateTree(methdatafile) genome = Genome(genomeBuild) def methChiSquared(pooledMeth): chi2, p = None, None try: chi2, p, dof, expected = scipy.stats.chi2_contingency(pooledMeth) except ValueError: p = 1.0 return chi2, p def methTotals(values): lmethTotal = 0 lunmethTotal = 0 rmethTotal = 0
for affy in affyCSV: ensembls = affyannotation.getValues(affy, "Ensembl") if len(ensembls) == 1: affyFC = float(affyCSV[affy][affyfccol]) affylogFC = math.log(affyFC) if affyFC > 0.0 else math.log( abs(affyFC)) * -1.0 affyEnsemblLogFCs[ensembls[0]].append(affylogFC) affyexpr = float(affyCSV[affy][affyexprcol]) affyEnsemblExprs[ensembls[0]].append(affyexpr) affyp = float(affyCSV[affy][affypcol]) affyEnsemblPvalues[ensembls[0]].append(affyp) methdata = AggregateTree(methdatafile) def methPercentageDiff(values): lmethTotal = 0 lunmethTotal = 0 rmethTotal = 0 runmethTotal = 0 for (lmeth, lunmeth, rmeth, runmeth) in values: lmethTotal += lmeth lunmethTotal += lunmeth rmethTotal += rmeth runmethTotal += runmeth lmeth = float(lmethTotal) lunmeth = float(lunmethTotal)