Exemple #1
0
 def __init__(self,filename,sample=0):
     """Build the CpG lookup tree and remember which statistic to report.

     filename -- passed straight to AggregateTree.
     sample   -- mode string, matched case-insensitively against:
                 "l", "r", "left", "right", "absdiff", "reldiff",
                 "folddiff".

     NOTE: `sample` must be a string.  The default of 0 has no
     ``.lower()`` method, so relying on the default raises
     AttributeError; callers always have to pass a mode.
     """
     mode = sample.lower()
     assert mode in ["l","r","left","right", "absdiff", "reldiff", "folddiff"]

     # The tree holding the CpG data; the original author's note says it
     # exists to allow rapid searching.
     self.treatment = AggregateTree(filename)
     # The mode is stored lower-cased.
     self.sample = mode
     self.valuesBehaviour = missingValuesDontCount
Exemple #2
0
class CpGMethPercentDifference(DataBehaviour):
    """Right-minus-left methylated fraction for the CpGs in a range.

    Despite the "Percent" in the name, the value produced is a difference
    of fractions (each in 0..1), not of percentages.
    """

    def __init__(self,filename):
        """Load the methylation data from `filename` into an AggregateTree."""
        self.treatment = AggregateTree(filename)
        self.valuesBehaviour = missingValuesDontCount

    def methPercentage(self,values):
        """Pool read counts and return the right-minus-left fraction.

        values -- iterable of 4-tuples
                  (lmeth, lunmeth, rmeth, runmeth).

        Returns a one-element list ``[right_fraction - left_fraction]``,
        or an empty list when either side has 10 or fewer pooled reads.
        """
        leftMethSum = 0
        leftUnmethSum = 0
        rightMethSum = 0
        rightUnmethSum = 0

        # Pool the counts of every entry in the range.
        for counts in values:
            (lmeth, lunmeth, rmeth, runmeth) = counts
            leftMethSum += lmeth
            leftUnmethSum += lunmeth
            rightMethSum += rmeth
            rightUnmethSum += runmeth

        # Left side: too few reads means no value (arbitrary cutoff).
        leftMeth = float(leftMethSum)
        leftUnmeth = float(leftUnmethSum)
        if not (leftMeth + leftUnmeth > 10.0):
            return []

        # Right side: same arbitrary cutoff.
        rightMeth = float(rightMethSum)
        rightUnmeth = float(rightUnmethSum)
        if not (rightMeth + rightUnmeth > 10.0):
            return []

        leftFraction = leftMeth / (leftMeth + leftUnmeth)
        rightFraction = rightMeth / (rightMeth + rightUnmeth)
        return [rightFraction - leftFraction]

    def getValues(self,chrm, start, stop):
        """Return methPercentage() of the tree values in (chrm, start, stop)."""
        inRange = self.treatment.getValuesInRange(chrm,start,stop)
        return self.methPercentage(inRange)
Exemple #3
0
    elif assembly == "hg19":
        cpgIslands = ExtendedBed(
            os.path.expanduser(
                "~/mount/publicdata/hg19/CpGIslands/cpgislands.bed"))
        lINEs = ExtendedBed(
            os.path.expanduser(
                "~/mount/publicdata/hg19/Repeats/UCSC_HG19_LINEs.bed"),
            defaultkeys=["chrom", "chromStart", "chromEnd", "name", "strand"])
        sINEs = ExtendedBed(
            os.path.expanduser(
                "~/mount/publicdata/hg19/Repeats/UCSC_HG19_SINEs.bed"),
            defaultkeys=["chrom", "chromStart", "chromEnd", "name", "strand"])
    else:
        assert False, "Unknown genome build for cpgislands / lines / sines"

    methdata = AggregateTree(methdatafile, reverse=reverse)

    def methPercentageDiff(values):

        assert values != None

        lmethTotal = 0
        lunmethTotal = 0
        rmethTotal = 0
        runmethTotal = 0

        for (lmeth, lunmeth, rmeth, runmeth) in values:
            lmethTotal += lmeth
            lunmethTotal += lunmeth
            rmethTotal += rmeth
            runmethTotal += runmeth
Exemple #4
0
 def __init__(self,filename):
     """Load `filename` into an AggregateTree and set the values behaviour."""
     tree = AggregateTree(filename)
     self.treatment = tree
     self.valuesBehaviour = missingValuesDontCount
Exemple #5
0
class CpGMethPercent(DataBehaviour):
    """Methylated fraction, or a left/right comparison of it, over a range.

    The statistic reported is chosen by the `sample` mode given at
    construction time:

      "l" / "left"   -- left methylated fraction
      "r" / "right"  -- right methylated fraction
      "absdiff"      -- right fraction minus left fraction
      "reldiff"      -- absdiff divided by the left fraction, clamped to [-5, 5]
      "folddiff"     -- log2(right) - log2(left), clamped to [-5, 5]

    Despite the "Percent" in the name, fractions are in 0..1.
    """

    #Takes the CpG format input, plus left/right
    def __init__(self,filename,sample=0):
        """Build the CpG lookup tree and remember which statistic to report.

        filename -- passed straight to AggregateTree.
        sample   -- one of the mode strings listed on the class, matched
                    case-insensitively.

        NOTE: `sample` must be a string.  The default of 0 has no
        ``.lower()`` method, so relying on the default raises
        AttributeError; callers always have to pass a mode.
        """
        assert sample.lower() in ["l","r","left","right", "absdiff", "reldiff", "folddiff"]

        # The tree holding the CpG data; per the original author's note it
        # exists to allow rapid searching.
        self.treatment = AggregateTree(filename)
        # The mode is stored lower-cased so later comparisons are simple.
        self.sample = sample.lower()
        self.valuesBehaviour = missingValuesDontCount

    def methPercentage(self,values):
        """Pool read counts and return the statistic selected by self.sample.

        values -- iterable of 4-tuples
                  (lmeth, lunmeth, rmeth, runmeth).

        Returns a one-element list holding the selected statistic, or an
        empty list when either side has 10 or fewer pooled reads.
        """
        lmethTotal = 0
        lunmethTotal = 0
        rmethTotal = 0
        runmethTotal = 0

        for (lmeth,lunmeth,rmeth,runmeth) in values:
            lmethTotal += lmeth
            lunmethTotal += lunmeth
            rmethTotal += rmeth
            runmethTotal += runmeth

        lmeth = float(lmethTotal)
        lunmeth = float(lunmethTotal)

        # arbitrary cutoff for regions with very few reads
        if lmeth + lunmeth <= 10.0:
            return []

        rmeth = float(rmethTotal)
        runmeth = float(runmethTotal)

        # arbitrary cutoff for regions with very few reads
        if rmeth + runmeth <= 10.0:
            return []

        # Nudge every total by a tiny amount so that a methylated count of
        # zero still gives a strictly positive fraction; otherwise the
        # division by lpercentage and the logarithms below would fail.
        lmeth += 0.0000000001
        lunmeth += 0.0000000001
        rmeth += 0.0000000001
        runmeth += 0.0000000001

        # Calculates the fractions and the three kinds of difference
        lpercentage = lmeth / (lmeth+lunmeth)
        rpercentage = rmeth / (rmeth+runmeth)
        absDiff = rpercentage - lpercentage
        relDiff = absDiff/lpercentage
        foldDiff = math.log(rpercentage,2) - math.log(lpercentage,2)

        # Clamp the unbounded statistics to [-5, 5].
        if relDiff > 5:
            relDiff = 5
        elif relDiff < -5:
            relDiff = -5
        if foldDiff > 5:
            foldDiff = 5
        elif foldDiff < -5:
            foldDiff = -5

        # "rel" must be tested explicitly because "reldiff" would otherwise
        # fall through to the right-sample default along with "r"/"right".
        if self.sample.startswith("l"):
            return [lpercentage]
        elif self.sample.startswith("rel"):
            return [relDiff]
        elif self.sample.startswith("a"):
            return [absDiff]
        elif self.sample.startswith("f"):
            # Fixed: this branch used to return absDiff, so "folddiff"
            # silently produced the same numbers as "absdiff".
            return [foldDiff]
        else:
            return [rpercentage]

    # get the cpg meth percentage for a given range (chrm,start,stop)
    def getValues(self,chrm, start, stop):
        """Return methPercentage() of the tree values in (chrm, start, stop)."""
        return self.methPercentage(self.treatment.getValuesInRange(chrm,start,stop))

    # Tests for an appropriate R data range
    def heatmapHasNegativeValues(self):
        """Return True when the selected statistic can be negative.

        All three difference modes can go below zero; the plain left/right
        fractions cannot.
        """
        return self.sample in ("absdiff", "reldiff", "folddiff")

    def heatmapUpperLowerBounds(self):
        """Return the (lower, upper) range of the selected statistic.

        absdiff is a difference of two fractions, so it lies in [-1, 1];
        reldiff and folddiff are clamped to [-5, 5] in methPercentage.
        Other modes return (None, None).
        """
        if self.sample=="absdiff":
            return -1,1
        elif self.sample in ("reldiff", "folddiff"):
            return -5,5
        else:
            return None, None
    # All four of these inputs are required.
    assert methdatafile is not None
    assert regionsfile is not None
    assert outputfile is not None
    assert genomeBuild is not None

    # Fall back to 10 whenever printThreshold is missing or not an int.
    # None is not an int, so the single isinstance check covers both cases.
    if not isinstance(printThreshold, int):
        printThreshold = 10

    # Unknown genome builds are replaced by hg19 rather than rejected.
    if genomeBuild not in ("hg19", "hg18", "mm9"):
        genomeBuild = "hg19"
        # Single-argument call form: prints the same text under Python 2
        # and also parses under Python 3.
        print("Genome build type unacceptable. Defaulting to genome hg19...")

    regions = SimpleBed(regionsfile)
    methdata = AggregateTree(methdatafile)

    genome = Genome(genomeBuild)

    def methChiSquared(pooledMeth):
        """Run scipy's chi-squared contingency test on pooledMeth.

        pooledMeth is handed to scipy.stats.chi2_contingency unchanged
        (presumably a table of pooled methylated/unmethylated counts --
        confirm against the caller).

        Returns (chi2, p).  When scipy raises ValueError the test is
        treated as non-significant and (None, 1.0) is returned.
        """
        try:
            outcome = scipy.stats.chi2_contingency(pooledMeth)
        except ValueError:
            return None, 1.0
        # Only the statistic and the p-value are needed; the degrees of
        # freedom and the expected table are discarded.
        statistic, pvalue, _dof, _expected = outcome
        return statistic, pvalue

    def methTotals(values):
        lmethTotal = 0
        lunmethTotal = 0
        rmethTotal = 0
Exemple #7
0
    # Gather, per Ensembl id, the log fold change, expression value and
    # p-value of every probe that maps to exactly one Ensembl id.
    for affy in affyCSV:
        ensembls = affyannotation.getValues(affy, "Ensembl")
        # Probes with no Ensembl id, or with several, are ignored.
        if len(ensembls) != 1:
            continue

        affyFC = float(affyCSV[affy][affyfccol])
        # Natural log of the fold change.  Non-positive fold changes use
        # the negated log of their magnitude; a fold change of exactly 0
        # makes math.log raise ValueError.
        if affyFC > 0.0:
            affylogFC = math.log(affyFC)
        else:
            affylogFC = math.log(abs(affyFC)) * -1.0
        affyEnsemblLogFCs[ensembls[0]].append(affylogFC)

        affyexpr = float(affyCSV[affy][affyexprcol])
        affyEnsemblExprs[ensembls[0]].append(affyexpr)

        affyp = float(affyCSV[affy][affypcol])
        affyEnsemblPvalues[ensembls[0]].append(affyp)

    methdata = AggregateTree(methdatafile)

    def methPercentageDiff(values):
        lmethTotal = 0
        lunmethTotal = 0
        rmethTotal = 0
        runmethTotal = 0

        for (lmeth, lunmeth, rmeth, runmeth) in values:
            lmethTotal += lmeth
            lunmethTotal += lunmeth
            rmethTotal += rmeth
            runmethTotal += runmeth

        lmeth = float(lmethTotal)
        lunmeth = float(lunmethTotal)