コード例 #1
0
ファイル: stats.py プロジェクト: t-neumann/slamdunk
def readSummary(filteredFiles, countDirectory, outputFile, log, printOnly=False, verbose=True, force=False):
    """Write a tab-separated per-sample summary table to ``outputFile``.

    For every BAM in ``filteredFiles`` one row is built from ``getSampleInfo``
    (name, type, time, ID) and ``SlamSeqInfo`` (read counters, annotation
    name). Rows are written sorted by integer sample ID.

    When ``countDirectory`` is not None, the matching ``*_tcount.tsv`` file is
    looked up for each BAM, its summed counts are added as a "Counted" column,
    and the PCAPlotter command is run through ``callR`` on a temporary
    "sample name <TAB> count file" list.

    Parameters:
        filteredFiles: iterable of BAM file paths.
        countDirectory: directory holding the tcount files, or None.
        outputFile: path of the summary table to write.
        log: passed through to ``callR``.
        printOnly: forwarded to ``callR`` as ``dry``.
        verbose: forwarded to ``callR``.
        force: accepted for interface compatibility; not used here.
    """
    withCounts = countDirectory is not None

    # Rows keyed by integer sample ID so they can be emitted in ID order.
    contentDict = {}

    columns = ["FileName", "SampleName", "SampleType", "SampleTime", "Sequenced", "Mapped", "Deduplicated", "MQ-Filtered", "Identity-Filtered", "NM-Filtered", "Multimap-Filtered", "Retained"]
    if withCounts:
        columns.append("Counted")
    columns.append("Annotation")

    with open(outputFile, "w") as tsvFile:

        print("# slamdunk summary v" + __version__, file=tsvFile)

        sampleList = None
        if withCounts:
            # Text mode is required: NamedTemporaryFile defaults to binary,
            # which rejects the str written by print() on Python 3.
            sampleList = tempfile.NamedTemporaryFile(mode="w", delete=False)

        try:
            for bam in filteredFiles:
                slamseqInfo = SlamSeqInfo(bam)
                sampleInfo = getSampleInfo(bam)

                row = [bam, sampleInfo.Name, sampleInfo.Type, sampleInfo.Time,
                       str(slamseqInfo.SequencedReads), str(slamseqInfo.MappedReads),
                       str(slamseqInfo.DedupReads), str(slamseqInfo.MQFilteredReads),
                       str(slamseqInfo.IdFilteredReads), str(slamseqInfo.NmFilteredReads),
                       str(slamseqInfo.MultimapperReads), str(slamseqInfo.FilteredReads)]

                if withCounts:
                    countedReads = 0
                    countFile = os.path.join(countDirectory, replaceExtension(os.path.basename(bam), ".tsv", "_tcount"))
                    if os.path.exists(countFile):
                        print(sampleInfo.Name, countFile, sep='\t', file=sampleList)
                        countedReads = sumCounts(countFile)
                    else:
                        print("TCount directory does not seem to contain tcount file for:\t" + countFile)
                    row.append(str(countedReads))

                row.append(slamseqInfo.AnnotationName)

                sampleID = int(sampleInfo.ID)
                if sampleID in contentDict:
                    # Duplicate ID: fall back to len + 1 as before, but keep
                    # advancing until the key is free so no row is overwritten.
                    sampleID = len(contentDict) + 1
                    while sampleID in contentDict:
                        sampleID += 1

                contentDict[sampleID] = "\t".join(row)
        finally:
            if sampleList is not None:
                sampleList.close()

        if withCounts:
            callR(getPlotter("PCAPlotter") + " -f " + sampleList.name + " -O " + replaceExtension(outputFile, ".pdf", "_PCA") + " -P " + replaceExtension(outputFile, ".txt", "_PCA"), log, dry=printOnly, verbose=verbose)

        print("\t".join(columns), file=tsvFile)

        for key in sorted(contentDict):
            print(contentDict[key], file=tsvFile)
コード例 #2
0
ファイル: tcounter.py プロジェクト: selsarrag/slamdunk
def computeTconversions(ref,
                        bed,
                        snpsFile,
                        bam,
                        maxReadLength,
                        minQual,
                        outputCSV,
                        outputBedgraphPlus,
                        outputBedgraphMinus,
                        conversionThreshold,
                        log,
                        mle=False):
    """Count T>C conversions per BED interval and write the result files.

    For every interval in ``bed`` the reads returned by
    ``SlamSeqBamFile.readInRegion`` are tallied per strand, per-position
    coverage and T>C counts are accumulated, and one ``SlamSeqInterval`` line
    is written to ``outputCSV``. Per-position conversion rates on covered
    T (plus strand) / A (minus strand) reference positions are written to the
    two bedgraph files.

    Parameters:
        ref: path of the reference FASTA (opened with ``pysam.FastaFile``).
        bed: path of the BED annotation; intervals must be stranded.
        snpsFile: passed to ``SNPtools.SNPDictionary``.
        bam: filtered BAM file path.
        maxReadLength, minQual, conversionThreshold: forwarded unchanged to
            ``readInRegion``.
        outputCSV: path of the per-interval count table.
        outputBedgraphPlus / outputBedgraphMinus: bedgraph output paths for
            plus / minus strand intervals.
        log: file-like object that receives the annotation-MD5 warning.
        mle: when true, additionally write a ``_perread`` table (per-read T
            and T>C counts) and run the ``compute_conversion_rate_mle``
            command on it.

    Raises:
        RuntimeError: if the BAM version differs from ``__bam_version__``, an
            interval has no strand, or an interval has a negative start.
    """
    referenceFile = pysam.FastaFile(ref)
    # Built once instead of re-listing the references for every interval.
    referenceNames = set(referenceFile.references)

    sampleInfo = getSampleInfo(bam)

    slamseqInfo = SlamSeqInfo(bam)
    # Normalisation base for the reads-per-million value.
    readNumber = slamseqInfo.FilteredReads

    bedMD5 = md5(bed)

    def writeHeader(handle):
        # Identical three-line header for the count table and the per-read table.
        print("#slamdunk v" + __version__,
              __count_version__,
              "sample info:",
              sampleInfo.Name,
              sampleInfo.ID,
              sampleInfo.Type,
              sampleInfo.Time,
              sep="\t",
              file=handle)
        print("#annotation:",
              os.path.basename(bed),
              bedMD5,
              sep="\t",
              file=handle)
        print(SlamSeqInterval.Header, file=handle)

    # "chromosome:position:strand" -> conversion rate (percent) at that position.
    conversionBedGraph = {}

    fileTest = None
    fileCSV = None
    try:
        if (mle):
            fileNameTest = replaceExtension(outputCSV, ".tsv", "_perread")
            fileTest = open(fileNameTest, 'w')
            writeHeader(fileTest)

        fileCSV = open(outputCSV, 'w')
        writeHeader(fileCSV)

        snps = SNPtools.SNPDictionary(snpsFile)
        snps.read()

        # Go through one chr after the other
        testFile = SlamSeqBamFile(bam, ref, snps)
        if not testFile.bamVersion == __bam_version__:
            raise RuntimeError("Wrong filtered BAM file version detected (" +
                               testFile.bamVersion + "). Expected version " +
                               __bam_version__ + ". Please rerun slamdunk filter.")

        if slamseqInfo.AnnotationMD5 != bedMD5:
            print(
                "Warning: MD5 checksum of annotation (" + bedMD5 +
                ") does not matched MD5 in filtered BAM files (" +
                slamseqInfo.AnnotationMD5 +
                "). Most probably the annotation filed changed after the filtered BAM files were created.",
                file=log)

        for utr in BedIterator(bed):
            Tcontent = 0
            # Defaults that are written when no read supports the interval.
            slamSeqUtr = SlamSeqInterval(utr.chromosome, utr.start, utr.stop,
                                         utr.strand, utr.name, Tcontent, 0, 0,
                                         0, 0, 0, 0, 0)
            slamSeqUtrMLE = SlamSeqInterval(utr.chromosome, utr.start,
                                            utr.stop, utr.strand, utr.name,
                                            Tcontent, 0, 0, 0, 0, 0, 0, 0)
            if (not utr.hasStrand()):
                raise RuntimeError(
                    "Input BED file does not contain stranded intervals.")

            if utr.start < 0:
                # str() is required: concatenating the entry object itself
                # raised TypeError instead of this error.
                raise RuntimeError(
                    "Negativ start coordinate found. Please check the following entry in your BED file: "
                    + str(utr))

            if (utr.chromosome in referenceNames):
                refSeq = referenceFile.fetch(reference=utr.chromosome,
                                             start=utr.start,
                                             end=utr.stop).upper()
                # Minus-strand intervals count A on the fetched (forward) sequence.
                if (utr.strand == "-"):
                    Tcontent = refSeq.count("A")
                else:
                    Tcontent = refSeq.count("T")

                # NOTE(review): only slamSeqUtr receives the T content here;
                # slamSeqUtrMLE keeps 0 unless it is rebuilt below - confirm intended.
                slamSeqUtr._Tcontent = Tcontent

            readIterator = testFile.readInRegion(utr.chromosome, utr.start,
                                                 utr.stop, utr.strand,
                                                 maxReadLength, minQual,
                                                 conversionThreshold)

            utrLength = utr.getLength()
            tcCountUtr = [0] * utrLength
            coverageUtr = [0] * utrLength

            # Per-read T count and T>C count, only reported in the per-read table.
            tInReads = []
            tcInRead = []

            countFwd = 0
            tcCountFwd = 0
            countRev = 0
            tCountRev = 0

            multiMapFwd = 0
            multiMapRev = 0

            for read in readIterator:

                # Overwrite any conversions for non-TC reads (reads with < 2 TC conversions)
                if (not read.isTcRead):
                    read.tcCount = 0
                    read.mismatches = []
                    read.conversionRates = 0.0
                    read.tcRate = 0.0

                isReverse = read.direction == ReadDirection.Reverse

                if (isReverse):
                    countRev += 1
                    if read.tcCount > 0:
                        tCountRev += 1
                    if read.isMultimapper:
                        multiMapRev += 1
                else:
                    countFwd += 1
                    if read.tcCount > 0:
                        tcCountFwd += 1
                    if read.isMultimapper:
                        multiMapFwd += 1

                for mismatch in read.mismatches:
                    if (mismatch.isTCMismatch(isReverse)
                            and mismatch.referencePosition >= 0
                            and mismatch.referencePosition < utrLength):
                        tcCountUtr[mismatch.referencePosition] += 1

                testN = read.getTcount()
                testk = 0
                for mismatch in read.mismatches:
                    if (mismatch.referencePosition >= 0
                            and mismatch.referencePosition < utrLength):
                        if (mismatch.isT(isReverse)):
                            testN += 1
                        if (mismatch.isTCMismatch(isReverse)):
                            testk += 1
                tInReads.append(testN)
                tcInRead.append(testk)

                # Clamp the read span to the interval; range() instead of the
                # Python-2-only xrange().
                for i in range(max(read.startRefPos, 0),
                               min(read.endRefPos, utrLength)):
                    coverageUtr[i] += 1

            if ((utr.strand == "+" and countFwd > 0)
                    or (utr.strand == "-" and countRev > 0)):
                tcRateUtr = [
                    x * 100.0 / y if y > 0 else 0
                    for x, y in zip(tcCountUtr, coverageUtr)
                ]

                readCount = countFwd
                tcReadCount = tcCountFwd
                multiMapCount = multiMapFwd

                if (utr.strand == "-"):
                    readCount = countRev
                    tcReadCount = tCountRev
                    multiMapCount = multiMapRev

                if ((utr.strand == "-" and countFwd > countRev)
                        or (utr.strand == "+" and countRev > countFwd)):
                    # Counts are ints and must be converted before concatenation.
                    print(
                        "Warning: " + utr.name + " is located on the " +
                        utr.strand +
                        " strand but read counts are higher for the opposite strand (fwd: "
                        + str(countFwd) + ", rev: " + str(countRev) + ")",
                        file=sys.stderr)

                refSeq = readIterator.getRefSeq()

                # Get number of reads on T positions and number of reads with T->C conversions on T positions
                coverageOnTs = 0
                conversionsOnTs = 0

                # The strand is "+" or "-" inside this branch (see the condition above).
                targetBase = "A" if utr.strand == "-" else "T"

                for position in range(len(coverageUtr)):
                    if (coverageUtr[position] > 0
                            and refSeq[position] == targetBase):
                        coverageOnTs += coverageUtr[position]
                        conversionsOnTs += tcCountUtr[position]
                        conversionBedGraph[utr.chromosome + ":" +
                                           str(utr.start + position) + ":" +
                                           str(utr.strand)] = tcRateUtr[position]

                # reads per million mapped to the UTR
                readsCPM = 0
                if (readNumber > 0):
                    readsCPM = readCount * 1000000.0 / readNumber

                # Convert to SlamSeqInterval and print
                conversionRate = 0
                if (coverageOnTs > 0):
                    conversionRate = float(conversionsOnTs) / float(coverageOnTs)
                slamSeqUtr = SlamSeqInterval(utr.chromosome, utr.start,
                                             utr.stop, utr.strand, utr.name,
                                             Tcontent, readsCPM, coverageOnTs,
                                             conversionsOnTs, conversionRate,
                                             readCount, tcReadCount,
                                             multiMapCount)
                # The per-read variant carries comma-joined per-read values in
                # the two read-count fields.
                slamSeqUtrMLE = SlamSeqInterval(
                    utr.chromosome, utr.start, utr.stop, utr.strand, utr.name,
                    Tcontent, readsCPM, coverageOnTs, conversionsOnTs,
                    conversionRate, ",".join(str(x) for x in tInReads),
                    ",".join(str(x) for x in tcInRead), multiMapCount)

            print(slamSeqUtr, file=fileCSV)
            if (mle):
                print(slamSeqUtrMLE, file=fileTest)
    finally:
        # Close the tables even when an interval raises.
        if fileCSV is not None:
            fileCSV.close()
        if fileTest is not None:
            fileTest.close()

    with open(outputBedgraphPlus, 'w') as fileBedgraphPlus, \
            open(outputBedgraphMinus, 'w') as fileBedgraphMinus:
        for position in conversionBedGraph:
            # rsplit keeps chromosome names that themselves contain ":" intact.
            chromosome, start, strand = position.rsplit(":", 2)
            target = fileBedgraphPlus if strand == "+" else fileBedgraphMinus
            print(chromosome,
                  start,
                  int(start) + 1,
                  conversionBedGraph[position],
                  file=target)

    if (mle):
        fileNameMLE = replaceExtension(outputCSV, ".tsv", "_mle")
        # NOTE(review): callR is invoked with the command only (no log argument)
        # - confirm against its signature.
        callR(
            getPlotter("compute_conversion_rate_mle") + " -f " + fileNameTest +
            " -r " + "0.024" + " -o " + fileNameMLE + " &> /dev/null")
コード例 #3
0
def readSummary(filteredFiles,
                countDirectory,
                outputFile,
                log,
                printOnly=False,
                verbose=True,
                force=False):
    """Write a tab-separated per-sample summary table to ``outputFile``.

    Each BAM in ``filteredFiles`` contributes one row assembled from
    ``getSampleInfo`` (name, type, time, ID) and ``SlamSeqInfo`` (read
    counters, annotation name); rows are emitted sorted by integer sample ID.

    If ``countDirectory`` is given, the ``*_tcount.tsv`` file belonging to each
    BAM is summed into an extra "Counted" column and the PCAPlotter command is
    run via ``callR`` on a temporary list of "sample name <TAB> count file".

    Parameters:
        filteredFiles: iterable of BAM file paths.
        countDirectory: directory with the tcount files, or None.
        outputFile: path of the summary table to write.
        log: passed through to ``callR``.
        printOnly: forwarded to ``callR`` as ``dry``.
        verbose: forwarded to ``callR``.
        force: accepted for interface compatibility; not used here.
    """
    withCounts = countDirectory is not None

    # Rows keyed by integer sample ID so they can be printed in ID order.
    contentDict = {}

    columns = [
        "FileName", "SampleName", "SampleType", "SampleTime", "Sequenced",
        "Mapped", "Deduplicated", "MQ-Filtered", "Identity-Filtered",
        "NM-Filtered", "Multimap-Filtered", "Retained"
    ]
    if withCounts:
        columns.append("Counted")
    columns.append("Annotation")

    with open(outputFile, "w") as tsvFile:

        print("# slamdunk summary v" + __version__, file=tsvFile)

        sampleList = None
        if withCounts:
            # Text mode: the default binary mode rejects the str that print()
            # writes on Python 3.
            sampleList = tempfile.NamedTemporaryFile(mode="w", delete=False)

        try:
            for bam in filteredFiles:
                slamseqInfo = SlamSeqInfo(bam)
                sampleInfo = getSampleInfo(bam)

                row = [
                    bam, sampleInfo.Name, sampleInfo.Type, sampleInfo.Time,
                    str(slamseqInfo.SequencedReads),
                    str(slamseqInfo.MappedReads),
                    str(slamseqInfo.DedupReads),
                    str(slamseqInfo.MQFilteredReads),
                    str(slamseqInfo.IdFilteredReads),
                    str(slamseqInfo.NmFilteredReads),
                    str(slamseqInfo.MultimapperReads),
                    str(slamseqInfo.FilteredReads)
                ]

                if withCounts:
                    countedReads = 0
                    countFile = os.path.join(
                        countDirectory,
                        replaceExtension(os.path.basename(bam), ".tsv",
                                         "_tcount"))
                    if os.path.exists(countFile):
                        print(sampleInfo.Name,
                              countFile,
                              sep='\t',
                              file=sampleList)
                        countedReads = sumCounts(countFile)
                    else:
                        print(
                            "TCount directory does not seem to contain tcount file for:\t"
                            + countFile)
                    row.append(str(countedReads))

                row.append(slamseqInfo.AnnotationName)

                # Keys are stored as int, so the duplicate test must use the
                # int value too; testing the raw ID would never match if it
                # arrives as a string.
                sampleID = int(sampleInfo.ID)
                if sampleID in contentDict:
                    # Fall back to len + 1 as before, then advance until the
                    # key is free so no earlier row is overwritten.
                    sampleID = len(contentDict) + 1
                    while sampleID in contentDict:
                        sampleID += 1

                contentDict[sampleID] = "\t".join(row)
        finally:
            if sampleList is not None:
                sampleList.close()

        if withCounts:
            callR(getPlotter("PCAPlotter") + " -f " + sampleList.name +
                  " -O " + replaceExtension(outputFile, ".pdf", "_PCA") +
                  " -P " + replaceExtension(outputFile, ".txt", "_PCA"),
                  log,
                  dry=printOnly,
                  verbose=verbose)

        print("\t".join(columns), file=tsvFile)

        for key in sorted(contentDict):
            print(contentDict[key], file=tsvFile)
コード例 #4
0
def statsComputeOverallRatesPerUTR(referenceFile,
                                   bam,
                                   minBaseQual,
                                   strictTCs,
                                   outputCSV,
                                   outputPDF,
                                   utrBed,
                                   maxReadLength,
                                   log,
                                   printOnly=False,
                                   verbose=True,
                                   force=False):
    """Sum read conversion rates per BED interval and plot them.

    Step 1 (skipped when ``checkStep`` says ``outputCSV`` is up to date): for
    every interval in ``utrBed`` the 25-element ``conversionRates`` lists of
    its reads are summed and written as one row of ``outputCSV``. The file
    header also carries the median, over intervals, of each of the twelve
    base-conversion percentages.

    Step 2 (skipped when ``outputPDF`` is up to date): the globalRatePlotter
    command is run through ``callR`` on a temporary
    "sample name <TAB> outputCSV" file.

    Parameters:
        referenceFile: reference path, passed to ``SlamSeqBamFile``.
        bam: BAM file path.
        minBaseQual, maxReadLength: forwarded to ``readInRegion``.
        strictTCs: when true, reads with ``tcCount > 0`` that are not flagged
            ``isTcRead`` are left out.
        outputCSV: path of the rate table.
        outputPDF: path handed to the plotter via ``-O``.
        utrBed: BED file with the intervals.
        log: receives the "Skipped ..." messages and is passed to ``callR``.
        printOnly: forwarded to ``callR`` as ``dry``.
        verbose: forwarded to ``callR``.
        force: forwarded to ``checkStep``.
    """
    import os

    sampleInfo = getSampleInfo(bam)

    # Result unused; call kept in case construction validates the BAM - TODO confirm.
    SlamSeqInfo(bam)

    if (not checkStep([bam, referenceFile], [outputCSV], force)):
        print("Skipped computing overall rates for file " + bam, file=log)
    else:

        # Go through one chr after the other
        testFile = SlamSeqBamFile(bam, referenceFile, None)

        bases = "ACGT"
        complement = {"A": "T", "C": "G", "G": "C", "T": "A"}

        plotConversions = [
            'A>T',
            'A>G',
            'A>C',
            'C>A',
            'C>G',
            'C>T',
            'G>A',
            'G>C',
            'G>T',
            'T>A',
            'T>G',
            'T>C',
        ]

        # UTR stats for MultiQC: conversion -> list of per-interval percentages
        utrStats = dict((conversion, []) for conversion in plotConversions)

        # Text mode so print() works on Python 3 (the default is binary).
        f = tempfile.NamedTemporaryFile(mode="w", delete=False)

        try:
            for utr in BedIterator(utrBed):

                readIterator = testFile.readInRegion(utr.chromosome,
                                                     utr.start, utr.stop,
                                                     utr.strand,
                                                     maxReadLength,
                                                     minBaseQual)

                totalRates = [0] * 25

                readCount = 0
                for read in readIterator:
                    if (not read.isTcRead and strictTCs and read.tcCount > 0):
                        continue
                    # Add rates from read to total rates
                    totalRates = sumLists(totalRates, read.conversionRates)
                    readCount += 1

                print(utr.name,
                      utr.chromosome,
                      utr.start,
                      utr.stop,
                      utr.strand,
                      readCount,
                      "\t".join(str(x) for x in totalRates),
                      sep="\t",
                      file=f)

                # totalRates is read as a 5x5 table; only the first four rows
                # and columns (A, C, G, T) are used here.
                counts = {}
                for i, refBase in enumerate(bases):
                    for j, readBase in enumerate(bases):
                        counts[refBase + ">" + readBase] = totalRates[i * 5 + j]

                # Sum over all 16 entries. The original "conversionSum = +x"
                # (unary plus) only kept the last value, T>T.
                conversionSum = sum(counts.values())

                if utr.strand == "-":
                    # Minus strand: every entry moves to its complemented pair
                    # (A>C <-> T>G, C>T <-> G>A, ...).
                    counts = dict(
                        (complement[key[0]] + ">" + complement[key[2]], value)
                        for key, value in counts.items())

                if conversionSum > 0:
                    for refBase in bases:
                        refTotal = sum(counts[refBase + ">" + readBase]
                                       for readBase in bases)
                        for readBase in bases:
                            if readBase == refBase:
                                continue
                            if refTotal > 0:
                                rate = (counts[refBase + ">" + readBase] /
                                        float(refTotal) * 100)
                            else:
                                rate = 0
                            utrStats[refBase + ">" + readBase].append(rate)
        finally:
            f.close()

        with open(outputCSV, "w") as fo:

            print("# slamdunk utrrates v" + __version__, file=fo)

            print("# Median-Conversions=" + ",".join(
                conversion + ":" + str(np.median(utrStats[conversion]))
                for conversion in plotConversions),
                  file=fo)

            columns = ["Name", "Chr", "Start", "End", "Strand", "ReadCount"]
            columns.extend(toBase[i].upper() + "_" + toBase[j].upper()
                           for i in range(0, 5) for j in range(0, 5))
            print("\t".join(columns), file=fo)

            # Append the per-interval rows collected above (text to text).
            with open(f.name, "r") as valueFile:
                fo.write(valueFile.read())

        # The temporary row file has been copied and is no longer needed.
        os.remove(f.name)

    if (not checkStep([bam, referenceFile], [outputPDF], force)):
        print("Skipped computing global rate pdfs for file " + bam, file=log)
    else:
        f = tempfile.NamedTemporaryFile(mode="w", delete=False)
        print(sampleInfo.Name, outputCSV, sep='\t', file=f)
        f.close()

        callR(getPlotter("globalRatePlotter") + " -f " + f.name + " -O " +
              outputPDF,
              log,
              dry=printOnly,
              verbose=verbose)
コード例 #5
0
ファイル: stats.py プロジェクト: t-neumann/slamdunk
def statsComputeOverallRatesPerUTR(referenceFile, bam, minBaseQual, strictTCs, outputCSV, outputPDF, utrBed, maxReadLength, log, printOnly=False, verbose=True, force=False):
    """Sum read conversion rates per BED interval and plot them.

    Step 1 (skipped when ``checkStep`` reports ``outputCSV`` as up to date):
    for each interval in ``utrBed`` the 25-element ``conversionRates`` lists of
    its reads are added up and written as one row of ``outputCSV``; the header
    additionally lists the median, over intervals, of the twelve
    base-conversion percentages.

    Step 2 (skipped when ``outputPDF`` is up to date): the globalRatePlotter
    command is run through ``callR`` on a temporary
    "sample name <TAB> outputCSV" file.

    Parameters:
        referenceFile: reference path, passed to ``SlamSeqBamFile``.
        bam: BAM file path.
        minBaseQual, maxReadLength: forwarded to ``readInRegion``.
        strictTCs: when true, reads with ``tcCount > 0`` that are not flagged
            ``isTcRead`` are left out.
        outputCSV: path of the rate table.
        outputPDF: path handed to the plotter via ``-O``.
        utrBed: BED file with the intervals.
        log: receives the "Skipped ..." messages and is passed to ``callR``.
        printOnly: forwarded to ``callR`` as ``dry``.
        verbose: forwarded to ``callR``.
        force: forwarded to ``checkStep``.
    """
    import os

    sampleInfo = getSampleInfo(bam)

    # Result unused; call kept in case construction validates the BAM - TODO confirm.
    SlamSeqInfo(bam)

    if(not checkStep([bam, referenceFile], [outputCSV], force)):
        print("Skipped computing overall rates for file " + bam, file=log)
    else:

        # Go through one chr after the other
        testFile = SlamSeqBamFile(bam, referenceFile, None)

        bases = "ACGT"
        complement = {"A": "T", "C": "G", "G": "C", "T": "A"}

        plotConversions = ['A>T', 'A>G', 'A>C',
                           'C>A', 'C>G', 'C>T',
                           'G>A', 'G>C', 'G>T',
                           'T>A', 'T>G', 'T>C',
        ]

        # UTR stats for MultiQC: conversion -> list of per-interval percentages
        utrStats = dict((conversion, []) for conversion in plotConversions)

        # Text mode so print() works on Python 3 (the default is binary).
        f = tempfile.NamedTemporaryFile(mode="w", delete=False)

        try:
            for utr in BedIterator(utrBed):

                readIterator = testFile.readInRegion(utr.chromosome, utr.start, utr.stop, utr.strand, maxReadLength, minBaseQual)

                totalRates = [0] * 25

                readCount = 0
                for read in readIterator:
                    if (not read.isTcRead and strictTCs and read.tcCount > 0):
                        continue
                    # Add rates from read to total rates
                    totalRates = sumLists(totalRates, read.conversionRates)
                    readCount += 1

                print(utr.name, utr.chromosome, utr.start, utr.stop, utr.strand, readCount, "\t".join(str(x) for x in totalRates), sep="\t", file=f)

                # totalRates is read as a 5x5 table; only the first four rows
                # and columns (A, C, G, T) are used here.
                counts = {}
                for i, refBase in enumerate(bases):
                    for j, readBase in enumerate(bases):
                        counts[refBase + ">" + readBase] = totalRates[i * 5 + j]

                # Sum over all 16 entries. The original "conversionSum =+ x"
                # assigned +x each time and therefore only kept the last value, T>T.
                conversionSum = sum(counts.values())

                if utr.strand == "-":
                    # Minus strand: every entry moves to its complemented pair
                    # (A>C <-> T>G, C>T <-> G>A, ...).
                    counts = dict((complement[key[0]] + ">" + complement[key[2]], value) for key, value in counts.items())

                if conversionSum > 0:
                    for refBase in bases:
                        refTotal = sum(counts[refBase + ">" + readBase] for readBase in bases)
                        for readBase in bases:
                            if readBase == refBase:
                                continue
                            if refTotal > 0:
                                rate = counts[refBase + ">" + readBase] / float(refTotal) * 100
                            else:
                                rate = 0
                            utrStats[refBase + ">" + readBase].append(rate)
        finally:
            f.close()

        with open(outputCSV, "w") as fo:

            print("# slamdunk utrrates v" + __version__, file=fo)

            print("# Median-Conversions=" + ",".join(conversion + ":" + str(np.median(utrStats[conversion])) for conversion in plotConversions), file=fo)

            columns = ["Name", "Chr", "Start", "End", "Strand", "ReadCount"]
            columns.extend(toBase[i].upper() + "_" + toBase[j].upper() for i in range(0, 5) for j in range(0, 5))
            print("\t".join(columns), file=fo)

            # Append the per-interval rows collected above (text to text).
            with open(f.name, "r") as valueFile:
                fo.write(valueFile.read())

        # The temporary row file has been copied and is no longer needed.
        os.remove(f.name)

    if(not checkStep([bam, referenceFile], [outputPDF], force)):
        print("Skipped computing global rate pdfs for file " + bam, file=log)
    else:
        f = tempfile.NamedTemporaryFile(mode="w", delete=False)
        print(sampleInfo.Name, outputCSV, sep='\t', file=f)
        f.close()

        callR(getPlotter("globalRatePlotter") + " -f " + f.name + " -O " + outputPDF, log, dry=printOnly, verbose=verbose)