Esempio n. 1
0
def runCount(tid, bam, ref, bed, maxLength, minQual, conversionThreshold,
             outputDirectory, snpDirectory):
    """Run the T-conversion counting step for a single BAM file.

    All output paths are built inside ``outputDirectory`` from the base name
    of ``bam`` via ``replaceExtension``.

    Args:
        tid: Not used by this function.
            NOTE(review): presumably a task id supplied by the scheduler.
        bam: Path of the BAM file to process.
        ref: Forwarded unchanged to ``tcounter.computeTconversions``.
        bed: Forwarded unchanged to ``tcounter.computeTconversions``.
        maxLength: Maximum read length. When ``None`` it is obtained from
            ``estimateMaxReadLength(bam)``.
        minQual: Forwarded unchanged to ``tcounter.computeTconversions``.
        conversionThreshold: Forwarded unchanged to
            ``tcounter.computeTconversions``.
        outputDirectory: Directory that receives the tsv, bedgraph and log
            files.
        snpDirectory: Directory in which the per-sample ``_snp`` VCF is looked
            up, or ``None`` to pass no SNP file.

    Returns:
        The path of the ``_tcount`` tsv file.

    Raises:
        SystemExit: With status 1 when the (given or estimated) maximum read
            length is negative.
    """
    # Validate the read length first: nothing below is needed if we abort.
    if maxLength is None:
        maxLength = estimateMaxReadLength(bam)
    if maxLength < 0:
        print(
            "Difference between minimum and maximum read length is > 10. "
            "Please specify --max-read-length parameter.",
            file=sys.stderr)
        # A fatal error must not report success to the calling shell.
        sys.exit(1)

    bamName = basename(bam)
    outputCSV = os.path.join(
        outputDirectory, replaceExtension(bamName, ".tsv", "_tcount"))
    outputBedgraphPlus = os.path.join(
        outputDirectory,
        replaceExtension(bamName, ".bedgraph", "_tcount_plus"))
    # NOTE(review): "_mins" looks like a typo for "_minus"; kept as-is because
    # other tools may already rely on this file name.
    outputBedgraphMinus = os.path.join(
        outputDirectory,
        replaceExtension(bamName, ".bedgraph", "_tcount_mins"))
    outputLOG = os.path.join(
        outputDirectory, replaceExtension(bamName, ".log", "_tcount"))

    inputSNP = None
    if snpDirectory is not None:
        inputSNP = os.path.join(
            snpDirectory, replaceExtension(bamName, ".vcf", "_snp"))

    log = getLogFile(outputLOG)
    try:
        print("Using " + str(maxLength) + " as maximum read length.",
              file=log)
        tcounter.computeTconversions(ref, bed, inputSNP, bam, maxLength,
                                     minQual, outputCSV, outputBedgraphPlus,
                                     outputBedgraphMinus, conversionThreshold,
                                     log)
    finally:
        # Release the log even when counting fails.
        closeLogFile(log)

    stepFinished()
    return outputCSV
Esempio n. 2
0
def runTcPerUtr(tid, bam, referenceFile, bed, minMQ, maxReadLength,
                outputDirectory, snpDirectory, vcfFile):
    """Run ``stats.tcPerUtr`` for a single BAM file.

    The csv, pdf and log paths are built inside ``outputDirectory`` from the
    base name of ``bam`` via ``replaceExtension`` with the ``_tcperutr``
    suffix.

    Args:
        tid: Not used by this function.
            NOTE(review): presumably a task id supplied by the scheduler.
        bam: Path of the BAM file to process.
        referenceFile: Forwarded unchanged to ``stats.tcPerUtr``.
        bed: Forwarded unchanged to ``stats.tcPerUtr``.
        minMQ: Forwarded unchanged to ``stats.tcPerUtr``.
        maxReadLength: Maximum read length. When ``None`` it is obtained from
            ``estimateMaxReadLength(bam)``.
        outputDirectory: Directory that receives the csv, pdf and log files.
        snpDirectory: Directory in which the per-sample ``_snp`` VCF is looked
            up when ``vcfFile`` is ``None``.
        vcfFile: Explicit VCF path; takes precedence over ``snpDirectory``.

    Raises:
        SystemExit: With status 1 when the (given or estimated) maximum read
            length is negative.
    """
    # Validate the read length first: nothing below is needed if we abort.
    if maxReadLength is None:
        maxReadLength = estimateMaxReadLength(bam)
    if maxReadLength < 0:
        print(
            "Could not reliable estimate maximum read length. "
            "Please specify --max-read-length parameter.",
            file=sys.stderr)
        # A fatal error must not report success to the calling shell.
        sys.exit(1)

    bamName = basename(bam)
    outputCSV = os.path.join(
        outputDirectory, replaceExtension(bamName, ".csv", "_tcperutr"))
    outputPDF = os.path.join(
        outputDirectory, replaceExtension(bamName, ".pdf", "_tcperutr"))
    outputLOG = os.path.join(
        outputDirectory, replaceExtension(bamName, ".log", "_tcperutr"))

    # An explicit VCF wins over the per-sample file in the SNP directory.
    if vcfFile is not None:
        inputSNP = vcfFile
    elif snpDirectory is not None:
        inputSNP = os.path.join(
            snpDirectory, replaceExtension(bamName, ".vcf", "_snp"))
    else:
        inputSNP = None

    log = getLogFile(outputLOG)
    try:
        print("Using " + str(maxReadLength) + " as maximum read length.",
              file=log)
        # NOTE(review): the meaning of the three trailing flags is defined by
        # stats.tcPerUtr and is not visible here; values kept unchanged.
        stats.tcPerUtr(referenceFile, bed, bam, minMQ, maxReadLength,
                       outputCSV, outputPDF, inputSNP, log, False, True, True)
    finally:
        # Release the log even when the statistics step fails.
        closeLogFile(log)

    stepFinished()
Esempio n. 3
0
def runStatsRatesUTR(tid, bam, referenceFile, minMQ, strictTCs,
                     outputDirectory, utrFile, maxReadLength):
    """Run ``stats.statsComputeOverallRatesPerUTR`` for a single BAM file.

    The csv, pdf and log paths are built inside ``outputDirectory`` from the
    base name of ``bam`` via ``replaceExtension`` with the
    ``_mutationrates_utr`` suffix.

    Args:
        tid: Not used by this function.
            NOTE(review): presumably a task id supplied by the scheduler.
        bam: Path of the BAM file to process.
        referenceFile: Forwarded unchanged to the statistics routine.
        minMQ: Forwarded unchanged to the statistics routine.
        strictTCs: Forwarded unchanged to the statistics routine.
        outputDirectory: Directory that receives the csv, pdf and log files.
        utrFile: Forwarded unchanged to the statistics routine.
        maxReadLength: Maximum read length. When ``None`` it is obtained from
            ``estimateMaxReadLength(bam)``.

    Raises:
        SystemExit: With status 1 when the (given or estimated) maximum read
            length is negative.
    """
    # Validate the read length first: nothing below is needed if we abort.
    if maxReadLength is None:
        maxReadLength = estimateMaxReadLength(bam)
    if maxReadLength < 0:
        print(
            "Could not reliable estimate maximum read length. "
            "Please specify --max-read-length parameter.",
            file=sys.stderr)
        # A fatal error must not report success to the calling shell.
        sys.exit(1)

    bamName = basename(bam)
    outputCSV = os.path.join(
        outputDirectory,
        replaceExtension(bamName, ".csv", "_mutationrates_utr"))
    outputPDF = os.path.join(
        outputDirectory,
        replaceExtension(bamName, ".pdf", "_mutationrates_utr"))
    outputLOG = os.path.join(
        outputDirectory,
        replaceExtension(bamName, ".log", "_mutationrates_utr"))

    log = getLogFile(outputLOG)
    try:
        print("Using " + str(maxReadLength) + " as maximum read length.",
              file=log)
        stats.statsComputeOverallRatesPerUTR(referenceFile, bam, minMQ,
                                             strictTCs, outputCSV, outputPDF,
                                             utrFile, maxReadLength, log)
    finally:
        # Release the log even when the statistics step fails.
        closeLogFile(log)

    stepFinished()
Esempio n. 4
0
def runSNPeval(tid, bam, ref, bed, maxLength, minQual, coverageCutoff,
               variantFraction, strictTCs, outputDirectory, snpDirectory):
    """Run ``stats.computeSNPMaskedRates`` for a single BAM file.

    The csv, pdf and log paths are built inside ``outputDirectory`` from the
    base name of ``bam`` via ``replaceExtension`` with the ``_SNPeval``
    suffix. The SNP input is the per-sample ``_snp`` VCF path inside
    ``snpDirectory``.

    Args:
        tid: Not used by this function.
            NOTE(review): presumably a task id supplied by the scheduler.
        bam: Path of the BAM file to process.
        ref: Forwarded unchanged to ``stats.computeSNPMaskedRates``.
        bed: Forwarded unchanged to ``stats.computeSNPMaskedRates``.
        maxLength: Maximum read length. When ``None`` it is obtained from
            ``estimateMaxReadLength(bam)``.
        minQual: Forwarded unchanged to ``stats.computeSNPMaskedRates``.
        coverageCutoff: Forwarded unchanged to
            ``stats.computeSNPMaskedRates``.
        variantFraction: Forwarded unchanged to
            ``stats.computeSNPMaskedRates``.
        strictTCs: Forwarded unchanged to ``stats.computeSNPMaskedRates``.
        outputDirectory: Directory that receives the csv, pdf and log files.
        snpDirectory: Existing directory holding the per-sample VCF files.

    Raises:
        SystemExit: With status 1 when ``snpDirectory`` is ``None`` or not an
            existing directory, or when the (given or estimated) maximum read
            length is negative.
    """
    # os.path.isdir(None) raises TypeError, so treat a missing argument the
    # same way as a missing directory.
    if snpDirectory is None or not os.path.isdir(snpDirectory):
        print("SNP directory does not exists. Abort.", file=sys.stderr)
        # A fatal error must not report success to the calling shell.
        sys.exit(1)

    if maxLength is None:
        maxLength = estimateMaxReadLength(bam)
    if maxLength < 0:
        print(
            "Could not reliable estimate maximum read length. "
            "Please specify --max-read-length parameter.",
            file=sys.stderr)
        sys.exit(1)

    bamName = basename(bam)
    outputCSV = os.path.join(
        outputDirectory, replaceExtension(bamName, ".csv", "_SNPeval"))
    outputPDF = os.path.join(
        outputDirectory, replaceExtension(bamName, ".pdf", "_SNPeval"))
    outputLOG = os.path.join(
        outputDirectory, replaceExtension(bamName, ".log", "_SNPeval"))
    inputSNP = os.path.join(snpDirectory,
                            replaceExtension(bamName, ".vcf", "_snp"))

    log = getLogFile(outputLOG)
    try:
        print("Using " + str(maxLength) + " as maximum read length.",
              file=log)
        stats.computeSNPMaskedRates(ref, bed, inputSNP, bam, maxLength,
                                    minQual, coverageCutoff, variantFraction,
                                    outputCSV, outputPDF, strictTCs, log)
    finally:
        # Release the log even when the evaluation fails.
        closeLogFile(log)

    stepFinished()
Esempio n. 5
0
def runCount(
    bam,
    ref,
    bed,
    maxLength,
    minQual,
    conversionThreshold,
    is_inverse,
    outputDirectory,
    snpDirectory,
    vcfFile,
):
    """Run T-conversion counting for one BAM file, with or without a BED file.

    With a BED file the work is delegated to ``tcounter.computeTconversions``;
    without one, ``tcounter.computeTconversionsAll`` is called and two extra
    ``*_new`` bedgraph paths are supplied. All output paths are built inside
    ``outputDirectory`` from the base name of ``bam`` via
    ``replaceExtension``.

    Args:
        bam: Path of the BAM file to process.
        ref: Forwarded unchanged to the selected tcounter routine.
        bed: Forwarded to ``computeTconversions``; ``None`` selects the
            whole-genome branch.
        maxLength: Maximum read length. When ``None`` it is obtained from
            ``estimateMaxReadLength(bam)``. Only the BED branch forwards it.
        minQual: Forwarded unchanged to the selected tcounter routine.
        conversionThreshold: Forwarded unchanged to the selected tcounter
            routine.
        is_inverse: Forwarded only to ``computeTconversionsAll``.
        outputDirectory: Directory that receives the output and log files.
        snpDirectory: Directory in which the per-sample ``_snp`` VCF is looked
            up when ``vcfFile`` is ``None``.
        vcfFile: Explicit VCF path; takes precedence over ``snpDirectory``.

    Returns:
        The path of the ``_tcount`` tsv file. Note that this path is only
        handed to the BED branch; the whole-genome call does not receive it.

    Raises:
        SystemExit: With status 1 when the (given or estimated) maximum read
            length is negative.
    """
    # Validate the read length first: nothing below is needed if we abort.
    if maxLength is None:
        maxLength = estimateMaxReadLength(bam)
    if maxLength < 0:
        print("Difference between minimum and maximum read length is > 10. "
              "Please specify --max-read-length parameter.",
              file=sys.stderr)
        # A fatal error must not report success to the calling shell.
        sys.exit(1)

    bamName = basename(bam)
    outputCSV = os.path.join(
        outputDirectory, replaceExtension(bamName, ".tsv", "_tcount"))
    outputBedgraphPlus = os.path.join(
        outputDirectory,
        replaceExtension(bamName, ".bedgraph", "_tcount_plus"))
    # NOTE(review): "_mins" looks like a typo for "_minus"; kept as-is because
    # other tools may already rely on these file names.
    outputBedgraphMinus = os.path.join(
        outputDirectory,
        replaceExtension(bamName, ".bedgraph", "_tcount_mins"))
    outputLOG = os.path.join(
        outputDirectory, replaceExtension(bamName, ".log", "_tcount"))

    # An explicit VCF wins over the per-sample file in the SNP directory.
    if vcfFile is not None:
        inputSNP = vcfFile
    elif snpDirectory is not None:
        inputSNP = os.path.join(
            snpDirectory, replaceExtension(bamName, ".vcf", "_snp"))
    else:
        inputSNP = None

    log = getLogFile(outputLOG)
    try:
        print("Using " + str(maxLength) + " as maximum read length.",
              file=log)
        if bed is not None:
            message("Bed file detected.")
            tcounter.computeTconversions(ref, bed, inputSNP, bam, maxLength,
                                         minQual, outputCSV,
                                         outputBedgraphPlus,
                                         outputBedgraphMinus,
                                         conversionThreshold, log)
        else:
            message("No bed file passed. Count w.r.t. the full genome.")
            outputBedgraphPlusNew = os.path.join(
                outputDirectory,
                replaceExtension(bamName, ".bedgraph", "_tcount_plus_new"))
            outputBedgraphMinusNew = os.path.join(
                outputDirectory,
                replaceExtension(bamName, ".bedgraph", "_tcount_mins_new"))
            tcounter.computeTconversionsAll(ref, inputSNP, bam,
                                            outputBedgraphPlus,
                                            outputBedgraphPlusNew,
                                            outputBedgraphMinus,
                                            outputBedgraphMinusNew,
                                            conversionThreshold, minQual,
                                            is_inverse, log)
    finally:
        # Release the log even when counting fails.
        closeLogFile(log)

    stepFinished()
    return outputCSV