def runCount(tid, bam, ref, bed, maxLength, minQual, conversionThreshold,
             outputDirectory, snpDirectory):
    """Run T-conversion counting for one BAM file and return the TSV path.

    All output paths are built inside ``outputDirectory`` from the BAM
    file's base name via ``replaceExtension``.

    NOTE(review): a second ``runCount`` with a different signature is defined
    later in this file; in Python the later definition replaces this one at
    import time -- confirm which of the two is meant to be live.

    Args:
        tid: Not used inside this function.
        bam: Path of the input BAM file.
        ref: Handed unchanged to ``tcounter.computeTconversions``.
        bed: Handed unchanged to ``tcounter.computeTconversions``.
        maxLength: Maximum read length. When ``None`` it is obtained from
            ``estimateMaxReadLength(bam)``.
        minQual: Handed unchanged to ``tcounter.computeTconversions``.
        conversionThreshold: Handed unchanged to
            ``tcounter.computeTconversions``.
        outputDirectory: Directory that receives the TSV, bedgraph and log
            files.
        snpDirectory: Directory in which the SNP VCF for this BAM is looked
            up, or ``None`` to run without an SNP file.

    Returns:
        The path of the ``_tcount`` TSV file.

    Raises:
        SystemExit: With status 0 when the read length had to be estimated
            and the estimate is negative.
    """
    bamName = basename(bam)

    outputCSV = os.path.join(
        outputDirectory, replaceExtension(bamName, ".tsv", "_tcount"))
    outputBedgraphPlus = os.path.join(
        outputDirectory, replaceExtension(bamName, ".bedgraph", "_tcount_plus"))
    # The "_mins" suffix is kept as-is: it is part of the produced file name.
    outputBedgraphMinus = os.path.join(
        outputDirectory, replaceExtension(bamName, ".bedgraph", "_tcount_mins"))
    outputLOG = os.path.join(
        outputDirectory, replaceExtension(bamName, ".log", "_tcount"))

    if snpDirectory is not None:
        inputSNP = os.path.join(
            snpDirectory, replaceExtension(bamName, ".vcf", "_snp"))
    else:
        inputSNP = None

    if maxLength is None:
        maxLength = estimateMaxReadLength(bam)
    if maxLength < 0:
        print(
            "Difference between minimum and maximum read length is > 10. Please specify --max-read-length parameter."
        )
        # Exit status 0 is preserved from the original behaviour.
        sys.exit(0)

    log = getLogFile(outputLOG)
    try:
        print("Using " + str(maxLength) + " as maximum read length.", file=log)
        tcounter.computeTconversions(ref, bed, inputSNP, bam, maxLength,
                                     minQual, outputCSV, outputBedgraphPlus,
                                     outputBedgraphMinus, conversionThreshold,
                                     log)
    finally:
        # Release the log handle even when counting fails, as the other
        # run* steps in this file do via closeLogFile.
        closeLogFile(log)

    stepFinished()
    return outputCSV
def runTcPerUtr(tid, bam, referenceFile, bed, minMQ, maxReadLength,
                outputDirectory, snpDirectory, vcfFile):
    """Run the per-UTR T>C statistics step for one BAM file.

    Output paths (CSV, PDF, log) are built inside ``outputDirectory`` from
    the BAM file's base name via ``replaceExtension``.

    Args:
        tid: Not used inside this function.
        bam: Path of the input BAM file.
        referenceFile: Handed unchanged to ``stats.tcPerUtr``.
        bed: Handed unchanged to ``stats.tcPerUtr``.
        minMQ: Handed unchanged to ``stats.tcPerUtr``.
        maxReadLength: Maximum read length. When ``None`` it is obtained from
            ``estimateMaxReadLength(bam)``.
        outputDirectory: Directory that receives the CSV, PDF and log files.
        snpDirectory: Directory in which the SNP VCF for this BAM is looked
            up when ``vcfFile`` is ``None``; may itself be ``None``.
        vcfFile: Explicit SNP VCF path; takes precedence over
            ``snpDirectory`` when not ``None``.

    Raises:
        SystemExit: With status 0 when the read length had to be estimated
            and the estimate is negative.
    """
    bamName = basename(bam)

    outputCSV = os.path.join(
        outputDirectory, replaceExtension(bamName, ".csv", "_tcperutr"))
    outputPDF = os.path.join(
        outputDirectory, replaceExtension(bamName, ".pdf", "_tcperutr"))
    outputLOG = os.path.join(
        outputDirectory, replaceExtension(bamName, ".log", "_tcperutr"))

    # An explicitly supplied VCF wins over the per-sample file in snpDirectory.
    if vcfFile is not None:
        inputSNP = vcfFile
    elif snpDirectory is not None:
        inputSNP = os.path.join(
            snpDirectory, replaceExtension(bamName, ".vcf", "_snp"))
    else:
        inputSNP = None

    if maxReadLength is None:
        maxReadLength = estimateMaxReadLength(bam)
    if maxReadLength < 0:
        print(
            "Could not reliable estimate maximum read length. Please specify --max-read-length parameter."
        )
        # Exit status 0 is preserved from the original behaviour.
        sys.exit(0)

    log = getLogFile(outputLOG)
    try:
        print("Using " + str(maxReadLength) + " as maximum read length.",
              file=log)
        # The three trailing positional flags are defined by stats.tcPerUtr;
        # their meaning is not visible from this function.
        stats.tcPerUtr(referenceFile, bed, bam, minMQ, maxReadLength,
                       outputCSV, outputPDF, inputSNP, log, False, True, True)
    finally:
        # Close the log even if the statistics step raises.
        closeLogFile(log)

    stepFinished()
def runStatsRatesUTR(tid, bam, referenceFile, minMQ, strictTCs,
                     outputDirectory, utrFile, maxReadLength):
    """Run the per-UTR overall mutation-rate statistics for one BAM file.

    Output paths (CSV, PDF, log) are built inside ``outputDirectory`` from
    the BAM file's base name via ``replaceExtension``.

    Args:
        tid: Not used inside this function.
        bam: Path of the input BAM file.
        referenceFile: Handed unchanged to
            ``stats.statsComputeOverallRatesPerUTR``.
        minMQ: Handed unchanged to ``stats.statsComputeOverallRatesPerUTR``.
        strictTCs: Handed unchanged to
            ``stats.statsComputeOverallRatesPerUTR``.
        outputDirectory: Directory that receives the CSV, PDF and log files.
        utrFile: Handed unchanged to ``stats.statsComputeOverallRatesPerUTR``.
        maxReadLength: Maximum read length. When ``None`` it is obtained from
            ``estimateMaxReadLength(bam)``.

    Raises:
        SystemExit: With status 0 when the read length had to be estimated
            and the estimate is negative.
    """
    bamName = basename(bam)

    outputCSV = os.path.join(
        outputDirectory,
        replaceExtension(bamName, ".csv", "_mutationrates_utr"))
    outputPDF = os.path.join(
        outputDirectory,
        replaceExtension(bamName, ".pdf", "_mutationrates_utr"))
    outputLOG = os.path.join(
        outputDirectory,
        replaceExtension(bamName, ".log", "_mutationrates_utr"))

    if maxReadLength is None:
        maxReadLength = estimateMaxReadLength(bam)
    if maxReadLength < 0:
        print(
            "Could not reliable estimate maximum read length. Please specify --max-read-length parameter."
        )
        # Exit status 0 is preserved from the original behaviour.
        sys.exit(0)

    log = getLogFile(outputLOG)
    try:
        print("Using " + str(maxReadLength) + " as maximum read length.",
              file=log)
        stats.statsComputeOverallRatesPerUTR(referenceFile, bam, minMQ,
                                             strictTCs, outputCSV, outputPDF,
                                             utrFile, maxReadLength, log)
    finally:
        # Close the log even if the statistics step raises.
        closeLogFile(log)

    stepFinished()
def runSNPeval(tid, bam, ref, bed, maxLength, minQual, coverageCutoff,
               variantFraction, strictTCs, outputDirectory, snpDirectory):
    """Run the SNP-masked rate evaluation for one BAM file.

    Output paths (CSV, PDF, log) are built inside ``outputDirectory`` from
    the BAM file's base name via ``replaceExtension``. The SNP VCF is looked
    up in ``snpDirectory``, which must be an existing directory.

    Args:
        tid: Not used inside this function.
        bam: Path of the input BAM file.
        ref: Handed unchanged to ``stats.computeSNPMaskedRates``.
        bed: Handed unchanged to ``stats.computeSNPMaskedRates``.
        maxLength: Maximum read length. When ``None`` it is obtained from
            ``estimateMaxReadLength(bam)``.
        minQual: Handed unchanged to ``stats.computeSNPMaskedRates``.
        coverageCutoff: Handed unchanged to ``stats.computeSNPMaskedRates``.
        variantFraction: Handed unchanged to ``stats.computeSNPMaskedRates``.
        strictTCs: Handed unchanged to ``stats.computeSNPMaskedRates``.
        outputDirectory: Directory that receives the CSV, PDF and log files.
        snpDirectory: Existing directory containing the SNP VCF for this BAM.

    Raises:
        SystemExit: With status 0 when ``snpDirectory`` is ``None`` or not a
            directory, or when the read length had to be estimated and the
            estimate is negative.
    """
    bamName = basename(bam)

    outputCSV = os.path.join(
        outputDirectory, replaceExtension(bamName, ".csv", "_SNPeval"))
    outputPDF = os.path.join(
        outputDirectory, replaceExtension(bamName, ".pdf", "_SNPeval"))
    outputLOG = os.path.join(
        outputDirectory, replaceExtension(bamName, ".log", "_SNPeval"))

    # os.path.isdir(None) raises TypeError, so a missing argument is routed
    # to the same abort path as a non-existent directory.
    if snpDirectory is None or not os.path.isdir(snpDirectory):
        print("SNP directory does not exists. Abort.")
        sys.exit(0)

    inputSNP = os.path.join(
        snpDirectory, replaceExtension(bamName, ".vcf", "_snp"))

    if maxLength is None:
        maxLength = estimateMaxReadLength(bam)
    if maxLength < 0:
        print(
            "Could not reliable estimate maximum read length. Please specify --max-read-length parameter."
        )
        # Exit status 0 is preserved from the original behaviour.
        sys.exit(0)

    log = getLogFile(outputLOG)
    try:
        print("Using " + str(maxLength) + " as maximum read length.", file=log)
        stats.computeSNPMaskedRates(ref, bed, inputSNP, bam, maxLength,
                                    minQual, coverageCutoff, variantFraction,
                                    outputCSV, outputPDF, strictTCs, log)
    finally:
        # Release the log handle even when the evaluation fails, as the
        # other run* steps in this file do via closeLogFile.
        closeLogFile(log)

    stepFinished()
def runCount(
    bam,
    ref,
    bed,
    maxLength,
    minQual,
    conversionThreshold,
    is_inverse,
    outputDirectory,
    snpDirectory,
    vcfFile,
):
    """Run T-conversion counting for one BAM file and return the TSV path.

    With a BED file, counting is delegated to
    ``tcounter.computeTconversions``; without one,
    ``tcounter.computeTconversionsAll`` is called instead. All output paths
    are built inside ``outputDirectory`` from the BAM file's base name via
    ``replaceExtension``.

    NOTE(review): an earlier ``runCount`` with a different signature is also
    defined in this file; this later definition is the one that stays bound
    to the name after import -- confirm that is intended.

    Args:
        bam: Path of the input BAM file.
        ref: Handed unchanged to the counting routine.
        bed: BED file, or ``None`` to select the ``computeTconversionsAll``
            branch.
        maxLength: Maximum read length. When ``None`` it is obtained from
            ``estimateMaxReadLength(bam)``. Only the BED branch forwards it
            to the counter.
        minQual: Handed unchanged to the counting routine.
        conversionThreshold: Handed unchanged to the counting routine.
        is_inverse: Handed unchanged to ``tcounter.computeTconversionsAll``;
            not used in the BED branch.
        outputDirectory: Directory that receives the output and log files.
        snpDirectory: Directory in which the SNP VCF for this BAM is looked
            up when ``vcfFile`` is ``None``; may itself be ``None``.
        vcfFile: Explicit SNP VCF path; takes precedence over
            ``snpDirectory`` when not ``None``.

    Returns:
        The path of the ``_tcount`` TSV file. In the branch without a BED
        file this path is not passed to the counter, so whether the file
        exists afterwards cannot be told from this function.

    Raises:
        SystemExit: With status 0 when the read length had to be estimated
            and the estimate is negative.
    """
    bamName = basename(bam)

    outputCSV = os.path.join(
        outputDirectory, replaceExtension(bamName, ".tsv", "_tcount"))
    outputBedgraphPlus = os.path.join(
        outputDirectory, replaceExtension(bamName, ".bedgraph", "_tcount_plus"))
    # The "_mins" suffix is kept as-is: it is part of the produced file name.
    outputBedgraphMinus = os.path.join(
        outputDirectory, replaceExtension(bamName, ".bedgraph", "_tcount_mins"))
    outputLOG = os.path.join(
        outputDirectory, replaceExtension(bamName, ".log", "_tcount"))

    # An explicitly supplied VCF wins over the per-sample file in snpDirectory.
    if vcfFile is not None:
        inputSNP = vcfFile
    elif snpDirectory is not None:
        inputSNP = os.path.join(
            snpDirectory, replaceExtension(bamName, ".vcf", "_snp"))
    else:
        inputSNP = None

    if maxLength is None:
        maxLength = estimateMaxReadLength(bam)
    if maxLength < 0:
        print("Difference between minimum and maximum read length is > 10. "
              "Please specify --max-read-length parameter.")
        # Exit status 0 is preserved from the original behaviour.
        sys.exit(0)

    log = getLogFile(outputLOG)
    try:
        print("Using " + str(maxLength) + " as maximum read length.", file=log)

        if bed is not None:
            message("Bed file detected.")
            tcounter.computeTconversions(ref, bed, inputSNP, bam, maxLength,
                                         minQual, outputCSV,
                                         outputBedgraphPlus,
                                         outputBedgraphMinus,
                                         conversionThreshold, log)
        else:
            message("No bed file passed. Count w.r.t. the full genome.")
            outputBedgraphPlusNew = os.path.join(
                outputDirectory,
                replaceExtension(bamName, ".bedgraph", "_tcount_plus_new"))
            outputBedgraphMinusNew = os.path.join(
                outputDirectory,
                replaceExtension(bamName, ".bedgraph", "_tcount_mins_new"))
            tcounter.computeTconversionsAll(ref, inputSNP, bam,
                                            outputBedgraphPlus,
                                            outputBedgraphPlusNew,
                                            outputBedgraphMinus,
                                            outputBedgraphMinusNew,
                                            conversionThreshold, minQual,
                                            is_inverse, log)
    finally:
        # Release the log handle on every path, including failures, as the
        # other run* steps in this file do via closeLogFile.
        closeLogFile(log)

    stepFinished()
    return outputCSV