def statsComputeOverallRates(referenceFile, bam, minBaseQual, outputCSV, outputPDF, log, printOnly=False, verbose=True, force=False):

    if not checkStep([bam, referenceFile], [outputCSV], force):
        print("Skipped computing overall rates for file " + bam, file=log)
    else:
        # Init
        totalRatesFwd = [0] * 25
        totalRatesRev = [0] * 25
        tcCount = [0] * 100

        # Go through one chr after the other
        testFile = SlamSeqBamFile(bam, referenceFile, None)
        chromosomes = testFile.getChromosomes()

        for chromosome in chromosomes:
            readIterator = testFile.readsInChromosome(chromosome, minBaseQual)

            for read in readIterator:
                # Compute rates for current read
                rates = read.conversionRates

                # Get T -> C conversions for current read
                tc = read.tcCount
                tcCount[tc] += 1

                # Add rates from read to total rates
                if read.direction == ReadDirection.Reverse:
                    totalRatesRev = sumLists(totalRatesRev, rates)
                else:
                    totalRatesFwd = sumLists(totalRatesFwd, rates)

        # Print rates in correct format for plotting
        fo = open(outputCSV, "w")
        print("# slamdunk rates v" + __version__, file=fo)
        printRates(totalRatesFwd, totalRatesRev, fo)
        fo.close()

    if not checkStep([bam, referenceFile], [outputPDF], force):
        print("Skipped computing overall rate pdfs for file " + bam, file=log)
    else:
        # f = tempfile.NamedTemporaryFile(delete=False)
        # print(removeExtension(basename(bam)), outputCSV, sep='\t', file=f)
        # f.close()
        callR(getPlotter("compute_overall_rates") + " -f " + outputCSV +
              " -n " + removeExtension(os.path.basename(bam)) +
              " -O " + outputPDF,
              log, dry=printOnly, verbose=verbose)

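# Minimal usage sketch for statsComputeOverallRates (the paths and the
# base-quality cutoff below are illustrative placeholders, not values
# prescribed by slamdunk):
#
#   with open("stats.log", "a") as log:
#       statsComputeOverallRates("genome.fa", "sample.bam", 27,
#                                "sample_overallrates.csv",
#                                "sample_overallrates.pdf", log)
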
def statsComputeTCContext(referenceFile, bam, minBaseQual, outputCSV, outputPDF, log, printOnly=False, verbose=True, force=False):

    if not checkStep([bam, referenceFile], [outputCSV], force):
        print("Skipped computing T->C context for file " + bam, file=log)
    else:
        # Init
        # combinations = ["AT","CT","GT","TT","NT","AA","CA","GA","TA","NA"]
        frontCombinations = ["AT", "CT", "GT", "TT", "NT"]
        backCombinations = ["TA", "TC", "TG", "TT", "TN"]

        counts = {}
        counts['5prime'] = {}
        counts['3prime'] = {}
        counts['5prime']['fwd'] = {}
        counts['5prime']['rev'] = {}
        counts['3prime']['fwd'] = {}
        counts['3prime']['rev'] = {}

        for combination in frontCombinations:
            counts['5prime']['fwd'][combination] = 0
            counts['5prime']['rev'][combination] = 0

        for combination in backCombinations:
            counts['3prime']['fwd'][combination] = 0
            counts['3prime']['rev'][combination] = 0

        bamFile = pysam.AlignmentFile(bam, "rb")

        # Go through one chr after the other
        testFile = SlamSeqBamFile(bam, referenceFile, None)
        chromosomes = testFile.getChromosomes()

        for chromosome in chromosomes:
            for read in bamFile.fetch(region=chromosome):
                i = 0
                while i < len(read.query_sequence):
                    # Forward reads: count the 5' and 3' neighbours of every T
                    if read.query_sequence[i] == "T" and not read.is_reverse:
                        frontContext = None
                        backContext = None
                        if i > 0:
                            frontContext = read.query_sequence[i - 1]
                        if i < (len(read.query_sequence) - 1):
                            backContext = read.query_sequence[i + 1]

                        if frontContext is not None:
                            counts['5prime']['fwd'][frontContext + "T"] += 1
                        if backContext is not None:
                            counts['3prime']['fwd']["T" + backContext] += 1

                    # Reverse reads: count the neighbours of every A and complement
                    # them back to the T-centred context
                    if read.query_sequence[i] == "A" and read.is_reverse:
                        frontContext = None
                        backContext = None
                        if i > 0:
                            backContext = read.query_sequence[i - 1]
                        if i < (len(read.query_sequence) - 1):
                            frontContext = read.query_sequence[i + 1]

                        if frontContext is not None:
                            counts['5prime']['rev'][complement(frontContext + "A")] += 1
                        if backContext is not None:
                            counts['3prime']['rev'][complement("A" + backContext)] += 1

                    i += 1

        # Print rates in correct format for plotting
        fo = open(outputCSV, "w")
        print("\t".join(frontCombinations), file=fo)

        frontFwdLine = ""
        frontRevLine = ""
        backFwdLine = ""
        backRevLine = ""

        for combination in frontCombinations:
            frontFwdLine += str(counts['5prime']['fwd'][combination]) + "\t"
            frontRevLine += str(counts['5prime']['rev'][combination]) + "\t"

        print(frontFwdLine.rstrip(), file=fo)
        print(frontRevLine.rstrip(), file=fo)

        print("\t".join(backCombinations), file=fo)

        for combination in backCombinations:
            backFwdLine += str(counts['3prime']['fwd'][combination]) + "\t"
            backRevLine += str(counts['3prime']['rev'][combination]) + "\t"

        print(backFwdLine.rstrip(), file=fo)
        print(backRevLine.rstrip(), file=fo)

        fo.close()

    if not checkStep([bam, referenceFile], [outputPDF], force):
        print("Skipped computing T->C context pdfs for file " + bam, file=log)
    else:
        # Text mode so print() can write the sample sheet for the plotting script
        f = tempfile.NamedTemporaryFile(mode="w", delete=False)
        print(removeExtension(os.path.basename(bam)), outputCSV, sep='\t', file=f)
        f.close()

        callR(getPlotter("compute_context_TC_rates") + " -f " + f.name +
              " -O " + outputPDF,
              log, dry=printOnly, verbose=verbose)

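# complement() is assumed to be provided elsewhere in the package. The key
# lookups above only work if it complements each base WITHOUT reversing the
# string (e.g. "GA" -> "CT", "AG" -> "TC"). A minimal stand-in with that
# behaviour could look like this (hypothetical, for illustration only):
#
#   _COMPLEMENT = str.maketrans("ACGTNacgtn", "TGCANtgcan")
#
#   def complement(sequence):
#       return sequence.translate(_COMPLEMENT)
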
def run():
    ########################################################################
    # Argument parsing
    ########################################################################

    # TODO: parameter for simulating expression levels
    # TODO: more realistic simulation of half lifes

    # Info
    usage = "SLAMdunk software for simulating SLAM-seq data"

    # Main Parsers
    parser = ArgumentParser(description=usage, formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument('--version', action='version', version='%(prog)s ' + __version__)

    # Initialize Subparsers
    subparsers = parser.add_subparsers(help="", dest="command")

    allparse = subparsers.add_parser('all', help='Simulate full SlamSeq samples')
    allparse.add_argument("-r", "--reference", type=str, required=True, dest="referenceFile", help="Reference fasta file")
    allparse.add_argument("-b", "--bed", type=str, required=True, dest="bed", help="BED file")
    allparse.add_argument("-l", "--read-length", type=int, required=True, dest="readLength", help="All UTRs shorter than the read length are removed.")
    allparse.add_argument("-o", "--outputDir", type=str, required=False, dest="outputDir", default=".", help="Output directory for mapped BAM files.")
    allparse.add_argument("-s", "--snp-rate", type=float, required=False, default=0.001, dest="snpRate", help="SNP rate in UTRs")
    allparse.add_argument("-cov", "--read-coverage", type=int, required=False, default=20, dest="readCoverage", help="Read coverage (if read number is not specified)")
    allparse.add_argument("-e", "--sequencing-error", type=float, required=False, default=0.05, dest="seqError", help="Sequencing error")
    allparse.add_argument("-p", "--pulse", type=str, required=False, dest="pulse", help="Pulse in minutes")
    allparse.add_argument("-ra", "--rates", type=str, required=False, default=None, dest="rates", help="List of rates")
    allparse.add_argument("-c", "--chase", type=str, required=False, default="", dest="chase", help="Chase in minutes")
    allparse.add_argument("-tc", "--tc-rate", type=float, required=False, dest="conversionRate", default=0.024, help="T->C conversion rate")
    allparse.add_argument("-minhl", "--min-halflife", type=int, required=False, default=30, dest="minHalfLife", help="Lower bound for the simulated half lifes in minutes")
    allparse.add_argument("-maxhl", "--max-halflife", type=int, required=False, default=720, dest="maxHalfLife", help="Upper bound for the simulated half lifes in minutes")
    allparse.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number")
    allparse.add_argument("-rep", "--replicates", type=int, required=False, default=1, dest="replicates", help="Number of replicates")
    allparse.add_argument('-st', "--skip-turnover", required=False, dest="skipTurnover", action='store_true', help="Take half-life from score field of input BED file")

    preparebedparse = subparsers.add_parser('preparebed', help='Prepares a UTR BED file for SlamSim')
    preparebedparse.add_argument("-b", "--bed", type=str, required=True, dest="bed", help="BED file")
    preparebedparse.add_argument("-l", "--read-length", type=int, required=True, dest="readLength", help="All UTRs shorter than the read length are removed.")
    preparebedparse.add_argument("-o", "--outputDir", type=str, required=False, dest="outputDir", default=".", help="Output directory for mapped BAM files.")

    turnoverparse = subparsers.add_parser('turnover', help='Simulate turnover rates for UTRs')
    turnoverparse.add_argument("-b", "--bed", type=str, required=True, dest="bed", help="BED file")
turnoverparse.add_argument("-minhl", "--min-halflife", type=int, required=False, default=30, dest="minHalfLife", help="Lower bound for the simulated half lifes in minutes") turnoverparse.add_argument("-maxhl", "--max-halflife", type=int, required=False, default=720, dest="maxHalfLife", help="Upper bound for the simulated half lifes in minutes") turnoverparse.add_argument("-o", "--outputDir", type=str, required=False, dest="outputDir", default=".", help="Output directory for mapped BAM files.") utrsparse = subparsers.add_parser('utrs', help='Simulate utrs and turnover rate') utrsparse.add_argument("-r", "--reference", type=str, required=True, dest="referenceFile", help="Reference fasta file") utrsparse.add_argument("-b", "--bed", type=str, required=True, dest="bed", help="BED file") utrsparse.add_argument("-l", "--read-length", type=int, required=True, dest="readLength", help="Read length") utrsparse.add_argument("-o", "--outputDir", type=str, required=False, dest="outputDir", default=".", help="Output directory for mapped BAM files.") utrsparse.add_argument("-s", "--snp-rate", type=float, required=False, default=0.001, dest="snpRate", help="SNP rate in UTRs") simulateparse = subparsers.add_parser('reads', help='Simulate SLAM-seq read data') simulateparse.add_argument("-o", "--outputDir", type=str, required=False, dest="outputDir", default=".", help="Output directory for mapped BAM files.") simulateparse.add_argument("--sample-name", type=str, required=True, dest="sampleName", help="Name of sample") simulateparse.add_argument("-b", "--bed", type=str, required=True, dest="bed", help="BED file") simulateparse.add_argument("-l", "--read-length", type=int, required=True, dest="readLength", help="Read length") simulateparse.add_argument("-n", "--read-number", type=int, required=False, default=0, dest="readNumber", help="Number of reads to simulate") simulateparse.add_argument("-cov", "--read-coverage", type=int, required=False, default=20, dest="readCoverage", help="Read coverage (if read number is not specified)") simulateparse.add_argument("-e", "--sequencing-error", type=float, required=False, default=0.05, dest="seqError", help="Sequencing error") simulateparse.add_argument("-p", "--pulse", type=int, required=True, dest="pulse", help="Pulse in minutes") simulateparse.add_argument("-c", "--chase", type=int, required=False, default=0, dest="chase", help="Chase in minutes") simulateparse.add_argument("-tc", "--tc-rate", type=float, required=False, dest="conversionRate", default=0.024, help="T->C conversion rate") evalparser = subparsers.add_parser('eval-counts', help='Evaluate count files') evalparser.add_argument("-s", "--simulated", type=str, required=True, dest="simulated", help="") evalparser.add_argument("-d", "--slamdun", type=str, required=True, dest="slamdunk", help="") evalparser.add_argument("-o", "--outputFile", type=str, required=True, dest="outputFile", help="") evalreadsparser = subparsers.add_parser('eval-reads', help='Evaluate read files') evalreadsparser.add_argument("-o", "--outputFile", type=str, required=True, dest="outputFile", help="") evalreadsparser.add_argument("-b", "--bed", type=str, required=True, dest="bed", help="BED file") evalreadsparser.add_argument("-r", "--reference", type=str, required=True, dest="referenceFile", help="Reference fasta file") evalreadsparser.add_argument('bam', action='store', help='Bam file(s)' , nargs="+") evalconversionplotparse = subparsers.add_parser('plot.conversions', help='Plots differences in simulated and found conversion rates') 
evalconversionplotparse.add_argument("-sim", "--simDir", type=str, required=True, dest="simDir", help="") evalconversionplotparse.add_argument("-slam", "--slamdunkDir", type=str, required=True, dest="slamDir", help="") evalconversionplotparse.add_argument("-o", "--outputFile", type=str, required=True, dest="outputFile", help="") evalconversionplotparse.add_argument("-tc", "--tc-rate", type=float, required=False, dest="conversionRate", default=0.03, help="T->C conversion rate") evalhalflifeparse = subparsers.add_parser('plot.halflifes', help='Plots half lifes') evalhalflifeparse.add_argument("-sim", "--simulated-hl", type=str, required=True, dest="simHL", help="Simulated half-lifes") evalhalflifeparse.add_argument("-pred", "--predicted-hl", type=str, required=True, dest="predHL", help="Predicted half-lifes") evalhalflifeparse.add_argument("-true", "--true-hl", type=str, required=True, dest="trueHL", help="Predicted half-lifes") evalhalflifeparse.add_argument("-o", "--outputFile", type=str, required=True, dest="outputFile", help="") evalhalflifeparse.add_argument("-e", "--erroroutputFile", type=str, required=True, dest="erroutputFile", help="") evalhalflifeplotparse = subparsers.add_parser('plot.halflifespergene', help='Plots half lifes') evalhalflifeplotparse.add_argument("-sim", "--simDir", type=str, required=True, dest="simDir", help="") evalhalflifeplotparse.add_argument("-slam", "--slamdunkDir", type=str, required=True, dest="slamDir", help="") evalhalflifeplotparse.add_argument("-t", "--timepoints", type=str, required=True, dest="timepoints", help="") evalhalflifeplotparse.add_argument("-o", "--outputFile", type=str, required=True, dest="outputFile", help="") evalhalflifeplotparse.add_argument("-tc", "--tc-rate", type=float, required=False, dest="conversionRate", default=0.03, help="T->C conversion rate") evalhalflifeplotparse.add_argument("-b", "--bed", type=str, required=True, dest="bed", help="BED file") utilcrateparse = subparsers.add_parser('util.conversionrate', help='Get conversion rate from mapped BAM files') utilcrateparse.add_argument('bam', action='store', help='Bam file(s)' , nargs="+") utilcrateparse.add_argument("-r", "--reference", type=str, required=True, dest="referenceFile", help="Reference fasta file") utilcrateparse.add_argument("-region", "--region", type=str, required=True, dest="region", help="") utilcrateparse.add_argument('-rev',required=False, dest="reverse", action='store_true') args = parser.parse_args() ######################################################################## # Routine selection ######################################################################## def prepareBed(outputDirectory, bed, readLength): createDir(outputDirectory) slamSimBed = os.path.join(outputDirectory, replaceExtension(basename(bed), ".bed", "_original")) simulator.prepareBED(bed, slamSimBed, readLength) def turnOver(outputDirectory, bed, minHalflife, maxHalfLife, skipTurnover=False): message("Simulating turnover") createDir(outputDirectory) trunoverBed = os.path.join(outputDirectory, replaceExtension(basename(bed), ".bed", "_utrs")) if not skipTurnover: simulator.simulateTurnOver(bed, trunoverBed, minHalflife, maxHalfLife) else: copyfile(bed, trunoverBed) def Utrs(outputDirectory, bed, referenceFasta, readLength, polyALength, snpRate): message("Simulating UTRs") createDir(outputDirectory) bed12 = os.path.join(outputDirectory, replaceExtension(basename(bed), ".bed12", "_utrs")) bed12Fasta = os.path.join(outputDirectory, replaceExtension(basename(bed), ".fa", "_utrs")) explv = 
os.path.join(outputDirectory, replaceExtension(basename(bed), ".eplv", "_utrs")) vcfFile = os.path.join(outputDirectory, replaceExtension(basename(bed), ".vcf", "_utrs")) totalUTRlength = simulator.prepareUTRs(bed, bed12, bed12Fasta, referenceFasta, readLength, polyALength, explv, snpRate, vcfFile) command = args.command if (command == "preparebed") : prepareBed(args.outputDir, args.bed, args.readLength) elif (command == "turnover"): turnOver(args.outputDir, args.bed, args.minHalfLife, args.maxHalfLife) elif (command == "utrs") : polyALength = 0 Utrs(args.outputDir, args.bed, args.referenceFile, args.readLength, polyALength, args.snpRate) elif (command == "reads") : createDir(args.outputDir) reads(args.outputDir, args.bed, args.sampleName, args.readLength, args.readNumber, args.readCoverage, args.seqError, args.pulse, args.chase, args.conversionRate) elif (command == "eval-counts") : outputPath = os.path.dirname(args.outputFile) createDir(outputPath) simulator.evaluate(args.simulated, args.slamdunk, args.outputFile, mainOutput) elif (command == "eval-reads") : outputPath = os.path.dirname(args.outputFile) createDir(outputPath) for bam in args.bam: simulator.evaluateReads(bam, args.referenceFile, args.bed, args.outputFile, mainOutput) elif (command == "plot.conversions") : simDir = args.simDir slamDir = args.slamDir outputPDF = args.outputFile conversionRate = args.conversionRate outputPath = os.path.dirname(outputPDF) createDir(outputPath) simulator.plotconversiondifferences(simDir, slamDir, conversionRate, outputPDF) elif (command == "plot.halflifespergene") : bed = args.bed simDir = args.simDir slamDir = args.slamDir outputPDF = args.outputFile conversionRate = args.conversionRate timePoints = args.timepoints outputPath = os.path.dirname(outputPDF) createDir(outputPath) simulator.plotHalfLifes(bed, simDir, slamDir, timePoints, conversionRate, outputPDF) elif (command == "plot.halflifes") : trueHLFile = args.trueHL simHLFile = args.simHL predHLFile = args.predHL outputPDF = args.outputFile erroutputCSV = args.erroutputFile simulator.evalHalfLifes(trueHLFile, simHLFile, predHLFile, outputPDF, erroutputCSV) elif (command == "util.conversionrate") : ref = args.referenceFile bams = args.bam region = args.region region = region.replace(",", "") chromosome = region.split(":")[0] start = int(region.split(":")[1].split("-")[0]) end = int(region.split(":")[1].split("-")[1]) strand = "+" if(args.reverse): strand = "-" for bam in bams: simulator.getConversionRateFromBam(bam, ref, chromosome, start, end, strand) elif (command == "all") : #args.outputDir, args.bed, args.sampleName, args.readLength, args.readNumber, args.readCoverage, args.seqError, args.pulse, args.chase, args.conversionRate referenceFile = args.referenceFile baseFolder = args.outputDir annotationFile = args.bed readLength = args.readLength readCoverage = args.readCoverage sequencingError = args.seqError polyALength = 0 #timePoints = [0, 15, 30, 60, 180, 360, 720, 1440] if not args.pulse == None: timePoints = args.pulse.split(",") chaseTimePoints = [] if len(args.chase) > 0: chaseTimePoints = args.chase.split(",") labledTranscripots = None if not args.rates == None: labledTranscripots = args.rates.split(",") replicates = args.replicates n = args.threads createDir(baseFolder) annotationPrefix = removeExtension(basename(annotationFile)) simulatedAnnotationPref = os.path.join(baseFolder, annotationPrefix) prepareBed(baseFolder, annotationFile, readLength) # TODO parameter to skip this turnOver(baseFolder, simulatedAnnotationPref + 
"_original.bed", args.minHalfLife, args.maxHalfLife, args.skipTurnover) Utrs(baseFolder, simulatedAnnotationPref + "_original.bed", referenceFile, readLength, polyALength, args.snpRate) sampleFile = open(os.path.join(baseFolder, "samples.tsv"), "w") sampleNumber = 1 jobs = [] if(labledTranscripots == None): for timePoint in timePoints: for replicate in range(1, replicates + 1): sampleName = "sample_" + str(sampleNumber) + "_pulse_" + str(timePoint) + "min_rep" + str(replicate) sampleInfo = SampleInfo(ID = sampleNumber, Name = sampleName, Type = "pulse", Time = str(timePoint)) jobs.append(delayed(reads)(baseFolder, simulatedAnnotationPref + "_original_utrs.bed", sampleName, readLength, 0, readCoverage, sequencingError, int(timePoint), 0, args.conversionRate, sampleInfo)) sampleNumber += 1 print(os.path.join(baseFolder, sampleName + "_reads.bam"), sampleName, "pulse", timePoint, sep="\t", file=sampleFile) for timePoint in chaseTimePoints: for replicate in range(1, replicates + 1): sampleName = "sample_" + str(sampleNumber) + "_chase_" + str(timePoint) + "min_rep" + str(replicate) sampleInfo = SampleInfo(ID = sampleNumber, Name = sampleName, Type = "chase", Time = str(timePoint)) jobs.append(delayed(reads)(baseFolder, simulatedAnnotationPref + "_original_utrs.bed", sampleName, readLength, 0, readCoverage, sequencingError, int(timePoints[-1]), int(timePoint), args.conversionRate, sampleInfo)) sampleNumber += 1 print(os.path.join(baseFolder, sampleName + "_reads.bam"), sampleName, "chase", timePoint, sep="\t", file=sampleFile) else: for rate in labledTranscripots: for replicate in range(1, replicates + 1): sampleName = "sample_" + str(sampleNumber) + "_rate_" + str(rate) + "_rep" + str(replicate) sampleInfo = SampleInfo(ID = sampleNumber, Name = sampleName, Type = "rate", Time = str(rate)) jobs.append(delayed(reads)(baseFolder, simulatedAnnotationPref + "_original_utrs.bed", sampleName, readLength, 0, readCoverage, sequencingError, 0, 0, args.conversionRate, sampleInfo, float(rate))) sampleNumber += 1 print(os.path.join(baseFolder, sampleName + "_reads.bam"), sampleName, "rate", rate, sep="\t", file=sampleFile) sampleFile.close() results = Parallel(n_jobs=n, verbose=False)(jobs) else: parser.error("Too few arguments.")