def assembleWithStringTie(infiles, outfile):
    '''Run stringtie on one alignment file and gzip its output to outfile.

    Parameters
    ----------
    infiles : sequence
        Pair ``(infile, reference)``.  ``infile`` is handed to stringtie
        as-is unless its name ends in ``.remote``; ``reference`` is
        streamed through ``zcat`` into stringtie's ``-G`` option.
    outfile : str
        Destination of the gzip-compressed stringtie output.  stringtie's
        stderr is redirected to ``<outfile>.log``.

    For a ``.remote`` input, Sra.process_remote_BAM returns a command
    (run before stringtie) and a list of files that replaces ``infile``
    on the stringtie command line.  A temporary directory is created
    before that command and removed after stringtie has finished.
    '''
    infile, reference = infiles

    # NOTE(review): P.run() is called without arguments, so it presumably
    # reads `statement`, the job_* settings and the %(name)s values from
    # this function's local variables (plus PARAMS) -- keep these local
    # names stable.
    job_threads = PARAMS["stringtie_threads"]
    job_memory = PARAMS["stringtie_memory"]

    statement = (
        "stringtie %(infile)s"
        " -p %(stringtie_threads)s"
        " -G <(zcat %(reference)s)"
        " %(stringtie_options)s"
        " 2> %(outfile)s.log"
        " | gzip > %(outfile)s ")

    if infile.endswith(".remote"):
        token = glob.glob("gdc-user-token*")
        tmpfilename = P.getTempFilename()
        # The command below runs "mkdir" on this path and checks the exit
        # status ("checkpoint").  NOTE(review): P.getTempFilename() is
        # understood to create the file it names, which would make mkdir
        # fail; remove any placeholder so the path is free for the
        # directory.
        if os.path.exists(tmpfilename):
            os.unlink(tmpfilename)

        # Use the first token file found in the working directory, if any.
        token = token[0] if token else None

        s, infile = Sra.process_remote_BAM(
            infile, token, tmpfilename,
            filter_bed=os.path.join(
                PARAMS["annotations_dir"],
                PARAMS["annotations_interface_contigs_bed"]))

        # process_remote_BAM returns a list of files; stringtie receives
        # them space-separated.
        infile = " ".join(infile)

        statement = "; checkpoint ;".join(
            ["mkdir %(tmpfilename)s",
             s,
             statement,
             "rm -r %(tmpfilename)s"])

    P.run()
def quantifyWithSalmon(infiles, outfile):
    '''Quantify one BAM file with salmon against a pre-built index.

    ``infiles`` is the pair ``(infile, gtffile)``.  The BAM is sorted with
    ``samtools sort -n`` into ``sorted_bams/``, dumped to a pair of FASTQ
    files and quantified against
    ``salmon_index/<gtf basename without .gz>.salmon.index``.  salmon's
    ``quant.sf`` is moved to ``<outfile>.sf``; the FASTQ files and the
    sorted BAM are deleted afterwards.

    A ``.remote`` infile is first resolved through Sra.process_remote_BAM
    using ``temp_bams/<basename of infile>`` as its directory, which is
    removed once the quantification commands have run.
    '''
    infile, gtffile = infiles

    # NOTE(review): job_* and the other locals are not passed to P.run()
    # explicitly; it presumably collects them (and the %(name)s values)
    # from this function's scope -- keep the local names unchanged.
    job_threads = 2
    job_memory = "16G"

    basefile = os.path.basename(infile)
    # Everything before the first extension separator names the sample.
    sample_name = basefile.split(os.extsep, 1)
    sorted_bam = "sorted_bams/%s_sorted.bam" % sample_name[0]

    gtfbase = P.snip(os.path.basename(gtffile), ".gz")
    salmonIndex = "salmon_index/%s.salmon.index" % gtfbase

    fastq1, fastq2 = [
        P.snip(outfile, "_agg-agg-agg") + ".%i.fastq" % mate
        for mate in (1, 2)]

    salmon_options = PARAMS["salmon_quantoptions"]

    statement = (
        " samtools sort -n %(infile)s -o %(sorted_bam)s;"
        " samtools fastq -1 %(fastq1)s -2 %(fastq2)s"
        " -0 /dev/null -s /dev/null -n -F 0x900 %(sorted_bam)s;"
        " salmon quant -i %(salmonIndex)s --libType IU"
        " -1 %(fastq1)s -2 %(fastq2)s -o %(outfile)s %(salmon_options)s;"
        " mv %(outfile)s/quant.sf %(outfile)s.sf;"
        " rm %(fastq1)s;"
        " rm %(fastq2)s;"
        " rm %(sorted_bam)s ")

    if infile.endswith(".remote"):
        token = glob.glob("gdc-user-token*")
        filename = "temp_bams/%s" % basefile

        # Only the name is kept; any file already at that path is removed.
        tmpfilename = P.get_temp_filename()
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)

        # First matching token file, or None when there is none.
        token = token[0] if token else None

        contigs_bed = os.path.join(
            PARAMS["annotations_dir"],
            PARAMS["annotations_interface_contigs_bed"])
        s, infile = Sra.process_remote_BAM(
            infile, token, filename, filter_bed=contigs_bed)
        infile = " ".join(infile)

        statement = "; ".join((
            "mkdir %(filename)s",
            s,
            statement,
            "rm -r %(filename)s"))

    P.run(statement)
def downloadFiles(infiles, outfile):
    '''Fetch the data referenced by a ``.remote`` file.

    Parameters
    ----------
    infiles : str
        A single path (despite the plural name).  Nothing happens unless
        it ends in ``.remote``.
    outfile : str
        Target path.  Its directory receives SRA extractions; for GDC
        records it is replaced by ``bam.dir/<infile basename without its
        last extension>.bam``.

    Each line of the remote file is tab-separated; the first two fields
    are the repository and the accession.

    * ``SRA``: runs Sra.prefetch followed by Sra.extract unless
      ``<outfile>.1.gz`` already exists.
    * ``GDC``: runs the command from Sra.process_remote_BAM and copies
      the resulting file(s) to the BAM target unless that already exists.

    Raises
    ------
    ValueError
        If a line names any other repository.
    '''
    infile = infiles
    basefile = os.path.basename(infile)
    filename = "temp_bams/%s" % basefile
    # Not referenced by any statement visible in this function.
    baseoutfile = os.path.basename(outfile)
    outdir = os.path.dirname(outfile)

    if not infile.endswith(".remote"):
        return

    # Open through a context manager so the handle is closed even when a
    # job fails or an unknown repository is encountered.
    with IOTools.open_file(infile) as remote_records:
        for line in remote_records:
            repo, acc = line.strip().split("\t")[:2]

            if repo == "SRA":
                if not os.path.isfile(outfile + ".1.gz"):
                    statement = "; ".join(
                        [Sra.prefetch(acc), Sra.extract(acc, outdir)])
                    P.run(statement)

            elif repo == "GDC":
                base = os.path.splitext(basefile)
                outfile = "bam.dir/" + base[0] + ".bam"

                # First token file in the working directory, else None.
                token = glob.glob("gdc-user-token*")
                token = token[0] if token else None

                # NOTE(review): `infile` and `outfile` are rebound here
                # because the statement below interpolates them by name.
                # A second GDC line would therefore hand the joined BAM
                # paths back to process_remote_BAM -- confirm that remote
                # files carry a single record.
                s, infile = Sra.process_remote_BAM(
                    infile, token, filename,
                    filter_bed=os.path.join(
                        PARAMS["annotations_dir"],
                        PARAMS["annotations_interface_contigs_bed"]))
                infile = " ".join(infile)

                if not os.path.isfile(outfile):
                    statement = "; ".join([
                        "mkdir -p %(filename)s",
                        s,
                        '''cp %(infile)s %(outfile)s; rm -r %(filename)s'''
                    ])
                    P.run(statement)

            else:
                raise ValueError("Unknown repository: %s" % repo)
def assembleWithStringTie(infiles, outfile):
    '''Filter a BAM with portcullis, then run stringtie on the result.

    ``infiles`` is the pair ``(infile, reference)``.  The command chain
    runs ``portcullis full`` into ``portcullis/<basename of infile>/``,
    moves ``portcullis.filtered.bam`` to a temporary path, deletes the
    portcullis directory, runs stringtie on the temporary BAM with the
    zcat-decompressed reference as ``-G``, gzips the result to
    ``outfile`` (stderr goes to ``<outfile>.log``) and finally removes the
    temporary BAM.

    A ``.remote`` infile is first resolved through Sra.process_remote_BAM
    inside a second temporary directory, which is removed at the end.
    '''
    infile, reference = infiles
    basefile = os.path.basename(infile)

    # NOTE(review): these and the other locals are not passed to P.run()
    # explicitly; it presumably gathers them, together with the %(name)s
    # values, from this function's scope -- keep the local names unchanged.
    job_threads = PARAMS["stringtie_threads"]
    job_memory = PARAMS["stringtie_memory"]

    # Only the name is needed: the path is the target of "mv" below.
    tmpfile = P.get_temp_filename()
    if os.path.exists(tmpfile):
        os.remove(tmpfile)

    statement = (
        " portcullis full -t 1 -o portcullis/%(basefile)s/"
        " -r %(portcullis_bedref)s -b %(portcullis_fastaref)s %(infile)s &&"
        " mv portcullis/%(basefile)s/portcullis.filtered.bam %(tmpfile)s &&"
        " rm -r portcullis/%(basefile)s/ &&"
        " stringtie %(tmpfile)s -p %(stringtie_threads)s"
        " -G <(zcat %(reference)s) %(stringtie_options)s"
        " 2> %(outfile)s.log | gzip > %(outfile)s &&"
        " rm %(tmpfile)s")

    if infile.endswith(".remote"):
        token = glob.glob("gdc-user-token*")

        # Free the path so that it can be created as a directory.
        tmpfilename = P.get_temp_filename()
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)

        # First matching token file, or None when there is none.
        token = token[0] if token else None

        contigs_bed = os.path.join(
            PARAMS["annotations_dir"],
            PARAMS["annotations_interface_contigs_bed"])
        s, infile = Sra.process_remote_BAM(
            infile, token, tmpfilename, filter_bed=contigs_bed)
        infile = " ".join(infile)

        statement = "; ".join((
            "mkdir -p %(tmpfilename)s",
            s,
            statement,
            "rm -r %(tmpfilename)s"))

    P.run(statement)
def quantifyWithSalmon(infiles, outfile):
    '''Quantify existing samples against genesets.

    Parameters
    ----------
    infiles : sequence
        Pair ``(infile, gtffile)``.  ``infile`` is converted to paired
        FASTQ files with ``samtools fastq``; the basename of ``gtffile``
        (without ``.gz``) selects the index
        ``salmon_index/<gtfbase>.salmon.index``.
    outfile : str
        salmon output directory.  ``quant.sf`` is moved to
        ``<outfile>.sf`` and the intermediate FASTQ files are deleted.

    A ``.remote`` infile is first resolved through Sra.process_remote_BAM
    using ``temp_bams/<basename of infile>`` as its directory, which is
    removed once the quantification commands have run.
    '''
    # job_threads is interpolated into the salmon command line below
    # (--threads).  NOTE(review): P.run() takes no arguments, so it
    # presumably reads `statement`, job_* and all %(name)s values from
    # this function's locals -- keep the local names stable.
    job_threads = 2
    job_memory = "8G"

    infile, gtffile = infiles
    basefile = os.path.basename(infile)
    gtfbase = P.snip(os.path.basename(gtffile), ".gz")
    salmonIndex = "salmon_index/" + gtfbase + ".salmon.index"
    fastq1 = P.snip(outfile, "_agg-agg-agg") + ".1.fastq"
    fastq2 = P.snip(outfile, "_agg-agg-agg") + ".2.fastq"
    salmon_options = PARAMS["salmon_quantoptions"]

    statement = (
        " samtools fastq -1 %(fastq1)s -2 %(fastq2)s %(infile)s;"
        " salmon quant -i %(salmonIndex)s --libType A"
        " -1 %(fastq1)s -2 %(fastq2)s -o %(outfile)s"
        " --threads %(job_threads)s %(salmon_options)s;"
        " checkpoint;"
        " mv %(outfile)s/quant.sf %(outfile)s.sf;"
        " rm %(fastq1)s;"
        " rm %(fastq2)s ")

    if infile.endswith(".remote"):
        token = glob.glob("gdc-user-token*")
        filename = "temp_bams/%s" % basefile

        tmpfilename = P.getTempFilename()
        # Nothing visible here uses or removes this path, so a file
        # created by P.getTempFilename() would be left behind on every
        # run.  Delete it, as the sibling salmon task does.
        # NOTE(review): the name is kept in case the command returned by
        # Sra.process_remote_BAM interpolates it; if it does not, drop
        # the temporary name altogether.
        if os.path.exists(tmpfilename):
            os.unlink(tmpfilename)

        # Use the first token file found in the working directory, if any.
        token = token[0] if token else None

        s, infile = Sra.process_remote_BAM(
            infile, token, filename,
            filter_bed=os.path.join(
                PARAMS["annotations_dir"],
                PARAMS["annotations_interface_contigs_bed"]))

        # process_remote_BAM returns a list of files; samtools receives
        # them space-separated.
        infile = " ".join(infile)

        statement = "; checkpoint; ".join(
            ["mkdir %(filename)s",
             s,
             statement,
             "rm -r %(filename)s"])

    P.run()
def runKalistoOnRemoteBAM(infiles, outfile):
    '''Run kallisto quant on a ``.bam`` or ``.remote`` input.

    ``infiles`` is the pair ``(infile, kallisto_index)``.  The input is
    converted to two temporary FASTQ files with ``samtools fastq``,
    quantified into the directory of ``outfile``, the temporary files are
    removed and ``outfile`` (named without its ``.gz`` suffix) is gzipped.
    For ``.remote`` input the command returned by Sra.process_remote_BAM
    runs first and its temporary path is added to the clean-up list.
    '''
    # NOTE(review): P.run() takes no arguments, so it presumably reads
    # `statement`, job_memory and the %(name)s values from this function's
    # locals -- the names below must stay as they are.
    job_memory = "6G"

    infile, kallisto_index = infiles
    outdir = os.path.dirname(outfile)
    outfile = P.snip(outfile, ".gz")

    tempfastq1 = P.getTempFilename()
    tempfastq2 = P.getTempFilename()

    commands = []
    cleanup = [tempfastq1, tempfastq2]

    if infile.endswith(".remote"):
        tempbam = P.getTempFilename()
        s, infiles = Sra.process_remote_BAM(infile, outdir=tempbam)
        infile = " ".join(infiles)
        commands.append(s)
        cleanup.append(tempbam)

    commands += [
        "samtools fastq %(infile)s -1 %(tempfastq1)s -2 %(tempfastq2)s",
        "kallisto quant -i %(kallisto_index)s -o %(outdir)s"
        " %(tempfastq1)s %(tempfastq2)s",
        "rm -R %(rm_files)s",
        "gzip %(outfile)s",
    ]

    # Interpolated into the "rm -R" command above.
    rm_files = " ".join(cleanup)

    # A checkpoint between consecutive commands.
    statement = "; \n checkpoint;\n".join(commands)

    P.run()