def downloadFiles(infiles, outfile):
    '''Download the data listed in a ``.remote`` manifest file.

    Every non-blank line of the manifest is tab-separated; its first two
    fields are read as the repository name and the accession.

    * ``SRA``: unless ``outfile + ".1.gz"`` already exists, run the
      statements returned by ``Sra.prefetch`` and ``Sra.extract`` for the
      accession, extracting into the directory of ``outfile``.
    * ``GDC``: unless ``bam.dir/<manifest name without extension>.bam``
      already exists, run the statement returned by
      ``Sra.process_remote_BAM`` with the scratch directory
      ``temp_bams/<manifest name>``, copy the resulting file(s) to that
      BAM path and remove the scratch directory.

    Inputs whose name does not end in ``.remote`` are ignored.

    Parameters
    ----------
    infiles : str
        Path of a single input file (used as one path despite the plural
        name).
    outfile : str
        Output path; its directory receives SRA extractions and
        ``outfile + ".1.gz"`` marks an SRA download as already done.

    Raises
    ------
    ValueError
        If a line names a repository other than ``SRA`` or ``GDC``, or
        has fewer than two tab-separated fields.
    '''
    # The statements handed to P.run keep unformatted %(name)s placeholders
    # (filename, infile, outfile), so the locals with those names must hold
    # the intended values at the time P.run is called.
    remote_path = infiles
    infile = infiles
    if not remote_path.endswith(".remote"):
        return

    basefile = os.path.basename(remote_path)
    filename = "temp_bams/%s" % basefile
    outdir = os.path.dirname(outfile)
    # Computed up front because the GDC branch rebinds ``outfile``.
    sra_done_marker = outfile + ".1.gz"

    # ``with`` guarantees the manifest handle is closed, also on error.
    with IOTools.open_file(remote_path) as manifest:
        for line in manifest:
            line = line.strip()
            if not line:
                # tolerate blank / trailing lines in the manifest
                continue
            repo, acc = line.split("\t")[:2]

            if repo == "SRA":
                if os.path.isfile(sra_done_marker):
                    continue
                statement = "; ".join(
                    [Sra.prefetch(acc), Sra.extract(acc, outdir)])
                P.run(statement)

            elif repo == "GDC":
                outfile = "bam.dir/" + os.path.splitext(basefile)[0] + ".bam"
                token_files = glob.glob("gdc-user-token*")
                token = token_files[0] if token_files else None
                # Always pass the manifest path itself: ``infile`` is
                # rebound below for the cp statement and must not leak
                # into the next iteration's call.
                s, downloaded = Sra.process_remote_BAM(
                    remote_path, token, filename,
                    filter_bed=os.path.join(
                        PARAMS["annotations_dir"],
                        PARAMS["annotations_interface_contigs_bed"]))
                infile = " ".join(downloaded)
                if os.path.isfile(outfile):
                    continue
                statement = "; ".join([
                    "mkdir -p %(filename)s",
                    s,
                    "cp %(infile)s %(outfile)s; rm -r %(filename)s",
                ])
                P.run(statement)

            else:
                raise ValueError("Unknown repository: %s" % repo)
def downloadSequinsNeatData(outfile):
    ''' Download the neat Sequins data from NCBI

    The basename of ``outfile`` selects the run to fetch and must be
    ``neat-A.fastq.1.gz`` or ``neat-B.fastq.1.gz``. The ``.sra`` file is
    downloaded with wget into the current working directory, extracted
    into the directory of ``outfile`` with the statement returned by
    ``Sra.extract``, the ``_1``/``_2`` fastq files are renamed to
    ``.fastq.1.gz``/``.fastq.2.gz``, and the ``.sra`` file is deleted.

    Raises
    ------
    KeyError
        If the basename of ``outfile`` is not one of the two known names.
    '''
    # NOTE: the local names below are referenced by the %(name)s
    # placeholders in the statements, so they must not be renamed.
    address_base = 'ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByExp/sra/SRX/SRX189'

    outfile2srr = {
        'neat-A.fastq.1.gz': 'SRR3743147',
        'neat-B.fastq.1.gz': 'SRR3743148',
    }
    srr2srx = {
        'SRR3743147': 'SRX1897294',
        'SRR3743148': 'SRX1897295',
    }

    outfile_base = os.path.basename(outfile)
    srr = outfile2srr[outfile_base]
    srx = srr2srx[srr]
    outfile_name = P.snip(outfile_base, '.fastq.1.gz')

    statement = (
        " wget %(address_base)s/%(srx)s/%(srr)s/%(srr)s.sra"
        " -O %(outfile_name)s.sra ")
    # Pass the statement explicitly, as downloadFiles does.
    P.run(statement)

    # dirname() is "" for a bare filename; fall back to "." so the mv
    # paths below do not start at the filesystem root.
    outdir = os.path.dirname(outfile) or "."
    statement = Sra.extract(outfile_name + '.sra', outdir)
    P.run(statement)

    statement = (
        " mv %(outdir)s/%(outfile_name)s_1.fastq.gz"
        " %(outdir)s/%(outfile_name)s.fastq.1.gz; checkpoint;"
        " mv %(outdir)s/%(outfile_name)s_2.fastq.gz"
        " %(outdir)s/%(outfile_name)s.fastq.2.gz")
    P.run(statement)

    os.unlink(outfile_name + '.sra')
def extractGSE65525(infile, outfile):
    ''' extract fastqs

    Runs the statement returned by ``SRA.extract`` for ``infile`` with
    ``GSE65525/fastqs.dir`` as the target directory. ``outfile`` is not
    referenced in the body.
    '''
    # NOTE(review): other functions in this file call the module as
    # ``Sra``; confirm which alias is actually imported.
    statement = SRA.extract(infile, "GSE65525/fastqs.dir")
    # Pass the statement explicitly, as downloadFiles does.
    P.run(statement)
def extractGGSE53638(infile, outfile):
    ''' extract the fastqs from the SRA

    Runs the statement returned by ``SRA.extract`` for ``infile`` with
    ``GSE53638/fastqs.dir`` as the target directory. ``outfile`` is not
    referenced in the body.
    '''
    # NOTE(review): the function name has a doubled "G" (dataset is
    # GSE53638); kept unchanged because it is the public name.
    # NOTE(review): other functions in this file call the module as
    # ``Sra``; confirm which alias is actually imported.
    statement = SRA.extract(infile, "GSE53638/fastqs.dir")
    # Pass the statement explicitly, as downloadFiles does.
    P.run(statement)