Esempio n. 1
0
def downloadFiles(infiles, outfile):
    '''Fetch the data listed in a ``.remote`` file.

    If ``infiles`` does not end in ``.remote`` nothing is done.  Otherwise
    every non-blank line of the file is split on tabs and its first two
    fields are taken as ``(repository, accession)``:

    * ``SRA``: unless ``outfile + ".1.gz"`` already exists, the statements
      returned by ``Sra.prefetch`` and ``Sra.extract`` are joined with
      ``"; "`` and run.
    * ``GDC``: the target is ``bam.dir/<basename of infiles without its
      extension>.bam``.  ``Sra.process_remote_BAM`` is called with the
      first file matching ``gdc-user-token*`` (or ``None``), and unless the
      target already exists its statement is run, followed by a copy into
      the target and removal of the temporary directory.

    Arguments
    ---------
    infiles : str
        Path of the input file (a single path despite the plural name).
    outfile : str
        Path of the output file of the task.

    Raises
    ------
    ValueError
        If a line names a repository other than ``SRA`` or ``GDC``.
    '''

    # NOTE(review): the statement templates below use %(infile)s,
    # %(outfile)s and %(filename)s.  Presumably P.run() fills them in from
    # this function's local variables, so those local names must be kept --
    # confirm against the pipeline library before renaming them.
    infile = infiles
    basefile = os.path.basename(infiles)
    filename = "temp_bams/%s" % basefile
    outdir = os.path.dirname(outfile)

    # Computed up front because the GDC branch rebinds ``outfile``.
    sra_marker = outfile + ".1.gz"

    if not infiles.endswith(".remote"):
        return

    # ``infiles`` (never rebound) is used for reading, so the rebinding of
    # ``infile`` in the GDC branch cannot leak into later iterations.
    with IOTools.open_file(infiles) as remote:
        for line in remote:
            if not line.strip():
                # tolerate blank / trailing empty lines
                continue

            repo, acc = line.strip().split("\t")[:2]

            if repo == "SRA":
                if not os.path.isfile(sra_marker):
                    statement = "; ".join(
                        [Sra.prefetch(acc),
                         Sra.extract(acc, outdir)])
                    P.run(statement)

            elif repo == "GDC":
                base = os.path.splitext(basefile)
                outfile = "bam.dir/" + base[0] + ".bam"

                tokens = glob.glob("gdc-user-token*")
                token = tokens[0] if tokens else None

                s, bams = Sra.process_remote_BAM(
                    infiles,
                    token,
                    filename,
                    filter_bed=os.path.join(
                        PARAMS["annotations_dir"],
                        PARAMS["annotations_interface_contigs_bed"]))

                # space-separated list for the ``cp`` command below
                infile = " ".join(bams)
                if not os.path.isfile(outfile):
                    statement = "; ".join([
                        "mkdir -p %(filename)s", s,
                        '''cp %(infile)s %(outfile)s;
                            rm -r %(filename)s'''
                    ])
                    P.run(statement)

            else:
                raise ValueError("Unknown repository: %s" % repo)
def downloadSequinsNeatData(outfile):
    ''' Download the neat Sequins data from NCBI.

    The basename of ``outfile`` selects the run accession: it must be
    ``neat-A.fastq.1.gz`` or ``neat-B.fastq.1.gz``.  The matching ``.sra``
    file is fetched with ``wget`` into the current directory, extracted
    into the directory of ``outfile`` with the statement returned by
    ``Sra.extract``, the resulting ``_1``/``_2`` fastq files are renamed to
    ``.fastq.1.gz``/``.fastq.2.gz``, and the ``.sra`` file is deleted.

    Arguments
    ---------
    outfile : str
        Path of the first-read output file.

    Raises
    ------
    KeyError
        If the basename of ``outfile`` is not one of the two known names.
    '''

    # NOTE(review): P.run() is called without arguments, so it presumably
    # reads the local named ``statement`` and fills the %(name)s
    # placeholders from the other locals here -- keep these names; confirm
    # against the pipeline library.
    address_base = 'ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByExp/sra/SRX/SRX189'

    outfile2srr = {
        'neat-A.fastq.1.gz': 'SRR3743147',
        'neat-B.fastq.1.gz': 'SRR3743148'
    }

    srr2srx = {'SRR3743147': 'SRX1897294', 'SRR3743148': 'SRX1897295'}

    outfile_base = os.path.basename(outfile)

    srr = outfile2srr[outfile_base]
    srx = srr2srx[srr]

    outfile_name = P.snip(outfile_base, '.fastq.1.gz')

    # dirname() is "" for a bare filename; without the fallback the ``mv``
    # commands below would expand to "/<name>_1.fastq.gz" and the extract
    # step would receive an empty output directory.
    outdir = os.path.dirname(outfile) or '.'

    statement = '''
    wget %(address_base)s/%(srx)s/%(srr)s/%(srr)s.sra
    -O %(outfile_name)s.sra
    '''
    P.run()

    statement = Sra.extract(outfile_name + '.sra', outdir)
    P.run()

    statement = '''
    mv %(outdir)s/%(outfile_name)s_1.fastq.gz
    %(outdir)s/%(outfile_name)s.fastq.1.gz; checkpoint;
    mv %(outdir)s/%(outfile_name)s_2.fastq.gz
    %(outdir)s/%(outfile_name)s.fastq.2.gz'''
    P.run()

    # the downloaded archive is no longer needed once extracted
    os.unlink(outfile_name + '.sra')
Esempio n. 3
0
def extractGSE65525(infile, outfile):
    '''Extract fastqs from ``infile`` into ``GSE65525/fastqs.dir``.

    The command is obtained from ``SRA.extract`` and then run.
    ``outfile`` is not referenced in the body.
    '''
    fastq_dir = "GSE65525/fastqs.dir"

    # NOTE(review): P.run() is called without arguments, so it presumably
    # picks up the local named ``statement`` -- keep that name; confirm.
    statement = SRA.extract(infile, fastq_dir)
    P.run()
Esempio n. 4
0
def extractGGSE53638(infile, outfile):
    '''Extract the fastqs from the SRA file ``infile``.

    The command is obtained from ``SRA.extract`` with the target directory
    ``GSE53638/fastqs.dir`` and then run.  ``outfile`` is not referenced
    in the body.
    '''
    fastq_dir = "GSE53638/fastqs.dir"

    # NOTE(review): P.run() is called without arguments, so it presumably
    # picks up the local named ``statement`` -- keep that name; confirm.
    statement = SRA.extract(infile, fastq_dir)
    P.run()
def extractGSE65525(infile, outfile):
    '''Extract fastqs from ``infile`` into ``GSE65525/fastqs.dir``.

    The command is obtained from ``SRA.extract`` and then run.
    ``outfile`` is not referenced in the body.
    '''
    fastq_dir = "GSE65525/fastqs.dir"

    # NOTE(review): P.run() is called without arguments, so it presumably
    # picks up the local named ``statement`` -- keep that name; confirm.
    statement = SRA.extract(infile, fastq_dir)
    P.run()
def extractGGSE53638(infile, outfile):
    '''Extract the fastqs from the SRA file ``infile``.

    The command is obtained from ``SRA.extract`` with the target directory
    ``GSE53638/fastqs.dir`` and then run.  ``outfile`` is not referenced
    in the body.
    '''
    fastq_dir = "GSE53638/fastqs.dir"

    # NOTE(review): P.run() is called without arguments, so it presumably
    # picks up the local named ``statement`` -- keep that name; confirm.
    statement = SRA.extract(infile, fastq_dir)
    P.run()