def assembleWithStringTie(infiles, outfile):
    '''Assemble transcripts from one alignment file with StringTie.

    Builds a shell statement that runs ``stringtie`` on the input, using
    the decompressed reference annotation as guide (``-G``), sends stderr
    to ``<outfile>.log`` and gzips stringtie's standard output into
    ``outfile``.

    If the input name ends in ``.remote`` the data is first fetched into
    a temporary directory via ``Sra.process_remote_BAM`` and that
    directory is removed again after the assembly.

    Parameters
    ----------
    infiles : sequence
        ``(infile, reference)``: the alignment file (or a ``.remote``
        file) and a gzip-compressed annotation (it is read with zcat).
    outfile : str
        Destination of the gzip-compressed stringtie output.
    '''

    infile, reference = infiles

    # NOTE(review): P.run() is called without arguments below, so the
    # statement, the job_* settings and the values for the %(...)s
    # placeholders are presumably picked up from this function's local
    # variables (and PARAMS). Do not rename these locals without
    # confirming that against the pipeline library.
    job_threads = PARAMS["stringtie_threads"]
    job_memory = PARAMS["stringtie_memory"]

    statement = '''stringtie %(infile)s
                           -p %(stringtie_threads)s
                           -G <(zcat %(reference)s)
                           %(stringtie_options)s
                           2> %(outfile)s.log
                   | gzip > %(outfile)s '''

    if infile.endswith(".remote"):
        token = glob.glob("gdc-user-token*")
        tmpfilename = P.getTempFilename()
        # The temporary name is used as a *directory* below. If the helper
        # already created a file at that path, "mkdir" would fail, so make
        # sure the path is free first.
        if os.path.exists(tmpfilename):
            os.unlink(tmpfilename)

        # use the first token file found in the working directory, if any
        if len(token) > 0:
            token = token[0]
        else:
            token = None

        s, infile = Sra.process_remote_BAM(
            infile,
            token,
            tmpfilename,
            filter_bed=os.path.join(
                PARAMS["annotations_dir"],
                PARAMS["annotations_interface_contigs_bed"]))

        # process_remote_BAM returns a sequence of file names; stringtie
        # receives them space-separated through %(infile)s
        infile = " ".join(infile)
        statement = "; checkpoint ;".join(
            ["mkdir %(tmpfilename)s", s, statement, "rm -r %(tmpfilename)s"])

    P.run()
Esempio n. 2
0
def quantifyWithSalmon(infiles, outfile):
    '''Quantify existing samples against genesets.

    The shell statement built here
    1. name-sorts the input BAM into ``sorted_bams/<sample>_sorted.bam``,
    2. converts it to a pair of FASTQ files with ``samtools fastq``,
    3. runs ``salmon quant`` against
       ``salmon_index/<gtf basename without .gz>.salmon.index`` writing to
       the directory ``outfile``,
    4. moves ``<outfile>/quant.sf`` to ``<outfile>.sf`` and removes the
       FASTQ files and the sorted BAM.

    If the input name ends in ``.remote`` the data is first fetched into
    ``temp_bams/<basename of infile>`` via ``Sra.process_remote_BAM`` and
    that directory is removed afterwards.

    Parameters
    ----------
    infiles : sequence
        ``(infile, gtffile)``: the BAM (or ``.remote``) file and the
        geneset whose basename selects the salmon index.
    outfile : str
        salmon output location; the FASTQ names are derived from it by
        stripping the ``_agg-agg-agg`` suffix.
    '''
    # NOTE(review): the job_* locals and the values of the %(...)s
    # placeholders are presumably read from this function's locals by
    # P.run - keep these names in sync with the statement.
    job_threads = 2
    job_memory = "16G"

    infile, gtffile = infiles
    basefile = os.path.basename(infile)
    # sample name is everything before the first extension separator
    sample_name = basefile.split(os.extsep, 1)[0]
    sorted_bam = "sorted_bams/" + sample_name + "_sorted.bam"
    gtfbase = P.snip(os.path.basename(gtffile), ".gz")
    salmonIndex = "salmon_index/" + gtfbase + ".salmon.index"
    fastq1 = P.snip(outfile, "_agg-agg-agg") + ".1.fastq"
    fastq2 = P.snip(outfile, "_agg-agg-agg") + ".2.fastq"
    salmon_options = PARAMS["salmon_quantoptions"]

    statement = '''
    samtools sort -n %(infile)s -o %(sorted_bam)s;
    samtools fastq
         -1 %(fastq1)s
         -2 %(fastq2)s
         -0 /dev/null -s /dev/null -n -F 0x900
         %(sorted_bam)s; 
    salmon quant -i %(salmonIndex)s
        --libType IU
        -1 %(fastq1)s
        -2 %(fastq2)s
        -o %(outfile)s
        %(salmon_options)s; 
    mv %(outfile)s/quant.sf %(outfile)s.sf; 
    rm %(fastq1)s; rm %(fastq2)s; rm %(sorted_bam)s 
    '''

    if infile.endswith(".remote"):
        token = glob.glob("gdc-user-token*")
        filename = "temp_bams/%s" % basefile

        # use the first token file found in the working directory, if any
        if len(token) > 0:
            token = token[0]
        else:
            token = None

        s, infile = Sra.process_remote_BAM(
            infile,
            token,
            filename,
            filter_bed=os.path.join(
                PARAMS["annotations_dir"],
                PARAMS["annotations_interface_contigs_bed"]))

        infile = " ".join(infile)
        # "-p": the download directory lives below temp_bams/, which may
        # not exist yet, and may itself be left over from an earlier run;
        # neither case should make the mkdir step fail.
        statement = "; ".join(
            ["mkdir -p %(filename)s", s, statement, "rm -r %(filename)s"])

    P.run(statement)
Esempio n. 3
0
def downloadFiles(infiles, outfile):
    '''Download the data described by a ``.remote`` file.

    Every non-empty line of the remote file must start with two
    tab-separated fields, a repository name and an accession:

    * ``SRA``: the accession is prefetched and extracted into the
      directory of ``outfile``, unless ``<outfile>.1.gz`` already exists.
    * ``GDC``: the data is fetched into ``temp_bams/<basename of infile>``
      via ``Sra.process_remote_BAM``, copied to
      ``bam.dir/<basename without last extension>.bam`` and the temporary
      directory is removed, unless that BAM file already exists.

    Inputs whose name does not end in ``.remote`` are ignored.

    Parameters
    ----------
    infiles : str
        Path of the ``.remote`` file.
    outfile : str
        Output path of the task; used for the SRA existence check and to
        derive the extraction directory.

    Raises
    ------
    ValueError
        If a line names a repository other than ``SRA`` or ``GDC`` (or
        does not contain two tab-separated fields).
    '''

    infile = infiles
    if not infile.endswith(".remote"):
        return

    basefile = os.path.basename(infile)
    filename = "temp_bams/%s" % basefile
    outdir = os.path.dirname(outfile)

    # Close the remote file deterministically once all lines are handled.
    with IOTools.open_file(infile) as remote:
        for line in remote:
            line = line.strip()
            if not line:
                # tolerate blank lines instead of failing on unpacking
                continue
            repo, acc = line.split("\t")[:2]

            if repo == "SRA":
                if not os.path.isfile(outfile + ".1.gz"):
                    statement = "; ".join(
                        [Sra.prefetch(acc),
                         Sra.extract(acc, outdir)])
                    P.run(statement)

            elif repo == "GDC":
                # Per-line results get their own names so that `infile`
                # and `outfile` keep their meaning for subsequent lines.
                # NOTE(review): the %(...)s placeholders are presumably
                # filled from these locals by P.run - keep names in sync.
                bam_outfile = ("bam.dir/" +
                               os.path.splitext(basefile)[0] + ".bam")

                # use the first token file found, if any
                token = glob.glob("gdc-user-token*")
                if len(token) > 0:
                    token = token[0]
                else:
                    token = None

                s, bam_files = Sra.process_remote_BAM(
                    infile,
                    token,
                    filename,
                    filter_bed=os.path.join(
                        PARAMS["annotations_dir"],
                        PARAMS["annotations_interface_contigs_bed"]))

                bam_infiles = " ".join(bam_files)
                if not os.path.isfile(bam_outfile):
                    statement = "; ".join([
                        "mkdir -p %(filename)s", s,
                        '''cp %(bam_infiles)s %(bam_outfile)s;
                            rm -r %(filename)s'''
                    ])
                    P.run(statement)

            else:
                raise ValueError("Unknown repository: %s" % repo)
def assembleWithStringTie(infiles, outfile):
    '''Filter junctions with portcullis, then assemble with StringTie.

    The shell statement runs ``portcullis full`` on the input, moves the
    resulting ``portcullis.filtered.bam`` to a temporary file, deletes the
    portcullis output directory, runs ``stringtie`` on the filtered BAM
    (stderr to ``<outfile>.log``, stdout gzipped into ``outfile``) and
    finally removes the temporary BAM.

    For inputs ending in ``.remote`` the data is first fetched into a
    temporary directory via ``Sra.process_remote_BAM``; that directory is
    removed after the run.

    Parameters
    ----------
    infiles : sequence
        ``(infile, reference)``: alignment (or ``.remote``) file and a
        gzip-compressed annotation used as stringtie guide.
    outfile : str
        Destination of the gzip-compressed stringtie output.
    '''

    infile, reference = infiles
    basefile = os.path.basename(infile)

    # NOTE(review): job_* and the values of the %(...)s placeholders are
    # presumably resolved by P.run from these locals (and PARAMS), so the
    # local names must match the statement text.
    job_threads = PARAMS["stringtie_threads"]
    job_memory = PARAMS["stringtie_memory"]

    # only the *name* is needed: portcullis' filtered BAM is moved here
    tmpfile = P.get_temp_filename()
    if os.path.exists(tmpfile):
        os.unlink(tmpfile)

    statement = '''
                    portcullis full 
                            -t 1
                            -o portcullis/%(basefile)s/
                            -r %(portcullis_bedref)s
                            -b 
                            %(portcullis_fastaref)s
                            %(infile)s &&
                    mv portcullis/%(basefile)s/portcullis.filtered.bam %(tmpfile)s &&
                    rm -r portcullis/%(basefile)s/ &&
                    stringtie %(tmpfile)s
                           -p %(stringtie_threads)s
                           -G <(zcat %(reference)s)
                           %(stringtie_options)s
                           2> %(outfile)s.log
                   | gzip > %(outfile)s &&
                   rm %(tmpfile)s'''

    # local input: nothing to download, run the statement as is
    if not infile.endswith(".remote"):
        P.run(statement)
        return

    tokens = glob.glob("gdc-user-token*")

    # the temporary name is re-created as a directory by the statement
    tmpfilename = P.get_temp_filename()
    if os.path.exists(tmpfilename):
        os.unlink(tmpfilename)

    # first token file found in the working directory, or none at all
    token = tokens[0] if tokens else None

    contigs_bed = os.path.join(
        PARAMS["annotations_dir"],
        PARAMS["annotations_interface_contigs_bed"])
    download_cmd, remote_bams = Sra.process_remote_BAM(
        infile, token, tmpfilename, filter_bed=contigs_bed)

    # downloaded files are handed to the statement space-separated
    infile = " ".join(remote_bams)
    steps = [
        "mkdir -p %(tmpfilename)s",
        download_cmd,
        statement,
        "rm -r %(tmpfilename)s",
    ]
    statement = "; ".join(steps)

    P.run(statement)
Esempio n. 5
0
def quantifyWithSalmon(infiles, outfile):
    '''Quantify existing samples against genesets.

    The shell statement converts the input BAM into a pair of FASTQ files
    with ``samtools fastq``, runs ``salmon quant`` against
    ``salmon_index/<gtf basename without .gz>.salmon.index`` writing to
    the directory ``outfile``, moves ``<outfile>/quant.sf`` to
    ``<outfile>.sf`` and removes the FASTQ files.

    If the input name ends in ``.remote`` the data is first fetched into
    ``temp_bams/<basename of infile>`` via ``Sra.process_remote_BAM`` and
    that directory is removed afterwards.

    Parameters
    ----------
    infiles : sequence
        ``(infile, gtffile)``: the BAM (or ``.remote``) file and the
        geneset whose basename selects the salmon index.
    outfile : str
        salmon output location; the FASTQ names are derived from it by
        stripping the ``_agg-agg-agg`` suffix.
    '''
    # NOTE(review): P.run() is called without arguments, so `statement`,
    # the job_* settings and the placeholder values are presumably read
    # from this function's locals - keep the names in sync.
    job_threads = 2
    job_memory = "8G"

    infile, gtffile = infiles
    basefile = os.path.basename(infile)
    gtfbase = P.snip(os.path.basename(gtffile), ".gz")
    salmonIndex = "salmon_index/" + gtfbase + ".salmon.index"
    fastq1 = P.snip(outfile, "_agg-agg-agg") + ".1.fastq"
    fastq2 = P.snip(outfile, "_agg-agg-agg") + ".2.fastq"
    salmon_options = PARAMS["salmon_quantoptions"]

    statement = '''
    samtools fastq
         -1 %(fastq1)s
         -2 %(fastq2)s
         %(infile)s; 
    salmon quant -i %(salmonIndex)s
        --libType A
        -1 %(fastq1)s
        -2 %(fastq2)s
        -o %(outfile)s
        --threads %(job_threads)s
        %(salmon_options)s; 
    checkpoint; 
    mv %(outfile)s/quant.sf %(outfile)s.sf; 
    rm %(fastq1)s; rm %(fastq2)s 
    '''

    if infile.endswith(".remote"):
        token = glob.glob("gdc-user-token*")
        filename = "temp_bams/%s" % basefile

        # No temporary file is requested here: the download goes to
        # `filename`, so a temp file would only be left behind unused.

        # use the first token file found in the working directory, if any
        if len(token) > 0:
            token = token[0]
        else:
            token = None

        s, infile = Sra.process_remote_BAM(
            infile,
            token,
            filename,
            filter_bed=os.path.join(
                PARAMS["annotations_dir"],
                PARAMS["annotations_interface_contigs_bed"]))

        infile = " ".join(infile)
        # "-p": temp_bams/ may not exist yet and the directory may be left
        # over from an earlier run; with the checkpoint after each step a
        # failing mkdir would otherwise stop the whole job.
        statement = "; checkpoint; ".join(
            ["mkdir -p %(filename)s", s, statement, "rm -r %(filename)s"])

    P.run()
def runKalistoOnRemoteBAM(infiles, outfile):
    '''Run kallisto on a .bam or .remote file.

    The input is converted to a pair of temporary FASTQ files with
    ``samtools fastq``, ``kallisto quant`` is run on them writing into the
    directory of ``outfile``, the temporary files are removed and the
    result (``outfile`` without its ``.gz`` suffix) is gzipped.

    For inputs ending in ``.remote`` the statement returned by
    ``Sra.process_remote_BAM`` is run first and the files it reports are
    used as input.

    Parameters
    ----------
    infiles : sequence
        ``(infile, kallisto_index)``.
    outfile : str
        Final gzip-compressed output; its directory is passed to kallisto
        as output directory.
    '''

    # NOTE(review): P.run() is called without arguments, so `statement`,
    # job_memory and the placeholder values are presumably read from this
    # function's locals - keep the names in sync with the statement.
    job_memory = "6G"
    infile, kallisto_index = infiles

    outdir = os.path.dirname(outfile)
    # the statement gzips the uncompressed file, so drop the suffix here
    outfile = P.snip(outfile, ".gz")

    statement = []

    tempfastq1 = P.getTempFilename()
    tempfastq2 = P.getTempFilename()
    rm_files = [tempfastq1, tempfastq2]

    if infile.endswith(".remote"):
        # NOTE(review): tempbam is handed over as `outdir`; other callers
        # in this file unlink the temp file and mkdir the path first -
        # confirm whether that is required here as well.
        tempbam = P.getTempFilename()
        s, infiles = Sra.process_remote_BAM(infile, outdir=tempbam)
        infile = " ".join(infiles)
        statement.append(s)
        rm_files.append(tempbam)

    statement.append('''samtools fastq %(infile)s 
                                      -1 %(tempfastq1)s 
                                      -2 %(tempfastq2)s''')

    statement.append('''kallisto quant -i %(kallisto_index)s
                                       -o %(outdir)s
                                       %(tempfastq1)s %(tempfastq2)s''')
    # from here on rm_files is the space-separated list used by "rm -R"
    rm_files = " ".join(rm_files)
    statement.append('''rm -R %(rm_files)s''')
    statement.append('''gzip %(outfile)s''')
    statement = "; \n checkpoint;\n".join(statement)

    P.run()