Ejemplo n.º 1
0
def assemble(samples, data_dir, results_dir, seed_fa):
    """Submit one SSAKE seed-extension job per sample.

    For each sample, the single joined-reads archive matching
    ``<sample>.jnd.fa.gz`` is located in ``data_dir``. If its uncompressed
    copy is not already present under ``<results_dir>/<sample>``, it is
    extracted there first, and then an SSAKE job is submitted through
    ``bsub`` with that directory as the working directory.

    Args:
        samples: iterable of sample names.
        data_dir: directory searched for ``<sample>.jnd.fa.gz``.
        results_dir: parent directory holding one sub-directory per sample.
        seed_fa: path passed to SSAKE's ``-s`` option.

    Raises:
        AssertionError: if a sample does not have exactly one matching
            joined-reads file.
    """
    for sample in samples:
        # The lookup must use the ``data_dir`` parameter; the previous code
        # referenced an undefined name ``datadir`` here.
        fastas = ngseq.getfilelist(data_dir, sample + ".jnd.fa.gz")
        assert len(fastas) == 1, (
            "expected exactly one joined fasta for %s, found %d"
            % (sample, len(fastas))
        )
        gzipfasta = fastas[0]
        outdir = "%s/%s" % (results_dir, sample)
        # Strip the trailing ".gz" to get the name of the uncompressed file.
        fasta = outdir + "/" + op.splitext(op.basename(gzipfasta))[0]
        if not op.exists(fasta):
            # NOTE(review): presumably bsub.poll blocks until the extraction
            # job has finished -- confirm against the bsub wrapper in use.
            bsub.poll(ngseq.extract(gzipfasta, fasta))
        cmd = (
            "SSAKE -f " + fasta + " -s " + seed_fa
            + " -m 40 -o 50 -r 0.8 -b " + sample
            + " -p 1 -v 1 -d 200 -e 0.75 -k 10 -a 0.5 -x 50"
        )
        bsub(
            "3prime_seed_extension",
            cwd=outdir,
            R="select[mem>16] rusage[mem=16] span[hosts=1]",
            verbose=True,
        )(cmd)
Ejemplo n.º 2
0
def join(samples, datadir, script):
    """Submit a read-joining job for every sample lacking joined output.

    For each sample, the trimmed FASTQ files matching
    ``<sample>_*.trm.fq.gz`` in ``datadir`` are passed, in sorted order, to
    ``script``; its output is piped through gzip into
    ``<datadir>/<sample>.jnd.fa.gz``. Samples whose output already exists
    are skipped.

    Args:
        samples: iterable of sample names.
        datadir: directory holding the trimmed reads and the joined output.
        script: path of the joining script, run with ``python``.

    Returns:
        list of whatever the ``bsub`` submitter returns for each submitted
        command, in sample order.

    Raises:
        AssertionError: if a sample needing work does not have exactly two
            matching FASTQ files.
    """
    submit = bsub("join_reads", verbose=True)
    submitted = []
    for name in samples:
        # Sorted so that R1 comes before R2 on the command line.
        reads = ngseq.getfilelist(datadir, name + "_*.trm.fq.gz")
        reads = sorted(reads)

        target = "/".join((datadir, name + ".jnd.fa.gz"))
        # NOTE(review): the second test looks for "<target>.gz", i.e. a
        # ".gz.gz" name -- kept as in the original; confirm the intent.
        already_done = op.exists(target) or op.exists(target + ".gz")
        if already_done:
            continue

        assert(len(reads) == 2)
        # usage: join_reads.py R1 R2 --insert 200
        pipeline = " ".join(
            ["python", script, " ".join(reads), "| gzip -c >", target]
        )
        submitted.append(submit(pipeline))
    return submitted