Ejemplos de sh en Python, ejemplos de maize.apps.base.sh en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: genome.py Proyecto: orionzhou/robin

def clean_fasta(args):
    """Link the renamed genome FASTA and build its interval files.

    Works inside the directory returned by ``make_genomedir(args.species)``:
    removes stale index files, links ``08_seq_map/renamed.fna`` (and the
    matching ``.sizes`` file when present) as ``10_genome.*`` and creates
    the chromosome / gap interval files under ``15_intervals``.  Outputs
    that already exist are kept unless ``args.overwrite`` is set.

    Exits with status 1 when ``08_seq_map/renamed.fna`` is needed but absent.
    """
    dirw = make_genomedir(args.species)
    os.chdir(dirw)

    # Remove index files left behind by earlier runs.
    for fname in ("raw.fix.fas.index", "11_genome.fas.index"):
        if op.isfile(fname):
            os.remove(fname)
    # An existing symlink is dropped so that it is re-created below.
    if op.islink("10_genome.fna"):
        os.unlink("10_genome.fna")

    if op.isfile("10_genome.fna") and not args.overwrite:
        logging.debug("10_genome.fna already exists: skipped")
    elif op.isfile("08_seq_map/renamed.fna"):
        sh("ln -sf 08_seq_map/renamed.fna 10_genome.fna")
        if op.isfile("08_seq_map/renamed.sizes"):
            sh("ln -sf 08_seq_map/renamed.sizes 10_genome.sizes")
    else:
        logging.error("08_seq_map/renamed.fna not there")
        sys.exit(1)

    if not op.isdir("15_intervals"):
        mkdir("15_intervals")

    # (output file name, command whose stdout is redirected into it)
    interval_jobs = (
        ("01.chrom.bed", "fasta size --bed 10_genome.fna"),
        ("01.chrom.sizes", "faSize -detailed 10_genome.fna"),
        ("11.gap.bed", "fasta gaps 10_genome.fna"),
    )
    for fname, cmd in interval_jobs:
        target = "15_intervals/" + fname
        if op.isfile(target) and not args.overwrite:
            logging.debug("%s already exists - skipped" % fname)
        else:
            sh("%s > %s" % (cmd, target))

Ejemplo n.º 2

0

Mostrar archivo

def fpkm(args):
    """
    %prog fpkm fastafile *.bam

    Calculate FPKM values from BAM file.
    """
    # NOTE: the docstring above doubles as the usage text handed to the
    # option parser, so it is kept verbatim.
    p = OptionParser(fpkm.__doc__)
    opts, args = p.parse_args(args)

    if len(args) < 2:
        sys.exit(not p.print_help())

    fastafile = args[0]
    bamfiles = args[1:]
    # Create a DUMMY gff file for cuffdiff: one "transcript" record per
    # sequence, covering positions 1..size.
    gffile = fastafile.rsplit(".", 1)[0] + ".gff"
    if need_update(fastafile, gffile):
        f = Fasta(fastafile, lazy=True)
        # `with` closes the handle even when iterating the FASTA fails, and
        # write() works on both Python 2 and 3 (unlike `print >> fw`).
        with open(gffile, "w") as fw:
            for key, size in f.itersizes_ordered():
                fields = (key, "dummy", "transcript", 1, size,
                          ".", ".", ".", "ID=" + key)
                fw.write("\t".join(str(x) for x in fields) + "\n")
        logging.debug("Dummy GFF created: {0}".format(gffile))

    cmd = "cuffdiff {0} {1}".format(gffile, " ".join(bamfiles))
    sh(cmd)

Ejemplo n.º 3

0

Mostrar archivo

Archivo: sam2.py Proyecto: orionzhou/robin

def fastq(args):
    """
    %prog fastq bamfile prefix

    Convert BAM files to paired FASTQ files.
    """
    # The docstring is also the parser's usage text; keep it verbatim.
    p = OptionParser(fastq.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    bamfile, pf = args
    # Output names derived from the prefix: unpaired reads, read 1, read 2.
    singletons, a, b = [pf + ext for ext in
                        (".se.fastq", ".read1.fastq", ".read2.fastq")]

    # Pipe the collated BAM stream into `samtools fastq`.
    collate = "samtools collate -uOn 128 {} tmp-prefix".format(bamfile)
    split = "samtools fastq -s {} -1 {} -2 {} -".format(singletons, a, b)
    sh(" | ".join((collate, split)))

    # Do not leave an empty singleton file behind.
    if not os.stat(singletons).st_size:
        os.remove(singletons)
    return a, b

Ejemplo n.º 4

0

Mostrar archivo

def fastq(args):
    """
    %prog fastq bamfile prefix

    Convert BAM files to paired FASTQ files.
    """
    # The docstring is reused as the usage message, so it stays unchanged.
    p = OptionParser(fastq.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    bamfile, pf = args
    singletons = pf + ".se.fastq"
    a = pf + ".read1.fastq"
    b = pf + ".read2.fastq"

    # One shell pipeline: collate the BAM, then write the FASTQ files.
    pipeline = (
        "samtools collate -uOn 128 {} tmp-prefix"
        " | samtools fastq -s {} -1 {} -2 {} -"
    ).format(bamfile, singletons, a, b)
    sh(pipeline)

    # An empty singleton file carries no information; remove it.
    singleton_bytes = os.stat(singletons).st_size
    if singleton_bytes == 0:
        os.remove(singletons)
    return a, b

Ejemplo n.º 5

0

Mostrar archivo

def clean_fasta(args):
    """Set up ``10_genome.fna`` and the ``15_intervals`` files for a genome.

    Runs in the directory returned by ``make_genomedir(args.species)``.
    Every output that already exists is left alone unless ``args.overwrite``
    is true.  Exits with status 1 if ``08_seq_map/renamed.fna`` is required
    but missing.
    """
    dirw = make_genomedir(args.species)
    os.chdir(dirw)

    # Stale index files from previous runs are removed unconditionally.
    for fname in ["raw.fix.fas.index", "11_genome.fas.index"]:
        if op.isfile(fname):
            os.remove(fname)
    # Remove an existing symlink so the link is refreshed below.
    if op.islink("10_genome.fna"):
        os.unlink("10_genome.fna")

    if op.isfile("10_genome.fna") and not args.overwrite:
        logging.debug("10_genome.fna already exists: skipped")
    elif op.isfile("08_seq_map/renamed.fna"):
        sh("ln -sf 08_seq_map/renamed.fna 10_genome.fna")
        if op.isfile("08_seq_map/renamed.sizes"):
            sh("ln -sf 08_seq_map/renamed.sizes 10_genome.sizes")
    else:
        logging.error("08_seq_map/renamed.fna not there")
        sys.exit(1)

    if not op.isdir("15_intervals"):
        mkdir("15_intervals")

    # Chromosome intervals in BED form.
    if op.isfile("15_intervals/01.chrom.bed") and not args.overwrite:
        logging.debug("01.chrom.bed already exists - skipped")
    else:
        sh("fasta size --bed 10_genome.fna > 15_intervals/01.chrom.bed")

    # Chromosome sizes table.
    if op.isfile("15_intervals/01.chrom.sizes") and not args.overwrite:
        logging.debug("01.chrom.sizes already exists - skipped")
    else:
        sh("faSize -detailed 10_genome.fna > 15_intervals/01.chrom.sizes")

    # Gap intervals.
    if op.isfile("15_intervals/11.gap.bed") and not args.overwrite:
        logging.debug("11.gap.bed already exists - skipped")
    else:
        sh("fasta gaps 10_genome.fna > 15_intervals/11.gap.bed")

Ejemplo n.º 6

0

Mostrar archivo

def run_megablast(infile=None, outfile=None, db=None, wordsize=None,
        pctid=98, hitlen=100, best=None, evalue=0.01, task="megablast", cpus=16):
    """Run ``blastn`` against ``db`` and optionally filter the hits in place.

    The database is formatted first via ``run_formatdb``.  ``wordsize``,
    ``pctid`` and ``best`` add their respective ``blastn`` flags only when
    truthy.  When both ``pctid`` and ``hitlen`` are truthy the tabular output
    is passed through ``run_blast_filter`` and the filtered file replaces
    ``outfile``.
    """
    assert db, "Need to specify database fasta file."

    db = get_abs_path(db)
    # Prefer the first volume index (".00.nin") if one exists.
    nin00 = db + ".00.nin"
    nin = nin00 if op.exists(nin00) else (db + ".nin")
    run_formatdb(infile=db, outfile=nin)

    pieces = [
        "blastn",
        "-query {0} -db {1} -out {2}".format(infile, db, outfile),
        "-evalue {0} -outfmt 6 -num_threads {1}".format(evalue, cpus),
        "-task {0}".format(task),
    ]
    optional_flags = (
        ("-word_size", wordsize),
        ("-perc_identity", pctid),
        ("-max_target_seqs", best),
    )
    for flag, value in optional_flags:
        if value:
            pieces.append("{0} {1}".format(flag, value))
    sh(" ".join(pieces))

    if not (pctid and hitlen):
        return

    # Filter into a side file, then move it over the raw output.
    filtered_blastfile = outfile + ".P{0}L{1}".format(pctid, hitlen)
    run_blast_filter(infile=outfile, outfile=filtered_blastfile,
                     pctid=pctid, hitlen=hitlen)
    shutil.move(filtered_blastfile, outfile)

Ejemplo n.º 7

0

Mostrar archivo

def consensus(args):
    """
    %prog consensus fastafile bamfile

    Convert bam alignments to consensus FASTQ/FASTA.
    """
    # The docstring above is the parser's usage text; keep it verbatim.
    p = OptionParser(consensus.__doc__)
    # Options are registered on the parser itself and read back from `opts`.
    p.add_option(
        "--fasta",
        default=False,
        action="store_true",
        help="Generate consensus FASTA sequences [default: %default]")
    p.add_option("--mask",
                 default=0,
                 type="int",
                 help="Mask bases with quality lower than")
    opts, args = p.parse_args(args)

    # Exactly two positionals are unpacked below.
    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, bamfile = args
    fasta = opts.fasta
    suffix = "fasta" if fasta else "fastq"
    pf = bamfile.rsplit(".", 1)[0]
    cnsfile = pf + ".cns.{0}".format(suffix)
    vcfgzfile = pf + ".vcf.gz"
    # Call variants first; the consensus is derived from the VCF.
    vcf([fastafile, bamfile, "-o", vcfgzfile])
    cmd = "zcat {0} | vcfutils.pl vcf2fq".format(vcfgzfile)
    if fasta:
        cmd += " | seqtk seq -q {0} -A -".format(opts.mask)

    sh(cmd, outfile=cnsfile)

Ejemplo n.º 8

0

Mostrar archivo

Archivo: aws.py Proyecto: orionzhou/robin

def push_to_s3(s3_store, obj_name):
    """Upload ``obj_name`` below ``s3_store`` with the ``aws s3`` CLI.

    Directories are transferred with ``sync``, anything else with ``cp``.
    Returns the destination address after it went through ``s3ify``.
    """
    action = "sync" if op.isdir(obj_name) else "cp"
    s3address = s3ify("{0}/{1}".format(s3_store, obj_name))
    sh("aws s3 {0} {1} {2} --sse".format(action, obj_name, s3address))
    return s3address

Ejemplo n.º 9

0

Mostrar archivo

Archivo: fastq.py Proyecto: shanwai1234/maize

def trim(args):
    """
    %prog trim fastqfile

    Wraps `fastx_trimmer` to trim from begin or end of reads.
    """
    # The docstring above is the parser's usage text; keep it verbatim.
    p = OptionParser(trim.__doc__)
    # Options belong on the parser `p`; their values come back in `opts`.
    p.add_option("-f",
                 dest="first",
                 default=0,
                 type="int",
                 help="First base to keep. Default is 1.")
    p.add_option("-l",
                 dest="last",
                 default=0,
                 type="int",
                 help="Last base to keep. Default is entire read.")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastqfile, = args
    # The output is written next to the current directory, named after the
    # input's base name; a trailing ".gz" is carried over to the output.
    obfastqfile = op.basename(fastqfile)
    fq = obfastqfile.rsplit(".", 1)[0] + ".ntrimmed.fastq"
    if fastqfile.endswith(".gz"):
        fq = obfastqfile.rsplit(".", 2)[0] + ".ntrimmed.fastq.gz"

    cmd = "fastx_trimmer -Q33 "
    # A value of 0 means "not requested": the flag is simply omitted.
    if opts.first:
        cmd += "-f {0.first} ".format(opts)
    if opts.last:
        cmd += "-l {0.last} ".format(opts)

    sh(cmd, infile=fastqfile, outfile=fq)

Ejemplo n.º 10

0

Mostrar archivo

Archivo: sam2.py Proyecto: orionzhou/robin

def consensus(args):
    """
    %prog consensus fastafile bamfile

    Convert bam alignments to consensus FASTQ/FASTA.
    """
    # The docstring is handed to the parser as usage text; keep it verbatim.
    p = OptionParser(consensus.__doc__)
    p.add_option("--fasta", default=False, action="store_true",
            help="Generate consensus FASTA sequences [default: %default]")
    p.add_option("--mask", default=0, type="int",
            help="Mask bases with quality lower than")
    # parse_args returns (option values, positional list); option values
    # must be read from `opts`, never from the positional list.
    opts, args = p.parse_args(args)

    # The unpacking below needs exactly two positionals.
    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, bamfile = args
    want_fasta = opts.fasta
    suffix = "fasta" if want_fasta else "fastq"
    pf = bamfile.rsplit(".", 1)[0]
    cnsfile = pf + ".cns.{0}".format(suffix)
    vcfgzfile = pf + ".vcf.gz"
    vcf([fastafile, bamfile, "-o", vcfgzfile])

    # Start the pipeline here; FASTA output appends a seqtk conversion step.
    cmd = "zcat {0} | vcfutils.pl vcf2fq".format(vcfgzfile)
    if want_fasta:
        cmd += " | seqtk seq -q {0} -A -".format(opts.mask)

    sh(cmd, outfile=cnsfile)

Ejemplo n.º 11

0

Mostrar archivo

Archivo: coords.py Proyecto: shanwai1234/maize

def merge(args):
    """
    %prog merge ref.fasta query.fasta *.delta

    Merge delta files into a single delta.
    """
    # The docstring is the parser's usage text; keep it verbatim.
    p = OptionParser(merge.__doc__)
    p.set_outfile(outfile="merged_results.delta")
    opts, args = p.parse_args(args)

    if len(args) < 3:
        sys.exit(not p.print_help())

    ref, query = args[:2]
    deltafiles = args[2:]
    # The output name is an option value, so it lives on `opts`.
    outfile = opts.outfile

    ref = get_abs_path(ref)
    query = get_abs_path(query)

    # Write the two header lines once; write() runs on Python 2 and 3.
    fw = must_open(outfile, "w")
    try:
        fw.write(" ".join((ref, query)) + "\n")
        fw.write("NUCMER\n")
    finally:
        fw.close()

    # Append every delta file minus its own two header lines.
    for d in deltafiles:
        cmd = "awk 'NR > 2 {{print $0}}' {0}".format(d)
        sh(cmd, outfile=outfile, append=True)

Ejemplo n.º 12

0

Mostrar archivo

Archivo: sam2.py Proyecto: orionzhou/robin

def fpkm(args):
    """
    %prog fpkm fastafile *.bam

    Calculate FPKM values from BAM file.
    """
    # The docstring is the parser's usage text; keep it verbatim.
    p = OptionParser(fpkm.__doc__)
    opts, args = p.parse_args(args)

    if len(args) < 2:
        sys.exit(not p.print_help())

    fastafile, bamfiles = args[0], args[1:]

    # Create a DUMMY gff file for cuffdiff
    gffile = fastafile.rsplit(".", 1)[0] + ".gff"
    if need_update(fastafile, gffile):
        f = Fasta(fastafile, lazy=True)
        # Context manager guarantees the file is closed; fw.write replaces
        # the Python 2-only `print >> fw` form.
        with open(gffile, "w") as fw:
            for key, size in f.itersizes_ordered():
                record = (key, "dummy", "transcript", 1, size,
                          ".", ".", ".", "ID=" + key)
                fw.write("\t".join(str(x) for x in record) + "\n")
        logging.debug("Dummy GFF created: {0}".format(gffile))

    cmd = "cuffdiff {0} {1}".format(gffile, " ".join(bamfiles))
    sh(cmd)

Ejemplo n.º 13

0

Mostrar archivo

Archivo: chain.py Proyecto: orionzhou/robin

def blat(args):
    """
    %prog blat old.fasta new.fasta

    Generate psl file using blat.
    """
    # The docstring is the parser's usage text; keep it verbatim.
    p = OptionParser(blat.__doc__)
    p.add_option("--minscore", default=100, type="int",
                 help="Matches minus mismatches gap penalty [default: %default]")
    p.add_option("--minid", default=98, type="int",
                 help="Minimum sequence identity [default: %default]")
    p.set_cpus()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    oldfasta, newfasta = args
    # Both inputs are converted; only the old one is used as blat database.
    oldtwobit, newtwobit = [faToTwoBit(fastafile) for fastafile in args]

    # Use the threaded pblat when it is on PATH, plain blat otherwise.
    if which("pblat"):
        aligner = "pblat -threads={0}".format(opts.cpus)
    else:
        aligner = "blat"

    # Output is named "<new>.<old>.psl" from the parts before the first dot.
    new_stem, old_stem = [op.basename(x).split('.')[0]
                          for x in (newfasta, oldfasta)]
    pslfile = "{0}.{1}.psl".format(new_stem, old_stem)

    cmd = "{0} {1} {2} -tileSize=12 -minScore={3} -minIdentity={4} {5}".format(
        aligner, oldtwobit, newfasta, opts.minscore, opts.minid, pslfile)
    sh(cmd)

Ejemplo n.º 14

0

Mostrar archivo

Archivo: sam2.py Proyecto: orionzhou/robin

def get_minibam_bed(bamfile, bedfile, minibam=None):
    """Extract the reads overlapping the BED regions into a small BAM.

    samtools view -L could do the work, but it is NOT random access. Here we
    are processing multiple regions sequentially. See also:

    https://www.biostars.org/p/49306/

    ``minibam`` names the output; when omitted it is derived from the base
    names of ``bamfile`` and ``bedfile``.  Returns the name of the sorted,
    indexed mini BAM.
    """
    pf = op.basename(bedfile).split(".")[0]
    minibamfile = minibam or op.basename(bamfile).replace(".bam", ".{}.bam".format(pf))
    # Derive the SAM name from the resolved BAM name so that the default
    # minibam=None does not fail on None.replace().
    minisamfile = minibamfile.replace(".bam", ".sam")
    baifile = minibamfile + ".bai"
    # A leftover index would not match the BAM that is about to be rebuilt.
    if op.exists(baifile):
        sh("rm {}".format(baifile))

    # Start the SAM file with the header of the source BAM.
    cmd = "samtools view -H {} > {}".format(bamfile, minisamfile)
    sh(cmd)

    # Turn each BED line into "chr:start-end" and query the regions one by
    # one.  The braces are backslash-escaped for the shell; "\\{" yields the
    # same two characters the former "\{" did without an invalid escape.
    cmd = "cat {}".format(bedfile)
    cmd += " | perl -lane 'print \"$F[0]:$F[1]-$F[2]\"'"
    cmd += " | xargs -n1 -t -I \\{\\}"
    cmd += " samtools view {}".format(bamfile)
    cmd += " \\{\\} >> " + minisamfile
    sh(cmd)

    # Convert back to BAM, sort and index.
    cmd = "samtools view {} -b".format(minisamfile)
    cmd += " | samtools sort -"
    cmd += " -o {0}".format(minibamfile)

    sh(cmd)
    sh("samtools index {0}".format(minibamfile))
    return minibamfile

Ejemplo n.º 15

0

Mostrar archivo

Archivo: fastq.py Proyecto: shanwai1234/maize

def first(args):
    """
    %prog first N fastqfile(s)

    Get first N reads from file.
    """
    # The docstring is the parser's usage text; keep it verbatim.
    from maize.apps.base import need_update

    p = OptionParser(first.__doc__)
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) < 2:
        sys.exit(not p.print_help())

    N = int(args[0])
    nlines = N * 4  # a FASTQ record spans four lines
    fastqfiles = args[1:]
    # The output name is an option value, so it is read from `opts`.
    outfile = opts.outfile
    if not need_update(fastqfiles, outfile):
        logging.debug("File `{0}` exists. Will not overwrite.".format(outfile))
        return

    for fastqfile in fastqfiles:
        # Decide per file whether it must be decompressed, so a mix of
        # plain and gzipped inputs is handled correctly.
        if fastqfile.endswith(".gz"):
            cmd = "zcat {0} | head -n {1}".format(fastqfile, nlines)
        else:
            cmd = "head -n {0} {1}".format(nlines, fastqfile)

        sh(cmd, outfile=outfile, append=True)

Ejemplo n.º 16

0

Mostrar archivo

def get_minibam_bed(bamfile, bedfile, minibam=None):
    """Build a sorted, indexed BAM restricted to the regions of a BED file.

    samtools view -L could do the work, but it is NOT random access. Here we
    are processing multiple regions sequentially. See also:

    https://www.biostars.org/p/49306/

    When ``minibam`` is not given, the output name is the base name of
    ``bamfile`` with the BED file's prefix inserted before ``.bam``.
    Returns the output BAM name.
    """
    pf = op.basename(bedfile).split(".")[0]
    minibamfile = minibam or op.basename(bamfile).replace(
        ".bam", ".{}.bam".format(pf))
    # Use the resolved name: `minibam` itself may be None.
    minisamfile = minibamfile.replace(".bam", ".sam")
    baifile = minibamfile + ".bai"
    if op.exists(baifile):
        sh("rm {}".format(baifile))

    # Header first, region hits are appended afterwards.
    sh("samtools view -H {} > {}".format(bamfile, minisamfile))

    # xargs substitutes each "chr:start-end" string for the \{\} token;
    # the backslashes are written as "\\" to avoid invalid string escapes.
    region_cmd = " ".join((
        "cat {}".format(bedfile),
        "| perl -lane 'print \"$F[0]:$F[1]-$F[2]\"'",
        "| xargs -n1 -t -I \\{\\}",
        "samtools view {}".format(bamfile),
        "\\{\\} >> " + minisamfile,
    ))
    sh(region_cmd)

    sort_cmd = "samtools view {} -b".format(minisamfile)
    sort_cmd += " | samtools sort -"
    sort_cmd += " -o {0}".format(minibamfile)
    sh(sort_cmd)

    sh("samtools index {0}".format(minibamfile))
    return minibamfile

Ejemplo n.º 17

0

Mostrar archivo

Archivo: aws.py Proyecto: orionzhou/robin

def sync_from_s3(s3_store, target_dir=None):
    """Mirror an S3 prefix into a local directory with ``aws s3 sync``.

    ``target_dir`` defaults to the last path component of the (normalised)
    store address.  Returns the local directory name that was used.
    """
    # Strip trailing slashes before normalising the address.
    s3_store = s3ify(s3_store.rstrip("/"))
    local_dir = op.basename(s3_store) if target_dir is None else target_dir
    sh("aws s3 sync {}/ {}/".format(s3_store, local_dir))
    return local_dir

Ejemplo n.º 18

0

Mostrar archivo

def build_bwa(args):
    """Create the BWA index ``db`` under ``<genome dir>/21_dbs/bwa``.

    Nothing is rebuilt when ``db.bwt`` already exists, unless
    ``args.overwrite`` is set.
    """
    dirg, fg = get_genomedir(args.species)
    dirw = op.join(dirg, "21_dbs/bwa")
    if not op.isdir(dirw):
        mkdir(dirw)
    os.chdir(dirw)

    if not op.isfile("db.bwt") or args.overwrite:
        sh("bwa index -a bwtsw -p %s/db %s" % (dirw, fg))
    else:
        logging.debug("db.bwt already exists - skipped")

Ejemplo n.º 19

0

Mostrar archivo

Archivo: genome.py Proyecto: orionzhou/robin

def build_bwa(args):
    """Index the genome of ``args.species`` with ``bwa index``.

    The index is written as ``db`` inside ``21_dbs/bwa`` of the genome
    directory; an existing ``db.bwt`` is reused unless ``args.overwrite``.
    """
    dirg, fg = get_genomedir(args.species)
    dirw = op.join(dirg, "21_dbs/bwa")
    if not op.isdir(dirw):
        mkdir(dirw)
    os.chdir(dirw)

    index_present = op.isfile("db.bwt")
    if index_present and not args.overwrite:
        logging.debug("db.bwt already exists - skipped")
        return

    index_cmd = "bwa index -a bwtsw -p %s/db %s" % (dirw, fg)
    sh(index_cmd)

Ejemplo n.º 20

0

Mostrar archivo

Archivo: coords.py Proyecto: shanwai1234/maize

def filter(args):
    """
    %prog filter <deltafile|coordsfile>

    Produce a new delta/coords file and filter based on id% or cov%.
    Use `delta-filter` for .delta file.
    """
    # The docstring is the parser's usage text; keep it verbatim.
    p = OptionParser(filter.__doc__)
    p.set_align(pctid=0, hitlen=0)
    p.add_option("--overlap", default=False, action="store_true",
            help="Print overlap status (e.g. terminal, contained)")

    opts, args = p.parse_args(args)
    if len(args) != 1:
        sys.exit(not p.print_help())

    # Option values live on `opts`; `args` only holds the positionals.
    pctid = opts.pctid
    hitlen = opts.hitlen

    filename, = args
    # Nothing to filter on: hand back the input untouched.
    if pctid == 0 and hitlen == 0:
        return filename

    pf, suffix = filename.rsplit(".", 1)
    outfile = "".join((pf, ".P{0}L{1}.".format(int(pctid), int(hitlen)), suffix))
    if not need_update(filename, outfile):
        return outfile

    # .delta files are delegated to the external delta-filter tool.
    if suffix == "delta":
        cmd = "delta-filter -i {0} -l {1} {2}".format(pctid, hitlen, filename)
        sh(cmd, outfile=outfile)
        return outfile

    # Both handles are closed before returning so that callers reading
    # `outfile` see fully flushed content.
    fw = must_open(outfile, "w")
    try:
        with open(filename) as fp:
            for row in fp:
                try:
                    c = CoordsLine(row)
                except AssertionError:
                    # Lines that do not parse as coords records are skipped.
                    continue

                if c.identity < pctid or c.len2 < hitlen:
                    continue
                if opts.overlap and not c.overlap:
                    continue

                outrow = row.rstrip()
                if opts.overlap:
                    outrow += "\t" + Overlap_types[c.overlap]
                fw.write(outrow + "\n")
    finally:
        fw.close()

    return outfile

Ejemplo n.º 21

0

Mostrar archivo

Archivo: genome.py Proyecto: orionzhou/robin

def build_blat(args):
    """Create the BLAT database files under ``<genome dir>/21_dbs/blat``.

    Builds ``db.2bit`` and the ``db.2bit.tile11.ooc`` file unless
    ``db.2bit`` exists and ``args.overwrite`` is false.  A leftover
    ``tmp.out`` is removed in either case.
    """
    dirg, fg = get_genomedir(args.species)
    dirw = op.join(dirg, "21_dbs/blat")
    if not op.isdir(dirw):
        mkdir(dirw)
    os.chdir(dirw)

    if args.overwrite or not op.isfile('db.2bit'):
        sh("faToTwoBit %s db.2bit" % fg)
        sh("blat db.2bit tmp.fas tmp.out -makeOoc=db.2bit.tile11.ooc")
    else:
        logging.debug("db.2bit already exists - skipped")

    # tmp.out is only a by-product of the -makeOoc run.
    if op.isfile("tmp.out"):
        os.remove("tmp.out")

Ejemplo n.º 22

0

Mostrar archivo

def build_blat(args):
    """Prepare a 2bit genome and over-occurring tile list for BLAT.

    Runs inside ``21_dbs/blat`` of the genome directory.  The two build
    commands are skipped when ``db.2bit`` is present and ``args.overwrite``
    is not set; ``tmp.out`` is deleted afterwards if it exists.
    """
    dirg, fg = get_genomedir(args.species)
    dirw = op.join(dirg, "21_dbs/blat")
    if not op.isdir(dirw):
        mkdir(dirw)
    os.chdir(dirw)

    reuse_existing = not args.overwrite and op.isfile('db.2bit')
    if reuse_existing:
        logging.debug("db.2bit already exists - skipped")
    else:
        build_cmds = (
            "faToTwoBit %s db.2bit" % fg,
            "blat db.2bit tmp.fas tmp.out -makeOoc=db.2bit.tile11.ooc",
        )
        for build_cmd in build_cmds:
            sh(build_cmd)

    scratch = "tmp.out"
    if op.isfile(scratch):
        os.remove(scratch)

Ejemplo n.º 23

0

Mostrar archivo

Archivo: fastq.py Proyecto: shanwai1234/maize

def fasta(args):
    """
    %prog fasta fastqfiles

    Convert fastq to fasta and qual file.
    """
    # The docstring is the parser's usage text; keep it verbatim.
    p = OptionParser(fasta.__doc__)
    p.add_option("--seqtk",
                 default=False,
                 action="store_true",
                 help="Use seqtk to convert")
    p.set_outdir()
    p.set_outfile(outfile=None)
    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    fastqfiles = args
    # Option values are read from `opts`; `args` is the positional list.
    outdir = opts.outdir
    if outdir and outdir != ".":
        mkdir(outdir)

    # Output names are derived from the first input file.
    fastqfile = fastqfiles[0]
    pf = op.basename(fastqfile)
    gzinput = pf.endswith(".gz")
    if gzinput:
        pf = pf.rsplit(".", 1)[0]

    pf, sf = pf.rsplit(".", 1)
    if sf not in ("fq", "fastq"):
        logging.debug("Assumed FASTA: suffix not `fq` or `fastq`")
        return fastqfile, None

    fastafile, qualfile = pf + ".fasta", pf + ".qual"
    outfile = opts.outfile or fastafile
    outfile = op.join(outdir, outfile)
    if opts.seqtk:
        if need_update(fastqfiles, outfile):
            for i, fastqfile in enumerate(fastqfiles):
                cmd = "seqtk seq -A {0} -L 30 -l 70".format(fastqfile)
                # First one creates file, following ones append to it
                sh(cmd, outfile=outfile, append=i)
        else:
            logging.debug("Outfile `{0}` already exists.".format(outfile))
        return outfile, None

    # NOTE(review): every input is converted into the same fastafile and
    # qualfile, so with several inputs only the last one survives - confirm
    # whether that is intended.
    for fastqfile in fastqfiles:
        SeqIO.convert(fastqfile, "fastq", fastafile, "fasta")
        SeqIO.convert(fastqfile, "fastq", qualfile, "qual")

    return fastafile, qualfile

Ejemplo n.º 24

0

Mostrar archivo

def run_blat(infile=None, outfile=None, db="UniVec_Core",
             pctid=95, hitlen=50, cpus=16, overwrite=True):
    """Align ``infile`` to ``db`` with blat/pblat and filter the hits.

    The blast8-format result is written to ``outfile`` and then filtered by
    ``run_blast_filter`` into ``outfile + ".P<pctid>L<hitlen>"``.  With
    ``overwrite`` true the filtered file replaces ``outfile``.
    """
    # Threaded pblat is preferred when available on PATH.
    if which("pblat"):
        aligner = "pblat -threads={0}".format(cpus)
    else:
        aligner = "blat"
    sh(aligner + ' {0} {1} -out=blast8 {2}'.format(db, infile, outfile))

    filtered_blatfile = outfile + ".P{0}L{1}".format(pctid, hitlen)
    run_blast_filter(infile=outfile, outfile=filtered_blatfile,
                     pctid=pctid, hitlen=hitlen)
    if not overwrite:
        return
    shutil.move(filtered_blatfile, outfile)

Ejemplo n.º 25

0

Mostrar archivo

Archivo: aws.py Proyecto: orionzhou/robin

def pull_from_s3(s3_store, file_name=None, overwrite=True):
    """Download an S3 object (or prefix) unless the local target exists.

    A trailing ``/`` on ``s3_store`` selects a recursive copy.  The local
    name defaults to the last path component of the store address.
    Returns the absolute path of the local target.
    """
    # NOTE(review): the download only happens when the local target is
    # missing, so `overwrite` never replaces an existing file - confirm
    # whether that is the intended contract.
    recursive = s3_store.endswith("/")
    remote = s3_store.rstrip("/") if recursive else s3_store
    local = file_name or remote.split("/")[-1]

    if not op.exists(local):
        remote = s3ify(remote)
        if overwrite or (not op.exists(local)):
            cmd = "aws s3 cp {0} {1} --sse".format(remote, local)
            cmd += " --recursive" if recursive else ""
            sh(cmd)
    return op.abspath(local)

Ejemplo n.º 26

0

Mostrar archivo

def build_gatk(args):
    """Create the GATK reference files under ``<genome dir>/21_dbs/gatk``.

    Copies ``10_genome.fna`` to ``db.fasta`` and builds its sequence
    dictionary and FASTA index.  Skipped when ``db.dict`` exists and
    ``args.overwrite`` is false.
    """
    dirg, fg = get_genomedir(args.species)
    dirw = op.join(dirg, "21_dbs/gatk")
    if not op.isdir(dirw):
        mkdir(dirw)
    os.chdir(dirw)

    if not op.isfile("db.dict") or args.overwrite:
        # Clear previous outputs before rebuilding.
        for stale in ("db.fasta", "db.dict"):
            if op.exists(stale):
                sh("rm " + stale)
        for step in ("cp ../../10_genome.fna db.fasta",
                     "gatk CreateSequenceDictionary -R db.fasta",
                     "samtools faidx db.fasta"):
            sh(step)
    else:
        logging.debug("db.dict already exists - skipped")

Ejemplo n.º 27

0

Mostrar archivo

Archivo: genome.py Proyecto: orionzhou/robin

def build_gatk(args):
    """Build ``db.fasta``, ``db.dict`` and the faidx index for GATK.

    Works in ``21_dbs/gatk`` of the genome directory of ``args.species``.
    An existing ``db.dict`` short-circuits the build unless
    ``args.overwrite`` is set.
    """
    dirg, fg = get_genomedir(args.species)
    dirw = op.join(dirg, "21_dbs/gatk")
    if not op.isdir(dirw):
        mkdir(dirw)
    os.chdir(dirw)

    if op.isfile("db.dict") and not args.overwrite:
        logging.debug("db.dict already exists - skipped")
        return

    # Remove old copies first, then rebuild from the genome FASTA.
    if op.exists("db.fasta"):
        sh("rm db.fasta")
    if op.exists("db.dict"):
        sh("rm db.dict")
    sh("cp ../../10_genome.fna db.fasta")
    sh("gatk CreateSequenceDictionary -R db.fasta")
    sh("samtools faidx db.fasta")

Ejemplo n.º 28

0

Mostrar archivo

Archivo: chain.py Proyecto: orionzhou/robin

def chainstat(args):
    """Report total and merged sizes for the chain file ``args.fi``.

    The chain is converted to ``tmp.bed``; the total size is reported first,
    followed by the merged size of columns 1-3 ("tgt") and of columns 5-7
    ("qry").
    """
    sh("chain 2bed %s > tmp.bed" % args.fi)

    logging.debug("total size")
    sh("bed size tmp.bed")

    # Same pipeline for both sides, differing only in the columns cut out.
    merged_size = ("cut -f{0} tmp.bed | sortBed -i stdin"
                   " | mergeBed -i stdin | bed size -")
    for side, columns in (("tgt", "1-3"), ("qry", "5-7")):
        logging.debug("%s noredundant size" % side)
        sh(merged_size.format(columns))

Ejemplo n.º 29

0

Mostrar archivo

Archivo: sam2.py Proyecto: orionzhou/robin

def index(args):
    """
    %prog index samfile/bamfile

    If SAM file, convert to BAM, sort and then index, using SAMTOOLS
    """
    # The docstring is the parser's usage text; keep it verbatim.
    p = OptionParser(index.__doc__)
    p.add_option("--fasta", dest="fasta", default=None,
            help="add @SQ header to the BAM file [default: %default]")
    p.add_option("--unique", default=False, action="store_true",
            help="only retain uniquely mapped reads [default: %default]")
    p.set_cpus()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        # `not None` is True, so a usage error exits with status 1 like the
        # sibling commands do.
        sys.exit(not p.print_help())

    samfile, = args
    # Option values are read from `opts`; `args` is the positional list.
    cpus = opts.cpus
    fastafile = opts.fasta
    if fastafile:
        assert op.exists(fastafile)

    bamfile = samfile.replace(".sam", ".bam")
    if fastafile:
        # The .fai index is passed to `samtools view -t`.
        faifile = fastafile + ".fai"
        if need_update(fastafile, faifile):
            sh("samtools faidx {0}".format(fastafile))
        cmd = "samtools view -bt {0} {1} -o {2}".\
                format(faifile, samfile, bamfile)
    else:
        cmd = "samtools view -bS {0} -o {1}".\
                format(samfile, bamfile)

    cmd += " -@ {0}".format(cpus)
    if opts.unique:
        cmd += " -q 1"

    # Only SAM input needs the conversion step.
    if samfile.endswith(".sam") and need_update(samfile, bamfile):
        sh(cmd)

    # Already sorted?
    if bamfile.endswith(".sorted.bam"):
        sortedbamfile = bamfile
    else:
        prefix = bamfile.replace(".bam", "")
        sortedbamfile = prefix + ".sorted.bam"

    if need_update(bamfile, sortedbamfile):
        cmd = "samtools sort {0} -o {1}".format(bamfile, sortedbamfile)
        cmd += " -@ {0}".format(cpus)
        sh(cmd)

    baifile = sortedbamfile + ".bai"
    if need_update(sortedbamfile, baifile):
        sh("samtools index {0}".format(sortedbamfile))

    return sortedbamfile

Ejemplo n.º 30

0

Mostrar archivo

Archivo: genome.py Proyecto: orionzhou/robin

def build_star(args):
    """Generate the STAR genome index under ``<genome dir>/21_dbs/star``.

    Skipped when the ``SA`` file exists and ``args.overwrite`` is false.
    Requires ``../../50_annotation/10.gtf`` relative to the index directory;
    exits with status 1 when that annotation is missing.  ``args.p`` is the
    thread count handed to STAR.
    """
    dirg, fg = get_genomedir(args.species)
    dirw = op.join(dirg, "21_dbs/star")
    if not op.isdir(dirw):
        mkdir(dirw)
    os.chdir(dirw)

    f_gtf = "../../50_annotation/10.gtf"
    if op.isfile("SA") and not args.overwrite:
        logging.debug("SA already exists - skipped")
    elif not op.isfile(f_gtf):
        logging.error("no gtf file: %s" % f_gtf)
        # A missing input is a failure, so report a non-zero exit status.
        sys.exit(1)
    else:
        # Adjacent literals keep the command free of continuation whitespace.
        template = ("STAR --runThreadN %d --runMode genomeGenerate"
                    " --genomeDir %s --genomeFastaFiles %s --sjdbGTFfile %s")
        sh(template % (args.p, ".", fg, f_gtf))

Ejemplo n.º 31

0

Mostrar archivo

def run_vecscreen(infile=None, outfile=None, db="UniVec_Core",
        pctid=None, hitlen=None):
    """
    Run ``blastn`` with VecScreen-style parameters; tabular hits go to
    ``outfile``.  ``pctid`` and ``hitlen`` are accepted but not used here.

    BLASTN parameters reference:
    http://www.ncbi.nlm.nih.gov/VecScreen/VecScreen_docs.html
    """
    db = get_abs_path(db)
    run_formatdb(infile=db, outfile=db + ".nin")

    cmd = " ".join((
        "blastn",
        "-task blastn",
        "-query {0} -db {1} -out {2}".format(infile, db, outfile),
        "-penalty -5 -gapopen 4 -gapextend 4 -dust yes -soft_masking true",
        "-searchsp 1750000000000 -evalue 0.01 -outfmt 6 -num_threads 8",
    ))
    sh(cmd)

Ejemplo n.º 32

0

Mostrar archivo

def build_star(args):
    """Build the STAR index for the genome of ``args.species``.

    The index is generated in ``21_dbs/star`` of the genome directory using
    ``args.p`` threads and the annotation ``../../50_annotation/10.gtf``.
    An existing ``SA`` file short-circuits the build unless
    ``args.overwrite`` is set; a missing GTF ends the program with status 1.
    """
    dirg, fg = get_genomedir(args.species)
    dirw = op.join(dirg, "21_dbs/star")
    if not op.isdir(dirw):
        mkdir(dirw)
    os.chdir(dirw)

    f_gtf = "../../50_annotation/10.gtf"
    if op.isfile("SA") and not args.overwrite:
        logging.debug("SA already exists - skipped")
        return
    if not op.isfile(f_gtf):
        logging.error("no gtf file: %s" % f_gtf)
        # Exit non-zero so callers and shells can detect the failure.
        sys.exit(1)

    cmd = " ".join((
        "STAR --runThreadN %d" % args.p,
        "--runMode genomeGenerate",
        "--genomeDir %s" % ".",
        "--genomeFastaFiles %s" % fg,
        "--sjdbGTFfile %s" % f_gtf,
    ))
    sh(cmd)

Ejemplo n.º 33

0

Mostrar archivo

def bed(args):
    """
    %prog bed bedfile bamfiles

    Convert bam files to bed.
    """
    # The docstring is the parser's usage text; keep it verbatim.
    p = OptionParser(bed.__doc__)
    opts, args = p.parse_args(args)

    if len(args) < 2:
        sys.exit(not p.print_help())

    # First positional is the output BED, the rest are the BAM inputs.
    bedfile, bamfiles = args[0], args[1:]
    for bamfile in bamfiles:
        # Every conversion is appended to the same output file.
        sh("bamToBed -i {0}".format(bamfile), outfile=bedfile, append=True)

Ejemplo n.º 34

0

Mostrar archivo

Archivo: sizes.py Proyecto: shanwai1234/maize

    def __init__(self, filename, select=None):
        """Load contig sizes from a ``.sizes`` file or a FASTA file.

        For a FASTA input a sibling ``<filename>.sizes`` file is created (or
        refreshed when out of date) using ``faSize`` if it is on PATH, and a
        pure-Python fallback otherwise.  ``select``, when truthy, keeps only
        contigs whose size is at least that value.

        Sets ``sizes_mapping``, ``ctgs``, ``sizes``, ``cumsizes`` and
        ``cumsizes_mapping`` on the instance.
        """
        assert op.exists(filename), "File `{0}` not found".format(filename)

        # filename can be both .sizes file or FASTA formatted file
        sizesname = filename

        if not filename.endswith(".sizes"):
            sizesname = filename + ".sizes"
            filename = get_abs_path(filename)
            if need_update(filename, sizesname):
                cmd = "faSize"
                if which(cmd):
                    cmd += " -detailed {0}".format(filename)
                    sh(cmd, outfile=sizesname)
                else:
                    from jcvi.formats.fasta import Fasta

                    f = Fasta(filename)
                    # `with` closes the file even if iteration fails.
                    with open(sizesname, "w") as fw:
                        for k, size in f.itersizes_ordered():
                            fw.write("\t".join((k, str(size))) + "\n")

            filename = sizesname

        assert filename.endswith(".sizes")

        super(Sizes, self).__init__(filename)
        self.fp = open(filename)
        self.filename = filename

        # get sizes for individual contigs, both in list and dict
        # this is to preserve the input order in the sizes file
        sizes = list(self.iter_sizes())
        if select:
            assert select > 0
            sizes = [x for x in sizes if x[1] >= select]
        self.sizes_mapping = dict(sizes)

        # get cumulative sizes, both in list and dict
        # zip(*[]) yields nothing to unpack, so an empty selection is
        # mapped to two empty tuples instead of raising ValueError.
        ctgs, sizes = zip(*sizes) if sizes else ((), ())
        self.sizes = sizes
        cumsizes = np.cumsum([0] + list(sizes))
        self.ctgs = ctgs
        self.cumsizes = cumsizes
        self.cumsizes_mapping = dict(zip(ctgs, cumsizes))

Ejemplo n.º 35

0

Mostrar archivo

def sort(args):
    """
    %prog sort <blastfile|coordsfile>

    Sort lines so that same query grouped together with scores descending. The
    sort is 'in-place'.
    """
    # The docstring is the parser's usage text; keep it verbatim.
    p = OptionParser(sort.__doc__)
    p.add_option("--query", default=False, action="store_true",
            help="Sort by query position [default: %default]")
    p.add_option("--ref", default=False, action="store_true",
            help="Sort by reference position [default: %default]")
    p.add_option("--refscore", default=False, action="store_true",
            help="Sort by reference name, then score descending [default: %default]")
    p.add_option("--coords", default=False, action="store_true",
            help="File is .coords generated by NUCMER [default: %default]")
    p.set_tmpdir()

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    blastfile, = args

    # Pick the `sort` key specification for the chosen file type and order.
    if opts.coords:
        if opts.query:
            key = "-k13,13 -k3,3n"
        elif opts.ref:
            key = "-k12,12 -k1,1n"
        else:
            # No key is defined for this combination; fail with a clear
            # message instead of hitting an unbound `key` below.
            sys.exit("--coords requires either --query or --ref")

    else:
        if opts.query:
            key = "-k1,1 -k7,7n"
        elif opts.ref:
            key = "-k2,2 -k9,9n"
        elif opts.refscore:
            key = "-k2,2 -k12,12gr"
        else:
            key = "-k1,1 -k12,12gr"

    cmd = "sort"
    if opts.tmpdir:
        cmd += " -T {0}".format(opts.tmpdir)
    # `-o` names the input file itself, which makes the sort in-place.
    cmd += " {0} {1} -o {1}".format(key, blastfile)
    sh(cmd)

Ejemplo n.º 36

0

Mostrar archivo

Archivo: base.py Proyecto: orionzhou/robin

    def merge(self, checkexists=False):
        """Concatenate ``self.filelist`` into ``self.outfile``.

        With ``checkexists`` true the merge is skipped (returning ``None``)
        when ``need_update`` reports the output as current.  Otherwise the
        output file name is returned.
        """
        target = self.outfile
        if checkexists and not need_update(self.filelist, target):
            logging.debug("File `{0}` exists. Merge skipped.".format(target))
            return

        joined = " ".join(self.filelist)
        ingz, outgz = self.ingz, self.outgz
        if ingz and outgz:  # can merge gz files directly
            sh("cat {0} > {1}".format(joined, target))
            return target

        # Otherwise stream the inputs (decompressing when they are gzipped)
        # and let `sh` write the output file.
        reader = "zcat" if self.ingz else "cat"
        sh(reader + " " + joined, outfile=target)
        return target

Ejemplo n.º 37

0

Mostrar archivo

Archivo: fetch.py Proyecto: orionzhou/robin

def get_cookies(name="*****@*****.**", cookies="cookies"):
    """
    Sign on to the JGI portal with curl and return the cookie file path.

    If `cookies` already exists and `last_updated(cookies)` is below 3600
    (presumably seconds - confirm against `last_updated`), it is reused
    without prompting. Otherwise the user is asked for a login (empty input
    falls back to `name`) and a password, and curl is run with `cookies` as
    both the cookie input (-b) and output (-c) file.
    """
    from getpass import getpass

    # Check if cookies is still good
    if op.exists(cookies) and last_updated(cookies) < 3600:
        return cookies

    # `raw_input` exists only on Python 2; fall back to `input` elsewhere.
    try:
        prompt = raw_input
    except NameError:
        prompt = input

    username = prompt("Phytozome Login [{0}]: ".format(name))
    if username.strip() == '':
        username = name

    pw = getpass("Phytozome Password: ")
    # NOTE(review): username and password are interpolated into a shell
    # command unquoted; shell metacharacters in either will break the call.
    cmd = "curl https://signon.jgi.doe.gov/signon/create --data-ascii"
    cmd += " login={0}\\&password={1} -b {2} -c {2}".format(username, pw, cookies)
    # Output is discarded and log=False is passed, presumably so the
    # password-bearing command line is not echoed - confirm in `sh`.
    sh(cmd, outfile="/dev/null", errfile="/dev/null", log=False)

    return cookies

Ejemplo n.º 38

0

Mostrar archivo

def vcf(args):
    """
    %prog vcf fastafile bamfiles > out.vcf.gz

    Call SNPs on bam files.
    """
    # The docstring above is also the usage text given to OptionParser.
    from maize.apps.grid import Jobs

    valid_callers = ("mpileup", "freebayes")
    p = OptionParser(vcf.__doc__)
    p.set_outfile(outfile="out.vcf.gz")
    # Register options on the parser created above (assumes it is
    # optparse-compatible, as the `opts, args` pair below implies).
    p.add_option("--nosort",
                 default=False,
                 action="store_true",
                 help="Do not sort the BAM files")
    p.add_option("--caller",
                 default="mpileup",
                 choices=valid_callers,
                 help="Use variant caller [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) < 2:
        sys.exit(not p.print_help())

    # After parsing, `args` holds only positionals; option values are on
    # `opts`.
    fastafile = args[0]
    bamfiles = args[1:]
    caller = opts.caller

    unsorted = [x for x in bamfiles if ".sorted." not in x]
    if opts.nosort:
        bamfiles = unsorted
    else:
        # Run `index` on every unsorted BAM, then point every entry at its
        # ".sorted.bam" name (normalising first so that already-sorted names
        # are not suffixed twice).
        jargs = [[[x, "--unique"]] for x in unsorted]
        jobs = Jobs(index, args=jargs)
        jobs.run()
        bamfiles = [x.replace(".sorted.bam", ".bam") for x in bamfiles]
        bamfiles = [x.replace(".bam", ".sorted.bam") for x in bamfiles]

    if caller == "mpileup":
        cmd = "samtools mpileup -E -uf"
        cmd += " {0} {1}".format(fastafile, " ".join(bamfiles))
        cmd += " | bcftools call -vmO v"
    elif caller == "freebayes":
        cmd = "freebayes -f"
        cmd += " {0} {1}".format(fastafile, " ".join(bamfiles))
    sh(cmd, outfile=opts.outfile)

Ejemplo n.º 39

0

Mostrar archivo

Archivo: sam2.py Proyecto: orionzhou/robin

def bed(args):
    """
    %prog bed bedfile bamfiles

    Convert bam files to bed.
    """
    # The docstring above is also the usage text given to OptionParser.
    parser = OptionParser(bed.__doc__)
    opts, positional = parser.parse_args(args)

    if len(positional) < 2:
        sys.exit(not parser.print_help())

    # First positional is the output BED file; every remaining one is a BAM
    # whose bamToBed output is sent to that same file with append=True.
    bedfile, bamfiles = positional[0], positional[1:]
    for bam in bamfiles:
        sh("bamToBed -i {0}".format(bam), outfile=bedfile, append=True)

Ejemplo n.º 40

0

Mostrar archivo

Archivo: coords.py Proyecto: shanwai1234/maize

def fromdelta(args):
    """
    %prog fromdelta deltafile

    Convert deltafile to coordsfile.
    """
    # The docstring above is also the usage text given to OptionParser.
    parser = OptionParser(fromdelta.__doc__)
    opts, positional = parser.parse_args(args)

    if len(positional) != 1:
        sys.exit(not parser.print_help())

    deltafile, = positional
    # Output name: the input with its last extension swapped for ".coords".
    stem = deltafile.rsplit(".", 1)[0]
    coordsfile = stem + ".coords"
    sh("show-coords -rclH {0}".format(deltafile), outfile=coordsfile)

    return coordsfile

Ejemplo n.º 41

0

Mostrar archivo

def update_conda(args):
    """
    Update a fixed list of conda environments and export each one to YAML.

    `args.opt == 2` selects the short list (base, snk, work); any other
    value selects the full list. For every environment, `conda update --all`
    is run and the environment is then exported to `$snk/envs/<env>.yml`.
    """
    full_set = [
        "base", "snk", "work",
        "blast", "hisat2", "bismark",
        "alfred", "egglib",
        "multiqc", "primer3",
        "python2", "wasp", "test",
    ]
    core_set = ["base", "snk", "work"]
    targets = core_set if args.opt == 2 else full_set

    print("will update %d environments: %s" % (len(targets), ' '.join(targets)))
    for name in targets:
        print("updating %s" % name)
        # "y\n" is fed on stdin, presumably to answer conda's confirmation.
        proc = Popen(["conda update -n %s --all" % name], stdin=PIPE, shell=True)
        proc.communicate(input=b'y\n')
        proc.terminate()
        sh("conda env export -n %s --no-builds > $snk/envs/%s.yml" %
           (name, name))

Ejemplo n.º 42

0

Mostrar archivo

Archivo: sam2.py Proyecto: orionzhou/robin

def noclip(args):
    """
    %prog noclip bamfile

    Remove clipped reads from BAM.
    """
    # The docstring above is also the usage text given to OptionParser.
    parser = OptionParser(noclip.__doc__)
    opts, positional = parser.parse_args(args)

    if len(positional) != 1:
        sys.exit(not parser.print_help())

    bamfile, = positional
    noclipbam = bamfile.replace(".bam", ".noclip.bam")

    # Stage 1 dumps the BAM as text and keeps lines whose 6th tab-separated
    # field contains neither H nor S; stage 2 re-encodes the stream as BAM.
    filter_stage = "samtools view -h {} | awk -F '\t' '($6 !~ /H|S/)'".format(bamfile)
    encode_stage = " | samtools view -@ 4 -b -o {}".format(noclipbam)
    sh(filter_stage + encode_stage)

    sh("samtools index {}".format(noclipbam))

Ejemplo n.º 43

0

Mostrar archivo

def noclip(args):
    """
    %prog noclip bamfile

    Remove clipped reads from BAM.
    """
    # The docstring above is also the usage text given to OptionParser.
    parser = OptionParser(noclip.__doc__)
    opts, remaining = parser.parse_args(args)

    if len(remaining) != 1:
        sys.exit(not parser.print_help())

    bamfile, = remaining
    noclipbam = bamfile.replace(".bam", ".noclip.bam")

    # One pipeline: text dump -> awk filter on field 6 (drops lines whose
    # field contains H or S) -> BAM written to `noclipbam`.
    pipeline = "".join([
        "samtools view -h {} | awk -F '\t' '($6 !~ /H|S/)'".format(bamfile),
        " | samtools view -@ 4 -b -o {}".format(noclipbam),
    ])
    sh(pipeline)

    sh("samtools index {}".format(noclipbam))

Ejemplo n.º 44

0

Mostrar archivo

Archivo: chain.py Proyecto: orionzhou/robin

def frompsl(args):
    """
    %prog frompsl old.new.psl old.fasta new.fasta

    Generate chain file from psl file. The pipeline is describe in:
    <http://genomewiki.ucsc.edu/index.php/Minimal_Steps_For_LiftOver>
    """
    # The docstring above is also the usage text given to OptionParser.
    from maize.formats.sizes import Sizes

    parser = OptionParser(frompsl.__doc__)
    opts, positional = parser.parse_args(args)

    if len(positional) != 3:
        sys.exit(not parser.print_help())

    pslfile, oldfasta, newfasta = positional
    # All outputs share the text before the first "." of the old FASTA name.
    prefix = oldfasta.split(".")[0]

    # Step 1: chain the alignments with axtChain (needs 2bit versions of
    # both FASTA files, old first).
    chainfile = prefix + ".chain"
    oldtwobit, newtwobit = [faToTwoBit(fa) for fa in (oldfasta, newfasta)]

    if need_update(pslfile, chainfile):
        chain_cmd = "axtChain -linearGap=medium -psl {0}".format(pslfile)
        chain_cmd += " {0} {1} {2}".format(oldtwobit, newtwobit, chainfile)
        sh(chain_cmd)

    # Step 2: sort the chain file.
    sortedchain = chainfile.rsplit(".", 1)[0] + ".sorted.chain"
    if need_update(chainfile, sortedchain):
        sh("chainSort {0} {1}".format(chainfile, sortedchain))

    # Step 3: build alignment nets from the sorted chains.
    netfile = prefix + ".net"
    oldsizes = Sizes(oldfasta).filename
    newsizes = Sizes(newfasta).filename
    if need_update((sortedchain, oldsizes, newsizes), netfile):
        net_cmd = "chainNet {0} {1} {2}".format(sortedchain, oldsizes, newsizes)
        net_cmd += " {0} /dev/null".format(netfile)
        sh(net_cmd)

    # Step 4: subset the chains by the net to get the liftOver chain file.
    liftoverfile = prefix + ".liftover.chain"
    if need_update((netfile, sortedchain), liftoverfile):
        sh("netChainSubset {0} {1} {2}".format(netfile, sortedchain,
                                               liftoverfile))

Ejemplo n.º 45

0

Mostrar archivo

Archivo: sam2.py Proyecto: orionzhou/robin

def vcf(args):
    """
    %prog vcf fastafile bamfiles > out.vcf.gz

    Call SNPs on bam files.
    """
    # The docstring above is also the usage text given to OptionParser.
    from maize.apps.grid import Jobs

    valid_callers = ("mpileup", "freebayes")
    p = OptionParser(vcf.__doc__)
    p.set_outfile(outfile="out.vcf.gz")
    # Register options on the parser created above (assumes it is
    # optparse-compatible, as the `opts, args` pair below implies).
    p.add_option("--nosort", default=False, action="store_true",
                 help="Do not sort the BAM files")
    p.add_option("--caller", default="mpileup", choices=valid_callers,
                 help="Use variant caller [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) < 2:
        sys.exit(not p.print_help())

    # `args` is now the positional list; option values are read from `opts`.
    fastafile = args[0]
    bamfiles = args[1:]
    caller = opts.caller

    unsorted = [x for x in bamfiles if ".sorted." not in x]
    if opts.nosort:
        bamfiles = unsorted
    else:
        # Run `index` on each unsorted BAM, then rewrite every name to its
        # ".sorted.bam" form (normalising first to avoid a doubled suffix).
        jargs = [[[x, "--unique"]] for x in unsorted]
        jobs = Jobs(index, args=jargs)
        jobs.run()
        bamfiles = [x.replace(".sorted.bam", ".bam") for x in bamfiles]
        bamfiles = [x.replace(".bam", ".sorted.bam") for x in bamfiles]

    if caller == "mpileup":
        cmd = "samtools mpileup -E -uf"
        cmd += " {0} {1}".format(fastafile, " ".join(bamfiles))
        cmd += " | bcftools call -vmO v"
    elif caller == "freebayes":
        cmd = "freebayes -f"
        cmd += " {0} {1}".format(fastafile, " ".join(bamfiles))
    sh(cmd, outfile=opts.outfile)

Ejemplo n.º 46

0

Mostrar archivo

Archivo: fastq.py Proyecto: shanwai1234/maize

def fromsra(args):
    """
    %prog fromsra srafile

    Convert sra file to fastq using the sratoolkit `fastq-dump`
    """
    # The docstring above is also the usage text given to OptionParser.
    p = OptionParser(fromsra.__doc__)
    # Register options on the parser created above (assumes it is
    # optparse-compatible, as the `opts, args` pair below implies).
    p.add_option("--paired", default=False, action="store_true",
            help="Specify if library layout is paired-end " + \
                 "[default: %default]")
    p.add_option("--compress",
                 default=None,
                 choices=["gzip", "bzip2"],
                 help="Compress output fastq files [default: %default]")
    p.set_outdir()
    p.set_grid()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    # `args` is now the positional list; option values are read from `opts`.
    srafile, = args
    paired = opts.paired
    compress = opts.compress
    outdir = opts.outdir

    script_path = which("fastq-dump")
    if not script_path:
        logging.error("Cannot find `fastq-dump` in the PATH")
        # Exit non-zero so callers can tell the tool was missing.
        sys.exit(1)

    cmd = [script_path]
    if compress:
        # The choice name is also the fastq-dump flag (--gzip / --bzip2).
        cmd.append("--{0}".format(compress))
    if paired:
        cmd.append("--split-files")
    if outdir:
        cmd.append("--outdir {0}".format(outdir))
    cmd.append(srafile)

    outcmd = " ".join(cmd)
    sh(outcmd, grid=opts.grid)

Ejemplo n.º 47

0

Mostrar archivo

Archivo: fastq.py Proyecto: shanwai1234/maize

def convert(args):
    """
    %prog convert in.fastq

    illumina fastq quality encoding uses offset 64, and sanger uses 33. This
    script creates a new file with the correct encoding. Output gzipped file if
    input is also gzipped.
    """
    # The docstring above is also the usage text given to OptionParser.
    p = OptionParser(convert.__doc__)
    p.set_phred()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    infastq, = args
    # `args` is the positional list; the phred option lives on `opts`.
    # When it is not given, the offset is guessed from the input file.
    phred = opts.phred or str(guessoffset([infastq]))
    # Target offset is simply "the other one"; any value other than "33" or
    # "64" raises KeyError here.
    ophred = {"64": "33", "33": "64"}[phred]

    # Build "<stem>.q<offset>.<ext>[.gz]": strip a trailing .gz first so the
    # offset tag lands before the real extension.
    gz = infastq.endswith(".gz")
    outfastq = infastq.rsplit(".", 1)[0] if gz else infastq
    pf, sf = outfastq.rsplit(".", 1)
    outfastq = "{0}.q{1}.{2}".format(pf, ophred, sf)
    if gz:
        outfastq += ".gz"

    fin = "illumina" if phred == "64" else "sanger"
    fout = "sanger" if phred == "64" else "illumina"

    seqret = "seqret"
    if gz:
        # Gzipped input is streamed through zcat into seqret's stdin.
        cmd = "zcat {0} | ".format(infastq)
        cmd += seqret + " fastq-{0}::stdin fastq-{1}::stdout".\
                format(fin, fout)
    else:
        cmd = seqret + " fastq-{0}::{1} fastq-{2}::stdout".\
                format(fin, infastq, fout)

    sh(cmd, outfile=outfastq)

    return outfastq

Ejemplo n.º 48

0

Mostrar archivo

Archivo: sam2.py Proyecto: orionzhou/robin

def count(args):
    """
    %prog count bamfile gtf

    Count the number of reads mapped using `htseq-count`.
    """
    # The docstring above is also the usage text given to OptionParser.
    p = OptionParser(count.__doc__)
    # Register the option on the parser created above (assumes it is
    # optparse-compatible, as the `opts, args` pair below implies).
    p.add_option("--type", default="exon",
                 help="Only count feature type")
    p.set_cpus(cpus=8)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    # `args` is now the positional list; option values are read from `opts`.
    bamfile, gtf = args
    cpus = opts.cpus
    # Outputs are named after the text before the first "." of the BAM name.
    pf = bamfile.split(".")[0]
    countfile = pf + ".count"
    if not need_update(bamfile, countfile):
        return

    # htseq-count is fed a name-sorted SAM, produced in two steps.
    nsorted = pf + "_nsorted"
    nsortedbam, nsortedsam = nsorted + ".bam", nsorted + ".sam"
    if need_update(bamfile, nsortedsam):
        cmd = "samtools sort -@ {0} -n {1} {2}".format(cpus, bamfile, nsorted)
        sh(cmd)
        cmd = "samtools view -@ {0} -h {1}".format(cpus, nsortedbam)
        sh(cmd, outfile=nsortedsam)

    if need_update(nsortedsam, countfile):
        cmd = "htseq-count --stranded=no --minaqual=10"
        cmd += " -t {0}".format(opts.type)
        cmd += " {0} {1}".format(nsortedsam, gtf)
        sh(cmd, outfile=countfile)

Ejemplo n.º 49

0

Mostrar archivo

def count(args):
    """
    %prog count bamfile gtf

    Count the number of reads mapped using `htseq-count`.
    """
    # The docstring above is also the usage text given to OptionParser.
    p = OptionParser(count.__doc__)
    # Register the option on the parser created above (assumes it is
    # optparse-compatible, as the `opts, args` pair below implies).
    p.add_option("--type", default="exon", help="Only count feature type")
    p.set_cpus(cpus=8)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    # After parsing, `args` holds only the two positionals; the option
    # values (cpus, type) are attributes of `opts`.
    bamfile, gtf = args
    cpus = opts.cpus
    pf = bamfile.split(".")[0]
    countfile = pf + ".count"
    if not need_update(bamfile, countfile):
        return

    # Name-sort the BAM and dump it to SAM before counting.
    nsorted = pf + "_nsorted"
    nsortedbam, nsortedsam = nsorted + ".bam", nsorted + ".sam"
    if need_update(bamfile, nsortedsam):
        cmd = "samtools sort -@ {0} -n {1} {2}".format(cpus, bamfile, nsorted)
        sh(cmd)
        cmd = "samtools view -@ {0} -h {1}".format(cpus, nsortedbam)
        sh(cmd, outfile=nsortedsam)

    if need_update(nsortedsam, countfile):
        cmd = "htseq-count --stranded=no --minaqual=10"
        cmd += " -t {0}".format(opts.type)
        cmd += " {0} {1}".format(nsortedsam, gtf)
        sh(cmd, outfile=countfile)

Ejemplo n.º 50

0

Mostrar archivo

def pairs(args):
    """
    See __doc__ for OptionParser.set_pairs().
    """
    # The docstring above is also the usage text given to OptionParser.
    import maize.formats.bed

    parser = OptionParser(pairs.__doc__)
    parser.set_pairs()
    opts, positional = parser.parse_args(args)

    if len(positional) != 1:
        sys.exit(not parser.print_help())

    samfile = positional[0]
    stem = samfile.rsplit(".", 1)[0]
    bedfile = stem + ".bed"
    if need_update(samfile, bedfile):
        sh("bamToBed -i {0}".format(samfile), outfile=bedfile)

    # Swap the input for its BED in the caller's original argument list (in
    # place) and hand the whole list, options included, to the BED version.
    slot = args.index(samfile)
    args[slot] = bedfile

    return maize.formats.bed.pairs(args)

Ejemplo n.º 51

0

Mostrar archivo

def last(args, dbtype=None):
    """
    %prog database.fasta query.fasta

    Run LAST by calling LASTDB and LASTAL. LAST program available:
    <http://last.cbrc.jp>

    Works with LAST-719.
    """
    # `args` is an options object read by attribute: query, db, path, thread,
    # dbtype, mask, format, minlen, minid, params.
    # NOTE(review): only the lastal command is built and run here; no lastdb
    # call is made in this function despite the docstring.
    query, db = args.query, args.db
    path = args.path
    nthread = args.thread
    if not dbtype:
        dbtype = args.dbtype
    # Use the binary under `path` when one is given, else rely on PATH.
    lastal_bin = op.join(path, "lastal") if path else "lastal"

    u = 2 if args.mask else 0
    cmd = "{0} -u {1}".format(lastal_bin, u)
    cmd += " -P {0} -i3G".format(nthread)
    cmd += " -f {0}".format(args.format)
    cmd += " {0} {1}".format(db, query)

    minlen = args.minlen
    minid = args.minid
    extra = args.params
    # minid == 100 would divide by zero in the penalty computation below.
    assert minid != 100, "Perfect match not yet supported"

    if minlen:
        extra += " -e{0}".format(minlen)
    if minid:
        # Floor division keeps the penalty an integer on Python 3 as well;
        # true division would format as e.g. "19.0" in the options.
        mm = int(minid // (100 - minid))
        extra += " -r1 -q{0} -a{0} -b{0}".format(mm)
    if extra:
        cmd += " " + extra.strip()

    sh(cmd)

Ejemplo n.º 52

0

Mostrar archivo

def coverage(args):
    """
    %prog coverage fastafile bamfile

    Calculate coverage for BAM file. BAM file will be sorted unless with
    --nosort.
    """
    # The docstring above is also the usage text given to OptionParser.
    p = OptionParser(coverage.__doc__)
    # Register options on the parser created above (assumes it is
    # optparse-compatible, as the `opts, args` pair below implies).
    p.add_option("--format",
                 default="bigwig",
                 choices=("bedgraph", "bigwig", "coverage"),
                 help="Output format")
    p.add_option("--nosort",
                 default=False,
                 action="store_true",
                 help="Do not sort BAM")
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    # `args` is now the positional list; option values are read from `opts`.
    fastafile, bamfile = args
    fmt = opts.format
    if opts.nosort:
        logging.debug("BAM sorting skipped")
    else:
        # `index` returns the BAM filename to use from here on.
        bamfile = index([bamfile, "--fasta={0}".format(fastafile)])

    # Drop the last two dot-separated suffixes to get the output prefix.
    pf = bamfile.rsplit(".", 2)[0]
    sizesfile = Sizes(fastafile).filename
    cmd = "genomeCoverageBed -ibam {0} -g {1}".format(bamfile, sizesfile)
    if fmt in ("bedgraph", "bigwig"):
        cmd += " -bg"
        bedgraphfile = pf + ".bedgraph"
        sh(cmd, outfile=bedgraphfile)

        if fmt == "bedgraph":
            return bedgraphfile

        # bigwig is derived from the bedgraph just written.
        bigwigfile = pf + ".bigwig"
        cmd = "bedGraphToBigWig {0} {1} {2}".\
                    format(bedgraphfile, sizesfile, bigwigfile)
        sh(cmd)
        return bigwigfile

    # "coverage" format: run without -bg, then summarise per sequence.
    coveragefile = pf + ".coverage"
    if need_update(fastafile, coveragefile):
        sh(cmd, outfile=coveragefile)

    gcf = GenomeCoverageFile(coveragefile)
    fw = must_open(opts.outfile, "w")
    for seqid, cov in gcf.iter_coverage_seqid():
        # Explicit write instead of the Python-2-only `print >> fw` form.
        fw.write("\t".join((seqid, "{0:.1f}".format(cov))) + "\n")
    fw.close()