Python OptionParser.set_firstN 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: jcvi.apps.base

클래스/타입: OptionParser

메소드/함수: set_firstN

hotexamples.com에서의 예제들: 11

Python OptionParser.set_firstN - 11개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python jcvi.apps.base.OptionParser.set_firstN의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

OptionParser(30)

parse_args(30)

add_option(30)

set_outfile(30)

set_image_options(30)

set_home(30)

set_cpus(30)

print_help(30)

set_align(21)

set_beds(17)

set_outdir(13)

set_params(9)

set_aws_opts(8)

set_grid_opts(7)

set_depth(6)

set_grid(6)

set_firstN(5)

set_phred(5)

map_async(5)

set_fastq_names(4)

set_pasa_opts(3)

add_option_group(3)

set_aligner(3)

set_db_opts(3)

set_cutoff(3)

set_pairs(3)

set_histogram(2)

set_annot_reformat_opts(2)

set_mingap(2)

set_dotplot_opts(2)

set_mates(2)

set_email(2)

run(2)

set_mateorientation(2)

items(1)

map(1)

build(1)

qStart(1)

delete(1)

parse(1)

gffline(1)

set_downloader(1)

invert(1)

draw(1)

set_chr(1)

add_cpus(1)

fixLibraryStats(1)

duplicate(1)

imap(1)

예제 #1

파일 보기

파일: fastq.py 프로젝트: Nicholas-NVS/jcvi

def readlen(args):
    """
    %prog readlen fastqfile

    Calculate read length, will only try the first N reads. Output min, max, and
    avg for each file.
    """
    # NOTE: the docstring above doubles as the usage text handed to
    # OptionParser, so it is intentionally left verbatim.
    #
    # args    -- list of command-line tokens (exactly one positional expected).
    # returns -- 0 when the file fails the is_fastq() check, otherwise
    #            int(s.max) from the stats object returned by calc_readlen().
    p = OptionParser(readlen.__doc__)
    p.set_firstN()
    p.add_option("--silent", default=False, action="store_true", help="Do not print read length stats")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        # print_help() output is shown, then the process exits.
        sys.exit(not p.print_help())

    f, = args
    if not is_fastq(f):
        # Fix: the placeholder was never filled in, so the literal "{0}" used
        # to be logged instead of the offending file name.
        logging.debug("File `{0}` does not endswith .fastq or .fq".format(f))
        return 0

    s = calc_readlen(f, opts.firstN)
    if not opts.silent:
        # Single-argument parenthesised print behaves the same with the
        # print statement and the print function.
        print("\t".join(str(x) for x in (f, s.min, s.max, s.mean, s.median)))

    return int(s.max)

예제 #2

파일 보기

def readlen(args):
    """
    %prog readlen fastqfile

    Calculate read length, will only try the first N reads. Output min, max, and
    avg for each file.
    """
    # The docstring above is also the usage text given to OptionParser.
    # Returns 0 when the suffix check rejects the file, else int(max length).
    p = OptionParser(readlen.__doc__)
    p.set_firstN()
    # Both switches are plain boolean flags that default to off.
    for flag, description in (
        ("--silent", "Do not print read length stats"),
        ("--nocheck", "Do not check file type suffix"),
    ):
        p.add_option(flag, default=False, action="store_true", help=description)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (fastqfile,) = args
    # is_fastq() is only consulted when --nocheck was not given.
    if not (opts.nocheck or is_fastq(fastqfile)):
        logging.debug("File `{}` does not endswith .fastq or .fq".format(fastqfile))
        return 0

    stats = calc_readlen(fastqfile, opts.firstN)
    if not opts.silent:
        columns = [fastqfile, stats.min, stats.max, stats.mean, stats.median]
        print("\t".join(map(str, columns)))

    return int(stats.max)

예제 #3

파일 보기

파일: automaton.py 프로젝트: kvefimov/jcvi_062915

def pairs(args):
    """
    %prog pairs folder reference.fasta

    Estimate insert size distribution. Compatible with a variety of aligners,
    including CLC, BOWTIE and BWA.
    """
    # NOTE: the docstring above is the usage text handed to OptionParser and
    # is therefore kept verbatim.
    #
    # args -- command-line tokens: [folder, reference.fasta] plus options.
    # Side effects: creates the `pairs-<aligner>` work directory, writes one
    # sampled FASTQ per project entry into it, and writes `f.meta`.
    p = OptionParser(pairs.__doc__)
    p.set_firstN()
    p.set_mates()
    p.set_aligner()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    cwd = os.getcwd()
    aligner = opts.aligner
    work = "-".join(("pairs", aligner))
    mkdir(work)

    # Pick the alignment driver and the matching pair-statistics routine.
    # NOTE(review): an aligner other than clc/bowtie/bwa imports no `align`
    # here -- presumably set_aligner() restricts the choices; confirm.
    if aligner == "clc":
        from jcvi.apps.clc import align
        from jcvi.formats.cas import pairs as ps
    else:
        from jcvi.formats.sam import pairs as ps

    if aligner == "bowtie":
        from jcvi.apps.bowtie import align
    elif aligner == "bwa":
        from jcvi.apps.bwa import align

    folder, ref = args
    ref = get_abs_path(ref)
    messages = []
    # The loop variable used to be called `p`, shadowing the option parser.
    for fastqs, prefix in iter_project(folder, 2):
        samplefq = op.join(work, prefix + ".first.fastq")
        first([str(opts.firstN)] + fastqs + ["-o", samplefq])

        # Run the aligner from inside the work directory, and always return
        # to the starting directory even when alignment fails, so the
        # relative paths used afterwards keep resolving correctly.
        os.chdir(work)
        try:
            align_args = [ref, op.basename(samplefq)]
            outfile, _logfile = align(align_args)
            _bedfile, stats = ps([outfile, "--rclip={0}".format(opts.rclip)])
        finally:
            os.chdir(cwd)

        median = stats.median
        tag = "MP" if median > 1000 else "PE"
        # Keep the first two characters of the median; bump them by one when
        # the remainder is non-zero, then pad with zeros to the same width.
        median = str(median)
        pf, sf = median[:2], median[2:]
        if sf and int(sf) != 0:
            pf = str(int(pf) + 1)  # Get the first two effective digits
        lib = "{0}-{1}".format(tag, pf + "0" * len(sf))
        for i, xp in enumerate(fastqs):
            suffix = "fastq.gz" if xp.endswith(".gz") else "fastq"
            link = "{0}-{1}.{2}.{3}".format(lib, prefix.replace("-", ""), i + 1, suffix)
            messages.append("\t".join(str(x) for x in (xp, link)))

    messages = "\n".join(messages)
    write_file("f.meta", messages, tee=True)

예제 #4

파일 보기

def pairs(args):
    """
    %prog pairs folder reference.fasta

    Estimate insert size distribution. Compatible with a variety of aligners,
    including BOWTIE and BWA.
    """
    # NOTE: the docstring above is the usage text handed to OptionParser and
    # is therefore kept verbatim.
    #
    # args -- command-line tokens: [folder, reference.fasta] plus options.
    # Side effects: creates the `pairs-<aligner>` work directory, writes two
    # sampled FASTQ files per project entry into it, and writes `f.meta`.
    p = OptionParser(pairs.__doc__)
    p.set_firstN()
    p.set_mates()
    p.set_aligner()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    cwd = os.getcwd()
    aligner = opts.aligner
    work = "-".join(("pairs", aligner))
    mkdir(work)

    from jcvi.formats.sam import pairs as ps

    # NOTE(review): an aligner other than bowtie/bwa imports no `align`
    # here -- presumably set_aligner() restricts the choices; confirm.
    if aligner == "bowtie":
        from jcvi.apps.bowtie import align
    elif aligner == "bwa":
        from jcvi.apps.bwa import align

    folder, ref = args
    ref = get_abs_path(ref)
    messages = []
    # The loop variable used to be called `p`, shadowing the option parser.
    for fastqs, prefix in iter_project(folder):
        # Sample the first N reads of each of the two mate files.
        samplefq = []
        for i in range(2):
            samplefq.append(
                op.join(work, prefix + "_{0}.first.fastq".format(i + 1)))
            first([str(opts.firstN)] + [fastqs[i]] + ["-o", samplefq[i]])

        # Run the aligner from inside the work directory, and always return
        # to the starting directory even when alignment fails, so the
        # relative paths used afterwards keep resolving correctly.
        os.chdir(work)
        try:
            align_args = [ref] + [op.basename(fq) for fq in samplefq]
            outfile, _logfile = align(align_args)
            _bedfile, stats = ps([outfile, "--rclip={0}".format(opts.rclip)])
        finally:
            os.chdir(cwd)

        median = stats.median
        tag = "MP" if median > 1000 else "PE"
        # Keep the first two characters of the median; bump them by one when
        # the remainder is non-zero, then pad with zeros to the same width.
        median = str(median)
        pf, sf = median[:2], median[2:]
        if sf and int(sf) != 0:
            pf = str(int(pf) + 1)  # Get the first two effective digits
        lib = "{0}-{1}".format(tag, pf + "0" * len(sf))
        for i, xp in enumerate(fastqs):
            suffix = "fastq.gz" if xp.endswith(".gz") else "fastq"
            link = "{0}-{1}.{2}.{3}".format(lib, prefix.replace("-", ""),
                                            i + 1, suffix)
            messages.append("\t".join(str(x) for x in (xp, link)))

    messages = "\n".join(messages)
    write_file("f.meta", messages, tee=True)

예제 #5

파일 보기

파일: fastq.py 프로젝트: rrane/jcvi

def readlen(args):
    """
    %prog readlen fastqfile

    Calculate read length, will only try the first N reads. Output min, max, and
    avg for each file.
    """
    # The docstring above is also the usage text given to OptionParser.
    # Prints one tab-separated line (file, min, max, mean) and returns the
    # maximum read length as an int.
    p = OptionParser(readlen.__doc__)
    p.set_firstN()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (fastqfile,) = args
    stats = calc_readlen(fastqfile, opts.firstN)
    columns = [fastqfile, stats.min, stats.max, stats.mean]
    print("\t".join(map(str, columns)))
    return int(stats.max)

예제 #6

파일 보기

파일: preprocess.py 프로젝트: Nicholas-NVS/jcvi

def contamination(args):
    """
    %prog contamination Ecoli.fasta genome.fasta read.fastq

    Check read contamination on a folder of paired reads. Use bowtie2 to compare
    the reads against:
    1. Ecoli.fasta - this will tell us the lower bound of contamination
    2. genome.fasta - this will tell us the upper bound of contamination
    """
    # NOTE: the docstring above is the usage text shown by OptionParser; the
    # only change to it is the corrected file name (was "Ecoli.fsata").
    #
    # args -- command-line tokens: [Ecoli.fasta, genome.fasta, read.fastq].
    # Side effects: runs align() twice, writes `<read.fastq>.Ecoli` and
    # reports the range on stderr. Returns None.
    from jcvi.apps.bowtie import BowtieLogFile, align

    p = OptionParser(contamination.__doc__)
    p.set_firstN()
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    ecoli, genome, fq = args
    firstN_opt = "--firstN={0}".format(opts.firstN)

    # Rate against E. coli is taken as the lower bound.
    _, logfile = align([ecoli, fq, firstN_opt])
    lowerbound = BowtieLogFile(logfile).rate
    # 100 minus the rate against the genome is taken as the upper bound.
    _, logfile = align([genome, fq, firstN_opt])
    upperbound = 100 - BowtieLogFile(logfile).rate

    # Midpoint of the two bounds. Dividing by 2.0 avoids floor division
    # under Python 2 should both rates happen to be integers.
    median = (lowerbound + upperbound) / 2.0

    lowerbound = "{0:.1f}".format(lowerbound)
    upperbound = "{0:.1f}".format(upperbound)
    median = "{0:.1f}".format(median)

    clogfile = fq + ".Ecoli"
    # Context manager guarantees the handle is closed even if the write fails.
    with open(clogfile, "w") as fw:
        fw.write("\t".join((fq, lowerbound, median, upperbound)) + "\n")

    sys.stderr.write("{0}: Ecoli contamination rate {1}-{2}\n".format(
        fq, lowerbound, upperbound))

예제 #7

파일 보기

def contamination(args):
    """
    %prog contamination Ecoli.fasta genome.fasta read.fastq

    Check read contamination on a folder of paired reads. Use bowtie2 to compare
    the reads against:
    1. Ecoli.fasta - this will tell us the lower bound of contamination
    2. genome.fasta - this will tell us the upper bound of contamination
    """
    # NOTE: the docstring above is the usage text shown by OptionParser; the
    # only change to it is the corrected file name (was "Ecoli.fsata").
    #
    # args -- command-line tokens: [Ecoli.fasta, genome.fasta, read.fastq].
    # Side effects: runs align() twice, writes `<read.fastq>.Ecoli` and
    # reports the range on stderr. Returns None.
    from jcvi.apps.bowtie import BowtieLogFile, align

    p = OptionParser(contamination.__doc__)
    p.set_firstN()
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    ecoli, genome, fq = args
    firstN_opt = "--firstN={0}".format(opts.firstN)

    # Rate against E. coli is taken as the lower bound.
    _, logfile = align([ecoli, fq, firstN_opt])
    lowerbound = BowtieLogFile(logfile).rate
    # 100 minus the rate against the genome is taken as the upper bound.
    _, logfile = align([genome, fq, firstN_opt])
    upperbound = 100 - BowtieLogFile(logfile).rate

    # Midpoint of the two bounds. Dividing by 2.0 avoids floor division
    # under Python 2 should both rates happen to be integers.
    median = (lowerbound + upperbound) / 2.0

    lowerbound = "{0:.1f}".format(lowerbound)
    upperbound = "{0:.1f}".format(upperbound)
    median = "{0:.1f}".format(median)

    clogfile = fq + ".Ecoli"
    # Context manager guarantees the handle is closed even if the write fails.
    with open(clogfile, "w") as fw:
        fw.write("\t".join((fq, lowerbound, median, upperbound)) + "\n")

    sys.stderr.write("{0}: Ecoli contamination rate {1}-{2}\n".format(
        fq, lowerbound, upperbound))

예제 #8

파일 보기

파일: preprocess.py 프로젝트: Nicholas-NVS/jcvi

def expand(args):
    """
    %prog expand bes.fasta reads.fastq

    Expand sequences using short reads. Useful, for example for getting BAC-end
    sequences. The template to use, in `bes.fasta` may just contain the junction
    sequences, then align the reads to get the 'flanks' for such sequences.
    """
    # NOTE: the docstring above is the usage text handed to OptionParser and
    # is therefore kept verbatim.
    #
    # args    -- command-line tokens: [bes.fasta, reads.fastq] plus options.
    # returns -- path of the annotated FASTA file that was written.
    import math

    from jcvi.formats.fasta import Fasta, SeqIO
    from jcvi.formats.fastq import readlen, first, fasta
    from jcvi.formats.blast import Blast
    from jcvi.formats.base import FileShredder
    from jcvi.apps.bowtie import align, get_samfile
    from jcvi.apps.align import blast

    p = OptionParser(expand.__doc__)
    p.set_depth(depth=200)
    p.set_firstN()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    bes, reads = args
    size = Fasta(bes).totalsize
    rl = readlen([reads])
    # Template size plus one read length on each side, converted to a read
    # count at the requested depth and rounded up to the next thousand.
    # NOTE(review): a read length of 0 would raise ZeroDivisionError here.
    expected_size = size + 2 * rl
    nreads = expected_size * opts.depth / rl
    nreads = int(math.ceil(nreads / 1000.)) * 1000

    # Attract reads
    samfile, logfile = align([bes, reads, "--reorder", "--mapped",
           "--firstN={0}".format(opts.firstN)])

    samfile, mapped, _ = get_samfile(reads, bes, bowtie=True, mapped=True)
    logging.debug("Extract first {0} reads from `{1}`.".format(nreads, mapped))

    pf = mapped.split(".")[0]
    pf = pf.split("-")[0]
    bespf = bes.split(".")[0]
    # From here on `reads` names the sub-sampled mapped reads, not the input.
    reads = pf + ".expand.fastq"
    first([str(nreads), mapped, "-o", reads])

    # Perform mini-assembly
    fastafile = reads.rsplit(".", 1)[0] + ".fasta"
    qualfile = ""
    if need_update(reads, fastafile):
        fastafile, qualfile = fasta([reads])

    contigs = op.join(pf, "454LargeContigs.fna")
    if need_update(fastafile, contigs):
        cmd = "runAssembly -o {0} -cpu 8 {1}".format(pf, fastafile)
        sh(cmd)
    assert op.exists(contigs)

    # Annotate contigs
    blastfile = blast([bes, contigs])
    mapping = {}
    for query, b in Blast(blastfile).iter_best_hit():
        mapping[query] = b

    f = Fasta(contigs, lazy=True)
    annotatedfasta = ".".join((pf, bespf, "fasta"))
    keys = list(Fasta(bes).iterkeys_ordered())  # keep an ordered list
    recs = []
    for _, v in f.iteritems_ordered():
        vid = v.id
        if vid not in mapping:
            continue
        b = mapping[vid]
        subject = b.subject
        rec = v.reverse_complement() if b.orientation == '-' else v
        rec.id = rid = "_".join((pf, vid, subject))
        rec.description = ""
        # Sort key: position of the matched template in `bes`, then record id.
        recs.append((keys.index(subject), rid, rec))

    recs = [x[-1] for x in sorted(recs)]
    # Open the output only once the records are ready, and let the context
    # manager close it even if writing fails.
    with open(annotatedfasta, "w") as fw:
        SeqIO.write(recs, fw, "fasta")

    # Intermediate files are handed to FileShredder now that the result exists.
    FileShredder([samfile, logfile, mapped, reads, fastafile, qualfile, blastfile, pf])
    logging.debug("Annotated seqs (n={0}) written to `{1}`.".\
                    format(len(recs), annotatedfasta))

    return annotatedfasta

예제 #9

파일 보기

def expand(args):
    """
    %prog expand bes.fasta reads.fastq

    Expand sequences using short reads. Useful, for example for getting BAC-end
    sequences. The template to use, in `bes.fasta` may just contain the junction
    sequences, then align the reads to get the 'flanks' for such sequences.
    """
    # NOTE: the docstring above is the usage text handed to OptionParser and
    # is therefore kept verbatim.
    #
    # args    -- command-line tokens: [bes.fasta, reads.fastq] plus options.
    # returns -- path of the annotated FASTA file that was written.
    import math

    from jcvi.formats.fasta import Fasta, SeqIO
    from jcvi.formats.fastq import readlen, first, fasta
    from jcvi.formats.blast import Blast
    from jcvi.formats.base import FileShredder
    from jcvi.apps.bowtie import align, get_samfile
    from jcvi.apps.align import blast

    p = OptionParser(expand.__doc__)
    p.set_depth(depth=200)
    p.set_firstN()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    bes, reads = args
    size = Fasta(bes).totalsize
    rl = readlen([reads])
    # Template size plus one read length on each side, converted to a read
    # count at the requested depth and rounded up to the next thousand.
    # NOTE(review): a read length of 0 would raise ZeroDivisionError here.
    expected_size = size + 2 * rl
    nreads = expected_size * opts.depth / rl
    nreads = int(math.ceil(nreads / 1000.)) * 1000

    # Attract reads
    samfile, logfile = align([bes, reads, "--reorder", "--mapped",
           "--firstN={0}".format(opts.firstN)])

    samfile, mapped, _ = get_samfile(reads, bes, bowtie=True, mapped=True)
    logging.debug("Extract first {0} reads from `{1}`.".format(nreads, mapped))

    pf = mapped.split(".")[0]
    pf = pf.split("-")[0]
    bespf = bes.split(".")[0]
    # From here on `reads` names the sub-sampled mapped reads, not the input.
    reads = pf + ".expand.fastq"
    first([str(nreads), mapped, "-o", reads])

    # Perform mini-assembly
    fastafile = reads.rsplit(".", 1)[0] + ".fasta"
    qualfile = ""
    if need_update(reads, fastafile):
        fastafile, qualfile = fasta([reads])

    contigs = op.join(pf, "454LargeContigs.fna")
    if need_update(fastafile, contigs):
        cmd = "runAssembly -o {0} -cpu 8 {1}".format(pf, fastafile)
        sh(cmd)
    assert op.exists(contigs)

    # Annotate contigs
    blastfile = blast([bes, contigs])
    mapping = {}
    for query, b in Blast(blastfile).iter_best_hit():
        mapping[query] = b

    f = Fasta(contigs, lazy=True)
    annotatedfasta = ".".join((pf, bespf, "fasta"))
    keys = list(Fasta(bes).iterkeys_ordered())  # keep an ordered list
    recs = []
    for _, v in f.iteritems_ordered():
        vid = v.id
        if vid not in mapping:
            continue
        b = mapping[vid]
        subject = b.subject
        rec = v.reverse_complement() if b.orientation == '-' else v
        rec.id = rid = "_".join((pf, vid, subject))
        rec.description = ""
        # Sort key: position of the matched template in `bes`, then record id.
        recs.append((keys.index(subject), rid, rec))

    recs = [x[-1] for x in sorted(recs)]
    # Open the output only once the records are ready, and let the context
    # manager close it even if writing fails.
    with open(annotatedfasta, "w") as fw:
        SeqIO.write(recs, fw, "fasta")

    # Intermediate files are handed to FileShredder now that the result exists.
    FileShredder([samfile, logfile, mapped, reads, fastafile, qualfile, blastfile, pf])
    logging.debug("Annotated seqs (n={0}) written to `{1}`.".\
                    format(len(recs), annotatedfasta))

    return annotatedfasta

예제 #10

파일 보기

파일: bowtie.py 프로젝트: rrane/jcvi

def align(args):
    """
    %prog align database.fasta read1.fq [read2.fq]

    Wrapper for `bowtie2` single-end or paired-end, depending on the number of args.
    """
    # NOTE: the docstring above is the usage text handed to OptionParser and
    # is therefore kept verbatim.
    #
    # args    -- command-line tokens: [database.fasta, read1.fq] for
    #            single-end, or with a third read2.fq for paired-end.
    # returns -- (samfile, logfile); returned early, without running bowtie2,
    #            when need_update() reports the output as current.
    from jcvi.formats.fastq import guessoffset

    p = OptionParser(align.__doc__)
    p.set_firstN(firstN=0)
    p.add_option("--full", default=False, action="store_true", help="Enforce end-to-end alignment [default: local]")
    p.add_option("--reorder", default=False, action="store_true", help="Keep the input read order [default: %default]")
    p.set_cutoff(cutoff=800)
    p.set_mateorientation(mateorientation="+-")
    p.set_sam_options(bowtie=True)

    opts, args = p.parse_args(args)
    extra = opts.extra
    mo = opts.mateorientation
    # No flag is added for "+-". The leading space is the fix: the flag used
    # to be glued directly onto any user-supplied extra options
    # (e.g. "-k 1--rf"), producing an invalid command line.
    if mo == "-+":
        extra += " --rf"
    elif mo != "+-":
        extra += " --ff"

    PE = True
    if len(args) == 2:
        logging.debug("Single-end alignment")
        PE = False
    elif len(args) == 3:
        logging.debug("Paired-end alignment")
    else:
        sys.exit(not p.print_help())

    firstN = opts.firstN
    mapped = opts.mapped
    unmapped = opts.unmapped
    gl = "--end-to-end" if opts.full else "--local"

    dbfile, readfile = args[0:2]
    dbfile = get_abs_path(dbfile)
    safile = check_index(dbfile)
    prefix = get_prefix(readfile, dbfile)
    # The option values for mapped/unmapped are replaced by whatever
    # get_samfile() returns for them.
    samfile, mapped, unmapped = get_samfile(
        readfile, dbfile, bowtie=True, mapped=mapped, unmapped=unmapped, bam=opts.bam
    )
    logfile = prefix + ".log"
    offset = guessoffset([readfile])

    if not need_update(safile, samfile):
        logging.error("`{0}` exists. `bowtie2` already run.".format(samfile))
        return samfile, logfile

    cmd = "bowtie2 -x {0}".format(dbfile)
    if PE:
        r1, r2 = args[1:3]
        cmd += " -1 {0} -2 {1}".format(r1, r2)
        cmd += " --maxins {0}".format(opts.cutoff)
        mtag, utag = "--al-conc", "--un-conc"
    else:
        cmd += " -U {0}".format(readfile)
        mtag, utag = "--al", "--un"

    if mapped:
        cmd += " {0} {1}".format(mtag, mapped)
    if unmapped:
        cmd += " {0} {1}".format(utag, unmapped)

    # firstN defaults to 0 here, in which case no --upto limit is passed.
    if firstN:
        cmd += " --upto {0}".format(firstN)
    cmd += " -p {0}".format(opts.cpus)
    cmd += " --phred{0}".format(offset)
    cmd += " {0}".format(gl)
    if opts.reorder:
        cmd += " --reorder"

    cmd += " {0}".format(extra)
    # Finally the log
    cmd += " 2> {0}".format(logfile)

    cmd = output_bam(cmd, samfile)
    sh(cmd)
    # Echo the captured log to stderr; the context manager closes the handle
    # that was previously left open.
    with open(logfile) as fp:
        sys.stderr.write(fp.read() + "\n")

    return samfile, logfile

예제 #11

파일 보기

def align(args):
    """
    %prog align database.fasta read1.fq [read2.fq]

    Wrapper for `bowtie2` single-end or paired-end, depending on the number of args.
    """
    # NOTE: the docstring above is the usage text handed to OptionParser and
    # is therefore kept verbatim.
    #
    # args    -- command-line tokens: [database.fasta, read1.fq] for
    #            single-end, or with a third read2.fq for paired-end.
    # returns -- (samfile, logfile); returned early, without running bowtie2,
    #            when need_update() reports the output as current. With
    #            --null the returned samfile is "/dev/null".
    from jcvi.formats.fastq import guessoffset

    p = OptionParser(align.__doc__)
    p.set_firstN(firstN=0)
    p.add_option("--full",
                 default=False,
                 action="store_true",
                 help="Enforce end-to-end alignment [default: local]")
    p.add_option("--reorder",
                 default=False,
                 action="store_true",
                 help="Keep the input read order [default: %default]")
    p.add_option("--null",
                 default=False,
                 action="store_true",
                 help="Do not write to SAM/BAM output")
    p.add_option("--fasta",
                 default=False,
                 action="store_true",
                 help="Query reads are FASTA")
    p.set_cutoff(cutoff=800)
    p.set_mateorientation(mateorientation="+-")
    p.set_sam_options(bowtie=True)

    opts, args = p.parse_args(args)
    extra = opts.extra
    mo = opts.mateorientation
    # No flag is added for "+-". The leading space is the fix: the flag used
    # to be glued directly onto any user-supplied extra options
    # (e.g. "-k 1--rf"), producing an invalid command line.
    if mo == '-+':
        extra += " --rf"
    elif mo != '+-':
        extra += " --ff"

    PE = True
    if len(args) == 2:
        logging.debug("Single-end alignment")
        PE = False
    elif len(args) == 3:
        logging.debug("Paired-end alignment")
    else:
        sys.exit(not p.print_help())

    firstN = opts.firstN
    mapped = opts.mapped
    unmapped = opts.unmapped
    fasta = opts.fasta
    gl = "--end-to-end" if opts.full else "--local"

    dbfile, readfile = args[0:2]
    dbfile = check_index(dbfile)
    prefix = get_prefix(readfile, dbfile)
    # The option values for mapped/unmapped are replaced by whatever
    # get_samfile() returns for them.
    samfile, mapped, unmapped = get_samfile(readfile,
                                            dbfile,
                                            bowtie=True,
                                            mapped=mapped,
                                            unmapped=unmapped,
                                            bam=opts.bam)
    logfile = prefix + ".log"
    # The quality offset is only guessed (and only used below) for
    # non-FASTA input.
    if not fasta:
        offset = guessoffset([readfile])

    if not need_update(dbfile, samfile):
        logging.error("`{0}` exists. `bowtie2` already run.".format(samfile))
        return samfile, logfile

    cmd = "bowtie2 -x {0}".format(dbfile)
    if PE:
        r1, r2 = args[1:3]
        cmd += " -1 {0} -2 {1}".format(r1, r2)
        cmd += " --maxins {0}".format(opts.cutoff)
        mtag, utag = "--al-conc", "--un-conc"
    else:
        cmd += " -U {0}".format(readfile)
        mtag, utag = "--al", "--un"

    if mapped:
        cmd += " {0} {1}".format(mtag, mapped)
    if unmapped:
        cmd += " {0} {1}".format(utag, unmapped)

    # firstN defaults to 0 here, in which case no --upto limit is passed.
    if firstN:
        cmd += " --upto {0}".format(firstN)
    cmd += " -p {0}".format(opts.cpus)
    if fasta:
        cmd += " -f"
    else:
        cmd += " --phred{0}".format(offset)
    cmd += " {0}".format(gl)
    if opts.reorder:
        cmd += " --reorder"

    cmd += " {0}".format(extra)
    # Finally the log
    cmd += " 2> {0}".format(logfile)

    if opts.null:
        samfile = "/dev/null"

    cmd = output_bam(cmd, samfile)
    sh(cmd)
    # Echo the captured log to stderr; the context manager closes the handle
    # that was previously left open.
    with open(logfile) as fp:
        print(fp.read(), file=sys.stderr)

    return samfile, logfile