Exemple #1
0
def sampe(args, opts):
    """
    %prog sampe database.fasta read1.fq read2.fq

    Wrapper for `bwa sampe`. Output will be read1.sam.

    The command line is assembled and returned; it is not run here.

    args: sequence of exactly three items (database, read1, read2).
    opts: parsed options; reads `cpus`, `bam`, `unmapped`, `extra`,
        `cutoff` and `uniq`.

    Returns a `(cmd, samfile)` tuple. `cmd` is the empty string when
    `need_update` reports that `samfile` does not need regenerating.
    """
    dbfile, read1file, read2file = args
    # From here on the database path is whatever `check_index` hands back.
    dbfile = check_index(dbfile)
    sai1file = check_aln(dbfile, read1file, cpus=opts.cpus)
    sai2file = check_aln(dbfile, read2file, cpus=opts.cpus)

    # Only the first element (the SAM/BAM output name) is needed here.
    samfile, _, _ = get_samfile(
        read1file, dbfile, bam=opts.bam, unmapped=opts.unmapped
    )
    if not need_update((dbfile, sai1file, sai2file), samfile):
        # The message previously named `bwa samse`, which is a different
        # subcommand; report the one this wrapper actually builds.
        logging.error("`{0}` exists. `bwa sampe` already run.".format(samfile))
        return "", samfile

    cmd = "bwa sampe " + " ".join(
        (dbfile, sai1file, sai2file, read1file, read2file)
    )
    cmd += " " + opts.extra
    if opts.cutoff:
        # -a: value taken from opts.cutoff
        cmd += " -a {0}".format(opts.cutoff)
    if opts.uniq:
        cmd += " -n 1"

    return cmd, samfile
Exemple #2
0
def sampe(args, opts):
    """
    %prog sampe database.fasta read1.fq read2.fq

    Wrapper for `bwa sampe`. Output will be read1.sam.

    The command line is assembled and returned; it is not run here.

    args: sequence of exactly three items (database, read1, read2).
    opts: parsed options; reads `cpus`, `bam`, `unmapped`, `extra`,
        `cutoff` and `uniq`.

    Returns a `(cmd, samfile)` tuple. `cmd` is the empty string when
    `need_update` reports that `samfile` does not need regenerating.
    """
    dbfile, read1file, read2file = args
    # `safile` is only used as a dependency for the freshness check below;
    # the command itself keeps using the database path as given.
    safile = check_index(dbfile)
    sai1file = check_aln(dbfile, read1file, cpus=opts.cpus)
    sai2file = check_aln(dbfile, read2file, cpus=opts.cpus)

    # Only the first element (the SAM/BAM output name) is needed here.
    samfile, _, _ = get_samfile(
        read1file, dbfile, bam=opts.bam, unmapped=opts.unmapped
    )
    if not need_update((safile, sai1file, sai2file), samfile):
        # The message previously named `bwa samse`, which is a different
        # subcommand; report the one this wrapper actually builds.
        logging.error("`{0}` exists. `bwa sampe` already run.".format(samfile))
        return "", samfile

    cmd = "bwa sampe " + " ".join(
        (dbfile, sai1file, sai2file, read1file, read2file)
    )
    cmd += " " + opts.extra
    if opts.cutoff:
        # -a: value taken from opts.cutoff
        cmd += " -a {0}".format(opts.cutoff)
    if opts.uniq:
        cmd += " -n 1"

    return cmd, samfile
Exemple #3
0
def mem(args, opts):
    """
    %prog mem database.fasta read1.fq [read2.fq]

    Wrapper for `bwa mem`. Output will be read1.sam.

    The command line is assembled and returned; it is not run here.

    args: mutable sequence whose first two items are the database and the
        first read file; any further items are passed through to the
        command unchanged. `args[0]` is overwritten in place with the value
        returned by `check_index`.
    opts: parsed options; reads `readtype`, `rg`, `bam`, `unmapped`, `bwa`,
        `cpus` and `extra`.

    Returns a `(cmd, samfile)` tuple. `cmd` is the empty string when
    `need_update` reports that `samfile` does not need regenerating.
    """
    dbfile, read1file = args[:2]
    readtype = opts.readtype
    # The PL field of the default read group falls back to "illumina" when
    # no read type was requested.
    pl = readtype or "illumina"

    # Read-group ID: basename of the first read file up to its first dot.
    pf = op.basename(read1file).split(".")[0]
    rg = opts.rg or r"@RG\tID:{0}\tSM:sm\tLB:lb\tPL:{1}".format(pf, pl)
    dbfile = check_index(dbfile)
    # Kept as an in-place update so the joined positional arguments below
    # (and the caller's list) carry the path returned by check_index.
    args[0] = dbfile
    samfile, _, _ = get_samfile(
        read1file, dbfile, bam=opts.bam, unmapped=opts.unmapped
    )
    if not need_update(read1file, samfile):
        logging.error("`{0}` exists. `bwa mem` already run.".format(samfile))
        return "", samfile

    cmd = "{} mem".format(opts.bwa)
    # -M Mark shorter split hits as secondary (for Picard compatibility).
    cmd += " -M -t {0}".format(opts.cpus)
    cmd += ' -R "{0}"'.format(rg)
    if readtype:
        cmd += " -x {0}".format(readtype)
    if opts.extra:
        cmd += " " + opts.extra
    # Always separate the options from the positional arguments. Previously
    # a non-empty `extra` was glued straight onto the database path
    # (e.g. "-k 19db.fasta").
    cmd += " " + " ".join(args)

    return cmd, samfile
Exemple #4
0
def mem(args, opts):
    """
    %prog mem database.fasta read1.fq [read2.fq]

    Wrapper for `bwa mem`. Output will be read1.sam.

    Returns `(cmd, samfile)`, where `cmd` is the `bwa mem` command string,
    or "" when `need_update` reports the output is already current.
    `args[0]` is replaced in place by the value `check_index` returns.
    `opts` supplies `rg`, `bam`, `unmapped`, `cpus`, `readtype` and `extra`.
    """
    dbfile, read1file = args[:2]

    # Default read group: ID is the read file's basename up to the first dot.
    prefix = op.basename(read1file).partition(".")[0]
    rg = opts.rg
    if not rg:
        rg = r"@RG\tID:{0}\tSM:sm\tLB:lb\tPL:illumina".format(prefix)

    dbfile = check_index(dbfile)
    args[0] = dbfile
    samfile, _, _ = get_samfile(
        read1file, dbfile, bam=opts.bam, unmapped=opts.unmapped
    )

    up_to_date = not need_update(read1file, samfile)
    if up_to_date:
        message = "`{0}` exists. `bwa mem` already run.".format(samfile)
        logging.error(message)
        return "", samfile

    # Collect the command fragments in order, then glue them together once.
    fragments = [
        "bwa mem ",
        " ".join(args),
        " -M -t {0}".format(opts.cpus),
        ' -R "{0}"'.format(rg),
    ]
    if opts.readtype:
        fragments.append(" -x {0}".format(opts.readtype))
    fragments.extend((" ", opts.extra))
    return "".join(fragments), samfile
Exemple #5
0
def mem(args, opts):
    """
    %prog mem database.fasta read1.fq [read2.fq]

    Wrapper for `bwa mem`. Output will be read1.sam.

    The command line is assembled and returned; it is not run here.

    args: mutable sequence whose first two items are the database and the
        first read file; any further items are passed through to the
        command unchanged. `args[0]` is overwritten in place with the value
        returned by `check_index`.
    opts: parsed options; reads `readtype`, `rg`, `bam`, `unmapped`, `bwa`,
        `cpus` and `extra`.

    Returns a `(cmd, samfile)` tuple. `cmd` is the empty string when
    `need_update` reports that `samfile` does not need regenerating.
    """
    dbfile, read1file = args[:2]
    readtype = opts.readtype
    # The PL field of the default read group falls back to "illumina" when
    # no read type was requested.
    pl = readtype or "illumina"

    # Read-group ID: basename of the first read file up to its first dot.
    pf = op.basename(read1file).split(".")[0]
    rg = opts.rg or r"@RG\tID:{0}\tSM:sm\tLB:lb\tPL:{1}".format(pf, pl)
    dbfile = check_index(dbfile)
    # Kept as an in-place update so the joined positional arguments below
    # (and the caller's list) carry the path returned by check_index.
    args[0] = dbfile
    samfile, _, _ = get_samfile(
        read1file, dbfile, bam=opts.bam, unmapped=opts.unmapped
    )
    if not need_update(read1file, samfile):
        logging.error("`{0}` exists. `bwa mem` already run.".format(samfile))
        return "", samfile

    cmd = "{} mem".format(opts.bwa)
    # -M Mark shorter split hits as secondary (for Picard compatibility).
    cmd += " -M -t {0}".format(opts.cpus)
    cmd += ' -R "{0}"'.format(rg)
    if readtype:
        cmd += " -x {0}".format(readtype)
    if opts.extra:
        cmd += " " + opts.extra
    # Always separate the options from the positional arguments. Previously
    # a non-empty `extra` was glued straight onto the database path
    # (e.g. "-k 19db.fasta").
    cmd += " " + " ".join(args)

    return cmd, samfile
Exemple #6
0
def bwasw(args, opts):
    """
    %prog bwasw database.fasta long_read.fastq

    Wrapper for `bwa bwasw`. Output will be long_read.sam.

    Returns `(cmd, samfile)`, where `cmd` is the `bwa bwasw` command string,
    or "" when `need_update` reports the output is already current.
    `opts` supplies `bam`, `unmapped`, `cpus` and `extra`.
    """
    dbfile, readfile = args
    # The value from check_index only feeds the freshness check; the command
    # is built from the arguments exactly as given.
    safile = check_index(dbfile)

    samfile, _, _ = get_samfile(
        readfile, dbfile, bam=opts.bam, unmapped=opts.unmapped
    )

    up_to_date = not need_update(safile, samfile)
    if up_to_date:
        message = "`{0}` exists. `bwa bwasw` already run.".format(samfile)
        logging.error(message)
        return "", samfile

    fragments = (
        "bwa bwasw ",
        " ".join(args),
        " -t {0}".format(opts.cpus),
        " ",
        opts.extra,
    )
    return "".join(fragments), samfile
Exemple #7
0
def mem(args, opts):
    """
    %prog mem database.fasta read1.fq [read2.fq]

    Wrapper for `bwa mem`. Output will be read1.sam.

    Returns `(cmd, samfile)`, where `cmd` is the `bwa mem` command string,
    or "" when `need_update` reports the output is already current.
    `opts` supplies `bam`, `unmapped`, `cpus` and `extra`.
    """
    dbfile, read1file = args[:2]

    # Called for its effect only; the return value is not used here.
    check_index(dbfile)
    samfile, _, _ = get_samfile(
        read1file, dbfile, bam=opts.bam, unmapped=opts.unmapped
    )

    up_to_date = not need_update(read1file, samfile)
    if up_to_date:
        message = "`{0}` exists. `bwa mem` already run.".format(samfile)
        logging.error(message)
        return "", samfile

    fragments = (
        "bwa mem ",
        " ".join(args),
        " -t {0}".format(opts.cpus),
        " ",
        opts.extra,
    )
    return "".join(fragments), samfile
Exemple #8
0
def bwasw(args, opts):
    """
    %prog bwasw database.fasta long_read.fastq

    Wrapper for `bwa bwasw`. Output will be long_read.sam.

    Returns `(cmd, samfile)`, where `cmd` is the `bwa bwasw` command string,
    or "" when `need_update` reports the output is already current.
    `opts` supplies `bam`, `unmapped`, `cpus` and `extra`.
    """
    dbfile, readfile = args
    dbfile = check_index(dbfile)

    samfile, _, _ = get_samfile(
        readfile, dbfile, bam=opts.bam, unmapped=opts.unmapped
    )

    up_to_date = not need_update(dbfile, samfile)
    if up_to_date:
        message = "`{0}` exists. `bwa bwasw` already run.".format(samfile)
        logging.error(message)
        return "", samfile

    # NOTE(review): the command is built from the original `args`, not from
    # the path returned by check_index -- confirm that is intended.
    fragments = (
        "bwa bwasw ",
        " ".join(args),
        " -t {0}".format(opts.cpus),
        " ",
        opts.extra,
    )
    return "".join(fragments), samfile
Exemple #9
0
def samse(args, opts):
    """
    %prog samse database.fasta short_read.fastq

    Wrapper for `bwa samse`. Output will be short_read.sam.

    Returns `(cmd, samfile)`, where `cmd` is the `bwa samse` command string,
    or "" when `need_update` reports the output is already current.
    `opts` supplies `cpus`, `bam`, `unmapped`, `extra` and `uniq`.
    """
    dbfile, readfile = args
    # `safile` is used only as a dependency for the freshness check.
    safile = check_index(dbfile)
    saifile = check_aln(dbfile, readfile, cpus=opts.cpus)

    samfile, _, _ = get_samfile(
        readfile, dbfile, bam=opts.bam, unmapped=opts.unmapped
    )

    up_to_date = not need_update((safile, saifile), samfile)
    if up_to_date:
        message = "`{0}` exists. `bwa samse` already run.".format(samfile)
        logging.error(message)
        return "", samfile

    fragments = [
        "bwa samse {0} {1} {2}".format(dbfile, saifile, readfile),
        " ",
        opts.extra,
    ]
    if opts.uniq:
        fragments.append(" -n 1")

    return "".join(fragments), samfile
Exemple #10
0
def samse(args, opts):
    """
    %prog samse database.fasta short_read.fastq

    Wrapper for `bwa samse`. Output will be short_read.sam.

    Returns `(cmd, samfile)`, where `cmd` is the `bwa samse` command string,
    or "" when `need_update` reports the output is already current.
    `opts` supplies `cpus`, `bam`, `unmapped`, `extra` and `uniq`.
    """
    dbfile, readfile = args
    # Every later step uses the database path that check_index returns.
    dbfile = check_index(dbfile)
    saifile = check_aln(dbfile, readfile, cpus=opts.cpus)

    samfile, _, _ = get_samfile(
        readfile, dbfile, bam=opts.bam, unmapped=opts.unmapped
    )

    up_to_date = not need_update((dbfile, saifile), samfile)
    if up_to_date:
        message = "`{0}` exists. `bwa samse` already run.".format(samfile)
        logging.error(message)
        return "", samfile

    fragments = [
        "bwa samse {0} {1} {2}".format(dbfile, saifile, readfile),
        " ",
        opts.extra,
    ]
    if opts.uniq:
        fragments.append(" -n 1")

    return "".join(fragments), samfile
Exemple #11
0
def mem(args, opts):
    """
    %prog mem database.fasta read1.fq [read2.fq]

    Wrapper for `bwa mem`. Output will be read1.sam.

    Returns `(cmd, samfile)`, where `cmd` is the `bwa mem` command string,
    or "" when `need_update` reports the output is already current.
    `opts` supplies `bam`, `unmapped`, `cpus`, `readtype` and `extra`.
    """
    dbfile, read1file = args[:2]

    # Called for its effect only; the return value is not used here.
    check_index(dbfile)
    samfile, _, _ = get_samfile(
        read1file, dbfile, bam=opts.bam, unmapped=opts.unmapped
    )

    up_to_date = not need_update(read1file, samfile)
    if up_to_date:
        message = "`{0}` exists. `bwa mem` already run.".format(samfile)
        logging.error(message)
        return "", samfile

    fragments = ["bwa mem ", " ".join(args), " -t {0}".format(opts.cpus)]
    if opts.readtype:
        fragments.append(" -x {0}".format(opts.readtype))
    fragments.extend((" ", opts.extra))
    return "".join(fragments), samfile
Exemple #12
0
def align(args):
    """
    %prog align database.fasta read1.fq [read2.fq]

    Wrapper for `bowtie2` single-end or paired-end, depending on the number of args.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so developer documentation is kept in comments instead.
    #
    # args: command-line argument list (options plus 2 or 3 positionals).
    # Returns a `(samfile, logfile)` tuple. When `need_update` reports the
    # output is current, bowtie2 is not run and the same tuple is returned.
    # Exits via sys.exit() after printing help on a wrong positional count.
    from jcvi.formats.fastq import guessoffset

    p = OptionParser(align.__doc__)
    p.set_firstN(firstN=0)
    p.add_option(
        "--full",
        default=False,
        action="store_true",
        help="Enforce end-to-end alignment [default: local]",
    )
    p.add_option(
        "--reorder",
        default=False,
        action="store_true",
        help="Keep the input read order [default: %default]",
    )
    p.set_cutoff(cutoff=800)
    p.set_mateorientation(mateorientation="+-")
    p.set_sam_options(bowtie=True)

    opts, args = p.parse_args(args)

    # Translate the mate orientation into a bowtie2 flag; "+-" adds nothing.
    mo = opts.mateorientation
    if mo == "+-":
        mo_flag = ""
    elif mo == "-+":
        mo_flag = "--rf"
    else:
        mo_flag = "--ff"
    # Join with a separator: the flag used to be appended straight onto
    # opts.extra, producing e.g. "-k 5--rf" when extra options were given.
    extra = " ".join(part for part in (opts.extra, mo_flag) if part)

    PE = True
    if len(args) == 2:
        logging.debug("Single-end alignment")
        PE = False
    elif len(args) == 3:
        logging.debug("Paired-end alignment")
    else:
        sys.exit(not p.print_help())

    firstN = opts.firstN
    mapped = opts.mapped
    unmapped = opts.unmapped
    gl = "--end-to-end" if opts.full else "--local"

    dbfile, readfile = args[0:2]
    dbfile = get_abs_path(dbfile)
    safile = check_index(dbfile)
    prefix = get_prefix(readfile, dbfile)
    samfile, mapped, unmapped = get_samfile(
        readfile, dbfile, bowtie=True, mapped=mapped, unmapped=unmapped, bam=opts.bam
    )
    logfile = prefix + ".log"
    offset = guessoffset([readfile])

    if not need_update(safile, samfile):
        logging.error("`{0}` exists. `bowtie2` already run.".format(samfile))
        return samfile, logfile

    cmd = "bowtie2 -x {0}".format(dbfile)
    if PE:
        r1, r2 = args[1:3]
        cmd += " -1 {0} -2 {1}".format(r1, r2)
        cmd += " --maxins {0}".format(opts.cutoff)
        mtag, utag = "--al-conc", "--un-conc"
    else:
        cmd += " -U {0}".format(readfile)
        mtag, utag = "--al", "--un"

    if mapped:
        cmd += " {0} {1}".format(mtag, mapped)
    if unmapped:
        cmd += " {0} {1}".format(utag, unmapped)

    if firstN:
        cmd += " --upto {0}".format(firstN)
    cmd += " -p {0}".format(opts.cpus)
    cmd += " --phred{0}".format(offset)
    cmd += " {0}".format(gl)
    if opts.reorder:
        cmd += " --reorder"

    cmd += " {0}".format(extra)
    # Finally the log
    cmd += " 2> {0}".format(logfile)

    cmd = output_bam(cmd, samfile)
    sh(cmd)
    # Echo the bowtie2 log to stderr. sys.stderr.write works on both
    # Python 2 and 3 (the old `print >>sys.stderr, ...` raises TypeError on
    # Python 3), and the context manager closes the file handle.
    with open(logfile) as fp:
        sys.stderr.write(fp.read() + "\n")

    return samfile, logfile
Exemple #13
0
def align(args):
    """
    %prog align database.fasta read1.fq [read2.fq]

    Wrapper for `bowtie2` single-end or paired-end, depending on the number of args.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so developer documentation is kept in comments instead.
    #
    # args: command-line argument list (options plus 2 or 3 positionals).
    # Returns a `(samfile, logfile)` tuple. When `need_update` reports the
    # output is current, bowtie2 is not run and the same tuple is returned.
    # With --null, the returned samfile is "/dev/null".
    # Exits via sys.exit() after printing help on a wrong positional count.
    from jcvi.formats.fastq import guessoffset

    p = OptionParser(align.__doc__)
    p.set_firstN(firstN=0)
    p.add_option("--full",
                 default=False,
                 action="store_true",
                 help="Enforce end-to-end alignment [default: local]")
    p.add_option("--reorder",
                 default=False,
                 action="store_true",
                 help="Keep the input read order [default: %default]")
    p.add_option("--null",
                 default=False,
                 action="store_true",
                 help="Do not write to SAM/BAM output")
    p.add_option("--fasta",
                 default=False,
                 action="store_true",
                 help="Query reads are FASTA")
    p.set_cutoff(cutoff=800)
    p.set_mateorientation(mateorientation="+-")
    p.set_sam_options(bowtie=True)

    opts, args = p.parse_args(args)

    # Translate the mate orientation into a bowtie2 flag; "+-" adds nothing.
    mo = opts.mateorientation
    if mo == '+-':
        mo_flag = ""
    elif mo == '-+':
        mo_flag = "--rf"
    else:
        mo_flag = "--ff"
    # Join with a separator: the flag used to be appended straight onto
    # opts.extra, producing e.g. "-k 5--rf" when extra options were given.
    extra = " ".join(part for part in (opts.extra, mo_flag) if part)

    PE = True
    if len(args) == 2:
        logging.debug("Single-end alignment")
        PE = False
    elif len(args) == 3:
        logging.debug("Paired-end alignment")
    else:
        sys.exit(not p.print_help())

    firstN = opts.firstN
    mapped = opts.mapped
    unmapped = opts.unmapped
    fasta = opts.fasta
    gl = "--end-to-end" if opts.full else "--local"

    dbfile, readfile = args[0:2]
    dbfile = check_index(dbfile)
    prefix = get_prefix(readfile, dbfile)
    samfile, mapped, unmapped = get_samfile(readfile,
                                            dbfile,
                                            bowtie=True,
                                            mapped=mapped,
                                            unmapped=unmapped,
                                            bam=opts.bam)
    logfile = prefix + ".log"
    # The quality offset is only guessed for non-FASTA input; `offset` is
    # only read in the matching non-FASTA branch further down.
    if not fasta:
        offset = guessoffset([readfile])

    if not need_update(dbfile, samfile):
        logging.error("`{0}` exists. `bowtie2` already run.".format(samfile))
        return samfile, logfile

    cmd = "bowtie2 -x {0}".format(dbfile)
    if PE:
        r1, r2 = args[1:3]
        cmd += " -1 {0} -2 {1}".format(r1, r2)
        cmd += " --maxins {0}".format(opts.cutoff)
        mtag, utag = "--al-conc", "--un-conc"
    else:
        cmd += " -U {0}".format(readfile)
        mtag, utag = "--al", "--un"

    if mapped:
        cmd += " {0} {1}".format(mtag, mapped)
    if unmapped:
        cmd += " {0} {1}".format(utag, unmapped)

    if firstN:
        cmd += " --upto {0}".format(firstN)
    cmd += " -p {0}".format(opts.cpus)
    if fasta:
        cmd += " -f"
    else:
        cmd += " --phred{0}".format(offset)
    cmd += " {0}".format(gl)
    if opts.reorder:
        cmd += " --reorder"

    cmd += " {0}".format(extra)
    # Finally the log
    cmd += " 2> {0}".format(logfile)

    # --null: discard the alignment output (the freshness check above was
    # still made against the real output name).
    if opts.null:
        samfile = "/dev/null"

    cmd = output_bam(cmd, samfile)
    sh(cmd)
    # Echo the bowtie2 log to stderr, closing the file handle afterwards.
    with open(logfile) as fp:
        print(fp.read(), file=sys.stderr)

    return samfile, logfile