Example #1
0
def main():
    """
    %prog database.fa query.fa [options]

    Run LASTZ similar to the BLAST interface, and generates -m8 tabular format
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is user-facing output and is deliberately kept verbatim.
    #
    # Two execution paths, selected by --format:
    #   * "BLASTN-" (default): `cpus` jobs are dispatched through Jobs/lastz and
    #     share a single output handle guarded by a Lock.
    #   * anything else: the database is converted to 2bit and one lastz_2bit
    #     task per database record is mapped over a process Pool; every task
    #     writes its own file under ./outdir.
    p = OptionParser(main.__doc__)

    supported_formats = (
        "lav", "lav+text", "axt", "axt+", "maf", "maf+", "maf-", "sam",
        "softsam", "sam-", "softsam-", "cigar", "BLASTN", "BLASTN-",
        "differences", "rdotplot", "text",
    )

    p.add_option("--format", default="BLASTN-", choices=supported_formats,
            help="Output format [default: %default]")
    p.add_option("--path", dest="lastz_path", default=None,
            help="specify LASTZ path")
    p.add_option("--mask", dest="mask", default=False, action="store_true",
            help="treat lower-case letters as mask info [default: %default]")
    p.add_option("--similar", default=False, action="store_true",
            help="Use options tuned for close comparison [default: %default]")
    p.set_cpus(cpus=32)
    p.set_params()
    p.set_outfile()
    opts, args = p.parse_args()

    if len(args) != 2:
        # Wrong usage is an error: show the help but exit non-zero.
        p.print_help()
        sys.exit(1)

    # Positional order is database first, query second.
    bfasta_fn, afasta_fn = args
    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    for fn in (afasta_fn, bfasta_fn):
        if not op.exists(fn):
            sys.exit("File `{0}` not found".format(fn))

    afasta_fn = op.abspath(afasta_fn)
    bfasta_fn = op.abspath(bfasta_fn)

    extra = opts.extra
    if opts.similar:
        extra += similarOptions

    lastz_bin = opts.lastz_path or "lastz"
    if not lastz_bin.endswith("lastz"):
        sys.exit("You need to include lastz in your path")

    mask = opts.mask
    cpus = opts.cpus
    logging.debug("Dispatch job to %d cpus", cpus)
    fmt = opts.format

    # The axt, maf, etc. formats can only be run on a split database (i.e. one
    # FASTA record per file). The split records are then parallelized for the
    # computation, as opposed to splitting queries through "subsample".
    if fmt != "BLASTN-":
        from jcvi.formats.fasta import Fasta
        from jcvi.formats.chain import faToTwoBit

        outdir = "outdir"
        mkdir(outdir)

        bfasta_2bit = faToTwoBit(bfasta_fn)
        bids = list(Fasta(bfasta_fn, lazy=True).iterkeys_ordered())

        apf = op.basename(afasta_fn).split(".")[0]
        # Tuple layout expected by lastz_2bit:
        # bfasta_fn, afasta_fn, outfile, lastz_bin, extra, mask, format
        tasks = [
            (
                "/".join((bfasta_2bit, seqid)),
                afasta_fn,
                op.join(outdir, "{0}.{1}.{2}".format(apf, seqid, fmt)),
                lastz_bin,
                extra,
                mask,
                fmt,
            )
            for seqid in bids
        ]

        pool = Pool(cpus)
        try:
            pool.map(lastz_2bit, tasks)
        finally:
            # Release the worker processes even if a task raised.
            pool.close()
            pool.join()

        return

    # Only the BLASTN- path writes to --outfile, so open it here; opening it
    # earlier would truncate/create the file (and leak the handle) for the
    # per-record formats handled above.
    # NOTE(review): the handle is not closed here because must_open may return
    # a shared stream such as stdout -- confirm before adding a close().
    out_fh = must_open(opts.outfile, "w")
    lock = Lock()

    # One job per cpu; the first tuple element is the 1-based job index.
    args = [(k + 1, cpus, bfasta_fn, afasta_fn, out_fh,
            lock, lastz_bin, extra, mask) for k in range(cpus)]
    g = Jobs(target=lastz, args=args)
    g.run()
Example #2
0
def main():
    """
    %prog database.fa query.fa [options]

    Run LASTZ similar to the BLAST interface, and generates -m8 tabular format
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is user-facing output and is deliberately kept verbatim.
    #
    # Two execution paths, selected by --format:
    #   * "BLASTN-" (default): `cpus` jobs are dispatched through Jobs/lastz and
    #     share a single output handle guarded by a Lock.
    #   * anything else: the database is converted to 2bit and one lastz_2bit
    #     task per database record is mapped over a process Pool; every task
    #     writes its own file under ./outdir.
    p = OptionParser(main.__doc__)

    supported_formats = (
        "lav", "lav+text", "axt", "axt+", "maf", "maf+", "maf-", "sam",
        "softsam", "sam-", "softsam-", "cigar", "BLASTN", "BLASTN-",
        "differences", "rdotplot", "text",
    )

    p.add_option("--format", default="BLASTN-", choices=supported_formats,
            help="Output format [default: %default]")
    p.add_option("--path", dest="lastz_path", default=None,
            help="specify LASTZ path")
    p.add_option("--mask", dest="mask", default=False, action="store_true",
            help="treat lower-case letters as mask info [default: %default]")
    p.add_option("--similar", default=False, action="store_true",
            help="Use options tuned for close comparison [default: %default]")
    p.set_cpus(cpus=32)
    p.set_params()
    p.set_outfile()
    opts, args = p.parse_args()

    if len(args) != 2:
        # Wrong usage is an error: show the help but exit non-zero.
        p.print_help()
        sys.exit(1)

    # Positional order is database first, query second.
    bfasta_fn, afasta_fn = args
    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    for fn in (afasta_fn, bfasta_fn):
        if not op.exists(fn):
            sys.exit("File `{0}` not found".format(fn))

    afasta_fn = op.abspath(afasta_fn)
    bfasta_fn = op.abspath(bfasta_fn)

    extra = opts.extra
    if opts.similar:
        extra += similarOptions

    lastz_bin = opts.lastz_path or "lastz"
    if not lastz_bin.endswith("lastz"):
        sys.exit("You need to include lastz in your path")

    mask = opts.mask
    cpus = opts.cpus
    logging.debug("Dispatch job to %d cpus", cpus)
    fmt = opts.format

    # The axt, maf, etc. formats can only be run on a split database (i.e. one
    # FASTA record per file). The split records are then parallelized for the
    # computation, as opposed to splitting queries through "subsample".
    if fmt != "BLASTN-":
        from jcvi.formats.fasta import Fasta
        from jcvi.formats.chain import faToTwoBit

        outdir = "outdir"
        mkdir(outdir)

        bfasta_2bit = faToTwoBit(bfasta_fn)
        bids = list(Fasta(bfasta_fn, lazy=True).iterkeys_ordered())

        apf = op.basename(afasta_fn).split(".")[0]
        # Tuple layout expected by lastz_2bit:
        # bfasta_fn, afasta_fn, outfile, lastz_bin, extra, mask, format
        tasks = [
            (
                "/".join((bfasta_2bit, seqid)),
                afasta_fn,
                op.join(outdir, "{0}.{1}.{2}".format(apf, seqid, fmt)),
                lastz_bin,
                extra,
                mask,
                fmt,
            )
            for seqid in bids
        ]

        pool = Pool(cpus)
        try:
            pool.map(lastz_2bit, tasks)
        finally:
            # Release the worker processes even if a task raised.
            pool.close()
            pool.join()

        return

    # Only the BLASTN- path writes to --outfile, so open it here; opening it
    # earlier would truncate/create the file (and leak the handle) for the
    # per-record formats handled above.
    # NOTE(review): the handle is not closed here because must_open may return
    # a shared stream such as stdout -- confirm before adding a close().
    out_fh = must_open(opts.outfile, "w")
    lock = Lock()

    # One job per cpu; the first tuple element is the 1-based job index.
    args = [(k + 1, cpus, bfasta_fn, afasta_fn, out_fh,
            lock, lastz_bin, extra, mask) for k in range(cpus)]
    g = Jobs(target=lastz, args=args)
    g.run()