예제 #1
0
파일: quota.py 프로젝트: rrane/jcvi
def main(args):
    """
    Command-line entry point: select blocks from one input file under a quota.

    Expects exactly one positional argument (the input file, `qa_file`);
    otherwise the help text is printed and the process exits.

    Steps performed:
      1. Parse `--quota` as two integers written `X:Y`.
      2. Read clusters from `qa_file` with `read_clusters`.
      3. Call `solve_lp` to pick the block ids, using a `work` directory
         located next to `qa_file`.
      4. Write the selected ids, comma-separated on a single line, to
         `<prefix>.<X>x<Y>`, where `<prefix>` is `qa_file` without its last
         extension.
      5. With `--screen`, call `jcvi.compara.synteny.screen` to produce
         `<prefix>.<X>x<Y>.anchors`.

    Raises:
        Exception: if `--self` is given and the two quota numbers differ.
        SystemExit: on a wrong number of positional arguments or a malformed
            quota string.
    """
    p = OptionParser(__doc__)

    p.set_beds()
    p.add_option("--quota", default="1:1",
                 help=("`quota mapping` procedure -- screen blocks to constrain mapping"
                       " (useful for orthology), "
                       "put in the format like (#subgenomes expected for genome X):"
                       "(#subgenomes expected for genome Y) "
                       "[default: %default]"))
    p.add_option("--Nm", dest="Nmax", type="int", default=10,
                 help=("distance cutoff to tolerate two blocks that are "
                       "slightly overlapping (cutoff for `quota mapping`) "
                       "[default: %default units (gene or bp dist)]"))

    supported_solvers = ("SCIP", "GLPK")
    p.add_option("--self", dest="self_match",
                 action="store_true", default=False,
                 help=("you might turn this on when screening paralogous blocks, "
                       "esp. if you have reduced mirrored blocks into non-redundant set"))
    p.add_option("--solver", default="SCIP", choices=supported_solvers,
                 help="use MIP solver [default: %default]")
    p.add_option("--verbose", action="store_true",
                 default=False, help="show verbose solver output")

    p.add_option("--screen", default=False, action="store_true",
                 help="generate new anchors file [default: %default]")

    opts, args = p.parse_args(args)

    if len(args) != 1:
        # print_help() returns None, so this exits with a non-zero status.
        sys.exit(not p.print_help())

    qa_file, = args
    # Only the two order mappings are needed below; the call itself is kept
    # as-is and the remaining return values are discarded.
    _, _, qorder, sorder, _ = check_beds(qa_file, p, opts)

    # Sanity check for the quota.  It is parsed unconditionally so that
    # `quota`, `qa` and `qb` are always bound: an empty --quota value is now
    # reported as a malformed quota instead of failing later with NameError.
    try:
        qa, qb = (opts.quota or "").split(":")
        qa, qb = int(qa), int(qb)
    except ValueError:
        # Raised both for a wrong number of ':'-separated fields and for
        # non-integer fields.
        sys.stderr.write(
            "quota string should be the form x:x (2:4, 1:3, etc.)\n")
        sys.exit(1)

    self_match = opts.self_match
    if self_match and qa != qb:
        raise Exception("when comparing genome to itself, "
                        "quota must be the same number "
                        "(like 1:1, 2:2) you have %s" % opts.quota)
    quota = (qa, qb)

    clusters = read_clusters(qa_file, qorder, sorder)
    for cluster in clusters:
        # Every cluster must contain at least one entry.
        assert len(cluster) > 0

    # below runs `quota mapping`
    work_dir = op.join(op.dirname(op.abspath(qa_file)), "work")

    selected_ids = solve_lp(clusters, quota, work_dir=work_dir,
                            Nmax=opts.Nmax, self_match=self_match,
                            solver=opts.solver, verbose=opts.verbose)

    logging.debug("Selected {0} blocks.".format(len(selected_ids)))
    prefix = qa_file.rsplit(".", 1)[0]
    suffix = "{0}x{1}".format(qa, qb)
    outfile = ".".join((prefix, suffix))
    fw = must_open(outfile, "w")
    try:
        fw.write(",".join(str(x) for x in selected_ids) + "\n")
    finally:
        # Close the handle even if the write fails.
        fw.close()
    logging.debug("Screened blocks ids written to `{0}`.".format(outfile))

    if opts.screen:
        from jcvi.compara.synteny import screen

        new_qa_file = ".".join((prefix, suffix, "anchors"))
        largs = [qa_file, new_qa_file, "--ids", outfile]
        # Forward the bed files only when both were supplied.
        if opts.qbed and opts.sbed:
            largs += ["--qbed={0}".format(opts.qbed)]
            largs += ["--sbed={0}".format(opts.sbed)]
        screen(largs)
예제 #2
0
파일: quota.py 프로젝트: zhimenggan/jcvi
def main(args):
    """
    Command-line entry point: select blocks from one input file under a quota.

    Expects exactly one positional argument (the input file, `qa_file`);
    otherwise the help text is printed and the process exits.

    Steps performed:
      1. Parse `--quota` as two integers written `X:Y`.
      2. Read clusters from `qa_file` with `read_clusters`.
      3. Call `solve_lp` to pick the block ids, using a `work` directory
         located next to `qa_file`.
      4. Write the selected ids, comma-separated on a single line, to
         `<prefix>.<X>x<Y>`, where `<prefix>` is `qa_file` without its last
         extension.
      5. With `--screen`, call `jcvi.compara.synteny.screen` to produce
         `<prefix>.<X>x<Y>.anchors`.

    Raises:
        Exception: if `--self` is given and the two quota numbers differ.
        SystemExit: on a wrong number of positional arguments or a malformed
            quota string.
    """
    p = OptionParser(__doc__)

    p.set_beds()
    p.add_option(
        "--quota",
        default="1:1",
        help="`quota mapping` procedure -- screen blocks to constrain mapping"
        " (useful for orthology), "
        "put in the format like (#subgenomes expected for genome X):"
        "(#subgenomes expected for genome Y) "
        "[default: %default]")
    p.add_option("--Nm",
                 dest="Nmax",
                 type="int",
                 default=10,
                 help="distance cutoff to tolerate two blocks that are "
                 "slightly overlapping (cutoff for `quota mapping`) "
                 "[default: %default units (gene or bp dist)]")

    supported_solvers = ("SCIP", "GLPK")
    p.add_option(
        "--self",
        dest="self_match",
        action="store_true",
        default=False,
        help="you might turn this on when screening paralogous blocks, "
        "esp. if you have reduced mirrored blocks into non-redundant set")
    p.add_option("--solver",
                 default="SCIP",
                 choices=supported_solvers,
                 help="use MIP solver [default: %default]")
    p.set_verbose(help="Show verbose solver output")

    p.add_option("--screen",
                 default=False,
                 action="store_true",
                 help="generate new anchors file [default: %default]")

    opts, args = p.parse_args(args)

    if len(args) != 1:
        # print_help() returns None, so this exits with a non-zero status.
        sys.exit(not p.print_help())

    qa_file, = args
    # Only the two order mappings are needed below; the call itself is kept
    # as-is and the remaining return values are discarded.
    _, _, qorder, sorder, _ = check_beds(qa_file, p, opts)

    # Sanity check for the quota.  It is parsed unconditionally so that
    # `quota`, `qa` and `qb` are always bound: an empty --quota value is now
    # reported as a malformed quota instead of failing later with NameError.
    try:
        qa, qb = (opts.quota or "").split(":")
        qa, qb = int(qa), int(qb)
    except ValueError:
        # Raised both for a wrong number of ':'-separated fields and for
        # non-integer fields.
        print("quota string should be the form x:x (2:4, 1:3, etc.)",
              file=sys.stderr)
        sys.exit(1)

    self_match = opts.self_match
    if self_match and qa != qb:
        raise Exception("when comparing genome to itself, "
                        "quota must be the same number "
                        "(like 1:1, 2:2) you have %s" % opts.quota)
    quota = (qa, qb)

    clusters = read_clusters(qa_file, qorder, sorder)
    for cluster in clusters:
        # Every cluster must contain at least one entry.
        assert len(cluster) > 0

    # below runs `quota mapping`
    work_dir = op.join(op.dirname(op.abspath(qa_file)), "work")

    selected_ids = solve_lp(clusters,
                            quota,
                            work_dir=work_dir,
                            Nmax=opts.Nmax,
                            self_match=self_match,
                            solver=opts.solver,
                            verbose=opts.verbose)

    logging.debug("Selected {0} blocks.".format(len(selected_ids)))
    prefix = qa_file.rsplit(".", 1)[0]
    suffix = "{0}x{1}".format(qa, qb)
    outfile = ".".join((prefix, suffix))
    fw = must_open(outfile, "w")
    try:
        print(",".join(str(x) for x in selected_ids), file=fw)
    finally:
        # Close the handle even if the write fails.
        fw.close()
    logging.debug("Screened blocks ids written to `{0}`.".format(outfile))

    if opts.screen:
        from jcvi.compara.synteny import screen

        new_qa_file = ".".join((prefix, suffix, "anchors"))
        largs = [qa_file, new_qa_file, "--ids", outfile]
        # Forward the bed files only when both were supplied.
        if opts.qbed and opts.sbed:
            largs += ["--qbed={0}".format(opts.qbed)]
            largs += ["--sbed={0}".format(opts.sbed)]
        screen(largs)