def main(args):
    """Run `quota mapping` screening on an anchors file from the command line.

    Parses the command-line options, reads the clusters from the single
    positional anchors file with ``read_clusters``, lets ``solve_lp`` select
    the block ids that satisfy the requested quota, and writes those ids as
    one comma-separated line to ``<prefix>.<qa>x<qb>``.  With ``--screen`` it
    additionally calls ``jcvi.compara.synteny.screen`` to write
    ``<prefix>.<qa>x<qb>.anchors``.

    Args:
        args: list of command-line arguments (options plus exactly one
            anchors file).

    Raises:
        SystemExit: when the number of positional arguments is not one, or
            when ``--quota`` is not of the form ``x:x`` with integer parts.
        Exception: when ``--self`` is given and the two quota numbers differ.

    NOTE(review): this file defines ``main`` more than once; a later
    definition replaces this one at import time -- confirm which is intended.
    """
    p = OptionParser(__doc__)
    p.set_beds()
    p.add_option("--quota", default="1:1",
            help="`quota mapping` procedure -- screen blocks to constrain mapping"
                 " (useful for orthology), "
                 "put in the format like (#subgenomes expected for genome X):"
                 "(#subgenomes expected for genome Y) "
                 "[default: %default]")
    p.add_option("--Nm", dest="Nmax", type="int", default=10,
            help="distance cutoff to tolerate two blocks that are "
                 "slightly overlapping (cutoff for `quota mapping`) "
                 "[default: %default units (gene or bp dist)]")

    supported_solvers = ("SCIP", "GLPK")
    p.add_option("--self", dest="self_match",
            action="store_true", default=False,
            help="you might turn this on when screening paralogous blocks, "
                 "esp. if you have reduced mirrored blocks into non-redundant set")
    p.add_option("--solver", default="SCIP", choices=supported_solvers,
            help="use MIP solver [default: %default]")
    p.add_option("--verbose", action="store_true",
            default=False, help="show verbose solver output")
    p.add_option("--screen", default=False, action="store_true",
            help="generate new anchors file [default: %default]")

    opts, args = p.parse_args(args)

    if len(args) != 1:
        # Show usage and exit non-zero (assuming print_help() returns None,
        # as it does in the stdlib optparse).
        sys.exit(not p.print_help())

    qa_file, = args
    # Only the gene orders are needed here; the other returned values are
    # not used by this function.
    _, _, qorder, sorder, _ = check_beds(qa_file, p, opts)

    # Sanity check for the quota.  An empty value is rejected with the same
    # message as a malformed one instead of surfacing later as a NameError.
    # Only ValueError is caught (wrong number of ':'-separated parts or a
    # non-integer part) so that Ctrl-C and SystemExit are not swallowed.
    try:
        qa, qb = [int(x) for x in (opts.quota or "").split(":")]
    except ValueError:
        print("quota string should be the form x:x (2:4, 1:3, etc.)",
              file=sys.stderr)
        sys.exit(1)

    if opts.self_match and qa != qb:
        raise Exception("when comparing genome to itself, "
                        "quota must be the same number "
                        "(like 1:1, 2:2) you have %s" % opts.quota)
    quota = (qa, qb)

    clusters = read_clusters(qa_file, qorder, sorder)
    for cluster in clusters:
        assert len(cluster) > 0

    # below runs `quota mapping`
    work_dir = op.join(op.dirname(op.abspath(qa_file)), "work")

    selected_ids = solve_lp(clusters, quota, work_dir=work_dir,
                            Nmax=opts.Nmax, self_match=opts.self_match,
                            solver=opts.solver, verbose=opts.verbose)

    logging.debug("Selected {0} blocks.".format(len(selected_ids)))
    prefix = qa_file.rsplit(".", 1)[0]
    suffix = "{0}x{1}".format(qa, qb)
    outfile = ".".join((prefix, suffix))

    fw = must_open(outfile, "w")
    try:
        print(",".join(str(x) for x in selected_ids), file=fw)
    finally:
        # Close the handle even if writing fails.
        fw.close()
    logging.debug("Screened blocks ids written to `{0}`.".format(outfile))

    if opts.screen:
        from jcvi.compara.synteny import screen

        new_qa_file = ".".join((prefix, suffix, "anchors"))
        largs = [qa_file, new_qa_file, "--ids", outfile]
        # Forward the bed files only when both were given explicitly.
        if opts.qbed and opts.sbed:
            largs += ["--qbed={0}".format(opts.qbed)]
            largs += ["--sbed={0}".format(opts.sbed)]
        screen(largs)
def main(args):
    """Run `quota mapping` screening on an anchors file from the command line.

    Parses the command-line options, reads the clusters from the single
    positional anchors file with ``read_clusters``, lets ``solve_lp`` select
    the block ids that satisfy the requested quota, and writes those ids as
    one comma-separated line to ``<prefix>.<qa>x<qb>``.  With ``--screen`` it
    additionally calls ``jcvi.compara.synteny.screen`` to write
    ``<prefix>.<qa>x<qb>.anchors``.

    Args:
        args: list of command-line arguments (options plus exactly one
            anchors file).

    Raises:
        SystemExit: when the number of positional arguments is not one, or
            when ``--quota`` is not of the form ``x:x`` with integer parts.
        Exception: when ``--self`` is given and the two quota numbers differ.

    NOTE(review): this file also contains an earlier definition of ``main``;
    this one replaces it at import time -- confirm the duplicate is intended.
    """
    p = OptionParser(__doc__)
    p.set_beds()
    p.add_option(
        "--quota", default="1:1",
        help="`quota mapping` procedure -- screen blocks to constrain mapping"
             " (useful for orthology), "
             "put in the format like (#subgenomes expected for genome X):"
             "(#subgenomes expected for genome Y) "
             "[default: %default]")
    p.add_option("--Nm", dest="Nmax", type="int", default=10,
                 help="distance cutoff to tolerate two blocks that are "
                      "slightly overlapping (cutoff for `quota mapping`) "
                      "[default: %default units (gene or bp dist)]")

    supported_solvers = ("SCIP", "GLPK")
    p.add_option(
        "--self", dest="self_match", action="store_true", default=False,
        help="you might turn this on when screening paralogous blocks, "
             "esp. if you have reduced mirrored blocks into non-redundant set")
    p.add_option("--solver", default="SCIP", choices=supported_solvers,
                 help="use MIP solver [default: %default]")
    p.set_verbose(help="Show verbose solver output")
    p.add_option("--screen", default=False, action="store_true",
                 help="generate new anchors file [default: %default]")

    opts, args = p.parse_args(args)

    if len(args) != 1:
        # Show usage and exit non-zero (assuming print_help() returns None,
        # as it does in the stdlib optparse).
        sys.exit(not p.print_help())

    qa_file, = args
    # Only the gene orders are needed here; the other returned values are
    # not used by this function.
    _, _, qorder, sorder, _ = check_beds(qa_file, p, opts)

    # Sanity check for the quota.  An empty value is rejected with the same
    # message as a malformed one instead of surfacing later as a NameError.
    # Only ValueError is caught (wrong number of ':'-separated parts or a
    # non-integer part) so that Ctrl-C and SystemExit are not swallowed.
    try:
        qa, qb = [int(x) for x in (opts.quota or "").split(":")]
    except ValueError:
        print("quota string should be the form x:x (2:4, 1:3, etc.)",
              file=sys.stderr)
        sys.exit(1)

    if opts.self_match and qa != qb:
        raise Exception("when comparing genome to itself, "
                        "quota must be the same number "
                        "(like 1:1, 2:2) you have %s" % opts.quota)
    quota = (qa, qb)

    clusters = read_clusters(qa_file, qorder, sorder)
    for cluster in clusters:
        assert len(cluster) > 0

    # below runs `quota mapping`
    work_dir = op.join(op.dirname(op.abspath(qa_file)), "work")

    selected_ids = solve_lp(clusters, quota, work_dir=work_dir,
                            Nmax=opts.Nmax, self_match=opts.self_match,
                            solver=opts.solver, verbose=opts.verbose)

    logging.debug("Selected {0} blocks.".format(len(selected_ids)))
    prefix = qa_file.rsplit(".", 1)[0]
    suffix = "{0}x{1}".format(qa, qb)
    outfile = ".".join((prefix, suffix))

    fw = must_open(outfile, "w")
    try:
        print(",".join(str(x) for x in selected_ids), file=fw)
    finally:
        # Close the handle even if writing fails.
        fw.close()
    logging.debug("Screened blocks ids written to `{0}`.".format(outfile))

    if opts.screen:
        from jcvi.compara.synteny import screen

        new_qa_file = ".".join((prefix, suffix, "anchors"))
        largs = [qa_file, new_qa_file, "--ids", outfile]
        # Forward the bed files only when both were given explicitly.
        if opts.qbed and opts.sbed:
            largs += ["--qbed={0}".format(opts.qbed)]
            largs += ["--sbed={0}".format(opts.sbed)]
        screen(largs)