def ortholog(args):
    """
    %prog ortholog species_a species_b

    Run a sensitive pipeline to find orthologs between two species a and b.
    The pipeline runs LAST and generate .lifted.anchors.

    `--full` mode would assume 1-to-1 quota synteny blocks as the backbone of
    such predictions. Extra orthologs will be recruited from reciprocal best
    match (RBH).
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so its wording is part of the command-line output and is kept as-is.
    from jcvi.apps.last import main as run_last
    from jcvi.compara.blastfilter import main as run_blastfilter
    from jcvi.compara.quota import main as run_quota
    from jcvi.compara.synteny import scan, mcscan, liftover
    # Aliased so the builtin `filter` is not shadowed inside this function.
    from jcvi.formats.blast import cscore, filter as blast_filter

    parser = OptionParser(ortholog.__doc__)
    parser.add_option(
        "--full",
        default=False,
        action="store_true",
        help="Run in full mode, including blocks and RBH"
    )
    parser.add_option(
        "--cscore",
        default=0.7,
        type="float",
        help="C-score cutoff [default: %default]"
    )
    parser.add_option(
        "--dist",
        default=20,
        type="int",
        help="Extent of flanking regions to search"
    )
    opts, positional = parser.parse_args(args)

    # Exactly two species names are required; otherwise show help and exit.
    if len(positional) != 2:
        sys.exit(not parser.print_help())

    species_a, species_b = positional

    # Input files are derived from the species names.
    a_bed = species_a + ".bed"
    a_cds = species_a + ".cds"
    b_bed = species_b + ".bed"
    b_cds = species_b + ".cds"
    dist_opt = "--dist={0}".format(opts.dist)

    # Output prefixes: "<a>.<b>" for the forward direction, "<b>.<a>" for the
    # reverse one. Only the part of the file name before the first "." is used.
    a_stem = a_cds.partition(".")[0]
    b_stem = b_cds.partition(".")[0]
    fwd_prefix = a_stem + "." + b_stem
    rev_prefix = b_stem + "." + a_stem

    # Step 1: sequence comparison of the two CDS files with LAST.
    last_file = fwd_prefix + ".last"
    if need_update((a_cds, b_cds), last_file):
        run_last([b_cds, a_cds, "-o", last_file])

    # Self-comparison: run the hits through the blast filter with
    # --pctid=98 --inverse and continue with the file it returns.
    if species_a == species_b:
        last_file = blast_filter(
            [last_file, "--hitlen=0", "--pctid=98", "--inverse"]
        )

    # Step 2: C-score filtering of the hits.
    filtered = last_file + ".filtered"
    if need_update(last_file, filtered):
        run_blastfilter([last_file, "--cscore={0}".format(opts.cscore)])

    anchors = fwd_prefix + ".anchors"

    # Default (non-full) mode stops after scan with liftover.
    if not opts.full:
        lifted = fwd_prefix + ".lifted.anchors"
        if need_update(filtered, lifted):
            scan(
                [filtered, anchors, dist_opt,
                 "--liftover={0}".format(last_file)]
            )
        return

    # Remaining file names for full mode.
    one_to_one = fwd_prefix + ".1x1.anchors"
    one_to_one_lifted = fwd_prefix + ".1x1.lifted.anchors"
    fwd_blocks = fwd_prefix + ".1x1.blocks"
    rev_blocks = rev_prefix + ".1x1.blocks"
    rbh_file = fwd_prefix + ".rbh"
    fwd_orthologs = fwd_prefix + ".ortholog"
    rev_orthologs = rev_prefix + ".ortholog"

    # Step 3: anchors from the filtered hits.
    if need_update(filtered, anchors):
        scan([filtered, anchors, dist_opt])

    # Step 4: screen the anchors down to a 1:1 quota.
    if need_update(anchors, one_to_one):
        run_quota([anchors, "--quota=1:1", "--screen"])

    # Step 5: liftover against the 1:1 anchors.
    if need_update((last_file, one_to_one), one_to_one_lifted):
        liftover([last_file, one_to_one, dist_opt])

    # Step 6: block files, one per reference bed.
    if need_update(one_to_one_lifted, [fwd_blocks, rev_blocks]):
        mcscan([a_bed, one_to_one_lifted, "--iter=1", "-o", fwd_blocks])
        mcscan([b_bed, one_to_one_lifted, "--iter=1", "-o", rev_blocks])

    # Step 7: the .rbh file is produced by cscore from the LAST hits.
    if need_update(last_file, rbh_file):
        cscore([last_file, "-o", rbh_file])

    # Step 8: combine blocks and RBH into the ortholog tables, both directions.
    if need_update(
        [fwd_blocks, rev_blocks, rbh_file], [fwd_orthologs, rev_orthologs]
    ):
        make_ortholog(fwd_blocks, rbh_file, fwd_orthologs)
        make_ortholog(rev_blocks, rbh_file, rev_orthologs)
def ortholog(args):
    """
    %prog ortholog a.bed a.cds b.bed b.cds

    Run a sensitive pipeline to find orthologs between two species a and b.
    The pipeline runs LAST and 1-to-1 quota synteny blocks as the backbone of
    such predictions. Extra orthologs will be recruited from reciprocal best
    match (RBH).
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so its wording is part of the command-line output and is kept as-is.
    from jcvi.apps.last import main as run_last
    from jcvi.compara.blastfilter import main as run_blastfilter
    from jcvi.compara.quota import main as run_quota
    from jcvi.compara.synteny import scan, mcscan, liftover
    from jcvi.formats.blast import cscore

    parser = OptionParser(ortholog.__doc__)
    parser.add_option(
        "--cscore",
        default=0.99,
        type="float",
        help="C-score cutoff [default: %default]"
    )
    opts, positional = parser.parse_args(args)

    # Exactly four file arguments are required; otherwise show help and exit.
    if len(positional) != 4:
        sys.exit(not parser.print_help())

    a_bed, a_cds, b_bed, b_cds = positional

    # Output prefixes: "<a>.<b>" for the forward direction, "<b>.<a>" for the
    # reverse one. Only the part of the file name before the first "." is used.
    a_stem = a_cds.partition(".")[0]
    b_stem = b_cds.partition(".")[0]
    fwd_prefix = a_stem + "." + b_stem
    rev_prefix = b_stem + "." + a_stem

    # Every file name the pipeline touches, computed up front.
    last_file = fwd_prefix + ".last"
    filtered = last_file + ".filtered"
    anchors = fwd_prefix + ".anchors"
    lifted = fwd_prefix + ".lifted.anchors"
    one_to_one = fwd_prefix + ".1x1.anchors"
    one_to_one_lifted = fwd_prefix + ".1x1.lifted.anchors"
    fwd_blocks = fwd_prefix + ".1x1.blocks"
    rev_blocks = rev_prefix + ".1x1.blocks"
    rbh_file = fwd_prefix + ".rbh"
    fwd_orthologs = fwd_prefix + ".ortholog"
    rev_orthologs = rev_prefix + ".ortholog"

    # The bed files are passed to several of the steps below.
    bed_opts = ["--qbed=" + a_bed, "--sbed=" + b_bed]

    # Step 1: sequence comparison of the two CDS files with LAST.
    if need_update((a_cds, b_cds), last_file):
        run_last([b_cds, a_cds, "-o", last_file])

    # Step 2: C-score filtering of the hits.
    if need_update(last_file, filtered):
        filter_args = [
            last_file,
            "--cscore={0}".format(opts.cscore),
            "--tandem_Nmax=10",
        ]
        run_blastfilter(filter_args + bed_opts)

    # Step 3: anchors from the filtered hits.
    # NOTE(review): freshness is checked against the .lifted.anchors file even
    # though scan is only given the .anchors path -- presumably scan also
    # writes the lifted file; confirm against scan's implementation.
    if need_update(filtered, lifted):
        scan([filtered, anchors] + bed_opts)

    # Step 4: screen the anchors down to a 1:1 quota.
    if need_update(anchors, one_to_one):
        run_quota([anchors, "--quota=1:1", "--screen"] + bed_opts)

    # Step 5: liftover against the 1:1 anchors.
    if need_update((last_file, one_to_one), one_to_one_lifted):
        liftover([last_file, one_to_one] + bed_opts)

    # Step 6: block files, one per reference bed.
    if need_update(one_to_one_lifted, [fwd_blocks, rev_blocks]):
        mcscan([a_bed, one_to_one_lifted, "--iter=1", "-o", fwd_blocks])
        mcscan([b_bed, one_to_one_lifted, "--iter=1", "-o", rev_blocks])

    # Step 7: the .rbh file is produced by cscore from the LAST hits.
    if need_update(last_file, rbh_file):
        cscore([last_file, "-o", rbh_file])

    # Step 8: combine blocks and RBH into the ortholog tables, both directions.
    if need_update(
        [fwd_blocks, rev_blocks, rbh_file], [fwd_orthologs, rev_orthologs]
    ):
        make_ortholog(fwd_blocks, rbh_file, fwd_orthologs)
        make_ortholog(rev_blocks, rbh_file, rev_orthologs)