def get_accuracy(query, gff_file, evidences_bed, sizesfile, type, key):
    """
    Evaluate the features of `gff_file` against `evidences_bed`, restricted to
    a query range that looks like "chr1:1-10000".

    The BED file to evaluate is obtained from
    `get_bed_file(gff_file, type, key)`. The return value is whatever
    `jcvi.formats.bed.evaluate` returns; it is meant to carry the sensitivity,
    specificity and accuracy of the comparison.
    """
    from jcvi.formats.bed import evaluate

    # evaluate() takes a command-line style argument list.
    evaluate_args = [
        get_bed_file(gff_file, type, key),
        evidences_bed,
        sizesfile,
        "--query={0}".format(query),
    ]
    return evaluate(evaluate_args)
def batcheval(args):
    """
    %prog batcheval model.ids gff_file evidences.bed fastafile

    Get the accuracy for a list of models against evidences in the range of the
    genes. For example:

    $ %prog batcheval isoforms.ids all.gff3 proteins.bed scaffolds.fasta

    One line per model, "<model id><TAB><score>", is written to a file named
    after the ids file with its extension replaced by ".scores"
    (isoforms.scores in the example above). A scratch BED file with the
    extension ".bed" (isoforms.bed) is rewritten for every model.
    """
    from jcvi.formats.bed import evaluate
    from jcvi.formats.gff import make_index

    # NOTE(review): the usage text comes from evaluate's docstring rather than
    # this function's own -- confirm that this is intended.
    p = OptionParser(evaluate.__doc__)
    p.add_option(
        "--type",
        default="CDS",
        help="list of features to extract, use comma to separate (e.g."
        "'five_prime_UTR,CDS,three_prime_UTR') [default: %default]",
    )
    opts, args = p.parse_args(args)

    if len(args) != 4:
        sys.exit(not p.print_help())

    model_ids, gff_file, evidences_bed, fastafile = args
    featuretypes = set(opts.type.split(","))

    g = make_index(gff_file)
    prefix = model_ids.rsplit(".", 1)[0]
    # The same scratch file is reused (truncated) for every model.
    cidbed = prefix + ".bed"

    # Context managers close both handles even if a model fails midway.
    with open(model_ids) as fp, open(prefix + ".scores", "w") as fwscores:
        for row in fp:
            cid = row.strip()
            if not cid:
                # Blank lines carry no model id to look up.
                continue

            # The first parent supplies the query range; the second argument
            # is presumably the hierarchy level -- confirm against the index API.
            parent = next(g.parents(cid, 1))
            query = "{0}:{1}-{2}".format(parent.chrom, parent.start, parent.stop)

            with open(cidbed, "w") as fw:
                for c in g.children(cid, 1):
                    if c.featuretype in featuretypes:
                        fw.write(c.to_bed())

            b = evaluate(
                [cidbed, evidences_bed, fastafile, "--query={0}".format(query)]
            )
            print("\t".join((cid, b.score)), file=fwscores)
            # Flush per model so partial results survive an interrupted run.
            fwscores.flush()
def batcheval(args):
    """
    %prog batcheval model.ids gff_file evidences.bed fastafile

    Get the accuracy for a list of models against evidences in the range of the
    genes. For example:

    $ %prog batcheval isoforms.ids all.gff3 proteins.bed scaffolds.fasta

    One line per model, "<model id><TAB><score>", is written to a file named
    after the ids file with its extension replaced by ".scores"
    (isoforms.scores in the example above). A scratch BED file with the
    extension ".bed" (isoforms.bed) is rewritten for every model.
    """
    # NOTE(review): an earlier definition of batcheval precedes this one in the
    # file; this later definition is the one the module name ends up bound to.
    from jcvi.formats.bed import evaluate
    from jcvi.formats.gff import make_index

    # NOTE(review): the usage text comes from evaluate's docstring rather than
    # this function's own -- confirm that this is intended.
    p = OptionParser(evaluate.__doc__)
    p.add_option(
        "--type",
        default="CDS",
        help="list of features to extract, use comma to separate (e.g."
        "'five_prime_UTR,CDS,three_prime_UTR') [default: %default]",
    )
    opts, args = p.parse_args(args)

    if len(args) != 4:
        sys.exit(not p.print_help())

    model_ids, gff_file, evidences_bed, fastafile = args
    featuretypes = set(opts.type.split(","))

    g = make_index(gff_file)
    prefix = model_ids.rsplit(".", 1)[0]
    # The same scratch file is reused (truncated) for every model.
    cidbed = prefix + ".bed"

    # Context managers close both handles even if a model fails midway.
    with open(model_ids) as fp, open(prefix + ".scores", "w") as fwscores:
        for row in fp:
            cid = row.strip()
            if not cid:
                # Blank lines carry no model id to look up.
                continue

            # next() replaces the Python 2-only iterator method .next(). The
            # second argument to parents() is presumably the hierarchy level --
            # confirm against the index API.
            parent = next(g.parents(cid, 1))
            query = "{0}:{1}-{2}".format(parent.chrom, parent.start, parent.stop)

            with open(cidbed, "w") as fw:
                for c in g.children(cid, 1):
                    if c.featuretype in featuretypes:
                        fw.write(c.to_bed())

            b = evaluate(
                [cidbed, evidences_bed, fastafile, "--query={0}".format(query)]
            )
            # write() replaces the Python 2 "print >> file" statement and works
            # whether or not print is a function in this module.
            fwscores.write("\t".join((cid, b.score)) + "\n")
            # Flush per model so partial results survive an interrupted run.
            fwscores.flush()