Esempio n. 1
0
    def write_misclassified(self, tax_assign_map, tax_assign_conf, out_fname):
        """Dump every sequence listed in ``self.miss_list`` to a text report.

        The report opens with a ';'-prefixed header describing the layout.
        Each sequence then contributes four lines followed by a blank line:
        the sequence ID, the lineage string built from
        ``self.taxonomy.get_seq_ranks``, the lineage string built from its
        ``tax_assign_map`` entry, and its ``tax_assign_conf`` values
        formatted with three decimals and separated by tabs.

        tax_assign_map  -- lookup from sequence ID to the assigned ranks
        tax_assign_conf -- lookup from sequence ID to an iterable of
                           per-rank confidence values
        out_fname       -- path of the report; opened in 'w' mode, so an
                           existing file is overwritten
        """
        taxonomy = self.taxonomy
        header = ("; List of misclassified sequences.\n"
                  "; Format: 4 lines per sequence\n"
                  "; 1 sequence ID\n"
                  "; 2 correct assignment\n"
                  "; 3 epatax assignment\n"
                  "; 4 rank confidence levels\n\n")

        with open(out_fname, 'w') as report:
            report.write(header)

            for seq_id in self.miss_list:
                # Line 1: the sequence identifier.
                report.write(seq_id + "\n")

                # Line 2: lineage looked up in self.taxonomy.
                reference_lineage = Taxonomy.lineage_str(
                    taxonomy.get_seq_ranks(seq_id))
                report.write(reference_lineage + "\n")

                # Line 3: lineage stored in the assignment map.
                assigned_lineage = Taxonomy.lineage_str(tax_assign_map[seq_id])
                report.write(assigned_lineage + "\n")

                # Line 4: tab-separated confidences, then the blank line
                # that separates records.
                conf_fields = "\t".join(
                    "%.3f" % value for value in tax_assign_conf[seq_id])
                report.write(conf_fields + "\n\n")
Esempio n. 2
0
    def write_misclassified(self, tax_assign_map, tax_assign_conf, out_fname):
        """Write the misclassified-sequence report to ``out_fname``.

        A ';'-prefixed header describing the format is written first.  For
        each ID in ``self.miss_list`` the file then receives the ID, the
        lineage string for the ranks returned by
        ``self.taxonomy.get_seq_ranks``, the lineage string for the ranks in
        ``tax_assign_map``, the values from ``tax_assign_conf`` rendered as
        tab-separated numbers with three decimals, and an empty line.

        tax_assign_map  -- indexed by sequence ID, yields the assigned ranks
        tax_assign_conf -- indexed by sequence ID, yields an iterable of
                           confidence values
        out_fname       -- output path; the file is opened with mode 'w'
        """
        header_lines = (
            "; List of misclassified sequences.\n",
            "; Format: 4 lines per sequence\n",
            "; 1 sequence ID\n",
            "; 2 correct assignment\n",
            "; 3 epatax assignment\n",
            "; 4 rank confidence levels\n\n",
        )
        tax = self.taxonomy

        with open(out_fname, 'w') as out:
            out.writelines(header_lines)

            for seq_name in self.miss_list:
                out.write(seq_name + "\n")

                # Lineage according to self.taxonomy.
                expected = tax.get_seq_ranks(seq_name)
                out.write(Taxonomy.lineage_str(expected) + "\n")

                # Lineage according to the supplied assignment map.
                assigned = tax_assign_map[seq_name]
                out.write(Taxonomy.lineage_str(assigned) + "\n")

                # Confidence values, one tab-separated field per entry.
                confidences = tax_assign_conf[seq_name]
                formatted = ["%.3f" % c for c in confidences]
                out.write("\t".join(formatted) + "\n")

                # Blank line terminates the record.
                out.write("\n")
    if out_stem.endswith(".mis"):
        out_stem = out_stem[:-4]
#    mis_fname = os.path.join(out_path, out_stem + ".mis")

    e = EpataxEvaluator(config, args.taxonomy_fname)
    e.calc_mislabel_stats(mislabel_map, true_taxonomy.seq_ranks_map, False)

    fp_fname = os.path.join(out_path,
                            out_stem + ".eval%d.fp" % int(args.min_conf * 100))
    with open(fp_fname, "w") as fout:
        for sid in e.fp:
            ranks = mislabel_map[sid]["ranks"]
            lvl = mislabel_map[sid]["mis_rank"]
            true_ranks = e.taxonomy.get_seq_ranks(sid)
            fout.write("%s\t%s\t%s\t%s\n" %
                       (sid, lvl, Taxonomy.lineage_str(true_ranks),
                        Taxonomy.lineage_str(ranks)))

    tp_fname = os.path.join(out_path,
                            out_stem + ".eval%d.tp" % int(args.min_conf * 100))
    with open(tp_fname, "w") as fout:
        for sid in e.tp:
            ranks = mislabel_map[sid]["ranks"]
            fout.write("%s\t%s\n" % (sid, Taxonomy.lineage_str(ranks)))

    fn_fname = os.path.join(out_path,
                            out_stem + ".eval%d.fn" % int(args.min_conf * 100))
    with open(fn_fname, "w") as fout:
        for sid in e.fn:
            true_ranks = true_taxonomy.get_seq_ranks(sid)
            ranks = e.taxonomy.get_seq_ranks(sid)
    if args.output_dir:
        out_path = args.output_dir
    if out_stem.endswith(".mis"):
        out_stem = out_stem[:-4]
#    mis_fname = os.path.join(out_path, out_stem + ".mis")
    
    e = EpataxEvaluator(config, args.taxonomy_fname)
    e.calc_mislabel_stats(mislabel_map, true_taxonomy.seq_ranks_map, False)
    
    fp_fname = os.path.join(out_path, out_stem + ".eval%d.fp" % int(args.min_conf * 100))
    with open(fp_fname, "w") as fout:
        for sid in e.fp:
            ranks = mislabel_map[sid]["ranks"]
            lvl = mislabel_map[sid]["mis_rank"]
            true_ranks = e.taxonomy.get_seq_ranks(sid)
            fout.write("%s\t%s\t%s\t%s\n" % (sid, lvl, Taxonomy.lineage_str(true_ranks), Taxonomy.lineage_str(ranks)))

    tp_fname = os.path.join(out_path, out_stem + ".eval%d.tp" % int(args.min_conf * 100))
    with open(tp_fname, "w") as fout:
        for sid in e.tp:
            ranks = mislabel_map[sid]["ranks"]
            fout.write("%s\t%s\n" % (sid, Taxonomy.lineage_str(ranks)))

    fn_fname = os.path.join(out_path, out_stem + ".eval%d.fn" % int(args.min_conf * 100))
    with open(fn_fname, "w") as fout:
        for sid in e.fn:
            true_ranks = true_taxonomy.get_seq_ranks(sid)
            ranks = e.taxonomy.get_seq_ranks(sid)
            fout.write("%s\t%s\t%s\n" % (sid, Taxonomy.lineage_str(true_ranks), Taxonomy.lineage_str(ranks)))

    full_fname = os.path.join(out_path, out_stem + ".eval%d.full" % int(args.min_conf * 100))