def write_misclassified(self, tax_assign_map, tax_assign_conf, out_fname):
    """Dump every sequence in ``self.miss_list`` to a plain-text report.

    The file starts with a ';'-prefixed legend followed by an empty line.
    Each sequence then contributes four lines and a blank separator:
    its ID, the lineage taken from ``self.taxonomy``, the lineage stored
    in ``tax_assign_map`` and the tab-separated confidence values from
    ``tax_assign_conf`` printed with three decimals.

    tax_assign_map  -- indexable by sequence ID; yields the assigned ranks
    tax_assign_conf -- indexable by sequence ID; yields an iterable of
                       numeric confidence values
    out_fname       -- path of the report; opened in 'w' mode, so an
                       existing file is overwritten
    """
    legend = (
        "; List of misclassified sequences.\n",
        "; Format: 4 lines per sequence\n",
        "; 1 sequence ID\n",
        "; 2 correct assignment\n",
        "; 3 epatax assignment\n",
        "; 4 rank confidence levels\n\n",
    )
    reference = self.taxonomy
    with open(out_fname, 'w') as report:
        report.writelines(legend)
        for seq_id in self.miss_list:
            report.write(seq_id + "\n")
            # Lineage recorded in the evaluator's own taxonomy.
            expected = Taxonomy.lineage_str(reference.get_seq_ranks(seq_id))
            report.write(expected + "\n")
            # Lineage that was assigned to the sequence.
            assigned = Taxonomy.lineage_str(tax_assign_map[seq_id])
            report.write(assigned + "\n")
            conf_fields = ["%.3f" % value for value in tax_assign_conf[seq_id]]
            # The second newline is the blank line separating records.
            report.write("\t".join(conf_fields) + "\n" + "\n")
if out_stem.endswith(".mis"): out_stem = out_stem[:-4] # mis_fname = os.path.join(out_path, out_stem + ".mis") e = EpataxEvaluator(config, args.taxonomy_fname) e.calc_mislabel_stats(mislabel_map, true_taxonomy.seq_ranks_map, False) fp_fname = os.path.join(out_path, out_stem + ".eval%d.fp" % int(args.min_conf * 100)) with open(fp_fname, "w") as fout: for sid in e.fp: ranks = mislabel_map[sid]["ranks"] lvl = mislabel_map[sid]["mis_rank"] true_ranks = e.taxonomy.get_seq_ranks(sid) fout.write("%s\t%s\t%s\t%s\n" % (sid, lvl, Taxonomy.lineage_str(true_ranks), Taxonomy.lineage_str(ranks))) tp_fname = os.path.join(out_path, out_stem + ".eval%d.tp" % int(args.min_conf * 100)) with open(tp_fname, "w") as fout: for sid in e.tp: ranks = mislabel_map[sid]["ranks"] fout.write("%s\t%s\n" % (sid, Taxonomy.lineage_str(ranks))) fn_fname = os.path.join(out_path, out_stem + ".eval%d.fn" % int(args.min_conf * 100)) with open(fn_fname, "w") as fout: for sid in e.fn: true_ranks = true_taxonomy.get_seq_ranks(sid) ranks = e.taxonomy.get_seq_ranks(sid)
# An explicitly requested output directory overrides the current out_path.
if args.output_dir:
    out_path = args.output_dir
# Strip a trailing ".mis" so the report names are built from the bare stem.
if out_stem.endswith(".mis"):
    out_stem = out_stem[:-4]

# Evaluate the mislabel calls against the ranks stored in true_taxonomy.
e = EpataxEvaluator(config, args.taxonomy_fname)
e.calc_mislabel_stats(mislabel_map, true_taxonomy.seq_ranks_map, False)

# All reports share the prefix "<out_stem>.eval<pct>", where <pct> is the
# confidence cutoff as a whole percentage.  Round before converting:
# 0.29 * 100 evaluates to 28.999999999999996, so a bare int() would name
# the files "eval28" instead of "eval29".
eval_prefix = os.path.join(out_path, out_stem + ".eval%d" % int(round(args.min_conf * 100)))

fp_fname = eval_prefix + ".fp"
with open(fp_fname, "w") as fout:
    # One tab-separated row per ID in e.fp (presumably the false positives):
    # ID, mis_rank, lineage from e.taxonomy, lineage from mislabel_map.
    for sid in e.fp:
        ranks = mislabel_map[sid]["ranks"]
        lvl = mislabel_map[sid]["mis_rank"]
        true_ranks = e.taxonomy.get_seq_ranks(sid)
        fout.write("%s\t%s\t%s\t%s\n" % (sid, lvl, Taxonomy.lineage_str(true_ranks), Taxonomy.lineage_str(ranks)))

tp_fname = eval_prefix + ".tp"
with open(tp_fname, "w") as fout:
    # One row per ID in e.tp (presumably the true positives):
    # ID and the lineage stored in mislabel_map.
    for sid in e.tp:
        ranks = mislabel_map[sid]["ranks"]
        fout.write("%s\t%s\n" % (sid, Taxonomy.lineage_str(ranks)))

fn_fname = eval_prefix + ".fn"
with open(fn_fname, "w") as fout:
    # One row per ID in e.fn (presumably the false negatives):
    # ID, lineage from true_taxonomy, lineage from e.taxonomy.
    for sid in e.fn:
        true_ranks = true_taxonomy.get_seq_ranks(sid)
        ranks = e.taxonomy.get_seq_ranks(sid)
        fout.write("%s\t%s\t%s\n" % (sid, Taxonomy.lineage_str(true_ranks), Taxonomy.lineage_str(ranks)))

full_fname = eval_prefix + ".full"