def check_seq_tax_labels(self, seq_name, orig_ranks, ranks, lws):
    """Record a mislabel if the assigned ranks disagree with the original ones.

    The two rank lists are compared position by position, up to the length
    of the shorter one.  The first position where ``ranks`` holds a value
    that is neither ``Taxonomy.EMPTY_RANK`` nor equal to the corresponding
    entry of ``orig_ranks`` is treated as the mislabel level.

    When such a level exists, a record (dict) describing the mislabel is
    appended to ``self.mislabels`` and returned; otherwise ``None`` is
    returned and ``self.mislabels`` is left untouched.
    """
    # Index of the first conflicting, non-empty rank; -1 when there is none.
    mislabel_lvl = next(
        (
            lvl
            for lvl, (original, assigned) in enumerate(zip(orig_ranks, ranks))
            if assigned != Taxonomy.EMPTY_RANK and assigned != original
        ),
        -1,
    )
    if mislabel_lvl < 0:
        return None

    real_lvl = self.guess_rank_level(orig_ranks, mislabel_lvl)
    mis_rec = {
        'name': EpacConfig.strip_ref_prefix(seq_name),
        'orig_level': mislabel_lvl,
        'real_level': real_lvl,
        'level_name': self.rank_level_name(real_lvl)[1],
        'inv_level': -1 * real_lvl,  # just for sorting
        'orig_ranks': orig_ranks,
        'ranks': ranks,
        'lws': lws,
        'conf': lws[mislabel_lvl],  # weight at the mislabelled level
    }
    self.mislabels.append(mis_rec)
    return mis_rec
def check_seq_ids(self):
    """Check that sequence IDs in the taxonomy and the alignment correspond.

    Every key of ``self.taxonomy.seq_ranks_map`` is stripped of its
    reference prefix (``EpacConfig.strip_ref_prefix``) and looked up with
    ``self.alignment.has_seq``.  IDs that are not found are collected in
    ``self.mis_ids``, which is reset on every call.

    If any ID is missing and ``self.verbose`` is set, an error message
    listing the missing IDs is passed to ``self.cfg.exit_user_error``.

    Returns:
        The list of missing (unprefixed) sequence IDs; empty if none.
    """
    # NOTE(review): the reviewed source defines check_seq_ids twice in a row;
    # the later definition replaces the earlier one.
    self.mis_ids = []
    # Iterate the mapping directly: dict.iterkeys() exists only on Python 2,
    # while plain iteration yields the same keys on Python 2 and 3.
    for sid in self.taxonomy.seq_ranks_map:
        unprefixed_sid = EpacConfig.strip_ref_prefix(sid)
        if not self.alignment.has_seq(unprefixed_sid):
            self.mis_ids.append(unprefixed_sid)

    if self.mis_ids and self.verbose:
        errmsg = "ERROR: Following %d sequence(s) are missing in your alignment file:\n%s\n\n" % (
            len(self.mis_ids), "\n".join(self.mis_ids))
        errmsg += "Please make sure sequence IDs in taxonomic annotation file and in alignment are identical!\n"
        self.cfg.exit_user_error(errmsg)

    return self.mis_ids
def check_seq_ids(self):
    """Check that sequence IDs in the taxonomy and the alignment correspond.

    Resets ``self.mis_ids`` and fills it with every key of
    ``self.taxonomy.seq_ranks_map`` (after ``EpacConfig.strip_ref_prefix``)
    for which ``self.alignment.has_seq`` returns a false value.

    When at least one ID is missing and ``self.verbose`` is set, an error
    message naming the missing IDs is handed to
    ``self.cfg.exit_user_error``.

    Returns:
        ``self.mis_ids`` -- the missing (unprefixed) sequence IDs.
    """
    # NOTE(review): the reviewed source also defines check_seq_ids immediately
    # before this one; this later definition is the one that takes effect.
    self.mis_ids = []
    # Plain iteration over the mapping replaces the Python-2-only
    # dict.iterkeys() and visits the same keys on Python 2 and 3.
    for sid in self.taxonomy.seq_ranks_map:
        unprefixed_sid = EpacConfig.strip_ref_prefix(sid)
        if not self.alignment.has_seq(unprefixed_sid):
            self.mis_ids.append(unprefixed_sid)

    if self.mis_ids and self.verbose:
        errmsg = "ERROR: Following %d sequence(s) are missing in your alignment file:\n%s\n\n" % (
            len(self.mis_ids), "\n".join(self.mis_ids))
        errmsg += "Please make sure sequence IDs in taxonomic annotation file and in alignment are identical!\n"
        self.cfg.exit_user_error(errmsg)

    return self.mis_ids
def mis_rec_to_string(self, mis_rec):
    """Format a mislabel record as one tab-separated text line.

    Columns, in order: sequence name, level name, original rank at the
    mislabel level, assigned rank at that level, weight at that level
    (3 decimals), original lineage, assigned lineage, all weights joined
    by ``;`` and, only if the record has a ``'rank_conf'`` key, that value
    (3 decimals).  The name and both rank lists are first passed through
    the ``self.refjson.get_uncorr_*`` helpers.

    When ``mis_rec['orig_level']`` is negative, the two rank columns are
    ``"NA"`` and the weight column uses the first entry of ``lws``.
    """
    # NOTE(review): the reviewed source defines mis_rec_to_string twice in a
    # row; the later definition replaces the earlier one.
    lvl = mis_rec['orig_level']
    seq_id = EpacConfig.strip_ref_prefix(
        self.refjson.get_uncorr_seqid(mis_rec['name']))
    old_lineage = self.refjson.get_uncorr_ranks(mis_rec['orig_ranks'])
    new_lineage = self.refjson.get_uncorr_ranks(mis_rec['ranks'])

    if lvl >= 0:
        summary = (mis_rec['level_name'], old_lineage[lvl],
                   new_lineage[lvl], mis_rec['lws'][lvl])
    else:
        summary = (mis_rec['level_name'], "NA", "NA", mis_rec['lws'][0])

    columns = [
        seq_id,
        "%s\t%s\t%s\t%.3f" % summary,
        Taxonomy.lineage_str(old_lineage),
        Taxonomy.lineage_str(new_lineage),
        ";".join("%.3f" % weight for weight in mis_rec['lws']),
    ]
    if 'rank_conf' in mis_rec:
        columns.append("%.3f" % mis_rec['rank_conf'])
    return "\t".join(columns)
def mis_rec_to_string(self, mis_rec):
    """Render a mislabel record as a single tab-delimited line.

    The sequence name and both rank lists are converted with the
    ``self.refjson.get_uncorr_*`` helpers before formatting.  The line
    holds: name, level name, original and assigned rank at the mislabel
    level (``"NA"`` for both when ``orig_level`` is negative), the weight
    at that level (or the first weight when the level is negative), the
    two lineage strings, the ``;``-joined weights, and optionally the
    ``'rank_conf'`` value when that key is present.
    """
    # NOTE(review): the reviewed source also defines mis_rec_to_string
    # immediately before this one; this later definition takes effect.
    lvl = mis_rec['orig_level']
    refjson = self.refjson
    plain_name = EpacConfig.strip_ref_prefix(
        refjson.get_uncorr_seqid(mis_rec['name']))
    plain_orig_ranks = refjson.get_uncorr_ranks(mis_rec['orig_ranks'])
    plain_ranks = refjson.get_uncorr_ranks(mis_rec['ranks'])

    template = "%s\t%s\t%s\t%.3f\t"
    if lvl < 0:
        # No usable level index: emit placeholders and the first weight.
        level_part = template % (
            mis_rec['level_name'], "NA", "NA", mis_rec['lws'][0])
    else:
        level_part = template % (
            mis_rec['level_name'], plain_orig_ranks[lvl],
            plain_ranks[lvl], mis_rec['lws'][lvl])

    pieces = [
        plain_name + "\t",
        level_part,
        Taxonomy.lineage_str(plain_orig_ranks) + "\t",
        Taxonomy.lineage_str(plain_ranks) + "\t",
        ";".join(["%.3f" % weight for weight in mis_rec['lws']]),
    ]
    if 'rank_conf' in mis_rec:
        pieces.append("\t%.3f" % mis_rec['rank_conf'])
    return "".join(pieces)