def validate(self, options):
    """Validate the taxonomy file and log the number of errors of each kind.

    Reads ``options.taxonomy_file`` and runs ``Taxonomy.validate`` on it.
    The prefix, rank, hierarchy and species checks are each disabled by the
    corresponding ``options.no_*`` flag; the group-name and duplicate-name
    checks are always enabled.
    """
    taxonomy_path = options.taxonomy_file
    check_file_exists(taxonomy_path)

    taxonomy = Taxonomy()
    parsed = taxonomy.read(taxonomy_path)

    # NOTE: 'no_hierarhcy' (sic) is the attribute name this code reads from
    # the options object; it must match however the option is declared.
    errors = taxonomy.validate(parsed,
                               check_prefixes=not options.no_prefix,
                               check_ranks=not options.no_all_ranks,
                               check_hierarchy=not options.no_hierarhcy,
                               check_species=not options.no_species,
                               check_group_names=True,
                               check_duplicate_names=True,
                               report_errors=True)

    # the validator is expected to hand back exactly five collections
    (invalid_ranks,
     invalid_prefixes,
     invalid_species_name,
     invalid_hierarchies,
     invalid_group_name) = errors

    error_total = 0
    for category in errors:
        error_total += len(category)

    if error_total == 0:
        self.logger.info('No errors identified in taxonomy file.')
        return

    # one summary line per category, reported in a fixed order
    summary = (
        ('Identified %d incomplete taxonomy strings.', invalid_ranks),
        ('Identified %d rank prefix errors.', invalid_prefixes),
        ('Identified %d invalid species names.', invalid_species_name),
        ('Identified %d taxa with multiple parents.', invalid_hierarchies),
        ('Identified %d invalid group names.', invalid_group_name),
    )
    for template, offenders in summary:
        self.logger.info(template % len(offenders))
def validate(self, options):
    """Run every taxonomy validation check on the input taxonomy file.

    Reads ``options.input_taxonomy`` and passes it to ``Taxonomy.validate``
    with all checks switched on and error reporting enabled. The value
    returned by the validator is not used here.
    """
    taxonomy_path = options.input_taxonomy
    check_file_exists(taxonomy_path)

    taxonomy = Taxonomy()
    parsed = taxonomy.read(taxonomy_path)

    # every available check is enabled; problems are reported by the validator
    validation_flags = {
        'check_prefixes': True,
        'check_ranks': True,
        'check_hierarchy': True,
        'check_species': True,
        'check_group_names': True,
        'check_duplicate_names': True,
        'report_errors': True,
    }
    taxonomy.validate(parsed, **validation_flags)

    self.logger.info('Finished performing validation tests.')
def validate(self, options):
    """Validate the taxonomy file and log the number of errors of each kind.

    Reads ``options.taxonomy_file`` and runs ``Taxonomy.validate`` on it,
    passing the negated ``options.no_*`` flags positionally followed by a
    final ``True``.
    """
    taxonomy_path = options.taxonomy_file
    check_file_exists(taxonomy_path)

    taxonomy = Taxonomy()
    parsed = taxonomy.read(taxonomy_path)

    # NOTE: 'no_hierarhcy' (sic) is the attribute name this code reads from
    # the options object; it must match however the option is declared.
    # Arguments are positional, so their order is significant.
    errors = taxonomy.validate(parsed,
                               not options.no_prefix,
                               not options.no_all_ranks,
                               not options.no_hierarhcy,
                               not options.no_species,
                               True)

    # the validator is expected to hand back exactly four collections
    (invalid_ranks,
     invalid_prefixes,
     invalid_species_name,
     invalid_hierarchies) = errors

    error_total = 0
    for category in errors:
        error_total += len(category)

    if error_total == 0:
        self.logger.info('No errors identified in taxonomy file.')
        return

    # one summary line per category, reported in a fixed order
    summary = (
        ('Identified %d incomplete taxonomy strings.', invalid_ranks),
        ('Identified %d rank prefix errors.', invalid_prefixes),
        ('Identified %d invalid species names.', invalid_species_name),
        ('Identified %d taxa with multiple parents.', invalid_hierarchies),
    )
    for template, offenders in summary:
        self.logger.info(template % len(offenders))
def check_tree(self, options):
    """Validate taxonomy of decorated tree and check for polyphyletic groups.

    The taxonomy is read from ``options.taxonomy_file`` when that option is
    set, otherwise from ``options.decorated_tree`` itself. It is validated
    with every check enabled, and the tree is then searched for polyphyletic
    groups:

    * with a taxonomy file, a taxon is flagged when the MRCA of its member
      leaves has a different number of leaves than the taxon has members;
    * without one, a taxon is flagged when its label occurs on more than
      one internal node of the tree.

    Flagged taxa are printed to stdout in sorted order so that the report
    is reproducible between runs.
    """
    check_file_exists(options.decorated_tree)

    # validate taxonomy
    taxonomy = Taxonomy()
    if options.taxonomy_file:
        t = taxonomy.read(options.taxonomy_file)
    else:
        t = taxonomy.read_from_tree(options.decorated_tree)

    taxonomy.validate(t,
                      check_prefixes=True,
                      check_ranks=True,
                      check_hierarchy=True,
                      check_species=True,
                      check_group_names=True,
                      check_duplicate_names=True,
                      report_errors=True)

    # check for polyphyletic groups
    polyphyletic_groups = set()
    tree = dendropy.Tree.get_from_path(options.decorated_tree,
                                       schema='newick',
                                       rooting="force-rooted",
                                       preserve_underscores=True)

    if options.taxonomy_file:
        # reduce taxonomy to taxa in tree and map taxon labels to Taxon objects
        # NOTE(review): a leaf label missing from the taxonomy file fails the
        # t[label] lookup below (KeyError if t is a dict) -- confirm that
        # aborting is the intended behaviour.
        reduced_taxonomy = {}
        taxon_map = {}
        for leaf in tree.leaf_node_iter():
            label = leaf.taxon.label
            reduced_taxonomy[label] = t[label]
            taxon_map[label] = leaf.taxon

        # find taxa with an MRCA spanning additional taxa
        for rank_label in Taxonomy.rank_labels[1:]:
            extant_taxa = taxonomy.extant_taxa_for_rank(rank_label,
                                                        reduced_taxonomy)
            for taxon, taxa_ids in extant_taxa.items():
                # loop variable deliberately not named 't' so the taxonomy
                # mapping above is never shadowed or rebound
                mrca = tree.mrca(taxa=[taxon_map[taxon_id]
                                       for taxon_id in taxa_ids])
                mrca_leaf_count = sum(1 for _ in mrca.leaf_iter())
                if mrca_leaf_count != len(taxa_ids):
                    polyphyletic_groups.add(taxon)
    else:
        # find duplicate taxon labels in tree
        seen_taxa = set()
        for node in tree.preorder_node_iter(lambda n: not n.is_leaf()):
            _support, taxon_label, _aux_info = parse_label(node.label)
            if not taxon_label:
                continue

            # a node label may carry several ';'-separated taxa
            for taxon in [name.strip() for name in taxon_label.split(';')]:
                if taxon in seen_taxa:
                    polyphyletic_groups.add(taxon)
                seen_taxa.add(taxon)

    if polyphyletic_groups:
        print('')
        print('Tree contains polyphyletic groups:')
        # sorted: set iteration order is not stable across runs
        for taxon in sorted(polyphyletic_groups):
            print('%s' % (taxon))

    self.logger.info('Finished performing validation tests.')