def outliers(self, options):
    """Create information for identifying taxonomic outliers.

    Validates the command-line options, makes sure the output directory
    exists, and then hands everything to `Outliers.run`.

    Parameters
    ----------
    options : argparse.Namespace-like
        Parsed options. The attributes read here are exactly those
        referenced in the body below (input_tree, taxonomy_file,
        output_dir, the optional *_file attributes, and the plotting /
        filtering settings forwarded to `Outliers`).

    Returns
    -------
    None
        Also returns None early, after logging an error, when
        '--highlight_polyphyly' is given without '--fmeasure_table'.
    """

    # Reject an invalid flag combination first, so that a bad invocation
    # does not create the output directory as a side effect.
    if options.highlight_polyphyly and not options.fmeasure_table:
        self.logger.error(
            "The '--highlight_polyphyly' flag must be used with the '--fmeasure_table' flag."
        )
        return

    # required inputs
    check_file_exists(options.input_tree)
    check_file_exists(options.taxonomy_file)

    # Optional inputs are only checked when supplied. highlight_taxa_file
    # is forwarded to Outliers.run just like the other two, so it is
    # validated the same way.
    optional_files = (
        options.plot_taxa_file,
        options.trusted_taxa_file,
        options.highlight_taxa_file,
    )
    for optional_file in optional_files:
        if optional_file:
            check_file_exists(optional_file)

    if not os.path.exists(options.output_dir):
        os.makedirs(options.output_dir)

    o = Outliers(options.skip_mpld3, options.dpi, options.output_dir)
    o.run(options.input_tree,
          options.taxonomy_file,
          options.viral,
          options.plot_taxa_file,
          options.plot_dist_taxa_only,
          options.plot_domain,
          options.highlight_polyphyly,
          options.highlight_taxa_file,
          options.trusted_taxa_file,
          options.fixed_root,
          options.min_children,
          options.min_support,
          options.mblet,
          options.fmeasure_table,
          options.min_fmeasure,
          options.fmeasure_mono,
          options.verbose_table)

    self.logger.info('Done.')
def outliers(self, options):
    """Create information for identifying taxonomic outliers.

    Checks the input files, makes sure the output directory exists, and
    then hands everything to `Outliers.run`.

    Parameters
    ----------
    options : argparse.Namespace-like
        Parsed options. The attributes read here are exactly those
        referenced in the body below.

    Returns
    -------
    None
    """

    check_file_exists(options.input_tree)

    # Optional inputs are only checked when supplied. The taxonomy file is
    # forwarded to Outliers.run like the others, so it is validated too;
    # the truthiness guard keeps an omitted taxonomy file working as before.
    # NOTE(review): confirm taxonomy_file is optional for this command.
    optional_files = (
        options.taxonomy_file,
        options.plot_taxa_file,
        options.trusted_taxa_file,
    )
    for optional_file in optional_files:
        if optional_file:
            check_file_exists(optional_file)

    if not os.path.exists(options.output_dir):
        os.makedirs(options.output_dir)

    o = Outliers(options.dpi)
    o.run(options.input_tree,
          options.taxonomy_file,
          options.output_dir,
          options.plot_taxa_file,
          options.plot_dist_taxa_only,
          options.plot_domain,
          options.trusted_taxa_file,
          options.fixed_root,
          options.min_children,
          options.min_support,
          options.verbose_table)

    self.logger.info('Done.')
def _median_rank_rd(self, tree, placed_taxon, taxonomy, trusted_taxa_file, min_children, min_support):
    """Infer the median relative divergence of each rank and annotate the tree.

    Besides returning the per-rank medians, this method modifies `tree`
    in place: the seed node gets `rel_dist = 0.0` and every other node
    gets `rel_dist` set to the median of the values recorded for it.

    Parameters
    ----------
    tree : Tree
        Dendropy Tree.
    placed_taxon : set
        Taxon currently placed in tree; the taxa used for inference are
        restricted to this set.
    taxonomy : d[taxon_id] -> taxonomy info
        Taxonomic information for extant taxa.
    trusted_taxa_file : str
        File specifying trusted taxa to consider when inferring the
        distribution. When falsy (e.g. None), no trusted-taxa list is
        read and None is passed to the filter instead.
    min_children : int
        Only consider taxa with at least the specified number of
        children taxa when inferring the distribution.
    min_support : float
        Only consider taxa with at least this level of support when
        inferring the distribution.

    Returns
    -------
    d[rank_index] -> float
        Median relative divergence for each taxonomic rank.
    """

    # trusted taxa are only loaded when a file was actually supplied
    trusted = read_taxa_file(trusted_taxa_file) if trusted_taxa_file else None

    # candidate taxa for the distribution, limited to those already placed
    candidate_taxa = filter_taxa_for_dist_inference(tree,
                                                    taxonomy,
                                                    trusted,
                                                    min_children,
                                                    min_support)
    candidate_taxa.intersection_update(placed_taxon)

    # infer the distribution
    rd_model = Outliers()
    rd_by_phylum, rd_by_node = rd_model.median_rd_over_phyla(tree,
                                                             candidate_taxa,
                                                             taxonomy)
    rank_medians = rd_model.rank_median_rd(rd_by_phylum, candidate_taxa)

    # the root is pinned at zero; every other node takes the median of the
    # values collected for its id
    root = tree.seed_node
    root.rel_dist = 0.0

    def _is_not_root(node):
        return node != root

    for node in tree.preorder_node_iter(_is_not_root):
        node.rel_dist = np_median(rd_by_node[node.id])

    return rank_medians