Esempio n. 1
0
 def test_zoo_taxcode(self):
     """Pass ZOO-code lineages and their expected rank levels to check_ranks.

     Each case is a ``(ranks, expected_levels)`` pair of semicolon-separated
     strings with one expected level per rank name.
     """
     self.taxcode = TaxCode("ZOO")
     tests = [
         # Plain seven-rank lineage.
         ("Animalia;Chordata;Mammalia;Rodentia;Muridae;Apodemus;agrarius",
          "kingdom;phylum;class;order;family;genus;species"),
         # Lineage with subfamily, tribe and subtribe between family and
         # genus. The genus is "Homo"; the previous literal had it mangled
         # to "H**o".
         ("Animalia;Chordata;Mammalia;Primates;Hominidae;Homininae;Hominini;Hominina;Homo;sapiens",
          "kingdom;phylum;class;order;family;subfamily;tribe;subtribe;genus;species"),
     ]
     self.check_ranks(tests)
Esempio n. 2
0
    def load_refjson(self, refjson_fname):
        """Load the reference JSON file and initialise this object from it.

        Parses and validates ``refjson_fname``, copies the values exposed by
        the parser onto ``self`` and ``self.cfg``, and builds the reference
        tree, the taxonomy objects, the classification helpers and the
        mislabel counters.

        Args:
            refjson_fname: Passed unchanged to ``RefJsonParser``.

        Invalid input (a ``ValueError`` from the parser, or a failed
        ``validate()``) is reported through ``self.cfg.exit_user_error``.
        """
        # NOTE(review): presumably exit_user_error() terminates the program;
        # if it returned, the code below would run without a usable
        # self.refjson -- confirm.
        try:
            self.refjson = RefJsonParser(refjson_fname)
        except ValueError:
            self.cfg.exit_user_error("ERROR: Invalid json file format!")

        # Validate the input JSON format; the validator's message is logged
        # before bailing out.
        (valid, err) = self.refjson.validate()
        if not valid:
            self.cfg.log.error(
                "ERROR: Parsing reference JSON file failed:\n%s", err)
            self.cfg.exit_user_error()

        # Cache values stored in the reference file.
        self.rate = self.refjson.get_rate()
        self.node_height = self.refjson.get_node_height()
        self.origin_taxonomy = self.refjson.get_origin_taxonomy()
        self.tax_tree = self.refjson.get_tax_tree()
        # The pattern-compression setting from the reference file overrides
        # the one in the config.
        self.cfg.compress_patterns = self.refjson.get_pattern_compression()

        self.bid_taxonomy_map = self.refjson.get_branch_tax_map()
        if not self.bid_taxonomy_map:
            # old file format (before 1.6), need to rebuild this map from scratch
            th = TaxTreeHelper(self.cfg, self.origin_taxonomy)
            th.set_mf_rooted_tree(self.tax_tree)
            th.set_bf_unrooted_tree(self.refjson.get_reftree())
            self.bid_taxonomy_map = th.get_bid_taxonomy_map()

        # Write out the map (loaded or rebuilt) with final=False.
        self.write_bid_tax_map(self.bid_taxonomy_map, final=False)

        # Build the reference tree and record its number of leaves.
        reftree_str = self.refjson.get_raxml_readable_tree()
        self.reftree = Tree(reftree_str)
        self.reftree_size = len(self.reftree.get_leaves())

        # IMPORTANT: set EPA heuristic rate based on tree size!
        self.cfg.resolve_auto_settings(self.reftree_size)
        # If we're loading the pre-optimized model, we MUST set the same rate het. mode as in the ref file
        if self.cfg.epa_load_optmod:
            self.cfg.raxml_model = self.refjson.get_ratehet_model()

        # Helpers built from the values loaded above.
        self.classify_helper = TaxClassifyHelper(self.cfg,
                                                 self.bid_taxonomy_map,
                                                 self.rate, self.node_height)
        self.taxtree_helper = TaxTreeHelper(self.cfg, self.origin_taxonomy,
                                            self.tax_tree)

        # Taxonomic code named in the reference file.
        tax_code_name = self.refjson.get_taxcode()
        self.tax_code = TaxCode(tax_code_name)

        self.taxonomy = Taxonomy(prefix=EpacConfig.REF_SEQ_PREFIX,
                                 tax_map=self.origin_taxonomy)
        self.tax_common_ranks = self.taxonomy.get_common_ranks()
        #        print "Common ranks: ", self.tax_common_ranks

        # Zero-initialised counters, TaxCode.UNI_TAX_LEVELS entries each.
        self.mislabels_cnt = [0] * TaxCode.UNI_TAX_LEVELS
        self.rank_mislabels_cnt = [0] * TaxCode.UNI_TAX_LEVELS
        self.rank_mislabels_cnt = [0] * TaxCode.UNI_TAX_LEVELS
Esempio n. 3
0
 def test_bac_taxcode(self):
     """Pass BAC-code lineages and their expected rank levels to check_ranks.

     Each case is a ``(ranks, expected_levels)`` pair of semicolon-separated
     strings.
     """
     self.taxcode = TaxCode("BAC")
     seven_levels = "kingdom;phylum;class;order;family;genus;species"
     cases = [
         # Plain seven-rank lineage.
         ("Bacteria;Firmicutes;Clostridia;Clostridiales;Clostridiaceae;Clostridium;Clostridium rectum",
          seven_levels),
         # Lineage that also carries subclass and suborder ranks.
         ("Bacteria;Actinobacteria;Actinobacteria;Actinobacteridae;Actinomycetales;Micrococcineae;Micrococcaceae;Acaricomes;Acaricomes phytoseiuli",
          "kingdom;phylum;class;subclass;order;suborder;family;genus;species"),
         # Rank-prefixed (k__, p__, ...) lineage with bracketed names and an
         # empty species entry.
         ("k__Bacteria; p__Acidobacteria; c__[Chloracidobacteria]; o__[Chloracidobacterales]; f__[Chloracidobacteraceae]; g__Candidatus Chloracidobacterium; s__",
          seven_levels),
     ]
     self.check_ranks(cases)
Esempio n. 4
0
    def test_bot_taxcode(self):
        """Pass BOT-code lineages and their expected rank levels to check_ranks.

        Each case is a ``(ranks, expected_levels)`` pair of semicolon-separated
        strings.
        """
        self.taxcode = TaxCode("BOT")
        below_family = "subfamily;tribe;subtribe;genus;species"
        cases = [
            # Partial lineages that start at subfamily level.
            ("Apocynoideae;Nerieae;Neriinae;Adenium;Adenium_swazicum",
             below_family),
            ("Apocynoideae;Echiteae;Parsonsiinae;Parsonsia;Parsonsia_heterophylla",
             below_family),
            # Full lineage starting at domain level.
            # Disabled variant with two extra ranks after Eukaryota:
            # "Eukaryota;Opisthokonta;Nucletmycea;Fungi;Dikarya;Ascomycota;Saccharomycotina;Saccharomycetes;Saccharomycetales;Metschnikowiaceae;Clavispora;Clavispora lusitaniae ATCC 42720"
            ("Eukaryota;Fungi;Dikarya;Ascomycota;Saccharomycotina;Saccharomycetes;Saccharomycetales;Metschnikowiaceae;Clavispora;Clavispora lusitaniae ATCC 42720",
             "domain;kingdom;subkingdom;phylum;subphylum;class;order;family;genus;species"),
        ]
        self.check_ranks(cases)