コード例 #1
0
    def run_leave_seq_out_test(self):
        job_name = self.cfg.subst_name("l1out_seq_%NAME%")
        if self.jplace_fname:
            jp = EpaJsonParser(self.jplace_fname)
        else:        
            jp = self.raxml.run_epa(job_name, self.refalign_fname, self.reftree_fname, self.optmod_fname, mode="l1o_seq")

        placements = jp.get_placement()
        seq_count = 0
        for place in placements:
            seq_name = place["n"][0]
            
            # get original taxonomic label
            orig_ranks = self.get_orig_ranks(seq_name)

            # get EPA tax label
            ranks, lws = self.classify_seq(place)
            # check if they match
            mis_rec = self.check_seq_tax_labels(seq_name, orig_ranks, ranks, lws)
            # cross-check with higher rank mislabels
            if self.ranktest and mis_rec:
                rank_conf = 0
                for lvl in range(2,len(orig_ranks)):
                    tax_path = Taxonomy.get_rank_uid(orig_ranks, lvl)
                    if tax_path in self.misrank_conf_map:
                        rank_conf = max(rank_conf, self.misrank_conf_map[tax_path])
                mis_rec['rank_conf'] = rank_conf
            seq_count += 1

        return seq_count    
コード例 #2
0
ファイル: epa_classifier.py プロジェクト: pbordron/sativa
    def classify(self, query_fname, minp=0.9, ptp=False):
        if self.jplace_fname:
            jp = EpaJsonParser(self.jplace_fname)
        else:
            self.checkinput(query_fname, minp)
            jp = self.run_epa()

        self.cfg.log.info(
            "Assigning taxonomic labels based on EPA placements...\n")

        placements = jp.get_placement()

        if self.out_assign_fname:
            fo = open(self.out_assign_fname, "w")
        else:
            fo = None

        noassign_list = []
        for place in placements:
            taxon_name = place["n"][0]
            origin_taxon_name = EpacConfig.strip_query_prefix(taxon_name)
            edges = place["p"]

            ranks, lws = self.classify_helper.classify_seq(edges)
            rankout = self.print_ranks(ranks, lws, self.cfg.min_lhw)

            if rankout == None:
                noassign_list.append(origin_taxon_name)
            else:
                output = "%s\t%s\t" % (origin_taxon_name, rankout)
                if self.cfg.check_novelty:
                    isnovo = self.novelty_check(place_edge=str(edges[0][0]),
                                                ranks=ranks,
                                                lws=lws)
                    output += "*" if isnovo else "o"
                self.print_result_line(fo, output)

        noassign_list += self.get_noalign_list()

        for taxon_name in noassign_list:
            output = "%s\t\t\t?" % origin_taxon_name
            self.print_result_line(fo, output)

        if fo:
            fo.close()

        #############################################
        #
        # EPA-PTP species delimitation
        #
        #############################################
        if ptp:
            self.run_ptp(jp)
コード例 #3
0
ファイル: sativa.py プロジェクト: anukat2015/sativa
    def run_leave_seq_out_test(self):
        job_name = self.cfg.subst_name("l1out_seq_%NAME%")
        placements = []
        if self.cfg.jplace_fname:
            if os.path.isdir(self.cfg.jplace_fname):
                jplace_fmask = os.path.join(self.cfg.jplace_fname, '*.jplace')
            else:
                jplace_fmask = self.cfg.jplace_fname

            jplace_fname_list = glob.glob(jplace_fmask)
            for jplace_fname in jplace_fname_list:
                jp = EpaJsonParser(jplace_fname)
                placements += jp.get_placement()
                
            config.log.debug("Loaded %d placements from %s\n", len(placements), jplace_fmask)
        else:        
            jp = self.raxml.run_epa(job_name, self.refalign_fname, self.reftree_fname, self.optmod_fname, mode="l1o_seq")
            placements = jp.get_placement()
            if self.cfg.output_interim_files:
                out_jplace_fname = self.cfg.out_fname("%NAME%.l1out_seq.jplace")
                self.raxml.copy_epa_jplace(job_name, out_jplace_fname, move=True, mode="l1o_seq")
        
        seq_count = 0
        l1out_ass = {}
        for place in placements:
            seq_name = place["n"][0]
            
            # get original taxonomic label
#            orig_ranks = self.get_orig_ranks(seq_name)
            orig_ranks =  self.taxtree_helper.get_seq_ranks_from_tree(seq_name)

            # get EPA tax label
            ranks, lws = self.classify_seq(place)
            l1out_ass[seq_name] = (ranks, lws)
            
            # check if they match
            mis_rec = self.check_seq_tax_labels(seq_name, orig_ranks, ranks, lws)
            # cross-check with higher rank mislabels
            if self.cfg.ranktest and mis_rec:
                rank_conf = 0
                for lvl in range(2,len(orig_ranks)):
                    tax_path = Taxonomy.get_rank_uid(orig_ranks, lvl)
                    if tax_path in self.misrank_conf_map:
                        rank_conf = max(rank_conf, self.misrank_conf_map[tax_path])
                mis_rec['rank_conf'] = rank_conf
            seq_count += 1

        self.write_assignments(l1out_ass, final=False)
            
        return seq_count    
コード例 #4
0
ファイル: test_classify.py プロジェクト: pbordron/sativa
    def test_assign_taxonomy(self):
        assign_fname = os.path.join(self.testfile_dir, "true_assign.txt")
        expected_assign_map = {}
        with open(assign_fname) as inf:
            for line in inf:
                sid, ranks_str, lws = line.strip().split("\t")
                expected_assign_map[sid] = ranks_str.split(";")

        jplace_fname = os.path.join(self.testfile_dir, "test.jplace")
        parser = EpaJsonParser(jplace_fname)
        for p in parser.get_placement():
            sid = p["n"][0]
            edges = p["p"]
            ranks, conf = self.classify_helper.classify_seq(edges)
            #            for e in edges: print self.bid_tax_map[str(e[0])], e[2]
            #            print sid, "\t", ";".join(ranks) #, conf
            self.assertEqual(ranks, expected_assign_map[sid])
コード例 #5
0
ファイル: test_json.py プロジェクト: amkozlov/sativa
 def test_jplace_read(self):
     jplace_fname = os.path.join(self.testfile_dir, "test.jplace")
     parser = EpaJsonParser(jplace_fname)
     self.assertEquals(parser.get_raxml_version(), "8.2.3")
     t = Tree(parser.get_tree())
     t_len = len(t)
     self.assertEquals(t_len, 32)
     self.assertEquals(len(parser.get_placement()), 6)
     for p in parser.get_placement():
         self.assertFalse(p["n"][0] in t) 
         self.assertTrue(len(p["p"]) > 0) 
         for edge in p["p"]:
             branch = int(edge[0])
             lh = edge[1]
             lhw = edge[2]
             self.assertTrue(branch >= 0 and branch < (t_len * 2 - 3)) 
             self.assertTrue(lhw >= 0.0 and lhw <= 1.0) 
コード例 #6
0
ファイル: test_classify.py プロジェクト: amkozlov/sativa
    def test_assign_taxonomy(self):
        assign_fname = os.path.join(self.testfile_dir, "true_assign.txt")
        expected_assign_map = {}
        with open(assign_fname) as inf:
            for line in inf:
                sid, ranks_str, lws = line.strip().split("\t")
                expected_assign_map[sid] = ranks_str.split(";")
    
        jplace_fname = os.path.join(self.testfile_dir, "test.jplace")
        parser = EpaJsonParser(jplace_fname)
        for p in parser.get_placement():
            sid = p["n"][0]
            edges = p["p"]
            ranks, conf = self.classify_helper.classify_seq(edges)
#            for e in edges: print self.bid_tax_map[str(e[0])], e[2]
#            print sid, "\t", ";".join(ranks) #, conf
            self.assertEqual(ranks, expected_assign_map[sid])
コード例 #7
0
 def test_jplace_read(self):
     jplace_fname = os.path.join(self.testfile_dir, "test.jplace")
     parser = EpaJsonParser(jplace_fname)
     self.assertEqual(parser.get_raxml_version(), "8.2.3")
     t = Tree(parser.get_tree())
     t_len = len(t)
     self.assertEqual(t_len, 32)
     self.assertEqual(len(parser.get_placement()), 6)
     for p in parser.get_placement():
         self.assertFalse(p["n"][0] in t)
         self.assertTrue(len(p["p"]) > 0)
         for edge in p["p"]:
             branch = int(edge[0])
             lh = edge[1]
             lhw = edge[2]
             self.assertTrue(branch >= 0 and branch < (t_len * 2 - 3))
             self.assertTrue(lhw >= 0.0 and lhw <= 1.0)
コード例 #8
0
    def classify(self, query_fname, fout = None, method = "1", minlw = 0.0, pv = 0.02, minp = 0.9, ptp = False):
        if self.jplace_fname:
            jp = EpaJsonParser(self.jplace_fname)
        else:        
            self.checkinput(query_fname, minp)
            raxml = RaxmlWrapper(config)
            reftree_fname = self.cfg.tmp_fname("ref_%NAME%.tre")
            self.refjson.get_raxml_readable_tree(reftree_fname)
            optmod_fname = self.cfg.tmp_fname("%NAME%.opt")
            self.refjson.get_binary_model(optmod_fname)
            job_name = self.cfg.subst_name("epa_%NAME%")

            reftree_str = self.refjson.get_raxml_readable_tree()
            reftree = Tree(reftree_str)

            self.reftree_size = len(reftree.get_leaves())

            # IMPORTANT: set EPA heuristic rate based on tree size!                
            self.cfg.resolve_auto_settings(self.reftree_size)
            # If we're loading the pre-optimized model, we MUST set the same rate het. mode as in the ref file        
            if self.cfg.epa_load_optmod:
                self.cfg.raxml_model = self.refjson.get_ratehet_model()

            reduced_align_fname = raxml.reduce_alignment(self.epa_alignment)

            jp = raxml.run_epa(job_name, reduced_align_fname, reftree_fname, optmod_fname)
        
        placements = jp.get_placement()
        
        if fout:
            fo = open(fout, "w")
        else:
            fo = None
        
        output2 = ""
        for place in placements:
            output = None
            taxon_name = place["n"][0]
            origin_taxon_name = EpacConfig.strip_query_prefix(taxon_name)
            edges = place["p"]
#            edges = self.erlang_filter(edges, p = pv)
            if len(edges) > 0:
                ranks, lws = self.classify_helper.classify_seq(edges, method, minlw)
                
                isnovo = self.novelty_check(place_edge = str(edges[0][0]), ranks =ranks, lws = lws, minlw = minlw)
                rankout = self.print_ranks(ranks, lws, minlw)
                
                if rankout == None:
                    output2 = output2 + origin_taxon_name+ "\t\t\t?\n"
                else:
                    output = "%s\t%s\t" % (origin_taxon_name, self.print_ranks(ranks, lws, minlw))
                    if isnovo: 
                        output += "*"
                    else:
                        output +="o"
                    if self.cfg.verbose:
                        print(output) 
                    if fo:
                        fo.write(output + "\n")
            else:
                output2 = output2 + origin_taxon_name+ "\t\t\t?\n"
        
        if os.path.exists(self.noalign):
            with open(self.noalign) as fnoa:
                lines = fnoa.readlines()
                for line in lines:
                    taxon_name = line.strip()[1:]
                    origin_taxon_name = EpacConfig.strip_query_prefix(taxon_name)
                    output = "%s\t\t\t?" % origin_taxon_name
                    if self.cfg.verbose:
                        print(output)
                    if fo:
                        fo.write(output + "\n")
        
        if self.cfg.verbose:
            print(output2)
        
        if fo:
            fo.write(output2)
            fo.close()

        #############################################
        #
        # EPA-PTP species delimitation
        #
        #############################################
        if ptp:
            full_aln = SeqGroup(self.epa_alignment)
            species_list = epa_2_ptp(epa_jp = jp, ref_jp = self.refjson, full_alignment = full_aln, min_lw = 0.5, debug = self.cfg.debug)
            
            if self.cfg.verbose:
                print "Species clusters:"

            if fout:
                fo2 = open(fout+".species", "w")
            else:
                fo2 = None

            for sp_cluster in species_list:
                translated_taxa = []
                for taxon in sp_cluster:
                    origin_taxon_name = EpacConfig.strip_query_prefix(taxon)
                    translated_taxa.append(origin_taxon_name)
                s = ",".join(translated_taxa)
                if fo2:
                    fo2.write(s + "\n")
                if self.cfg.verbose:
                    print s

            if fo2:
                fo2.close()
        #############################################
        
        if not self.jplace_fname:
            if not self.cfg.debug:
                raxml.cleanup(job_name)
                FileUtils.remove_if_exists(reduced_align_fname)
                FileUtils.remove_if_exists(reftree_fname)
                FileUtils.remove_if_exists(optmod_fname)
コード例 #9
0
ファイル: sativa.py プロジェクト: sdwfrost/sativa
    def run_final_epa_test(self):
        self.reftree_outgroup = self.refjson.get_outgroup()

        pruned_reftree = self.prune_mislabels_from_tree(self.reftree, "reference")
        pruned_taxtree = self.prune_mislabels_from_tree(self.reftree, "taxonomic")

        # remove unifurcation at the root
        if len(pruned_reftree.children) == 1:
            pruned_reftree = pruned_reftree.children[0]
            
        self.mislabels = []

        th = TaxTreeHelper(self.cfg, self.origin_taxonomy)
        th.set_mf_rooted_tree(pruned_taxtree)
         
        reftree_epalbl_str = None    
        if self.cfg.final_jplace_fname:
            if os.path.isdir(self.cfg.final_jplace_fname):
                jplace_fmask = os.path.join(self.cfg.final_jplace_fname, '*.jplace')
            else:
                jplace_fmask = self.cfg.final_jplace_fname

            jplace_fname_list = glob.glob(jplace_fmask)
            placements = []
            for jplace_fname in jplace_fname_list:
                jp = EpaJsonParser(jplace_fname)
                placements += jp.get_placement()
                if not reftree_epalbl_str:
                  reftree_epalbl_str = jp.get_std_newick_tree()        
                
            config.log.debug("Loaded %d final epa placements from %s\n", len(placements), jplace_fmask)
        else:
            epa_result = self.run_epa_once(pruned_reftree)
            reftree_epalbl_str = epa_result.get_std_newick_tree()        
            placements = epa_result.get_placement()
        
        # update branchid-taxonomy mapping to account for possible changes in branch numbering
        reftree_tax = Tree(reftree_epalbl_str)
        th.set_bf_unrooted_tree(reftree_tax)
        bid_tax_map = th.get_bid_taxonomy_map()
        
        self.write_bid_tax_map(bid_tax_map, final=True)

        cl = TaxClassifyHelper(self.cfg, bid_tax_map, self.rate, self.node_height)
        
#        newtax_fname = self.cfg.subst_name("newtax_%NAME%.tre")
#        th.get_tax_tree().write(outfile=newtax_fname, format=3)

        final_ass = {}
        for place in placements:
            seq_name = place["n"][0]

            # get original taxonomic label
            orig_ranks = self.taxtree_helper.get_seq_ranks_from_tree(seq_name)

            # EXPERIMENTAL FEATURE - disabled for now!
            # It could happen that certain ranks were present in the "original" reference tree, but 
            # are completely missing in the pruned tree (e.g., all seqs of a species were considered "suspicious" 
            # after the leave-one-out test and thus pruned)
            # In this case, EPA has no chance to infer full original taxonomic annotation (=species) since the corresponding clade
            # is now missing. To account for this fact, we amend the original taxonomic annotation and set ranks missing from  
            # pruned tree to "Undefined".
#            orig_ranks = th.strip_missing_ranks(orig_ranks)
#            print orig_ranks

            # get EPA tax label
            ranks, lws = cl.classify_seq(place["p"])
            final_ass[seq_name] = (ranks, lws)

            #print seq_name, ": ", orig_ranks, "--->", ranks

            # check if they match
            mis_rec = self.check_seq_tax_labels(seq_name, orig_ranks, ranks, lws)

        self.write_assignments(final_ass, final=True)
コード例 #10
0
    def run_final_epa_test(self):
        self.reftree_outgroup = self.refjson.get_outgroup()

        pruned_reftree = self.prune_mislabels_from_tree(
            self.reftree, "reference")
        pruned_taxtree = self.prune_mislabels_from_tree(
            self.reftree, "taxonomic")

        # remove unifurcation at the root
        if len(pruned_reftree.children) == 1:
            pruned_reftree = pruned_reftree.children[0]

        self.mislabels = []

        th = TaxTreeHelper(self.cfg, self.origin_taxonomy)
        th.set_mf_rooted_tree(pruned_taxtree)

        reftree_epalbl_str = None
        if self.cfg.final_jplace_fname:
            if os.path.isdir(self.cfg.final_jplace_fname):
                jplace_fmask = os.path.join(self.cfg.final_jplace_fname,
                                            '*.jplace')
            else:
                jplace_fmask = self.cfg.final_jplace_fname

            jplace_fname_list = glob.glob(jplace_fmask)
            placements = []
            for jplace_fname in jplace_fname_list:
                jp = EpaJsonParser(jplace_fname)
                placements += jp.get_placement()
                if not reftree_epalbl_str:
                    reftree_epalbl_str = jp.get_std_newick_tree()

            config.log.debug("Loaded %d final epa placements from %s\n",
                             len(placements), jplace_fmask)
        else:
            epa_result = self.run_epa_once(pruned_reftree)
            reftree_epalbl_str = epa_result.get_std_newick_tree()
            placements = epa_result.get_placement()

        # update branchid-taxonomy mapping to account for possible changes in branch numbering
        reftree_tax = Tree(reftree_epalbl_str)
        th.set_bf_unrooted_tree(reftree_tax)
        bid_tax_map = th.get_bid_taxonomy_map()

        self.write_bid_tax_map(bid_tax_map, final=True)

        cl = TaxClassifyHelper(self.cfg, bid_tax_map, self.rate,
                               self.node_height)

        #        newtax_fname = self.cfg.subst_name("newtax_%NAME%.tre")
        #        th.get_tax_tree().write(outfile=newtax_fname, format=3)

        final_ass = {}
        for place in placements:
            seq_name = place["n"][0]

            # get original taxonomic label
            orig_ranks = self.taxtree_helper.get_seq_ranks_from_tree(seq_name)

            # EXPERIMENTAL FEATURE - disabled for now!
            # It could happen that certain ranks were present in the "original" reference tree, but
            # are completely missing in the pruned tree (e.g., all seqs of a species were considered "suspicious"
            # after the leave-one-out test and thus pruned)
            # In this case, EPA has no chance to infer full original taxonomic annotation (=species) since the corresponding clade
            # is now missing. To account for this fact, we amend the original taxonomic annotation and set ranks missing from
            # pruned tree to "Undefined".
            #            orig_ranks = th.strip_missing_ranks(orig_ranks)
            #            print orig_ranks

            # get EPA tax label
            ranks, lws = cl.classify_seq(place["p"])
            final_ass[seq_name] = (ranks, lws)

            #print seq_name, ": ", orig_ranks, "--->", ranks

            # check if they match
            mis_rec = self.check_seq_tax_labels(seq_name, orig_ranks, ranks,
                                                lws)

        self.write_assignments(final_ass, final=True)
コード例 #11
0
    def run_leave_seq_out_test(self):
        job_name = self.cfg.subst_name("l1out_seq_%NAME%")
        placements = []
        if self.cfg.jplace_fname:
            if os.path.isdir(self.cfg.jplace_fname):
                jplace_fmask = os.path.join(self.cfg.jplace_fname, '*.jplace')
            else:
                jplace_fmask = self.cfg.jplace_fname

            jplace_fname_list = glob.glob(jplace_fmask)
            for jplace_fname in jplace_fname_list:
                jp = EpaJsonParser(jplace_fname)
                placements += jp.get_placement()

            config.log.debug("Loaded %d placements from %s\n", len(placements),
                             jplace_fmask)
        else:
            jp = self.raxml.run_epa(job_name,
                                    self.refalign_fname,
                                    self.reftree_fname,
                                    self.optmod_fname,
                                    mode="l1o_seq")
            placements = jp.get_placement()
            if self.cfg.output_interim_files:
                out_jplace_fname = self.cfg.out_fname(
                    "%NAME%.l1out_seq.jplace")
                self.raxml.copy_epa_jplace(job_name,
                                           out_jplace_fname,
                                           move=True,
                                           mode="l1o_seq")

        seq_count = 0
        l1out_ass = {}
        for place in placements:
            seq_name = place["n"][0]

            # get original taxonomic label
            #            orig_ranks = self.get_orig_ranks(seq_name)
            orig_ranks = self.taxtree_helper.get_seq_ranks_from_tree(seq_name)

            # get EPA tax label
            ranks, lws = self.classify_seq(place)
            l1out_ass[seq_name] = (ranks, lws)

            # check if they match
            mis_rec = self.check_seq_tax_labels(seq_name, orig_ranks, ranks,
                                                lws)
            # cross-check with higher rank mislabels
            if self.cfg.ranktest and mis_rec:
                rank_conf = 0
                for lvl in range(2, len(orig_ranks)):
                    tax_path = Taxonomy.get_rank_uid(orig_ranks, lvl)
                    if tax_path in self.misrank_conf_map:
                        rank_conf = max(rank_conf,
                                        self.misrank_conf_map[tax_path])
                mis_rec['rank_conf'] = rank_conf
            seq_count += 1

        self.write_assignments(l1out_ass, final=False)

        return seq_count
コード例 #12
0
ファイル: epa_classifier.py プロジェクト: anukat2015/sativa
    def classify(self, query_fname, minp = 0.9, ptp = False):
        if self.jplace_fname:
            jp = EpaJsonParser(self.jplace_fname)
        else:        
            self.checkinput(query_fname, minp)

            self.cfg.log.info("Running RAxML-EPA to place %d query sequences...\n" % self.query_count)
            raxml = RaxmlWrapper(config)
            reftree_fname = self.cfg.tmp_fname("ref_%NAME%.tre")
            self.refjson.get_raxml_readable_tree(reftree_fname)
            optmod_fname = self.cfg.tmp_fname("%NAME%.opt")
            self.refjson.get_binary_model(optmod_fname)
            job_name = self.cfg.subst_name("epa_%NAME%")

            reftree_str = self.refjson.get_raxml_readable_tree()
            reftree = Tree(reftree_str)

            self.reftree_size = len(reftree.get_leaves())

            # IMPORTANT: set EPA heuristic rate based on tree size!                
            self.cfg.resolve_auto_settings(self.reftree_size)
            # If we're loading the pre-optimized model, we MUST set the same rate het. mode as in the ref file        
            if self.cfg.epa_load_optmod:
                self.cfg.raxml_model = self.refjson.get_ratehet_model()

            reduced_align_fname = raxml.reduce_alignment(self.epa_alignment)

            jp = raxml.run_epa(job_name, reduced_align_fname, reftree_fname, optmod_fname)
            
            raxml.copy_epa_jplace(job_name, self.out_jplace_fname, move=True)
        
        self.cfg.log.info("Assigning taxonomic labels based on EPA placements...\n")
 
        placements = jp.get_placement()
        
        if self.out_assign_fname:
            fo = open(self.out_assign_fname, "w")
        else:
            fo = None
        
        noassign_list = []
        for place in placements:
            taxon_name = place["n"][0]
            origin_taxon_name = EpacConfig.strip_query_prefix(taxon_name)
            edges = place["p"]
            if len(edges) > 0:
                ranks, lws = self.classify_helper.classify_seq(edges)
                
                isnovo = self.novelty_check(place_edge = str(edges[0][0]), ranks=ranks, lws=lws)
                rankout = self.print_ranks(ranks, lws, self.cfg.min_lhw)
                
                if rankout == None:
                    noassign_list.append(origin_taxon_name)
                else:
                    output = "%s\t%s\t" % (origin_taxon_name, rankout)
                    if isnovo: 
                        output += "*"
                    else:
                        output +="o"
                    if self.cfg.verbose:
                        print(output) 
                    if fo:
                        fo.write(output + "\n")
            else:
                noassign_list.append(origin_taxon_name)
        
        if os.path.exists(self.noalign):
            with open(self.noalign) as fnoa:
                lines = fnoa.readlines()
                for line in lines:
                    taxon_name = line.strip()[1:]
                    origin_taxon_name = EpacConfig.strip_query_prefix(taxon_name)
                    noassign_list.append(origin_taxon_name)
                        
        for taxon_name in noassign_list:
            output = "%s\t\t\t?" % origin_taxon_name
            if self.cfg.verbose:
                print(output)
            if fo:
                fo.write(output + "\n")
        
        if fo:
            fo.close()

        #############################################
        #
        # EPA-PTP species delimitation
        #
        #############################################
        if ptp:
            full_aln = SeqGroup(self.epa_alignment)
            species_list = epa_2_ptp(epa_jp = jp, ref_jp = self.refjson, full_alignment = full_aln, min_lw = 0.5, debug = self.cfg.debug)
            
            self.cfg.log.debug("Species clusters:")
 
            if fout:
                fo2 = open(fout+".species", "w")
            else:
                fo2 = None

            for sp_cluster in species_list:
                translated_taxa = []
                for taxon in sp_cluster:
                    origin_taxon_name = EpacConfig.strip_query_prefix(taxon)
                    translated_taxa.append(origin_taxon_name)
                s = ",".join(translated_taxa)
                if fo2:
                    fo2.write(s + "\n")
                self.cfg.log.debug(s)

            if fo2:
                fo2.close()