コード例 #1
0
ファイル: test_PhyloXML.py プロジェクト: zyha/biopython
 def test_singlular(self):
     """Clade, Phylogeny: Singular properties for plural attributes.

     Checks that ``confidence``/``taxonomy`` expose the only entry of the
     corresponding list, raise AttributeError once the list holds two
     entries, and evaluate to None when the list is empty.
     """
     bootstrap = PX.Confidence(0.9, "bootstrap")
     genus = PX.Taxonomy(rank="genus")

     # --- Clade.confidence / Clade.taxonomy ---
     node = PX.Clade(confidences=[bootstrap], taxonomies=[genus])
     self.assertEqual(node.confidence.type, "bootstrap")
     self.assertEqual(node.taxonomy.rank, "genus")

     # Two entries: the singular accessor must refuse to pick one.
     node.confidences.append(bootstrap)
     with self.assertRaises(AttributeError):
         getattr(node, "confidence")
     node.taxonomies.append(genus)
     with self.assertRaises(AttributeError):
         getattr(node, "taxonomy")

     # No entries: the singular accessor yields None.
     node.confidences = []
     self.assertEqual(node.confidence, None)
     node.taxonomies = []
     self.assertEqual(node.taxonomy, None)

     # --- Phylogeny.confidence: same three cases ---
     phylogeny = PX.Phylogeny(True, confidences=[bootstrap])
     self.assertEqual(phylogeny.confidence.type, "bootstrap")
     phylogeny.confidences.append(bootstrap)
     with self.assertRaises(AttributeError):
         getattr(phylogeny, "confidence")
     phylogeny.confidences = []
     self.assertEqual(phylogeny.confidence, None)
コード例 #2
0
ファイル: PhyloXMLIO.py プロジェクト: gitly110/python_exc
 def _parse_taxonomy(self, parent):
     """Build a PX.Taxonomy from the events following a taxonomy start tag.

     The attributes of ``parent`` seed the Taxonomy; events are then pulled
     from ``self.context`` until the closing 'taxonomy' tag is reached.
     Only 'end' events are acted on.  Elements with an unrecognised tag are
     stored in ``taxonomy.other`` when their namespace differs from
     ``NAMESPACES['phy']`` and are otherwise ignored.
     """
     record = PX.Taxonomy(**parent.attrib)
     for event, elem in self.context:
         namespace, tag = _split_namespace(elem.tag)
         if event != 'end':
             continue
         if tag == 'taxonomy':
             # Closing tag of the element being parsed: release it and stop.
             parent.clear()
             break
         if tag in ('id', 'uri'):
             # Delegated to the parser method that shares the tag's name.
             convert = getattr(self, tag)
             setattr(record, tag, convert(elem))
         elif tag == 'common_name':
             record.common_names.append(_collapse_wspace(elem.text))
         elif tag == 'synonym':
             record.synonyms.append(elem.text)
         elif tag in ('code', 'scientific_name', 'authority', 'rank'):
             # ENH: check_str on rank
             setattr(record, tag, elem.text)
         elif namespace != NAMESPACES['phy']:
             foreign = self.other(elem, namespace, tag)
             record.other.append(foreign)
             parent.clear()
     return record
コード例 #3
0
ファイル: PhyloXMLIO.py プロジェクト: fabianegli/biopython
 def _parse_taxonomy(self, parent):
     """Parse taxonomic information for a clade (PRIVATE).

     Starts from ``PX.Taxonomy(**parent.attrib)`` and fills it in from the
     'end' events read off ``self.context``, stopping at the closing
     "taxonomy" tag.  Returns the populated Taxonomy object.
     """
     text_fields = ("code", "scientific_name", "authority", "rank")
     parsed_fields = ("id", "uri")

     taxon = PX.Taxonomy(**parent.attrib)
     for event, elem in self.context:
         namespace, tag = _split_namespace(elem.tag)
         if event != "end":
             continue
         if tag == "taxonomy":
             # End of the taxonomy element itself: free it and finish.
             parent.clear()
             break
         if tag in parsed_fields:
             # self.id / self.uri turn the element into the stored value.
             setattr(taxon, tag, getattr(self, tag)(elem))
         elif tag == "common_name":
             name = _collapse_wspace(elem.text)
             taxon.common_names.append(name)
         elif tag == "synonym":
             taxon.synonyms.append(elem.text)
         elif tag in text_fields:
             # ENH: check_str on rank
             setattr(taxon, tag, elem.text)
         elif namespace != NAMESPACES["phy"]:
             # Element from a namespace other than NAMESPACES["phy"].
             taxon.other.append(self.other(elem, namespace, tag))
             parent.clear()
     return taxon
コード例 #4
0
def prepare_species_tree(FILE_TREE_IN, FILE_TREE_OUT):
    """Replace clade names in a PhyloXML tree with taxonomy records.

    Reads the PhyloXML document at ``FILE_TREE_IN``, marks its first
    phylogeny as rooted and, for every clade that has a name, moves that
    name into a ``PhyloXML.Taxonomy`` whose id uses the 'ncbi_taxonomy'
    provider (a leading 'INT' is stripped from the name first).  The
    scientific name comes from ``find_tax_name``; '(NA)' is used when that
    lookup raises KeyError.  The clade name is then cleared and the whole
    document is written to ``FILE_TREE_OUT``.

    Args:
        FILE_TREE_IN: path of the PhyloXML file to read.
        FILE_TREE_OUT: destination handed to ``PhyloXMLIO.write``.
    """
    # Close the input handle deterministically instead of leaking it.
    with open(FILE_TREE_IN, 'r') as handle:
        treexml = PhyloXMLIO.read(handle)
    tree = treexml[0]
    treexml.attributes.pop('schemaLocation', None)  # not supported by Forester
    tree.rooted = True
    for node in tree.clade.find_clades():
        if not node.name:
            continue
        tax_id = node.name
        if tax_id.startswith('INT'):
            tax_id = tax_id[3:]
        taxon = PhyloXML.Taxonomy(
            id=PhyloXML.Id(tax_id, provider='ncbi_taxonomy'))
        try:
            taxon.scientific_name = find_tax_name(tax_id)
        except KeyError:
            taxon.scientific_name = '(NA)'
        node._set_taxonomy(taxon)
        node.name = None
    PhyloXMLIO.write(treexml, FILE_TREE_OUT)
コード例 #5
0
def reconcile_tree(gene_tree_file, reconciled_file, rec_tag, pfam_id, db):
    """Prepare a taxonomy-annotated gene tree and run Forester GSDI on it.

    Steps, as performed below:

    1. If ``<rec_tag>ids.pickle`` exists and ``pplacer_flag == 1``, return
       early when every gene id in ``pplacer_queries[pfam_id]`` is already
       listed under 'existing_genes' in that pickle.
    2. If the pickle or ``<reconciled_file>.gz`` is missing, or
       ``pplacer_flag == 1``: unpack the family's Newick tree, resolve
       polytomies, convert it to PhyloXML, attach a taxonomy record to
       every terminal clade, write ``<rec_tag>txid.xml`` and refresh the
       pickle.
    3. Run the Forester ``gsdi`` application on the txid file, gzip
       ``reconciled_file`` if it was produced, and remove the intermediate
       files.

    Besides its arguments the function reads these names from the
    surrounding scope: ``pplacer_flag``, ``pplacer_queries``,
    ``tree_folder``, ``all_species_txids``, ``merged_taxid``,
    ``best_taxid_map`` (also updated here), ``lib_path`` and
    ``species_tree_data_path``.

    Args:
        gene_tree_file: prefix for the temporary unpacked Newick file.
        reconciled_file: output path given to gsdi; gzipped afterwards.
        rec_tag: prefix for the per-family work files.
        pfam_id: family identifier; selects ``<tree_folder>/<pfam_id>.nw.gz``.
        db: passed through to the taxonomy lookup helpers.

    Returns:
        None.
    """
    ids_pickle = rec_tag + 'ids.pickle'
    if (os.path.isfile(ids_pickle)) and (pplacer_flag == 1):
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # this is only safe while the pickle is written by this pipeline.
        with open(ids_pickle, 'rb') as handle:
            id_information = pickle.load(handle)
        existing_genes = id_information['existing_genes']
        new_genes = set([w['id'] for w in pplacer_queries[pfam_id]])
        if not (new_genes - set(existing_genes)):
            print("All %s Genes for family %s have already been placed in the reconciled tree." % (
                len(new_genes), pfam_id))
            print("Skip Reconciliation for %s" % pfam_id)
            return

    txid_file = rec_tag + 'txid.xml'
    if not (os.path.isfile(ids_pickle)) or not (
            os.path.isfile(reconciled_file + '.gz')) or (pplacer_flag == 1):
        print("Running Reconciliation for: %s" % pfam_id)

        # Unpack the gene tree to a randomly suffixed scratch file.
        rand_id = random.randint(1000000, 9999999)
        tmp_newick = '%s.%d' % (gene_tree_file, rand_id)
        subprocess.check_call("gunzip -c %s/%s.nw.gz > %s.%d" %
                              (tree_folder, pfam_id, gene_tree_file, rand_id),
                              shell=True)
        tree = ete2.PhyloTree(tmp_newick, format=0)
        tree.resolve_polytomy()
        tree.write(format=0, outfile=txid_file + '.tmp.nw')
        if os.path.exists(tmp_newick):
            subprocess.check_call("rm  %s.%d" % (gene_tree_file, rand_id),
                                  shell=True)

        Phylo.convert(txid_file + '.tmp.nw', 'newick', txid_file + '.tmp.xml',
                      'phyloxml')
        with open(txid_file + '.tmp.xml', 'r') as handle:
            treexml = PhyloXMLIO.read(handle)
        tree = treexml[0]
        treexml.attributes.pop('schemaLocation',
                               None)  # not supported by Forester
        tree.rooted = True
        my_ids = set()
        my_query_by_taxid = {}
        for leaf in tree.clade.find_clades(terminal=True):
            # Leaf names look like '<name>/<rest>'; only the first part is
            # used for the taxonomy lookup.
            up_name = leaf.name.split('/')[0]
            tax_id, tax_name = find_tax_id_unip(up_name, db)
            if tax_id not in all_species_txids:
                if tax_id in merged_taxid:
                    tax_id = merged_taxid[tax_id]
                    tax_name = find_tax_name(tax_id, db)
                if tax_id in best_taxid_map:
                    tax_id = best_taxid_map[tax_id]
                    tax_name = find_tax_name(tax_id, db)
                else:
                    tax_id0 = tax_id
                    tax_id, tax_name = find_best_taxid(tax_id, db)
                    # Only positive results are remembered for later leaves.
                    if tax_id > 0:
                        best_taxid_map[tax_id0] = tax_id
            if tax_id < 0:
                # NOTE(review): a negative id presumably marks an unresolved
                # taxon; its absolute value is retried against merged_taxid.
                if (-tax_id) in merged_taxid:
                    tax_id = merged_taxid[-tax_id]
                    tax_name = find_tax_name(tax_id, db)
            my_query_by_taxid.setdefault(tax_id, []).append(up_name)
            my_ids.add(tax_id)
            my_tax_id = PhyloXML.Id(tax_id, provider='ncbi_taxonomy')
            taxon = PhyloXML.Taxonomy(id=my_tax_id)
            taxon.scientific_name = tax_name
            leaf._set_taxonomy(taxon)
        # Close the handle so the file is fully written before gsdi reads it.
        with open(txid_file, 'w') as handle:
            PhyloXMLIO.write(treexml, handle)
        os.system('rm ' + txid_file + '.tmp.nw')
        os.system('rm ' + txid_file + '.tmp.xml')
        print("Taxid file done for: %s" % pfam_id)
        existing_id_set = set(my_ids) & set(all_species_txids)
        existing_ids = list(existing_id_set)
        existing_genes = [
            g for txid in my_query_by_taxid if txid in existing_id_set
            for g in my_query_by_taxid[txid]
        ]
        with open(ids_pickle, 'wb') as handle:
            pickle.dump(
                {
                    'pfam_id': pfam_id,
                    'existing_ids': existing_ids,
                    'existing_genes': existing_genes
                }, handle)
        print("Pickle file done for: %s" % pfam_id)

    # NOTE(review): this step runs even when the branch above was skipped,
    # in which case txid_file may not exist -- confirm that is intended.
    if os.path.exists(reconciled_file):
        os.system('rm ' + reconciled_file)
    os.system(
        "java -Xmx4g -cp %s/forester_1038.jar org.forester.application.gsdi -g %s %s/ncbi_2_fixed.xml %s"
        % (lib_path, txid_file, species_tree_data_path, reconciled_file))
    if os.path.exists(reconciled_file):
        if os.path.exists(reconciled_file + '.gz'):
            subprocess.check_call("rm  %s.gz" % (reconciled_file), shell=True)
        subprocess.check_call("gzip %s" % (reconciled_file), shell=True)
    # Best-effort cleanup of gsdi by-products and the txid input file.
    os.system('rm ' + rec_tag + 'reconciled_species_tree_used.xml')
    os.system('rm ' + rec_tag + 'reconciled_gsdi_log.txt')
    os.system('rm ' + txid_file)
    print("Reconciliation file done for: %s" % pfam_id)