Beispiel #1
0
    def _stash_rewrite_and_call(self, fname, test_cases):
        """Run tests against a rewritten copy of ``fname``, then restore it.

        The file is parsed, the original is renamed to ``fname + "~"``, the
        parsed object is written back to ``fname``, and every listed test
        method is called.  Whether or not the tests succeed, the rewritten
        file is removed and the backup is renamed back to ``fname``.

        ``test_cases`` is an iterable of ``(test class, [method names])``
        pairs.

        Python 2.4 support: a context manager that handles the rename and
        the final restore would express this more naturally.
        """
        parsed = PhyloXMLIO.read(fname)
        backup = fname + "~"
        # A backup left over from an earlier run would block the rename.
        if os.path.exists(backup):
            os.remove(backup)
        os.rename(fname, backup)
        try:
            PhyloXMLIO.write(parsed, fname)
            for case_class, method_names in test_cases:
                # The instance is built with "setUp" as its method name; the
                # requested methods are then invoked on it directly.
                case = case_class("setUp")
                for method_name in method_names:
                    getattr(case, method_name)()
        finally:
            # XXX not safe!
            # Between the remove and the rename below, no file exists at
            # ``fname``.
            if os.path.exists(fname):
                os.remove(fname)
            os.rename(backup, fname)
Beispiel #2
0
 def test_Phyloxml(self):
     """Check the object types produced by reading the example file.

     The value returned by the reader must be a Phyloxml, every item it
     yields on iteration must be a Phylogeny, and every entry of its
     ``other`` attribute must be an Other.
     """
     parsed = PhyloXMLIO.read(EX_PHYLO)
     self.assertTrue(isinstance(parsed, PX.Phyloxml))
     for phylogeny in parsed:
         self.assertTrue(isinstance(phylogeny, PX.Phylogeny))
     for extra in parsed.other:
         self.assertTrue(isinstance(extra, PX.Other))
 def test_Phyloxml(self):
     """Verify that reading EX_PHYLO yields correctly typed objects.

     Checks the top-level Phyloxml object, each Phylogeny obtained by
     iterating over it, and each Other held in its ``other`` attribute.
     """
     document = PhyloXMLIO.read(EX_PHYLO)
     self.assertTrue(isinstance(document, PX.Phyloxml))
     for phylogeny in document:
         self.assertTrue(isinstance(phylogeny, PX.Phylogeny))
     for other_element in document.other:
         self.assertTrue(isinstance(other_element, PX.Other))
Beispiel #4
0
 def _rewrite_and_call(self, orig_fname, test_cases):
     """Round-trip a phyloXML file through DUMMY, then run the given tests.

     ``orig_fname`` is parsed, the parsed object is written to the DUMMY
     path, and each named test method is then called on an instance of its
     test class.  ``test_cases`` is an iterable of
     ``(test class, [method names])`` pairs.
     """
     with open(orig_fname) as source_handle:
         parsed = PhyloXMLIO.read(source_handle)
     with open(DUMMY, "w") as dummy_handle:
         PhyloXMLIO.write(parsed, dummy_handle)
     for case_class, method_names in test_cases:
         # The instance is built with "setUp" as its method name; the
         # requested methods are then invoked on it directly.
         case = case_class("setUp")
         for method_name in method_names:
             getattr(case, method_name)()
 def _rewrite_and_call(self, orig_fname, test_cases):
     """Parse, rewrite and retest a phyloXML example file.

     ``orig_fname`` is opened in binary mode and parsed, the parsed object
     is written to the DUMMY path (also in binary mode), and each named
     test method is then called on an instance of its test class.

     ``test_cases`` is an iterable of ``(test class, [method names])``
     pairs.  Any exception raised by parsing, writing or a test method
     propagates to the caller.
     """
     # ``with`` closes each handle even when reading or writing raises.
     with open(orig_fname, "rb") as infile:
         phx = PhyloXMLIO.read(infile)
     with open(DUMMY, "w+b") as outfile:
         PhyloXMLIO.write(phx, outfile)
     for cls, tests in test_cases:
         # The instance is built with "setUp" as its method name; the
         # requested methods are then invoked on it directly.
         inst = cls("setUp")
         for test in tests:
             getattr(inst, test)()
Beispiel #6
0
 def _rewrite_and_call(self, orig_fname, test_cases):
     """Parse, rewrite and retest a phyloXML example file.

     ``orig_fname`` is opened in text mode and parsed, the parsed object is
     written to the DUMMY path, and each named test method is then called
     on an instance of its test class.

     ``test_cases`` is an iterable of ``(test class, [method names])``
     pairs.  Any exception raised by parsing, writing or a test method
     propagates to the caller.
     """
     # ``with`` closes each handle even when reading or writing raises.
     with open(orig_fname, "r") as infile:
         phx = PhyloXMLIO.read(infile)
     with open(DUMMY, "w") as outfile:
         PhyloXMLIO.write(phx, outfile)
     for cls, tests in test_cases:
         # The instance is built with "setUp" as its method name; the
         # requested methods are then invoked on it directly.
         inst = cls("setUp")
         for test in tests:
             getattr(inst, test)()
Beispiel #7
0
 def test_Other(self):
     """Check the first Other element read from the example file.

     Verifies its type, tag, namespace and child count, then compares the
     tag, ``name`` attribute and value of its children against the
     expected sequences.
     """
     parsed = PhyloXMLIO.read(EX_PHYLO)
     alignment = parsed.other[0]
     self.assertTrue(isinstance(alignment, PX.Other))
     self.assertEqual(alignment.tag, 'alignment')
     self.assertEqual(alignment.namespace, 'http://example.org/align')
     self.assertEqual(len(alignment.children), 3)
     # One (name, sequence) row per expected child, in document order.
     expected_seqs = (
         ('A', 'acgtcgcggcccgtggaagtcctctcct'),
         ('B', 'aggtcgcggcctgtggaagtcctctcct'),
         ('C', 'taaatcgc--cccgtgg-agtccc-cct'),
     )
     for seq, (name, value) in zip(alignment, expected_seqs):
         self.assertEqual(seq.tag, 'seq')
         self.assertEqual(seq.attributes['name'], name)
         self.assertEqual(seq.value, value)
Beispiel #8
0
 def test_Other(self):
     """Verify the first entry of ``other`` in the parsed example file.

     The entry must be an Other with the expected tag, namespace and three
     children; the children are then compared with the expected names and
     sequence strings.
     """
     document = PhyloXMLIO.read(EX_PHYLO)
     first_other = document.other[0]
     self.assertTrue(isinstance(first_other, PX.Other))
     self.assertEqual(first_other.tag, 'alignment')
     self.assertEqual(first_other.namespace, 'http://example.org/align')
     self.assertEqual(len(first_other.children), 3)
     # Expected (name, sequence) for each child, in document order.
     expected_children = (
         ('A', 'acgtcgcggcccgtggaagtcctctcct'),
         ('B', 'aggtcgcggcctgtggaagtcctctcct'),
         ('C', 'taaatcgc--cccgtgg-agtccc-cct'),
     )
     for element, (name, value) in zip(first_other, expected_children):
         self.assertEqual(element.tag, 'seq')
         self.assertEqual(element.attributes['name'], name)
         self.assertEqual(element.value, value)
Beispiel #9
0
 def test_Other(self):
     """Check the first Other element read from the example file.

     Verifies its type, tag, namespace and child count, then compares the
     tag, ``name`` attribute and value of its children against the
     expected sequences.
     """
     parsed = PhyloXMLIO.read(EX_PHYLO)
     alignment = parsed.other[0]
     self.assertTrue(isinstance(alignment, PX.Other))
     self.assertEqual(alignment.tag, "alignment")
     self.assertEqual(alignment.namespace, "http://example.org/align")
     self.assertEqual(len(alignment.children), 3)
     # One (name, sequence) row per expected child, in document order.
     expected_seqs = (
         ("A", "acgtcgcggcccgtggaagtcctctcct"),
         ("B", "aggtcgcggcctgtggaagtcctctcct"),
         ("C", "taaatcgc--cccgtgg-agtccc-cct"),
     )
     for seq, (name, value) in zip(alignment, expected_seqs):
         self.assertEqual(seq.tag, "seq")
         self.assertEqual(seq.attributes["name"], name)
         self.assertEqual(seq.value, value)
Beispiel #10
0
 def test_Other(self):
     """Verify the first entry of ``other`` in the parsed example file.

     The entry must be an Other with the expected tag, namespace and three
     children; the children are then compared with the expected names and
     sequence strings.
     """
     document = PhyloXMLIO.read(EX_PHYLO)
     first_other = document.other[0]
     self.assertTrue(isinstance(first_other, PX.Other))
     self.assertEqual(first_other.tag, "alignment")
     self.assertEqual(first_other.namespace, "http://example.org/align")
     self.assertEqual(len(first_other.children), 3)
     # Expected (name, sequence) for each child, in document order.
     expected_children = (
         ("A", "acgtcgcggcccgtggaagtcctctcct"),
         ("B", "aggtcgcggcctgtggaagtcctctcct"),
         ("C", "taaatcgc--cccgtgg-agtccc-cct"),
     )
     for element, (name, value) in zip(first_other, expected_children):
         self.assertEqual(element.tag, "seq")
         self.assertEqual(element.attributes["name"], name)
         self.assertEqual(element.value, value)
Beispiel #11
0
 def test_Polygon(self):
     """Check the polygons of the second phylogeny in the EX_MADE file.

     Every polygon of the first distribution of the first clade must be a
     Polygon with three points, and the points of the first two polygons
     must carry the expected datum, latitude, longitude and altitude.
     """
     phylogeny = PhyloXMLIO.read(EX_MADE).phylogenies[1]
     self.assertEqual(phylogeny.name, "testing polygon")
     distribution = phylogeny.clade[0].distributions[0]
     for polygon in distribution.polygons:
         self.assertTrue(isinstance(polygon, PX.Polygon))
         self.assertEqual(len(polygon.points), 3)
     self.assertEqual(distribution.polygons[0].points[0].alt_unit, "m")
     all_points = chain(
         distribution.polygons[0].points, distribution.polygons[1].points
     )
     # One (lat, long, alt) row per point: polygon 0 first, then polygon 1.
     expected_coords = (
         (47.481277, 8.769303, 472),
         (35.155904, 136.915863, 10),
         (47.376334, 8.548108, 452),
         (40.481277, 8.769303, 42),
         (25.155904, 136.915863, 10),
         (47.376334, 7.548108, 452),
     )
     for point, (lati, longi, alti) in zip(all_points, expected_coords):
         self.assertTrue(isinstance(point, PX.Point))
         self.assertEqual(point.geodetic_datum, "WGS84")
         self.assertEqual(point.lat, lati)
         self.assertEqual(point.long, longi)
         self.assertEqual(point.alt, alti)
Beispiel #12
0
 def test_Polygon(self):
     """Verify Polygon and Point objects parsed from the EX_MADE file.

     Uses the second phylogeny: each polygon in the first distribution of
     its first clade must be a three-point Polygon, and the six points of
     the first two polygons are compared with the expected coordinates.
     """
     made_tree = PhyloXMLIO.read(EX_MADE).phylogenies[1]
     self.assertEqual(made_tree.name, "testing polygon")
     distribution = made_tree.clade[0].distributions[0]
     for polygon in distribution.polygons:
         self.assertTrue(isinstance(polygon, PX.Polygon))
         self.assertEqual(len(polygon.points), 3)
     self.assertEqual(distribution.polygons[0].points[0].alt_unit, "m")
     points_in_order = chain(
         distribution.polygons[0].points, distribution.polygons[1].points
     )
     # Expected (lat, long, alt) per point: polygon 0 first, then polygon 1.
     expected_rows = (
         (47.481277, 8.769303, 472),
         (35.155904, 136.915863, 10),
         (47.376334, 8.548108, 452),
         (40.481277, 8.769303, 42),
         (25.155904, 136.915863, 10),
         (47.376334, 7.548108, 452),
     )
     for point, (lati, longi, alti) in zip(points_in_order, expected_rows):
         self.assertTrue(isinstance(point, PX.Point))
         self.assertEqual(point.geodetic_datum, "WGS84")
         self.assertEqual(point.lat, lati)
         self.assertEqual(point.long, longi)
         self.assertEqual(point.alt, alti)
def prepare_species_tree(FILE_TREE_IN,FILE_TREE_OUT):
    """Convert node names of a phyloXML species tree into taxonomy entries.

    Reads the phyloXML file at ``FILE_TREE_IN``, takes its first phylogeny,
    marks it as rooted, and for every clade that has a name:

    * uses the name as a taxonomy id, dropping a leading ``'INT'`` prefix;
    * attaches a ``Taxonomy`` whose id uses provider ``'ncbi_taxonomy'`` and
      whose scientific name comes from ``find_tax_name`` (``'(NA)'`` when
      that lookup raises ``KeyError``);
    * clears the clade name.

    The modified document is written to ``FILE_TREE_OUT``.  Clades without
    a name are left untouched.  Returns ``None``.
    """
    # Close the input handle as soon as parsing is done.
    with open(FILE_TREE_IN, 'r') as tree_in:
        treexml = PhyloXMLIO.read(tree_in)
    tree = treexml[0]
    treexml.attributes.pop('schemaLocation', None)  # not supported by Forester
    tree.rooted = True
    for node in tree.clade.find_clades():
        if not node.name:
            continue
        tax_id = node.name
        if tax_id.startswith('INT'):
            tax_id = tax_id[3:]
        taxon = PhyloXML.Taxonomy(id=PhyloXML.Id(tax_id, provider='ncbi_taxonomy'))
        try:
            taxon.scientific_name = find_tax_name(tax_id)
        except KeyError:
            # No name known for this id; keep a visible placeholder.
            taxon.scientific_name = '(NA)'
        node._set_taxonomy(taxon)
        node.name = None
    PhyloXMLIO.write(treexml, FILE_TREE_OUT)
def prepare_species_tree(FILE_TREE_IN, FILE_TREE_OUT):
    """Convert node names of a phyloXML species tree into taxonomy entries.

    Reads the phyloXML file at ``FILE_TREE_IN``, takes its first phylogeny,
    marks it as rooted, and for every clade that has a name:

    * uses the name as a taxonomy id, dropping a leading ``'INT'`` prefix;
    * attaches a ``Taxonomy`` whose id uses provider ``'ncbi_taxonomy'`` and
      whose scientific name comes from ``find_tax_name`` (``'(NA)'`` when
      that lookup raises ``KeyError``);
    * clears the clade name.

    The modified document is written to ``FILE_TREE_OUT``.  Clades without
    a name are left untouched.  Returns ``None``.
    """
    # Close the input handle as soon as parsing is done.
    with open(FILE_TREE_IN, 'r') as tree_in:
        treexml = PhyloXMLIO.read(tree_in)
    tree = treexml[0]
    treexml.attributes.pop('schemaLocation', None)  # not supported by Forester
    tree.rooted = True
    for node in tree.clade.find_clades():
        if not node.name:
            continue
        tax_id = node.name
        if tax_id.startswith('INT'):
            tax_id = tax_id[3:]
        taxon = PhyloXML.Taxonomy(
            id=PhyloXML.Id(tax_id, provider='ncbi_taxonomy'))
        try:
            taxon.scientific_name = find_tax_name(tax_id)
        except KeyError:
            # No name known for this id; keep a visible placeholder.
            taxon.scientific_name = '(NA)'
        node._set_taxonomy(taxon)
        node.name = None
    PhyloXMLIO.write(treexml, FILE_TREE_OUT)
Beispiel #15
0
 def setUp(self):
     """Parse the EX_PHYLO example and store it on ``self.phyloxml``."""
     parsed = PhyloXMLIO.read(EX_PHYLO)
     self.phyloxml = parsed
def reconcile_tree(gene_tree_file,reconciled_file,rec_tag,pfam_id,db):
    """Reconcile the gene tree of one family against the species tree.

    Steps, in order:

    1. When ``pplacer_flag == 1`` and ``<rec_tag>ids.pickle`` exists, return
       early if every gene id in ``pplacer_queries[pfam_id]`` is already
       listed under ``'existing_genes'`` in that pickle.
    2. Unless both the pickle and ``<reconciled_file>.gz`` exist and
       ``pplacer_flag != 1``, rebuild the taxonomy-annotated gene tree:
       decompress ``<tree_folder>/<pfam_id>.nw.gz``, resolve polytomies,
       convert Newick to phyloXML, attach an NCBI taxonomy id and name to
       every leaf, write the result to ``<rec_tag>txid.xml`` and refresh the
       pickle.
    3. Run Forester GSDI on the taxid file, gzip ``reconciled_file`` if it
       was produced, and remove the intermediate files.

    Parameters:
        gene_tree_file: path prefix for the temporary decompressed gene tree.
        reconciled_file: output path handed to GSDI; compressed to
            ``reconciled_file + '.gz'`` afterwards.
        rec_tag: prefix for the work files (pickle, taxid XML, GSDI logs).
        pfam_id: family identifier used in file names and messages.
        db: passed through unchanged to the taxonomy lookup helpers.

    Reads the module-level names ``pplacer_flag``, ``pplacer_queries``,
    ``tree_folder``, ``all_species_txids``, ``merged_taxid``, ``lib_path``
    and ``species_tree_data_path``; adds entries to ``best_taxid_map``.
    Returns ``None``.
    """
    ids_pickle = rec_tag + 'ids.pickle'
    if os.path.isfile(ids_pickle) and (pplacer_flag == 1):
        # NOTE: pickle.load executes whatever the file encodes; this is only
        # appropriate because the pickle is written by this function itself.
        with open(ids_pickle, 'rb') as handle:
            id_information = pickle.load(handle)
        existing_genes = id_information['existing_genes']
        new_genes = set(w['id'] for w in pplacer_queries[pfam_id])
        if not (new_genes - set(existing_genes)):
            print("All %s Genes for family %s have already been placed in the reconciled tree."
                  % (len(new_genes), pfam_id))
            print("Skip Reconciliation for %s" % pfam_id)
            return

    txid_file = rec_tag + 'txid.xml'
    # NOTE(review): when this condition is false the taxid file is not
    # rebuilt, yet GSDI below is still run on it -- confirm this is intended.
    if (not os.path.isfile(ids_pickle)
            or not os.path.isfile(reconciled_file + '.gz')
            or (pplacer_flag == 1)):
        print("Running Reconciliation for: %s" % pfam_id)

        # A random suffix keeps the temporary file name distinct per call.
        rand_id = random.randint(1000000, 9999999)
        tmp_gene_tree = '%s.%d' % (gene_tree_file, rand_id)
        subprocess.check_call("gunzip -c %s/%s.nw.gz > %s.%d"%(tree_folder,pfam_id,gene_tree_file,rand_id),shell=True)
        tree = ete2.PhyloTree(tmp_gene_tree, format=0)
        tree.resolve_polytomy()
        tree.write(format=0, outfile=txid_file + '.tmp.nw')
        if os.path.exists(tmp_gene_tree):
            subprocess.check_call("rm  %s.%d"%(gene_tree_file,rand_id),shell=True)

        Phylo.convert(txid_file + '.tmp.nw', 'newick', txid_file + '.tmp.xml', 'phyloxml')
        with open(txid_file + '.tmp.xml', 'r') as handle:
            treexml = PhyloXMLIO.read(handle)
        tree = treexml[0]
        treexml.attributes.pop('schemaLocation', None)  # not supported by Forester
        tree.rooted = True
        my_ids = set()
        my_query_by_taxid = {}
        for leaf in tree.clade.find_clades(terminal=True):
            # Leaf names look like "<id>/<rest>"; only the part before the
            # first slash is used for the lookup.
            up_name = leaf.name.split('/')[0]
            tax_id, tax_name = find_tax_id_unip(up_name, db)
            if tax_id not in all_species_txids:
                if tax_id in merged_taxid:
                    tax_id = merged_taxid[tax_id]
                    tax_name = find_tax_name(tax_id, db)
                if tax_id in best_taxid_map:
                    tax_id = best_taxid_map[tax_id]
                    tax_name = find_tax_name(tax_id, db)
                else:
                    tax_id0 = tax_id
                    tax_id, tax_name = find_best_taxid(tax_id, db)
                    # Only positive results are remembered for later leaves.
                    if tax_id > 0:
                        best_taxid_map[tax_id0] = tax_id
            if tax_id < 0:
                if (-tax_id) in merged_taxid:
                    tax_id = merged_taxid[-tax_id]
                    tax_name = find_tax_name(tax_id, db)
            my_query_by_taxid.setdefault(tax_id, []).append(up_name)
            my_ids.add(tax_id)
            my_tax_id = PhyloXML.Id(tax_id, provider='ncbi_taxonomy')
            taxon = PhyloXML.Taxonomy(id=my_tax_id)
            taxon.scientific_name = tax_name
            leaf._set_taxonomy(taxon)
        # Close (and therefore flush) the taxid file before the external
        # GSDI process reads it.
        with open(txid_file, 'w') as handle:
            PhyloXMLIO.write(treexml, handle)
        os.system('rm '+txid_file+'.tmp.nw')
        os.system('rm '+txid_file+'.tmp.xml')
        print("Taxid file done for: %s" % pfam_id)
        existing_id_set = set(my_ids) & set(all_species_txids)
        existing_ids = list(existing_id_set)
        existing_genes = [g
                          for txid in my_query_by_taxid.keys()
                          for g in my_query_by_taxid[txid]
                          if txid in existing_id_set]
        with open(ids_pickle, 'wb') as handle:
            pickle.dump({'pfam_id':pfam_id,'existing_ids':existing_ids,'existing_genes':existing_genes}, handle)
        print("Pickle file done for: %s" % pfam_id)

    # Remove a stale uncompressed result so that its presence afterwards
    # shows that GSDI produced a new one.
    if os.path.exists(reconciled_file):
        os.system('rm '+reconciled_file)
    os.system("java -Xmx4g -cp %s/forester_1038.jar org.forester.application.gsdi -g %s %s/ncbi_2_fixed.xml %s"%(lib_path, txid_file, species_tree_data_path, reconciled_file))
    if os.path.exists(reconciled_file):
        if os.path.exists(reconciled_file + '.gz'):
            subprocess.check_call("rm  %s.gz"%(reconciled_file),shell=True)
        subprocess.check_call("gzip %s"%(reconciled_file),shell=True)
    # Best-effort cleanup: os.system does not raise when a file is missing.
    os.system('rm '+rec_tag+'reconciled_species_tree_used.xml')
    os.system('rm '+rec_tag+'reconciled_gsdi_log.txt')
    os.system('rm '+txid_file)
    print("Reconciliation file done for: %s" % pfam_id)
Beispiel #17
0
 def test_read(self):
     """Read ``source`` and check its phylogeny and ``other`` counts.

     The parsed object must be truthy, its length must equal ``count[0]``
     and the length of its ``other`` attribute must equal ``count[1]``.
     """
     parsed = PhyloXMLIO.read(source)
     self.assertTrue(parsed)
     self.assertEqual(len(parsed), count[0])
     self.assertEqual(len(parsed.other), count[1])
Beispiel #18
0
def reconcile_tree(gene_tree_file, reconciled_file, rec_tag, pfam_id, db):
    """Reconcile the gene tree of one family against the species tree.

    Steps, in order:

    1. When ``pplacer_flag == 1`` and ``<rec_tag>ids.pickle`` exists, return
       early if every gene id in ``pplacer_queries[pfam_id]`` is already
       listed under ``'existing_genes'`` in that pickle.
    2. Unless both the pickle and ``<reconciled_file>.gz`` exist and
       ``pplacer_flag != 1``, rebuild the taxonomy-annotated gene tree:
       decompress ``<tree_folder>/<pfam_id>.nw.gz``, resolve polytomies,
       convert Newick to phyloXML, attach an NCBI taxonomy id and name to
       every leaf, write the result to ``<rec_tag>txid.xml`` and refresh the
       pickle.
    3. Run Forester GSDI on the taxid file, gzip ``reconciled_file`` if it
       was produced, and remove the intermediate files.

    Parameters:
        gene_tree_file: path prefix for the temporary decompressed gene tree.
        reconciled_file: output path handed to GSDI; compressed to
            ``reconciled_file + '.gz'`` afterwards.
        rec_tag: prefix for the work files (pickle, taxid XML, GSDI logs).
        pfam_id: family identifier used in file names and messages.
        db: passed through unchanged to the taxonomy lookup helpers.

    Reads the module-level names ``pplacer_flag``, ``pplacer_queries``,
    ``tree_folder``, ``all_species_txids``, ``merged_taxid``, ``lib_path``
    and ``species_tree_data_path``; adds entries to ``best_taxid_map``.
    Returns ``None``.
    """
    ids_pickle = rec_tag + 'ids.pickle'
    if os.path.isfile(ids_pickle) and (pplacer_flag == 1):
        # NOTE: pickle.load executes whatever the file encodes; this is only
        # appropriate because the pickle is written by this function itself.
        with open(ids_pickle, 'rb') as handle:
            id_information = pickle.load(handle)
        existing_genes = id_information['existing_genes']
        new_genes = set(w['id'] for w in pplacer_queries[pfam_id])
        if not (new_genes - set(existing_genes)):
            print("All %s Genes for family %s have already been placed in the reconciled tree." % (
                len(new_genes), pfam_id))
            print("Skip Reconciliation for %s" % pfam_id)
            return

    txid_file = rec_tag + 'txid.xml'
    # NOTE(review): when this condition is false the taxid file is not
    # rebuilt, yet GSDI below is still run on it -- confirm this is intended.
    if (not os.path.isfile(ids_pickle)
            or not os.path.isfile(reconciled_file + '.gz')
            or (pplacer_flag == 1)):
        print("Running Reconciliation for: %s" % pfam_id)

        # A random suffix keeps the temporary file name distinct per call.
        rand_id = random.randint(1000000, 9999999)
        tmp_gene_tree = '%s.%d' % (gene_tree_file, rand_id)
        subprocess.check_call("gunzip -c %s/%s.nw.gz > %s.%d" %
                              (tree_folder, pfam_id, gene_tree_file, rand_id),
                              shell=True)
        tree = ete2.PhyloTree(tmp_gene_tree, format=0)
        tree.resolve_polytomy()
        tree.write(format=0, outfile=txid_file + '.tmp.nw')
        if os.path.exists(tmp_gene_tree):
            subprocess.check_call("rm  %s.%d" % (gene_tree_file, rand_id),
                                  shell=True)

        Phylo.convert(txid_file + '.tmp.nw', 'newick', txid_file + '.tmp.xml',
                      'phyloxml')
        with open(txid_file + '.tmp.xml', 'r') as handle:
            treexml = PhyloXMLIO.read(handle)
        tree = treexml[0]
        treexml.attributes.pop('schemaLocation',
                               None)  # not supported by Forester
        tree.rooted = True
        my_ids = set()
        my_query_by_taxid = {}
        for leaf in tree.clade.find_clades(terminal=True):
            # Leaf names look like "<id>/<rest>"; only the part before the
            # first slash is used for the lookup.
            up_name = leaf.name.split('/')[0]
            tax_id, tax_name = find_tax_id_unip(up_name, db)
            if tax_id not in all_species_txids:
                if tax_id in merged_taxid:
                    tax_id = merged_taxid[tax_id]
                    tax_name = find_tax_name(tax_id, db)
                if tax_id in best_taxid_map:
                    tax_id = best_taxid_map[tax_id]
                    tax_name = find_tax_name(tax_id, db)
                else:
                    tax_id0 = tax_id
                    tax_id, tax_name = find_best_taxid(tax_id, db)
                    # Only positive results are remembered for later leaves.
                    if tax_id > 0:
                        best_taxid_map[tax_id0] = tax_id
            if tax_id < 0:
                if (-tax_id) in merged_taxid:
                    tax_id = merged_taxid[-tax_id]
                    tax_name = find_tax_name(tax_id, db)
            my_query_by_taxid.setdefault(tax_id, []).append(up_name)
            my_ids.add(tax_id)
            my_tax_id = PhyloXML.Id(tax_id, provider='ncbi_taxonomy')
            taxon = PhyloXML.Taxonomy(id=my_tax_id)
            taxon.scientific_name = tax_name
            leaf._set_taxonomy(taxon)
        # Close (and therefore flush) the taxid file before the external
        # GSDI process reads it.
        with open(txid_file, 'w') as handle:
            PhyloXMLIO.write(treexml, handle)
        os.system('rm ' + txid_file + '.tmp.nw')
        os.system('rm ' + txid_file + '.tmp.xml')
        print("Taxid file done for: %s" % pfam_id)
        existing_id_set = set(my_ids) & set(all_species_txids)
        existing_ids = list(existing_id_set)
        existing_genes = [
            g for txid in my_query_by_taxid.keys()
            for g in my_query_by_taxid[txid] if txid in existing_id_set
        ]
        with open(ids_pickle, 'wb') as handle:
            pickle.dump(
                {
                    'pfam_id': pfam_id,
                    'existing_ids': existing_ids,
                    'existing_genes': existing_genes
                }, handle)
        print("Pickle file done for: %s" % pfam_id)

    # Remove a stale uncompressed result so that its presence afterwards
    # shows that GSDI produced a new one.
    if os.path.exists(reconciled_file):
        os.system('rm ' + reconciled_file)
    os.system(
        "java -Xmx4g -cp %s/forester_1038.jar org.forester.application.gsdi -g %s %s/ncbi_2_fixed.xml %s"
        % (lib_path, txid_file, species_tree_data_path, reconciled_file))
    if os.path.exists(reconciled_file):
        if os.path.exists(reconciled_file + '.gz'):
            subprocess.check_call("rm  %s.gz" % (reconciled_file), shell=True)
        subprocess.check_call("gzip %s" % (reconciled_file), shell=True)
    # Best-effort cleanup: os.system does not raise when a file is missing.
    os.system('rm ' + rec_tag + 'reconciled_species_tree_used.xml')
    os.system('rm ' + rec_tag + 'reconciled_gsdi_log.txt')
    os.system('rm ' + txid_file)
    print("Reconciliation file done for: %s" % pfam_id)
Beispiel #19
0
 def test_read(self):
     """Check that ``source`` parses to the expected element counts.

     Asserts that the parsed object is truthy, that its length equals
     ``count[0]`` and that its ``other`` attribute has ``count[1]`` items.
     """
     document = PhyloXMLIO.read(source)
     self.assertTrue(document)
     self.assertEqual(len(document), count[0])
     self.assertEqual(len(document.other), count[1])
Beispiel #20
0
 def setUp(self):
     """Read the EX_PHYLO example file into ``self.phyloxml``."""
     document = PhyloXMLIO.read(EX_PHYLO)
     self.phyloxml = document