def _stash_rewrite_and_call(self, fname, test_cases):
    """Re-run test methods against a rewritten copy of ``fname``, then restore it.

    The file is parsed, the original is renamed to ``fname + "~"``, and the
    parsed object is written back out under the original name. Every test
    named in ``test_cases`` is then called, and finally the rewritten file
    is removed and the backup is renamed back into place.

    ``test_cases`` is an iterable of ``(cls, tests)`` pairs: ``cls`` is
    called with ``"setUp"`` to build an instance, and ``tests`` holds the
    names of the methods to call on that instance.

    Python 2.4 support: a context manager that handles the rename and the
    final restore would express this more naturally.
    """
    backup = fname + "~"
    phx = PhyloXMLIO.read(fname)
    # Remove any leftover backup before moving the original onto that name.
    if os.path.exists(backup):
        os.remove(backup)
    os.rename(fname, backup)
    try:
        PhyloXMLIO.write(phx, fname)
        for case_cls, method_names in test_cases:
            case = case_cls("setUp")
            for method_name in method_names:
                bound = getattr(case, method_name)
                bound()
    finally:
        # XXX not safe!
        if os.path.exists(fname):
            os.remove(fname)
        os.rename(backup, fname)
def test_Phyloxml(self):
    """Instantiation of Phyloxml objects.

    Parses EX_PHYLO and checks the type of the parsed object, of every item
    it yields when iterated, and of every entry in its ``other`` attribute.
    """
    parsed = PhyloXMLIO.read(EX_PHYLO)
    is_phyloxml = isinstance(parsed, PX.Phyloxml)
    self.assertTrue(is_phyloxml)
    for phylogeny in parsed:
        self.assertTrue(isinstance(phylogeny, PX.Phylogeny))
    for other in parsed.other:
        self.assertTrue(isinstance(other, PX.Other))
def _rewrite_and_call(self, orig_fname, test_cases):
    """Parse, rewrite and retest a phyloXML example file.

    ``orig_fname`` is parsed and the result is written to DUMMY. Each
    ``(cls, tests)`` pair in ``test_cases`` is then handled by building
    ``cls("setUp")`` and calling every method named in ``tests`` on it.
    """
    with open(orig_fname) as source_handle:
        phx = PhyloXMLIO.read(source_handle)
    with open(DUMMY, "w") as dummy_handle:
        PhyloXMLIO.write(phx, dummy_handle)
    for case_cls, method_names in test_cases:
        case = case_cls("setUp")
        for method_name in method_names:
            bound = getattr(case, method_name)
            bound()
def _rewrite_and_call(self, orig_fname, test_cases):
    """Parse, rewrite and retest a phyloXML example file.

    ``orig_fname`` is opened in binary mode and parsed; the parsed object is
    then written to DUMMY (opened ``"w+b"``). Each ``(cls, tests)`` pair in
    ``test_cases`` is handled by building ``cls("setUp")`` and calling every
    method named in ``tests`` on that instance.
    """
    # Context managers close each handle even when parsing or writing raises;
    # the previous open()/close() pairs leaked the handle in that case.
    with open(orig_fname, "rb") as infile:
        phx = PhyloXMLIO.read(infile)
    with open(DUMMY, "w+b") as outfile:
        PhyloXMLIO.write(phx, outfile)
    for cls, tests in test_cases:
        inst = cls("setUp")
        for test in tests:
            getattr(inst, test)()
def _rewrite_and_call(self, orig_fname, test_cases):
    """Parse, rewrite and retest a phyloXML example file.

    ``orig_fname`` is opened in text mode and parsed; the parsed object is
    then written to DUMMY. Each ``(cls, tests)`` pair in ``test_cases`` is
    handled by building ``cls("setUp")`` and calling every method named in
    ``tests`` on that instance.
    """
    # Context managers close each handle even when parsing or writing raises;
    # the previous open()/close() pairs leaked the handle in that case.
    with open(orig_fname, "r") as infile:
        phx = PhyloXMLIO.read(infile)
    with open(DUMMY, "w") as outfile:
        PhyloXMLIO.write(phx, outfile)
    for cls, tests in test_cases:
        inst = cls("setUp")
        for test in tests:
            getattr(inst, test)()
def test_Other(self):
    """Instantiation of Other objects.

    Checks the first ``other`` entry parsed from EX_PHYLO: its type, tag,
    namespace and child count, then the tag, ``name`` attribute and value
    of each child in order.
    """
    parsed = PhyloXMLIO.read(EX_PHYLO)
    alignment = parsed.other[0]
    self.assertTrue(isinstance(alignment, PX.Other))
    self.assertEqual(alignment.tag, 'alignment')
    self.assertEqual(alignment.namespace, 'http://example.org/align')
    self.assertEqual(len(alignment.children), 3)
    # Expected (name attribute, sequence text) for each child, in order.
    expected_seqs = (
        ('A', 'acgtcgcggcccgtggaagtcctctcct'),
        ('B', 'aggtcgcggcctgtggaagtcctctcct'),
        ('C', 'taaatcgc--cccgtgg-agtccc-cct'),
    )
    for child, (name, value) in zip(alignment, expected_seqs):
        self.assertEqual(child.tag, 'seq')
        self.assertEqual(child.attributes['name'], name)
        self.assertEqual(child.value, value)
def test_Other(self):
    """Instantiation of Other objects.

    Checks the first ``other`` entry parsed from EX_PHYLO: its type, tag,
    namespace and child count, then the tag, ``name`` attribute and value
    of each child in order.
    """
    parsed = PhyloXMLIO.read(EX_PHYLO)
    alignment = parsed.other[0]
    self.assertTrue(isinstance(alignment, PX.Other))
    self.assertEqual(alignment.tag, "alignment")
    self.assertEqual(alignment.namespace, "http://example.org/align")
    self.assertEqual(len(alignment.children), 3)
    # Expected (name attribute, sequence text) for each child, in order.
    expected_seqs = (
        ("A", "acgtcgcggcccgtggaagtcctctcct"),
        ("B", "aggtcgcggcctgtggaagtcctctcct"),
        ("C", "taaatcgc--cccgtgg-agtccc-cct"),
    )
    for child, (name, value) in zip(alignment, expected_seqs):
        self.assertEqual(child.tag, "seq")
        self.assertEqual(child.attributes["name"], name)
        self.assertEqual(child.value, value)
def test_Other(self):
    """Instantiation of Other objects.

    Checks the first ``other`` entry parsed from EX_PHYLO: its type, tag,
    namespace and child count, then the tag, ``name`` attribute and value
    of each child in order.
    """
    parsed = PhyloXMLIO.read(EX_PHYLO)
    alignment = parsed.other[0]
    self.assertTrue(isinstance(alignment, PX.Other))
    self.assertEqual(alignment.tag, "alignment")
    self.assertEqual(alignment.namespace, "http://example.org/align")
    self.assertEqual(len(alignment.children), 3)
    # Expected (name attribute, sequence text) for each child, in order.
    expected_seqs = (
        ("A", "acgtcgcggcccgtggaagtcctctcct"),
        ("B", "aggtcgcggcctgtggaagtcctctcct"),
        ("C", "taaatcgc--cccgtgg-agtccc-cct"),
    )
    for child, (name, value) in zip(alignment, expected_seqs):
        self.assertEqual(child.tag, "seq")
        self.assertEqual(child.attributes["name"], name)
        self.assertEqual(child.value, value)
def test_Polygon(self):
    """Instantiation of Polygon objects.

    Uses the second phylogeny parsed from EX_MADE: checks its name, that
    each polygon of the first clade's first distribution has three points,
    and then the datum, latitude, longitude and altitude of the points of
    the first two polygons taken in sequence.
    """
    phylogeny = PhyloXMLIO.read(EX_MADE).phylogenies[1]
    self.assertEqual(phylogeny.name, "testing polygon")
    dist = phylogeny.clade[0].distributions[0]
    for polygon in dist.polygons:
        self.assertTrue(isinstance(polygon, PX.Polygon))
        self.assertEqual(len(polygon.points), 3)
    self.assertEqual(dist.polygons[0].points[0].alt_unit, "m")
    # Expected coordinates: three points of polygon 0, then three of polygon 1.
    expected_lats = (47.481277, 35.155904, 47.376334, 40.481277, 25.155904, 47.376334)
    expected_longs = (8.769303, 136.915863, 8.548108, 8.769303, 136.915863, 7.548108)
    expected_alts = (472, 10, 452, 42, 10, 452)
    all_points = chain(dist.polygons[0].points, dist.polygons[1].points)
    for point, lati, longi, alti in zip(
        all_points, expected_lats, expected_longs, expected_alts
    ):
        self.assertTrue(isinstance(point, PX.Point))
        self.assertEqual(point.geodetic_datum, "WGS84")
        self.assertEqual(point.lat, lati)
        self.assertEqual(point.long, longi)
        self.assertEqual(point.alt, alti)
def prepare_species_tree(FILE_TREE_IN,FILE_TREE_OUT):
    """Annotate a species tree with taxonomy records and write it back out.

    Reads the phyloXML file FILE_TREE_IN, takes its first phylogeny and marks
    it rooted. Every clade that has a name gets a ``PhyloXML.Taxonomy`` whose
    id is that name (minus a leading ``INT`` prefix) under the
    ``ncbi_taxonomy`` provider, and its name is then cleared. The scientific
    name comes from ``find_tax_name``; ``'(NA)'`` is used when that lookup
    raises ``KeyError``. The result is written to FILE_TREE_OUT.
    """
    # Close the input handle deterministically; the bare open() that was
    # passed straight to read() was never closed.
    with open(FILE_TREE_IN, 'r') as tree_handle:
        treexml = PhyloXMLIO.read(tree_handle)
    tree = treexml[0]
    treexml.attributes.pop('schemaLocation', None)  # not supported by Forester
    tree.rooted = True
    for node in tree.clade.find_clades():
        if not node.name:
            # Unnamed clades are left untouched.
            continue
        tax_id = node.name
        if tax_id.startswith('INT'):
            tax_id = tax_id[3:]
        taxon = PhyloXML.Taxonomy(id=PhyloXML.Id(tax_id, provider='ncbi_taxonomy'))
        try:
            taxon.scientific_name = find_tax_name(tax_id)
        except KeyError:
            taxon.scientific_name = '(NA)'
        node._set_taxonomy(taxon)
        node.name = None
    PhyloXMLIO.write(treexml, FILE_TREE_OUT)
def prepare_species_tree(FILE_TREE_IN, FILE_TREE_OUT):
    """Annotate a species tree with taxonomy records and write it back out.

    Reads the phyloXML file FILE_TREE_IN, takes its first phylogeny and marks
    it rooted. Every clade that has a name gets a ``PhyloXML.Taxonomy`` whose
    id is that name (minus a leading ``INT`` prefix) under the
    ``ncbi_taxonomy`` provider, and its name is then cleared. The scientific
    name comes from ``find_tax_name``; ``'(NA)'`` is used when that lookup
    raises ``KeyError``. The result is written to FILE_TREE_OUT.
    """
    # Close the input handle deterministically; the bare open() that was
    # passed straight to read() was never closed.
    with open(FILE_TREE_IN, 'r') as tree_handle:
        treexml = PhyloXMLIO.read(tree_handle)
    tree = treexml[0]
    treexml.attributes.pop('schemaLocation', None)  # not supported by Forester
    tree.rooted = True
    for node in tree.clade.find_clades():
        if not node.name:
            # Unnamed clades are left untouched.
            continue
        tax_id = node.name
        if tax_id.startswith('INT'):
            tax_id = tax_id[3:]
        taxon = PhyloXML.Taxonomy(
            id=PhyloXML.Id(tax_id, provider='ncbi_taxonomy'))
        try:
            taxon.scientific_name = find_tax_name(tax_id)
        except KeyError:
            taxon.scientific_name = '(NA)'
        node._set_taxonomy(taxon)
        node.name = None
    PhyloXMLIO.write(treexml, FILE_TREE_OUT)
def setUp(self):
    """Parse EX_PHYLO and keep the result on the instance as ``phyloxml``."""
    parsed = PhyloXMLIO.read(EX_PHYLO)
    self.phyloxml = parsed
# reconcile_tree(gene_tree_file, reconciled_file, rec_tag, pfam_id, db)
#
# Python 2 code (uses print statements).  Prepares a taxonomy-annotated copy
# of the gene tree for family `pfam_id` and runs Forester's GSDI
# reconciliation on it through an external `java` call.
#
# Visible steps, in order:
#   1. If `rec_tag + 'ids.pickle'` exists and `pplacer_flag == 1`, load it and
#      compare its 'existing_genes' with the ids in `pplacer_queries[pfam_id]`;
#      when there are no new genes, two messages are printed and the function
#      returns.  NOTE(review): the nesting of that `return` is assumed --
#      confirm against the properly indented file.
#   2. When the pickle or `reconciled_file + '.gz'` is missing, or
#      `pplacer_flag == 1`: gunzip `<tree_folder>/<pfam_id>.nw.gz` to
#      `gene_tree_file` plus a random numeric suffix, load it with
#      ete2.PhyloTree, call resolve_polytomy(), write it to
#      `txid_file + '.tmp.nw'`, and convert that file from newick to phyloxml.
#   3. For each terminal clade, take the part of the leaf name before '/',
#      look up a taxonomy id/name with find_tax_id_unip, remap it through
#      `merged_taxid`, `best_taxid_map` and find_best_taxid, and attach a
#      PhyloXML.Taxonomy (provider 'ncbi_taxonomy') to the leaf.
#   4. Write the annotated tree to `txid_file`, then pickle the pfam id, the
#      tax ids that are also in `all_species_txids`, and the genes under them.
#   5. Run org.forester.application.gsdi from forester_1038.jar against
#      `<species_tree_data_path>/ncbi_2_fixed.xml`, gzip `reconciled_file` if
#      it was produced, and remove the intermediate files.
#
# Names read from outside the function: pplacer_flag, pplacer_queries,
# tree_folder, all_species_txids, merged_taxid, best_taxid_map (also written),
# lib_path, species_tree_data_path.
# `Sequnces` and `p_ids` are assigned but not used again in this function.
# NOTE(review): every shell command here is built by string interpolation
# (shell=True / os.system), so paths containing spaces or shell
# metacharacters would break or be interpreted -- confirm inputs are trusted.
# NOTE(review): the open(...) handles passed to pickle.load, PhyloXMLIO.read,
# PhyloXMLIO.write and pickle.dump are never closed explicitly.
def reconcile_tree(gene_tree_file,reconciled_file,rec_tag,pfam_id,db): if (os.path.isfile(rec_tag+'ids.pickle')) and (pplacer_flag==1): id_information = pickle.load(open(rec_tag+'ids.pickle', 'rb')) existing_genes=id_information['existing_genes'] Sequnces=[] p_ids=[] new_genes=set([w['id'] for w in pplacer_queries[pfam_id]]) if not (new_genes-set(existing_genes)): print "All %s Genes for family %s have already been placed in the reconciled tree."%(len(new_genes),pfam_id) print "Skip Reconciliation for %s"%pfam_id return txid_file=rec_tag+'txid.xml' if not(os.path.isfile(rec_tag+'ids.pickle')) or not(os.path.isfile(reconciled_file+'.gz')) or (pplacer_flag==1): print "Running Reconciliation for: %s"%pfam_id rand_id=random.randint(1000000,9999999) subprocess.check_call("gunzip -c %s/%s.nw.gz > %s.%d"%(tree_folder,pfam_id,gene_tree_file,rand_id),shell=True) tree = ete2.PhyloTree('%s.%d'%(gene_tree_file,rand_id), format=0) tree.resolve_polytomy() tree.write(format=0, outfile=txid_file+'.tmp.nw') if os.path.exists('%s.%d'%(gene_tree_file,rand_id)): subprocess.check_call("rm %s.%d"%(gene_tree_file,rand_id),shell=True) Phylo.convert(txid_file+'.tmp.nw', 'newick', txid_file+'.tmp.xml', 'phyloxml') treexml = PhyloXMLIO.read(open(txid_file+'.tmp.xml','r')) tree = treexml[0] treexml.attributes.pop('schemaLocation', None) # not supported by Forester tree.rooted = True my_ids=set([]) my_query_by_taxid={} for leaf in tree.clade.find_clades(terminal=True): up_name = leaf.name.split('/')[0] tax_id,tax_name=find_tax_id_unip(up_name,db) if tax_id not in all_species_txids: if tax_id in merged_taxid.keys(): tax_id=merged_taxid[tax_id] tax_name=find_tax_name(tax_id,db) if tax_id in best_taxid_map.keys(): tax_id=best_taxid_map[tax_id] tax_name=find_tax_name(tax_id,db) else: tax_id0=tax_id tax_id,tax_name=find_best_taxid(tax_id,db) if tax_id>0: best_taxid_map[tax_id0]=tax_id if tax_id<0: if (-tax_id) in merged_taxid.keys(): tax_id=merged_taxid[-tax_id] tax_name=find_tax_name(tax_id,db) if 
tax_id in my_query_by_taxid: my_query_by_taxid[tax_id].append(up_name) else: my_query_by_taxid[tax_id]=[up_name] my_ids.add(tax_id) my_tax_id = PhyloXML.Id(tax_id, provider='ncbi_taxonomy') taxon=PhyloXML.Taxonomy(id=my_tax_id) taxon.scientific_name = tax_name leaf._set_taxonomy(taxon) PhyloXMLIO.write(treexml, open(txid_file,'w')) os.system('rm '+txid_file+'.tmp.nw') os.system('rm '+txid_file+'.tmp.xml') print "Taxid file done for: %s"%pfam_id existing_ids=list(set(my_ids)&set(all_species_txids)) existing_genes=[g for txid in my_query_by_taxid.keys() for g in my_query_by_taxid[txid] if txid in existing_ids] pickle.dump({'pfam_id':pfam_id,'existing_ids':existing_ids,'existing_genes':existing_genes}, open(rec_tag+'ids.pickle', 'wb')) print "Pickle file done for: %s"%pfam_id if os.path.exists(reconciled_file): os.system('rm '+reconciled_file) os.system("java -Xmx4g -cp %s/forester_1038.jar org.forester.application.gsdi -g %s %s/ncbi_2_fixed.xml %s"%(lib_path, txid_file, species_tree_data_path, reconciled_file)) if os.path.exists(reconciled_file): if os.path.exists(reconciled_file+'.gz'): subprocess.check_call("rm %s.gz"%(reconciled_file),shell=True) subprocess.check_call("gzip %s"%(reconciled_file),shell=True) os.system('rm '+rec_tag+'reconciled_species_tree_used.xml') os.system('rm '+rec_tag+'reconciled_gsdi_log.txt') os.system('rm '+txid_file) print "Reconciliation file done for: %s"%pfam_id
def test_read(self):
    # Parse `source` and compare the number of items in the result and in
    # its `other` attribute with count[0] and count[1] respectively.
    parsed = PhyloXMLIO.read(source)
    self.assertTrue(parsed)
    n_trees = len(parsed)
    self.assertEqual(n_trees, count[0])
    n_others = len(parsed.other)
    self.assertEqual(n_others, count[1])
# reconcile_tree(gene_tree_file, reconciled_file, rec_tag, pfam_id, db)
#
# Python 2 code (uses print statements).  Prepares a taxonomy-annotated copy
# of the gene tree for family `pfam_id` and runs Forester's GSDI
# reconciliation on it through an external `java` call.
#
# Visible steps, in order:
#   1. If `rec_tag + 'ids.pickle'` exists and `pplacer_flag == 1`, load it and
#      compare its 'existing_genes' with the ids in `pplacer_queries[pfam_id]`;
#      when there are no new genes, two messages are printed and the function
#      returns.  NOTE(review): the nesting of that `return` is assumed --
#      confirm against the properly indented file.
#   2. When the pickle or `reconciled_file + '.gz'` is missing, or
#      `pplacer_flag == 1`: gunzip `<tree_folder>/<pfam_id>.nw.gz` to
#      `gene_tree_file` plus a random numeric suffix, load it with
#      ete2.PhyloTree, call resolve_polytomy(), write it to
#      `txid_file + '.tmp.nw'`, and convert that file from newick to phyloxml.
#   3. For each terminal clade, take the part of the leaf name before '/',
#      look up a taxonomy id/name with find_tax_id_unip, remap it through
#      `merged_taxid`, `best_taxid_map` and find_best_taxid, and attach a
#      PhyloXML.Taxonomy (provider 'ncbi_taxonomy') to the leaf.
#   4. Write the annotated tree to `txid_file`, then pickle the pfam id, the
#      tax ids that are also in `all_species_txids`, and the genes under them.
#   5. Run org.forester.application.gsdi from forester_1038.jar against
#      `<species_tree_data_path>/ncbi_2_fixed.xml`, gzip `reconciled_file` if
#      it was produced, and remove the intermediate files.
#
# Names read from outside the function: pplacer_flag, pplacer_queries,
# tree_folder, all_species_txids, merged_taxid, best_taxid_map (also written),
# lib_path, species_tree_data_path.
# `Sequnces` and `p_ids` are assigned but not used again in this function.
# NOTE(review): every shell command here is built by string interpolation
# (shell=True / os.system), so paths containing spaces or shell
# metacharacters would break or be interpreted -- confirm inputs are trusted.
# NOTE(review): the open(...) handles passed to pickle.load, PhyloXMLIO.read,
# PhyloXMLIO.write and pickle.dump are never closed explicitly.
def reconcile_tree(gene_tree_file, reconciled_file, rec_tag, pfam_id, db): if (os.path.isfile(rec_tag + 'ids.pickle')) and (pplacer_flag == 1): id_information = pickle.load(open(rec_tag + 'ids.pickle', 'rb')) existing_genes = id_information['existing_genes'] Sequnces = [] p_ids = [] new_genes = set([w['id'] for w in pplacer_queries[pfam_id]]) if not (new_genes - set(existing_genes)): print "All %s Genes for family %s have already been placed in the reconciled tree." % ( len(new_genes), pfam_id) print "Skip Reconciliation for %s" % pfam_id return txid_file = rec_tag + 'txid.xml' if not (os.path.isfile(rec_tag + 'ids.pickle')) or not ( os.path.isfile(reconciled_file + '.gz')) or (pplacer_flag == 1): print "Running Reconciliation for: %s" % pfam_id rand_id = random.randint(1000000, 9999999) subprocess.check_call("gunzip -c %s/%s.nw.gz > %s.%d" % (tree_folder, pfam_id, gene_tree_file, rand_id), shell=True) tree = ete2.PhyloTree('%s.%d' % (gene_tree_file, rand_id), format=0) tree.resolve_polytomy() tree.write(format=0, outfile=txid_file + '.tmp.nw') if os.path.exists('%s.%d' % (gene_tree_file, rand_id)): subprocess.check_call("rm %s.%d" % (gene_tree_file, rand_id), shell=True) Phylo.convert(txid_file + '.tmp.nw', 'newick', txid_file + '.tmp.xml', 'phyloxml') treexml = PhyloXMLIO.read(open(txid_file + '.tmp.xml', 'r')) tree = treexml[0] treexml.attributes.pop('schemaLocation', None) # not supported by Forester tree.rooted = True my_ids = set([]) my_query_by_taxid = {} for leaf in tree.clade.find_clades(terminal=True): up_name = leaf.name.split('/')[0] tax_id, tax_name = find_tax_id_unip(up_name, db) if tax_id not in all_species_txids: if tax_id in merged_taxid.keys(): tax_id = merged_taxid[tax_id] tax_name = find_tax_name(tax_id, db) if tax_id in best_taxid_map.keys(): tax_id = best_taxid_map[tax_id] tax_name = find_tax_name(tax_id, db) else: tax_id0 = tax_id tax_id, tax_name = find_best_taxid(tax_id, db) if tax_id > 0: best_taxid_map[tax_id0] = tax_id if tax_id < 0: if 
(-tax_id) in merged_taxid.keys(): tax_id = merged_taxid[-tax_id] tax_name = find_tax_name(tax_id, db) if tax_id in my_query_by_taxid: my_query_by_taxid[tax_id].append(up_name) else: my_query_by_taxid[tax_id] = [up_name] my_ids.add(tax_id) my_tax_id = PhyloXML.Id(tax_id, provider='ncbi_taxonomy') taxon = PhyloXML.Taxonomy(id=my_tax_id) taxon.scientific_name = tax_name leaf._set_taxonomy(taxon) PhyloXMLIO.write(treexml, open(txid_file, 'w')) os.system('rm ' + txid_file + '.tmp.nw') os.system('rm ' + txid_file + '.tmp.xml') print "Taxid file done for: %s" % pfam_id existing_ids = list(set(my_ids) & set(all_species_txids)) existing_genes = [ g for txid in my_query_by_taxid.keys() for g in my_query_by_taxid[txid] if txid in existing_ids ] pickle.dump( { 'pfam_id': pfam_id, 'existing_ids': existing_ids, 'existing_genes': existing_genes }, open(rec_tag + 'ids.pickle', 'wb')) print "Pickle file done for: %s" % pfam_id if os.path.exists(reconciled_file): os.system('rm ' + reconciled_file) os.system( "java -Xmx4g -cp %s/forester_1038.jar org.forester.application.gsdi -g %s %s/ncbi_2_fixed.xml %s" % (lib_path, txid_file, species_tree_data_path, reconciled_file)) if os.path.exists(reconciled_file): if os.path.exists(reconciled_file + '.gz'): subprocess.check_call("rm %s.gz" % (reconciled_file), shell=True) subprocess.check_call("gzip %s" % (reconciled_file), shell=True) os.system('rm ' + rec_tag + 'reconciled_species_tree_used.xml') os.system('rm ' + rec_tag + 'reconciled_gsdi_log.txt') os.system('rm ' + txid_file) print "Reconciliation file done for: %s" % pfam_id