def pubmed_adapter(search=None, id=None):
    '''
    Sample queries:
    #curl "http://localhost:8880/pubmed?"
    curl "http://localhost:8880/pubmed?search=stem+cells"
    curl "http://localhost:8880/pubmed?id=19358275"
    '''
    #FIXME: How do we handle no search or id param? Just serve up the latest entries? Or error as below?
    #assert_(not(search and id), msg="Specify either the 'search' or the 'id' query parameter, not both.")
    if search:
        #search = first_item(search)
        #reldate: only search for last N days
        #query = urllib.urlencode({'db' : NCBI_DB, 'term': query, 'reldate': '60', 'datetype': 'edat', 'retmax': DEFAULT_MAX_RESULTS, 'usehistory': 'y'})
        query = urllib.urlencode({'term': search, 'db' : NCBI_DB, 'datetype': 'edat', 'retmax': DEFAULT_MAX_RESULTS, 'usehistory': 'y'})
        search_url = NCBI_SEARCH_PATTERN + query
        logger.debug("Term search URL: " + search_url)
        doc = bindery.parse(search_url, standalone=True)
        search_terms = search
        ids = ( unicode(i) for i in doc.eSearchResult.IdList.Id )
        ids = ','.join(ids)
        self_link = '/pubmed?search=' + search
    else:
        ids = first_item(id)
        #fulltext = fulltext[0] if fulltext else u'no'
        #if fulltext == 'yes':
        search_terms = ids
        self_link = '/pubmed?id=' + ids
    query = urllib.urlencode({'db' : NCBI_DB, 'id': ids, 'retmode': 'xml'})
    search_url = NCBI_ARTICLE_ACCESS_PATTERN + query
    logger.debug("ID search URL: " + search_url)
    alt_link = search_url
    doc = bindery.parse(search_url, standalone=True, model=PUBMED_MODEL)
    #doc = bindery.parse(open('/Users/uche/tmp/efetch.fcgi.html'), standalone=True, model=PUBMED_MODEL)
    metadata, first_id = metadata_dict(generate_metadata(doc))
    return atom_results(doc, metadata, self_link, alt_link, search_terms)
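# For context, a minimal sketch of the two NCBI E-utilities round trips the adapter
# above makes.  The endpoint URLs and constants below are illustrative stand-ins,
# not values taken from the original module.
import urllib
from amara import bindery

NCBI_DB = 'pubmed'
NCBI_SEARCH_PATTERN = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?'
NCBI_ARTICLE_ACCESS_PATTERN = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?'

def pubmed_ids_for(term, retmax=20):
    # eSearch: turn a query term into a list of PubMed IDs
    url = NCBI_SEARCH_PATTERN + urllib.urlencode(
        {'db': NCBI_DB, 'term': term, 'retmax': retmax})
    doc = bindery.parse(url, standalone=True)
    return [ unicode(i) for i in doc.eSearchResult.IdList.Id ]

def pubmed_articles(ids):
    # eFetch: retrieve the full article XML for a comma-separated ID list
    url = NCBI_ARTICLE_ACCESS_PATTERN + urllib.urlencode(
        {'db': NCBI_DB, 'id': ','.join(ids), 'retmode': 'xml'})
    return bindery.parse(url, standalone=True)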
def moin2atomentries(wikibase, outputdir, rewrite, pattern):
    wikibase_len = len(rewrite)
    if pattern:
        pattern = re.compile(pattern)
    #print (wikibase, outputdir, rewrite)
    req = urllib2.Request(wikibase, headers={'Accept': RDF_IMT})
    with closing(urllib2.urlopen(req)) as resp:
        feed = bindery.parse(resp)
    for item in feed.RDF.channel.items.Seq.li:
        uri = split_fragment(item.resource)[0]
        relative = uri[wikibase_len:]
        print >> sys.stderr, uri, relative
        if pattern and not pattern.match(relative):
            continue
        if rewrite:
            uri = uri.replace(rewrite, wikibase)
        req = urllib2.Request(uri, headers={'Accept': DOCBOOK_IMT})
        with closing(urllib2.urlopen(req)) as resp:
            page = bindery.parse(resp)
        entrydate = dateparse(unicode(page.article.articleinfo.revhistory.revision.date))
        if entrydate.tzinfo == None:
            entrydate = entrydate.replace(tzinfo=DEFAULT_TZ)
        output = os.path.join(outputdir, OUTPUTPATTERN % pathsegment(relative))
        if os.access(output, os.R_OK):
            lastrev = dateparse(unicode(bindery.parse(output).entry.updated))
            if lastrev.tzinfo == None:
                lastrev = lastrev.replace(tzinfo=DEFAULT_TZ)
            if (entrydate == lastrev):
                print >> sys.stderr, 'Not updated. Skipped...'
                continue
        print >> sys.stderr, 'Writing to ', output
        with open(output, 'w') as output:
            handle_page(uri, page, outputdir, relative, output)
    return
def test_xbel_2(self):
    #BM1 = 'http://hg.4suite.org/amara/trunk/raw-file/bb6c40828b2d/demo/7days/bm1.xbel'
    #BM2 = 'http://hg.4suite.org/amara/trunk/raw-file/bb6c40828b2d/demo/7days/bm2.xbel'
    doc1 = bindery.parse(FILE('bm1.xbel'))
    doc2 = bindery.parse(FILE('bm2.xbel'))
    merge(doc1.xbel, doc2.xbel)
    normalize_whitespace(doc1)
    output = doc1.xml_encode(lookup("xml-indent")) + '\n'
    self.assertEqual(output, open(FILE('merged-2.xbel')).read())
def test_simple_atom_entry():
    '''Basic ns fixup upon mutation'''
    doc = bindery.parse(ATOMENTRY1)
    s = cStringIO.StringIO()
    xml_print(doc, stream=s)
    out = s.getvalue()
    #self.assertEqual(out, ATOMENTRY1)
    diff = treecompare.xml_diff(out, ATOMENTRY1)
    diff = '\n'.join(diff)
    assert not diff, "Expected=%r, returned=%r diff=%r" % (ATOMENTRY1, out, diff)
    #Make sure we can parse the result
    doc2 = bindery.parse(out)
def test_combined(self):
    #ATOM1 = 'http://zepheira.com/news/atom/entries/'
    #ATOM2 = 'http://ma.gnolia.com/atom/full/people/Uche'
    ATOM1 = FILE('zepheira_atom.xml')   #local download for testing
    ATOM2 = FILE('magnolia_uche.xml')   #local download for testing
    output = cStringIO.StringIO()
    combined_output = open(FILE('entries_combined.txt')).read()   #local file for testing
    doc1 = bindery.parse(ATOM1)
    doc2 = bindery.parse(ATOM2)
    combined = itertools.chain(*[doc.feed.entry for doc in (doc1, doc2)])
    for node in sorted(combined, key=operator.attrgetter('updated')):
        print >> output, node.title
    self.assertEqual(output.getvalue(), combined_output)
def aggregate_entries(envelope, entries):
    '''
    envelope - input source of atom feed document to enclose entries;
               if it has any entries, the new ones are appended
    entries - sequence of entry input sources
    '''
    envelope_doc = bindery.parse(envelope, model=FEED_MODEL)
    entrydocs = [ bindery.parse(entry, model=ENTRY_MODEL) for entry in entries ]
    #for entry in sorted(entrydocs, key=lambda x: attrgetter('updated')):
    for entry_doc in sorted(entrydocs, key=lambda x: str(x.entry.updated), reverse=True):
        envelope_doc.feed.xml_append(entry_doc.entry)
    metadata = generate_metadata(envelope_doc)
    return envelope_doc, metadata
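# Hypothetical use of aggregate_entries: the file names below are invented for
# illustration; any input source accepted by bindery.parse (path, URL, stream)
# should work for both the envelope and the individual entries.
feed_doc, metadata = aggregate_entries('feed_envelope.xml',
                                       ['entry1.atom', 'entry2.atom'])
print feed_doc.xml_encode()   # serialize the combined feed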
def parse_landing_zone(ips_dict, cluster_config):
    """ Returns the first landing zone found.
        return (public_ip, directory)"""
    cluster_config = cluster_config.strip().encode('utf-8')
    if not ips_dict:
        raise ClusterError('Cannot parse landing zone. ' +
                           'No ip translation table. ' +
                           'Cluster may not be ready yet.')
    if not cluster_config:
        raise ClusterError('Cannot parse landing zone. ' +
                           'Config is empty. ' +
                           'Cluster may not be ready yet.')
    doc = bindery.parse(cluster_config.strip())
    # Create node to ip address translation table
    nodes = {}
    for e in doc.Environment.Hardware.Computer:
        nodes[e.name] = [e.netAddress]
    Software = doc.Environment.Software
    # attributes - build, buildSet, computer, description, directory, name
    instances = Software.DropZone
    for instance in instances:
        nodes[instance.computer].append('Drop Zone')
    computer = instances[0].computer
    private_ip = nodes[computer][0]
    directory = instances[0].directory
    public_ip = ips_dict[private_ip]
    return (public_ip, directory)
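# A rough, made-up sketch of the configuration shape parse_landing_zone expects:
# Computer elements keyed by name/netAddress, and a DropZone whose 'computer'
# attribute points back at one of them.  Element and attribute names follow the
# code above; the concrete values are illustrative only.
SAMPLE_CONFIG = '''<Environment>
  <Hardware>
    <Computer name="node1" netAddress="10.0.0.1"/>
  </Hardware>
  <Software>
    <DropZone computer="node1" directory="/var/lib/dropzone" name="mydropzone"/>
  </Software>
</Environment>'''

public_ip, directory = parse_landing_zone({'10.0.0.1': '203.0.113.5'}, SAMPLE_CONFIG)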
def moincms(wikibase, outputdir, pattern):
    if pattern:
        pattern = re.compile(pattern)
    #print (wikibase, outputdir, rewrite)
    req = urllib2.Request(wikibase, headers={'Accept': RDF_IMT})
    resp = urllib2.urlopen(req)
    original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
    feed = bindery.parse(resp)
    process_list = []
    for item in feed.RDF.channel.items.Seq.li:
        uri = split_fragment(item.resource)[0]
        #print >> sys.stderr, (uri, str(item.resource), split_fragment(item.resource))
        #Deal with the wrapped URI
        if original_wiki_base:
            #print >> sys.stderr, (uri, original_wiki_base.rstrip('/')+'/')
            relative = relativize(uri, original_wiki_base.rstrip('/') + '/').lstrip('/')
            uri = absolutize(relative, wikibase)
        #print >> sys.stderr, (uri, relative)
        if pattern and not pattern.match(relative):
            continue
        n = node.factory(uri, relative, outputdir)
        if n.up_to_date():
            pass
            #print >> sys.stderr, 'Up to date. Skipped...'
        else:
            process_list.append(n)
    #Process nodes needing update according to priority
    for n in sorted(process_list, key=attrgetter('PRIORITY'), reverse=True):
        #print >> sys.stderr, "processing ", n.rest_uri
        n.render()
    return
def read_info(fname):
    all = bindery.parse(fname.read()).vector
    info = [v for v in all.xml_children if v.xml_type == "element"]

    def setImplements(fcns):
        for fcn in fcns:
            fcn.name = str(fcn.nameStr)
            fcn.impl = getattr(fcn, "implName", None)
            fcn.impl = (str(fcn.impl) if fcn.impl else fcn.impl)
            fcn.line_idx = int(str(fcn.lineNum)) - 1
            fcn.is_generator = str(fcn.isGenerator) == "true"
        for fcn in fcns:
            fcn.is_toplevel = getattr(fcn, "is_toplevel", False) or bool(fcn.impl)
            if fcn.impl:
                try:
                    get_singleton(v for v in fcns if v.name == fcn.impl).is_toplevel = True
                except GetSingletonEmptyException:
                    global couldntfindexception
                    couldntfindexception = True
                    print("couldn't find function %s for file %s" % (fcn.impl, fcn.srcFile))
        return fcns

    return dict(list(info).equiv_classes(
        lambda a: Path(str(a.srcFile)))).map_values(setImplements)
def scanXMLDocument(self, path):
    i = 0
    self.XML_W = lookup("xml")
    self.doc = bindery.parse(path)
    self.control.SetText(open(path).read())
    self.control.EmptyUndoBuffer()
    self.control.Colourise(0, -1)
    # line numbers in the margin
    self.control.SetMarginType(1, wx.stc.STC_MARGIN_NUMBER)
    self.control.SetMarginWidth(1, 25)
    #self.control.BeginTextColour((255,0,0))
    #self.control.SetValue(self.doc.xml_encode(self.XML_W))
    self.Sentences = []
    for i in range(0, self.control.GetNumberOfLines()):
        Line = self.control.GetLineText(i)
        # Remove all whitespace from the line before testing it
        Line = re.sub(r"\s+", "", Line, flags=re.UNICODE)
        if Line.startswith('<Si'):
            """ Every time we encounter '<Si' at the beginning of a line,
            we append the line number to the Sentences list """
            self.Sentences.append(i)
def testDelChildElement7(self):
    DOC = "<a><b>spam</b><b>eggs</b></a>"
    EXPECTED = '<a><b>spam</b></a>'
    doc = bindery.parse(DOC)
    del doc.a[u'b'][1]
    self.compare_output(doc, XMLDECL+EXPECTED)
    return

def test_update_bindery(self):
    doc = bindery.parse(self.XML)
    #Add a new text node to a (--> last child)
    doc.a.xml_append(u'New Content')
    self.assertEqual(doc.a.xml_children[-1].xml_value, u'New Content')
    new_elem = doc.xml_element_factory(None, u'spam')
    doc.a.xml_append(new_elem)
    self.assertEqual(doc.a.xml_children[-1], new_elem)
    new_text = amara.tree.text(u'New Content')
    doc.a.xml_insert(1, new_text)
    self.assertEqual(doc.a.xml_children[1], new_text)
    #Remove the last b child from a
    num_kids = len(doc.a.xml_children)
    #e1 = doc.a.b[-1].e
    b1 = doc.a.b[1]
    b1.xml_parent.xml_remove(b1)
    self.assertEqual(len(doc.a.xml_children), num_kids-1)
    doc = bindery.nodes.entity_base()
    #doc.xml_clear() #Remove all children from a
    doc.xml_append_fragment(self.XML)
    check_bindery(self, doc)
    return

def __init__(self, egdoc):
    from amara import bindery
    self.model_document = bindery.parse(egdoc)
    self.model_document.xml_model.prefixes = top_namespaces(self.model_document)
    self.setup_model()
    return

def testSetChildElement5(self):
    DOC = "<a><b>spam</b><b>spam</b></a>"
    EXPECTED = '<a><b>spam</b><b>eggs</b></a>'
    doc = bindery.parse(DOC)
    doc.a.b[1] = u"eggs"
    self.compare_output(doc, XMLDECL+EXPECTED)
    return
def xml_append_fragment(self, frag):
    from amara.bindery import parse
    doc = parse(frag)
    for child in doc.xml_children:
        self.xml_append(child)
    return
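# A small, hypothetical illustration of xml_append_fragment: parse a document,
# then splice a well-formed fragment's children into one of its elements.
# (The inventory/item markup here is invented for the example.)
from amara import bindery

doc = bindery.parse('<inventory><item>spam</item></inventory>')
doc.inventory.xml_append_fragment('<item>eggs</item>')
print doc.xml_encode()   # the new <item> now follows the original one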
def testDelChildElement4(self):
    DOC = "<a><b>spam</b><b>spam</b></a>"
    doc = bindery.parse(DOC)
    def edit():
        del doc.a.b[2]
    self.assertRaises(IndexError, edit)
    return

def testSetAttribute7(self):
    DOC = '<a><b>spam</b></a>'
    EXPECTED = '<a foo="bar"><b>spam</b></a>'
    doc = bindery.parse(DOC)
    doc.a.xml_attributes[u"foo"] = u"bar"
    self.compare_output(doc, XMLDECL+EXPECTED)
    return

def testDelAttribute1(self):
    DOC = '<a b="spam"><b>spam</b></a>'
    EXPECTED = '<a><b>spam</b></a>'
    doc = bindery.parse(DOC)
    del doc.a.b
    self.compare_output(doc, XMLDECL+EXPECTED)
    return

def testSetAttribute2(self):
    DOC = '<a b="spam"><b>spam</b></a>'
    EXPECTED = '<a b="eggs"><b>spam</b></a>'
    doc = bindery.parse(DOC)
    doc.a.xml_attributes[u'b'] = u"eggs"
    self.compare_output(doc, XMLDECL+EXPECTED)
    return
def OnSave(self, e):
    # Save away the edited text
    # Open the file, with an "are you sure?" check before overwriting
    # Grab the content to be saved
    itcontains = self.control.GetValue().encode("utf-8")
    # Check that the XML is well-formed before saving
    try:
        self.doc = bindery.parse(itcontains)
    except bindery.Error as e:
        line = e.lineNumber
        dial = wx.MessageDialog(None,
                                'Error with XML parsing at line ' + str(line-1) +
                                '\nCannot save document, please correct the error',
                                'Error', wx.OK | wx.ICON_ERROR)
        if dial.ShowModal() == wx.ID_OK:
            dial.Destroy()
        return
    dlg = wx.FileDialog(self, "Choose a file", self.dirname, self.filename, "*.*", \
                        wx.SAVE | wx.OVERWRITE_PROMPT)
    if dlg.ShowModal() == wx.ID_OK:
        # Open the file for write, write, close
        self.filename = dlg.GetFilename()
        self.dirname = dlg.GetDirectory()
        filehandle = open(os.path.join(self.dirname, self.filename), 'w')
        filehandle.write(itcontains)
        filehandle.close()
    # Get rid of the dialog to keep things tidy
    dlg.Destroy()
def testSetChildElement6(self):
    DOC = "<a><b>spam</b><b>spam</b></a>"
    doc = bindery.parse(DOC)
    def edit():
        doc.a.b[2] = u"eggs"
    self.assertRaises(IndexError, edit)
    return
def factory(rest_uri, moin_link=None, opener=None):
    opener = opener or urllib2.build_opener()
    logger.debug('rest_uri: ' + rest_uri)
    req = urllib2.Request(rest_uri, headers={'Accept': DOCBOOK_IMT})
    resp = opener.open(req)
    doc = bindery.parse(resp, standalone=True, model=MOIN_DOCBOOK_MODEL)
    original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
    #self.original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
    #amara.xml_print(self.content_cache)
    metadata, first_id = metadata_dict(generate_metadata(doc))
    metadata = metadata[first_id]
    akara_type = U(metadata[u'ak-type'])
    logger.debug('Type: ' + akara_type)
    try:
        #Older Moin CMS resource types are implemented by registration to the global node.NODES
        cls = node.NODES[akara_type]
    except KeyError:
        #Newer Moin CMS resource types are implemented by discovery of a URL,
        #to which a POST request executes the desired action
        return node.ENDPOINTS and (rest_uri, akara_type, node.ENDPOINTS[akara_type], doc, metadata, original_wiki_base)
    else:
        instance = cls(rest_uri, moin_link, opener, cache=(doc, metadata, original_wiki_base))
        return instance
def addAudioSample(self, start, end):
    i = self.control.GetInsertionPoint()
    j = self.control.LineFromPosition(i)
    section = self.getSectionNumber(j)
    line = self.control.GetFirstVisibleLine()
    try:
        self.doc = bindery.parse(self.control.GetValue().encode("utf-8"))
    except bindery.Error as e:
        print e.lineNumber
    if section != 0:
        try:
            self.doc.TEXT.S[section-1].AUDIO.start = self.SimplifyAudioLimit(start)
            self.doc.TEXT.S[section-1].AUDIO.end = self.SimplifyAudioLimit(end)
        except AttributeError:
            new_elem = self.doc.xml_element_factory(None, u"AUDIO")
            new_elem.xml_attributes.setnode(new_elem.xml_attribute_factory(None, u"start", self.SimplifyAudioLimit(start)))
            new_elem.xml_attributes.setnode(new_elem.xml_attribute_factory(None, u"end", self.SimplifyAudioLimit(end)))
            self.doc.TEXT.S[section-1].xml_insert(0, new_elem)
        self.control.SetValue(self.doc.xml_encode(self.XML_W))
        i = self.control.PositionFromLine(j)
        self.control.SetInsertionPoint(i)
        self.control.ScrollToLine(line)
        self.focus = section-1
def list_records(self, set="", resumption_token=""):
    '''
    List records. Use either the resumption token or set id.
    '''
    if resumption_token:
        params = {'verb': 'ListRecords', 'resumptionToken': resumption_token}
    else:
        params = {'verb': 'ListRecords', 'metadataPrefix': 'oai_dc', 'set': set}
    qstr = urllib.urlencode(params)
    url = self.root + '?' + qstr
    self.logger.debug('OAI request URL: {0}'.format(url))
    start_t = time.time()
    resp, content = self.h.request(url)
    retrieved_t = time.time()
    self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))
    doc = bindery.parse(url, model=OAI_LISTRECORDS_MODEL)
    records, first_id = metadata_dict(generate_metadata(doc), nesteddict=False)
    for id_, props in records:
        for k, v in props.iteritems():
            props[k] = [ U(item) for item in v ]
    if (doc.OAI_PMH.ListRecords is not None) and (doc.OAI_PMH.ListRecords.resumptionToken is not None):
        resumption_token = U(doc.OAI_PMH.ListRecords.resumptionToken)
    else:
        resumption_token = ''
    return {'records': records, 'resumption_token': resumption_token}
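# A hypothetical harvesting loop built on list_records: keep following the
# OAI-PMH resumptionToken until the server stops returning one.  'client' is
# whatever object exposes the list_records method above; set_id is a placeholder.
def harvest_all(client, set_id):
    # first page is requested by set id, subsequent pages by resumption token
    result = client.list_records(set=set_id)
    pages = [result['records']]
    while result['resumption_token']:
        result = client.list_records(resumption_token=result['resumption_token'])
        pages.append(result['records'])
    return pages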
def testSetAttribute5(self):
    DOC = '<a><b>spam</b></a>'
    EXPECTED = '<a xmlns:ns="urn:bogus" ns:foo="bar"><b>spam</b></a>'
    doc = bindery.parse(DOC)
    doc.a.xmlns_attributes[u'ns'] = u'urn:bogus'
    doc.a.xml_attributes[u'ns:foo'] = u'bar'
    self.compare_output(doc, XMLDECL+EXPECTED)
    return

def testSetAttribute4(self):
    DOC = '<a><b>spam</b></a>'
    EXPECTED = '<a><b xml:lang="en">spam</b></a>'
    doc = bindery.parse(DOC)
    # doc.a.b.xml_set_attribute((u"xml:lang"), u"en")
    doc.a.b.xml_attributes[u'xml:lang'] = u'en'
    self.compare_output(doc, XMLDECL+EXPECTED)
    return

def testSetAttribute3(self):
    from xml.dom import Node
    DOC = '<a b="spam"><b>spam</b></a>'
    EXPECTED = '<a b="eggs"><b>spam</b></a>'
    doc = bindery.parse(DOC)
    doc.a[ATTRIBUTE_NODE, None, u'b'] = u'eggs'
    self.compare_output(doc, XMLDECL+EXPECTED)
    return
def render(self):
    #Copy attachments to dir
    req = urllib2.Request(self.rest_uri, headers={'Accept': ATTACHMENTS_IMT})
    resp = urllib2.urlopen(req)
    doc = bindery.parse(resp, model=ATTACHMENTS_MODEL)
    for attachment in (doc.attachments.attachment or ()):
        print attachment
    return
def test_parse_with_file_path(self):
    """Parse with file path"""
    fname = tempfile.mktemp('.xml')
    fout = open(fname, 'w')
    fout.write(MONTY_XML)
    fout.close()
    doc = parse(fname)
    self.run_checks(doc)

def Xtest_parse_with_url(self):
    doc = parse(TEST_URL)
    #Minimal node testing
    self.assertEqual(len(doc.xml_children), 1)
    self.assertEqual(doc.xml_children[0].xml_type, tree.element.xml_type)
    self.assertEqual(doc.xml_children[0].xml_qname, 'disclaimer')
    self.assertEqual(doc.xml_children[0].xml_namespace, None)
    self.assertEqual(doc.xml_children[0].xml_prefix, None)
def test_xslt():
    url = server() + "akara.xslt?" + urllib.urlencode({"@xslt": XSLT_URL})
    req = urllib2.Request(url)
    req.add_header("Content-Type", "text/xml")
    response = urllib2.urlopen(req, XML_DATA)
    doc = bindery.parse(response)
    assert str(doc.html.head.title) == "Document Title", repr(str(doc.html.head.title))
def test_xpath1(self):
    """Test XPath with explicit prefix bindings"""
    doc = bindery.parse(self.DOC)
    self.assertEqual(doc.xml_select(u'ns:friends/ns:bff', prefixes=self.PREFIXES), u'Suzie Q')
    self.assertEqual(
        [ unicode(f.xml_select(u'concat(@rank, ": ", .)')) for f in doc.friends.bff ],
        [u'1: Suzie Q', u'2: Betty Boost'])
    return
def __init__(self, schdoc):
    from amara import bindery
    dispatcher.__init__(self)
    self.model_document = bindery.parse(schdoc)
    self.model_document.xml_model.prefixes = top_namespaces(self.model_document)
    self.rules = []
    self.setup_model()
    return

def __init__(self, parent):
    self._init_ctrls(parent)
    # replace the file name below
    fileName = 'Musica/crdo-NRU_F4_10_AGRICULTURAL_ACTIVITIES.xml'
    file = os.path.join(os.getcwd(), fileName)
    self.xmlDoc = bindery.parse(file)
    self.styledTextCtrl1.AddTextUTF8(self.xmlDoc.xml(indent=u'yes'))
def ejsonize(isrc):
    '''
    Convert Atom syntax to a dictionary
    Note: the conventions used are designed to simplify conversion to Exhibit JSON
    (see: http://www.ibm.com/developerworks/web/library/wa-realweb6/ ; listing 3)
    '''
    doc = bindery.parse(isrc, model=FEED_MODEL)
    def process_entry(e):
        known_elements = [u'id', u'title', u'link', u'author', u'category', u'updated', u'content', u'summary']
        data = {
            u"id": unicode(e.id),
            #XXX Shall we use title for label?
            u"label": unicode(e.id),
            u"type": u"Entry",
            u"title": unicode(e.title),
            u"link": first_item([ l.href for l in e.link if l.rel in [None, u"alternate"] ], []),
            #Nested list comprehension to select the alternate link,
            #then select the first result ([0]) and get its href attribute
            u"authors": [ unicode(a.name) for a in iter(e.author or []) ],
            #Nested list comprehension to create a list of category values
            u"categories": [ unicode(c.term) for c in iter(e.category or []) ],
            u"updated": unicode(e.updated),
        }
        if not data[u"categories"]:
            del data[u"categories"]
        if e.summary is not None:
            data[u"summary"] = unicode(e.summary)
        if e.content is not None:
            try:
                data[u"content_src"] = unicode(e.content.src)
            except AttributeError:
                data[u"content_text"] = deserialize_text_construct(e.content)
        for child in e.xml_elements:
            if child.xml_namespace != ATOM_NAMESPACE and child.xml_local not in known_elements:
                data[child.xml_local] = unicode(child)
        return data

    try:
        doc_entries = iter(doc.feed.entry)
        feedinfo = {
            u"id": unicode(doc.feed.id),
            #XXX Shall we use title for label?
            u"label": unicode(doc.feed.id),
            u"type": u"Feed",
            u"title": unicode(doc.feed.title),
            u"link": first_item([ l.href for l in doc.feed.link if l.rel in [None, u"alternate"] ], []),
            u"authors": [ unicode(a.name) for a in iter(doc.feed.author or []) ],
            u"updated": unicode(doc.feed.updated),
        }
    except AttributeError:
        try:
            doc_entries = iter(doc.entry)
            feedinfo = None
        except AttributeError:
            #FIXME L10N
            raise ValueError("Does not appear to be a valid Atom file")
    return [ process_entry(e) for e in doc_entries ]
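# Hypothetical use of ejsonize to produce Exhibit-style JSON.  The feed URL is a
# placeholder; any input source accepted by bindery.parse would work.
import json

entries = ejsonize('http://example.org/feed.atom')
print json.dumps({'items': entries}, indent=4)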
def test_nasty_xml_1(self):
    """XML with 2 elements with same local name and different NS on same parent"""
    doc = parse(NASTY_NS_XML1)
    self.assertEqual(len(doc.top.xml_children), 5)
    self.assertEqual(len(list(doc.top.monty)), 1)
    self.assertEqual(len(list(doc.top.monty_)), 1)
    self.assertEqual(doc.top.monty.xml_namespace, u"urn:bogus:a")
    self.assertEqual(doc.top.monty_.xml_namespace, u"urn:bogus:b")
    self.assertEqual(doc.top.monty.xml_following_sibling.xml_following_sibling, doc.top.monty_)

def testInsertBefore1(self):
    DOC = "<a><b>eggs</b></a>"
    EXPECTED = '<a><b>spam</b><b>eggs</b></a>'
    doc = bindery.parse(DOC)
    new = doc.xml_element_factory(None, u'b')
    new.xml_append(doc.xml_text_factory(u'spam'))
    doc.a.xml_insert(0, new)
    self.compare_output(doc, XMLDECL+EXPECTED)
    return
def test_xpath1(self):
    """Test XPath using in-scope namespace declarations"""
    doc = bindery.parse(self.DOC)
    self.assertEqual(doc.xml_select(u'ns:friends/ns:bff'), u'Suzie Q')
    self.assertEqual(
        [ unicode(f.xml_select(u'concat(@rank, ": ", .)')) for f in doc.friends.bff ],
        [u'1: Suzie Q', u'2: Betty Boost'])
    return
def test_xpath(self):
    doc = bindery.parse(self.MONTY_XML)
    m = doc.monty
    p1 = doc.monty.python
    self.assertEqual(p1.xml_select(u'string(@spam)'), u'eggs')
    for p, line in zip(doc.xml_select(u'//python'), self.lines_py):
        output = cStringIO.StringIO()
        xml_print(p, stream=output)
        self.assertEqual(output.getvalue(), line)
def parseXMLDocument(self, path):
    i = 0
    self.XML_W = lookup("xml")
    try:
        self.doc = bindery.parse(path)
    except amara.lib.IriError:
        self.doc = bindery.parse(path, standalone=True)
    #self.control.SetText(open(path).read())
    self.control.EmptyUndoBuffer()
    self.control.Colourise(0, -1)
    # line numbers in the margin
    self.control.SetMarginType(1, wx.stc.STC_MARGIN_NUMBER)
    self.control.SetMarginWidth(1, 25)
    self.control.SetValue(self.doc.xml_encode(self.XML_W).decode("utf-8").replace("<S id=", "\n <S id=").replace("<", u"\u2039").replace(">", u"\u203A"))
def testInsertAfter3(self):
    DOC = "<a><b>spam</b><c>ham</c><c>pork</c></a>"
    EXPECTED = "<a><b>spam</b><c>eggs</c><c>ham</c><c>pork</c></a>"
    doc = bindery.parse(DOC)
    new = doc.xml_element_factory(None, u'c')
    new.xml_append(doc.xml_text_factory(u'eggs'))
    doc.a.xml_insert(doc.a.xml_index(doc.a.b) + 1, new)
    self.compare_output(doc, XMLDECL+EXPECTED)
    return

def test_parse_with_stream(self):
    """Parse with stream"""
    fname = tempfile.mktemp('.xml')
    fout = open(fname, 'w')
    fout.write(MONTY_XML)
    fout.close()
    fout = open(fname, 'r')
    doc = parse(fout)
    fout.close()
    self.run_checks(doc)
def test_bindery(self):
    doc = bindery.parse(self.MONTY_XML)
    m = doc.monty
    p1 = doc.monty.python   #or m.python; p1 is just the first python element
    self.assertEqual(p1.xml_attributes[(None, u'spam')], u'eggs')
    self.assertEqual(p1.spam, u'eggs')
    #The loop will pick up both python elements
    for p, line in zip(doc.monty.python, self.lines_py):
        output = cStringIO.StringIO()
        xml_print(p, stream=output)
        self.assertEqual(output.getvalue(), line)
def testXml(self):
    tmp = self.control.GetValue().encode("utf-8")
    try:
        self.doc = bindery.parse(tmp)
    except bindery.Error as e:
        line = e.lineNumber
        res = self.errorDialog(line)
        return res
    return 1
def test_avt1(self):
    """Test AVT"""
    doc = bindery.parse(self.DOC)
    self.assertEqual(doc.xml_avt(u'Hello, {friends/bff}'), u'Hello, Suzie Q')
    self.assertEqual(
        [ f.xml_avt(u'Big up, {.}, rank {@rank}') for f in doc.friends.bff ],
        [u'Big up, Suzie Q, rank 1', u'Big up, Betty Boost, rank 2'])
    self.assertEqual(
        [ f.xml_avt(u'{@rank}: {.}') for f in sorted(doc.friends.bff, key=attrgetter("rank")) ],
        [u'1: Suzie Q', u'2: Betty Boost'])
    return
def test_charsearch():
    url = server() + "akara.unicode.search?q=DAGGER"
    doc = bindery.parse(urllib2.urlopen(url))
    names = set()
    see_alsos = set()
    for child in doc.xml_select(u"characters/character"):
        names.add(child.name)
        see_alsos.add(child.see_also)
    assert names == set(["DAGGER", "DOUBLE DAGGER"]), names
    assert see_alsos == set(
        ["http://www.fileformat.info/info/unicode/char/2020/index.htm",
         "http://www.fileformat.info/info/unicode/char/2021/index.htm"]), see_alsos
def search(self, term):
    qstr = urllib.urlencode({'verb': 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': term})
    url = DSPACE_OAI_ENDPOINT + '?' + qstr
    logger.debug('DSpace URL: ' + str(url))
    #keywords = [ (k.strip(), JOVE_TAG) for k in unicode(row.xml_select(u'string(.//*[@class="keywords"])')).split(',') ]
    doc = bindery.parse(url, model=OAI_MODEL)
    #print >> sys.stderr, list(generate_metadata(doc))
    resources, first_id = metadata_dict(generate_metadata(doc), nesteddict=False)
    record = doc.OAI_PMH
    resource = resources[first_id]
def test_namespace_free_xhtml3(self):
    'namespace-free XHTML' + '...as XML with pretty print'
    doc = self._build_namespace_free_xhtml()
    s = cStringIO.StringIO()
    xml_print(doc, stream=s, indent=True)
    out = s.getvalue()
    #self.assertEqual(out, ATOMENTRY1)
    diff = treecompare.xml_diff(out, XHTML_EXPECTED_3, whitespace=False)
    diff = '\n'.join(diff)
    self.assertFalse(diff, msg=(None, diff))
    #Make sure we can parse the result
    doc2 = bindery.parse(out)
def test_avt1(self):
    """Test AVT"""
    doc = bindery.parse(self.DOC)
    #FIXME: try to work it so prefixes= is not needed for this case (declaration in scope)
    self.assertEqual(doc.xml_avt(u'Hello, {ns:friends/ns:bff}', prefixes=self.PREFIXES), u'Hello, Suzie Q')
    self.assertEqual(
        [ f.xml_avt(u'Big up, {.}, rank {@rank}') for f in doc.friends.bff ],
        [u'Big up, Suzie Q, rank 1', u'Big up, Betty Boost, rank 2'])
    self.assertEqual(
        [ f.xml_avt(u'{@rank}: {.}') for f in sorted(doc.friends.bff, key=attrgetter("rank")) ],
        [u'1: Suzie Q', u'2: Betty Boost'])
    return