def test_pre_simile_missing_id():
    """Run ``exhibit.prep`` on a record whose ``id`` key has been removed.

    The test makes no assertions; it passes as long as nothing raises.
    """
    records = atomtools.ejsonize(filesource('rfc4287-1-1-2.atom').source)
    first = records[0]
    first[u'type'] = u'atom:entry'
    del first[u'id']
    exhibit.prep(records, schema=PIPELINES)
def test_entry_basics1(self):
    """Check rel and href of the entry link in rfc4287-1-1-1.atom."""
    parsed = feed(feedxml=tidy_atom(filesource('rfc4287-1-1-1.atom')))
    link = parsed.feed.entry.link
    # The link is expected to carry no rel attribute at all
    self.assertEqual(link.rel, None)
    self.assertEqual(
        unicode(link.href),
        u'http://example.org/2003/12/13/atom03',
    )
def test_atom2rdf_rss(self):
    """Testing atom to RSS conversion

    Parses rfc4287-1-1-2.atom, checks the feed title, then compares the
    output of ``rss1format()`` against ``self.EXPECTED_RSS1``.
    """
    EXPECTED_TITLE = u'dive into mark'
    f = feed(feedxml=tidy_atom(filesource('rfc4287-1-1-2.atom')))
    self.assertEqual(unicode(f.feed.title), EXPECTED_TITLE)
    # assertTrue replaces the deprecated assert_ alias (removed in Python 3.12)
    self.assertTrue(xml_compare(f.rss1format(), self.EXPECTED_RSS1))
def test_pre_simile1():
    """Run ``exhibit.prep`` on the records from rfc4287-1-1-2.atom.

    The first record is retyped to ``atom:entry`` beforehand. The test
    makes no assertions; it passes as long as nothing raises.
    """
    records = atomtools.ejsonize(filesource('rfc4287-1-1-2.atom').source)
    records[0][u'type'] = u'atom:entry'
    exhibit.prep(records, schema=PIPELINES)
def test_pre_simile_missing_field():
    """Run ``exhibit.prep`` with an entry schema that also requires ``spam``.

    The test makes no assertions; it passes as long as nothing raises.
    """
    records = atomtools.ejsonize(filesource('rfc4287-1-1-2.atom').source)
    records[0][u'type'] = u'atom:entry'

    # Start from the extra required "spam" field, then layer the regular
    # entry pipeline on top of it
    entry_schema = {u'spam': (first_item, exhibit.REQUIRE)}
    entry_schema.update(ENTRY_PIPELINE)

    # Only atom:entry is registered; atom:feed is unused in this test
    exhibit.prep(records, schema={u'atom:entry': entry_schema})
def test_entry_basics2(self):
    """Check the first two entry links (rel and href) in rfc4287-1-1-2.atom."""
    parsed = feed(feedxml=tidy_atom(filesource('rfc4287-1-1-2.atom')))
    links = parsed.feed.entry.link

    # Unindexed access addresses the first link
    self.assertEqual(unicode(links.rel), u'alternate')
    self.assertEqual(
        unicode(links.href),
        u'http://example.org/2005/04/02/atom',
    )

    second = links[1]
    self.assertEqual(unicode(second.rel), u'enclosure')
    self.assertEqual(
        unicode(second.href),
        u'http://example.org/audio/ph34r_my_podcast.mp3',
    )
def test_feed_basics(self):
    """Check the first two feed-level links (rel and href) in rfc4287-1-1-2.atom."""
    parsed = feed(feedxml=tidy_atom(filesource('rfc4287-1-1-2.atom')))
    links = parsed.feed.link

    # Unindexed access addresses the first link
    self.assertEqual(unicode(links.rel), u'alternate')
    self.assertEqual(unicode(links.href), u'http://example.org/')

    second = links[1]
    self.assertEqual(unicode(second.rel), u'self')
    self.assertEqual(unicode(second.href), u'http://example.org/feed.atom')
def test_parse_file(self):
    """Parse nastytagsoup1.html and check the shape of the resulting document."""
    doc = html.parse(filesource('nastytagsoup1.html').source)

    # Exactly one top-level child is expected: the html element
    self.assertEqual(len(doc.xml_children), 1)
    root = doc.xml_children[0]
    self.assertEqual(root.xml_type, tree.element.xml_type)
    self.assertEqual(root.xml_qname, 'html')
    self.assertEqual(root.xml_namespace, None)
    self.assertEqual(root.xml_prefix, None)

    child_elements = list(doc.html.xml_elements)
    self.assertEqual(len(child_elements), 2)
def test_reserved_attributes_page_ns():
    """Check the first content ``h1`` of tagsoup2.html parsed with the XHTML namespace.

    The element is selected through the ``h`` prefix, its serialization is
    compared with the expected markup, and its ``id`` attribute is checked.
    """
    expected_xml = '<h1 xmlns="http://www.w3.org/1999/xhtml" xmlns:h="http://www.w3.org/1999/xhtml" id="akara:metadata">akara:metadata</h1>'
    source = filesource('tagsoup2.html').source
    doc = html.parse(source, prefixes=XHTML_NSS, use_xhtml_ns=True)

    matches = doc.xml_select(u'//h:div[@id="content"]//h:h1')
    first_h1 = matches[0]
    treecompare.check_xml(first_h1.xml_encode(), expected_xml)

    actual_id = first_h1.id
    assert actual_id == u'akara:metadata', (actual_id, u'akara:metadata')
def test_reserved_attributes_page():
    """Check the first content ``h1`` of tagsoup2.html parsed with default options.

    The element's serialization is compared with the expected markup and
    its ``id`` attribute is checked.
    """
    expected_xml = '<h1 id="akara:metadata">akara:metadata</h1>'
    doc = html.parse(filesource('tagsoup2.html').source)

    matches = doc.xml_select(u'//div[@id="content"]//h1')
    first_h1 = matches[0]
    treecompare.check_xml(first_h1.xml_encode(), expected_xml)

    actual_id = first_h1.id
    assert actual_id == u'akara:metadata', (actual_id, u'akara:metadata')
def test_ejsonize1(self):
    """Compare ``ejsonize`` output for rfc4287-1-1-2.atom with one expected record."""
    expected_entry = {
        u'type': u'Entry',
        u'label': u'tag:example.org,2003:3.2397',
        u'title': u'Atom draft-07 snapshot',
        u'link': u'http://example.org/2005/04/02/atom',
        u'updated': u'2005-07-31T12:29:29Z',
        u'authors': [u'Mark Pilgrim'],
        # The summary is expected as the literal string 'None'
        u'summary': u'None',
        u'content_text': u'\n \n [Update: The Atom draft is finished.]\n \n ',
    }
    actual = ejsonize(filesource('rfc4287-1-1-2.atom').source)
    self.assertEqual(actual, [expected_entry])
def test_tagsoup1(self):
    """Check the RDFa attribute and in-scope namespaces of the first ``h1`` in tagsouprdfa1.html."""
    doc = html.parse(filesource('tagsouprdfa1.html').source)
    heading = doc.xml_select(u'//h1')[0]

    # The property attribute is checked both as a Python attribute and
    # through the attribute mapping
    self.assertEqual(heading.property, u'dc:title')
    self.assertEqual(heading.xml_attributes[None, u'property'], u'dc:title')

    # The xml prefix is checked on a copy of the namespace mapping first
    self.assertEqual(
        heading.xml_namespaces.copy()[u'xml'],
        u'http://www.w3.org/XML/1998/namespace',
    )

    expected_namespaces = (
        (u'xml', u'http://www.w3.org/XML/1998/namespace'),
        (u'd', u'http://purl.org/dc/elements/1.1/'),
        (u'xlink', u'http://www.w3.org/1999/xlink'),
        (u'mml', u'http://www.w3.org/1998/Math/MathML'),
        (u'xs', u'http://www.w3.org/2001/XMLSchema'),
        (u'aml', u'http://topazproject.org/aml/'),
    )
    for prefix, uri in expected_namespaces:
        self.assertEqual(heading.xml_namespaces[prefix], uri)
def test_parse_file(self):
    """Parse entry1.atom (run through ``tidy_atom``) with bindery and check element counts."""
    tidied = tidy_atom(filesource('entry1.atom'))
    doc = bindery.parse(tidied)

    self.assertEqual(len(doc.xml_children), 1)
    entries = list(doc.entry)
    self.assertEqual(len(entries), 1)
    entry_links = list(doc.entry.link)
    self.assertEqual(len(entry_links), 1)
def test_ejsonize1(self):
    """Check that ``ejsonize`` turns rfc4287-1-1-2.atom into the expected one-record list."""
    expected_records = [
        {
            u'updated': u'2005-07-31T12:29:29Z',
            u'title': u'Atom draft-07 snapshot',
            u'label': u'tag:example.org,2003:3.2397',
            u'content_text': u'\n \n [Update: The Atom draft is finished.]\n \n ',
            u'link': u'http://example.org/2005/04/02/atom',
            u'authors': [u'Mark Pilgrim'],
            # The summary is expected as the literal string 'None'
            u'summary': u'None',
            u'type': u'Entry',
        },
    ]
    atom_source = filesource('rfc4287-1-1-2.atom').source
    self.assertEqual(ejsonize(atom_source), expected_records)