def test_304(self):
    """Check that conditional requests against the 304 fixture return 304.

    The feed is fetched once unconditionally, then fetched again with the
    ETag, and with the last-modified value in each form exercised here
    (string, time tuple, datetime).  Every conditional fetch must report
    status 304.
    """
    url = 'http://localhost:8097/tests/http/http_status_304.xml'

    # first retrieve the url
    initial = feedparser.parse(url)
    self.assertEqual(initial.status, 200)
    self.assertEqual(initial.entries[0].title, 'title 304')

    # extract the etag and last-modified headers
    # (header names are matched case-insensitively; [0] raises IndexError
    # if the header is absent, which fails the test)
    etag = [v for k, v in initial.headers.items() if k.lower() == 'etag'][0]
    modified_header = [v for k, v in initial.headers.items()
                       if k.lower() == 'last-modified'][0]
    modified_string = initial.updated
    modified_tuple = initial.updated_parsed
    # Only the first six struct_time fields (year .. second) are datetime
    # arguments; the seventh field is tm_wday and must not be passed along,
    # because datetime would interpret it as microseconds.
    modified_datetime = datetime.datetime(*modified_tuple[0:6])
    self.assertTrue(isinstance(modified_header, basestring))
    self.assertTrue(isinstance(modified_string, basestring))
    self.assertTrue(isinstance(modified_tuple, time.struct_time))
    self.assertTrue(isinstance(modified_datetime, datetime.datetime))

    # test that sending back the etag results in a 304
    conditional = feedparser.parse(url, etag=etag)
    self.assertEqual(conditional.status, 304)

    # test that sending back last-modified (string) results in a 304
    conditional = feedparser.parse(url, modified=modified_string)
    self.assertEqual(conditional.status, 304)

    # test that sending back last-modified (9-tuple) results in a 304
    conditional = feedparser.parse(url, modified=modified_tuple)
    self.assertEqual(conditional.status, 304)

    # test that sending back last-modified (datetime) results in a 304
    conditional = feedparser.parse(url, modified=modified_datetime)
    self.assertEqual(conditional.status, 304)
def test_lxml_etree_bug(self):
    """Parse an ill-formed document after importing lxml.etree.

    The check only runs when lxml is importable; it passes as long as
    ``feedparser.parse`` returns without raising.
    """
    try:
        import lxml.etree
    except ImportError:
        # lxml is not installed, so there is nothing to exercise.
        return

    # Importing lxml.etree currently causes libxml2 to
    # throw SAXException instead of SAXParseException.
    payload = u"<feed>&illformed_charref</feed>".encode('utf8')
    feedparser.parse(feedparser._StringIO(payload))
    self.assertTrue(True)
def test_redirect_to_304(self):
    """Check the redirect-to-304 fixture: no bozo flag, status 302.

    Ensures that an http redirect to an http 304 doesn't trigger a
    bozo_exception.
    """
    url = 'http://localhost:8097/tests/http/http_redirect_to_304.xml'
    result = feedparser.parse(url)
    # assertEqual (rather than assertTrue(a == b)) reports the actual
    # value when the check fails.
    self.assertEqual(result.bozo, 0)
    self.assertEqual(result.status, 302)
def test_gb2312_converted_to_gb18030_in_xml_encoding(self):
    """A document declaring gb2312 must be reported with encoding gb18030.

    The document is encoded as gb18030 and served with a text/xml
    Content-Type response header.
    """
    # \u55de was chosen because it exists in gb18030 but not gb2312
    feed = u'''<?xml version="1.0" encoding="gb2312"?> <feed><title>\u55de</title></feed>'''
    payload = feed.encode('gb18030')
    headers = {'Content-Type': 'text/xml'}
    result = feedparser.parse(payload, response_headers=headers)
    self.assertEqual(result.encoding, 'gb18030')
def test_gb2312_converted_to_gb18030_in_xml_encoding(self):
    """Expect ``result.encoding`` to be gb18030 for a gb2312-declared feed.

    The bytes handed to the parser are gb18030-encoded and accompanied by
    a text/xml Content-Type response header.
    """
    # \u55de was chosen because it exists in gb18030 but not gb2312
    feed = u'''<?xml version="1.0" encoding="gb2312"?> <feed><title>\u55de</title></feed>'''
    encoded_feed = feed.encode('gb18030')
    response_headers = {'Content-Type': 'text/xml'}
    parsed = feedparser.parse(encoded_feed, response_headers=response_headers)
    self.assertEqual(parsed.encoding, 'gb18030')
def test_doctype_replacement(self):
    """Ensure that non-ASCII-compatible encodings don't hide disallowed ENTITY declarations.

    A UTF-16BE document (with BOM) declaring nested entities is parsed and
    the feed title is expected to be the literal text ``&exponential3``.
    """
    # NOTE(review): the literal is kept as a single token exactly as it
    # appears in the reviewed source; confirm its whitespace before
    # reflowing it.
    xml_text = """<?xml version="1.0" encoding="utf-16be"?> <!DOCTYPE feed [ <!ENTITY exponential1 "bogus "> <!ENTITY exponential2 "&exponential1;&exponential1;"> <!ENTITY exponential3 "&exponential2;&exponential2;"> ]> <feed><title type="html">&exponential3;</title></feed>"""
    encoded = codecs.BOM_UTF16_BE + xml_text.encode('utf-16be')
    parsed = feedparser.parse(encoded)
    self.assertEqual(parsed['feed']['title'], u'&exponential3')
def failUnlessEval(self, xmlfile, evalString, msg=None):
    """Fail unless eval(evalString, env) is true for the parsed feed.

    ``xmlfile`` is passed to ``feedparser.parse``; the result is used as
    the local namespace when evaluating ``evalString`` (the module globals
    are the global namespace).  ``msg``, when given, replaces the generated
    message for a false expression.

    Raises ``self.failureException`` when the expression evaluates false,
    or when ``everythingIsUnicode(env)`` is false after the first
    evaluation succeeds.  If the first evaluation raises SyntaxError the
    expression is rewritten and evaluated once more.
    """
    env = feedparser.parse(xmlfile)
    # NOTE(review): evalString is evaluated as-is; presumably it only ever
    # comes from the test fixtures -- confirm before reusing elsewhere.
    try:
        if not eval(evalString, globals(), env):
            failure = (msg or 'not eval(%s) \nWITH env(%s)' % (evalString, pprint.pformat(env)))
            # Call form of raise: valid on both Python 2 and Python 3.
            raise self.failureException(failure)
        if not everythingIsUnicode(env):
            raise self.failureException(
                "not everything is unicode \nWITH env(%s)" % (pprint.pformat(env), ))
    except SyntaxError:
        # Python 3 doesn't have the `u""` syntax, so evalString needs to be modified,
        # which will require the failure message to be updated
        evalString = re.sub(unicode1_re, _s2bytes(" '"), evalString)
        evalString = re.sub(unicode2_re, _s2bytes(' "'), evalString)
        if not eval(evalString, globals(), env):
            failure = (msg or 'not eval(%s) \nWITH env(%s)' % (evalString, pprint.pformat(env)))
            raise self.failureException(failure)
def failUnlessEval(self, xmlfile, evalString, msg=None):
    """Fail unless eval(evalString, env) is true.

    The feed at ``xmlfile`` is parsed with ``feedparser.parse`` and the
    parse result serves as the local namespace for ``evalString``; module
    globals are the global namespace.  A caller-supplied ``msg`` overrides
    the generated failure text for a false expression.

    Raises ``self.failureException`` if the expression is false, or if
    ``everythingIsUnicode(env)`` is false once the first evaluation has
    succeeded.  A SyntaxError from the first evaluation triggers a rewrite
    of the expression followed by a second evaluation.
    """
    env = feedparser.parse(xmlfile)
    # NOTE(review): eval runs on evalString unmodified; presumably the
    # string always originates from the test fixtures -- verify.
    try:
        if not eval(evalString, globals(), env):
            failure = (msg or 'not eval(%s) \nWITH env(%s)' %
                       (evalString, pprint.pformat(env)))
            # raise E(msg) instead of the Python-2-only "raise E, msg".
            raise self.failureException(failure)
        if not everythingIsUnicode(env):
            raise self.failureException(
                "not everything is unicode \nWITH env(%s)" % (
                    pprint.pformat(env), ))
    except SyntaxError:
        # Python 3 doesn't have the `u""` syntax, so evalString needs to be modified,
        # which will require the failure message to be updated
        evalString = re.sub(unicode1_re, _s2bytes(" '"), evalString)
        evalString = re.sub(unicode2_re, _s2bytes(' "'), evalString)
        if not eval(evalString, globals(), env):
            failure = (msg or 'not eval(%s) \nWITH env(%s)' %
                       (evalString, pprint.pformat(env)))
            raise self.failureException(failure)
def test_404(self):
    """Parsing the http_status_404 fixture must report status 404."""
    url = 'http://localhost:8097/tests/http/http_status_404.xml'
    response = feedparser.parse(url)
    self.assertEqual(response.status, 404)
def test_gzip_good(self):
    """The gzip.gz compression fixture must be detected as version atom10."""
    url = 'http://localhost:8097/tests/compression/gzip.gz'
    parsed = feedparser.parse(url)
    self.assertEqual(parsed.version, 'atom10')
def test_307(self):
    """Check status, final href and first entry title for the 307 fixture."""
    source_url = 'http://localhost:8097/tests/http/http_status_307.xml'
    target_url = 'http://localhost:8097/tests/http/target.xml'
    result = feedparser.parse(source_url)
    self.assertEqual(result.status, 307)
    self.assertEqual(result.href, target_url)
    self.assertEqual(result.entries[0].title, 'target')
def test_zlib_not_compressed(self):
    """The deflate-not-compressed fixture is bozo with a zlib.error.

    The feed title must still be available as 'deflate'.
    """
    url = 'http://localhost:8097/tests/compression/deflate-not-compressed.z'
    result = feedparser.parse(url)
    self.assertEqual(result.bozo, 1)
    self.assertTrue(isinstance(result.bozo_exception, zlib.error))
    self.assertEqual(result['feed']['title'], 'deflate')
def test_zlib_no_headers(self):
    """The deflate-no-headers fixture must be detected as version atom10."""
    url = 'http://localhost:8097/tests/compression/deflate-no-headers.z'
    parsed = feedparser.parse(url)
    self.assertEqual(parsed.version, 'atom10')
def test_gzip_struct_error(self):
    """The gzip-struct-error fixture is bozo with a struct.error exception."""
    url = 'http://localhost:8097/tests/compression/gzip-struct-error.gz'
    result = feedparser.parse(url)
    self.assertEqual(result.bozo, 1)
    self.assertTrue(isinstance(result.bozo_exception, struct.error))
def test_307(self):
    """The 307 fixture reports status 307 and the target feed's data.

    Both the final ``href`` and the first entry's title must come from
    target.xml.
    """
    redirecting_url = 'http://localhost:8097/tests/http/http_status_307.xml'
    parsed = feedparser.parse(redirecting_url)
    self.assertEqual(parsed.status, 307)
    self.assertEqual(parsed.href,
                     'http://localhost:8097/tests/http/target.xml')
    self.assertEqual(parsed.entries[0].title, 'target')
def test_feed_http(self):
    """A ``feed:http://`` URL must yield an ``href`` without the feed: prefix."""
    feed_url = u'feed:http://localhost:8097/tests/http/target.xml'
    expected_href = u'http://localhost:8097/tests/http/target.xml'
    result = feedparser.parse(feed_url)
    self.assertEqual(result.href, expected_href)
def test_gzip_not_compressed(self):
    """The gzip-not-compressed fixture is bozo with an IOError.

    The feed title must still be available as 'gzip'.
    """
    url = 'http://localhost:8097/tests/compression/gzip-not-compressed.gz'
    result = feedparser.parse(url)
    self.assertEqual(result.bozo, 1)
    self.assertTrue(isinstance(result.bozo_exception, IOError))
    self.assertEqual(result['feed']['title'], 'gzip')
def test_gzip_struct_error(self):
    """Expect bozo == 1 and a struct.error for the gzip-struct-error fixture."""
    fixture_url = 'http://localhost:8097/tests/compression/gzip-struct-error.gz'
    parsed = feedparser.parse(fixture_url)
    self.assertEqual(parsed.bozo, 1)
    error = parsed.bozo_exception
    self.assertTrue(isinstance(error, struct.error))
def test_zlib_no_headers(self):
    """Expect version atom10 when parsing the deflate-no-headers fixture."""
    fixture_url = 'http://localhost:8097/tests/compression/deflate-no-headers.z'
    result = feedparser.parse(fixture_url)
    self.assertEqual(result.version, 'atom10')
def test_zlib_not_compressed(self):
    """Expect a zlib.error bozo exception for the deflate-not-compressed fixture.

    Despite the bozo flag, the feed title must read 'deflate'.
    """
    fixture_url = ('http://localhost:8097/tests/compression/'
                   'deflate-not-compressed.z')
    parsed = feedparser.parse(fixture_url)
    self.assertEqual(parsed.bozo, 1)
    error = parsed.bozo_exception
    self.assertTrue(isinstance(error, zlib.error))
    self.assertEqual(parsed['feed']['title'], 'deflate')
def test_feed_http(self):
    """Parsing a ``feed:http://`` URL reports the plain http URL as ``href``."""
    http_url = u'http://localhost:8097/tests/http/target.xml'
    parsed = feedparser.parse(
        u'feed:http://localhost:8097/tests/http/target.xml')
    self.assertEqual(parsed.href, http_url)
def test_gzip_not_compressed(self):
    """Expect an IOError bozo exception for the gzip-not-compressed fixture.

    Despite the bozo flag, the feed title must read 'gzip'.
    """
    fixture_url = ('http://localhost:8097/tests/compression/'
                   'gzip-not-compressed.gz')
    parsed = feedparser.parse(fixture_url)
    self.assertEqual(parsed.bozo, 1)
    error = parsed.bozo_exception
    self.assertTrue(isinstance(error, IOError))
    self.assertEqual(parsed['feed']['title'], 'gzip')
def test_404(self):
    """Expect status 404 when parsing the http_status_404 fixture URL."""
    not_found_url = 'http://localhost:8097/tests/http/http_status_404.xml'
    parsed = feedparser.parse(not_found_url)
    self.assertEqual(parsed.status, 404)