def test_rss_feed(self):
    """
    Verify that the mixed feed serializes correctly as RSS 2.0.
    """
    expected = '''<?xml version="1.0" encoding="utf-8"?>\n<rss version="2.0"><channel><title>Title</title><link></link><description></description><lastBuildDate>Wed, 15 Feb 2017 07:00:00 -0000</lastBuildDate><item><title>A Look At Bernie Sanders\' Electoral Socialism</title><link>http://americancynic.net/log/2016/2/27/a_look_at_bernie_sanders_electoral_socialism/</link><description>On the difference between democratic socialism and social democracy, the future of capitalism, and the socialist response to the Bernie Sanders presidential campaign.</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">A. Cynic</dc:creator><pubDate>Sat, 27 Feb 2016 22:33:51 -0000</pubDate><guid isPermaLink="false">tag:americancynic.net,2016-02-27:/log/2016/2/27/a_look_at_bernie_sanders_electoral_socialism/</guid></item><item><title>Uber finds one allegedly stolen Waymo file on an employee’s personal device</title><link>https://techcrunch.com/2017/04/05/uber-finds-one-allegedly-stolen-waymo-file-on-an-employees-personal-device/</link><description><p>Article URL: <a href="https://techcrunch.com/2017/04/05/uber-finds-one-allegedly-stolen-waymo-file-on-an-employees-personal-device/">https://techcrunch.com/2017/04/05/uber-finds-one-allegedly-stolen-waymo-file-on-an-employees-personal-device/</a></p><p>Comments URL: <a href="https://news.ycombinator.com/item?id=14044517">https://news.ycombinator.com/item?id=14044517</a></p><p>Points: 336</p><p># Comments: 206</p></description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">folz</dc:creator><comments>https://news.ycombinator.com/item?id=14044517</comments><guid isPermaLink="false">https://news.ycombinator.com/item?id=14044517</guid><enclosure length="501" type="image/jpeg" url="image.jpg"></enclosure></item></channel></rss>'''
    stub = build_stub_session()
    mixer = FeedMixer(feeds=['atom', 'rss'], num_keep=1, sess=stub)
    rendered = mixer.rss_feed()
    # Show the full diff on failure — the expected document is long.
    self.maxDiff = None
    self.assertIn(expected, rendered)
def test_json_feed(self):
    """
    Verify that the mixed feed serializes correctly as JSON Feed v1.
    """
    expected = '''{"version": "https://jsonfeed.org/version/1", "title": "Title", "home_page_url": "", "description": "", "items": [{"title": "A Look At Bernie Sanders\' Electoral Socialism", "content_html": "On the difference between democratic socialism and social democracy, the future of capitalism, and the socialist response to the Bernie Sanders presidential campaign.", "url": "http://americancynic.net/log/2016/2/27/a_look_at_bernie_sanders_electoral_socialism/", "id": "tag:americancynic.net,2016-02-27:/log/2016/2/27/a_look_at_bernie_sanders_electoral_socialism/", "author": {"name": "A. Cynic", "url": "http://americancynic.net"}, "date_published": "2016-02-27T22:33:51Z", "date_modified": "2017-02-15T07:00:00Z"}, {"title": "Uber finds one allegedly stolen Waymo file on an employee\\u2019s personal device", "content_html": "<p>Article URL: <a href=\\"https://techcrunch.com/2017/04/05/uber-finds-one-allegedly-stolen-waymo-file-on-an-employees-personal-device/\\">https://techcrunch.com/2017/04/05/uber-finds-one-allegedly-stolen-waymo-file-on-an-employees-personal-device/</a></p><p>Comments URL: <a href=\\"https://news.ycombinator.com/item?id=14044517\\">https://news.ycombinator.com/item?id=14044517</a></p><p>Points: 336</p><p># Comments: 206</p>", "url": "https://techcrunch.com/2017/04/05/uber-finds-one-allegedly-stolen-waymo-file-on-an-employees-personal-device/", "id": "https://news.ycombinator.com/item?id=14044517", "author": {"name": "folz"}, "attachments": [{"url": "image.jpg", "size_in_bytes": "501", "mime_type": "image/jpeg"}]}]}'''
    stub = build_stub_session()
    mixer = FeedMixer(feeds=['atom', 'rss'], num_keep=1, sess=stub)
    rendered = mixer.json_feed()
    # Show the full diff on failure — the expected document is long.
    self.maxDiff = None
    self.assertIn(expected, rendered)
def test_set_feed(self):
    """
    Assigning to the `feeds` property must discard any previously mixed
    entries.
    """
    stub = build_stub_session()
    mixer = FeedMixer(feeds=['atom', 'rss'], num_keep=1, sess=stub)
    # Sanity check: entries were fetched for both feeds.
    self.assertEqual(len(mixer.mixed_entries), 2)
    # Clearing the feed list must also clear the mixed entries.
    mixer.feeds = []
    self.assertEqual(len(mixer.mixed_entries), 0)
def test_set_num_keep(self):
    """
    Assigning to the `num_keep` property must trigger a re-fetch of the
    feeds with the new limit applied.
    """
    stub = build_stub_session()
    mixer = FeedMixer(feeds=['atom', 'rss'], num_keep=2, sess=stub)
    # Sanity check: two entries kept per feed, two feeds.
    self.assertEqual(len(mixer.mixed_entries), 4)
    # Lowering num_keep should re-fetch and keep only one entry per feed.
    mixer.num_keep = 1
    self.assertEqual(len(mixer.mixed_entries), 2)
def test_atom_feed(self):
    """
    Verify that the mixed feed serializes correctly as Atom.
    """
    expected = '''<?xml version="1.0" encoding="utf-8"?>\n<feed xmlns="http://www.w3.org/2005/Atom"><title>Title</title><link href="" rel="alternate"></link><id></id><updated>2017-04-05T18:48:43Z</updated><entry><title>Uber finds one allegedly stolen Waymo file on an employee’s personal device</title><link href="https://techcrunch.com/2017/04/05/uber-finds-one-allegedly-stolen-waymo-file-on-an-employees-personal-device/" rel="alternate"></link><published>2017-04-05T18:48:43Z</published><updated>2017-04-05T18:48:43Z</updated><author><name>folz</name></author><id>https://news.ycombinator.com/item?id=14044517</id><summary type="html"><p>Article URL: <a href="https://techcrunch.com/2017/04/05/uber-finds-one-allegedly-stolen-waymo-file-on-an-employees-personal-device/">https://techcrunch.com/2017/04/05/uber-finds-one-allegedly-stolen-waymo-file-on-an-employees-personal-device/</a></p><p>Comments URL: <a href="https://news.ycombinator.com/item?id=14044517">https://news.ycombinator.com/item?id=14044517</a></p><p>Points: 336</p><p># Comments: 206</p></summary></entry><entry><title>A Look At Bernie Sanders\' Electoral Socialism</title><link href="http://americancynic.net/log/2016/2/27/a_look_at_bernie_sanders_electoral_socialism/" rel="alternate"></link><published>2016-02-27T22:33:51Z</published><updated>2017-02-15T07:00:00Z</updated><author><name>A. Cynic</name><uri>http://americancynic.net</uri></author><id>tag:americancynic.net,2016-02-27:/log/2016/2/27/a_look_at_bernie_sanders_electoral_socialism/</id><summary type="html">On the difference between democratic socialism and social democracy, the future of capitalism, and the socialist response to the Bernie Sanders presidential campaign.</summary></entry></feed>'''
    mock_get = build_mock_cache_get()
    mixer = FeedMixer(feeds=['atom', 'rss'], cache_get=mock_get, num_keep=1,
                      cache=mock_shelfcache())
    rendered = mixer.atom_feed()
    # Show the full diff on failure — the expected document is long.
    self.maxDiff = None
    self.assertIn(expected, rendered)
def test_memoized(self):
    """
    Test that calls to the parser are memoized.

    Parsing the same feed through two separate FeedMixer instances should
    invoke the real parser only once; the second access must be served from
    the memoization cache (exactly one hit and one miss).
    """
    mc = build_stub_session()
    # Start from a clean cache so hit/miss counts are deterministic.
    cache_parser.cache_clear()
    fm = FeedMixer(feeds=['atom'], num_keep=2, sess=mc)
    fm.mixed_entries  # first access: cache miss, parser invoked
    fm = FeedMixer(feeds=['atom'], num_keep=2, sess=mc)
    fm.mixed_entries  # second access: cache hit, parser skipped
    hits, misses, _, _ = cache_parser.cache_info()
    self.assertEqual(hits, 1)
    self.assertEqual(misses, 1)
def test_atom_feed(self):
    """
    Verify that the mixed feed serializes correctly as Atom.
    """
    # NOTE: `expected` is deliberately truncated: the RSS entry has no
    # pubdate, so its <updated> tag is filled with the current date. Rather
    # than splice the current date into `expected`, we only compare up to
    # that point.
    expected = '''<?xml version="1.0" encoding="utf-8"?>\n<feed xmlns="http://www.w3.org/2005/Atom"><title>Title</title><link href="" rel="alternate"></link><id></id><updated>2017-02-15T07:00:00Z</updated><entry><title>A Look At Bernie Sanders\' Electoral Socialism</title><link href="http://americancynic.net/log/2016/2/27/a_look_at_bernie_sanders_electoral_socialism/" rel="alternate"></link><published>2016-02-27T22:33:51Z</published><updated>2017-02-15T07:00:00Z</updated><author><name>A. Cynic</name><uri>http://americancynic.net</uri></author><id>tag:americancynic.net,2016-02-27:/log/2016/2/27/a_look_at_bernie_sanders_electoral_socialism/</id><summary type="html">On the difference between democratic socialism and social democracy, the future of capitalism, and the socialist response to the Bernie Sanders presidential campaign.</summary></entry><entry><title>Uber finds one allegedly stolen Waymo file on an employee’s personal device</title><link href="https://techcrunch.com/2017/04/05/uber-finds-one-allegedly-stolen-waymo-file-on-an-employees-personal-device/" rel="alternate"></link><updated>'''
    stub = build_stub_session()
    mixer = FeedMixer(feeds=['atom', 'rss'], num_keep=1, sess=stub)
    rendered = mixer.atom_feed()
    # Show the full diff on failure — the expected document is long.
    self.maxDiff = None
    self.assertIn(expected, rendered)
def test_set_feed(self):
    """
    Assigning to the `feeds` property must discard any previously mixed
    entries.
    """
    mock_get = build_mock_cache_get()
    mixer = FeedMixer(feeds=['atom', 'rss'], cache_get=mock_get, num_keep=1,
                      cache=mock_shelfcache())
    # Sanity check: entries were fetched for both feeds.
    self.assertEqual(len(mixer.mixed_entries), 2)
    # Clearing the feed list must also clear the mixed entries.
    mixer.feeds = []
    self.assertEqual(len(mixer.mixed_entries), 0)
def test_set_num_keep(self):
    """
    Assigning to the `num_keep` property must trigger a re-fetch of the
    feeds with the new limit applied.
    """
    mock_get = build_mock_cache_get()
    mixer = FeedMixer(feeds=['atom', 'rss'], cache_get=mock_get, num_keep=2,
                      cache=mock_shelfcache())
    # Sanity check: two entries kept per feed, two feeds.
    self.assertEqual(len(mixer.mixed_entries), 4)
    # Lowering num_keep should re-fetch and keep only one entry per feed.
    mixer.num_keep = 1
    self.assertEqual(len(mixer.mixed_entries), 2)
def on_get(self, req: falcon.Request, resp: falcon.Response) -> None:
    """
    Falcon GET handler.
    """
    feeds, n, full = parse_qs(req)
    fm = FeedMixer(feeds=feeds, num_keep=n, prefer_summary=not full,
                   title=self.title, desc=self.desc, link=req.uri,
                   cache=self.cache)
    # Dispatch dynamically to the serializer matching the configured feed
    # type (e.g. 'atom' -> fm.atom_feed()).
    resp.body = getattr(fm, "{}_feed".format(self.ftype))()
    if fm.error_urls:
        # Fetch/parse failures are reported in the 'X-fm-errors' http
        # header as a url-encoded JSON hash of url -> error message.
        error_dict = {}
        for url, err in fm.error_urls.items():
            msg = str(err)
            if hasattr(err, 'status'):
                msg += " ({})".format(err.status)
            error_dict[url] = msg
        encoded = urllib.parse.quote(json.dumps(error_dict))
        resp.append_header('X-fm-errors', encoded)
    # JSON has its own media type; the XML-based formats share a pattern.
    if self.ftype == 'json':
        resp.content_type = "application/json"
    else:
        resp.content_type = "application/{}+xml".format(self.ftype)
    resp.status = falcon.HTTP_200
def test_keep_all_zero(self):
    """
    A `num_keep` of 0 must keep every entry rather than none.
    """
    stub = build_stub_session()
    mixer = FeedMixer(feeds=['atom'], num_keep=0, sess=stub)
    entries = mixer.mixed_entries
    self.assertEqual(len(entries), 12)
def test_empty(self):
    """
    An empty `feeds` list must yield no entries and trigger no fetches.
    """
    stub = build_stub_session()
    mixer = FeedMixer(feeds=[], sess=stub)
    entries = mixer.mixed_entries
    stub.assert_not_called()
    self.assertEqual(entries, [])
def test_single_good(self):
    """
    A single valid URL must be fetched exactly once and its entries kept.
    """
    stub = build_stub_session()
    mixer = FeedMixer(feeds=['atom'], num_keep=2, sess=stub)
    entries = mixer.mixed_entries
    stub.get.assert_called_once_with('atom')
    self.assertEqual(len(entries), 2)
def test_empty(self):
    """
    An empty `feeds` list must yield no entries and trigger no fetches.
    """
    mock_get = MagicMock()
    mixer = FeedMixer(feeds=[], cache_get=mock_get, cache=mock_shelfcache())
    entries = mixer.mixed_entries
    mock_get.assert_not_called()
    self.assertEqual(entries, [])
def test_single_exception(self):
    """
    A URL whose fetch raises must produce no entries and record the error.
    """
    stub = build_stub_session()
    mixer = FeedMixer(feeds=['fetcherror'], num_keep=2, sess=stub)
    entries = mixer.mixed_entries
    self.assertEqual(len(entries), 0)
    self.assertIsInstance(mixer.error_urls['fetcherror'], RequestException)
def test_multi_exception(self):
    """
    When every URL fails, no entries are produced and each failure is
    recorded with its own exception type.
    """
    stub = build_stub_session()
    mixer = FeedMixer(feeds=['fetcherror', 'parseerror'], num_keep=2,
                      sess=stub)
    entries = mixer.mixed_entries
    self.assertEqual(len(entries), 0)
    self.assertIsInstance(mixer.error_urls['fetcherror'], RequestException)
    self.assertIsInstance(mixer.error_urls['parseerror'], ParseError)
def test_single_good(self):
    """
    A single valid URL must be fetched exactly once and its entries kept.
    """
    mock_get = build_mock_cache_get()
    cache = mock_shelfcache()
    mixer = FeedMixer(feeds=['atom'], cache_get=mock_get, num_keep=2,
                      cache=cache)
    entries = mixer.mixed_entries
    mock_get.assert_called_once_with(cache, 'atom', headers=ANY)
    self.assertEqual(len(entries), 2)
def test_keep_all_zero(self):
    """
    A `num_keep` of 0 must keep every entry rather than none.
    """
    mock_get = build_mock_cache_get()
    mixer = FeedMixer(feeds=['atom'], cache_get=mock_get, num_keep=0,
                      cache=mock_shelfcache())
    entries = mixer.mixed_entries
    self.assertEqual(len(entries), 12)
def test_atom_prefer_content(self):
    """
    With `prefer_summary=False` the parser must be asked for the full
    entry content (much longer than the summary).
    """
    stub = build_stub_session()
    mixer = FeedMixer(feeds=['atom'], num_keep=1, sess=stub,
                      prefer_summary=False)
    first = mixer.mixed_entries[0]
    # The full content is far longer than the one-sentence summary.
    self.assertTrue(len(first.get('description')) > 1000)
def test_atom_prefer_summary(self):
    """
    With `prefer_summary=True` the short 'summary' must be returned as the
    entry description.
    """
    expected = '''On the difference between democratic socialism and social democracy, the future of capitalism, and the socialist response to the Bernie Sanders presidential campaign.'''
    stub = build_stub_session()
    mixer = FeedMixer(feeds=['atom'], num_keep=1, sess=stub,
                      prefer_summary=True)
    first = mixer.mixed_entries[0]
    self.assertEqual(first.get('description'), expected)
def test_multi_good(self):
    """
    Multiple valid URLs must all be fetched and their entries combined.
    """
    # Clear the memoized parser so every URL is actually fetched.
    cache_parser.cache_clear()
    stub = build_stub_session()
    mixer = FeedMixer(feeds=['atom', 'rss', 'atom'], num_keep=2, sess=stub)
    entries = mixer.mixed_entries
    stub.get.assert_has_calls(
        [call('atom'), call('rss'), call('atom')], any_order=True)
    self.assertEqual(len(entries), 6)
def test_single_exception(self):
    """
    A URL whose fetch raises must produce no entries and record the error.
    """
    mock_get = build_mock_cache_get()
    mixer = FeedMixer(feeds=['fetcherror'], cache_get=mock_get, num_keep=2,
                      cache=mock_shelfcache())
    entries = mixer.mixed_entries
    self.assertEqual(len(entries), 0)
    self.assertIsInstance(mixer.error_urls['fetcherror'], RequestException)
def test_atom_prefer_content(self):
    """
    With `prefer_summary=False` the parser must be asked for the full
    entry content (much longer than the summary).
    """
    mock_get = build_mock_cache_get()
    cache = mock_shelfcache()
    mixer = FeedMixer(feeds=['atom'], cache_get=mock_get, num_keep=1,
                      cache=cache, prefer_summary=False)
    first = mixer.mixed_entries[0]
    # The full content is far longer than the one-sentence summary.
    self.assertTrue(len(first.get('description')) > 1000)
def test_fresh_response(self):
    """
    cache_get() has returned a requests.Response object from the requests
    library. Ensure it gets cached as a FeedParserDict.
    """
    # Setup: a fresh (non-expired) raw Response in the cache.
    url = 'atom'
    mock_get = build_mock_cache_get()
    fresh = build_response()
    result = shelfcache.CacheResult(data=fresh, expired=False)
    cache = mock_shelfcache(return_value=result)
    # DUT: accessing mixed_entries should parse and re-cache the data.
    mixer = FeedMixer(feeds=[url], cache_get=mock_get, cache=cache)
    mixer.mixed_entries
    # Assert: the parsed form replaced the raw Response in the cache.
    cache.replace_data.assert_called_once_with(key=url, data=ANY)
def test_multi_good(self):
    """
    Multiple valid URLs must all be fetched and their entries combined.
    """
    mock_get = build_mock_cache_get()
    cache = mock_shelfcache()
    mixer = FeedMixer(feeds=['atom', 'rss', 'atom'], cache_get=mock_get,
                      num_keep=2, cache=cache)
    entries = mixer.mixed_entries
    expected_calls = [
        call(cache, 'atom', headers=ANY),
        call(cache, 'rss', headers=ANY),
        call(cache, 'atom', headers=ANY),
    ]
    mock_get.assert_has_calls(expected_calls, any_order=True)
    self.assertEqual(len(entries), 6)
def test_multi_exception(self):
    """
    When every URL fails, no entries are produced and each failure is
    recorded with its own exception type.
    """
    mock_get = build_mock_cache_get()
    cache = mock_shelfcache()
    mixer = FeedMixer(feeds=['fetcherror', 'parseerror'], cache_get=mock_get,
                      num_keep=2, cache=cache)
    entries = mixer.mixed_entries
    expected_calls = [
        call(cache, 'fetcherror', headers=ANY),
        call(cache, 'parseerror', headers=ANY),
    ]
    mock_get.assert_has_calls(expected_calls, any_order=True)
    self.assertEqual(len(entries), 0)
    self.assertIsInstance(mixer.error_urls['fetcherror'], RequestException)
    self.assertIsInstance(mixer.error_urls['parseerror'], ParseError)
def test_stale_parsed(self):
    """
    cache_get() has returned a stale FeedParserDict object. Ensure we
    re-fetch, parse, and cache it.
    """
    # Setup: an expired 304 response in the cache.
    url = 'atom'
    mock_get = build_mock_cache_get()
    stale = build_response(status=304)
    fresh = feedparser.parse(TEST_ATOM)
    result = shelfcache.CacheResult(data=stale, expired=True)
    cache = mock_shelfcache(return_value=result)
    # DUT: num_keep=-1 keeps every entry so counts are comparable.
    mixer = FeedMixer(feeds=[url], cache_get=mock_get, cache=cache,
                      num_keep=-1)
    entries = mixer.mixed_entries
    # Assert: all freshly-parsed entries are present and were re-cached.
    self.assertEqual(len(entries), len(fresh.entries))
    cache.replace_data.assert_called_once_with(key=url, data=ANY)
def test_saves_headers(self):
    """
    Make sure headers are stored with cached feed. Tests regression fixed
    with 2ee4bc9c245229d564d4b14e7d76ae5879f6eeae
    """
    # Setup: an expired response whose headers we expect to survive.
    url = 'atom'
    resp = build_response()
    mock_get = MagicMock(return_value=resp)
    headers = resp.headers
    result = shelfcache.CacheResult(data=resp, expired=True)
    cache = mock_shelfcache(return_value=result)
    # DUT:
    mixer = FeedMixer(feeds=[url], cache_get=mock_get, cache=cache,
                      num_keep=-1)
    mixer.mixed_entries
    # Assert: the data written back to the cache carries the headers.
    saved = cache.replace_data.call_args[1]['data'].headers
    self.assertEqual(headers, saved)
def test_multi_mixed(self):
    """
    With a mix of good and failing URLs, the good feeds' entries are kept
    and each failure is recorded with its own exception type.
    """
    stub = build_stub_session()
    mixer = FeedMixer(feeds=['fetcherror', 'atom', 'rss', 'parseerror'],
                      num_keep=2, sess=stub)
    entries = mixer.mixed_entries
    expected_calls = [
        call('fetcherror'),
        call('atom'),
        call('rss'),
        call('parseerror'),
    ]
    stub.get.assert_has_calls(expected_calls, any_order=True)
    # Two good feeds x two entries each; two recorded failures.
    self.assertEqual(len(entries), 4)
    self.assertEqual(len(mixer.error_urls.keys()), 2)
    self.assertIsInstance(mixer.error_urls['fetcherror'], RequestException)
    self.assertIsInstance(mixer.error_urls['parseerror'], ParseError)
def test_adds_feed_author(self):
    """
    A feed whose entries lack the `author_detail` attribute must have
    author information added during mixing.
    """
    # Guard against future edits to the test file at ATOM_PATH adding
    # <author> to each entry, which would render this test useless.
    feed = feedparser.parse(TEST_ATOM)
    first = feed['entries'][0]
    if hasattr(first, 'author_detail'):
        del first['author_detail']
    self.assertNotIn('author_detail', first)
    self.assertNotIn('author_name', first)
    # Simulate fetching the URL; afterwards the mixed entry should carry
    # an `author_name` attribute.
    stub = build_stub_session()
    mixer = FeedMixer(feeds=['atom'], num_keep=1, sess=stub)
    entries = mixer.mixed_entries
    stub.get.assert_called_once_with('atom')
    self.assertIn('author_name', entries[0])