def feed():
    """Build a fake podcast FeedParserDict with two episode entries."""
    episode_one = FeedParserDict(
        title='Episode1',
        subtitle='episode of week 1',
        published=str(datetime.datetime(2021, 1, 1)),
        links=[
            {'href': 'https://somesite.com/episode1.mp3',
             'type': 'audio/mpeg'},
            {'href': 'https://somesite.com/episode1',
             'type': 'text'},
        ])
    episode_two = FeedParserDict(
        title='Episode2',
        subtitle='episode of week 2',
        published=str(datetime.datetime(2021, 1, 7)),
        links=[
            {'href': 'https://somesite.com/episode2',
             'type': 'text'},
            {'href': 'https://somesite.com/episode2.mp3',
             'type': 'audio/mpeg'},
        ])
    return FeedParserDict(
        feed=FeedParserDict(
            link='https://sample.com',
            subtitle='This is the best podcast show in the entire universe',
            title='PodcastTitle'),
        entries=[episode_one, episode_two],
        etag='5f77c6d7-45f1e',
        href='https://sample.podcast.tv/test.xml',
        updated=str(datetime.datetime(2021, 1, 1)),
        status=200)
def rss_feed_missing_feed_title():
    """An invalid RSS feed for tests: the feed section lacks a "title" key.

    Mirrors the valid ``rss_feed`` fixture, but omits the feed-level
    title so code paths requiring it can be exercised.

    Returns:
        FeedParserDict: feed with three entries and a title-less feed dict.
    """
    # Fix: item2 previously reused the title "test1"; "test2" matches the
    # numbering used by the valid ``rss_feed`` fixture.
    item1 = {"title": "test1", "link": "https://test.com/item1"}
    item2 = {"title": "test2", "description": "foo"}
    item3 = {"title": "test3", "summary": "bar"}
    entries = [
        FeedParserDict(item1),
        FeedParserDict(item2),
        FeedParserDict(item3),
    ]
    # Fix: the fixture is named *missing_feed_title*, yet it previously
    # included a "title" key, so it never actually exercised the
    # missing-title path. Drop the key to match the stated purpose.
    feed = FeedParserDict({"description": "testing"})
    rss_feed = FeedParserDict({"feed": feed, "entries": entries})
    return rss_feed
def make_fake_feedparser_dict(feed_url, n_items=30):
    """Creates a fake but valid FeedParserDict object.

    Args:
        feed_url (str): Fake URL for fake Feed
        n_items (int): number of fake entries to generate
    """
    fake_channel = FeedParserDict(
        title='Sample Feed',
        description='This is a sample feed',
        link=feed_url,
    )
    fake_entries = make_feed_entries_list(n_items=n_items, feed_url=feed_url)
    return FeedParserDict(
        feed=fake_channel,
        entries=fake_entries,
        version='rss20',
        bozo=0,
    )
def test_feeds(mock_feedparse, patch_try_shorten):
    from plugins import feeds

    # An empty feed is reported as not found, and the shortcut name
    # 'xkcd' resolves to the xkcd RSS URL.
    mock_feedparse.return_value = FeedParserDict(entries=[])
    assert feeds.rss('xkcd') == "Feed not found."
    mock_feedparse.assert_called_with('http://xkcd.com/rss.xml')
    mock_feedparse.reset_mock()

    # A feed with a title prefixes the bolded title to the entry line.
    mock_feedparse.return_value = FeedParserDict(
        entries=[FeedParserDict(title='foo1', link='http://example.com')],
        feed=FeedParserDict(title='test'),
    )
    with_title = "\x02test\x02: foo1 (http://example.com)"
    assert feeds.rss('http://rss.example.com/feed.xml') == with_title
    mock_feedparse.assert_called_with('http://rss.example.com/feed.xml')
    mock_feedparse.reset_mock()

    # A feed without a title renders the bare entry line.
    mock_feedparse.return_value = FeedParserDict(
        entries=[FeedParserDict(title='foo1', link='http://example.com')],
        feed=FeedParserDict(),
    )
    without_title = "foo1 (http://example.com)"
    assert feeds.rss('http://rss.example.com/feed.xml') == without_title
    mock_feedparse.assert_called_with('http://rss.example.com/feed.xml')
    mock_feedparse.reset_mock()
def test_fetch_feed_unknown_issue(mocked_parse, hacks_feed):
    """If a feed encounters an unknown issue, it is disabled."""
    mocked_parse.return_value = FeedParserDict(bozo=1)

    assert fetch_feed(hacks_feed) is None

    feed = Feed.objects.get()
    assert not feed.enabled
    assert feed.disabled_reason == "Error while reading the feed: 500 __ "
def test_fetch_feed_exception(mocked_parse, hacks_feed):
    """If a feed encounters an exception, it is disabled."""
    mocked_parse.return_value = FeedParserDict(
        bozo=1,
        bozo_exception=Exception("I am grumpy today."),
    )

    assert fetch_feed(hacks_feed) is None

    feed = Feed.objects.get()
    assert not feed.enabled
    # The bozo exception's message is appended to the disabled reason.
    assert feed.disabled_reason == (
        "Error while reading the feed: 500 __ I am grumpy today.")
def test_parse_rss_error(mocker):
    """Test exceptions are handled for errors while parsing RSS."""
    # A bozo feed (malformed XML) must raise ParseContentError.
    bozo_feed = FeedParserDict({
        "bozo": True,
        "bozo_exception": "details",
    })
    mocker.patch("apps.feeds.feed_parser.feedparser.parse",
                 return_value=bozo_feed)
    with pytest.raises(ParseContentError):
        parse("foo")

    # A well-formed feed that lacks required fields must also raise.
    incomplete_feed = FeedParserDict({"bozo": False, "feed": "test"})
    mocker.patch("apps.feeds.feed_parser.feedparser.parse",
                 return_value=incomplete_feed)
    mocker.patch("apps.feeds.feed_parser.has_required_fields",
                 return_value=False)
    with pytest.raises(ParseContentError):
        parse("foo")
def getFeed(url, request_headers=None, handlers=None):
    """Fetch and parse an RSS feed, best-effort.

    Args:
        url: feed URL (normalized before the request)
        request_headers: optional extra HTTP headers passed to feedparser
        handlers: optional urllib handlers passed to feedparser

    Returns:
        feedparser.FeedParserDict: the parsed feed, or an empty
        FeedParserDict when fetching/parsing failed.
    """
    try:
        return feedparser.parse(
            sickrage.srCore.srWebSession.normalize_url(url),
            agent=random.choice(USER_AGENTS),
            etag=False,
            modified=False,
            request_headers=request_headers,
            handlers=handlers)
    except Exception as e:
        # Fix: the exception was previously swallowed silently (bound to an
        # unused variable). Log it at debug level, matching the other
        # getFeed variant that reports "RSS Error".
        sickrage.srCore.srLogger.debug("RSS Error: {}".format(e))
        return FeedParserDict()
def test_fetch_feed_timeout(mocked_parse, hacks_feed, settings):
    """If a feed times out, it is disabled."""
    settings.FEEDER_TIMEOUT = 10
    timeout_error = URLError(reason=socket.timeout("timed out"))
    mocked_parse.return_value = FeedParserDict(
        bozo=1, bozo_exception=timeout_error)

    assert fetch_feed(hacks_feed) is None

    feed = Feed.objects.get()
    assert feed.etag == ""
    assert not feed.enabled
    # The disabled reason embeds the configured timeout value.
    assert feed.disabled_reason == "This feed didn't respond after 10 seconds"
def rss_feed():
    """Create a rss FeedParserDict obj for tests"""
    entries = [
        FeedParserDict({"title": "test1", "link": "https://test.com/item1"}),
        FeedParserDict({"title": "test2", "description": "foo"}),
        FeedParserDict({"title": "test3", "summary": "bar"}),
    ]
    channel = FeedParserDict({
        "title": "test",
        "link": "https://test.com",
        "description": "testing",
    })
    # bozo=0 marks the feed as well-formed.
    return FeedParserDict({"feed": channel, "entries": entries, "bozo": 0})
def getFeed(url, params=None, request_headers=None, handlers=None):
    """Fetch a URL and parse its body as an RSS feed.

    Args:
        url: feed URL
        params: optional query parameters for the HTTP request
        request_headers: optional extra HTTP headers passed to feedparser
        handlers: optional urllib handlers passed to feedparser

    Returns:
        feedparser.FeedParserDict: the parsed feed, or an empty
        FeedParserDict on HTTP error or exception.
    """
    try:
        resp = sickrage.app.wsession.get(url, params=params)
        if resp.ok:
            return feedparser.parse(resp.text,
                                    agent=sickrage.app.user_agent,
                                    etag=False,
                                    modified=False,
                                    request_headers=request_headers,
                                    handlers=handlers)
    except Exception as e:
        # Fix: ``e.message`` does not exist on Python 3 exceptions (the
        # attribute was removed per PEP 352), so logging itself raised
        # AttributeError. Formatting the exception directly works everywhere.
        sickrage.app.log.debug("RSS Error: {}".format(e))
    return FeedParserDict()
def getFeed(url, params=None, request_headers=None, handlers=None):
    """Fetch a URL and parse its body as an RSS feed.

    Args:
        url: feed URL
        params: optional query parameters for the HTTP request
        request_headers: optional extra HTTP headers passed to feedparser
        handlers: optional urllib handlers passed to feedparser

    Returns:
        feedparser.FeedParserDict: the parsed feed, or an empty
        FeedParserDict on HTTP error or exception.
    """
    try:
        resp = sickrage.srCore.srWebSession.get(url, params=params)
        if resp.ok:
            return feedparser.parse(resp.text,
                                    agent=random.choice(USER_AGENTS),
                                    etag=False,
                                    modified=False,
                                    request_headers=request_headers,
                                    handlers=handlers)
    except Exception as e:
        # Fix: ``e.message`` is only defined on some Python 2 exceptions and
        # is gone on Python 3 (PEP 352); formatting the exception directly is
        # portable and keeps the same log text for standard exceptions.
        sickrage.srCore.srLogger.debug("RSS Error: {}".format(e))
    return FeedParserDict()
def article(self) -> Article:
    # Entries already processed get an empty placeholder instead of a
    # re-download. NOTE(review): this returns a FeedParserDict although the
    # annotation says Article — presumably duck-typed; confirm with callers.
    if self.processed:
        return FeedParserDict(authors='', text='')

    result = self.article_supplier(self.input_entry.link)
    if "content" in self.input_entry:
        # The feed entry ships its own body; use it directly.
        result.set_html(self.input_entry.content[0].value)
        logging.info("Using inline content")
    else:
        # No inline content: fetch and parse the linked page.
        logging.info(f"Getting content from: {self.input_entry.link}")
        result.download()
        result.parse()
    logging.debug("Just retrieved the following article: ")
    logging.debug(result)
    return result
def getFeed(url, request_headers=None, handlers=None):
    """Fetch and parse an RSS feed, best-effort.

    Args:
        url: feed URL (normalized before the request)
        request_headers: optional extra HTTP headers passed to feedparser
        handlers: optional urllib handlers passed to feedparser

    Returns:
        feedparser.FeedParserDict: the parsed feed, or an empty
        FeedParserDict when fetching/parsing failed.
    """
    feed = FeedParserDict()
    try:
        try:
            feed = feedparser.parse(normalize_url(url), False, False,
                                    request_headers, handlers=handlers)
        except AttributeError:
            sickrage.LOGGER.debug('RSS ERROR:[{}] CODE:[{}]'.format(
                feed.feed[b'error'][b'description'],
                feed.feed[b'error'][b'code']))
    except Exception:
        # Fix: was a bare ``except:`` which also traps SystemExit and
        # KeyboardInterrupt. Keep the deliberate best-effort behaviour, but
        # only swallow ordinary exceptions.
        pass
    return feed
def make_feed_entries_list(n_items=10, feed_url=''):
    """Generates a list of feed entries.

    Args:
        n_items (int): how many feed entries to make
        feed_url (str): base URL
    """
    timezones = ['+0800', 'GMT']
    date_fmt = '%a, %d %b %Y %H:%M:%S'
    now = datetime.now()
    one_minute = timedelta(minutes=1)

    entries = []
    for idx in range(1, n_items + 1):
        # Each entry is published a random number of minutes in the past.
        stamp = now - one_minute * random.randint(1, 180)
        entries.append(FeedParserDict(
            link=urljoin(feed_url, f'story-{idx:05d}.html'),
            published=f'{stamp.strftime(date_fmt)} {random.choice(timezones)}',
            author=f'Author {idx}',
            summary=f'Summary {idx}',
            title=f'Title {idx}'))
    return entries
from collections import defaultdict
from unittest.mock import Mock

from expects import expect, contain, equal, be
from feedparser import FeedParserDict
from mamba import description, it

from pollycast.entry import Entry

EXAMPLE_ID = "example_id"
BASIC_INPUT_ENTRY = FeedParserDict(id=EXAMPLE_ID)


def mock_bucket(has_file=False, file_name=None):
    """Build a Mock bucket whose has_file/get_file return canned values."""
    bucket = Mock()
    bucket.has_file = Mock(return_value=has_file)
    bucket.get_file = Mock(return_value=file_name)
    return bucket


with description("Entry"):
    with it("should hash http-link form id's"):
        http_id = "http://example.com"
        entry = Entry(FeedParserDict(id=http_id), None)
        expect(entry.id).not_to(contain("http"))

    with it("should not change it if it is does not contain http"):
        entry = Entry(BASIC_INPUT_ENTRY, None)
        expect(entry.id).to(equal(EXAMPLE_ID))
def test_html(self):
    """An HTML summary is reduced to its plain-text content."""
    entry = FeedParserDict(
        summary='<html>A brief summary.</html>',
        summary_detail=FeedParserDict(type='text/html'))
    assert get_summary(entry) == 'A brief summary.'
def test_no_key(self):
    """An entry without a summary key yields the empty string."""
    empty_entry = FeedParserDict()
    assert get_summary(empty_entry) == ''
def test_markdown(self):
    """A Markdown summary is returned verbatim."""
    entry = FeedParserDict(
        summary='**A brief summary.**',
        summary_detail=FeedParserDict(type='text/markdown'))
    assert get_summary(entry) == '**A brief summary.**'
def modify_fpd(parsed, **kwargs):
    """Create a new FeedParserDict copied from *parsed*, overriding some values.

    Args:
        parsed: mapping (typically a FeedParserDict) to copy; left untouched.
        **kwargs: key/value overrides applied on top of the copy.

    Returns:
        FeedParserDict: a fresh dict with the overrides applied.
    """
    # Fix: ``FeedParserDict(**parsed.copy())`` copied the mapping twice
    # (once for .copy(), once for the ** expansion); a single constructor
    # copy is equivalent. The explicit item assignment loop is kept on
    # purpose: FeedParserDict overrides __setitem__ to remap legacy keys,
    # and dict.update() would bypass that.
    response = FeedParserDict(parsed)
    for key, value in kwargs.items():
        response[key] = value
    return response
# Canned ``feedparser.parse()`` result for the Mozilla Hacks blog feed,
# used as a test fixture (two entries, feed metadata, HTTP status/etag).
HACKS_PARSED = FeedParserDict(
    # Omitted attributes: encoding, headers, namespaces
    bozo=0,
    entries=[
        FeedParserDict(
            # Omitted attributes: author_detail, authors, content, guidislink,
            # links, summary_detail, tags, title_detail, comments,
            # slash_comments, wfw_commentrss
            author="Jen Simmons",
            id="https://hacks.mozilla.org/?p=31957",
            link="https://hacks.mozilla.org/2018/02/its-resilient-css-week/",
            published="Mon, 26 Feb 2018 15:05:08 +0000",
            published_parsed=struct_time((2018, 2, 26, 15, 5, 8, 0, 57, 0)),
            summary="Jen Simmons celebrates resilient CSS",
            title="It\u2019s Resilient CSS Week",
        ),
        FeedParserDict(
            author="James Hobin",
            id="https://hacks.mozilla.org/?p=31946",
            link=("https://hacks.mozilla.org/2018/02/making-a-clap-sensing"
                  "-web-thing/"),
            published="Thu, 22 Feb 2018 15:55:45 +0000",
            published_parsed=struct_time((2018, 2, 22, 15, 55, 45, 3, 53, 0)),
            summary=("The Project Things Gateway exists as a platform to bring"
                     " all of your IoT devices together under a unified"
                     " umbrella."),
            title="Making a Clap-Sensing Web Thing",
        ),
    ],
    etag='W/"1da1fc6a456fd49c32a9291b38ec31ee-gzip"',
    feed=FeedParserDict(
        # Omitted attributes: generator, generator_detail, language, links,
        # subtitle_detail, sy_updatefrequency, sy_updateperiod, title_detail,
        link="https://hacks.mozilla.org",
        subtitle="hacks.mozilla.org",
        title="Mozilla Hacks \u2013 the Web developer blog",
        updated="Mon, 26 Feb 2018 21:23:38 +0000",
        updated_parsed=struct_time((2018, 2, 26, 21, 23, 38, 0, 57, 0)),
    ),
    href="https://hacks.mozilla.org/feed/",
    status=200,
    updated="Mon, 26 Feb 2018 21:23:38 GMT",
    updated_parsed=struct_time((2018, 2, 26, 21, 23, 38, 0, 57, 0)),
    version="rss20",
)