def test_mark_as_read_unread(db_path, browser):
    parser = Parser()
    feed = parser.feed(1, datetime(2010, 1, 1))
    entry = parser.entry(1, 1, datetime(2010, 1, 1))

    reader = make_reader(db_path)
    reader._parser = parser
    reader.add_feed(feed.url)
    reader.update_feeds()

    browser.open('http://app/')
    assert len(browser.get_current_page().select('.entry')) == 1

    form = browser.select_form('.entry form.action-mark-as-read')
    response = browser.submit_selected(
        form.form.find('button', text='mark as read'))
    assert response.status_code == 200
    assert len(browser.get_current_page().select('.entry')) == 0

    response = browser.follow_link(browser.find_link(text='read'))
    assert response.status_code == 200
    assert len(browser.get_current_page().select('.entry')) == 1

    form = browser.select_form('.entry form.action-mark-as-unread')
    response = browser.submit_selected(
        form.form.find('button', text='mark as unread'))
    assert response.status_code == 200
    assert len(browser.get_current_page().select('.entry')) == 0

    response = browser.follow_link(browser.find_link(text='unread'))
    assert response.status_code == 200
    assert len(browser.get_current_page().select('.entry')) == 1
def test_sort_and_filter_subset_basic(data_and_kwargs, pre_stuff, call_method):
    entry_data, kwargs, chunk_size = data_and_kwargs

    # can't use reader fixture because of
    # https://github.com/pytest-dev/pytest/issues/916
    reader = make_reader(':memory:')
    reader._storage.chunk_size = chunk_size
    parser = Parser()
    reader._parser = parser

    for feed_id, entry_id, feed_updated, entry_updated in entry_data:
        seen_feed = feed_id in parser.feeds
        feed = parser.feed(feed_id, feed_updated)
        parser.entry(feed_id, entry_id, entry_updated)
        if not seen_feed:
            reader.add_feed(feed.url)

    reader.update_feeds()
    pre_stuff(reader)

    expected = [
        (fid, eid) for fid, entries in parser.entries.items() for eid in entries
    ]

    actual = [eval(e.id) for e in call_method(reader)]

    if call_method not in (get_entries_random, search_entries_random):
        assert len(expected) == len(actual)
        assert set(expected) == set(actual)
    else:
        assert set(expected) >= set(actual)

    actual = [eval(e.id) for e in call_method(reader, **kwargs)]
    assert set(expected) >= set(actual)
def test_plugin():
    reader = make_reader(':memory:', plugins=['reader.enclosure_dedupe'])
    reader._parser = parser = Parser()

    feed = parser.feed(1, datetime(2010, 1, 1))
    one = parser.entry(1, 1, datetime(2010, 1, 1))
    two = parser.entry(
        1,
        2,
        datetime(2010, 1, 1),
        enclosures=(Enclosure('href'), Enclosure('another one')),
    )
    three = parser.entry(
        1,
        3,
        datetime(2010, 1, 1),
        enclosures=(Enclosure('href', 'text', 1), Enclosure('href', 'json', 2)),
    )

    reader.add_feed(feed.url)
    reader.update_feeds()

    assert set((e.id, e.enclosures) for e in reader.get_entries()) == {
        (one.id, one.enclosures),
        (two.id, two.enclosures),
        (three.id, (Enclosure('href', 'text', 1),)),
    }
def make_reader_from_config(*, plugins=None, plugin_loader_cls=Loader, **kwargs):
    """Like reader.make_reader(), but:

    * If *_cls arguments are str, import them.
    * Load plugins.

    """
    plugins = plugins or {}

    for name in MAKE_READER_IMPORT_KWARGS:
        thing = kwargs.get(name)
        if thing and isinstance(thing, str):
            kwargs[name] = import_string(thing)

    reader = make_reader(**kwargs)

    try:
        plugin_loader_cls(plugins).load_plugins(reader)
    except Exception:
        reader.close()
        raise

    return reader
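# Hedged usage sketch for make_reader_from_config() above (not part of the
# original module). It assumes plugins are given as a mapping of plugin import
# path to an options dict, which is what the Loader class is presumed to
# expect; the database path is an arbitrary example value.
reader = make_reader_from_config(
    url="db.sqlite",
    plugins={"reader.enclosure_dedupe": {}},
)
try:
    reader.update_feeds()
finally:
    reader.close()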
def setup_reader_with_text_entries(num_entries):
    if EXISTING_DB_PATH:
        yield make_reader(EXISTING_DB_PATH)
        return
    with setup_db() as path:
        reader = make_reader_with_entries(path, num_entries, text=True)
        reader.update_feeds()
        yield reader
def manage_feed(self, update: Update, context: CallbackContext) -> None:
    """
    Adds or removes an RSS feed.

    Command args (required):
        * Option: Can be either 'Add' or 'Remove'.
        * Feed URL: The URL of the RSS feed to add or remove.
    """
    # Retrieve option and RSS feed URL
    try:
        option = context.args[0]
        feed_url = context.args[1]
        logger.debug(f"Option: {option}, RSS feed URL: {feed_url}.")
    except IndexError as err:
        logger.debug(
            f"Failed to retrieve option and RSS feed URL for /managefeed: {err}."
        )
        update.message.reply_text(
            "Provide an option and RSS feed URL to /managefeed.\nOption can be either Add or Remove."
        )
        return

    # Use Reader object
    with closing(make_reader("db.sqlite")) as reader:
        # Check if an RSS feed is being added or removed
        if option.lower() == "add":
            try:
                reader.add_feed(feed_url)
                logger.debug(f"Successfully added RSS feed: {feed_url}.")
                update.message.reply_text(
                    f"The RSS feed: {feed_url} was successfully added.")
            except FeedExistsError as err:
                logger.debug(
                    f"The RSS feed: {feed_url} has already been added: {err}."
                )
                update.message.reply_text(
                    f"The RSS feed: {feed_url} has already been added.")
        elif option.lower() == "remove":
            try:
                reader.remove_feed(feed_url)
                logger.debug(f"Successfully removed RSS feed: {feed_url}.")
                update.message.reply_text(
                    f"The RSS feed: {feed_url} was successfully removed.")
            except FeedNotFoundError as err:
                logger.debug(
                    f"The RSS feed: {feed_url} was not found: {err}.")
                update.message.reply_text(
                    f"The RSS feed: {feed_url} was not found.\nTry adding it using '/managefeed add https://examplefeedurl.com'."
                )
        else:
            logger.debug(
                f"The option: {option} provided to /managefeed was invalid."
            )
            update.message.reply_text(
                f"The option: {option} provided was invalid. The option can be either Add or Remove."
            )
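# Hedged sketch (not from the original bot code): with python-telegram-bot v13,
# a handler method like manage_feed() above would typically be registered as a
# /managefeed command handler. `RSSBot` and `token` are assumed names used only
# for illustration.
from telegram.ext import CommandHandler, Updater

def run_bot(token):
    bot = RSSBot()  # assumed class that defines manage_feed(), show_feeds(), check_feeds()
    updater = Updater(token)
    updater.dispatcher.add_handler(CommandHandler("managefeed", bot.manage_feed))
    updater.start_polling()
    updater.idle()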
def test_update_search_concurrent_calls(db_path, monkeypatch):
    """Test concurrent calls to reader.update_search() don't interfere
    with one another.

    https://github.com/lemon24/reader/issues/175#issuecomment-652489019

    """
    # This is a very intrusive test, maybe we should move it somewhere else.

    reader = make_reader(db_path)
    parser = reader._parser = Parser()

    feed = parser.feed(1, datetime(2010, 1, 1), title='feed')
    parser.entry(
        1,
        1,
        datetime(2010, 1, 1),
        title='entry',
        summary='summary',
        content=[Content('content')],
    )
    reader.add_feed(feed.url)
    reader.update_feeds()
    reader.enable_search()

    barrier = threading.Barrier(2)

    def target():
        from reader._search import Search

        class MySearch(Search):
            @staticmethod
            def strip_html(*args, **kwargs):
                barrier.wait()
                return Search.strip_html(*args, **kwargs)

        # TODO: remove monkeypatching when make_reader() gets a search_cls argument
        monkeypatch.setattr('reader.core.Search', MySearch)

        reader = make_reader(db_path)
        reader.update_search()

    threads = [threading.Thread(target=target) for _ in range(2)]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    (result,) = reader.search_entries('entry')
    assert len(result.content) == 2

    ((rowcount,),) = reader._search.db.execute("select count(*) from entries_search;")
    assert rowcount == 2
def setup_reader_with_search_and_some_read_entries(num_entries):
    if EXISTING_DB_PATH:
        yield make_reader(EXISTING_DB_PATH)
        return
    with setup_reader_with_text_entries(num_entries) as reader:
        reader.enable_search()
        reader.update_search()
        # mark roughly half the entries as read
        for i, entry in enumerate(reader.get_entries()):
            if i % 2 == 0:
                reader.mark_as_read(entry)
        yield reader
def show_feeds(self, update: Update, context: CallbackContext) -> None:
    """
    Show RSS feed(s) currently being checked for updates.
    """
    # Use Reader object
    with closing(make_reader("db.sqlite")) as reader:
        # Obtain RSS feed(s) currently being checked for updates
        feeds = list(reader.get_feeds(sort="added"))
        message = f"The following RSS feed(s) are being checked for updates: {[feed.url for feed in feeds]}."
        logger.debug(message)
        update.message.reply_text(message)
def test_noop(requests_mock):
    url = 'http://www.example.com/'

    reader = make_reader(':memory:', plugins=('reader.ua_fallback',))
    reader.add_feed(url)

    matcher = requests_mock.get(url, status_code=404)

    with pytest.raises(ParseError) as exc_info:
        reader.update_feed(url)
    assert '404' in str(exc_info.value)

    assert len(matcher.request_history) == 1
def test_delete_feed_from_entries_page_redirects(db_path, browser):
    parser = Parser()
    feed = parser.feed(1, datetime(2010, 1, 1))
    entry = parser.entry(1, 1, datetime(2010, 1, 1))

    reader = make_reader(db_path)
    reader._parser = parser
    reader.add_feed(feed.url)
    reader.update_feeds()

    browser.open('http://app/', params={'feed': feed.url})

    form = browser.select_form('#update-entries form.action-delete-feed')
    form.set_checkbox({'really-delete-feed': True})
    response = browser.submit_selected(
        form.form.find('button', text='delete feed'))
    assert response.status_code == 200
    assert browser.get_url() == 'http://app/'
    assert len(browser.get_current_page().select('.entry')) == 0
def check_feeds(self, context: CallbackContext) -> None:
    """
    Background task to check RSS feed(s) for updates and send them to the user
    on a repeated interval.
    """
    # Update RSS feed(s)
    logger.debug("Updating RSS feeds.")

    # Use Reader object
    with closing(make_reader("db.sqlite")) as reader:
        reader.update_feeds(workers=10)

        # Retrieve all RSS feed(s)
        logger.debug("Retrieving RSS feed(s).")
        feeds = reader.get_feeds(sort="added")

        for feed in feeds:
            logger.debug(
                f"Checking if RSS feed: {feed.title} has updated.")

            # Retrieve latest feed entry
            latest_entry = list(
                reader.get_entries(feed=feed, sort="recent"))[0]

            # Retrieve last known entry title for feed
            feed_last_title = self.feeds_last_entry_title.get(
                feed.title, None)

            # Compare last entry title with latest RSS feed entry's title.
            # If different, the feed has updated: update the dictionary and
            # send a message for the new entry.
            if latest_entry.title != feed_last_title:
                logger.debug(
                    f"RSS feed: {feed.title} has been updated.\nPrevious entry title was: {feed_last_title} and new entry title is: {latest_entry.title}\nUpdating dictionary with new title and sending update..."
                )

                # Create Telegram message string
                message = f"[{latest_entry.title}]({latest_entry.link})"

                # Update dictionary with new title
                self.feeds_last_entry_title[
                    feed.title] = latest_entry.title

                # Send Telegram message
                context.bot.send_message(chat_id=self.user_id,
                                         text=message,
                                         parse_mode="Markdown")
            else:
                logger.debug(
                    f"RSS feed: {feed.title} has not been updated. Checking next RSS feed..."
                )

    logger.debug("All RSS feeds checked. Waiting for next run...")
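# Hedged sketch (not from the original bot code): check_feeds() above matches
# the signature of a python-telegram-bot v13 job callback, so it would
# typically be scheduled on the job queue; the 10-minute interval and the
# `updater`/`bot` names are assumptions used only for illustration.
def schedule_feed_checks(updater, bot):
    updater.job_queue.run_repeating(bot.check_feeds, interval=600, first=10)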
def test_fallback(requests_mock):
    url = 'http://www.example.com/'

    reader = make_reader(':memory:', plugins=('reader.ua_fallback',))
    reader.add_feed(url)

    matcher = requests_mock.get(url, status_code=403)

    with pytest.raises(ParseError) as exc_info:
        reader.update_feed(url)
    assert '403' in str(exc_info.value)

    assert len(matcher.request_history) == 2
    first_ua, second_ua = [
        r.headers['User-Agent'] for r in matcher.request_history
    ]
    assert first_ua.startswith('python-reader/')
    assert second_ua.startswith('feedparser/')
    assert second_ua.endswith(first_ua)
def test_limit(db_path, browser):
    parser = Parser()
    feed = parser.feed(1, datetime(2010, 1, 1))
    one = parser.entry(1, 1, datetime(2010, 1, 1))
    two = parser.entry(1, 2, datetime(2010, 1, 2))

    reader = make_reader(db_path)
    reader._parser = parser
    reader.add_feed(feed.url)
    reader.update_feeds()

    browser.open('http://app/')
    entries = browser.get_current_page().select('.entry')
    assert len(entries) == 2
    assert '#2' in str(entries[0])
    assert '#1' in str(entries[1])

    browser.open('http://app/', params={'limit': 1})
    entries = browser.get_current_page().select('.entry')
    assert len(entries) == 1
    assert '#2' in str(entries[0])
def make_reader_with_entries(path, num_entries, num_feeds=NUM_FEEDS, text=False):
    reader = make_reader(path)
    reader._parser = parser = Parser()

    for i in range(num_feeds):
        feed = parser.feed(i, datetime(2010, 1, 1))
        reader.add_feed(feed.url)

    random.seed(0)
    for i in range(num_entries):
        kwargs = {}
        if text:
            kwargs.update(
                title=generate_lorem_ipsum(html=False, n=1, min=1, max=10),
                summary=generate_lorem_ipsum(html=False),
            )
        parser.entry(i % num_feeds, i, datetime(2010, 1, 1) + timedelta(i), **kwargs)

    return reader
def test_search(db_path, browser):
    parser = Parser()
    feed = parser.feed(1, datetime(2010, 1, 1))
    one = parser.entry(1, 1, datetime(2010, 1, 1), title='one')
    two = parser.entry(1, 2, datetime(2010, 1, 2), title='two')

    reader = make_reader(db_path)
    reader._parser = parser
    reader.add_feed(feed.url)
    reader.update_feeds()
    reader.enable_search()
    reader.update_search()

    browser.open('http://app/', params={'q': 'feed'})
    entries = browser.get_current_page().select('.entry')
    assert len(entries) == 2
    assert 'one' in str(entries[0]) or 'one' in str(entries[1])
    assert 'two' in str(entries[0]) or 'two' in str(entries[1])

    browser.open('http://app/', params={'q': 'one'})
    entries = browser.get_current_page().select('.entry')
    assert len(entries) == 1
    assert 'one' in str(entries[0])
def setup_reader_with_entries(num_entries):
    with setup_db_with_entries(num_entries) as path:
        yield make_reader(path)
def test_make_reader_storage():
    storage = Storage(':memory:')
    reader = make_reader('', _storage=storage)
    assert reader._storage is storage
def test_update_triggers_no_change(db_path, monkeypatch, set_user_title):
    """update_search() should *not* update the search index if anything else
    except the indexed fields changes.

    """
    from reader._search import Search

    strip_html_called = 0

    class MySearch(Search):
        @staticmethod
        def strip_html(*args, **kwargs):
            nonlocal strip_html_called
            strip_html_called += 1
            return Search.strip_html(*args, **kwargs)

    # TODO: remove monkeypatching when make_reader() gets a search_cls argument
    monkeypatch.setattr('reader.core.Search', MySearch)

    reader = make_reader(db_path)
    reader._parser = parser = Parser()

    feed = parser.feed(1, datetime(2010, 1, 1), title='feed')
    entry = parser.entry(
        1,
        1,
        datetime(2010, 1, 1),
        title='entry',
        summary='summary',
        content=[Content('content')],
    )
    reader.add_feed(feed.url)
    reader.update_feeds()
    if set_user_title:
        reader.set_feed_user_title(feed, 'user title')

    reader.enable_search()
    reader.update_search()

    assert strip_html_called > 0
    strip_html_called = 0

    (old_result,) = reader.search_entries('entry OR feed')

    feed = parser.feed(1, datetime(2010, 1, 2), title='feed', link='link', author='author')
    """
    entry = parser.entry(
        1,
        1,
        datetime(2010, 1, 2),
        title='entry',
        summary='summary',
        content=[Content('content')],
        link='link',
        author='author',
        published=datetime(2010, 1, 2),
        enclosures=[Enclosure('enclosure')],
    )
    """
    # NOTE: As of 1.4, updating entries normally (above) uses INSERT OR REPLACE.
    # REPLACE == DELETE + INSERT (https://www.sqlite.org/lang_conflict.html),
    # so updating the entry normally *will not* fire the ON UPDATE trigger,
    # but the ON DELETE and ON INSERT ones (basically, the ON UPDATE trigger
    # never fires at the moment).
    #
    # Meanwhile, we do a (more intrusive/brittle) manual update:
    with reader._search.db as db:
        db.execute("""
            UPDATE entries
            SET (
                title,
                link,
                updated,
                author,
                published,
                summary,
                content,
                enclosures
            ) = (
                'entry',
                'http://www.example.com/entries/1',
                '2010-01-02 00:00:00',
                'author',
                '2010-01-02 00:00:00',
                'summary',
                '[{"value": "content", "type": null, "language": null}]',
                '[{"href": "enclosure", "type": null, "length": null}]'
            )
            WHERE (id, feed) = ('1, 1', '1');
        """)
    # TODO: Change this test when updating entries uses UPDATE instead of INSERT OR REPLACE

    reader.mark_as_read(entry)
    reader.mark_as_important(entry)

    reader.update_feeds()
    if set_user_title:
        reader.set_feed_user_title(feed, 'user title')

    reader.update_search()

    (new_result,) = reader.search_entries('entry OR feed')

    assert old_result == new_result
    assert strip_html_called == 0
def test_update_search_entry_changed_during_strip_html(db_path, monkeypatch):
    """Test the entry can't remain out of sync if it changes
    during reader.update_search() in a strip_html() call.

    https://github.com/lemon24/reader/issues/175#issuecomment-652489019

    """
    # This is a very intrusive test, maybe we should move it somewhere else.

    reader = make_reader(db_path)
    parser = reader._parser = Parser()

    feed = parser.feed(1, datetime(2010, 1, 1), title='one')
    parser.entry(1, 1, datetime(2010, 1, 1), title='one')
    reader.add_feed(feed.url)
    reader.update_feeds()
    reader.enable_search()
    reader.update_search()

    feed = parser.feed(1, datetime(2010, 1, 2), title='two')
    parser.entry(1, 1, datetime(2010, 1, 2), title='two')
    reader.update_feed(feed.url)

    in_strip_html = threading.Event()
    can_return_from_strip_html = threading.Event()

    def target():
        from reader._search import Search

        # strip_html() may or may not be used as a SQLite user-defined function,
        # hence the whole subclassing thing
        class MySearch(Search):
            @staticmethod
            def strip_html(*args, **kwargs):
                in_strip_html.set()
                can_return_from_strip_html.wait()
                return Search.strip_html(*args, **kwargs)

        # TODO: remove monkeypatching when make_reader() gets a search_cls argument
        monkeypatch.setattr('reader.core.Search', MySearch)

        reader = make_reader(db_path)
        reader.update_search()

    thread = threading.Thread(target=target)
    thread.start()

    in_strip_html.wait()

    try:
        feed = parser.feed(1, datetime(2010, 1, 3), title='three')
        parser.entry(1, 1, datetime(2010, 1, 3), title='three')

        reader._storage.db.execute("PRAGMA busy_timeout = 0;")
        reader.update_feed(feed.url)
        expected_title = 'three'
    except StorageError:
        expected_title = 'two'
    finally:
        can_return_from_strip_html.set()

    thread.join()

    reader.update_search()

    (entry,) = reader.get_entries()
    (result,) = reader.search_entries('one OR two OR three')
    assert entry.title == result.metadata['.title'].value == expected_title
def get_reader():
    if not hasattr(g, 'reader'):
        reader = make_reader(current_app.config['READER_DB'])
        current_app.reader_load_plugins(reader)
        g.reader = reader
    return g.reader
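# Hedged sketch (assumption, not shown in the original app code): get_reader()
# above caches the Reader on Flask's `g`, so it is usually paired with a
# teardown hook that closes it when the application context ends, roughly:
def close_reader(exception=None):
    if hasattr(g, 'reader'):
        g.reader.close()

# registered on the application with, e.g., app.teardown_appcontext(close_reader).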
def reader():
    return make_reader(':memory:')
    return itertools.islice(lines, size.lines - 1)


def print_status_line(message, seconds):
    print(message, end="", flush=True)
    time.sleep(seconds)
    length = len(message)
    print("\b" * length, " " * length, "\b" * length, sep="", end="", flush=True)


reader = make_reader(sys.argv[1])

# Prevent update errors from showing.
logging.basicConfig(level=logging.CRITICAL)

update_interval = 60 * 10
last_updated = time.monotonic() - update_interval

while True:
    # Clear screen; should be cross-platform.
    os.system("cls || clear")

    print(*get_lines(reader), sep="\n")

    # Keep sleeping until we need to update.
    while True:
def test_plugin():
    reader = make_reader(':memory:', plugins=['reader.entry_dedupe'])
    parser = Parser()
    reader._parser = parser

    one = parser.feed(1, datetime(2010, 1, 1))
    old = parser.entry(1, 1, datetime(2010, 1, 1), title='title', summary='old')
    title_only_one = parser.entry(1, 2, datetime(2010, 1, 1), title='title only')
    read_one = parser.entry(1, 3, datetime(2010, 1, 1), title='title', summary='read')
    unread_one = parser.entry(1, 4, datetime(2010, 1, 1), title='title', summary='unread')
    important_one = parser.entry(1, 5, datetime(2010, 1, 1), title='important', summary='also important')

    # TODO just use the feeds/entries as arguments

    reader.add_feed(one.url)
    reader.update_feeds()
    reader.mark_as_read((one.url, read_one.id))
    reader.mark_as_important((one.url, important_one.id))

    one = parser.feed(1, datetime(2010, 1, 2))
    new = parser.entry(1, 11, datetime(2010, 1, 2), title='title', summary='new')
    title_only_two = parser.entry(1, 12, datetime(2010, 1, 2), title='title only')
    read_two = parser.entry(1, 13, datetime(2010, 1, 2), title='title', summary='read')
    unread_two = parser.entry(1, 14, datetime(2010, 1, 2), title='title', summary='unread')
    important_two = parser.entry(1, 15, datetime(2010, 1, 2), title='important', summary='also important')
    reader.update_feeds()

    assert set((e.id, e.read, e.important) for e in reader.get_entries()) == {
        t + (False,)
        for t in {
            # remain untouched
            (old.id, False),
            (new.id, False),
            # also remain untouched
            (title_only_one.id, False),
            (title_only_two.id, False),
            # the new one is marked as read because the old one was
            (read_one.id, True),
            (read_two.id, True),
            # the old one is marked as read in favor of the new one
            (unread_one.id, True),
            (unread_two.id, False),
        }
    } | {
        # the new one is important because the old one was;
        # the old one is not important anymore
        (important_one.id, True, False),
        (important_two.id, False, True),
    }
def test_update_search_entry_changed_between_insert_loops(db_path, monkeypatch):
    """Test the entry can't be added twice to the search index if it changes
    during reader.update_search() between two insert loops.

    The scenario is:

    * entry has to_update set
    * _delete_from_search removes it from search
    * loop 1 of _insert_into_search finds entry and inserts it into search,
      clears to_update
    * entry has to_update set (if to_update is set because the feed changed,
      last_updated does not change; even if it did, it doesn't matter, since
      the transaction only spans a single loop)
    * loop 2 of _insert_into_search finds entry and inserts it into search
      again, clears to_update
    * loop 3 of _insert_into_search doesn't find any entry, returns

    https://github.com/lemon24/reader/issues/175#issuecomment-654213853

    """
    # This is a very intrusive test, maybe we should move it somewhere else.

    reader = make_reader(db_path)
    reader.enable_search()

    parser = reader._parser = Parser()
    feed = parser.feed(1, datetime(2010, 1, 1))
    parser.entry(1, 1, datetime(2010, 1, 1), summary='one')
    reader.add_feed(feed.url)
    reader.update_feeds()

    in_insert_chunk = threading.Event()
    can_return_from_insert_chunk = threading.Event()

    def target():
        reader = make_reader(db_path)
        original_insert_chunk = reader._search._insert_into_search_one_chunk
        loop = 0

        def insert_chunk(*args, **kwargs):
            nonlocal loop
            if loop == 1:
                in_insert_chunk.set()
                can_return_from_insert_chunk.wait()
            loop += 1
            return original_insert_chunk(*args, **kwargs)

        reader._search._insert_into_search_one_chunk = insert_chunk
        reader.update_search()

    thread = threading.Thread(target=target)
    thread.start()

    in_insert_chunk.wait()

    try:
        feed = parser.feed(1, datetime(2010, 1, 2))
        parser.entry(1, 1, datetime(2010, 1, 2), summary='two')
        reader.update_feed(feed.url)
    finally:
        can_return_from_insert_chunk.set()

    thread.join()

    (result,) = reader.search_entries('entry')
    assert len(result.content) == 1

    ((rowcount,),) = reader._search.db.execute("select count(*) from entries_search;")
    assert rowcount == 1
Part of https://reader.readthedocs.io/en/latest/tutorial.html

"""

import os
import os.path
import shutil

import requests

from reader import make_reader, FeedExistsError


feed_url = "http://www.hellointernet.fm/podcast?format=rss"
podcasts_dir = "podcasts"

reader = make_reader("db.sqlite")


def add_and_update_feed():
    try:
        reader.add_feed(feed_url)
    except FeedExistsError:
        pass
    reader.update_feeds()


def download_everything():
    entries = reader.get_entries(feed=feed_url, has_enclosures=True, read=False)
def test_make_reader_storage(storage):
    reader = make_reader('', _storage=storage)
    assert reader._storage is storage