def test_mark_as_read_unread(db_path, make_reader, browser):
    parser = Parser()
    feed = parser.feed(1, datetime(2010, 1, 1))
    entry = parser.entry(1, 1, datetime(2010, 1, 1))

    reader = make_reader(db_path)
    reader._parser = parser
    reader.add_feed(feed.url)
    reader.update_feeds()

    browser.open('http://app/')
    assert len(browser.get_current_page().select('.entry')) == 1

    form = browser.select_form('.entry form.action-mark-as-read')
    response = browser.submit_selected(
        form.form.find('button', text='mark as read'))
    assert response.status_code == 200
    assert len(browser.get_current_page().select('.entry')) == 0

    response = browser.follow_link(browser.find_link(text='read'))
    assert response.status_code == 200
    assert len(browser.get_current_page().select('.entry')) == 1

    form = browser.select_form('.entry form.action-mark-as-unread')
    response = browser.submit_selected(
        form.form.find('button', text='mark as unread'))
    assert response.status_code == 200
    assert len(browser.get_current_page().select('.entry')) == 0

    response = browser.follow_link(browser.find_link(text='unread'))
    assert response.status_code == 200
    assert len(browser.get_current_page().select('.entry')) == 1
def test_post_feed_update_plugins(reader):
    parser = Parser()
    reader._parser = parser

    plugin_calls = []

    def first_plugin(r, e):
        assert r is reader
        plugin_calls.append((first_plugin, e))

    def second_plugin(r, e):
        assert r is reader
        plugin_calls.append((second_plugin, e))

    feed = parser.feed(1, datetime(2010, 1, 1))
    parser.entry(1, 1, datetime(2010, 1, 1))
    reader.add_feed(feed.url)
    reader._post_feed_update_plugins.append(first_plugin)
    reader._post_feed_update_plugins.append(second_plugin)

    reader.update_feeds()

    assert plugin_calls == [
        (first_plugin, feed.url),
        (second_plugin, feed.url),
    ]
    assert set(e.id for e in reader.get_entries()) == {'1, 1'}
def test_sort_and_filter_subset_basic(data_and_kwargs, pre_stuff, call_method):
    entry_data, kwargs, chunk_size = data_and_kwargs

    # can't use the reader fixture because of
    # https://github.com/pytest-dev/pytest/issues/916
    reader = make_reader(':memory:')
    reader._storage.chunk_size = chunk_size

    parser = Parser()
    reader._parser = parser

    for feed_id, entry_id, feed_updated, entry_updated in entry_data:
        seen_feed = feed_id in parser.feeds
        feed = parser.feed(feed_id, feed_updated)
        parser.entry(feed_id, entry_id, entry_updated)
        if not seen_feed:
            reader.add_feed(feed.url)

    reader.update_feeds()
    pre_stuff(reader)

    expected = [
        (fid, eid) for fid, entries in parser.entries.items() for eid in entries
    ]

    actual = [eval(e.id) for e in call_method(reader)]

    if call_method not in (get_entries_random, search_entries_random):
        assert len(expected) == len(actual)
        assert set(expected) == set(actual)
    else:
        assert set(expected) >= set(actual)

    actual = [eval(e.id) for e in call_method(reader, **kwargs)]
    assert set(expected) >= set(actual)
def test_search_entries_order_content(reader):
    parser = Parser()
    reader._parser = parser

    feed = parser.feed(1, datetime(2010, 1, 1))
    one = parser.entry(
        1,
        1,
        datetime(2010, 1, 1),
        summary='word word',
        content=[
            Content('word'),
            Content('does not match'),
            Content('word word word word'),
            Content('word word word'),
        ],
    )

    reader.add_feed(feed.url)
    reader.update_feeds()
    reader.enable_search()
    reader.update_search()

    # there should be exactly one result
    (rv, ) = reader.search_entries('word')
    assert list(rv.content) == [
        '.content[2].value',
        '.content[3].value',
        '.summary',
        '.content[0].value',
    ]
def reader_without_and_with_entries(request, reader):
    if not request.param:
        return reader

    parser = Parser()
    reader._parser = parser

    feed = parser.feed(1, datetime(2010, 1, 1))
    parser.entry(
        1,
        1,
        datetime(2010, 1, 1),
        title='feed one',
        summary='summary',
        content=[Content('content'), Content('another content')],
    )
    parser.entry(1, 2, datetime(2010, 1, 1), title='feed one')
    parser.entry(1, 3, datetime(2010, 1, 1), title='feed one')
    parser.entry(1, 4, datetime(2010, 1, 1), title='feed one')
    parser.entry(1, 5, datetime(2010, 1, 1), title='feed one')

    reader.add_feed(feed.url)
    reader.update_feeds()

    return reader
def test_regex_mark_as_read_bad_metadata(make_reader, value):
    reader = make_reader(':memory:', plugins=['reader.mark_as_read'])

    parser = Parser()
    reader._parser = parser

    one = parser.feed(1, datetime(2010, 1, 1))
    parser.entry(1, 1, datetime(2010, 1, 1), title='match')

    reader.add_feed(one)
    reader.set_feed_metadata_item(one, '.reader.mark_as_read', value)

    reader.update_feeds()

    assert [e.read for e in reader.get_entries()] == [False]
def test_enclosure_dedupe(reader):
    reader._parser = parser = Parser()

    feed = parser.feed(1, datetime(2010, 1, 1))
    one = parser.entry(1, 1, datetime(2010, 1, 1))
    two = parser.entry(
        1,
        2,
        datetime(2010, 1, 1),
        enclosures=(Enclosure('href'), Enclosure('another one')),
    )
    three = parser.entry(
        1,
        3,
        datetime(2010, 1, 1),
        enclosures=(Enclosure('href', 'text', 1), Enclosure('href', 'json', 2)),
    )

    reader.add_feed(feed.url)
    reader.update_feeds()

    enclosure_dedupe(reader)

    assert set((e.id, e.enclosures) for e in reader.get_entries()) == {
        (one.id, one.enclosures),
        (two.id, two.enclosures),
        (three.id, (Enclosure('href', 'text', 1),)),
    }
def test_update_triggers(reader, data):
    """update_search() should update the search index
    if the indexed fields change.

    """
    reader._parser = parser = Parser()
    feed = parser.feed(1, datetime(2010, 1, 1))
    reader.add_feed(feed.url)
    reader.enable_search()

    for i, (do_stuff, paths) in enumerate(data):
        do_stuff(reader)
        reader.update_feeds()
        reader.update_search()

        entry_data = {
            (e.feed_url, e.id): {p: eval(f"e{p}", dict(e=e, p=p)) for p in paths}
            for e in reader.get_entries()
        }
        result_data = {
            (r.feed_url, r.id): {
                p: hl.value for p, hl in {**r.metadata, **r.content}.items()
            }
            for r in reader.search_entries('entry OR feed')
        }

        assert entry_data == result_data, f"change {i}"
def test_delete_feed_from_entries_page_redirects(db_path, browser):
    parser = Parser()
    feed = parser.feed(1, datetime(2010, 1, 1))
    entry = parser.entry(1, 1, datetime(2010, 1, 1))

    reader = make_reader(db_path)
    reader._parser = parser
    reader.add_feed(feed.url)
    reader.update_feeds()

    browser.open('http://app/', params={'feed': feed.url})

    form = browser.select_form('#update-entries form.action-delete-feed')
    form.set_checkbox({'really-delete-feed': True})
    response = browser.submit_selected(
        form.form.find('button', text='delete feed'))
    assert response.status_code == 200
    assert browser.get_url() == 'http://app/'
    assert len(browser.get_current_page().select('.entry')) == 0
def test_update_search_concurrent_calls(db_path, monkeypatch):
    """Test that concurrent calls to reader.update_search() don't interfere
    with one another.

    https://github.com/lemon24/reader/issues/175#issuecomment-652489019

    """
    # This is a very intrusive test, maybe we should move it somewhere else.

    reader = make_reader(db_path)
    parser = reader._parser = Parser()

    feed = parser.feed(1, datetime(2010, 1, 1), title='feed')
    parser.entry(
        1,
        1,
        datetime(2010, 1, 1),
        title='entry',
        summary='summary',
        content=[Content('content')],
    )
    reader.add_feed(feed.url)
    reader.update_feeds()
    reader.enable_search()

    barrier = threading.Barrier(2)

    def target():
        from reader._search import Search

        class MySearch(Search):
            @staticmethod
            def strip_html(*args, **kwargs):
                barrier.wait()
                return Search.strip_html(*args, **kwargs)

        # TODO: remove monkeypatching when make_reader() gets a search_cls argument
        monkeypatch.setattr('reader.core.Search', MySearch)

        reader = make_reader(db_path)
        reader.update_search()

    threads = [threading.Thread(target=target) for _ in range(2)]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    (result, ) = reader.search_entries('entry')
    assert len(result.content) == 2

    ((rowcount, ), ) = reader._search.db.execute(
        "select count(*) from entries_search;")
    assert rowcount == 2
def test_search_entries_order_title_content_beats_title(reader):
    parser = Parser()
    reader._parser = parser

    feed = parser.feed(1, datetime(2010, 1, 1))
    one = parser.entry(1, 1, datetime(2010, 1, 1), title='one')
    two = parser.entry(1, 2, datetime(2010, 1, 1), title='two')
    three = parser.entry(
        1, 3, datetime(2010, 1, 1), title='one', content=[Content('one')]
    )

    reader.add_feed(feed.url)
    reader.update_feeds()
    reader.enable_search()
    reader.update_search()

    assert [(e.id, e.feed_url) for e in reader.search_entries('one')] == [
        (three.id, feed.url),
        (one.id, feed.url),
    ]
def test_limit(db_path, make_reader, browser):
    parser = Parser()
    feed = parser.feed(1, datetime(2010, 1, 1))
    one = parser.entry(1, 1, datetime(2010, 1, 1))
    two = parser.entry(1, 2, datetime(2010, 1, 2))

    reader = make_reader(db_path)
    reader._parser = parser
    reader.add_feed(feed.url)
    reader.update_feeds()

    browser.open('http://app/')
    entries = browser.get_current_page().select('.entry')
    assert len(entries) == 2
    assert '#2' in str(entries[0])
    assert '#1' in str(entries[1])

    browser.open('http://app/', params={'limit': 1})
    entries = browser.get_current_page().select('.entry')
    assert len(entries) == 1
    assert '#2' in str(entries[0])
def test_search_entries_order_content_recent(reader):
    """When sort='recent' is used, the .content of any individual result
    should still be sorted by relevance.

    """
    parser = Parser()
    reader._parser = parser

    feed = parser.feed(1, datetime(2010, 1, 1))
    one = parser.entry(
        1,
        1,
        datetime(2010, 1, 1),
        title='word',
        content=[Content('word word'), Content('word'), Content('word word word')],
    )
    two = parser.entry(1, 2, datetime(2010, 1, 2), summary='word')

    reader.add_feed(feed.url)
    reader.update_feeds()
    reader.enable_search()
    reader.update_search()

    # sanity check: one is more relevant
    assert [e.id for e in reader.search_entries('word')] == ['1, 1', '1, 2']

    results = list(reader.search_entries('word', sort='recent'))
    # two is first because of updated
    assert [e.id for e in results] == ['1, 2', '1, 1']
    # but within one, the content keys are still sorted by relevance
    assert list(results[1].content) == [
        '.content[2].value',
        '.content[0].value',
        '.content[1].value',
    ]
def test_search(db_path, make_reader, browser):
    parser = Parser()
    feed = parser.feed(1, datetime(2010, 1, 1))
    one = parser.entry(1, 1, datetime(2010, 1, 1), title='one')
    two = parser.entry(1, 2, datetime(2010, 1, 2), title='two')

    reader = make_reader(db_path)
    reader._parser = parser
    reader.add_feed(feed.url)
    reader.update_feeds()
    reader.enable_search()
    reader.update_search()

    browser.open('http://app/', params={'q': 'feed'})
    entries = browser.get_current_page().select('.entry')
    assert len(entries) == 2
    assert 'one' in str(entries[0]) or 'one' in str(entries[1])
    assert 'two' in str(entries[0]) or 'two' in str(entries[1])

    browser.open('http://app/', params={'q': 'one'})
    entries = browser.get_current_page().select('.entry')
    assert len(entries) == 1
    assert 'one' in str(entries[0])
def make_reader_with_entries(path, num_entries, num_feeds=NUM_FEEDS, text=False):
    reader = make_reader(path)
    reader._parser = parser = Parser()

    for i in range(num_feeds):
        feed = parser.feed(i, datetime(2010, 1, 1))
        reader.add_feed(feed.url)

    random.seed(0)
    for i in range(num_entries):
        kwargs = {}
        if text:
            kwargs.update(
                title=generate_lorem_ipsum(html=False, n=1, min=1, max=10),
                summary=generate_lorem_ipsum(html=False),
            )
        parser.entry(i % num_feeds, i, datetime(2010, 1, 1) + timedelta(i), **kwargs)

    return reader
def test_plugin():
    reader = make_reader(':memory:', plugins=['reader.entry_dedupe'])

    parser = Parser()
    reader._parser = parser

    one = parser.feed(1, datetime(2010, 1, 1))
    old = parser.entry(1, 1, datetime(2010, 1, 1), title='title', summary='old')
    title_only_one = parser.entry(1, 2, datetime(2010, 1, 1), title='title only')
    read_one = parser.entry(1, 3, datetime(2010, 1, 1), title='title', summary='read')
    unread_one = parser.entry(
        1, 4, datetime(2010, 1, 1), title='title', summary='unread'
    )
    important_one = parser.entry(
        1, 5, datetime(2010, 1, 1), title='important', summary='also important'
    )

    # TODO: just use the feeds/entries as arguments

    reader.add_feed(one.url)
    reader.update_feeds()
    reader.mark_as_read((one.url, read_one.id))
    reader.mark_as_important((one.url, important_one.id))

    one = parser.feed(1, datetime(2010, 1, 2))
    new = parser.entry(1, 11, datetime(2010, 1, 2), title='title', summary='new')
    title_only_two = parser.entry(1, 12, datetime(2010, 1, 2), title='title only')
    read_two = parser.entry(1, 13, datetime(2010, 1, 2), title='title', summary='read')
    unread_two = parser.entry(
        1, 14, datetime(2010, 1, 2), title='title', summary='unread'
    )
    important_two = parser.entry(
        1, 15, datetime(2010, 1, 2), title='important', summary='also important'
    )

    reader.update_feeds()

    assert set((e.id, e.read, e.important) for e in reader.get_entries()) == {
        t + (False, )
        for t in {
            # remain untouched
            (old.id, False),
            (new.id, False),
            # also remain untouched
            (title_only_one.id, False),
            (title_only_two.id, False),
            # the new one is marked as read because the old one was
            (read_one.id, True),
            (read_two.id, True),
            # the old one is marked as read in favor of the new one
            (unread_one.id, True),
            (unread_two.id, False),
        }
    } | {
        # the new one is important because the old one was;
        # the old one is not important anymore
        (important_one.id, True, False),
        (important_two.id, False, True),
    }
def test_add_delete_feed(db_path, make_reader, browser, monkeypatch):
    parser = Parser()
    feed = parser.feed(1, datetime(2010, 1, 1))
    entry = parser.entry(1, 1, datetime(2010, 1, 1))

    def app_make_reader(**kwargs):
        reader = make_reader_from_config(**kwargs)
        reader._parser = parser
        return reader

    # this is brittle; it may break if we change how we use make_reader in the app
    monkeypatch.setattr('reader._config.make_reader_from_config', app_make_reader)

    reader = app_make_reader(url=db_path)

    browser.open('http://app/')

    response = browser.follow_link(browser.find_link(text='feeds'))
    assert response.status_code == 200
    assert len(browser.get_current_page().select('.feed')) == 0

    # go to the preview page
    form = browser.select_form('#top-bar form')
    form.input({'url': feed.url})
    response = browser.submit_selected(
        form.form.find('button', text='add feed'))
    assert response.status_code == 200
    assert (
        browser.get_current_page().select('title')[0].text
        == 'Preview for ' + feed.title
    )
    assert len(browser.get_current_page().select('.entry')) == 1

    # actually add the feed
    form = browser.select_form('form.action-add-feed')
    response = browser.submit_selected(
        form.form.find('button', text='add feed'))

    # we should be at the feed page, via a redirect
    assert response.status_code == 200
    assert response.url == 'http://app/?feed=' + feed.url
    assert response.history[-1].status_code == 302
    assert len(browser.get_current_page().select('.entry')) == 0

    reader.update_feeds()

    browser.refresh()
    assert len(browser.get_current_page().select('.entry')) == 1

    response = browser.follow_link(browser.find_link(text='feeds'))
    assert response.status_code == 200
    feed_link = browser.find_link(text=feed.title)

    form = browser.select_form('.feed form.action-delete-feed')
    form.set_checkbox({'really-delete-feed': True})
    response = browser.submit_selected(
        form.form.find('button', text='delete feed'))
    assert response.status_code == 200
    assert len(browser.get_current_page().select('.feed')) == 0

    response = browser.follow_link(browser.find_link(text='entries'))
    assert response.status_code == 200
    assert len(browser.get_current_page().select('.entry')) == 0

    response = browser.follow_link(feed_link)
    assert response.status_code == 404
def test_regex_mark_as_read(reader, monkeypatch, tmpdir):
    parser = Parser()
    reader._parser = parser

    one = parser.feed(1, datetime(2010, 1, 1))
    parser.entry(1, 1, datetime(2010, 1, 1), title='match old')

    reader.add_feed(one.url)
    reader.update_feeds()
    reader.set_feed_metadata(one, 'regex-mark-as-read', {'patterns': ['^match']})

    regex_mark_as_read(reader)

    one = parser.feed(1, datetime(2010, 1, 2))
    match_new = parser.entry(1, 2, datetime(2010, 1, 2), title='match new')
    parser.entry(1, 3, datetime(2010, 1, 2), title='no match')

    two = parser.feed(2, datetime(2010, 1, 2))
    parser.entry(2, 3, datetime(2010, 1, 2), title='match other')

    reader.add_feed(two.url)
    reader.update_feeds()

    assert len(list(reader.get_entries())) == 4
    assert set((e.id, e.read) for e in reader.get_entries(read=True)) == {
        (match_new.id, True),
    }
def test_update_triggers_no_change(db_path, monkeypatch, set_user_title):
    """update_search() should *not* update the search index
    if anything other than the indexed fields changes.

    """
    from reader._search import Search

    strip_html_called = 0

    class MySearch(Search):
        @staticmethod
        def strip_html(*args, **kwargs):
            nonlocal strip_html_called
            strip_html_called += 1
            return Search.strip_html(*args, **kwargs)

    # TODO: remove monkeypatching when make_reader() gets a search_cls argument
    monkeypatch.setattr('reader.core.Search', MySearch)

    reader = make_reader(db_path)
    reader._parser = parser = Parser()

    feed = parser.feed(1, datetime(2010, 1, 1), title='feed')
    entry = parser.entry(
        1,
        1,
        datetime(2010, 1, 1),
        title='entry',
        summary='summary',
        content=[Content('content')],
    )
    reader.add_feed(feed.url)
    reader.update_feeds()
    if set_user_title:
        reader.set_feed_user_title(feed, 'user title')

    reader.enable_search()
    reader.update_search()

    assert strip_html_called > 0
    strip_html_called = 0

    (old_result, ) = reader.search_entries('entry OR feed')

    feed = parser.feed(
        1, datetime(2010, 1, 2), title='feed', link='link', author='author'
    )
    """
    entry = parser.entry(
        1,
        1,
        datetime(2010, 1, 2),
        title='entry',
        summary='summary',
        content=[Content('content')],
        link='link',
        author='author',
        published=datetime(2010, 1, 2),
        enclosures=[Enclosure('enclosure')],
    )
    """
    # NOTE: As of 1.4, updating entries normally (above) uses INSERT OR REPLACE.
    # REPLACE == DELETE + INSERT (https://www.sqlite.org/lang_conflict.html),
    # so updating the entry normally *will not* fire the ON UPDATE trigger,
    # but the ON DELETE and ON INSERT ones (basically, the ON UPDATE trigger
    # never fires at the moment); see the standalone sketch after this test.
    #
    # Meanwhile, we do a (more intrusive/brittle) manual update:

    with reader._search.db as db:
        db.execute(
            """
            UPDATE entries
            SET (
                title,
                link,
                updated,
                author,
                published,
                summary,
                content,
                enclosures
            ) = (
                'entry',
                'http://www.example.com/entries/1',
                '2010-01-02 00:00:00',
                'author',
                '2010-01-02 00:00:00',
                'summary',
                '[{"value": "content", "type": null, "language": null}]',
                '[{"href": "enclosure", "type": null, "length": null}]'
            )
            WHERE (id, feed) = ('1, 1', '1');
            """
        )

    # TODO: Change this test when updating entries uses UPDATE instead of INSERT OR REPLACE

    reader.mark_as_read(entry)
    reader.mark_as_important(entry)

    reader.update_feeds()
    if set_user_title:
        reader.set_feed_user_title(feed, 'user title')

    reader.update_search()

    (new_result, ) = reader.search_entries('entry OR feed')

    assert old_result == new_result
    assert strip_html_called == 0
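# Illustrative sketch only (not part of the test suite, and not reader's actual
# schema): a minimal standalone demonstration of the SQLite behavior the NOTE in
# test_update_triggers_no_change() relies on, namely that INSERT OR REPLACE is
# implemented as DELETE + INSERT, so (with recursive triggers enabled) it fires
# the ON DELETE / ON INSERT triggers but never the ON UPDATE one.
def _sketch_insert_or_replace_triggers():
    import sqlite3

    db = sqlite3.connect(':memory:')
    # delete triggers fire for rows removed by REPLACE only with this pragma on
    db.execute("PRAGMA recursive_triggers = ON;")
    db.executescript("""
        CREATE TABLE t (id INTEGER PRIMARY KEY, value TEXT);
        CREATE TABLE log (event TEXT);
        CREATE TRIGGER t_ai AFTER INSERT ON t BEGIN INSERT INTO log VALUES ('insert'); END;
        CREATE TRIGGER t_ad AFTER DELETE ON t BEGIN INSERT INTO log VALUES ('delete'); END;
        CREATE TRIGGER t_au AFTER UPDATE ON t BEGIN INSERT INTO log VALUES ('update'); END;
        INSERT INTO t VALUES (1, 'old');
        INSERT OR REPLACE INTO t VALUES (1, 'new');
    """)
    events = [row[0] for row in db.execute("SELECT event FROM log;")]

    # the REPLACE shows up as a delete plus a second insert; 'update' never appears
    assert 'update' not in events
    assert events.count('insert') == 2
    assert 'delete' in events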
def test_regex_mark_as_read(make_reader):
    key = '.reader.mark_as_read'
    value = {'title': ['^match']}

    reader = make_reader(':memory:', plugins=['reader.mark_as_read'])

    parser = Parser()
    reader._parser = parser

    one = parser.feed(1, datetime(2010, 1, 1))
    parser.entry(1, 1, datetime(2010, 1, 1), title='match old')

    reader.add_feed(one)
    reader.update_feeds()
    reader.set_feed_metadata_item(one, key, value)

    one = parser.feed(1, datetime(2010, 1, 2))
    match_new = parser.entry(1, 2, datetime(2010, 1, 2), title='match new')
    parser.entry(1, 3, datetime(2010, 1, 2), title='no match')

    two = parser.feed(2, datetime(2010, 1, 2))
    parser.entry(2, 3, datetime(2010, 1, 2), title='match other')

    reader.add_feed(two)
    reader.update_feeds()

    assert len(list(reader.get_entries())) == 4
    assert set((e.id, e.read) for e in reader.get_entries(read=True)) == {
        (match_new.id, True),
    }
def test_search_entries_basic(reader, sort):
    parser = Parser()
    reader._parser = parser

    feed = parser.feed(1, datetime(2010, 1, 1))
    one = parser.entry(1, 1, datetime(2010, 1, 1), title='one')
    two = parser.entry(1, 2, datetime(2010, 1, 1), title='two', summary='summary')
    three = parser.entry(
        1,
        3,
        datetime(2010, 1, 1),
        title='shall not be named',
        summary='does not match',
        # The emoji is to catch a bug in the json_extract() SQLite function.
        # As of reader 1.4 we're not using it anymore, and the workaround
        # was removed; we keep the emoji in case of regressions.
        # Bug: https://bugs.python.org/issue38749
        # Workaround and more details: https://github.com/lemon24/reader/blob/d4363f683fc18ca12f597809ceca4e7dbd0a303a/src/reader/_sqlite_utils.py#L332
        content=[Content('three 🤩 content')],
    )

    reader.add_feed(feed.url)
    reader.update_feeds()
    reader.enable_search()

    assert list(reader.search_entries('one')) == []

    reader.update_search()

    search = lambda *a, **kw: reader.search_entries(*a, sort=sort, **kw)
    search_counts = lambda *a, **kw: reader.search_entry_counts(*a, **kw)

    # TODO: the asserts below look parametrizable

    assert list(search('zero')) == []
    assert search_counts('zero') == EntrySearchCounts(0, 0, 0, 0)
    assert list(search('one')) == [
        EntrySearchResult(
            feed.url,
            one.id,
            {
                '.title': HighlightedString(one.title, (slice(0, 3), )),
                '.feed.title': HighlightedString(feed.title),
            },
        )
    ]
    assert search_counts('one') == EntrySearchCounts(1, 0, 0, 0)
    assert list(search('two')) == [
        EntrySearchResult(
            feed.url,
            two.id,
            {
                '.title': HighlightedString(two.title, (slice(0, 3), )),
                '.feed.title': HighlightedString(feed.title),
            },
            {'.summary': HighlightedString('summary')},
        )
    ]
    assert list(search('three')) == [
        EntrySearchResult(
            feed.url,
            three.id,
            {
                '.title': HighlightedString(three.title),
                '.feed.title': HighlightedString(feed.title),
            },
            {
                '.content[0].value': HighlightedString(
                    three.content[0].value, (slice(0, 5), )
                )
            },
        )
    ]

    # TODO: fix inconsistent naming

    feed_two = parser.feed(2, datetime(2010, 1, 2))
    feed_two_entry = parser.entry(2, 1, datetime(2010, 1, 2), title=None)
    feed_three = parser.feed(3, datetime(2010, 1, 1), title=None)
    feed_three_entry = parser.entry(3, 1, datetime(2010, 1, 1), title='entry summary')

    reader.add_feed(feed_two.url)
    reader.add_feed(feed_three)
    reader.set_feed_user_title(feed_two, 'a summary of things')

    reader.update_feeds()
    feed_two_entry = reader.get_entry((feed_two.url, feed_two_entry.id))

    reader.update_search()

    # We can't use a set here because the dicts in EntrySearchResult aren't hashable.
    assert {(e.feed_url, e.id): e for e in search('summary')} == {
        (e.feed_url, e.id): e
        for e in [
            EntrySearchResult(
                feed_three.url,
                feed_three_entry.id,
                {
                    '.title': HighlightedString(
                        feed_three_entry.title, (slice(6, 13), )
                    )
                },
            ),
            EntrySearchResult(
                feed_two.url,
                feed_two_entry.id,
                {
                    '.feed.user_title': HighlightedString(
                        feed_two_entry.feed.user_title, (slice(2, 9), )
                    )
                },
            ),
            EntrySearchResult(
                feed.url,
                two.id,
                {
                    '.title': HighlightedString(two.title),
                    '.feed.title': HighlightedString(feed.title),
                },
                {'.summary': HighlightedString(two.summary, (slice(0, 7), ))},
            ),
        ]
    }
    assert search_counts('summary') == EntrySearchCounts(3, 0, 0, 0)
def test_search_entries_order_weights(reader, chunk_size):
    """Entry title beats feed title beats entry content/summary."""

    # TODO: may need fixing once we finish tuning the weights (it should fail)

    reader._search.storage.chunk_size = chunk_size

    parser = Parser()
    reader._parser = parser

    feed_one = parser.feed(1, datetime(2010, 1, 1), title='one')
    entry_one = parser.entry(1, 1, datetime(2010, 1, 1))
    feed_two = parser.feed(2, datetime(2010, 1, 1), title='two')
    entry_two = parser.entry(2, 2, datetime(2010, 1, 1), title='one')
    entry_three = parser.entry(2, 3, datetime(2010, 1, 1), content=[Content('one')])
    entry_four = parser.entry(2, 4, datetime(2010, 1, 1), summary='one')
    entry_five = parser.entry(2, 5, datetime(2010, 1, 1), content=[Content('one')] * 2)
    entry_six = parser.entry(
        2, 6, datetime(2010, 1, 1), summary='one', content=[Content('one')]
    )
    entry_seven = parser.entry(2, 7, datetime(2010, 1, 1), title="does not match")

    reader.add_feed(feed_one.url)
    reader.add_feed(feed_two.url)
    reader.update_feeds()
    reader.enable_search()
    reader.update_search()

    rv = [(e.id, e.feed_url) for e in reader.search_entries('one')]

    assert rv[:2] == [(entry_two.id, feed_two.url), (entry_one.id, feed_one.url)]

    # TODO: how do we check these have the same exact rank?
    assert sorted(rv[2:]) == [
        (entry_three.id, feed_two.url),
        (entry_four.id, feed_two.url),
        (entry_five.id, feed_two.url),
        (entry_six.id, feed_two.url),
    ]
def test_update_search_feeds_change_after_enable(reader, sort, chunk_size):
    reader._search.storage.chunk_size = chunk_size

    reader.enable_search()
    reader.update_search()

    try:
        reader.remove_feed('1')
    except FeedNotFoundError:
        pass

    parser = Parser()
    reader._parser = parser

    parser.feed(1, datetime(2010, 1, 2))
    parser.entry(1, 2, datetime(2010, 1, 2), title='feed one changed')
    parser.entry(1, 6, datetime(2010, 1, 2), title='feed one new')
    parser.feed(2, datetime(2010, 1, 1))
    parser.entry(2, 1, datetime(2010, 1, 1), title='feed two')
    parser.entry(2, 2, datetime(2010, 1, 1), title='feed two')
    parser.entry(2, 3, datetime(2010, 1, 1), title='feed two')

    reader.add_feed('1')
    reader.add_feed('2')
    reader.update_feeds()
    reader.update_search()

    assert {(e.id, e.feed_url, e.title) for e in reader.get_entries()} == {
        (e.id, e.feed_url, e.metadata['.title'].value)
        for e in reader.search_entries('feed', sort=sort)
    }

    # no title, shouldn't come up in search
    entry = parser.entry(1, 1, datetime(2010, 1, 1))
    reader.update_feeds()
    reader.get_entry(entry)

    # TODO: Should this be in test_search.py?
    # Other implementations may update the index as soon as an entry is updated,
    # and have a noop update_search().
    assert (entry.id, entry.feed_url) not in {
        (e.id, e.feed_url) for e in reader.search_entries('feed', sort=sort)
    }
def test_update_search_entry_changed_between_insert_loops(db_path, monkeypatch):
    """Test that an entry can't be added twice to the search index if it
    changes during reader.update_search(), between two insert loops.

    The scenario is:

    * entry has to_update set
    * _delete_from_search removes it from search
    * loop 1 of _insert_into_search finds the entry and inserts it into
      search, clears to_update
    * entry has to_update set again (if to_update is set because the feed
      changed, last_updated does not change; even if it did, it doesn't
      matter, since the transaction only spans a single loop)
    * loop 2 of _insert_into_search finds the entry and inserts it into
      search again, clears to_update
    * loop 3 of _insert_into_search doesn't find any entry, returns

    https://github.com/lemon24/reader/issues/175#issuecomment-654213853

    """
    # This is a very intrusive test, maybe we should move it somewhere else.

    reader = make_reader(db_path)
    reader.enable_search()

    parser = reader._parser = Parser()
    feed = parser.feed(1, datetime(2010, 1, 1))
    parser.entry(1, 1, datetime(2010, 1, 1), summary='one')
    reader.add_feed(feed.url)
    reader.update_feeds()

    in_insert_chunk = threading.Event()
    can_return_from_insert_chunk = threading.Event()

    def target():
        reader = make_reader(db_path)
        original_insert_chunk = reader._search._insert_into_search_one_chunk

        loop = 0

        def insert_chunk(*args, **kwargs):
            nonlocal loop
            if loop == 1:
                in_insert_chunk.set()
                can_return_from_insert_chunk.wait()
            loop += 1
            return original_insert_chunk(*args, **kwargs)

        reader._search._insert_into_search_one_chunk = insert_chunk
        reader.update_search()

    thread = threading.Thread(target=target)
    thread.start()

    in_insert_chunk.wait()

    try:
        feed = parser.feed(1, datetime(2010, 1, 2))
        parser.entry(1, 1, datetime(2010, 1, 2), summary='two')
        reader.update_feed(feed.url)
    finally:
        can_return_from_insert_chunk.set()
        thread.join()

    (result, ) = reader.search_entries('entry')
    assert len(result.content) == 1

    ((rowcount, ), ) = reader._search.db.execute(
        "select count(*) from entries_search;")
    assert rowcount == 1
def test_update_search_entry_changed_during_strip_html(db_path, monkeypatch):
    """Test that an entry can't remain out of sync if it changes during
    reader.update_search(), in a strip_html() call.

    https://github.com/lemon24/reader/issues/175#issuecomment-652489019

    """
    # This is a very intrusive test, maybe we should move it somewhere else.

    reader = make_reader(db_path)
    parser = reader._parser = Parser()

    feed = parser.feed(1, datetime(2010, 1, 1), title='one')
    parser.entry(1, 1, datetime(2010, 1, 1), title='one')
    reader.add_feed(feed.url)
    reader.update_feeds()
    reader.enable_search()
    reader.update_search()

    feed = parser.feed(1, datetime(2010, 1, 2), title='two')
    parser.entry(1, 1, datetime(2010, 1, 2), title='two')
    reader.update_feed(feed.url)

    in_strip_html = threading.Event()
    can_return_from_strip_html = threading.Event()

    def target():
        from reader._search import Search

        # strip_html() may or may not be used as a SQLite user-defined function,
        # hence the whole subclassing thing (a standalone sketch of a SQLite
        # user-defined function follows this test)
        class MySearch(Search):
            @staticmethod
            def strip_html(*args, **kwargs):
                in_strip_html.set()
                can_return_from_strip_html.wait()
                return Search.strip_html(*args, **kwargs)

        # TODO: remove monkeypatching when make_reader() gets a search_cls argument
        monkeypatch.setattr('reader.core.Search', MySearch)

        reader = make_reader(db_path)
        reader.update_search()

    thread = threading.Thread(target=target)
    thread.start()

    in_strip_html.wait()

    try:
        feed = parser.feed(1, datetime(2010, 1, 3), title='three')
        parser.entry(1, 1, datetime(2010, 1, 3), title='three')
        reader._storage.db.execute("PRAGMA busy_timeout = 0;")
        reader.update_feed(feed.url)
        expected_title = 'three'
    except StorageError:
        expected_title = 'two'
    finally:
        can_return_from_strip_html.set()
        thread.join()

    reader.update_search()

    (entry, ) = reader.get_entries()
    (result, ) = reader.search_entries('one OR two OR three')
    assert entry.title == result.metadata['.title'].value == expected_title
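# Illustrative sketch only (not part of the test suite): the comment in target()
# above refers to strip_html() possibly being registered as a SQLite user-defined
# function. This shows how a Python callable is exposed to SQLite in general,
# using only the standard sqlite3 API; the strip_html() body here is a
# placeholder, not reader's actual implementation.
def _sketch_sqlite_user_defined_function():
    import sqlite3

    def strip_html(text):
        # placeholder: reader's real strip_html() removes markup from the text
        return text

    db = sqlite3.connect(':memory:')
    # register the callable under the name 'strip_html', taking one argument
    db.create_function('strip_html', 1, strip_html)
    (value, ) = db.execute("SELECT strip_html('<b>hi</b>');").fetchone()
    assert value == '<b>hi</b>'  # the placeholder returns its input unchanged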