Example #1
def test_mark_as_read_unread(db_path, make_reader, browser):
    parser = Parser()
    feed = parser.feed(1, datetime(2010, 1, 1))
    entry = parser.entry(1, 1, datetime(2010, 1, 1))

    reader = make_reader(db_path)
    reader._parser = parser

    reader.add_feed(feed.url)
    reader.update_feeds()

    browser.open('http://app/')
    assert len(browser.get_current_page().select('.entry')) == 1

    form = browser.select_form('.entry form.action-mark-as-read')
    response = browser.submit_selected(
        form.form.find('button', text='mark as read'))
    assert response.status_code == 200
    assert len(browser.get_current_page().select('.entry')) == 0

    response = browser.follow_link(browser.find_link(text='read'))
    assert response.status_code == 200
    assert len(browser.get_current_page().select('.entry')) == 1

    form = browser.select_form('.entry form.action-mark-as-unread')
    response = browser.submit_selected(
        form.form.find('button', text='mark as unread'))
    assert response.status_code == 200
    assert len(browser.get_current_page().select('.entry')) == 0

    response = browser.follow_link(browser.find_link(text='unread'))
    assert response.status_code == 200
    assert len(browser.get_current_page().select('.entry')) == 1
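All of these examples swap reader's real feed parser for a test double by assigning to the private reader._parser attribute: parser.feed(number, updated, ...) and parser.entry(feed_number, entry_number, updated, ...) record fake data, and the next reader.update_feeds() call ingests it. The helper itself is not shown on this page; the sketch below is a hypothetical minimal version, assuming only the shapes visible in these examples (feed URLs are the stringified feed number, entry ids look like '1, 1', and parser.feeds / parser.entries are keyed by feed number). The real helper also implements reader's internal parser-call interface, which is omitted here.

from collections import namedtuple

# Hypothetical stand-ins for the objects passed around in these examples.
Content = namedtuple('Content', 'value type language', defaults=(None, None))
Enclosure = namedtuple('Enclosure', 'href type length', defaults=(None, None))
FakeFeed = namedtuple('FakeFeed', 'url updated title link author')
FakeEntry = namedtuple(
    'FakeEntry',
    'id updated title link author published summary content enclosures')


class Parser:
    """Minimal sketch of the fake parser injected via reader._parser."""

    def __init__(self):
        self.feeds = {}    # feed number -> FakeFeed
        self.entries = {}  # feed number -> {entry number: FakeEntry}

    def feed(self, number, updated, title=None, link=None, author=None):
        feed = FakeFeed(str(number), updated, title, link, author)
        self.feeds[number] = feed
        self.entries.setdefault(number, {})
        return feed

    def entry(self, feed_number, number, updated, **kwargs):
        entry = FakeEntry(
            id='%s, %s' % (feed_number, number),  # matches the '1, 1' ids asserted in these tests
            updated=updated,
            title=kwargs.get('title'),
            link=kwargs.get('link'),
            author=kwargs.get('author'),
            published=kwargs.get('published'),
            summary=kwargs.get('summary'),
            content=tuple(kwargs.get('content', ())),
            enclosures=tuple(kwargs.get('enclosures', ())),
        )
        self.entries.setdefault(feed_number, {})[number] = entry
        return entry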
Example #2
def test_post_feed_update_plugins(reader):
    parser = Parser()
    reader._parser = parser

    plugin_calls = []

    def first_plugin(r, e):
        assert r is reader
        plugin_calls.append((first_plugin, e))

    def second_plugin(r, e):
        assert r is reader
        plugin_calls.append((second_plugin, e))

    feed = parser.feed(1, datetime(2010, 1, 1))
    parser.entry(1, 1, datetime(2010, 1, 1))
    reader.add_feed(feed.url)
    reader._post_feed_update_plugins.append(first_plugin)
    reader._post_feed_update_plugins.append(second_plugin)

    reader.update_feeds()
    assert plugin_calls == [
        (first_plugin, feed.url),
        (second_plugin, feed.url),
    ]
    assert set(e.id for e in reader.get_entries()) == {'1, 1'}
Example #3
def test_sort_and_filter_subset_basic(data_and_kwargs, pre_stuff, call_method):
    entry_data, kwargs, chunk_size = data_and_kwargs

    # can't use reader fixture because of
    # https://github.com/pytest-dev/pytest/issues/916
    reader = make_reader(':memory:')

    reader._storage.chunk_size = chunk_size

    parser = Parser()
    reader._parser = parser

    for feed_id, entry_id, feed_updated, entry_updated in entry_data:
        seen_feed = feed_id in parser.feeds
        feed = parser.feed(feed_id, feed_updated)
        parser.entry(feed_id, entry_id, entry_updated)
        if not seen_feed:
            reader.add_feed(feed.url)

    reader.update_feeds()
    pre_stuff(reader)

    expected = [(fid, eid) for fid, entries in parser.entries.items()
                for eid in entries]

    actual = [eval(e.id) for e in call_method(reader)]

    if call_method not in (get_entries_random, search_entries_random):
        assert len(expected) == len(actual)
        assert set(expected) == set(actual)
    else:
        assert set(expected) >= set(actual)

    actual = [eval(e.id) for e in call_method(reader, **kwargs)]
    assert set(expected) >= set(actual)
Example #4
def test_search_entries_order_content(reader):
    parser = Parser()
    reader._parser = parser

    feed = parser.feed(1, datetime(2010, 1, 1))
    one = parser.entry(
        1,
        1,
        datetime(2010, 1, 1),
        summary='word word',
        content=[
            Content('word'),
            Content('does not match'),
            Content('word word word word'),
            Content('word word word'),
        ],
    )

    reader.add_feed(feed.url)
    reader.update_feeds()
    reader.enable_search()
    reader.update_search()

    # there should be exactly one result
    (rv, ) = reader.search_entries('word')
    assert list(rv.content) == [
        '.content[2].value',
        '.content[3].value',
        '.summary',
        '.content[0].value',
    ]
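Since the assertion above shows the keys of .content coming back ordered by relevance, a caller that only wants the best-matching piece of content can take the first key. A tiny illustrative snippet, reusing the rv result from this test:

# illustrative only: rv is the single EntrySearchResult from the test above
best_path = next(iter(rv.content))        # e.g. '.content[2].value'
best_text = rv.content[best_path].value   # plain text of that match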
Example #5
def reader_without_and_with_entries(request, reader):
    if not request.param:
        return reader

    parser = Parser()
    reader._parser = parser

    feed = parser.feed(1, datetime(2010, 1, 1))
    parser.entry(
        1,
        1,
        datetime(2010, 1, 1),
        title='feed one',
        summary='summary',
        content=[Content('content'),
                 Content('another content')],
    )
    parser.entry(1, 2, datetime(2010, 1, 1), title='feed one')
    parser.entry(1, 3, datetime(2010, 1, 1), title='feed one')
    parser.entry(1, 4, datetime(2010, 1, 1), title='feed one')
    parser.entry(1, 5, datetime(2010, 1, 1), title='feed one')

    reader.add_feed(feed.url)
    reader.update_feeds()

    return reader
Example #6
def test_regex_mark_as_read_bad_metadata(make_reader, value):
    reader = make_reader(':memory:', plugins=['reader.mark_as_read'])

    parser = Parser()
    reader._parser = parser

    one = parser.feed(1, datetime(2010, 1, 1))
    parser.entry(1, 1, datetime(2010, 1, 1), title='match')

    reader.add_feed(one)
    reader.set_feed_metadata_item(one, '.reader.mark_as_read', value)

    reader.update_feeds()

    assert [e.read for e in reader.get_entries()] == [False]
Example #7
def test_enclosure_dedupe(reader):
    reader._parser = parser = Parser()

    feed = parser.feed(1, datetime(2010, 1, 1))
    one = parser.entry(1, 1, datetime(2010, 1, 1))
    two = parser.entry(
        1,
        2,
        datetime(2010, 1, 1),
        enclosures=(Enclosure('href'), Enclosure('another one')),
    )
    three = parser.entry(
        1,
        3,
        datetime(2010, 1, 1),
        enclosures=(Enclosure('href', 'text', 1), Enclosure('href', 'json', 2)),
    )

    reader.add_feed(feed.url)
    reader.update_feeds()

    enclosure_dedupe(reader)

    assert set((e.id, e.enclosures) for e in reader.get_entries()) == {
        (one.id, one.enclosures),
        (two.id, two.enclosures),
        (three.id, (Enclosure('href', 'text', 1),)),
    }
Example #8
def test_update_triggers(reader, data):
    """update_search() should update the search index
    if the indexed fields change.

    """
    reader._parser = parser = Parser()
    feed = parser.feed(1, datetime(2010, 1, 1))
    reader.add_feed(feed.url)
    reader.enable_search()

    for i, (do_stuff, paths) in enumerate(data):
        do_stuff(reader)
        reader.update_feeds()
        reader.update_search()

        entry_data = {(e.feed_url, e.id):
                      {p: eval(f"e{p}", dict(e=e, p=p))
                       for p in paths}
                      for e in reader.get_entries()}

        result_data = {
            (r.feed_url, r.id):
            {p: hl.value
             for p, hl in {
                 **r.metadata,
                 **r.content
             }.items()}
            for r in reader.search_entries('entry OR feed')
        }

        assert entry_data == result_data, f"change {i}"
Example #9
def test_delete_feed_from_entries_page_redirects(db_path, browser):
    parser = Parser()
    feed = parser.feed(1, datetime(2010, 1, 1))
    entry = parser.entry(1, 1, datetime(2010, 1, 1))

    reader = make_reader(db_path)
    reader._parser = parser

    reader.add_feed(feed.url)
    reader.update_feeds()

    browser.open('http://app/', params={'feed': feed.url})
    form = browser.select_form('#update-entries form.action-delete-feed')
    form.set_checkbox({'really-delete-feed': True})
    response = browser.submit_selected(
        form.form.find('button', text='delete feed'))
    assert response.status_code == 200
    assert browser.get_url() == 'http://app/'
    assert len(browser.get_current_page().select('.entry')) == 0
Example #10
def test_update_search_concurrent_calls(db_path, monkeypatch):
    """Test concurrent calls to reader.update_search() don't interfere
    with one another.

    https://github.com/lemon24/reader/issues/175#issuecomment-652489019

    """
    # This is a very intrusive test, maybe we should move it somewhere else.

    reader = make_reader(db_path)
    parser = reader._parser = Parser()

    feed = parser.feed(1, datetime(2010, 1, 1), title='feed')
    parser.entry(
        1,
        1,
        datetime(2010, 1, 1),
        title='entry',
        summary='summary',
        content=[Content('content')],
    )
    reader.add_feed(feed.url)
    reader.update_feeds()
    reader.enable_search()

    barrier = threading.Barrier(2)

    def target():
        from reader._search import Search

        class MySearch(Search):
            @staticmethod
            def strip_html(*args, **kwargs):
                barrier.wait()
                return Search.strip_html(*args, **kwargs)

        # TODO: remove monkeypatching when make_reader() gets a search_cls argument
        monkeypatch.setattr('reader.core.Search', MySearch)

        reader = make_reader(db_path)
        reader.update_search()

    threads = [threading.Thread(target=target) for _ in range(2)]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    (result, ) = reader.search_entries('entry')
    assert len(result.content) == 2

    ((rowcount,),) = reader._search.db.execute("select count(*) from entries_search;")
    assert rowcount == 2
Example #11
def test_search_entries_order_title_content_beats_title(reader):
    parser = Parser()
    reader._parser = parser

    feed = parser.feed(1, datetime(2010, 1, 1))
    one = parser.entry(1, 1, datetime(2010, 1, 1), title='one')
    two = parser.entry(1, 2, datetime(2010, 1, 1), title='two')
    three = parser.entry(1,
                         3,
                         datetime(2010, 1, 1),
                         title='one',
                         content=[Content('one')])

    reader.add_feed(feed.url)
    reader.update_feeds()
    reader.enable_search()
    reader.update_search()

    assert [(e.id, e.feed_url) for e in reader.search_entries('one')] == [
        (three.id, feed.url),
        (one.id, feed.url),
    ]
Example #12
def test_limit(db_path, make_reader, browser):
    parser = Parser()
    feed = parser.feed(1, datetime(2010, 1, 1))
    one = parser.entry(1, 1, datetime(2010, 1, 1))
    two = parser.entry(1, 2, datetime(2010, 1, 2))

    reader = make_reader(db_path)
    reader._parser = parser

    reader.add_feed(feed.url)
    reader.update_feeds()

    browser.open('http://app/')
    entries = browser.get_current_page().select('.entry')
    assert len(entries) == 2
    assert '#2' in str(entries[0])
    assert '#1' in str(entries[1])

    browser.open('http://app/', params={'limit': 1})
    entries = browser.get_current_page().select('.entry')
    assert len(entries) == 1
    assert '#2' in str(entries[0])
Example #13
def test_search_entries_order_content_recent(reader):
    """When sort='recent' is used, the .content of any individual result
    should still be sorted by relevance.

    """
    parser = Parser()
    reader._parser = parser

    feed = parser.feed(1, datetime(2010, 1, 1))
    one = parser.entry(
        1,
        1,
        datetime(2010, 1, 1),
        title='word',
        content=[
            Content('word word'),
            Content('word'),
            Content('word word word')
        ],
    )
    two = parser.entry(1, 2, datetime(2010, 1, 2), summary='word')

    reader.add_feed(feed.url)
    reader.update_feeds()
    reader.enable_search()
    reader.update_search()

    # sanity check, one is more relevant
    assert [e.id for e in reader.search_entries('word')] == ['1, 1', '1, 2']

    results = list(reader.search_entries('word', sort='recent'))
    # two is first because of updated
    assert [e.id for e in results] == ['1, 2', '1, 1']
    # but within 1, the content keys are sorted by relevance;
    assert list(results[1].content) == [
        '.content[2].value',
        '.content[0].value',
        '.content[1].value',
    ]
Example #14
def test_search(db_path, make_reader, browser):
    parser = Parser()
    feed = parser.feed(1, datetime(2010, 1, 1))
    one = parser.entry(1, 1, datetime(2010, 1, 1), title='one')
    two = parser.entry(1, 2, datetime(2010, 1, 2), title='two')

    reader = make_reader(db_path)
    reader._parser = parser

    reader.add_feed(feed.url)
    reader.update_feeds()
    reader.enable_search()
    reader.update_search()

    browser.open('http://app/', params={'q': 'feed'})
    entries = browser.get_current_page().select('.entry')
    assert len(entries) == 2
    assert 'one' in str(entries[0]) or 'one' in str(entries[1])
    assert 'two' in str(entries[0]) or 'two' in str(entries[1])

    browser.open('http://app/', params={'q': 'one'})
    entries = browser.get_current_page().select('.entry')
    assert len(entries) == 1
    assert 'one' in str(entries[0])
Example #15
def make_reader_with_entries(path,
                             num_entries,
                             num_feeds=NUM_FEEDS,
                             text=False):
    reader = make_reader(path)
    reader._parser = parser = Parser()

    for i in range(num_feeds):
        feed = parser.feed(i, datetime(2010, 1, 1))
        reader.add_feed(feed.url)

    random.seed(0)
    for i in range(num_entries):
        kwargs = {}
        if text:
            kwargs.update(
                title=generate_lorem_ipsum(html=False, n=1, min=1, max=10),
                summary=generate_lorem_ipsum(html=False),
            )
        parser.entry(i % num_feeds, i,
                     datetime(2010, 1, 1) + timedelta(i), **kwargs)

    return reader
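A hypothetical way to use the helper above, for example to populate a throwaway database before timing search; the generated feeds and entries only live in the fake parser until update_feeds() ingests them:

reader = make_reader_with_entries(':memory:', num_entries=200, text=True)
reader.update_feeds()    # ingest the generated feeds/entries from the fake parser
reader.enable_search()
reader.update_search()
assert sum(1 for _ in reader.get_entries()) == 200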
Example #16
def test_plugin():
    reader = make_reader(':memory:', plugins=['reader.entry_dedupe'])
    parser = Parser()
    reader._parser = parser

    one = parser.feed(1, datetime(2010, 1, 1))
    old = parser.entry(1,
                       1,
                       datetime(2010, 1, 1),
                       title='title',
                       summary='old')
    title_only_one = parser.entry(1,
                                  2,
                                  datetime(2010, 1, 1),
                                  title='title only')
    read_one = parser.entry(1,
                            3,
                            datetime(2010, 1, 1),
                            title='title',
                            summary='read')
    unread_one = parser.entry(1,
                              4,
                              datetime(2010, 1, 1),
                              title='title',
                              summary='unread')
    important_one = parser.entry(1,
                                 5,
                                 datetime(2010, 1, 1),
                                 title='important',
                                 summary='also important')

    # TODO just use the feeds/entries as arguments

    reader.add_feed(one.url)
    reader.update_feeds()
    reader.mark_as_read((one.url, read_one.id))
    reader.mark_as_important((one.url, important_one.id))

    one = parser.feed(1, datetime(2010, 1, 2))
    new = parser.entry(1,
                       11,
                       datetime(2010, 1, 2),
                       title='title',
                       summary='new')
    title_only_two = parser.entry(1,
                                  12,
                                  datetime(2010, 1, 2),
                                  title='title only')
    read_two = parser.entry(1,
                            13,
                            datetime(2010, 1, 2),
                            title='title',
                            summary='read')
    unread_two = parser.entry(1,
                              14,
                              datetime(2010, 1, 2),
                              title='title',
                              summary='unread')
    important_two = parser.entry(1,
                                 15,
                                 datetime(2010, 1, 2),
                                 title='important',
                                 summary='also important')

    reader.update_feeds()

    assert set((e.id, e.read, e.important) for e in reader.get_entries()) == {
        t + (False, )
        for t in {
            # remain untouched
            (old.id, False),
            (new.id, False),
            # also remain untouched
            (title_only_one.id, False),
            (title_only_two.id, False),
            # the new one is marked as read because the old one was
            (read_one.id, True),
            (read_two.id, True),
            # the old one is marked as read in favor of the new one
            (unread_one.id, True),
            (unread_two.id, False),
        }
    } | {
        # the new one is important because the old one was;
        # the old one is not important anymore
        (important_one.id, True, False),
        (important_two.id, False, True),
    }
Example #17
def test_add_delete_feed(db_path, make_reader, browser, monkeypatch):
    parser = Parser()
    feed = parser.feed(1, datetime(2010, 1, 1))
    entry = parser.entry(1, 1, datetime(2010, 1, 1))

    def app_make_reader(**kwargs):
        reader = make_reader_from_config(**kwargs)
        reader._parser = parser
        return reader

    # this is brittle, it may break if we change how we use make_reader in app
    monkeypatch.setattr('reader._config.make_reader_from_config',
                        app_make_reader)

    reader = app_make_reader(url=db_path)

    browser.open('http://app/')
    response = browser.follow_link(browser.find_link(text='feeds'))
    assert response.status_code == 200
    assert len(browser.get_current_page().select('.feed')) == 0

    # go to the preview page
    form = browser.select_form('#top-bar form')
    form.input({'url': feed.url})
    response = browser.submit_selected(
        form.form.find('button', text='add feed'))
    assert response.status_code == 200
    assert (
        browser.get_current_page().select('title')[0].text == 'Preview for ' +
        feed.title)
    assert len(browser.get_current_page().select('.entry')) == 1

    # actually add the feed
    form = browser.select_form('form.action-add-feed')
    response = browser.submit_selected(
        form.form.find('button', text='add feed'))

    # we should be at the feed page, via a redirect
    assert response.status_code == 200
    assert response.url == 'http://app/?feed=' + feed.url
    assert response.history[-1].status_code == 302

    assert len(browser.get_current_page().select('.entry')) == 0

    reader.update_feeds()

    browser.refresh()
    assert len(browser.get_current_page().select('.entry')) == 1

    response = browser.follow_link(browser.find_link(text='feeds'))
    assert response.status_code == 200

    feed_link = browser.find_link(text=feed.title)

    form = browser.select_form('.feed form.action-delete-feed')
    form.set_checkbox({'really-delete-feed': True})
    response = browser.submit_selected(
        form.form.find('button', text='delete feed'))
    assert response.status_code == 200
    assert len(browser.get_current_page().select('.feed')) == 0

    response = browser.follow_link(browser.find_link(text='entries'))
    assert response.status_code == 200
    assert len(browser.get_current_page().select('.entry')) == 0

    response = browser.follow_link(feed_link)
    assert response.status_code == 404
Example #18
def test_regex_mark_as_read(reader, monkeypatch, tmpdir):
    parser = Parser()
    reader._parser = parser

    one = parser.feed(1, datetime(2010, 1, 1))
    parser.entry(1, 1, datetime(2010, 1, 1), title='match old')

    reader.add_feed(one.url)
    reader.update_feeds()

    reader.set_feed_metadata(one, 'regex-mark-as-read',
                             {'patterns': ['^match']})

    regex_mark_as_read(reader)

    one = parser.feed(1, datetime(2010, 1, 2))
    match_new = parser.entry(1, 2, datetime(2010, 1, 2), title='match new')
    parser.entry(1, 3, datetime(2010, 1, 2), title='no match')

    two = parser.feed(2, datetime(2010, 1, 2))
    parser.entry(2, 3, datetime(2010, 1, 2), title='match other')

    reader.add_feed(two.url)
    reader.update_feeds()

    assert len(list(reader.get_entries())) == 4
    assert set((e.id, e.read) for e in reader.get_entries(read=True)) == {
        (match_new.id, True),
    }
Example #19
def test_update_triggers_no_change(db_path, monkeypatch, set_user_title):
    """update_search() should *not* update the search index
    if anything else except the indexed fields changes.

    """
    from reader._search import Search

    strip_html_called = 0

    class MySearch(Search):
        @staticmethod
        def strip_html(*args, **kwargs):
            nonlocal strip_html_called
            strip_html_called += 1
            return Search.strip_html(*args, **kwargs)

    # TODO: remove monkeypatching when make_reader() gets a search_cls argument
    monkeypatch.setattr('reader.core.Search', MySearch)

    reader = make_reader(db_path)
    reader._parser = parser = Parser()

    feed = parser.feed(1, datetime(2010, 1, 1), title='feed')
    entry = parser.entry(
        1,
        1,
        datetime(2010, 1, 1),
        title='entry',
        summary='summary',
        content=[Content('content')],
    )

    reader.add_feed(feed.url)
    reader.update_feeds()
    if set_user_title:
        reader.set_feed_user_title(feed, 'user title')

    reader.enable_search()
    reader.update_search()

    assert strip_html_called > 0
    strip_html_called = 0

    (old_result, ) = reader.search_entries('entry OR feed')

    feed = parser.feed(1,
                       datetime(2010, 1, 2),
                       title='feed',
                       link='link',
                       author='author')
    """
    entry = parser.entry(
        1, 1, datetime(2010, 1, 2),
        title='entry',
        summary='summary',
        content=[Content('content')],
        link='link', author='author',
        published=datetime(2010, 1, 2),
        enclosures=[Enclosure('enclosure')],
    )
    """
    # NOTE: As of 1.4, updating entries normally (above) uses INSERT OR REPLACE.
    # REPLACE == DELETE + INSERT (https://www.sqlite.org/lang_conflict.html),
    # so updating the entry normally *will not* fire the ON UPDATE trigger,
    # but the ON DELETE and ON INSERT ones (basically, the ON UPDATE trigger
    # never fires at the moment).
    #
    # Meanwhile, we do a (more intrusive/brittle) manual update:
    with reader._search.db as db:
        db.execute("""
            UPDATE entries
            SET (
                title,
                link,
                updated,
                author,
                published,
                summary,
                content,
                enclosures
            ) = (
                'entry',
                'http://www.example.com/entries/1',
                '2010-01-02 00:00:00',
                'author',
                '2010-01-02 00:00:00',
                'summary',
                '[{"value": "content", "type": null, "language": null}]',
                '[{"href": "enclosure", "type": null, "length": null}]'
            )
            WHERE (id, feed) = ('1, 1', '1');
            """)
    # TODO: Change this test when updating entries uses UPDATE instead of INSERT OR REPLACE

    reader.mark_as_read(entry)
    reader.mark_as_important(entry)

    reader.update_feeds()
    if set_user_title:
        reader.set_feed_user_title(feed, 'user title')

    reader.update_search()

    (new_result, ) = reader.search_entries('entry OR feed')

    assert old_result == new_result
    assert strip_html_called == 0
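The NOTE in the example above leans on a SQLite behavior that is easy to check in isolation: INSERT OR REPLACE is not an UPDATE, so it fires the INSERT trigger (and, with recursive triggers enabled, the DELETE trigger for the replaced row) but never the UPDATE trigger. A standalone sketch of that behavior, independent of reader:

import sqlite3

db = sqlite3.connect(':memory:')
# needed for the REPLACE-caused delete to fire the DELETE trigger
db.execute("PRAGMA recursive_triggers = ON;")
db.executescript("""
    CREATE TABLE t (id INTEGER PRIMARY KEY, value TEXT);
    CREATE TABLE log (event TEXT);
    CREATE TRIGGER t_ai AFTER INSERT ON t BEGIN INSERT INTO log VALUES ('insert'); END;
    CREATE TRIGGER t_au AFTER UPDATE ON t BEGIN INSERT INTO log VALUES ('update'); END;
    CREATE TRIGGER t_ad AFTER DELETE ON t BEGIN INSERT INTO log VALUES ('delete'); END;
    INSERT INTO t VALUES (1, 'one');
    INSERT OR REPLACE INTO t VALUES (1, 'two');  -- delete + insert, no update
    UPDATE t SET value = 'three' WHERE id = 1;   -- a plain UPDATE does fire it
""")
assert [row[0] for row in db.execute("SELECT event FROM log")] == [
    'insert', 'delete', 'insert', 'update']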
Example #20
def test_regex_mark_as_read(make_reader):
    key = '.reader.mark_as_read'
    value = {'title': ['^match']}

    reader = make_reader(':memory:', plugins=['reader.mark_as_read'])

    parser = Parser()
    reader._parser = parser

    one = parser.feed(1, datetime(2010, 1, 1))
    parser.entry(1, 1, datetime(2010, 1, 1), title='match old')

    reader.add_feed(one)
    reader.update_feeds()

    reader.set_feed_metadata_item(one, key, value)

    one = parser.feed(1, datetime(2010, 1, 2))
    match_new = parser.entry(1, 2, datetime(2010, 1, 2), title='match new')
    parser.entry(1, 3, datetime(2010, 1, 2), title='no match')

    two = parser.feed(2, datetime(2010, 1, 2))
    parser.entry(2, 3, datetime(2010, 1, 2), title='match other')

    reader.add_feed(two)
    reader.update_feeds()

    assert len(list(reader.get_entries())) == 4
    assert set((e.id, e.read) for e in reader.get_entries(read=True)) == {
        (match_new.id, True),
    }
Example #21
def test_search_entries_basic(reader, sort):
    parser = Parser()
    reader._parser = parser

    feed = parser.feed(1, datetime(2010, 1, 1))
    one = parser.entry(1, 1, datetime(2010, 1, 1), title='one')
    two = parser.entry(1,
                       2,
                       datetime(2010, 1, 1),
                       title='two',
                       summary='summary')
    three = parser.entry(
        1,
        3,
        datetime(2010, 1, 1),
        title='shall not be named',
        summary='does not match',
        # The emoji is to catch a bug in the json_extract() SQLite function.
        # As of reader 1.4 we're not using it anymore, and the workaround
        # was removed; we keep the emoji in case of regressions.
        # Bug: https://bugs.python.org/issue38749
        # Workaround and more details: https://github.com/lemon24/reader/blob/d4363f683fc18ca12f597809ceca4e7dbd0a303a/src/reader/_sqlite_utils.py#L332
        content=[Content('three 🤩 content')],
    )

    reader.add_feed(feed.url)
    reader.update_feeds()

    reader.enable_search()

    assert list(reader.search_entries('one')) == []

    reader.update_search()

    search = lambda *a, **kw: reader.search_entries(*a, sort=sort, **kw)
    search_counts = lambda *a, **kw: reader.search_entry_counts(*a, **kw)

    # TODO: the asserts below look parametrizable

    assert list(search('zero')) == []
    assert search_counts('zero') == EntrySearchCounts(0, 0, 0, 0)
    assert list(search('one')) == [
        EntrySearchResult(
            feed.url,
            one.id,
            {
                '.title': HighlightedString(one.title, (slice(0, 3), )),
                '.feed.title': HighlightedString(feed.title),
            },
        )
    ]
    assert search_counts('one') == EntrySearchCounts(1, 0, 0, 0)
    assert list(search('two')) == [
        EntrySearchResult(
            feed.url,
            two.id,
            {
                '.title': HighlightedString(two.title, (slice(0, 3), )),
                '.feed.title': HighlightedString(feed.title),
            },
            {'.summary': HighlightedString('summary')},
        )
    ]
    assert list(search('three')) == [
        EntrySearchResult(
            feed.url,
            three.id,
            {
                '.title': HighlightedString(three.title),
                '.feed.title': HighlightedString(feed.title),
            },
            {
                '.content[0].value':
                HighlightedString(three.content[0].value, (slice(0, 5), ))
            },
        )
    ]

    # TODO: fix inconsistent naming

    feed_two = parser.feed(2, datetime(2010, 1, 2))
    feed_two_entry = parser.entry(2, 1, datetime(2010, 1, 2), title=None)
    feed_three = parser.feed(3, datetime(2010, 1, 1), title=None)
    feed_three_entry = parser.entry(3,
                                    1,
                                    datetime(2010, 1, 1),
                                    title='entry summary')

    reader.add_feed(feed_two.url)
    reader.add_feed(feed_three)
    reader.set_feed_user_title(feed_two, 'a summary of things')

    reader.update_feeds()
    feed_two_entry = reader.get_entry((feed_two.url, feed_two_entry.id))

    reader.update_search()

    # We can't use a set here because the dicts in EntrySearchResult aren't hashable.
    assert {
        (e.feed_url, e.id): e
        for e in search('summary')
    } == {
        (e.feed_url, e.id): e
        for e in [
            EntrySearchResult(
                feed_three.url,
                feed_three_entry.id,
                {
                    '.title':
                    HighlightedString(feed_three_entry.title, (slice(6, 13), ))
                },
            ),
            EntrySearchResult(
                feed_two.url,
                feed_two_entry.id,
                {
                    '.feed.user_title':
                    HighlightedString(feed_two_entry.feed.user_title, (
                        slice(2, 9), ))
                },
            ),
            EntrySearchResult(
                feed.url,
                two.id,
                {
                    '.title': HighlightedString(two.title),
                    '.feed.title': HighlightedString(feed.title),
                },
                {'.summary': HighlightedString(two.summary, (slice(0, 7), ))},
            ),
        ]
    }
    assert search_counts('summary') == EntrySearchCounts(3, 0, 0, 0)
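The metadata and content values in these results are HighlightedString objects: .value is the plain text, and the slices passed to the constructor mark the matched spans. A hedged sketch of rendering one with markers, assuming the slices are also exposed as a highlights attribute:

def render_highlights(hs, before='*', after='*'):
    # hs.value is the plain text (used throughout these examples);
    # hs.highlights is assumed to hold the slices seen in the constructors above
    parts, pos = [], 0
    for sl in hs.highlights:
        parts += [hs.value[pos:sl.start], before, hs.value[sl], after]
        pos = sl.stop
    parts.append(hs.value[pos:])
    return ''.join(parts)

# e.g. render_highlights(HighlightedString('one', (slice(0, 3),))) -> '*one*'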
Example #22
def test_search_entries_order_weights(reader, chunk_size):
    """Entry title beats feed title beats entry content/summary."""

    # TODO: may need fixing once we finish tuning the weights (it should fail)

    reader._search.storage.chunk_size = chunk_size

    parser = Parser()
    reader._parser = parser

    feed_one = parser.feed(1, datetime(2010, 1, 1), title='one')
    entry_one = parser.entry(1, 1, datetime(2010, 1, 1))
    feed_two = parser.feed(2, datetime(2010, 1, 1), title='two')
    entry_two = parser.entry(2, 2, datetime(2010, 1, 1), title='one')
    entry_three = parser.entry(2,
                               3,
                               datetime(2010, 1, 1),
                               content=[Content('one')])
    entry_four = parser.entry(2, 4, datetime(2010, 1, 1), summary='one')
    entry_five = parser.entry(2,
                              5,
                              datetime(2010, 1, 1),
                              content=[Content('one')] * 2)
    entry_six = parser.entry(2,
                             6,
                             datetime(2010, 1, 1),
                             summary='one',
                             content=[Content('one')])
    entry_seven = parser.entry(2,
                               7,
                               datetime(2010, 1, 1),
                               title="does not match")

    reader.add_feed(feed_one.url)
    reader.add_feed(feed_two.url)
    reader.update_feeds()
    reader.enable_search()
    reader.update_search()

    rv = [(e.id, e.feed_url) for e in reader.search_entries('one')]

    assert rv[:2] == [(entry_two.id, feed_two.url),
                      (entry_one.id, feed_one.url)]

    # TODO: how do we check these have the same exact rank?
    assert sorted(rv[2:]) == [
        (entry_three.id, feed_two.url),
        (entry_four.id, feed_two.url),
        (entry_five.id, feed_two.url),
        (entry_six.id, feed_two.url),
    ]
Example #23
def test_update_search_feeds_change_after_enable(reader, sort, chunk_size):
    reader._search.storage.chunk_size = chunk_size
    reader.enable_search()
    reader.update_search()

    try:
        reader.remove_feed('1')
    except FeedNotFoundError:
        pass

    parser = Parser()
    reader._parser = parser

    parser.feed(1, datetime(2010, 1, 2))
    parser.entry(1, 2, datetime(2010, 1, 2), title='feed one changed')
    parser.entry(1, 6, datetime(2010, 1, 2), title='feed one new')
    parser.feed(2, datetime(2010, 1, 1))
    parser.entry(2, 1, datetime(2010, 1, 1), title='feed two')
    parser.entry(2, 2, datetime(2010, 1, 1), title='feed two')
    parser.entry(2, 3, datetime(2010, 1, 1), title='feed two')

    reader.add_feed('1')
    reader.add_feed('2')
    reader.update_feeds()

    reader.update_search()

    assert {(e.id, e.feed_url, e.title)
            for e in reader.get_entries()
            } == {(e.id, e.feed_url, e.metadata['.title'].value)
                  for e in reader.search_entries('feed', sort=sort)}

    # no title, shouldn't come up in search
    entry = parser.entry(1, 1, datetime(2010, 1, 1))
    reader.update_feeds()
    reader.get_entry(entry)

    # TODO: Should this be in test_search.py?
    # Other implementations may update the index as soon as an entry is updated,
    # and have a noop update_search().

    assert (entry.id, entry.feed_url) not in {
        (e.id, e.feed_url)
        for e in reader.search_entries('feed', sort=sort)
    }
Example #24
def test_update_search_entry_changed_between_insert_loops(
        db_path, monkeypatch):
    """Test the entry can't be added twice to the search index if it changes
    during reader.update_search() between two insert loops.

    The scenario is:

    * entry has to_update set
    * _delete_from_search removes it from search
    * loop 1 of _insert_into_search finds entry and inserts it into search,
      clears to_update
    * entry has to_update set (if to_update is set because the feed changed,
      last_updated does not change; even if it did, it doesn't matter,
      since the transaction only spans a single loop)
    * loop 2 of _insert_into_search finds entry and inserts it into search
      again, clears to_update
    * loop 3 of _insert_into_search doesn't find any entry, returns

    https://github.com/lemon24/reader/issues/175#issuecomment-654213853

    """
    # This is a very intrusive test, maybe we should move it somewhere else.

    reader = make_reader(db_path)
    reader.enable_search()

    parser = reader._parser = Parser()

    feed = parser.feed(1, datetime(2010, 1, 1))
    parser.entry(1, 1, datetime(2010, 1, 1), summary='one')
    reader.add_feed(feed.url)
    reader.update_feeds()

    in_insert_chunk = threading.Event()
    can_return_from_insert_chunk = threading.Event()

    def target():
        reader = make_reader(db_path)
        original_insert_chunk = reader._search._insert_into_search_one_chunk

        loop = 0

        def insert_chunk(*args, **kwargs):
            nonlocal loop
            if loop == 1:
                in_insert_chunk.set()
                can_return_from_insert_chunk.wait()
            loop += 1
            return original_insert_chunk(*args, **kwargs)

        reader._search._insert_into_search_one_chunk = insert_chunk
        reader.update_search()

    thread = threading.Thread(target=target)
    thread.start()

    in_insert_chunk.wait()

    try:
        feed = parser.feed(1, datetime(2010, 1, 2))
        parser.entry(1, 1, datetime(2010, 1, 2), summary='two')
        reader.update_feed(feed.url)
    finally:
        can_return_from_insert_chunk.set()
        thread.join()

    (result, ) = reader.search_entries('entry')
    assert len(result.content) == 1

    ((rowcount,),) = reader._search.db.execute("select count(*) from entries_search;")
    assert rowcount == 1
Example #25
def test_update_search_entry_changed_during_strip_html(db_path, monkeypatch):
    """Test the entry can't remain out of sync if it changes
    during reader.update_search() in a strip_html() call.

    https://github.com/lemon24/reader/issues/175#issuecomment-652489019

    """
    # This is a very intrusive test, maybe we should move it somewhere else.

    reader = make_reader(db_path)
    parser = reader._parser = Parser()

    feed = parser.feed(1, datetime(2010, 1, 1), title='one')
    parser.entry(1, 1, datetime(2010, 1, 1), title='one')
    reader.add_feed(feed.url)
    reader.update_feeds()

    reader.enable_search()
    reader.update_search()

    feed = parser.feed(1, datetime(2010, 1, 2), title='two')
    parser.entry(1, 1, datetime(2010, 1, 2), title='two')
    reader.update_feed(feed.url)

    in_strip_html = threading.Event()
    can_return_from_strip_html = threading.Event()

    def target():
        from reader._search import Search

        # strip_html() may or may not be used as a SQLite user-defined function,
        # hence the whole subclassing thing
        class MySearch(Search):
            @staticmethod
            def strip_html(*args, **kwargs):
                in_strip_html.set()
                can_return_from_strip_html.wait()
                return Search.strip_html(*args, **kwargs)

        # TODO: remove monkeypatching when make_reader() gets a search_cls argument
        monkeypatch.setattr('reader.core.Search', MySearch)

        reader = make_reader(db_path)
        reader.update_search()

    thread = threading.Thread(target=target)
    thread.start()

    in_strip_html.wait()

    try:
        feed = parser.feed(1, datetime(2010, 1, 3), title='three')
        parser.entry(1, 1, datetime(2010, 1, 3), title='three')
        reader._storage.db.execute("PRAGMA busy_timeout = 0;")
        reader.update_feed(feed.url)
        expected_title = 'three'
    except StorageError:
        expected_title = 'two'
    finally:
        can_return_from_strip_html.set()
        thread.join()

    reader.update_search()

    (entry, ) = reader.get_entries()
    (result, ) = reader.search_entries('one OR two OR three')
    assert entry.title == result.metadata['.title'].value == expected_title