async def test_subscribe_to_gemeente_groningen(
    faker: Faker,
    repositories: Repositories,
    client_session_mocker: ClientSessionMocker,
    feed_gemeente_groningen: Feed,
    user: User,
    user_bearer_token: str,
):
    """Walk a user through subscribing to the Gemeente Groningen feed.

    The mocked client session is primed with two sample HTML pages. The test
    asserts, in order: a refresh while the feed has no subscriptions leaves
    the feed-item and news-item repositories empty, subscribing registers the
    feed on the user, the next refresh yields ten news items (all unread for
    the user), and one more refresh brings the news-item total to twenty.
    """
    # 1. Refresh feed so that it exists
    client_session_mocker.setup_client_session_for([
        "sample-files/html_sources/gemeente_groningen.html",
        "sample-files/html_sources/gemeente_groningen_2.html",
    ])

    response = await do_refresh_all_feeds()
    feed = await repositories.feed_repository.find_by_url(
        feed_gemeente_groningen.url)
    assert response is not None
    assert feed.number_of_subscriptions == 0
    assert await repositories.feed_item_repository.count({}) == 0
    assert await repositories.news_item_repository.count({}) == 0

    # 2. Subscribe
    assert feed.feed_id not in user.subscribed_to
    await subscribe_to_feed(feed_id=feed.feed_id,
                            authorization=user_bearer_token)
    # Re-read user and feed so the assertions see the persisted state.
    user = await repositories.user_repository.fetch_user_by_email(
        user.email_address)
    feed = await repositories.feed_repository.find_by_url(
        feed_gemeente_groningen.url)
    assert feed.feed_id in user.subscribed_to
    assert await repositories.news_item_repository.count(
        {}) == await repositories.feed_item_repository.count({})
    assert feed.number_of_items == 0
    assert user.number_of_unread_items == feed.number_of_items
    assert feed.number_of_subscriptions == 1

    # 3. Refresh - let the news flow in.
    response = await do_refresh_all_feeds()
    user = await repositories.user_repository.fetch_user_by_email(
        user.email_address)
    feed = await repositories.feed_repository.find_by_url(
        feed_gemeente_groningen.url)

    assert response.number_of_feeds_refreshed == 1
    assert await repositories.news_item_repository.count({}) == 10
    assert user.number_of_unread_items == 10
    item: FeedItem = (
        await repositories.feed_item_repository.fetch_all_for_feed(feed))[0]
    assert item.title == "Glasvezel in gebied Ten Boer"
    assert item.link == "https://gemeente.groningen.nl/actueel/nieuws/glasvezel-in-gebied-ten-boer"
    assert item.created_on is not None
    assert item.published is not None
    assert item.last_seen is not None

    # 4. Refresh again
    response = await do_refresh_all_feeds()
    assert response.number_of_feeds_refreshed == 1
    assert await repositories.news_item_repository.count({}) == 20
Example #2
0
async def test_paradiso_amsterdam_sync(
        paradiso_processor: VenueProcessor, repositories: Repositories,
        client_session_mocker: ClientSessionMocker):
    """Sync the Paradiso sample data and verify the resulting RSS feed.

    Expects ``sync_venues()`` to return 1, the channel to hold 58 items, and
    the Candy Dulfer item to carry the expected title, link, guid, source and
    author, plus a non-empty description and pubDate.
    """
    event_url = "https://www.paradiso.nl/en/program/candy-dulfer-sax-candy-rock-n-roll-show-22-15-uur/85371"
    venue_url = "https://www.paradiso.nl/"

    client_session_mocker.setup_client_session_for(
        "tests/samples/paradiso-amsterdam")
    synced = await sync_venues()
    assert synced == 1

    rss = await read_rss()
    assert_rss_channel(rss)

    entries = rss.findall("channel/item")
    assert len(entries) == 58

    entry = rss.find(
        "channel/item[link='https://www.paradiso.nl/en/program/candy-dulfer-sax-candy-rock-n-roll-show-22-15-uur/85371']"
    )
    assert entry is not None
    title = entry.find("./title")
    assert title.text == "Candy Dulfer - Sax, Candy & Rock-'n-Roll show - 22:15 uur [Paradiso NL-AMS]"
    assert entry.find("./link").text == event_url
    assert entry.find("./description").text is not None
    assert entry.find("./guid").text == event_url
    assert entry.find("./source").text == venue_url
    assert entry.find("./author").text == venue_url
    assert entry.find("./pubDate").text is not None
async def test_refresh_atom_feed(
    faker: Faker,
    repositories: Repositories,
    client_session_mocker: ClientSessionMocker,
    user: User,
    user_bearer_token: str,
):
    """Fetch an Atom feed, subscribe the user, then refresh it once.

    The mocked client session serves ``fetch_1.xml`` followed by
    ``fetch_2.xml``. The test asserts one feed item after the initial fetch,
    one news item (and one unread item) after subscribing, and two of each
    after the refresh.
    """
    test_url = faker.url()

    # Find the unknown feed. Should fetch 1 feed item.
    client_session_mocker.setup_client_session_for(
        ["sample-files/atom/fetch_1.xml", "sample-files/atom/fetch_2.xml"])
    feed = await fetch_feed_information_for(repositories.client_session,
                                            test_url)
    assert await repositories.feed_item_repository.count({}) == 1
    assert feed is not None

    # subscribe the user, there should be 1 news_item for the user.
    # str() is the idiomatic spelling of the former direct __str__() call.
    await subscribe_to_feed(feed_id=str(feed.feed_id),
                            authorization=user_bearer_token)
    user = await repositories.user_repository.fetch_user_by_email(
        user.email_address)
    assert feed.feed_id in user.subscribed_to
    assert await repositories.news_item_repository.count({}) == 1
    assert user.number_of_unread_items == 1

    # refresh the feed, with one new item.
    await refresh_all_feeds()
    user = await repositories.user_repository.fetch_user_by_email(
        user.email_address)
    assert await repositories.feed_item_repository.count({}) == 2
    assert await repositories.news_item_repository.count({}) == 2
    assert user.number_of_unread_items == 2
async def test_neushoorn_leeuwarden_sync(
        neushoorn_processor: VenueProcessor, repositories: Repositories,
        client_session_mocker: ClientSessionMocker):
    """Back-date two synced Neushoorn events and check clean-up drops them.

    After syncing the sample data the RSS channel holds 16 items. Two events
    get their ``when`` moved into the past (4 and 3 days ago); the maintenance
    clean-up is then expected to report 2 cleaned events and the RSS channel
    to shrink to 14 items.

    NOTE(review): another test in this source carries the same function name;
    if both live in one module the later definition shadows this one - confirm.
    """
    client_session_mocker.setup_client_session_for(
        "tests/samples/neushoorn-leeuwarden")
    result = await sync_venues()
    assert result == 1

    xml = await read_rss()
    assert len(xml.findall("channel/item")) == 16

    event_collection = repositories.event_repository.event_collection
    backdated_events = (
        ("https://neushoorn.nl/production/hardcore-vrijdag-6/", 4),
        ("https://neushoorn.nl/production/shantel-bucovina-club-orkestar/", 3),
    )
    for url, days_ago in backdated_events:
        event = await event_collection.find_one({"url": url})
        # Fail with a readable assertion rather than a TypeError on None.
        assert event is not None, f"sample event not found: {url}"
        event["when"] = datetime.now(tz=pytz.utc) - timedelta(days=days_ago)
        await event_collection.replace_one({"_id": event["_id"]}, event)

    result = await maintenance_clean_up()
    assert result is not None
    assert result.number_of_events_cleaned == 2

    xml = await read_rss()
    assert len(xml.findall("channel/item")) == 14
Example #5
0
async def test_parse_sample_rss_feeds(
    repositories: Repositories, client_session_mocker: ClientSessionMocker, faker: Faker, user: User, user_bearer_token
):
    """Fetch feed information once per sample RSS file and validate each response.

    The mocked client session is primed with all sample files up front. On
    every iteration the repositories are cleaned, the feed is requested for
    the same random URL, and the shared assertion helper checks the response
    against the current sample file.
    """
    http_response = MagicMock()

    sample_files = [
        "sample-files/rss_feeds/venues.xml",
        "sample-files/rss_feeds/ars_technica.xml",
        "sample-files/rss_feeds/pitchfork_best.xml",
    ]
    client_session_mocker.setup_client_session_for(sample_files)
    feed_url = faker.url()

    for sample_file in sample_files:
        # Start every sample from empty repositories.
        await clean_data_repositories(repositories)

        fetched = await fetch_feed_information_for_url(
            response=http_response,
            url=feed_url,
            authorization=user_bearer_token,
        )
        await _assert_fetch_feed_information_response(
            response=fetched,
            response_mock=http_response,
            expected_url=feed_url,
            xml_test_file=sample_file,
            repositories=repositories,
        )
async def test_vera_groningen_sync(vera_processor: VenueProcessor,
                                   repositories: Repositories,
                                   client_session_mocker: ClientSessionMocker):
    """Sync the Vera sample data and verify the resulting RSS feed.

    Expects ``sync_venues()`` to return 1, the channel to hold 39 items, and
    the Meadowlake item to carry the expected title, link, guid, source and
    author, plus a non-empty description and pubDate.
    """
    event_url = "http://www.vera-groningen.nl/?post_type=events&p=107558&lang=nl"
    venue_url = "https://www.vera-groningen.nl/programma/"

    client_session_mocker.setup_client_session_for(
        "tests/samples/vera-groningen")
    synced = await sync_venues()
    assert synced == 1

    rss = await read_rss()
    assert_rss_channel(rss)

    entries = rss.findall("channel/item")
    assert len(entries) == 39

    entry = rss.find(
        "channel/item[link='http://www.vera-groningen.nl/?post_type=events&p=107558&lang=nl']"
    )
    assert entry is not None
    title = entry.find("./title")
    assert title.text == "Meadowlake (GRN) (VERPLAATST NAAR) [Vera NL-GRN]"
    assert entry.find("./link").text == event_url
    assert entry.find("./description").text is not None
    assert entry.find("./guid").text == event_url
    assert entry.find("./source").text == venue_url
    assert entry.find("./author").text == venue_url
    assert entry.find("./pubDate").text is not None
Example #7
0
async def test_oost_groningen_sync(oost_processor: VenueProcessor,
                                   repositories: Repositories,
                                   client_session_mocker: ClientSessionMocker):
    """Sync the Oost sample data and verify the resulting RSS feed.

    Expects ``sync_venues()`` to return 1, the channel to hold 8 items, and
    the HOMOOST movie-night item to carry the expected title, link, guid,
    source and author, plus a non-empty description and pubDate.
    """
    event_url = "https://www.facebook.com/events/610421539383220/"
    venue_url = "https://www.komoost.nl"

    client_session_mocker.setup_client_session_for(
        "tests/samples/oost-groningen")
    synced = await sync_venues()
    assert synced == 1

    rss = await read_rss()
    assert_rss_channel(rss)

    entries = rss.findall("channel/item")
    assert len(entries) == 8

    entry = rss.find(
        "channel/item[link='https://www.facebook.com/events/610421539383220/']"
    )
    assert entry is not None
    title = entry.find("./title")
    assert title.text == "HOMOOST • Movie Night: Party Monster the Shockumentary [Oost NL-GRN]"
    assert entry.find("./link").text == event_url
    assert entry.find("./description").text is not None
    assert entry.find("./guid").text == event_url
    assert entry.find("./source").text == venue_url
    assert entry.find("./author").text == venue_url
    assert entry.find("./pubDate").text is not None
Example #8
0
async def test_tivoli_utrecht_sync(tivoli_processor: VenueProcessor,
                                   repositories: Repositories,
                                   client_session_mocker: ClientSessionMocker):
    """Sync the Tivoli sample data and verify the resulting RSS feed.

    Expects ``sync_venues()`` to return 1, the channel to hold 92 items, and
    the Bongeziwe Mabandla item to carry the expected title, link, guid,
    source and author, plus a non-empty description and pubDate.
    """
    event_url = "https://www.tivolivredenburg.nl/agenda/bongeziwe-mabandla-27-06-2021/"
    venue_url = "https://www.tivolivredenburg.nl/agenda/"

    client_session_mocker.setup_client_session_for(
        "tests/samples/tivoli-utrecht")
    synced = await sync_venues()
    assert synced == 1

    rss = await read_rss()
    assert_rss_channel(rss)

    entries = rss.findall("channel/item")
    assert len(entries) == 92

    entry = rss.find(
        "channel/item[link='https://www.tivolivredenburg.nl/agenda/bongeziwe-mabandla-27-06-2021/']"
    )
    assert entry is not None
    title = entry.find("./title")
    assert title.text == "Bongeziwe Mabandla [Tivoli NL-UTR]"
    assert entry.find("./link").text == event_url
    assert entry.find("./description").text is not None
    assert entry.find("./guid").text == event_url
    assert entry.find("./source").text == venue_url
    assert entry.find("./author").text == venue_url
    assert entry.find("./pubDate").text is not None
async def test_hedon_zwolle_sync(hedon_processor: VenueProcessor,
                                 repositories: Repositories,
                                 client_session_mocker: ClientSessionMocker):
    """Sync the Hedon sample data and verify the resulting RSS feed.

    Expects ``sync_venues()`` to return 1, the channel to hold 105 items, and
    the De Kift item to carry the expected title, link, guid, source and
    author, plus a non-empty description and pubDate.
    """
    event_url = "https://www.hedon-zwolle.nl/voorstelling/30455/de-kift-"
    venue_url = "https://www.hedon-zwolle.nl/#programma"

    client_session_mocker.setup_client_session_for(
        "tests/samples/hedon-zwolle")
    synced = await sync_venues()
    assert synced == 1

    rss = await read_rss()
    assert_rss_channel(rss)

    entries = rss.findall("channel/item")
    assert len(entries) == 105

    entry = rss.find(
        "channel/item[link='https://www.hedon-zwolle.nl/voorstelling/30455/de-kift-']"
    )
    assert entry is not None
    title = entry.find("./title")
    assert title.text == "DE KIFT  [Hedon NL-ZWO]"
    assert entry.find("./link").text == event_url
    assert entry.find("./description").text is not None
    assert entry.find("./guid").text == event_url
    assert entry.find("./source").text == venue_url
    assert entry.find("./author").text == venue_url
    assert entry.find("./pubDate").text is not None
async def test_no_undoubling_rss_feed_in_feed_items(
    faker: Faker,
    repositories: Repositories,
    client_session_mocker: ClientSessionMocker,
):
    """Fetch the undoubling sample and expect every entry stored as a feed item.

    The sample file undoubling-events.xml contains:
    - Christone (1x)
    - Nina June (1x)
    - Guus Meeuwis (3x)
    - Inge van Calkar (2x)
    - Milkshake festival (2x)
    - Art of escapism (22x)

    All 31 entries are expected in the feed-item repository, while the
    news-item repository stays empty.
    """
    feed_url = faker.url()

    sample_files = ["sample-files/rss_feeds/undoubling-events.xml"]
    client_session_mocker.setup_client_session_for(sample_files)

    fetched_feed = await fetch_feed_information_for(repositories.client_session, feed_url)

    stored_feed_items = await repositories.feed_item_repository.count({})
    assert stored_feed_items == 31
    stored_news_items = await repositories.news_item_repository.count({})
    assert stored_news_items == 0  # no subscribers
    assert fetched_feed is not None
async def test_spot_groningen_sync(spot_processor: VenueProcessor,
                                   repositories: Repositories,
                                   client_session_mocker: ClientSessionMocker):
    """Sync the Spot sample data and verify the resulting RSS feed.

    Expects ``sync_venues()`` to return 1, the channel to hold 20 items, and
    the Kamagurka item to carry the expected title, link, guid, source and
    author, plus a non-empty description and pubDate.
    """
    event_url = "https://www.spotgroningen.nl/programma/kamagurka/"
    venue_url = "https://www.spotgroningen.nl/programma"

    client_session_mocker.setup_client_session_for(
        "tests/samples/spot-groningen")
    synced = await sync_venues()
    assert synced == 1

    rss = await read_rss()
    assert_rss_channel(rss)

    entries = rss.findall("channel/item")
    assert len(entries) == 20

    entry = rss.find(
        "channel/item[link='https://www.spotgroningen.nl/programma/kamagurka/']"
    )
    assert entry is not None
    title = entry.find("./title")
    assert title.text == "Kamagurka - De grenzen van de ernst [Spot NL-GRN]"
    assert entry.find("./link").text == event_url
    assert entry.find("./description").text is not None
    assert entry.find("./guid").text == event_url
    assert entry.find("./source").text == venue_url
    assert entry.find("./author").text == venue_url
    assert entry.find("./pubDate").text is not None
Example #12
0
async def test_neushoorn_leeuwarden_sync(
        neushoorn_processor: VenueProcessor, repositories: Repositories,
        client_session_mocker: ClientSessionMocker):
    """Sync the Neushoorn sample data and verify the resulting RSS feed.

    Expects ``sync_venues()`` to return 1, the channel to hold 16 items, and
    the "Uit de Hoge Hoed" item to carry the expected title, link, guid,
    source and author, plus a non-empty description and pubDate.
    """
    event_url = "https://neushoorn.nl/production/uit-de-hoge-hoed-improv-comedy-11/"
    venue_url = "https://www.neushoorn.nl"

    client_session_mocker.setup_client_session_for(
        "tests/samples/neushoorn-leeuwarden")
    synced = await sync_venues()
    assert synced == 1

    rss = await read_rss()
    assert_rss_channel(rss)

    entries = rss.findall("channel/item")
    assert len(entries) == 16

    entry = rss.find(
        "channel/item[link='https://neushoorn.nl/production/uit-de-hoge-hoed-improv-comedy-11/']"
    )
    assert entry is not None
    title = entry.find("./title")
    assert title.text == "Uit de Hoge Hoed: Improv Comedy [Neus NL-LEE]"
    assert entry.find("./link").text == event_url
    assert entry.find("./description").text is not None
    assert entry.find("./guid").text == event_url
    assert entry.find("./source").text == venue_url
    assert entry.find("./author").text == venue_url
    assert entry.find("./pubDate").text is not None
async def test_parse_sample_rdf_feeds(
        repositories: Repositories, client_session_mocker: ClientSessionMocker,
        faker: Faker, user: User, user_bearer_token):
    """Fetch feed information for the Slashdot RDF sample and compare with the XML.

    For each sample file the repositories are cleaned, the feed is fetched
    for a random URL, and the returned feed metadata is checked against the
    parsed sample document. One randomly chosen stored feed item is then
    matched to exactly one ``item`` element of the document by its sanitized
    link and compared on title, published date presence and description.
    """
    http_response = MagicMock()

    sample_files = [
        "sample-files/rdf_sources/slashdot.xml",
    ]
    client_session_mocker.setup_client_session_for(sample_files)
    feed_url = faker.url()

    for sample_file in sample_files:
        await clean_data_repositories(repositories)
        result = await fetch_feed_information_for_url(
            response=http_response,
            url=feed_url,
            authorization=user_bearer_token)

        assert not result.user_is_subscribed
        assert http_response.status_code == 201
        feed = result.feed
        assert feed is not None
        assert feed.feed_id is not None

        # Feed-level metadata must mirror the channel element of the sample.
        document = parse(sample_file)
        channel_description = document.find("{*}channel/{*}description")
        assert feed.description == channel_description.text
        channel_title = document.find("{*}channel/{*}title")
        assert feed.title == channel_title.text
        channel_link = document.find("{*}channel/{*}link")
        assert feed.link == channel_link.text
        assert feed.url == feed_url.rstrip("/")
        assert feed.image_url == "https://a.fsdn.com/sd/topics/topicslashdot.gif"
        assert feed.image_link is None
        assert feed.image_title is None
        assert await repositories.feed_repository.count({}) == 1

        # Spot-check one random stored item against its source element.
        stored_items = await repositories.feed_item_repository.fetch_all_for_feed(
            feed)
        item: FeedItem = choice(stored_items)
        matches = [
            candidate for candidate in document.findall("{*}item")
            if sanitize_link(candidate.findtext("{*}link")) == item.link
        ]

        assert len(matches) == 1
        source_item = matches[0]
        assert source_item.findtext("{*}title") in item.title
        source_has_no_date = source_item.findtext("{*}date") is None
        if source_has_no_date:
            assert item.published is None
        else:
            assert item.published is not None
        assert item.created_on is not None
        source_description = source_item.findtext("{*}description")
        assert item.description == source_description[:1400]
Example #14
0
async def test_refresh_rss_feed(
    faker: Faker,
    repositories: Repositories,
    client_session_mocker: ClientSessionMocker,
    user: User,
    user_bearer_token: str,
):
    """Fetch an RSS feed, subscribe the user, then refresh it three times.

    The mocked client session serves four Pitchfork sample files in order.
    Expected totals of feed items / news items after each step: 1 after the
    initial fetch and subscribe, 2 after the first refresh, 5 after the
    second and 25 after the third.
    """
    test_url = faker.url()

    # Find the unknown feed. Should fetch 1 feed item.
    client_session_mocker.setup_client_session_for(
        [
            "sample-files/rss_feeds/pitchfork_best_subscribe_fetch.xml",
            "sample-files/rss_feeds/pitchfork_best_first_fetch.xml",
            "sample-files/rss_feeds/pitchfork_best_second_fetch.xml",
            "sample-files/rss_feeds/pitchfork_best_third_fetch.xml",
        ]
    )
    feed = await fetch_feed_information_for(repositories.client_session, test_url)
    assert await repositories.feed_item_repository.count({}) == 1
    assert feed is not None

    # subscribe the user, there should be 1 news_item for the user.
    assert user.number_of_unread_items == 0
    # str() is the idiomatic spelling of the former direct __str__() call.
    await subscribe_to_feed(feed_id=str(feed.feed_id), authorization=user_bearer_token)
    user = await repositories.user_repository.fetch_user_by_email(user.email_address)
    assert feed.feed_id in user.subscribed_to
    assert await repositories.news_item_repository.count({}) == 1
    assert user.number_of_unread_items == 1

    # refresh the feed, with one new item.
    await refresh_all_feeds()
    user = await repositories.user_repository.fetch_user_by_email(user.email_address)
    assert await repositories.feed_item_repository.count({}) == 2
    assert await repositories.news_item_repository.count({}) == 2
    assert user.number_of_unread_items == 2

    # refresh the feed, with the next one, three new items.
    await refresh_all_feeds()
    assert await repositories.news_item_repository.count({}) == 5
    assert await repositories.feed_item_repository.count({}) == 5

    # refresh the feed, with the next one, all new items except the ones already present.
    await refresh_all_feeds()
    assert await repositories.news_item_repository.count({}) == 25
    assert await repositories.feed_item_repository.count({}) == 25
async def test_undoubling_rss_feed_when_refreshing(
    faker: Faker,
    repositories: Repositories,
    client_session_mocker: ClientSessionMocker,
    user: User,
    user_bearer_token: str,
):
    """Refresh a subscribed feed with a sample full of repeated events.

    After the prelude fetch and the subscription there is one feed item and
    one news item. After refreshing with the full sample the test expects 31
    feed items but only 6 news items.
    """
    test_url = faker.url()

    #
    # The file undoubling-events.xml contains:
    # - Christone (1x)
    # - Nina June (1x)
    # - Guus Meeuwis (3x)
    # - Inge van Calkar (2x)
    # - Milkshake festival (2x)
    # - Art of escapism (22x)
    #
    # The prelude contains the same as Christone with different title, same link.
    client_session_mocker.setup_client_session_for(
        [
            "sample-files/rss_feeds/undoubling-events-prelude.xml",
            "sample-files/rss_feeds/undoubling-events.xml",
        ]
    )
    feed = await fetch_feed_information_for(repositories.client_session, test_url)
    assert await repositories.feed_item_repository.count({}) == 1
    assert feed is not None

    # subscribe the user, there should be 1 news_item for the user.
    assert user.number_of_unread_items == 0
    # str() is the idiomatic spelling of the former direct __str__() call.
    await subscribe_to_feed(feed_id=str(feed.feed_id), authorization=user_bearer_token)
    user = await repositories.user_repository.fetch_user_by_email(user.email_address)
    assert feed.feed_id in user.subscribed_to
    assert await repositories.news_item_repository.count({}) == 1
    assert await repositories.feed_item_repository.count({}) == 1
    assert user.number_of_unread_items == 1

    # refresh the feed with the full undoubling sample: 31 feed items are
    # expected, but only 6 news items for the subscriber.
    await refresh_all_feeds()
    assert await repositories.feed_item_repository.count({}) == 31
    assert await repositories.news_item_repository.count({}) == 6
async def test_atom_feed(faker: Faker, repositories: Repositories,
                         client_session_mocker: ClientSessionMocker,
                         user: User, user_bearer_token):
    """Fetch The Quietus Atom sample and compare the stored data with the XML.

    Checks the response status code set on the mock (201), the feed title and
    missing description, that the number of stored feed items equals the
    number of Atom ``entry`` elements, and that one randomly chosen stored
    item matches exactly one entry on link, title, published date presence
    and content.
    """
    http_response = MagicMock()

    sample_files = [
        "sample-files/atom/thequietus.xml",
    ]
    client_session_mocker.setup_client_session_for(sample_files)
    feed_url = faker.url()

    result = await fetch_feed_information_for_url(
        response=http_response, url=feed_url, authorization=user_bearer_token)
    assert http_response.status_code == 201
    assert result is not None

    feed = result.feed
    assert feed.feed_id is not None
    assert feed.description is None
    assert feed.title == "The Quietus | All Articles"

    document = parse(sample_files[0])
    stored_count = await repositories.feed_item_repository.count({})
    assert stored_count == len(
        document.findall("{http://www.w3.org/2005/Atom}entry"))

    # Spot-check one random stored item against its source entry.
    stored_items = await repositories.feed_item_repository.fetch_all_for_feed(
        feed)
    item: FeedItem = choice(stored_items)
    matches = [
        entry
        for entry in document.findall("{http://www.w3.org/2005/Atom}entry")
        if entry.find("{http://www.w3.org/2005/Atom}link").get("href") == item.link
    ]
    assert len(matches) == 1
    source_entry = matches[0]
    assert item.title == source_entry.findtext(
        "{http://www.w3.org/2005/Atom}title")
    published_text = source_entry.findtext(
        "{http://www.w3.org/2005/Atom}published")
    if published_text is None:
        assert item.published is None
    else:
        assert item.published is not None
    assert item.created_on is not None
    assert item.description == source_entry.findtext(
        "{http://www.w3.org/2005/Atom}content")
Example #17
0
async def test_t013_tilburg_sync(
    t013_processor: VenueProcessor, repositories: Repositories, client_session_mocker: ClientSessionMocker
):
    """Sync the 013 Tilburg sample data and verify the resulting RSS feed.

    Expects ``sync_venues()`` to return 1, the channel to hold 7 items, and
    the Snelle item to carry the expected title, link, guid, source and
    author, plus a non-empty description and pubDate.
    """
    event_url = "https://www.013.nl/programma/5423/snelle"
    venue_url = "https://www.013.nl/programma"

    client_session_mocker.setup_client_session_for("tests/samples/t013-tilburg")
    synced = await sync_venues()
    assert synced == 1

    rss = await read_rss()
    assert_rss_channel(rss)

    entries = rss.findall("channel/item")
    assert len(entries) == 7

    entry = rss.find("channel/item[link='https://www.013.nl/programma/5423/snelle']")
    assert entry is not None
    title = entry.find("./title")
    assert title.text == "Snelle + Pjotr [013 NL-TIL]"
    assert entry.find("./link").text == event_url
    assert entry.find("./description").text is not None
    assert entry.find("./guid").text == event_url
    assert entry.find("./source").text == venue_url
    assert entry.find("./author").text == venue_url
    assert entry.find("./pubDate").text is not None
async def test_melkweg_amsterdam_sync(
    melkweg_processor: VenueProcessor, repositories: Repositories, client_session_mocker: ClientSessionMocker
):
    """Sync the Melkweg sample data and verify the resulting RSS feed.

    Expects ``sync_venues()`` to return 1, the channel to hold 46 items, and
    the Orla Gartland item to carry the expected title, link, guid, source
    and author, plus a non-empty description and pubDate.
    """
    event_url = "https://www.melkweg.nl/nl/agenda/olga-gartland-10-11-2019"
    venue_url = "https://www.melkweg.nl/agenda"

    client_session_mocker.setup_client_session_for("tests/samples/melkweg-amsterdam")
    synced = await sync_venues()
    assert synced == 1

    rss = await read_rss()
    assert_rss_channel(rss)

    entries = rss.findall("channel/item")
    assert len(entries) == 46

    entry = rss.find("channel/item[link='https://www.melkweg.nl/nl/agenda/olga-gartland-10-11-2019']")
    assert entry is not None
    title = entry.find("./title")
    assert title.text == "Orla Gartland [Melkweg NL-AMS]"
    assert entry.find("./link").text == event_url
    assert entry.find("./description").text is not None
    assert entry.find("./guid").text == event_url
    assert entry.find("./source").text == venue_url
    assert entry.find("./author").text == venue_url
    assert entry.find("./pubDate").text is not None
async def test_parse_edge_cases(faker: Faker, repositories: Repositories,
                                client_session_mocker: ClientSessionMocker,
                                user: User, user_bearer_token):
    """Fetch the edge-case RSS sample and expect missing descriptions.

    Asserts that the mock response gets status code 201, that the feed has
    no description, that exactly one feed item is stored and that this
    item's description is ``None`` as well.
    """
    http_response = MagicMock()

    sample_files = [
        "sample-files/rss_feeds/edge_case.xml",
    ]
    client_session_mocker.setup_client_session_for(sample_files)
    feed_url = faker.url()

    result = await fetch_feed_information_for_url(
        response=http_response, url=feed_url, authorization=user_bearer_token)
    assert http_response.status_code == 201
    assert result is not None

    assert result.feed.description is None
    stored_count = await repositories.feed_item_repository.count({})
    assert stored_count == 1
    stored_items = await repositories.feed_item_repository.fetch_all_for_feed(
        result.feed)
    first_item = stored_items[0]
    assert first_item.description is None
async def test_unknown_rdf_feed_with_html(
    faker: Faker,
    user: User,
    repositories: Repositories,
    client_session_mocker: ClientSessionMocker,
    user_bearer_token: str,
):
    """Fetch feed information when an HTML page is served before the RDF sample.

    The mocked client session returns the Slashdot HTML sample first and the
    RDF XML sample second. The resulting feed is expected to describe
    Slashdot with 15 items, with one feed and 15 feed items stored.
    """
    sample_files = [
        "sample-files/rdf_sources/slashdot.html",
        "sample-files/rdf_sources/slashdot.xml",
    ]
    client_session_mocker.setup_client_session_for(sample_files)
    http_response = MagicMock()
    page_url = faker.url()

    result = await fetch_feed_information_for_url(
        response=http_response, url=page_url, authorization=user_bearer_token)

    feed = result.feed
    assert feed.link == "https://slashdot.org/"
    assert feed.number_of_items == 15
    assert feed.description == "News for nerds, stuff that matters"
    assert feed.title == "Slashdot"

    stored_feed_items = await repositories.feed_item_repository.count({})
    assert stored_feed_items == 15
    stored_feeds = await repositories.feed_repository.count({})
    assert stored_feeds == 1
Example #21
0
async def test_unknown_feed_with_html(
    faker: Faker,
    user: User,
    repositories: Repositories,
    client_session_mocker: ClientSessionMocker,
    user_bearer_token: str,
):
    """Fetch feed information when an HTML page is served before the RSS sample.

    The mocked client session returns the Pitchfork HTML sample first and the
    RSS XML sample second. The shared assertion helper then checks the
    response against the XML sample, expecting the Pitchfork RSS address as
    the feed URL rather than the random URL that was requested.
    """
    html_sample = "sample-files/rss_feeds/pitchfork_best.html"
    xml_sample = "sample-files/rss_feeds/pitchfork_best.xml"
    client_session_mocker.setup_client_session_for([html_sample, xml_sample])
    http_response = MagicMock()
    page_url = faker.url()

    result = await fetch_feed_information_for_url(
        response=http_response,
        url=page_url,
        authorization=user_bearer_token,
    )
    await _assert_fetch_feed_information_response(
        response=result,
        response_mock=http_response,
        expected_url="https://pitchfork.com/rss/reviews/best/albums",
        xml_test_file=xml_sample,
        repositories=repositories,
    )
Example #22
0
async def test_simplon_groningen_sync(
        simplon_processor: VenueProcessor, repositories: Repositories,
        client_session_mocker: ClientSessionMocker):
    """Sync the Simplon sample data and verify the resulting RSS feed.

    Expects ``sync_venues()`` to return 1, the channel to hold 29 items, and
    the Foxlane + Car Pets item to carry the expected title, link, guid,
    source and author, plus a non-empty description and pubDate.
    """
    event_url = "http://simplon.nl/?post_type=events&p=17602"
    venue_url = "https://www.simplon.nl"

    client_session_mocker.setup_client_session_for(
        "tests/samples/simplon-groningen")
    synced = await sync_venues()
    assert synced == 1

    rss = await read_rss()
    assert_rss_channel(rss)

    entries = rss.findall("channel/item")
    assert len(entries) == 29

    entry = rss.find(
        "channel/item[link='http://simplon.nl/?post_type=events&p=17602']")
    assert entry is not None
    title = entry.find("./title")
    assert title.text == "Foxlane + Car Pets [Simplon NL-GRN]"
    assert entry.find("./link").text == event_url
    assert entry.find("./description").text is not None
    assert entry.find("./guid").text == event_url
    assert entry.find("./source").text == venue_url
    assert entry.find("./author").text == venue_url
    assert entry.find("./pubDate").text is not None
Example #23
0
async def test_refresh_with_duplicate_titles(
    faker: Faker,
    repositories: Repositories,
    client_session_mocker: ClientSessionMocker,
    user: User,
    user_bearer_token: str,
):
    """Refresh a subscribed feed whose updates repeat titles of known items.

    The mocked client session serves the brakdag sample followed by three
    updates. The initial fetch stores 8 feed items and subscribing gives the
    user 8 news items. Across the three refreshes the feed-item count is
    expected to go 9, 9, 10 while the news-item count and the user's unread
    count stay at 8; after the first refresh the matching news item is
    expected to be marked "[Updated]" with one alternate link, title link and
    favicon.
    """
    test_url = faker.url()

    # Find the unknown feed. Should fetch all items (8), but there are items with similar titles.
    client_session_mocker.setup_client_session_for(
        [
            "sample-files/rss_feeds/brakdag.xml",
            "sample-files/rss_feeds/brakdag_update_1.xml",
            "sample-files/rss_feeds/brakdag_update_2.xml",
            "sample-files/rss_feeds/brakdag_update_3.xml",
        ]
    )
    feed = await fetch_feed_information_for(repositories.client_session, test_url)
    assert await repositories.feed_item_repository.count({}) == 8
    assert feed is not None

    # subscribe the user, there should be 8 news_items for the user.
    assert user.number_of_unread_items == 0
    # str() is the idiomatic spelling of the former direct __str__() call.
    await subscribe_to_feed(feed_id=str(feed.feed_id), authorization=user_bearer_token)
    user = await repositories.user_repository.fetch_user_by_email(user.email_address)
    assert feed.feed_id in user.subscribed_to
    assert await repositories.news_item_repository.count({}) == 8
    assert user.number_of_unread_items == 8

    # Check if similarities were correctly flagged within the first refresh.
    count_feed_items = await repositories.feed_item_repository.count(
        {"link": "https://www.gic.nl/nieuws/brand-verwoest-boerderij-aan-stadsweg-in-groningen"}
    )
    assert count_feed_items == 1

    # ----- Next run. The last item is added with a different url but with similar title.
    await refresh_all_feeds()
    user = await repositories.user_repository.fetch_user_by_email(user.email_address)
    assert await repositories.feed_item_repository.count({}) == 9
    assert await repositories.news_item_repository.count({}) == 8  # Created an updated news-item
    assert user.number_of_unread_items == 8

    updated_news_item_json = await repositories.news_item_repository.news_item_collection.find_one(
        {
            "link": "https://www.oogtv.nl/2021/01/uitslaande-brand-in-boerderij-in-ulgersmaborg/?utm_source=rss&utm_medium=rss&utm_campaign=uitslaande-brand-in-boerderij-in-ulgersmaborg"
        }
    )
    news_item = NewsItem.parse_obj(updated_news_item_json)
    assert len(news_item.alternate_title_links) == 1
    assert len(news_item.alternate_links) == 1
    assert len(news_item.alternate_favicons) == 1
    assert news_item.title.startswith("[Updated]")

    # ----- Next run. The last item is added with an identical link and identical title. Nothing happens.
    await refresh_all_feeds()
    user = await repositories.user_repository.fetch_user_by_email(user.email_address)
    assert await repositories.feed_item_repository.count({}) == 9
    assert await repositories.news_item_repository.count({}) == 8
    assert user.number_of_unread_items == 8

    # ----- Next run. The item is added with similar title but with different link.
    await refresh_all_feeds()
    user = await repositories.user_repository.fetch_user_by_email(user.email_address)
    assert await repositories.feed_item_repository.count({}) == 10
    assert await repositories.news_item_repository.count({}) == 8
    assert user.number_of_unread_items == 8