Beispiel #1
0
    def test_parse(self):
        """Parse the Oost Groningen content and verify the resulting events.

        Checks the number of parsed events, every field of the first event,
        and that ``when``, ``description``, ``title`` and ``url`` are set on
        all events.
        """
        venue = OostGroningenProcessor.create_venue()
        content = fetch(venue.url)
        results = OostGroningenParser().parse(
            ParsingContext(venue=venue, content=content))
        assert_that(len(results), equal_to(8))

        event = results[0]
        assert_that(
            event.title,
            equal_to('HOMOOST • Movie Night: Party Monster the Shockumentary'))
        assert_that(event.description,
                    equal_to('Movie Screening • Group Discussion'))
        assert_that(event.when, is_not(none()))
        assert_that(
            event.url,
            equal_to('https://www.facebook.com/events/610421539383220/'))
        assert_that(
            event.image_url,
            equal_to(
                'https://www.komoost.nl/media/56721601_1992667177522931_8267801960216788992_o.jpg'
            ))
        assert_that(event.venue, equal_to(venue))
        assert_that(event.source, equal_to('https://www.komoost.nl'))
        assert_that(event.event_id, is_not(none()))
        assert_that(event.date_published, is_not(none()))

        # ``none`` has to be called: ``is_not(none)`` compares against the
        # matcher factory itself and therefore passes for every value,
        # including ``None``. A plain loop replaces the list comprehensions
        # that were only used for their side effects.
        for event in results:
            assert_that(event.when, is_not(none()))
            assert_that(event.description, is_not(none()))
            assert_that(event.title, is_not(none()))
            assert_that(event.url, is_not(none()))
    def test_sample_file_page_1(self):
        """Parse page 1 of the Paradiso source and verify the events.

        Checks the event count, the fields of the first event, and that
        ``when``, ``description``, ``title`` and ``url`` are set on all
        events.
        """
        venue = ParadisoProcessor.create_venue()
        parser = ParadisoParser()
        data = fetch(f'{venue.source_url}/page=1')

        results = parser.parse(ParsingContext(venue=venue, content=data))
        assert_that(len(results), equal_to(30))
        event = results[0]

        assert_that(
            event.url,
            equal_to('https://www.paradiso.nl/en/program/giant-rooks/54827'))
        assert_that(event.venue, equal_to(venue))
        assert_that(event.title, equal_to("Giant Rooks"))
        assert_that(event.description,
                    equal_to("Aanstormende Duitse indiepopband"))
        assert_that(event.when, is_not(none()))
        assert_that(event.image_url, none())
        assert_that(event.date_published, is_not(none()))
        assert_that(event.source, equal_to('https://www.paradiso.nl/'))

        # ``none`` has to be called: ``is_not(none)`` compares against the
        # matcher factory itself and would pass even for ``None`` values.
        for event in results:
            assert_that(event.when, is_not(none()))
            assert_that(event.description, is_not(none()))
            assert_that(event.title, is_not(none()))
            assert_that(event.url, is_not(none()))
Beispiel #3
0
    def test_sample_file(self):
        """Parse page 1 of the Tivoli agenda and verify the events.

        Checks the event count, the fields of the event titled
        "Leuk Dat Je d'r Bent Band", and that ``when``, ``description``,
        ``title`` and ``url`` are set on all events.
        """
        venue = TivoliProcessor.create_venue()
        parser = TivoliParser()
        data = fetch(f'{venue.url}/page=1')
        results = parser.parse(ParsingContext(venue=venue, content=data))
        assert_that(len(results), equal_to(30))
        event = [
            result for result in results
            if result.title == "Leuk Dat Je d'r Bent Band"
        ][0]

        assert_that(
            event.url,
            equal_to(
                'https://www.tivolivredenburg.nl/agenda/leuk-dat-je-dr-bent-band-27-04-2019/'
            ))
        assert_that(event.venue, equal_to(venue))
        assert_that(event.title, equal_to("Leuk Dat Je d'r Bent Band"))
        assert_that(event.when, is_not(none()))
        assert_that(
            event.image_url,
            equal_to(
                'https://www.tivolivredenburg.nl/wp-content/uploads/2019/03/dezegebruikenleuk-195x130.jpg'
            ))
        assert_that(event.description, equal_to("met EK '88 thema!"))
        assert_that(event.date_published, is_not(none()))
        assert_that(event.source,
                    equal_to('https://www.tivolivredenburg.nl/agenda/'))

        # ``none`` has to be called: ``is_not(none)`` compares against the
        # matcher factory itself and would pass even for ``None`` values.
        for event in results:
            assert_that(event.when, is_not(none()))
            assert_that(event.description, is_not(none()))
            assert_that(event.title, is_not(none()))
            assert_that(event.url, is_not(none()))
Beispiel #4
0
 def melkweg_observer(self, observer: Observer, _) -> Observer:
     """Push all Melkweg events to ``observer`` in one emission.

     The content behind ``self.scrape_url`` is fetched and parsed with a
     ``MelkwegParser``; the parse result is delivered through a single
     ``on_next`` call, after which the observer is completed.

     :param observer: receiver of the parsed events.
     :param _: second positional argument, ignored by this function.
     :return: the same ``observer`` that was passed in.
     """
     melkweg_parser = MelkwegParser()
     content = fetch(self.scrape_url)
     context = ParsingContext(venue=self.venue, content=content)
     events = melkweg_parser.parse(context)
     observer.on_next(events)
     observer.on_completed()
     return observer
Beispiel #5
0
    def test_sample_file(self):
        """Parse the Spot programme and verify the resulting events.

        Checks the event count, that exactly one event has the Kamagurka
        URL and that its fields match, and that ``when``, ``description``,
        ``title`` and ``url`` are set on all events.
        """
        venue = SpotProcessor.create_venue()
        data = fetch(venue.url)
        results = self.parser.parse(ParsingContext(venue=venue, content=data))
        assert_that(results, is_not(none()))
        assert_that(len(results), equal_to(58))
        kamagurka = [item for item in results if item.url == 'https://www.spotgroningen.nl/programma/kamagurka/']
        assert_that(len(kamagurka), equal_to(1))
        assert_that(kamagurka[0].source, equal_to('https://www.spotgroningen.nl/programma'))
        assert_that(kamagurka[0].description, equal_to('De overtreffende trap van absurditeit'))
        assert_that(kamagurka[0].date_published, is_not(none()))
        assert_that(kamagurka[0].image_url,
                    equal_to('https://www.spotgroningen.nl/wp-content/uploads/2019/02/'
                             'Kamagurka-20-20De-20grenzen-20van-20de-20ernst-20'
                             'Kamagurka-202-20300-20dpi-20RGB-150x150.jpg'))
        assert_that(kamagurka[0].title, equal_to('Kamagurka - De grenzen van de ernst'))
        assert_that(kamagurka[0].when, is_not(none()))
        assert_that(kamagurka[0].url, equal_to('https://www.spotgroningen.nl/programma/kamagurka/'))
        assert_that(kamagurka[0].event_id, is_not(none()))
        assert_that(kamagurka[0].venue, equal_to(venue))

        # ``none`` has to be called: ``is_not(none)`` compares against the
        # matcher factory itself and would pass even for ``None`` values.
        for event in results:
            assert_that(event.when, is_not(none()))
            assert_that(event.description, is_not(none()))
            assert_that(event.title, is_not(none()))
            assert_that(event.url, is_not(none()))
    def test_parse_sample(self):
        """Parse the Simplon content and verify the resulting events.

        Checks the event count, the fields of the first event, and that
        ``when``, ``description``, ``title`` and ``url`` are set on all
        events.
        """
        venue = SimplonProcessor.create_venue()
        parser = SimplonParser()
        data = fetch(venue.url)
        results = parser.parse(ParsingContext(venue=venue, content=data))
        assert_that(len(results), equal_to(29))
        event = results[0]
        assert_that(event.title, equal_to('Foxlane + Car Pets'))
        assert_that(event.venue, equal_to(venue))
        assert_that(event.description, equal_to('Simplon UP'))
        assert_that(event.url,
                    equal_to('http://simplon.nl/?post_type=events&p=17602'))
        assert_that(
            event.image_url,
            equal_to(
                'https://simplon.nl/content/uploads/2019/03/FOXLANE-MAIN-PRESS-PHOTO-600x600.jpg'
            ))
        assert_that(event.when, is_not(none()))
        assert_that(event.source, equal_to('https://www.simplon.nl'))
        assert_that(event.date_published, is_not(none()))

        # ``none`` has to be called: ``is_not(none)`` compares against the
        # matcher factory itself and would pass even for ``None`` values.
        for event in results:
            assert_that(event.when, is_not(none()))
            assert_that(event.description, is_not(none()))
            assert_that(event.title, is_not(none()))
            assert_that(event.url, is_not(none()))
Beispiel #7
0
    def simplon_observer(self, observer: Observer, _) -> Disposable:
        """Push all Simplon events to ``observer`` in one emission.

        The content behind ``self.scrape_url`` is fetched and parsed with a
        ``SimplonParser``; the parse result is delivered through a single
        ``on_next`` call, after which the observer is completed.

        :param observer: receiver of the parsed events.
        :param _: second positional argument, ignored by this function.
        :return: the ``observer`` that was passed in.
        """
        # NOTE(review): the signature declares ``Disposable`` but the
        # observer itself is returned -- confirm which one is intended.
        simplon_parser = SimplonParser()
        content = fetch(self.scrape_url)
        context = ParsingContext(venue=self.venue, content=content)
        observer.on_next(simplon_parser.parse(context))
        observer.on_completed()
        return observer
    def test_raw_fetches(self):
        """Parse pages 1 and 2 of the Vera source and check required fields.

        For each page, every parsed event must have ``when``,
        ``description``, ``title`` and ``url`` set.
        """
        venue = VeraProcessor.create_venue()
        parser = VeraParser()
        # Both pages get the same checks, so they share one loop body.
        for page in (1, 2):
            data = fetch(f'{venue.url}/page={page}')
            results = parser.parse(ParsingContext(venue=venue, content=data))
            # ``none`` has to be called: ``is_not(none)`` compares against
            # the matcher factory itself and would pass even for ``None``.
            for event in results:
                assert_that(event.when, is_not(none()))
                assert_that(event.description, is_not(none()))
                assert_that(event.title, is_not(none()))
                assert_that(event.url, is_not(none()))
Beispiel #9
0
 def parse_all_at_once_observable(
     observer: Observer,
     parser: Parser,
     venue: Venue,
     scrape_url: str,
 ) -> Observer:
     """Fetch ``scrape_url``, parse it once and emit the result.

     The fetched content is wrapped in a ``ParsingContext`` together with
     ``venue`` and handed to ``parser``. Whatever the parser returns is
     delivered through a single ``on_next`` call, after which the observer
     is completed.

     :param observer: receiver of the parsed events.
     :param parser: parser whose ``parse`` method produces the events.
     :param venue: venue stored in the parsing context.
     :param scrape_url: location passed to ``fetch``.
     :return: the same ``observer`` that was passed in.
     """
     context = ParsingContext(venue=venue, content=fetch(scrape_url))
     observer.on_next(parser.parse(context))
     observer.on_completed()
     return observer
    def test_sample_file_page_2(self):
        """Parse page 2 of the Paradiso source and verify the events.

        Checks that exactly one event is parsed and that ``when``,
        ``description``, ``title`` and ``url`` are set on it.
        """
        venue = ParadisoProcessor.create_venue()
        parser = ParadisoParser()
        data = fetch(f'{venue.source_url}/page=2')

        results = parser.parse(ParsingContext(venue=venue, content=data))
        assert_that(len(results), equal_to(1))

        # ``none`` has to be called: ``is_not(none)`` compares against the
        # matcher factory itself and would pass even for ``None`` values.
        for event in results:
            assert_that(event.when, is_not(none()))
            assert_that(event.description, is_not(none()))
            assert_that(event.title, is_not(none()))
            assert_that(event.url, is_not(none()))
Beispiel #11
0
    def test_small_sample(self):
        """Parse the Melkweg source and check every event is complete.

        Expects 46 events; each must have ``title``, ``description``,
        ``image_url``, ``source``, ``when`` and ``url`` set and must report
        itself as valid.
        """
        melkweg = MelkwegProcessor.create_venue()
        content = fetch(melkweg.source_url)
        parsed = MelkwegParser().parse(ParsingContext(melkweg, content))
        assert_that(len(parsed), equal_to(46))

        required_fields = ('title', 'description', 'image_url', 'source',
                           'when', 'url')
        for item in parsed:
            for field_name in required_fields:
                assert_that(getattr(item, field_name), is_not(none()))
            assert_that(item.is_valid(), equal_to(True))
Beispiel #12
0
    def vera_observer(self, observer: Observer, _) -> Disposable:
        """Emit Vera events page by page until a short page is seen.

        Starting at page 1, ``self.scrape_url`` is formatted with the page
        number and the page size (20), fetched and parsed with a
        ``VeraParser``. Each page's result is delivered through ``on_next``.
        Paging stops after the first page that yields fewer than 20 events,
        and the observer is then completed.

        :param observer: receiver of the parsed events, one emission per page.
        :param _: second positional argument, ignored by this function.
        :return: the ``observer`` that was passed in.
        """
        # NOTE(review): the signature declares ``Disposable`` but the
        # observer itself is returned -- confirm which one is intended.
        parser = VeraParser()
        page_size = 20
        page_number = 1
        while True:
            page_url = self.scrape_url.format(page_number, page_size)
            content = fetch(page_url)
            page_events = parser.parse(
                ParsingContext(venue=self.venue, content=content))
            observer.on_next(page_events)
            # A page that is not full is the last one.
            if len(page_events) < page_size:
                break
            page_number += 1
        observer.on_completed()
        return observer
Beispiel #13
0
    def parse_page_indexed_observable(observer: Observer, parser: Parser,
                                      venue: Venue, scrape_url_format: str,
                                      items_per_page: int) -> Observer:
        """Emit parsed events page by page until a short page is seen.

        Starting at page 1, ``scrape_url_format`` is formatted with the page
        number, fetched and parsed. Each page's result is delivered through
        ``on_next``. Paging stops after the first page that yields fewer
        than ``items_per_page`` events, and the observer is then completed.

        :param observer: receiver of the parsed events, one emission per page.
        :param parser: parser whose ``parse`` method produces the events.
        :param venue: venue stored in each parsing context.
        :param scrape_url_format: format string taking the page number.
        :param items_per_page: size of a full page.
        :return: the same ``observer`` that was passed in.
        """
        page_number = 1
        while True:
            content = fetch(scrape_url_format.format(page_number))
            context = ParsingContext(venue=venue, content=content)
            page_events = parser.parse(context)
            observer.on_next(page_events)
            # A page that is not full is the last one.
            if len(page_events) < items_per_page:
                break
            page_number += 1
        observer.on_completed()
        return observer
Beispiel #14
0
    def test_parse(self):
        """Parse the large Melkweg JSON and verify the resulting events.

        Expects 378 events, checks the fields of the event titled
        'Inna de Yard feat. Horace Andy', and requires ``title``,
        ``description``, ``image_url``, ``source``, ``when`` and ``url`` to
        be set on every event.
        """
        payload = fetch('https://www.melkweg.nl/large-json')
        melkweg = MelkwegProcessor.create_venue()
        events = MelkwegParser().parse(
            ParsingContext(venue=melkweg, content=payload))
        assert_that(len(events), equal_to(378))

        expected_title = 'Inna de Yard feat. Horace Andy'
        expected_description = (
            'Inna de Yard is het resultaat van een historische ontmoeting van twee generaties Jamaicaanse '
            'zangers en muzikanten tijdens traditionele akoestische jamsessies \'inna de yard\'. Levende legendes '
            'van de gouden jaren van de rootsreggae als Horace Andy, Ken Boothe en Cedric Myton werken samen met '
            'jong talent van het eiland en blazen zo de originele essentie van \'jamrock\' nieuw leven in. '
            'Na het succes van het eerste album in 2017 en enkele geweldige concerten in Parijs, is er nu een '
            'vervolg met een nieuw album en een film. De nieuwe tour is nu al legendarisch en gelukkig slaan '
            'ze Amsterdam niet over!\xa0')
        expected_image_url = (
            'https://s3-eu-west-1.amazonaws.com/static.melkweg.nl/uploads/images/scaled/agenda_thumbnail/25520'
        )

        # Indexing with [0] fails the test when no event has the title.
        matching = [item for item in events if item.title == expected_title]
        inna = matching[0]

        assert_that(inna.title, equal_to(expected_title))
        assert_that(inna.description, equal_to(expected_description))
        assert_that(inna.image_url, equal_to(expected_image_url))
        assert_that(inna.source, equal_to('https://www.melkweg.nl/agenda'))
        assert_that(
            inna.url,
            equal_to('https://www.melkweg.nl/nl/agenda/inna-da-yard-13-06-2019'))
        assert_that(
            inna.when,
            equal_to(datetime.fromisoformat('2019-06-13T19:30:00+02:00')))

        required_fields = ('title', 'description', 'image_url', 'source',
                           'when', 'url')
        for item in events:
            for field_name in required_fields:
                assert_that(getattr(item, field_name), is_not(none()))
    def test_sample_file(self):
        """Parse page 1 of the Vera source and verify the events.

        Checks the event count, the fields of the first event, the title and
        description of the third event, and that ``when``, ``description``,
        ``title`` and ``url`` are set on all events.
        """
        venue = VeraProcessor.create_venue()
        parser = VeraParser()
        data = fetch(f'{venue.url}/page=1')

        results = parser.parse(ParsingContext(venue=venue, content=data))
        assert_that(len(results), equal_to(20))
        event = results[0]
        assert_that(
            event.url,
            equal_to(
                'http://www.vera-groningen.nl/?post_type=events&p=98899&lang=nl'
            ))
        assert_that(event.venue, equal_to(venue))
        assert_that(event.title,
                    equal_to('Beyond Hip Hop (STUDIUM GENERALE PRESENTS)'))
        assert_that(event.when, is_not(none()))
        assert_that(
            event.image_url,
            equal_to(
                'https://www.vera-groningen.nl/content/uploads/2019/03/rich-medina-website2-360x250.jpg'
            ))
        assert_that(
            event.description,
            equal_to('Beyond Hip Hop with support A Lecture By Rich Medina'))
        assert_that(event.date_published, is_not(none()))
        assert_that(event.source,
                    equal_to('https://www.vera-groningen.nl/programma/'))

        event = results[2]
        assert_that(
            event.description,
            equal_to('Marissa Nadler (USA) with support Klaske Oenema (NL)'))
        assert_that(event.title, equal_to('Marissa Nadler (USA)'))

        # ``none`` has to be called: ``is_not(none)`` compares against the
        # matcher factory itself and would pass even for ``None`` values.
        for event in results:
            assert_that(event.when, is_not(none()))
            assert_that(event.description, is_not(none()))
            assert_that(event.title, is_not(none()))
            assert_that(event.url, is_not(none()))