예제 #1
0
 def _find_extra(tag: Tag) -> str:
     """Return the cleaned text of the ``h4`` element with class ``extra``.

     :param tag: element that is searched for the ``h4.extra`` heading.
     :return: an empty string when no such heading exists; otherwise the
         heading text after it has gone through
         ``ParserUtil.remove_children_text_from``,
         ``VeraParser._add_sup_text_from_text`` and
         ``ParserUtil.sanitize_text``, in that order.
     """
     heading = tag.find('h4', {'class': 'extra'})
     if heading is not None:
         own_text = ParserUtil.remove_children_text_from(heading, heading.text)
         with_sup = VeraParser._add_sup_text_from_text(heading, own_text)
         return ParserUtil.sanitize_text(with_sup)
     return ''
예제 #2
0
    def _transform(venue: Venue, tag: Tag) -> Event:
        """Build an ``Event`` from a single event element.

        :param venue: venue the event belongs to; its ``source_url`` and
            ``timezone_short`` attributes are read here.
        :param tag: element holding the event markup (``a.event-link``,
            ``h3`` artist heading, ``h4.extra``, ``h4.pretitle``, ``div.date``,
            ``div.schedule`` and ``div.artist-image``).
        :return: the populated ``Event``. ``when`` is ``datetime.min`` if the
            element has no ``div.date`` and ``datetime.now()`` if the date text
            could not be parsed.
        """
        source = venue.source_url
        url = tag.find('a', {'class': 'event-link'})['href']

        # Artist name; the event URL stands in when there is no artist heading.
        artist_heading = tag.find('h3',
                                  {'class': re.compile(r'artist|artist ')})
        if artist_heading is None:
            artist = url
        else:
            artist_own_text = ParserUtil.remove_children_text_from(
                artist_heading, artist_heading.text)
            artist = ParserUtil.sanitize_text(
                VeraParser._add_sup_text_from_text(artist_heading,
                                                   artist_own_text))

        extra = VeraParser._find_extra(tag)

        # Optional pre-title, rendered between parentheses in the title.
        pretitle_tag = tag.find('h4', {'class': 'pretitle'})
        if pretitle_tag is None:
            extra_title = ''
        else:
            extra_title = f'({ParserUtil.sanitize_text(pretitle_tag.text)})'

        date_tag = tag.find('div', {'class': 'date'})
        if date_tag is None:
            when_date = datetime.min
        else:
            day_text = ParserUtil.sanitize_text(
                ParserUtil.remove_children_text_from(date_tag, date_tag.text))
            schedule_text = tag.find('div', {'class': 'schedule'}).text
            # Take the five characters that follow 'start: ' (presumably
            # HH:MM - confirm against the page markup).
            # NOTE(review): str.find returns -1 when the marker is absent, in
            # which case this slices characters 6..10 of the schedule text.
            marker_at = schedule_text.find('start: ')
            start_time = schedule_text[marker_at + 7:marker_at + 12]
            when_date = dateparser.parse(
                f'{day_text} {start_time}{venue.timezone_short}',
                languages=['nl'])

        # The image address is cut out of the inline style attribute: from the
        # first 'https' up to the next single quote.
        # NOTE(review): no guard for a style value without 'https'.
        style_attr = tag.find('div', {'class': 'artist-image'})['style']
        https_at = style_attr.find('https')
        quote_at = style_attr.find('\'', https_at + 4)
        image_url = style_attr[https_at:quote_at]

        # dateparser.parse yields None for text it cannot interpret.
        if when_date is None:
            when_date = datetime.now()

        title = f'{artist} {extra_title}'.strip()
        description = f'{artist}{" with support" if extra != "" else ""} {extra}'.strip()

        return Event(url=url,
                     title=title,
                     description=description,
                     venue=venue,
                     source=source,
                     date_published=datetime.now(),
                     when=when_date,
                     image_url=image_url)
예제 #3
0
    def parse(self, parsing_context: ParsingContext) -> List[Event]:
        """Turn the JSON agenda in ``parsing_context.content`` into events.

        The content is decoded as JSON and iterated as a sequence of days,
        each carrying an ``events`` list. Only entries whose ``type`` equals
        ``'event'`` are converted.

        :param parsing_context: supplies ``venue`` and the raw JSON ``content``.
        :return: one ``Event`` per matching agenda entry, in document order.
        """
        venue = parsing_context.venue
        source = venue.source_url
        agenda = json.loads(parsing_context.content)

        results = []
        for day in agenda:
            # Filter the whole day first, then convert the remaining entries.
            listed = [
                entry for entry in day['events'] if entry['type'] == 'event'
            ]
            for item in listed:
                description = MelkwegParser._make_description(item)
                starts_at = datetime.fromtimestamp(
                    int(item['date']), pytz.timezone("Europe/Amsterdam"))
                title = item['name']
                image_url = (
                    f'https://s3-eu-west-1.amazonaws.com/static.melkweg.nl/uploads/images/'
                    f'scaled/agenda_thumbnail/{item["thumbnail"]}')
                url = f'https://www.melkweg.nl/nl/agenda/{item["slug"]}'
                # The description is cut to 1400 characters before sanitizing.
                parsed = Event(
                    url=url,
                    title=title,
                    description=ParserUtil.sanitize_text(description[:1400]),
                    venue=venue,
                    source=source,
                    date_published=datetime.now(),
                    when=starts_at,
                    image_url=image_url)
                results.append(parsed)

        log = logging.getLogger(__name__)
        log.info('parsed %d events melkweg', len(results))
        return results
예제 #4
0
    def _transform(venue: Venue, data: Dict) -> Event:
        """Build an ``Event`` from one programme entry given as a dict.

        :param venue: venue the event belongs to; ``source_url`` and
            ``timezone_short`` are read from it.
        :param data: entry with the keys ``slug``, ``id``, ``title``,
            ``subtitle`` and ``start_date_time``.
        :return: the ``Event``; its description is the subtitle, or the title
            when ``ParserUtil.not_empty`` rejects the subtitle.
        """
        source = venue.source_url
        tz_short = venue.timezone_short
        url = f'https://www.paradiso.nl/en/program/{data["slug"]}/{data["id"]}'
        title = data['title']

        description = data['subtitle']
        if not ParserUtil.not_empty(description):
            description = title

        # The venue's short timezone is appended directly to the start stamp
        # before it is handed to dateparser.
        when = dateparser.parse(f'{data["start_date_time"]}{tz_short}',
                                languages=['en'])

        return Event(
            url=url,
            title=title,
            description=description,
            venue=venue,
            source=source,
            date_published=datetime.now(),
            when=when,
        )
예제 #5
0
    def _transform(venue: Venue, article: Tag) -> Event:
        """Build an ``Event`` from one programme ``article`` element.

        :param venue: venue the event belongs to; ``source_url`` and ``url``
            are read from it (``url`` is prefixed to the image path).
        :param article: element containing the link, the
            ``div.program__content`` block, a ``figure`` image and a ``time``
            element.
        :return: the ``Event``. When the ``h1`` contains a ``span``, the title
            is the heading text without the span text, followed by ``' - '``
            and the span text.
        """
        source = venue.source_url
        base_url = venue.url
        url = article.a.get('href')
        content = article.find('div', {'class': 'program__content'})
        image_path = article.find('figure').img.get('data-src')
        time_tag = article.find('time')

        heading = content.h1
        span = heading.find('span')
        if span is None:
            content_title = heading.text
        else:
            # Move the span text to the end, separated by a dash.
            content_title = heading.text.replace(span.text, '') \
                + ' - ' + span.text

        description = ParserUtil.stripped_text_or_default_if_empty(
            content.p, content_title)

        return Event(
            url=url,
            title=content_title,
            description=description,
            venue=venue,
            image_url=f'{base_url}{image_path}',
            source=source,
            date_published=datetime.now(),
            when=datetime.fromisoformat(time_tag.get('datetime')),
        )
예제 #6
0
    def _transform(venue: Venue, data: Dict) -> Event:
        """Build an ``Event`` from one agenda entry given as a dict.

        :param venue: venue the event belongs to; ``source_url`` and
            ``timezone_short`` are read from it.
        :param data: entry with the keys ``link``, ``title``, ``image``,
            ``subtitle``, ``day``, ``month`` and ``year``.
        :return: the ``Event``; its description is the subtitle, or the title
            when ``ParserUtil.not_empty`` rejects the subtitle.
        """
        source = venue.source_url
        tz_short = venue.timezone_short
        url = data['link']
        title = data['title']
        image_url = data['image']

        description = data['subtitle']
        if not ParserUtil.not_empty(description):
            description = title

        # Only a calendar date is available, so the time is fixed at 00:00.
        when = dateparser.parse(
            f'{data["day"]} {data["month"]} {data["year"]} 00:00{tz_short}',
            languages=['nl'])

        return Event(
            url=url,
            title=title,
            description=description,
            venue=venue,
            image_url=image_url,
            source=source,
            date_published=datetime.now(),
            when=when,
        )
예제 #7
0
 def _add_sup_text_from_text(parent_tag: Tag, text: str) -> str:
     """Append the text of the first ``sup`` element to ``text``.

     :param parent_tag: element searched for a ``sup`` descendant.
     :param text: text to extend.
     :return: ``'<text> (<sup text>)'`` when
         ``ParserUtil.has_non_empty_text`` accepts the ``sup`` element,
         otherwise ``text`` unchanged.
     """
     sup = parent_tag.find('sup')
     if ParserUtil.has_non_empty_text(sup):
         return f'{text} ({sup.text})'
     return text