def _find_extra(tag: Tag) -> str:
    """Return the cleaned-up text of the ``h4.extra`` heading inside *tag*.

    The heading text is first passed through
    ``ParserUtil.remove_children_text_from``, then through
    ``VeraParser._add_sup_text_from_text`` and finally through
    ``ParserUtil.sanitize_text``.

    :param tag: element that is searched for an ``h4`` with class ``extra``.
    :return: the processed heading text, or ``''`` when there is no such
        heading.
    """
    heading = tag.find('h4', {'class': 'extra'})
    if heading is not None:
        own_text = ParserUtil.remove_children_text_from(heading, heading.text)
        with_sup = VeraParser._add_sup_text_from_text(heading, own_text)
        return ParserUtil.sanitize_text(with_sup)
    return ''
def _transform(venue: Venue, tag: Tag) -> Event:
    """Build an ``Event`` from a single event element of the listing.

    :param venue: venue the event belongs to; its ``source_url`` and
        ``timezone_short`` are read here.
    :param tag: element holding the event markup. It must contain an
        ``a.event-link`` with an ``href`` and a ``div.artist-image`` with a
        ``style`` attribute; every other element is optional.
    :return: the event. ``title`` is the artist plus the optional
        parenthesised ``h4.pretitle`` text, ``description`` is the artist
        plus the optional ``h4.extra`` text.
    :raises TypeError: when the ``a.event-link`` or ``div.artist-image``
        element is missing (subscripting ``None``).
    :raises KeyError: when one of those elements lacks the required
        attribute.
    """
    source = venue.source_url
    url = tag.find('a', {'class': 'event-link'})['href']

    # The pattern used to be ``artist|artist `` -- the second alternative
    # could never be chosen, so plain ``artist`` matches exactly the same.
    artist_tag = tag.find('h3', {'class': re.compile(r'artist')})
    if artist_tag is not None:
        artist = ParserUtil.remove_children_text_from(artist_tag,
                                                      artist_tag.text)
        artist = VeraParser._add_sup_text_from_text(artist_tag, artist)
        artist = ParserUtil.sanitize_text(artist)
    else:
        # Without an artist heading the link is the only identifier left.
        artist = url

    extra = VeraParser._find_extra(tag)

    pretitle_tag = tag.find('h4', {'class': 'pretitle'})
    if pretitle_tag is not None:
        extra_title = f'({ParserUtil.sanitize_text(pretitle_tag.text)})'
    else:
        extra_title = ''

    when_tag = tag.find('div', {'class': 'date'})
    if when_tag is not None:
        when = ParserUtil.remove_children_text_from(when_tag, when_tag.text)
        when = ParserUtil.sanitize_text(when)

        # The start time is the five characters following 'start: '.
        # str.find() returns -1 when the marker is absent; slicing with that
        # index would feed unrelated characters to the date parser, so the
        # time part is left empty instead. A missing schedule element is
        # treated the same way rather than raising AttributeError.
        schedule_tag = tag.find('div', {'class': 'schedule'})
        schedule_text = schedule_tag.text if schedule_tag is not None else ''
        marker = 'start: '
        marker_at = schedule_text.find(marker)
        if marker_at != -1:
            time_from = marker_at + len(marker)
            when_time = schedule_text[time_from:time_from + 5]
        else:
            when_time = ''

        when_date = dateparser.parse(
            f'{when} {when_time}{venue.timezone_short}', languages=['nl'])
    else:
        when_date = datetime.min

    # Cut the URL out of the inline style: from the first 'https' up to the
    # next single quote. No 'https' yields an empty string, which is also
    # what the previous slicing produced in that case.
    style = tag.find('div', {'class': 'artist-image'})['style']
    url_from = style.find('https')
    if url_from != -1:
        url_to = style.find('\'', url_from + 4)
        image_url = style[url_from:url_to]
    else:
        image_url = ''

    # dateparser.parse() returns None for text it cannot interpret.
    if when_date is None:
        when_date = datetime.now()

    support = ' with support' if extra != '' else ''
    return Event(url=url,
                 title=f'{artist} {extra_title}'.strip(),
                 description=f'{artist}{support} {extra}'.strip(),
                 venue=venue,
                 source=source,
                 date_published=datetime.now(),
                 when=when_date,
                 image_url=image_url)
def parse(self, parsing_context: ParsingContext) -> List[Event]:
    """Turn the JSON agenda in *parsing_context* into a list of events.

    The content is read as a JSON list of days. Each day has an ``events``
    list, and only entries whose ``type`` equals ``'event'`` are converted.
    The ``date`` field is converted with ``int()`` and interpreted as a
    timestamp in the ``Europe/Amsterdam`` timezone. Descriptions are cut
    off at 1400 characters before being sanitized.

    :param parsing_context: supplies ``venue`` and the raw JSON ``content``.
    :return: one ``Event`` per agenda entry of type ``'event'``, in
        document order.
    """
    thumbnail_base = ('https://s3-eu-west-1.amazonaws.com/static.melkweg.nl'
                      '/uploads/images/scaled/agenda_thumbnail/')
    venue = parsing_context.venue
    source = venue.source_url
    agenda = json.loads(parsing_context.content)
    amsterdam = pytz.timezone("Europe/Amsterdam")

    results = []
    for day in agenda:
        # Filter the whole day up front, before any entry is converted.
        real_events = [
            entry for entry in day['events'] if entry['type'] == 'event'
        ]
        for entry in real_events:
            description = MelkwegParser._make_description(entry)
            starts_at = datetime.fromtimestamp(int(entry['date']), amsterdam)
            title = entry['name']
            image_url = f'{thumbnail_base}{entry["thumbnail"]}'
            url = f'https://www.melkweg.nl/nl/agenda/{entry["slug"]}'
            parsed = Event(
                url=url,
                title=title,
                description=ParserUtil.sanitize_text(description[:1400]),
                venue=venue,
                source=source,
                date_published=datetime.now(),
                when=starts_at,
                image_url=image_url)
            results.append(parsed)

    logger = logging.getLogger(__name__)
    logger.info('parsed %d events melkweg', len(results))
    return results
def _transform(venue: Venue, data: Dict) -> Event:
    """Build an ``Event`` from one program entry given as a dictionary.

    :param venue: venue the event belongs to; its ``source_url`` and
        ``timezone_short`` are read here.
    :param data: entry with the keys ``slug``, ``id``, ``title``,
        ``subtitle`` and ``start_date_time``.
    :return: the event. Its description is the subtitle, or the title when
        ``ParserUtil.not_empty`` rejects the subtitle. ``when`` is whatever
        ``dateparser.parse`` returns for the start time with the venue's
        short timezone appended.
    """
    source = venue.source_url
    tz_short = venue.timezone_short
    url = f'https://www.paradiso.nl/en/program/{data["slug"]}/{data["id"]}'
    title = data['title']

    subtitle = data['subtitle']
    if ParserUtil.not_empty(subtitle):
        description = subtitle
    else:
        description = title

    when = dateparser.parse(f'{data["start_date_time"]}{tz_short}',
                            languages=['en'])

    return Event(url=url,
                 title=title,
                 description=description,
                 venue=venue,
                 source=source,
                 date_published=datetime.now(),
                 when=when)
def _transform(venue: Venue, article: Tag) -> Event:
    """Build an ``Event`` from one program ``article`` element.

    :param venue: venue the event belongs to; its ``source_url`` and
        ``url`` are read here.
    :param article: element containing a link, a ``figure`` with an image,
        a ``time`` element and a ``div.program__content`` with the heading.
    :return: the event. The title is the ``h1`` text; when the heading has
        a ``span``, the span text is removed from the heading text and
        appended again after ``' - '``. The description is taken from the
        first paragraph via
        ``ParserUtil.stripped_text_or_default_if_empty``, with the title as
        default. The image URL is the venue URL followed by the image's
        ``data-src``, and ``when`` is parsed from the ``datetime``
        attribute of the ``time`` element.
    """
    source = venue.source_url
    base_url = venue.url
    url = article.a.get('href')
    content = article.find('div', {'class': 'program__content'})
    image_path = article.find('figure').img.get('data-src')
    time_tag = article.find('time')

    heading = content.h1
    span = heading.find('span')
    if span is None:
        content_title = heading.text
    else:
        # Move the span text to the end of the title, behind a dash.
        content_title = (heading.text.replace(span.text, '') + ' - ' +
                         span.text)

    description = ParserUtil.stripped_text_or_default_if_empty(
        content.p, content_title)

    return Event(url=url,
                 title=content_title,
                 description=description,
                 venue=venue,
                 image_url=f'{base_url}{image_path}',
                 source=source,
                 date_published=datetime.now(),
                 when=datetime.fromisoformat(time_tag.get('datetime')))
def _transform(venue: Venue, data: Dict) -> Event:
    """Build an ``Event`` from one agenda entry given as a dictionary.

    :param venue: venue the event belongs to; its ``source_url`` and
        ``timezone_short`` are read here.
    :param data: entry with the keys ``link``, ``title``, ``image``,
        ``subtitle``, ``day``, ``month`` and ``year``.
    :return: the event. Its description is the subtitle, or the title when
        ``ParserUtil.not_empty`` rejects the subtitle. ``when`` is whatever
        ``dateparser.parse`` returns for the day, month and year with a
        fixed ``00:00`` time and the venue's short timezone.
    """
    source = venue.source_url
    tz_short = venue.timezone_short
    url = data['link']
    title = data['title']
    image_url = data['image']

    subtitle = data['subtitle']
    if ParserUtil.not_empty(subtitle):
        description = subtitle
    else:
        description = title

    # The entry carries no time of day, so 00:00 is used.
    when_text = (
        f'{data["day"]} {data["month"]} {data["year"]} 00:00{tz_short}')
    when = dateparser.parse(when_text, languages=['nl'])

    return Event(url=url,
                 title=title,
                 description=description,
                 venue=venue,
                 image_url=image_url,
                 source=source,
                 date_published=datetime.now(),
                 when=when)
def _add_sup_text_from_text(parent_tag: Tag, text: str) -> str:
    """Append the text of the first ``sup`` element of *parent_tag* to *text*.

    :param parent_tag: element that is searched for a ``sup`` element.
    :param text: text to extend.
    :return: ``'<text> (<sup text>)'`` when ``ParserUtil.has_non_empty_text``
        accepts the ``sup`` element, otherwise *text* unchanged.
    """
    sup = parent_tag.find('sup')
    if not ParserUtil.has_non_empty_text(sup):
        return text
    return f'{text} ({sup.text})'