def _find_extra(tag: Tag) -> str:
    """Return the sanitized text of the ``h4.extra`` element inside *tag*.

    The element's text is passed through
    ``ParserUtil.remove_children_text_from`` and
    ``VeraParser._add_sup_text_from_text`` before being sanitized.

    Returns an empty string when *tag* contains no ``<h4 class="extra">``.
    """
    heading = tag.find("h4", {"class": "extra"})
    if heading is not None:
        own_text = ParserUtil.remove_children_text_from(heading, heading.text)
        with_sup = VeraParser._add_sup_text_from_text(heading, own_text)
        return ParserUtil.sanitize_text(with_sup)
    return ""
Exemple #2
0
    def _transform(venue: Venue, data: Dict,
                   parsing_context: ParsingContext) -> Event:
        """Build an ``Event`` for *venue* from a single event dictionary.

        *data* must provide the keys ``id``, ``title``, ``subtitle`` and
        ``start_date_time``.  The dictionary is recorded on
        ``parsing_context.currently_parsing`` before anything is read from
        it.

        The description is the subtitle, or the title when
        ``ParserUtil.not_empty`` rejects the subtitle.  The start moment is
        parsed with ``dateparser``; when that yields nothing the current
        time in the venue's timezone is used instead.
        """
        parsing_context.currently_parsing = data
        source = venue.source_url
        event_url = f"https://api.paradiso.nl/api/library/lists/events/{data['id']}?lang=en"
        title = data["title"]

        subtitle = data["subtitle"]
        if ParserUtil.not_empty(subtitle):
            description = subtitle
        else:
            description = title

        start_text = f'{data["start_date_time"]}'
        parser_settings = {
            "TIMEZONE": venue.timezone,
            "RETURN_AS_TIMEZONE_AWARE": True
        }
        when = dateparser.parse(start_text,
                                languages=["en"],
                                settings=parser_settings)
        if not when:
            # Nothing could be parsed: fall back to "now" at the venue.
            when = datetime.now(tz=pytz.timezone(venue.timezone))

        return Event(
            url=event_url,
            title=title,
            description=description,
            date_published=datetime.now(),
            when=when,
            source=source,
            venue_id=venue.venue_id,
            venue_name=venue.name,
            venue_short_name=venue.short_name,
            venue_url=venue.url,
            venue_city=venue.city,
            venue_country=venue.country,
            venue_timezone=venue.timezone,
        )
    def _transform(venue: Venue, article: Tag,
                   parsing_context: ParsingContext) -> Event:
        """Build an ``Event`` for *venue* from one programme ``article`` tag.

        The article is recorded on ``parsing_context.currently_parsing``
        before it is inspected.  The event moment comes from the article's
        ``data-datetime`` attribute, read as a Unix timestamp in the venue's
        timezone.  The title is the ``<h1>`` text of the
        ``program__content`` div; when the heading contains a ``<span>``,
        the span text is moved to the end after ``" - "``.

        ``image_url`` is ``venue.url`` followed by the ``data-src`` of the
        image inside ``<figure>``.  It is ``None`` when the article has no
        figure, no image, or no ``data-src``.
        """
        parsing_context.currently_parsing = article
        source = venue.source_url
        base_url = venue.url
        url = article.a.get("href")
        content = article.find("div", {"class": "program__content"})

        # Look the figure up once and tolerate it being absent; an article
        # without a picture must not produce a URL ending in "None".
        figure = article.find("figure")
        image_path = (figure.img.get("data-src")
                      if figure is not None and figure.img else None)
        image_url = f"{base_url}{image_path}" if image_path else None

        date = datetime.fromtimestamp(int(article["data-datetime"]),
                                      tz=timezone(venue.timezone))
        title = content.h1
        content_title = (title.text if title.find("span") is None else
                         title.text.replace(title.span.text, "") + " - " +
                         title.span.text)
        description = ParserUtil.stripped_text_or_default_if_empty(
            content.p, content_title)

        return Event(
            url=url,
            title=content_title,
            description=description,
            venue_timezone=venue.timezone,
            venue_id=venue.venue_id,
            venue_name=venue.name,
            venue_url=venue.url,
            venue_city=venue.city,
            venue_country=venue.country,
            venue_short_name=venue.short_name,
            image_url=image_url,
            source=source,
            date_published=datetime.now(),
            when=date,
        )
    def update_event_with_details(self, event: Event, additional_details: str) -> Event:
        """Enrich *event* with data found in its detail page HTML.

        Args:
            event: the event to update; it is mutated and returned.
            additional_details: HTML source of the event's detail page.

        Returns:
            The same *event* instance.

        ``event.image_url`` is always overwritten: with the ``og:image``
        meta content when present, otherwise the ``twitter:image`` meta
        content, otherwise ``None``.

        ``event.when`` is replaced only when the page has a ``summary`` div
        holding exactly two ``summary__item`` divs (date, then time) and the
        time text contains the ``"Aanvang "`` marker.  In every other case
        the existing value is left untouched.
        """
        soup = BeautifulSoup(additional_details, features="html.parser")

        image_url = None
        twitter_meta = soup.find("meta", {"name": "twitter:image"})
        if twitter_meta is not None:
            image_url = twitter_meta["content"]
        # Checked last so that og:image wins when both tags are present.
        og_meta = soup.find("meta", {"property": "og:image"})
        if og_meta is not None:
            image_url = og_meta["content"]

        when_date = None
        summary_div = soup.find("div", {"class": "summary"})
        # A page without a summary block simply carries no date information.
        summary_item_divs = (
            summary_div.find_all("div", {"class": "summary__item"})
            if summary_div is not None else [])
        if len(summary_item_divs) == 2:
            date_text = ParserUtil.sanitize_text(summary_item_divs[0].text)
            time_text = ParserUtil.sanitize_text(summary_item_divs[1].text)
            # Keep only what follows the first "Aanvang " marker; when the
            # marker is absent there is no start time to parse.
            _, marker, start_time = time_text.partition("Aanvang ")
            if marker:
                when_date = ParserUtil.parse_date_time_to_datetime(
                    date_text, start_time, event.venue_timezone)

        if when_date is not None:
            event.when = when_date
        event.image_url = image_url
        return event
Exemple #5
0
    def do_parse(self, parsing_context: ParsingContext) -> List[Event]:
        """Parse the JSON agenda in ``parsing_context.content`` into events.

        The content is a JSON list of day objects, each with an ``events``
        list.  Only entries whose ``type`` equals ``"event"`` are converted.
        Each converted entry is recorded on
        ``parsing_context.currently_parsing`` before it is processed.

        The entry's ``date`` is read as a Unix timestamp in the venue's
        timezone.  The description produced by
        ``MelkwegParser._make_description`` is cut to 1400 characters and
        then sanitized.

        Returns:
            The events in the order they appear in the content.
        """
        venue = parsing_context.venue
        source = venue.source_url
        days = json.loads(parsing_context.content)

        parsed_events = []
        for day in days:
            # Filter the complete day up front, before touching any entry.
            day_events = list(
                filter(lambda entry: entry["type"] == "event", day["events"]))
            for entry in day_events:
                parsing_context.currently_parsing = entry
                raw_description = MelkwegParser._make_description(entry)
                starts_at = datetime.fromtimestamp(
                    int(entry["date"]), pytz.timezone(venue.timezone))
                title = entry["name"]
                image_url = (
                    f"https://s3-eu-west-1.amazonaws.com/static.melkweg.nl/uploads/images/"
                    f'scaled/agenda_thumbnail/{entry["thumbnail"]}')
                url = f'https://www.melkweg.nl/nl/agenda/{entry["slug"]}'

                melkweg_event = Event(
                    url=url,
                    title=title,
                    description=ParserUtil.sanitize_text(
                        raw_description[:1400]),
                    venue_timezone=venue.timezone,
                    venue_id=venue.venue_id,
                    venue_name=venue.name,
                    venue_url=venue.url,
                    venue_country=venue.country,
                    venue_city=venue.city,
                    venue_short_name=venue.short_name,
                    source=source,
                    date_published=datetime.now(),
                    when=starts_at,
                    image_url=image_url,
                )
                parsed_events.append(melkweg_event)
        return parsed_events
    def _transform(venue: Venue, tag: Tag,
                   parsing_context: ParsingContext) -> Event:
        """Build an ``Event`` for *venue* from one Vera programme *tag*.

        The tag is recorded on ``parsing_context.currently_parsing`` before
        it is inspected.

        * Title: the artist heading (``h3`` whose class matches ``artist``),
          or the event URL when there is no such heading, followed by the
          ``h4.pretitle`` text in parentheses when present.
        * Description: the artist, plus ``" with support"`` and the
          ``h4.extra`` text when that text is non-empty.
        * When: the text of ``div.date`` combined with the five characters
          following ``"start: "`` in ``div.schedule``, parsed as Dutch.  A
          result more than 100 days in the past is moved one year forward.
          When the date element is missing or cannot be parsed, the current
          time in the venue's timezone is used.
        * Image: the ``https`` URL cut out of the ``style`` attribute of
          ``div.artist-image``.

        Raises:
            TypeError: if the ``event-link`` anchor or the ``artist-image``
                div is missing (subscripting ``None``).
        """
        parsing_context.currently_parsing = tag
        source = venue.source_url
        vera_url = tag.find("a", {"class": "event-link"})["href"]
        artist_tag = tag.find("h3", {"class": re.compile(r"artist|artist ")})
        if artist_tag is not None:
            artist = ParserUtil.remove_children_text_from(
                artist_tag, artist_tag.text)
            artist = VeraParser._add_sup_text_from_text(artist_tag, artist)
            artist = ParserUtil.sanitize_text(artist)
        else:
            artist = vera_url

        extra = VeraParser._find_extra(tag)

        extra_title = tag.find("h4", {"class": "pretitle"})
        if extra_title is not None:
            extra_title = f"({ParserUtil.sanitize_text(extra_title.text)})"
        else:
            extra_title = ""

        # Bound before the branch so the "now" fallback further down also
        # works when the tag has no date element at all.
        when_date: Optional[datetime] = None
        when_tag = tag.find("div", {"class": "date"})
        if when_tag is not None:
            when = ParserUtil.remove_children_text_from(
                when_tag, when_tag.text)
            when = ParserUtil.sanitize_text(when)

            schedule_tag = tag.find("div", {"class": "schedule"})
            schedule = schedule_tag.text if schedule_tag is not None else ""
            start_at = schedule.find("start: ")
            # Only slice out "HH:MM" when the marker is really there;
            # find() returns -1 otherwise and the slice would be garbage.
            when_time = (schedule[start_at + 7:start_at + 12]
                         if start_at != -1 else "")

            when_date = dateparser.parse(
                f"{when} {when_time}" if when_time else when,
                languages=["nl"],
                settings={
                    "TIMEZONE": venue.timezone,
                    "RETURN_AS_TIMEZONE_AWARE": True
                },
            )
            # The listing carries no year: a date far in the past is taken
            # to belong to next year.
            if when_date is not None and when_date < (datetime.now(
                    pytz.timezone(venue.timezone)) - relativedelta(days=100)):
                when_date = when_date + relativedelta(years=1)

        image_url = tag.find("div", {"class": "artist-image"})["style"]
        image_url_end = image_url.find("'", image_url.find("https") + 4)
        image_url = image_url[image_url.find("https"):image_url_end]

        when_date = when_date if when_date is not None else datetime.now(
            pytz.timezone(venue.timezone))

        return Event(
            url=vera_url,
            title=f"{artist} {extra_title}".strip(),
            description=
            f'{artist}{" with support" if extra != "" else ""} {extra}'.strip(
            ),
            venue_timezone=venue.timezone,
            venue_id=venue.venue_id,
            venue_name=venue.name,
            venue_url=venue.url,
            venue_country=venue.country,
            venue_city=venue.city,
            venue_short_name=venue.short_name,
            source=source,
            date_published=datetime.now(),
            when=when_date,
            image_url=image_url,
        )
 def _add_sup_text_from_text(parent_tag: Tag, text: str) -> str:
     """Append the text of *parent_tag*'s first ``<sup>`` to *text*.

     The ``<sup>`` text is added in parentheses after a single space.
     *text* is returned unchanged when ``ParserUtil.has_non_empty_text``
     rejects the ``<sup>`` lookup result.
     """
     sup_tag = parent_tag.find("sup")
     if not ParserUtil.has_non_empty_text(sup_tag):
         return text
     return f"{text} ({sup_tag.text})"