def _parse_row(self, row):
        """Build a ``Showtime`` from the cells of one ``row``.

        Cells are addressed by position: 1 and 2 are handed to
        ``parsers.date_time_year`` (presumably date and time -- confirm
        against the scraped page), 3 and 4 are the main and original
        titles, 5 and 6 are labels looked up in ``self.tags_map`` and 8
        supplies the booking link.
        """
        date_text = row[1].text_content()
        time_text = row[2].text_content()
        starts_at = parsers.date_time_year(date_text, time_text)

        main_title = row[3].text_content()
        original_title = row[4].text_content()

        # TODO scrape tags according to new implementation of tags
        # presented in https://github.com/honzajavorek/zitkino.cz/issues/97
        labels = (row[5].text_content(), row[6].text_content())
        mapped = [self.tags_map.get(label) for label in labels]

        booking_url = row[8].link()

        film = ScrapedFilm(
            title_main_scraped=main_title,
            title_orig_scraped=original_title,
        )
        # Falsy lookup results are dropped; every remaining tag becomes a
        # key whose value is ``None``.
        tag_dict = dict.fromkeys(tag for tag in mapped if tag)

        return Showtime(
            cinema=cinema,
            film_scraped=film,
            starts_at=starts_at,
            tags=tag_dict,
            url=self.url,
            url_booking=booking_url,
        )
Ejemplo n.º 2
0
    def _parse_row(self, row):
        """Build a ``Showtime``, including its price, from one ``row``.

        Cells are addressed by position: 1 and 2 go to
        ``parsers.date_time_year``, 3 and 4 are the main and original
        titles, 5 and 6 are labels looked up in ``self.tags_map``, 7 is
        parsed by ``parsers.price`` and 8 supplies the booking link.
        """
        date_text = row[1].text_content()
        time_text = row[2].text_content()
        starts_at = parsers.date_time_year(date_text, time_text)

        main_title = row[3].text_content()
        original_title = row[4].text_content()

        labels = (row[5].text_content(), row[6].text_content())
        # NOTE(review): lookup results are passed on unfiltered, so a label
        # absent from the map presumably leaves a ``None`` entry -- confirm
        # that ``Showtime`` tolerates this.
        tags = [self.tags_map.get(label) for label in labels]

        booking_url = row[8].link()
        price = parsers.price(row[7].text_content())

        film = ScrapedFilm(
            title_main=main_title,
            titles=[main_title, original_title],
        )
        return Showtime(
            cinema=cinema,
            film_scraped=film,
            starts_at=starts_at,
            tags=tags,
            url_booking=booking_url,
            price=price,
        )
Ejemplo n.º 3
0
    def _parse_row(self, row, subrow, tags=None):
        """Build a ``Showtime`` from a ``row`` and one of its ``subrow``s.

        ``row`` provides the ``.film_table_datum`` cell and ``subrow`` the
        ``.cas`` cell; both texts are handed to ``parsers.date_time_year``.
        The remaining pieces (title, booking link, tag) come from the
        mapping returned by ``self._parse_subrow(subrow)``.

        :param tags: optional iterable of tags to attach to the showtime;
            it is copied, never modified.
        :returns: a ``Showtime``, or ``None`` when the subrow has no title
            element or its title is listed in ``self.title_blacklist``.
        """
        elements = self._parse_subrow(subrow)

        title_el = elements.get('title')
        if title_el is None:
            return None
        title_main = title_el.text_content()
        if title_main in self.title_blacklist:
            return None

        starts_at = parsers.date_time_year(
            row.cssselect('.film_table_datum')[0].text_content(),
            subrow.cssselect('.cas')[0].text_content(),
        )

        booking_el = elements.get('booking')
        url_booking = booking_el.link() if booking_el is not None else None

        # Copy before appending: ``tags or []`` handed back the caller's own
        # list whenever it was non-empty, so the subrow-specific tag leaked
        # into every later call that reused the same list.
        tags = list(tags) if tags else []
        tag_el = elements.get('tag')
        if tag_el is not None:
            tags.append(self.tags_map.get(tag_el.text_content()))

        return Showtime(
            cinema=cinema,
            film_scraped=ScrapedFilm(
                title_main=title_main,
                titles=[title_main],
            ),
            starts_at=starts_at,
            tags=tags,
            url_booking=url_booking,
        )
    def _parse_row(self, row):
        """Turn one positional ``row`` of cells into a ``Showtime``.

        Cell 1 and cell 2 are passed to ``parsers.date_time_year``; cells 3
        and 4 hold the main and original titles; cells 5 and 6 hold labels
        that are looked up in ``self.tags_map``; cell 8 yields the booking
        link.
        """
        when = parsers.date_time_year(
            row[1].text_content(),
            row[2].text_content(),
        )

        scraped_main = row[3].text_content()
        scraped_orig = row[4].text_content()

        # TODO scrape tags according to new implementation of tags
        # presented in https://github.com/honzajavorek/zitkino.cz/issues/97
        first_label = row[5].text_content()
        second_label = row[6].text_content()
        found = [
            self.tags_map.get(first_label),
            self.tags_map.get(second_label),
        ]

        booking_link = row[8].link()

        film = ScrapedFilm(
            title_main_scraped=scraped_main,
            title_orig_scraped=scraped_orig,
        )

        # Keep only truthy lookup results; each becomes a key mapped to None.
        tag_dict = {}
        for tag in found:
            if tag:
                tag_dict[tag] = None

        return Showtime(
            cinema=cinema,
            film_scraped=film,
            starts_at=when,
            tags=tag_dict,
            url=self.url,
            url_booking=booking_link,
        )
    def _parse_row(self, row):
        """Turn one positional ``row`` of cells into a priced ``Showtime``.

        Cell 1 and cell 2 are passed to ``parsers.date_time_year``; cells 3
        and 4 hold the main and original titles; cells 5 and 6 hold labels
        that are looked up in ``self.tags_map``; cell 7 is parsed with
        ``parsers.price``; cell 8 yields the booking link.
        """
        when = parsers.date_time_year(
            row[1].text_content(),
            row[2].text_content(),
        )

        scraped_main = row[3].text_content()
        scraped_orig = row[4].text_content()

        labels = (row[5].text_content(), row[6].text_content())
        # The lookup results are forwarded as they are, without filtering.
        found_tags = list(map(self.tags_map.get, labels))

        booking_link = row[8].link()
        price_text = row[7].text_content()
        price = parsers.price(price_text)

        film = ScrapedFilm(
            title_main=scraped_main,
            titles=[scraped_main, scraped_orig],
        )
        return Showtime(
            cinema=cinema,
            film_scraped=film,
            starts_at=when,
            tags=found_tags,
            url_booking=booking_link,
            price=price,
        )
Ejemplo n.º 6
0
    def _parse_row(self, row, subrow, tags=None):
        """Create a ``Showtime`` for one ``subrow`` of a table ``row``.

        The start is computed by ``parsers.date_time_year`` from the text of
        the row's ``.film_table_datum`` cell and the subrow's ``.cas`` cell.
        Title, booking link and tag elements are taken from the mapping
        returned by ``self._parse_subrow(subrow)``.

        :param tags: optional iterable of tags for the showtime. The
            argument itself is left untouched; a copy is extended.
        :returns: a ``Showtime``, or ``None`` if there is no title element
            or the title appears in ``self.title_blacklist``.
        """
        elements = self._parse_subrow(subrow)

        title_el = elements.get('title')
        if title_el is None:
            return None
        title_main = title_el.text_content()
        if title_main in self.title_blacklist:
            return None

        starts_at = parsers.date_time_year(
            row.cssselect('.film_table_datum')[0].text_content(),
            subrow.cssselect('.cas')[0].text_content(),
        )

        booking_el = elements.get('booking')
        url_booking = booking_el.link() if booking_el is not None else None

        # Previously ``tags or []`` reused a non-empty list supplied by the
        # caller and appended to it in place, so tags accumulated across
        # calls sharing that list. Appending to a copy avoids this.
        tags = list(tags) if tags else []
        tag_el = elements.get('tag')
        if tag_el is not None:
            tags.append(self.tags_map.get(tag_el.text_content()))

        return Showtime(
            cinema=cinema,
            film_scraped=ScrapedFilm(
                title_main=title_main,
                titles=[title_main],
            ),
            starts_at=starts_at,
            tags=tags,
            url_booking=url_booking,
        )
Ejemplo n.º 7
0
    def _parse_row(self, row, subrow, tags=None):
        """Build a ``Showtime`` from a ``row`` and one of its ``subrow``s.

        The title, booking and tag elements come from the mapping returned
        by ``self._parse_subrow(subrow)``. The start is computed by
        ``parsers.date_time_year`` from the row's ``.film_table_datum`` cell
        and the subrow's ``.cas`` cell.

        :param tags: optional iterable of keys into ``self.tags``; each key
            is stored in the showtime's tag mapping with its value from
            ``self.tags``.
        :returns: a ``Showtime``, or ``None`` when the subrow has no title
            element or its title is listed in ``self.title_blacklist``.
        """
        elements = self._parse_subrow(subrow)

        title_el = elements.get('title')
        if title_el is None:
            return None

        title_main = title_el.text_content()
        if title_main in self.title_blacklist:
            return None

        film_url = title_el.link()

        date_text = row.cssselect('.film_table_datum')[0].text_content()
        time_text = subrow.cssselect('.cas')[0].text_content()
        starts_at = parsers.date_time_year(date_text, time_text)

        booking_el = elements.get('booking')
        booking_url = None
        if booking_el is not None:
            booking_url = booking_el.link()

        # Start from the tags handed in by the caller...
        showtime_tags = {}
        for name in tags or ():
            showtime_tags[name] = self.tags[name]

        # ...then add the pair parsed from the subrow's own tag element.
        tag_el = elements.get('tag')
        if tag_el is not None:
            showtime_tags.update([self._parse_tag(tag_el)])

        film = ScrapedFilm(
            title_main_scraped=title_main,
            url=film_url,
        )
        return Showtime(
            cinema=cinema,
            film_scraped=film,
            starts_at=starts_at,
            tags=showtime_tags,
            url=self.url,
            url_booking=booking_url,
        )
Ejemplo n.º 8
0
    def _parse_row(self, row, subrow, tags=None):
        """Create a ``Showtime`` for one ``subrow`` of a table ``row``.

        ``self._parse_subrow(subrow)`` supplies the title, booking and tag
        elements. The text of the row's ``.film_table_datum`` cell and of
        the subrow's ``.cas`` cell is passed to ``parsers.date_time_year``
        to obtain the start.

        :param tags: optional iterable of keys into ``self.tags``; each is
            copied into the showtime's tag mapping together with its value.
        :returns: a ``Showtime``, or ``None`` if there is no title element
            or the title appears in ``self.title_blacklist``.
        """
        parts = self._parse_subrow(subrow)

        title_el = parts.get('title')
        if title_el is None:
            return None
        scraped_title = title_el.text_content()
        if scraped_title in self.title_blacklist:
            return None

        detail_url = title_el.link()

        when = parsers.date_time_year(
            row.cssselect('.film_table_datum')[0].text_content(),
            subrow.cssselect('.cas')[0].text_content(),
        )

        booking_el = parts.get('booking')
        if booking_el is None:
            booking_url = None
        else:
            booking_url = booking_el.link()

        inherited = tags or []
        tag_values = dict((name, self.tags[name]) for name in inherited)
        tag_el = parts.get('tag')
        if tag_el is not None:
            # ``_parse_tag`` is consumed as a (key, value) pair.
            key, value = self._parse_tag(tag_el)
            tag_values[key] = value

        return Showtime(
            cinema=cinema,
            film_scraped=ScrapedFilm(
                title_main_scraped=scraped_title,
                url=detail_url,
            ),
            starts_at=when,
            tags=tag_values,
            url=self.url,
            url_booking=booking_url,
        )