def _parse_row(self, row):
    """Turn one positional programme row into a ``Showtime``.

    Cells 1 and 2 are handed to ``parsers.date_time_year``, cells 3 and 4
    give the main and original title, cells 5 and 6 are looked up in
    ``self.tags_map`` and cell 8 supplies the booking link.
    """
    starts_at = parsers.date_time_year(
        row[1].text_content(),
        row[2].text_content(),
    )

    main_title = row[3].text_content()
    original_title = row[4].text_content()

    # TODO scrape tags according to new implementation of tags
    # presented in https://github.com/honzajavorek/zitkino.cz/issues/97
    labels = (row[5].text_content(), row[6].text_content())
    found = []
    for label in labels:
        found.append(self.tags_map.get(label))

    booking_link = row[8].link()

    return Showtime(
        cinema=cinema,
        film_scraped=ScrapedFilm(
            title_main_scraped=main_title,
            title_orig_scraped=original_title,
        ),
        starts_at=starts_at,
        # Labels without a mapping (falsy lookups) are left out; every
        # remaining tag is stored with a ``None`` value.
        tags=dict.fromkeys(tag for tag in found if tag),
        url=self.url,
        url_booking=booking_link,
    )
def _parse_row(self, row):
    """Turn one positional programme row into a priced ``Showtime``.

    Cells 1 and 2 are handed to ``parsers.date_time_year``, cells 3 and 4
    give the main and original title, cells 5 and 6 are looked up in
    ``self.tags_map``, cell 7 goes through ``parsers.price`` and cell 8
    supplies the booking link.
    """
    starts_at = parsers.date_time_year(
        row[1].text_content(),
        row[2].text_content(),
    )

    main_title = row[3].text_content()
    original_title = row[4].text_content()

    # One entry per label cell; a label missing from the map stays in the
    # list as ``None``.
    labels = (row[5].text_content(), row[6].text_content())
    tags = []
    for label in labels:
        tags.append(self.tags_map.get(label))

    booking_link = row[8].link()
    price = parsers.price(row[7].text_content())

    return Showtime(
        cinema=cinema,
        film_scraped=ScrapedFilm(
            title_main=main_title,
            titles=[main_title, original_title],
        ),
        starts_at=starts_at,
        tags=tags,
        url_booking=booking_link,
        price=price,
    )
def _parse_row(self, row, subrow, tags=None):
    """Build a ``Showtime`` from a programme row and one of its subrows.

    The text of the row's first ``.film_table_datum`` element and of the
    subrow's first ``.cas`` element are passed to
    ``parsers.date_time_year``. Title, booking link and tag elements are
    taken from the mapping returned by ``self._parse_subrow(subrow)``.

    ``tags`` is an optional list of tags that already apply to the
    showtime. It is copied, so the caller's list is never modified.

    Returns ``None`` when the subrow has no title element or when the
    title is found in ``self.title_blacklist``.
    """
    elements = self._parse_subrow(subrow)

    title_el = elements.get('title')
    if title_el is None:
        return None

    title_main = title_el.text_content()
    if title_main in self.title_blacklist:
        return None

    starts_at = parsers.date_time_year(
        row.cssselect('.film_table_datum')[0].text_content(),
        subrow.cssselect('.cas')[0].text_content(),
    )

    booking_el = elements.get('booking')
    url_booking = booking_el.link() if booking_el is not None else None

    # Work on a copy: appending to the caller's list would leak this
    # subrow's tag into every later call that reuses the same list.
    tags = list(tags) if tags else []

    tag_el = elements.get('tag')
    if tag_el is not None:
        # NOTE(review): a label missing from ``tags_map`` is appended as
        # ``None`` - confirm that ``Showtime`` tolerates such entries.
        tags.append(self.tags_map.get(tag_el.text_content()))

    return Showtime(
        cinema=cinema,
        film_scraped=ScrapedFilm(
            title_main=title_main,
            titles=[title_main],
        ),
        starts_at=starts_at,
        tags=tags,
        url_booking=url_booking,
    )
def _parse_row(self, row):
    """Create the ``Showtime`` described by a single programme row.

    The row is indexed by position: 1 and 2 feed
    ``parsers.date_time_year``, 3 and 4 are the main and original title,
    5 and 6 are labels translated through ``self.tags_map`` and 8 carries
    the booking link.
    """
    first_cell = row[1].text_content()
    second_cell = row[2].text_content()
    starts_at = parsers.date_time_year(first_cell, second_cell)

    title_main = row[3].text_content()
    title_orig = row[4].text_content()

    # TODO scrape tags according to new implementation of tags
    # presented in https://github.com/honzajavorek/zitkino.cz/issues/97
    labels = (row[5].text_content(), row[6].text_content())
    mapped = [self.tags_map.get(label) for label in labels]

    url_booking = row[8].link()

    return Showtime(
        cinema=cinema,
        film_scraped=ScrapedFilm(
            title_main_scraped=title_main,
            title_orig_scraped=title_orig,
        ),
        starts_at=starts_at,
        # Falsy lookups (labels with no mapping) are dropped; each kept
        # tag maps to ``None``.
        tags=dict.fromkeys(filter(None, mapped)),
        url=self.url,
        url_booking=url_booking,
    )
def _parse_row(self, row):
    """Create the priced ``Showtime`` described by a single programme row.

    The row is indexed by position: 1 and 2 feed
    ``parsers.date_time_year``, 3 and 4 are the main and original title,
    5 and 6 are labels translated through ``self.tags_map``, 7 is parsed
    by ``parsers.price`` and 8 carries the booking link.
    """
    first_cell = row[1].text_content()
    second_cell = row[2].text_content()
    starts_at = parsers.date_time_year(first_cell, second_cell)

    title_main = row[3].text_content()
    title_orig = row[4].text_content()
    film = dict(title_main=title_main, titles=[title_main, title_orig])

    # Unknown labels are kept in the list as ``None``.
    labels = (row[5].text_content(), row[6].text_content())
    tags = [self.tags_map.get(label) for label in labels]

    url_booking = row[8].link()

    price_text = row[7].text_content()
    price = parsers.price(price_text)

    return Showtime(
        cinema=cinema,
        film_scraped=ScrapedFilm(**film),
        starts_at=starts_at,
        tags=tags,
        url_booking=url_booking,
        price=price,
    )
def _parse_row(self, row, subrow, tags=None):
    """Build a ``Showtime`` from a programme row and one of its subrows.

    Title, booking and tag elements come from the mapping returned by
    ``self._parse_subrow(subrow)``. The start is computed by
    ``parsers.date_time_year`` from the row's first ``.film_table_datum``
    element and the subrow's first ``.cas`` element.

    Each name in ``tags`` is looked up in ``self.tags``; a tag element in
    the subrow adds the pair returned by ``self._parse_tag``.

    Returns ``None`` when the subrow has no title element or when the
    title is found in ``self.title_blacklist``.
    """
    parts = self._parse_subrow(subrow)

    title_node = parts.get('title')
    if title_node is None:
        return None

    film_title = title_node.text_content()
    if film_title in self.title_blacklist:
        return None
    film_url = title_node.link()

    date_node = row.cssselect('.film_table_datum')[0]
    date_text = date_node.text_content()
    time_node = subrow.cssselect('.cas')[0]
    starts_at = parsers.date_time_year(date_text, time_node.text_content())

    booking_node = parts.get('booking')
    if booking_node is not None:
        booking_url = booking_node.link()
    else:
        booking_url = None

    # Always a fresh dict, so the ``tags`` argument itself is not touched.
    showtime_tags = {}
    for name in tags or []:
        showtime_tags[name] = self.tags[name]

    tag_node = parts.get('tag')
    if tag_node is not None:
        parsed_pair = self._parse_tag(tag_node)
        showtime_tags.update([parsed_pair])

    return Showtime(
        cinema=cinema,
        film_scraped=ScrapedFilm(
            title_main_scraped=film_title,
            url=film_url,
        ),
        starts_at=starts_at,
        tags=showtime_tags,
        url=self.url,
        url_booking=booking_url,
    )