def parse_watchlist_page(self, data):
    """
    Return a generator of parsed movies from watchlist

    ``data`` must expose ``xpath()``; the nodes it returns must support
    ``find()``, ``get()`` and ``.text``.  Every ``li.item`` found under
    ``#itemList`` that has a ``div.info`` child with a usable ``a.name``
    link yields a dict with the keys ``id``, ``title``, ``title_rus``,
    ``link``, ``year`` and ``slug``.

    Entries without an info block, without a link/href or without a
    numeric id in the href are skipped.  ``year`` is ``None`` when the
    caption contains no ``(YYYY`` group.  ``title`` is the caption text in
    front of the first ``(`` (empty when the caption starts with it).
    """
    items = data.xpath('//*[@id="itemList"]/li[@class="item"]')
    id_pattern = re.compile(r'(?P<id>\d+)')
    # Plain raw string: the pattern is pure ASCII, and the ``ur`` prefix is
    # a syntax error on Python 3.
    year_pattern = re.compile(r'\((?P<year>\d{4})')

    for item in items:
        info = item.find('.//div[@class="info"]')
        if info is None:
            continue

        link = info.find('.//a[@class="name"]')
        href = link.get('href') if link is not None else None
        if not href:
            continue
        id_match = id_pattern.findall(href)
        if not id_match:
            # Without an id the movie cannot be identified.
            continue

        info_span = info.find('.//span[1]')
        caption = ''
        if info_span is not None and info_span.text:
            caption = info_span.text
        year_match = year_pattern.findall(caption)

        # str.find() returns -1 when there is no parenthesis, whereas the
        # previous str.index() call raised ValueError in that case.
        paren = caption.find('(')
        if paren > 0:
            # rstrip() drops the separator before '(' without eating a
            # real character when no separator is present.
            title = caption[:paren].rstrip()
        elif paren == 0:
            title = ''
        else:
            title = caption.strip()

        movie = {
            'id': id_match[0],
            'title': title,
            'title_rus': link.text,
            'link': href,
            'year': year_match[0] if year_match else None,
        }
        movie['slug'] = slugify(movie['title'])
        yield movie
def __init__(self, provider, pid, name, slug=None, person_type=ACTOR_TYPE,
             name_rus=None, poster=None):
    """
    Initialise a person record coming from one of the supported parsers.

    :param provider: value passed to ``ParserFactory.get_parser`` to
        resolve the parser.
    :param pid: provider-specific id; stored as ``imdb_id`` for an
        ``IMDBParser`` and as ``kp_id`` for a ``KinopoiskParser``.
    :param name: person name.
    :param slug: explicit slug; derived from ``name`` with ``slugify``
        when omitted.
    :param person_type: stored as ``person_type``.
    :param name_rus: optional Russian name; only assigned when given.
    :param poster: optional poster; only assigned when given.
    :raises TypeError: if the resolved parser is of neither supported type.
    """
    parser = ParserFactory.get_parser(provider)
    if isinstance(parser, IMDBParser):
        self.imdb_id = pid
    elif isinstance(parser, KinopoiskParser):
        self.kp_id = pid
    else:
        raise TypeError('Parser %s does not supported' % type(parser))

    self.name = name
    if slug is None:
        self.slug = slugify(self.name)
    else:
        self.slug = slug

    if name_rus is not None:
        self.name_rus = name_rus

    self.person_type = person_type
    self.status = Person.ACTIVE_STATUS

    # The guard used to test ``person_type`` (copy-paste slip), so the
    # poster assignment depended on an unrelated argument.
    if poster is not None:
        self.poster = poster
def parse_voted_movies(self, data):
    """
    Return a generator of parsed movies from voted list

    ``data`` must expose ``xpath()``; the nodes it returns must support
    ``find()``, ``get()`` and ``.text``.  Every child ``div`` of
    ``div.profileFilmsList`` whose ``class`` attribute contains ``item``
    yields a dict with the keys ``id``, ``title``, ``title_rus``, ``link``,
    ``year``, ``vote`` and ``slug``, plus ``serial: True`` when the caption
    contains the serial marker.

    Entries without a ``nameRus`` link or without a numeric id in its href
    are skipped.  ``year`` and ``vote`` are ``None`` when they cannot be
    found; ``title`` is ``''`` when there is no English title text.
    """
    items = data.xpath('//div[@class="profileFilmsList"]/div')
    id_pattern = re.compile(r'(?P<id>\d+)')
    # ``ur`` literals are a syntax error on Python 3; ``r``/``u`` express the
    # same patterns on both major versions.
    year_pattern = re.compile(r'(?P<year>\d{4})')
    serial_pattern = re.compile(u'сериал')
    rating_pattern = re.compile(r'rating: \'(?P<id>\d+)\'')

    for item in items:
        # A div without a class attribute returns None from get().
        if 'item' not in (item.get('class') or ''):
            continue

        anchor = item.find('.//div[@class="nameRus"]/a')
        if anchor is None:
            continue
        link = anchor.get('href')
        id_match = id_pattern.findall(link or '')
        if not id_match:
            # Without an id the movie cannot be identified.
            continue

        text = anchor.text or ''
        # partition() keeps the whole caption as the title when there is no
        # ' (' separator, where str.index() raised ValueError.
        title_rus, _, details = text.partition(' (')
        # Look for the year in the parenthesised part first so a number in
        # the title itself is not mistaken for the release year.
        year_match = year_pattern.findall(details) or year_pattern.findall(text)

        script = item.find('.//script')
        script_text = ''
        if script is not None and script.text:
            script_text = script.text
        rating = rating_pattern.findall(script_text)

        name_eng = item.find('.//div[@class="nameEng"]')
        title = ''
        if name_eng is not None and name_eng.text:
            title = name_eng.text

        movie = {
            'id': id_match[0],
            'title': title,
            'title_rus': title_rus,
            'link': link,
            'year': year_match[0] if year_match else None,
            'vote': int(rating[0]) if rating else None,
        }
        movie['slug'] = slugify(movie['title'])
        if serial_pattern.search(text):
            movie['serial'] = True
        yield movie
def __init__(self, title, title_rus=None, slug=None):
    """
    Initialise the instance from a title.

    :param title: stored as ``title``.
    :param title_rus: optional Russian title; only assigned when it is
        not ``None``.
    :param slug: explicit slug; when ``None`` the slug is produced by
        calling ``slugify`` on the stored title.
    """
    self.title = title

    if title_rus is not None:
        self.title_rus = title_rus

    # An explicit slug wins; otherwise fall back to the slugified title.
    self.slug = slug if slug is not None else slugify(self.title)
def __init__(self, provider, mid, title, year, rating=0.0, title_rus=None,
             slug=None, poster=None, is_series=False, status=ACTIVE_STATUS,
             world_premiere=None, rus_premiere=None, dvd_premiere=None):
    """
    Initialise a movie record coming from one of the supported parsers.

    :param provider: either an ``IMDBParser``/``KinopoiskParser`` instance
        or a value that ``ParserFactory.get_parser`` resolves to one.
    :param mid: provider-specific id; stored as ``imdb_id`` or ``kp_id``
        depending on the parser type.
    :param title: movie title.
    :param year: stored as ``year``.
    :param rating: stored as ``imdb_rating`` or ``kp_rating`` depending on
        the parser type.
    :param title_rus: optional Russian title; only assigned when given.
    :param slug: explicit slug; derived from ``title`` with ``slugify``
        when omitted.
    :param poster: optional poster; only assigned when truthy.
    :param is_series: only assigned when truthy.
    :param status: stored as ``status``.
    :param world_premiere: stored as ``world_premiere_date`` when truthy.
    :param rus_premiere: stored as ``rus_premiere_date`` when truthy.
    :param dvd_premiere: stored as ``dvd_premiere_date`` when truthy.
    :raises TypeError: if the resolved parser is of neither supported type.
    """
    if isinstance(provider, (IMDBParser, KinopoiskParser)):
        parser = provider
    else:
        parser = ParserFactory.get_parser(provider)

    if isinstance(parser, IMDBParser):
        self.imdb_id = mid
        self.imdb_rating = rating
    elif isinstance(parser, KinopoiskParser):
        self.kp_id = mid
        self.kp_rating = rating
    else:
        raise TypeError('Parser %s does not supported' % type(parser))

    self.title = title
    self.year = year
    self.status = status

    if title_rus is not None:
        self.title_rus = title_rus

    # An explicitly supplied slug used to be dropped because the ``else``
    # branch was missing; honour it instead of leaving ``slug`` unset.
    if slug is None:
        self.slug = slugify(self.title)
    else:
        self.slug = slug

    if poster:
        self.poster = poster
    if is_series:
        self.is_series = is_series
    if world_premiere:
        self.world_premiere_date = world_premiere
    if rus_premiere:
        self.rus_premiere_date = rus_premiere
    if dvd_premiere:
        self.dvd_premiere_date = dvd_premiere