def _get_season_subtitles(self, show_id, season, sub_format):
    """Download the season archive(s) of a show and return every episode found.

    The search is first made with ``sub_format`` as version and, when that
    yields nothing, retried with the ``rip`` suffix appended. Only the first
    page of results is inspected.

    :param int show_id: id of the show.
    :param int season: season number.
    :param str sub_format: version string sent to the search endpoint.
    :return: one subtitle per archive member whose name matches ``sXXeYY``;
        format, year and tvdb id are left to ``None`` for the caller to fill.
    :rtype: list
    :raise TooManyRequests: if the download answer is the download-limit message.
    :raise ConfigurationError: if the download answer is not a zip archive.

    """
    params = {
        'apikey': self.apikey,
        'show_id': show_id,
        'q': 'Stagione %d' % season,
        'version': sub_format
    }
    r = self.session.get(self.server_url + 'subtitles/search', params=params, timeout=30)
    r.raise_for_status()
    root = etree.fromstring(r.content)

    if int(root.find('data/count').text) == 0:
        logger.warning('Subtitles for season not found, try with rip suffix')

        params['version'] = sub_format + 'rip'
        r = self.session.get(self.server_url + 'subtitles/search', params=params, timeout=30)
        r.raise_for_status()
        root = etree.fromstring(r.content)
        if int(root.find('data/count').text) == 0:
            logger.warning('Subtitles for season not found')
            return []

    # Compiled once for all archives; raw string so that \d is not an
    # invalid string escape.
    episode_re = re.compile(r's(\d{1,2})e(\d{1,2})')
    season_tag = 'stagione %d' % season

    subs = []
    # Looking for subtitles in first page
    for subtitle in root.findall('data/subtitles/subtitle'):
        if season_tag not in subtitle.find('name').text.lower():
            continue

        sub_id = int(subtitle.find('id').text)
        logger.debug('Found season zip id %d - %r - %r',
                     sub_id,
                     subtitle.find('name').text,
                     subtitle.find('version').text)

        content = self._download_zip(sub_id)
        if not is_zipfile(io.BytesIO(content)):  # pragma: no cover
            # content is bytes (it is wrapped in BytesIO), so the marker
            # must be bytes too or the membership test raises TypeError.
            if b'limite di download' in content:
                raise TooManyRequests()
            else:
                raise ConfigurationError('Not a zip file: %r' % content)

        with ZipFile(io.BytesIO(content)) as zf:
            for name in zf.namelist():
                match = episode_re.search(name)
                if not match:  # pragma: no cover
                    logger.debug('Cannot decode subtitle %r', name)
                    continue

                sub = ItaSASubtitle(
                    sub_id,
                    subtitle.find('show_name').text,
                    int(match.group(1)),
                    int(match.group(2)),
                    None,
                    None,
                    None,
                    name)
                sub.content = fix_line_ending(zf.read(name))
                subs.append(sub)

    return subs
def _search_show_id(self, series, year=None):
    """Search the show id from the `series` and `year`.

    :param str series: series of the episode.
    :param year: year of the series, if any.
    :type year: int
    :return: the show id, if found.
    :rtype: int
    :raise TooManyRequests: if the server answers with status 304.

    """
    # addic7ed doesn't support search with quotes
    series = series.replace('\'', ' ')

    # build the params
    series_year = '%s %d' % (series, year) if year is not None else series
    params = {'search': series_year, 'Submit': 'Search'}

    # make the search
    logger.info('Searching show ids with %r', params)
    r = self.session.get(self.server_url + 'search.php', params=params, timeout=10)
    r.raise_for_status()
    if r.status_code == 304:
        raise TooManyRequests()

    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    try:
        # get the suggestion
        suggestion = soup.select('span.titulo > a[href^="/show/"]')
        if not suggestion:
            logger.warning('Show id not found: no suggestion')
            return None

        found = sanitize(suggestion[0].i.text.replace('\'', ' '),
                         default_characters=self.sanitize_characters)
        wanted = sanitize(series_year, default_characters=self.sanitize_characters)
        if not found == wanted:
            logger.warning('Show id not found: suggestion does not match')
            return None

        # href looks like "/show/<id>"; skip the 6-character prefix
        show_id = int(suggestion[0]['href'][6:])
        logger.debug('Found show id %d', show_id)

        return show_id
    finally:
        # select() returns a list of tags, which has no decompose(); calling
        # it here used to raise AttributeError and mask the return value.
        # Decomposing the soup tears down the whole tree, matched tags
        # included (assuming ParserBeautifulSoup follows the BeautifulSoup API).
        soup.decompose()
def query(self, series, season, episode, video_format, resolution, country=None):
    """Search and download the subtitles of one episode.

    The episode is searched as ``<season>x<episode>`` with the version derived
    from `video_format`/`resolution`, then with the ``rip`` suffix, and finally
    by falling back to the whole-season archives. In the season fallback every
    episode contained in the archives is returned.

    :param str series: series of the episode, used to resolve the show id.
    :param int season: season number.
    :param int episode: episode number.
    :param str video_format: format of the video; empty or ``hdtv`` means the
        version is picked from `resolution` instead.
    :param str resolution: resolution of the video.
    :param country: forwarded to ``get_show_id``.
    :return: the subtitles found, with their content already loaded.
    :rtype: list
    :raise ConfigurationError: if not logged in, or a download is not a zip.
    :raise TooManyRequests: if a download answer is the download-limit message.

    """
    # To make queries you need to be logged in
    if not self.logged_in:  # pragma: no cover
        raise ConfigurationError('Cannot query if not logged in')

    # get the show id
    show_id = self.get_show_id(series, country)
    if show_id is None:
        logger.error('No show id found for %r ', series)
        return []

    # get the page of the season of the show
    logger.info(
        'Getting the subtitle of show id %d, season %d episode %d, format %r',
        show_id, season, episode, video_format)

    # Default format is SDTV
    if not video_format or video_format.lower() == 'hdtv':
        if resolution in ('1080i', '1080p', '720p'):
            sub_format = resolution
        else:
            sub_format = 'normale'
    else:
        sub_format = video_format.lower()

    # Look for year
    params = {'apikey': self.apikey}
    r = self.session.get(self.server_url + 'shows/' + str(show_id), params=params, timeout=30)
    r.raise_for_status()
    root = etree.fromstring(r.content)

    year = root.find('data/show/started').text
    if year:
        year = int(year.split('-', 1)[0])
    tvdb_id = root.find('data/show/id_tvdb').text
    if tvdb_id:
        tvdb_id = int(tvdb_id)

    # Tag used both as search query and to filter the result names
    episode_tag = '{0}x{1:02}'.format(season, episode)

    params = {
        'apikey': self.apikey,
        'show_id': show_id,
        'q': episode_tag,
        'version': sub_format
    }
    r = self.session.get(self.server_url + 'subtitles/search', params=params, timeout=30)
    r.raise_for_status()
    root = etree.fromstring(r.content)

    if int(root.find('data/count').text) == 0:
        logger.warning('Subtitles not found, try with rip suffix')

        params['version'] = sub_format + 'rip'
        r = self.session.get(self.server_url + 'subtitles/search', params=params, timeout=30)
        r.raise_for_status()
        root = etree.fromstring(r.content)
        if int(root.find('data/count').text) == 0:
            logger.warning('Subtitles not found, go season mode')

            # If no subtitle are found for single episode try to download all season zip
            subs = self._get_season_subtitles(show_id, season, sub_format)
            if not subs:
                return []
            for subtitle in subs:
                subtitle.format = video_format
                subtitle.year = year
                subtitle.tvdb_id = tvdb_id
            return subs

    # Collect the matching subtitles from the first page, then follow the
    # "next" links (if any) and do the same on every following page.
    subtitles = []
    while True:
        for subtitle in root.findall('data/subtitles/subtitle'):
            if episode_tag in subtitle.find('name').text.lower():
                sub_id = int(subtitle.find('id').text)
                logger.debug('Found subtitle id %d - %r - %r',
                             sub_id,
                             subtitle.find('name').text,
                             subtitle.find('version').text)

                sub = ItaSASubtitle(sub_id,
                                    subtitle.find('show_name').text,
                                    season,
                                    episode,
                                    video_format,
                                    year,
                                    tvdb_id,
                                    subtitle.find('name').text)
                subtitles.append(sub)

        next_page = root.find('data/next')
        # A missing <next> element is treated like an empty one
        if next_page is None or next_page.text is None:
            break

        r = self.session.get(next_page.text, timeout=30)  # pragma: no cover
        r.raise_for_status()
        root = etree.fromstring(r.content)
        # etree elements have no attribute-style child access, so the page
        # number is looked up by path like every other field.
        logger.info('Loading subtitles page %r', root.findtext('data/page'))

    # Download the subs found, can be more than one in zip
    additional_subs = []
    for sub in subtitles:
        # open the zip
        content = self._download_zip(sub.sub_id)
        if not is_zipfile(io.BytesIO(content)):  # pragma: no cover
            # content is bytes (it is wrapped in BytesIO), so the marker
            # must be bytes too or the membership test raises TypeError.
            if b'limite di download' in content:
                raise TooManyRequests()
            else:
                raise ConfigurationError(
                    'Not a zip file: {!r}'.format(content))

        with ZipFile(io.BytesIO(content)) as zf:
            names = zf.namelist()

            # First member fills the subtitle itself (an empty archive
            # raises IndexError here).
            sub.content = fix_line_ending(zf.read(names[0]))
            sub.full_data = names[0]

            # Every further member becomes a copy of the subtitle
            for name in names[1:]:  # pragma: no cover
                add_sub = copy.deepcopy(sub)
                add_sub.content = fix_line_ending(zf.read(name))
                add_sub.full_data = name
                additional_subs.append(add_sub)

    return subtitles + additional_subs