Esempio n. 1
0
 def login(self):
     """Authenticate against the provider and cache the returned token.

     Stores the token on the session (``Authorization`` header) and in the
     ``region`` cache under ``"oscom_token"``.

     :return: True on successful login.
     :raises ServiceUnavailable: when the request cannot be completed.
     :raises AuthenticationError: on HTTP 401.
     :raises TooManyRequests: on HTTP 429.
     :raises ProviderError: on invalid JSON or any other status code.
     """
     try:
         r = self.session.post(self.server_url + 'login',
                               json={"username": self.username, "password": self.password},
                               allow_redirects=False,
                               timeout=10)
     except (ConnectionError, Timeout, ReadTimeout) as e:
         # BUG FIX: the request never completed, so there is no response
         # object `r` to inspect here (the old message referenced `r` and
         # raised NameError) -- report the transport-level error instead.
         raise ServiceUnavailable('Could not complete login request: %r' % e)

     # BUG FIX: the old `finally: return False` swallowed every exception
     # raised below and overrode the `return True` on success, so login()
     # always returned False.
     if r.status_code == 200:
         try:
             self.token = r.json()['token']
         except ValueError:
             raise ProviderError('Invalid JSON returned by provider')
         self.session.headers.update({'Authorization': 'Beaker ' + self.token})
         region.set("oscom_token", self.token)
         return True
     elif r.status_code == 401:
         raise AuthenticationError('Login failed: {}'.format(r.reason))
     elif r.status_code == 429:
         raise TooManyRequests()
     else:
         raise ProviderError('Bad status code: {}'.format(r.status_code))
Esempio n. 2
0
    def search_titles(self, title):
        """Search the provider's ``features`` endpoint for ``title``.

        On HTTP 401 the cached token is cleared, a re-login is attempted and
        the query is retried once.

        :param str title: title to search for.
        :return: the sanitized external title id, or None when nothing matched.
        :raises TooManyRequests: on HTTP 429.
        :raises ProviderError: on HTTP 503 or invalid JSON.
        """
        title_id = None

        parameters = {'query': title.lower()}
        logging.debug('Searching using this title: {}'.format(title))

        results = self.session.get(self.server_url + 'features',
                                   params=parameters,
                                   timeout=30)

        if results.status_code == 401:
            logging.debug(
                'Authentification failed: clearing cache and attempting to login.'
            )
            region.delete("oscom_token")
            self.login()

            # retry the query once with fresh credentials
            results = self.session.get(self.server_url + 'features',
                                       params=parameters,
                                       timeout=30)

        # shared status handling for both the first attempt and the retry
        # (previously duplicated in two branches)
        if results.status_code == 429:
            raise TooManyRequests()
        elif results.status_code == 503:
            raise ProviderError(results.reason)

        # deserialize results
        try:
            results_dict = results.json()['data']
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        # loop over results looking for an exact (normalized) title match,
        # optionally constrained by year when the video has one
        for result in results_dict:
            attributes = result['attributes']
            if 'title' not in attributes:
                continue

            if isinstance(self.video, Episode):
                name_matches = fix_tv_naming(title).lower() == attributes['title'].lower()
            else:
                name_matches = fix_movie_naming(title).lower() == attributes['title'].lower()

            if name_matches and (not self.video.year or
                                 self.video.year == int(attributes['year'])):
                title_id = result['id']
                break

        if title_id:
            logging.debug('Found this title ID: {}'.format(title_id))
            return self.sanitize_external_ids(title_id)

        # CONSISTENCY FIX: the rest of this method logs via `logging`,
        # the old final branch used `logger`
        logging.debug('No match found for {}'.format(title))
Esempio n. 3
0
    def download_subtitle(self, subtitle):
        """Download ``subtitle`` and extract its content from a rar/zip archive.

        When the archive contains several subtitle files, the one whose
        guessed episode/season (and, when previously matched, format and
        release group) agree with ``subtitle`` is selected.

        :param subtitle: subtitle to download; its ``content`` is set in place.
        :raises ProviderError: on an unidentified archive or no matching file.
        """
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(archive_stream)
        else:
            raise ProviderError('Unidentified archive type')

        # collect subtitle files in the archive
        # (str.endswith accepts a tuple, so one test per name suffices)
        subs_in_archive = [name for name in archive.namelist()
                           if name.endswith(('.srt', '.sub', '.ssa', '.ass'))]

        # select the correct subtitle file
        matching_sub = None
        if len(subs_in_archive) == 1:
            matching_sub = subs_in_archive[0]
        else:
            for sub_name in subs_in_archive:
                guess = guessit(sub_name)

                # consider subtitle valid if:
                # - episode and season match
                # - format matches (if it was matched before)
                # - release group matches (and we asked for one and it was matched, or it was not matched)
                # ROBUSTNESS FIX: use .get() so names guessit could not fully
                # parse are skipped instead of raising KeyError
                if guess.get("episode") != subtitle.episode or \
                        guess.get("season") != subtitle.season:
                    continue

                format_matches = "format" not in subtitle.matches or \
                                 ("format" in subtitle.matches and
                                  guess.get("format", "").lower() in
                                  subtitle.releases.lower())

                release_group_matches = True
                if subtitle.asked_for_release_group:
                    release_group_matches = "release_group" not in subtitle.matches or \
                                            ("release_group" in subtitle.matches and
                                             guess.get("release_group", "").lower() ==
                                             subtitle.asked_for_release_group.lower())

                if release_group_matches and format_matches:
                    matching_sub = sub_name
                    break

        if not matching_sub:
            raise ProviderError("None of expected subtitle found in archive")
        subtitle.content = fix_line_ending(archive.read(matching_sub))
Esempio n. 4
0
    def download_subtitle(self, subtitle):
        """Download ``subtitle`` via the provider's ``download`` endpoint.

        Logs in first when no token is cached; sets ``subtitle.content``
        (with normalized line endings) on success.

        :raises ProviderError: when no token can be obtained, the JSON is
            invalid, or the provider returns 503.
        :raises TooManyRequests: on HTTP 429.
        :raises DownloadLimitExceeded: on HTTP 406.
        """
        if self.token is NO_VALUE:
            logger.debug("No cached token, we'll try to login again.")
            self.login()
        if self.token is NO_VALUE:
            logger.debug(
                "Unable to obtain an authentication token right now, we'll try again later."
            )
            raise ProviderError("Unable to obtain an authentication token")

        logger.info('Downloading subtitle %r', subtitle)

        headers = {
            'Accept': 'application/json',
            'Content-Type': 'application/json',
            'Authorization': 'Beaker ' + self.token
        }
        res = self.session.post(self.server_url + 'download',
                                json={
                                    'file_id': subtitle.file_id,
                                    'sub_format': 'srt'
                                },
                                headers=headers,
                                timeout=30)
        if res.status_code == 429:
            raise TooManyRequests()
        elif res.status_code == 406:
            raise DownloadLimitExceeded("Daily download limit reached")
        elif res.status_code == 503:
            raise ProviderError(res.reason)

        try:
            subtitle.download_link = res.json()['link']
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        r = self.session.get(subtitle.download_link, timeout=30)

        # BUG FIX: these checks previously inspected `res` (the POST
        # response) instead of `r` (the file download response), so errors
        # on the actual download were never detected.
        if r.status_code == 429:
            raise TooManyRequests()
        elif r.status_code == 406:
            raise DownloadLimitExceeded("Daily download limit reached")
        elif r.status_code == 503:
            raise ProviderError(r.reason)

        subtitle_content = r.content

        if subtitle_content:
            subtitle.content = fix_line_ending(subtitle_content)
        else:
            logger.debug('Could not download subtitle from {}'.format(
                subtitle.download_link))
Esempio n. 5
0
    def download_subtitle(self, subtitle):
        """Download ``subtitle``, unpacking a rar/zip archive when one is served.

        Non-archive payloads are accepted directly when they validate as a
        subtitle; Serbian archives bundling latin and cyrillic variants get
        special handling.
        """
        response = self.session.get(subtitle.download_link, timeout=10)
        response.raise_for_status()

        payload = io.BytesIO(response.content)

        if is_rarfile(payload):
            logger.debug('Archive identified as rar')
            archive = RarFile(payload)
        elif is_zipfile(payload):
            logger.debug('Archive identified as zip')
            archive = ZipFile(payload)
        else:
            # not an archive -- maybe the payload is the subtitle itself
            subtitle.content = response.content
            if subtitle.is_valid():
                return
            subtitle.content = None

            raise ProviderError('Unidentified archive type')

        subs_in_archive = archive.namelist()

        # if Serbian lat and cyr versions are packed together, try to find right version
        is_serbian = subtitle.language == 'sr' or subtitle.language == 'sr-Cyrl'
        if is_serbian and len(subs_in_archive) > 1:
            self.get_subtitle_from_bundled_archive(subtitle, subs_in_archive, archive)
        else:
            # use default method for everything else
            subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
Esempio n. 6
0
    def download_subtitle(self, subtitle):
        """Download ``subtitle`` via the provider's ``download`` endpoint.

        Sets ``subtitle.content`` (with normalized line endings) on success.

        :raises TooManyRequests: on HTTP 429.
        :raises DownloadLimitExceeded: on HTTP 406.
        :raises ProviderError: on invalid JSON.
        """
        logger.info('Downloading subtitle %r', subtitle)

        headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
        res = self.session.post(self.server_url + 'download',
                                json={'file_id': subtitle.file_id, 'sub_format': 'srt'},
                                headers=headers,
                                timeout=10)

        # BUG FIX: check the well-known limit statuses BEFORE
        # raise_for_status(); calling it first turned 429/406 into a generic
        # HTTPError and made the dedicated exceptions below unreachable.
        if res.status_code == 429:
            raise TooManyRequests()
        elif res.status_code == 406:
            raise DownloadLimitExceeded("Daily download limit reached")
        res.raise_for_status()

        try:
            subtitle.download_link = res.json()['link']
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        r = self.session.get(subtitle.download_link, timeout=10)

        # BUG FIX: these checks previously inspected `res` (the POST
        # response) instead of `r` (the file download response).
        if r.status_code == 429:
            raise TooManyRequests()
        elif r.status_code == 406:
            raise DownloadLimitExceeded("Daily download limit reached")
        r.raise_for_status()

        subtitle_content = r.content

        if subtitle_content:
            subtitle.content = fix_line_ending(subtitle_content)
        else:
            logger.debug('Could not download subtitle from {}'.format(subtitle.download_link))
Esempio n. 7
0
    def download_subtitle(self, subtitle):
        """Download ``subtitle``, unpacking a rar/zip archive when one is served.

        Non-archive payloads are accepted directly when they validate as a
        subtitle; episodes use the episode-specific archive extractor.
        """
        response = self.session.get(subtitle.download_link,
                                    headers={'Referer': self.api_url},
                                    timeout=10)
        response.raise_for_status()

        stream = io.BytesIO(response.content)
        if is_rarfile(stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(stream)
        elif is_zipfile(stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(stream)
        else:
            # the server may have sent the subtitle file directly
            subtitle.content = response.content
            if subtitle.is_valid():
                return
            subtitle.content = None

            raise ProviderError('Unidentified archive type')

        if subtitle.is_episode:
            extractor = self._get_subtitle_from_archive
        else:
            extractor = self.get_subtitle_from_archive
        subtitle.content = extractor(subtitle, archive)
Esempio n. 8
0
    def _get_show_ids(self):
        """Get the ``dict`` of show ids per series by querying the `series.php` page.

        :return: show id per series, lower case and without quotes.
        :rtype: dict

        """
        # get the show page
        logger.info('Getting show ids')
        r = self.session.get(self.series_url, timeout=10)
        r.raise_for_status()

        if r.status_code != 200:
            logger.error('Error getting show ids')
            raise ProviderError('Error getting show ids')

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # every show link looks like <a href="/show/1234">Name</a>;
        # the numeric id starts after the 6-character "/show/" prefix
        show_ids = {
            sanitize(anchor.get_text()): int(anchor['href'][6:])
            for anchor in soup.select('td > a[href^="/show/"]')
        }
        logger.debug('Found %d show ids', len(show_ids))

        return show_ids
Esempio n. 9
0
    def download_subtitle(self, subtitle):
        """Download ``subtitle`` as a zip and store its single file's content.

        :raises ProviderError: on a bad zip (daily limit), an empty archive,
            or more than one candidate file.
        """
        # download
        url = self.server_url + 'subtitle/download/{}/{}/'.format(subtitle.language.alpha2, subtitle.subtitle_id)
        params = {'v': subtitle.subtitle_version, 'key': subtitle.subtitle_key}
        r = self.session.get(url, params=params, headers={'Referer': subtitle.page_link}, timeout=10)
        r.raise_for_status()

        # open the zip
        try:
            with zipfile.ZipFile(io.BytesIO(r.content)) as zf:
                # remove some filenames from the namelist
                namelist = [n for n in zf.namelist() if not n.endswith('.txt')]
                if len(namelist) > 1:
                    raise ProviderError('More than one file to unzip')
                # ROBUSTNESS FIX: an empty namelist previously fell through
                # to an IndexError on namelist[0]
                if not namelist:
                    raise ProviderError('No subtitle found in the archive')

                subtitle.content = fix_line_ending(zf.read(namelist[0]))
        except zipfile.BadZipfile:
            # if no zip file was retrieved, daily downloads limit has exceeded
            raise ProviderError('Daily limit exceeded')
Esempio n. 10
0
    def _search_url_titles(self, series, season, episode, year=None):
        """Search the URL titles by kind for the given `title`, `season` and `episode`.

        :param str series: series to search for.
        :param int season: season to search for.
        :param int episode: episode to search for.
        :param int year: year to search for.
        :return: the episode URL.
        :rtype: str

        """
        # make the search
        logger.info('Searching episode url for %s, season %d, episode %d',
                    series, season, episode)
        episode_url = None

        search = '{} {}x{}'.format(series, season, episode)
        r = self.session.get(self.search_url,
                             headers={'Referer': self.server_url},
                             params={'q': search},
                             timeout=10)
        r.raise_for_status()

        if r.status_code != 200:
            logger.warning('Error getting episode url')
            # BUG FIX: the message was previously passed lazy-log style
            # ('%s...', arg) to the exception constructor, so the class name
            # was never interpolated into the message
            raise ProviderError('%s: Error getting episode url' %
                                self.__class__.__name__.upper())

        results = json.loads(r.text)

        def _find_episode_url(result):
            # return the url of the matching season/episode in `result`, if any
            for episode_data in result['episodes']:
                if season == episode_data['season'] and \
                        episode == episode_data['number']:
                    return self.server_url + 'episodes/{}'.format(
                        episode_data['id'])
            return None

        for result in results:
            title = sanitize(result['name'])

            # attempt series with year
            if sanitize('{} ({})'.format(series, year)) in title:
                episode_url = _find_episode_url(result)
                if episode_url:
                    logger.info('Episode url found with year %s', episode_url)
                    return episode_url
            # attempt series without year
            elif sanitize(series) in title:
                episode_url = _find_episode_url(result)
                if episode_url:
                    logger.info('Episode url found without year %s',
                                episode_url)
                    return episode_url

        return episode_url
Esempio n. 11
0
    def search_titles(self, title):
        """Search the provider's ``features`` endpoint for ``title``.

        Prefers an IMDB id taken from the current video when available.
        On HTTP 401 the cached token is cleared, a re-login is attempted and
        the query is retried once.

        :param str title: title to search for.
        :return: the matching title id, or None when nothing matched.
        :raises TooManyRequests: on HTTP 429.
        :raises ProviderError: on invalid JSON.
        """
        title_id = None
        imdb_id = None

        if isinstance(self.video, Episode) and self.video.series_imdb_id:
            imdb_id = self.video.series_imdb_id
        elif isinstance(self.video, Movie) and self.video.imdb_id:
            imdb_id = self.video.imdb_id

        if imdb_id:
            parameters = {'imdb_id': imdb_id}
            logging.debug('Searching using this IMDB id: {}'.format(imdb_id))
        else:
            parameters = {'query': title}
            logging.debug('Searching using this title: {}'.format(title))

        results = self.session.get(self.server_url + 'features', params=parameters, timeout=10)

        # BUG FIX: check 401/429 BEFORE raise_for_status(); calling it first
        # turned both into a generic HTTPError and made the re-login and
        # TooManyRequests branches unreachable.
        if results.status_code == 401:
            logging.debug('Authentification failed: clearing cache and attempting to login.')
            region.delete("oscom_token")
            self.login()

            results = self.session.get(self.server_url + 'features', params=parameters, timeout=10)

        if results.status_code == 429:
            raise TooManyRequests()
        results.raise_for_status()

        # deserialize results
        try:
            results_dict = results.json()['data']
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        # loop over results
        for result in results_dict:
            if title.lower() == result['attributes']['title'].lower() and \
                    (not self.video.year or self.video.year == int(result['attributes']['year'])):
                title_id = result['id']
                break

        if title_id:
            logging.debug('Found this title ID: {}'.format(title_id))
            return title_id

        # CONSISTENCY FIX: the rest of this method logs via `logging`,
        # the old final branch used `logger`
        logging.debug('No match found for {}'.format(title))
Esempio n. 12
0
    def download_subtitle(self, subtitle):
        """Download ``subtitle`` as a zip and store its single .srt/.sub file.

        :raises ProviderError: on an empty archive or more than one
            candidate file.
        """
        # download
        url = 'http://zip.{}/{}.zip'.format(self.server_url, subtitle.subtitle_id)
        r = self.session.get(url, headers={'Referer': subtitle.page_link}, timeout=10)
        r.raise_for_status()

        # open the zip
        with zipfile.ZipFile(io.BytesIO(r.content)) as zf:
            # keep only the actual subtitle files
            namelist = [n for n in zf.namelist() if os.path.splitext(n)[1] in ['.srt', '.sub']]
            if len(namelist) > 1:
                raise ProviderError('More than one file to unzip')
            # ROBUSTNESS FIX: an empty namelist previously fell through to
            # an IndexError on namelist[0]
            if not namelist:
                raise ProviderError('No subtitle found in the archive')

            subtitle.content = fix_line_ending(zf.read(namelist[0]))
Esempio n. 13
0
    def _search_url_titles(self, series, season, episode, year=None):
        """Search the URL titles by kind for the given `title`, `season` and `episode`.

        :param str series: series to search for.
        :param int season: season to search for.
        :param int episode: episode to search for.
        :param int year: year to search for.
        :return: the episode URL.
        :rtype: str

        """
        # make the search
        logger.info("Searching episode url for %s, season %d, episode %d",
                    series, season, episode)
        episode_url = None

        search = "{} {}x{}".format(series, season, episode)
        r = self.session.get(self.search_url,
                             headers={"Referer": self.server_url},
                             params={"q": search},
                             timeout=10)
        r.raise_for_status()

        if r.status_code != 200:
            logger.error("Error getting episode url")
            raise ProviderError("Error getting episode url")

        results = json.loads(r.text)

        # precompute both normalized series forms once, outside the loop
        title_with_year = sanitize("{} ({})".format(series, year))
        title_without_year = sanitize(series)

        for result in results:
            title = sanitize(result["name"])

            # DEDUPLICATION: the original with-year and without-year branches
            # had byte-identical bodies, so a single combined check is
            # equivalent
            if title_with_year in title or title_without_year in title:
                for episode_data in result["episodes"]:
                    if season == episode_data["season"] and \
                            episode == episode_data["number"]:
                        episode_url = self.server_url + "episodes/{}".format(
                            episode_data["id"])
                        return episode_url

        return episode_url
Esempio n. 14
0
    def fetch_page(self, url, ref=None):
        """Fetch ``url`` and return the response body as text.

        :param str url: page to fetch.
        :param str ref: optional Referer header; defaults to the server url.
        :raises HTTPError: on a non-200 status code.
        :raises ProviderError: when the provider returns an empty body.
        """
        logger.debug(f"Titulky.com: Fetching url: {url}")

        referer = ref if ref else self.server_url
        res = self.session.get(url,
                               timeout=self.timeout,
                               headers={'Referer': referer})

        if res.status_code != 200:
            raise HTTPError(f"Fetch failed with status code {res.status_code}")
        if not res.text:
            raise ProviderError("No response returned from the provider")

        return res.text
Esempio n. 15
0
    def download_subtitle(self, subtitle):
        """Download ``subtitle`` and extract it from its rar/zip archive."""
        response = self.session.get(subtitle.download_link, timeout=10)
        response.raise_for_status()

        # figure out which archive format we received
        stream = io.BytesIO(response.content)
        if is_rarfile(stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(stream)
        elif is_zipfile(stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(stream)
        else:
            raise ProviderError('Unidentified archive type')

        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
Esempio n. 16
0
    def download_subtitle(self, subtitle):
        """Download ``subtitle`` via the provider API, retrying failed calls.

        Logs in first when no token is cached; sets ``subtitle.content``
        (with normalized line endings) on success, or None when the file
        could not be fetched.

        :raises ProviderError: when no authentication token can be obtained.
        """
        if self.token is NO_VALUE:
            logger.debug("No cached token, we'll try to login again.")
            self.login()
        if self.token is NO_VALUE:
            logger.debug(
                "Unable to obtain an authentication token right now, we'll try again later."
            )
            raise ProviderError("Unable to obtain an authentication token")

        logger.info('Downloading subtitle %r', subtitle)

        headers = {
            'Accept': 'application/json',
            'Content-Type': 'application/json',
            'Authorization': 'Beaker ' + self.token
        }

        def request_link():
            # ask the API for a download link for this file
            return self.session.post(self.server_url + 'download',
                                     json={'file_id': subtitle.file_id,
                                           'sub_format': 'srt'},
                                     headers=headers,
                                     timeout=30)

        res = self.retry(
            lambda: checked(request_link,
                            validate_json=True,
                            json_key_name='link'),
            amount=retry_amount)

        subtitle.download_link = res.json()['link']

        def fetch_file():
            # fetch the actual subtitle file from the returned link
            return self.session.get(subtitle.download_link, timeout=30)

        r = self.retry(
            lambda: checked(fetch_file, validate_content=True),
            amount=retry_amount)

        if not r:
            logger.debug(
                f'Could not download subtitle from {subtitle.download_link}')
            subtitle.content = None
            return

        subtitle.content = fix_line_ending(r.content)
Esempio n. 17
0
    def download_subtitle(self, subtitle):
        """Download ``subtitle`` as a zip and store its single .srt/.sub file.

        :raises ProviderError: on an empty archive or more than one
            candidate file.
        """
        # download the subtitle
        logger.info('Downloading subtitle %r', subtitle.sub_id)

        params = {
            'mac': subtitle.sub_id
        }

        r = self.session.get(self.server_url + '/subtitle/download', params=params, timeout=30)
        r.raise_for_status()

        # open the zip
        with zipfile.ZipFile(io.BytesIO(r.content)) as zf:
            # keep only the actual subtitle files
            namelist = [n for n in zf.namelist() if os.path.splitext(n)[1] in ['.srt', '.sub']]
            if len(namelist) > 1:
                raise ProviderError('More than one file to unzip')
            # ROBUSTNESS FIX: an empty namelist previously fell through to
            # an IndexError on namelist[0]
            if not namelist:
                raise ProviderError('No subtitle found in the archive')

            subtitle.content = fix_line_ending(zf.read(namelist[0]))
Esempio n. 18
0
    def download_subtitle(self, subtitle):
        """Download ``subtitle`` and extract it from a rar/zip archive.

        Only acts on NekurSubtitle instances; non-archive payloads are
        accepted directly when they validate as a subtitle.
        """
        if not isinstance(subtitle, NekurSubtitle):
            # not ours -- nothing to do (mirrors the original guard)
            return

        # download the subtitle
        response = self.session.get(subtitle.download_link, timeout=10)
        response.raise_for_status()

        # open the archive
        stream = io.BytesIO(response.content)
        if is_rarfile(stream):
            archive = RarFile(stream)
        elif is_zipfile(stream):
            archive = ZipFile(stream)
        else:
            # the server may have sent the subtitle file directly
            subtitle.content = response.content
            if subtitle.is_valid():
                return
            subtitle.content = None

            raise ProviderError('Unidentified archive type')

        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
Esempio n. 19
0
    def download_subtitle(self, subtitle):
        """Download ``subtitle`` as a zip and store its single file's content.

        :raises ProviderError: on an empty archive or more than one
            candidate file.
        """
        # download
        url = self.server_url + 'get/download/{}/'.format(
            subtitle.language.alpha2)
        params = {
            'id': subtitle.subtitle_id,
            'v': subtitle.releases[0],
            'key': subtitle.subtitle_key
        }
        r = self.session.get(url,
                             params=params,
                             headers={'Referer': subtitle.page_link},
                             timeout=10)
        r.raise_for_status()

        # open the zip
        with zipfile.ZipFile(io.BytesIO(r.content)) as zf:
            # ignore accompanying .txt files, keep the rest
            namelist = [n for n in zf.namelist() if not n.endswith('.txt')]
            if len(namelist) > 1:
                raise ProviderError('More than one file to unzip')
            # ROBUSTNESS FIX: an empty namelist previously fell through to
            # an IndexError on namelist[0]
            if not namelist:
                raise ProviderError('No subtitle found in the archive')

            subtitle.content = fix_line_ending(zf.read(namelist[0]))
Esempio n. 20
0
    def query(self,
              language,
              video_names,
              type,
              keyword=None,
              year=None,
              season=None,
              episode=None,
              imdb_id=None):
        ## Build the search URL
        params = {}

        # Keyword
        if keyword:
            params['Fulltext'] = keyword
        # Video type
        if type == 'episode':
            params['Serial'] = 'S'
        else:
            params['Serial'] = 'F'
        # Season / Episode
        if season:
            params['Sezona'] = season
        if episode:
            params['Epizoda'] = episode
        # IMDB ID
        if imdb_id:
            params['IMDB'] = imdb_id[2:]  # Remove the tt from the imdb id
        # Year
        if year:
            params['Rok'] = year
        # Language
        if language == Language('ces'):
            params['Jazyk'] = 'CZ'
        elif language == Language('slk'):
            params['Jazyk'] = 'SK'
        elif language == None:
            params['Jazyk'] = ''
        else:
            return []
        # Status
        if self.approved_only:
            logger.debug(f"Titulky.com: Searching only for approved subtitles")
            params['ASchvalene'] = '1'
        else:
            params['ASchvalene'] = ''

        search_url = self.build_search_url(params)

        ## Search results page parsing
        html_src = self.fetch_page(search_url)
        search_page_soup = ParserBeautifulSoup(html_src,
                                               ['lxml', 'html.parser'])

        # If there is a message containing "Žádny odpovídající záznam", it means that there are no results
        # If that's the case, return an empty list
        error_message = search_page_soup.select('.panel-body > strong')
        if len(
                error_message
        ) > 0 and 'Žádný odpovídající záznam' in error_message[0].get_text(
                strip=True):
            logger.info("Titulky.com: No results found")
            return []

        # Get the table containing the search results
        table = search_page_soup.find('table', class_='table')
        if not table:
            logger.debug("Titulky.com: Could not find table")
            raise ParseResponseError(
                "Could not find table. Did the HTML source change?")

        # Get table body containing rows of subtitles
        table_body = table.find('tbody')
        if not table_body:
            logger.debug("Titulky.com: Could not find table body")
            raise ParseResponseError(
                "Could not find table body. Did the HTML source change?")

        ## Loop over all subtitles on the first page and put them in a list
        subtitles = []
        rows = table_body.find_all('tr')

        if not self.multithreading:
            # Process the rows sequentially
            logger.info("Titulky.com: processing results in sequence")
            for i, row in enumerate(rows):
                sub_info = self.process_row(row, video_names, search_url)

                # If subtitle info was returned, then everything was okay
                # and we can instationate it and add it to the list
                if sub_info:
                    logger.debug(
                        f"Titulky.com: Sucessfully retrieved subtitle info, row: {i}"
                    )

                    # If we found the subtitle by IMDB ID, no need to get it from details page
                    sub_imdb_id = imdb_id or sub_info['imdb_id']

                    subtitle_instance = self.subtitle_class(
                        sub_info['id'],
                        sub_imdb_id,
                        sub_info['language'],
                        sub_info['names'],
                        season,
                        episode,
                        sub_info['year'],
                        sub_info['releases'],
                        sub_info['fps'],
                        sub_info['uploader'],
                        sub_info['approved'],
                        sub_info['details_link'],
                        sub_info['download_link'],
                        skip_wrong_fps=self.skip_wrong_fps,
                        asked_for_episode=(type == 'episode'))
                    subtitles.append(subtitle_instance)
                else:
                    # No subtitle info was returned, i. e. something unexpected
                    # happend during subtitle details page fetching and processing.
                    logger.debug(
                        f"Titulky.com: No subtitle info retrieved, row: {i}")
        else:
            # Process the rows in paralell
            logger.info(
                f"Titulky.com: processing results in parelell, {self.max_threads} rows at a time."
            )

            threads = [None] * len(rows)
            threads_data = [None] * len(rows)

            # Process rows in parallel, self.max_threads at a time.
            cycles = math.ceil(len(rows) / self.max_threads)
            for i in range(cycles):
                # Batch number i
                starting_index = i * self.max_threads  # Inclusive
                ending_index = starting_index + self.max_threads  # Non-inclusive

                # Create threads for all rows in this batch
                for j in range(starting_index, ending_index):
                    # Check if j-th row exists
                    if j < len(rows):
                        # Row number j
                        logger.debug(
                            f"Titulky.com: Creating thread {j} (batch: {i})")
                        # Create a thread for row j and start it
                        threads[j] = Thread(
                            target=self.process_row,
                            args=[rows[j], video_names, search_url],
                            kwargs={
                                'thread_id': j,
                                'threads_data': threads_data
                            })
                        threads[j].start()

                # Wait for all created threads to finish before moving to another batch of rows
                for j in range(starting_index, ending_index):
                    # Check if j-th row exists
                    if j < len(rows):
                        threads[j].join()

            # Process the resulting data from all threads
            for i in range(len(threads_data)):
                thread_data = threads_data[i]

                # If the thread returned didn't return anything, but expected a dict object
                if not thread_data:
                    raise ProviderError(
                        f"No data returned from thread ID: {i}")

                # If an exception was raised in a thread, raise it again here
                if 'exception' in thread_data and thread_data['exception']:
                    logger.debug(
                        f"Titulky.com: An error occured while processing a row in the thread ID {i}"
                    )
                    raise thread_data['exception']

                # If the thread returned a subtitle info, great, instantiate it and add it to the list
                if 'sub_info' in thread_data and thread_data['sub_info']:
                    # Instantiate the subtitle object
                    logger.debug(
                        f"Titulky.com: Sucessfully retrieved subtitle info, thread ID: {i}"
                    )
                    sub_info = thread_data['sub_info']

                    # If we found the subtitle by IMDB ID, no need to get it from details page
                    sub_imdb_id = imdb_id or sub_info['imdb_id']

                    subtitle_instance = self.subtitle_class(
                        sub_info['id'],
                        sub_imdb_id,
                        sub_info['language'],
                        sub_info['names'],
                        season,
                        episode,
                        sub_info['year'],
                        sub_info['releases'],
                        sub_info['fps'],
                        sub_info['uploader'],
                        sub_info['approved'],
                        sub_info['details_link'],
                        sub_info['download_link'],
                        skip_wrong_fps=self.skip_wrong_fps,
                        asked_for_episode=(type == 'episode'))
                    subtitles.append(subtitle_instance)
                else:
                    # The thread returned data, but it didn't contain a subtitle info, i. e. something unexpected
                    # happend during subtitle details page fetching and processing.
                    logger.debug(
                        f"Titulky.com: No subtitle info retrieved, thread ID: {i}"
                    )

        # Clean up
        search_page_soup.decompose()
        search_page_soup = None

        logger.debug(f"Titulky.com: Found subtitles: {subtitles}")

        return subtitles
Esempio n. 21
0
    def get_archives(self, title_id, language_code):
        """Get the archive list from a given `title_id` and `language_code`.

        Walks the provider's paginated search results, scraping one archive
        entry per result row, until the "load more" link disappears from the
        page.

        :param int title_id: title id.
        :param int language_code: language code.
        :return: the archives.
        :rtype: list of :class:`LegendasTVArchive`
        :raises ProviderError: if a scraped archive timestamp lies in the future.

        """
        logger.info('Getting archives for title %d and language %d', title_id, language_code)
        archives = []
        page = 0
        while True:
            # get the archive page for the current page number
            url = self.server_url + 'legenda/busca/-/{language}/-/{page}/{title}'.format(
                language=language_code, page=page, title=title_id)
            r = self.session.get(url)
            r.raise_for_status()

            # parse the results (prefer lxml, fall back to the stdlib html.parser)
            soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
            for archive_soup in soup.select('div.list_element > article > div > div.f_left'):
                # create archive
                # NOTE(review): the id is taken as the third path segment of the
                # anchor's href; 'pack'/'destaque' CSS classes on the parent
                # appear to flag pack and featured archives — confirm against
                # the current site markup.
                archive = LegendasTVArchive(archive_soup.a['href'].split('/')[2],
                                            archive_soup.a.text,
                                            'pack' in archive_soup.parent['class'],
                                            'destaque' in archive_soup.parent['class'],
                                            self.server_url + archive_soup.a['href'][1:])

                # extract text containing downloads, rating and timestamp
                data_text = archive_soup.find('p', class_='data').text

                # match downloads
                archive.downloads = int(downloads_re.search(data_text).group('downloads'))

                # match rating (optional: not every archive has one)
                match = rating_re.search(data_text)
                if match:
                    archive.rating = int(match.group('rating'))

                # match timestamp and validate it; site timestamps are local to
                # America/Sao_Paulo, so localize before comparing against UTC now
                time_data = {k: int(v) for k, v in timestamp_re.search(data_text).groupdict().items()}
                archive.timestamp = pytz.timezone('America/Sao_Paulo').localize(datetime(**time_data))
                if archive.timestamp > datetime.utcnow().replace(tzinfo=pytz.utc):
                    raise ProviderError('Archive timestamp is in the future')

                # add archive
                logger.info('Found archive for title %d and language %d at page %s: %s',
                            title_id, language_code, page, archive)
                archives.append(archive)

            # stop on last page: no "carregar mais" (load more) link means this
            # was the final page of results
            if soup.find('a', attrs={'class': 'load_more'}, string='carregar mais') is None:
                break

            # increment page count
            page += 1

        logger.debug('Found %d archives', len(archives))

        return archives
Esempio n. 22
0
def checked(fn,
            raise_api_limit=False,
            validate_token=False,
            validate_json=False,
            json_key_name=None,
            validate_content=False):
    """Run :fn: and check the response status before returning it.

    :param fn: the function to make an API call to OpenSubtitles.com.
    :param raise_api_limit: if True we wait a little bit longer before running the call again.
    :param validate_token: test if token is valid and return 401 if not.
    :param validate_json: test if response is valid json.
    :param json_key_name: test if returned json contain a specific key.
    :param validate_content: test if response have a content (used with download).
    :return: the response (or ``401`` when ``validate_token`` detects an
        invalid token, or ``False`` when ``validate_content`` finds none).
    :raises ServiceUnavailable: on connection/timeout errors.
    :raises AuthenticationError: on HTTP 401 when not validating the token.
    :raises DownloadLimitExceeded: on HTTP 406.
    :raises TooManyRequests: on HTTP 429.
    :raises APIThrottled: on HTTP 502, or re-raised after one failed retry.
    :raises ProviderError: on other unexpected statuses or invalid payloads.

    """
    response = None
    try:
        response = fn()
    except APIThrottled:
        if not raise_api_limit:
            logger.info(
                "API request limit hit, waiting and trying again once.")
            time.sleep(2)
            # Bug fix: forward the validation flags so the retried call is
            # checked exactly like the original one (they used to be dropped).
            return checked(fn,
                           raise_api_limit=True,
                           validate_token=validate_token,
                           validate_json=validate_json,
                           json_key_name=json_key_name,
                           validate_content=validate_content)
        raise
    except (ConnectionError, Timeout, ReadTimeout) as error:
        # Bug fix: `response` is still None here, so the previous message that
        # read `response.status_code` crashed with AttributeError inside the
        # handler. Report the transport error itself instead.
        raise ServiceUnavailable(
            f'Connection error while calling provider: {error!r}')
    except Exception:
        logging.exception('Unhandled exception raised.')
        raise ProviderError('Unhandled exception raised. Check log.')

    # Bug fix: the previous outer `except Exception: status_code = None`
    # swallowed the exceptions deliberately raised above and made the function
    # silently return None; they now propagate to the caller as intended.
    status_code = response.status_code

    if status_code == 401:
        if validate_token:
            # Caller asked to probe token validity: report it, don't fail.
            return 401
        else:
            raise AuthenticationError(f'Login failed: {response.reason}')
    elif status_code == 403:
        raise ProviderError("Bazarr API key seems to be in problem")
    elif status_code == 406:
        raise DownloadLimitExceeded("Daily download limit reached")
    elif status_code == 410:
        # Bug fix: typo in the original message ("Download as expired").
        raise ProviderError("Download has expired")
    elif status_code == 429:
        raise TooManyRequests()
    elif status_code == 502:
        # this one should deal with Bad Gateway issue on their side.
        raise APIThrottled()
    elif 500 <= status_code <= 599:
        raise ProviderError(response.reason)

    if status_code != 200:
        raise ProviderError(f'Bad status code: {response.status_code}')

    if validate_json:
        try:
            json_test = response.json()
        except JSONDecodeError:
            raise ProviderError('Invalid JSON returned by provider')
        else:
            if json_key_name not in json_test:
                raise ProviderError(
                    f'Invalid JSON returned by provider: no {json_key_name} key in returned json.'
                )

    if validate_content:
        if not hasattr(response, 'content'):
            logging.error('Download link returned no content attribute.')
            return False
        elif not response.content:
            logging.error(
                f'This download link returned empty content: {response.url}')
            return False

    return response
Esempio n. 23
0
    def query(self, languages, video):
        """Search the provider for subtitles matching `video` in `languages`.

        :param languages: set of languages to search for.
        :param video: the Episode or Movie to find subtitles for.
        :return: list of matching subtitle instances (empty when the title is
            not found on the provider).
        :raises TooManyRequests: on HTTP 429.
        :raises ProviderError: on invalid JSON returned by the provider.
        """
        self.video = video
        if self.use_hash:
            # renamed from `hash` to avoid shadowing the builtin
            file_hash = self.video.hashes.get('opensubtitlescom')
            logging.debug('Searching using this hash: {}'.format(file_hash))
        else:
            file_hash = None

        # episodes are searched by series name, movies by title
        if isinstance(self.video, Episode):
            title = self.video.series
        else:
            title = self.video.title

        title_id = self.search_titles(title)
        if not title_id:
            return []
        lang_strings = [str(lang) for lang in languages]
        langs = ','.join(lang_strings)
        logging.debug('Searching for this languages: {}'.format(lang_strings))

        # query the server
        if isinstance(self.video, Episode):
            res = self.session.get(self.server_url + 'subtitles',
                                   params={'parent_feature_id': title_id,
                                           'languages': langs,
                                           'episode_number': self.video.episode,
                                           'season_number': self.video.season,
                                           'moviehash': file_hash},
                                   timeout=10)
        else:
            res = self.session.get(self.server_url + 'subtitles',
                                   params={'id': title_id,
                                           'languages': langs,
                                           'moviehash': file_hash},
                                   timeout=10)

        # Bug fix: check for 429 BEFORE raise_for_status(); previously
        # raise_for_status() raised a generic HTTPError on 429 first, making
        # the TooManyRequests branch dead code.
        if res.status_code == 429:
            raise TooManyRequests()
        res.raise_for_status()

        subtitles = []

        try:
            result = res.json()
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')
        else:
            logging.debug('Query returned {} subtitles'.format(len(result['data'])))

            if len(result['data']):
                for item in result['data']:
                    # season/episode details are only present for episodes
                    if 'season_number' in item['attributes']['feature_details']:
                        season_number = item['attributes']['feature_details']['season_number']
                    else:
                        season_number = None

                    if 'episode_number' in item['attributes']['feature_details']:
                        episode_number = item['attributes']['feature_details']['episode_number']
                    else:
                        episode_number = None

                    if 'moviehash_match' in item['attributes']:
                        moviehash_match = item['attributes']['moviehash_match']
                    else:
                        moviehash_match = False

                    # skip entries that expose no downloadable file
                    if len(item['attributes']['files']):
                        subtitle = OpenSubtitlesComSubtitle(
                                language=Language.fromietf(item['attributes']['language']),
                                hearing_impaired=item['attributes']['hearing_impaired'],
                                page_link=item['attributes']['url'],
                                file_id=item['attributes']['files'][0]['file_id'],
                                releases=item['attributes']['release'],
                                uploader=item['attributes']['uploader']['name'],
                                title=item['attributes']['feature_details']['movie_name'],
                                year=item['attributes']['feature_details']['year'],
                                season=season_number,
                                episode=episode_number,
                                hash_matched=moviehash_match
                            )
                        subtitle.get_matches(self.video)
                        subtitles.append(subtitle)

        return subtitles
Esempio n. 24
0
    def query(self, languages, video):
        """Search the provider for subtitles matching `video` in `languages`.

        Searches by IMDB id when available; otherwise falls back to a title id
        obtained from ``search_titles``. Forced (foreign-parts-only) languages
        control the ``foreign_parts_only`` search mode.

        :param languages: set of languages (possibly forced variants) wanted.
        :param video: the Episode or Movie to find subtitles for.
        :return: list of matching subtitle instances (empty when the title is
            not found on the provider).
        :raises TooManyRequests: on HTTP 429.
        :raises ProviderError: on HTTP 503 or invalid JSON returned.
        """
        self.video = video
        if self.use_hash:
            file_hash = self.video.hashes.get('opensubtitlescom')
            # Bug fix: log the computed file hash; the original logged the
            # `hash` builtin function object instead of `file_hash`.
            logging.debug('Searching using this hash: {}'.format(file_hash))
        else:
            file_hash = None

        # episodes are searched by series name, movies by title
        if isinstance(self.video, Episode):
            title = self.video.series
        else:
            title = self.video.title

        # prefer the IMDB id when the video carries one
        imdb_id = None
        if isinstance(self.video, Episode) and self.video.series_imdb_id:
            imdb_id = self.sanitize_external_ids(self.video.series_imdb_id)
        elif isinstance(self.video, Movie) and self.video.imdb_id:
            imdb_id = self.sanitize_external_ids(self.video.imdb_id)

        # fall back to a title search only when no IMDB id is available
        title_id = None
        if not imdb_id:
            title_id = self.search_titles(title)
            if not title_id:
                return []

        lang_strings = [str(lang.basename) for lang in languages]
        only_foreign = all(lang.forced for lang in languages)
        also_foreign = any(lang.forced for lang in languages)
        if only_foreign:
            forced = 'only'
        elif also_foreign:
            forced = 'include'
        else:
            forced = 'exclude'

        langs = ','.join(lang_strings)
        logging.debug('Searching for this languages: {}'.format(lang_strings))

        # query the server
        if isinstance(self.video, Episode):
            res = self.session.get(
                self.server_url + 'subtitles',
                params=(('episode_number',
                         self.video.episode), ('foreign_parts_only', forced),
                        ('languages', langs.lower()), ('moviehash', file_hash),
                        ('parent_feature_id', title_id) if title_id else
                        ('imdb_id', imdb_id), ('season_number',
                                               self.video.season),
                        ('query', os.path.basename(self.video.name))),
                timeout=30)
        else:
            res = self.session.get(
                self.server_url + 'subtitles',
                params=(('foreign_parts_only', forced),
                        ('id', title_id) if title_id else ('imdb_id', imdb_id),
                        ('languages', langs.lower()), ('moviehash', file_hash),
                        ('query', os.path.basename(self.video.name))),
                timeout=30)

        if res.status_code == 429:
            raise TooManyRequests()

        elif res.status_code == 503:
            raise ProviderError(res.reason)

        subtitles = []

        try:
            result = res.json()
            if 'data' not in result:
                raise ValueError
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')
        else:
            logging.debug('Query returned {} subtitles'.format(
                len(result['data'])))

            if len(result['data']):
                for item in result['data']:
                    # season/episode details are only present for episodes
                    if 'season_number' in item['attributes'][
                            'feature_details']:
                        season_number = item['attributes']['feature_details'][
                            'season_number']
                    else:
                        season_number = None

                    if 'episode_number' in item['attributes'][
                            'feature_details']:
                        episode_number = item['attributes']['feature_details'][
                            'episode_number']
                    else:
                        episode_number = None

                    if 'moviehash_match' in item['attributes']:
                        moviehash_match = item['attributes']['moviehash_match']
                    else:
                        moviehash_match = False

                    # skip entries that expose no downloadable file
                    if len(item['attributes']['files']):
                        subtitle = OpenSubtitlesComSubtitle(
                            language=Language.fromietf(
                                item['attributes']['language']),
                            forced=item['attributes']['foreign_parts_only'],
                            hearing_impaired=item['attributes']
                            ['hearing_impaired'],
                            page_link=item['attributes']['url'],
                            file_id=item['attributes']['files'][0]['file_id'],
                            releases=item['attributes']['release'],
                            uploader=item['attributes']['uploader']['name'],
                            title=item['attributes']['feature_details']
                            ['movie_name'],
                            year=item['attributes']['feature_details']['year'],
                            season=season_number,
                            episode=episode_number,
                            hash_matched=moviehash_match)
                        subtitle.get_matches(self.video)
                        subtitles.append(subtitle)

        return subtitles