def _get_download_link(self, subtitle):
    """Fetch the subtitle's details page and return its absolute download URL.

    :param subtitle: subtitle whose ``page_link`` points at the details page.
    :return: absolute download URL (``self.server_url`` + relative href).
    :raise ParseResponseError: if the page cannot be parsed or no
        download link is found.
    """
    response = self.session.get(subtitle.page_link, timeout=10)
    self._check_response(response)
    try:
        # The site serves latin-1 encoded pages; ignore undecodable bytes.
        page_soup = ParserBeautifulSoup(
            response.content.decode('iso-8859-1', 'ignore'),
            ['lxml', 'html.parser'])
        links_soup = page_soup.find_all("a", {'class': 'detalle_link'})
        for link_soup in links_soup:
            # Only anchors pointing at the download action ('bajar') count.
            if link_soup['href'].startswith('bajar'):
                return self.server_url + link_soup['href']
    except Exception as e:
        # Chain the original exception so the real parse failure is
        # preserved in the traceback (the bare re-raise discarded it).
        raise ParseResponseError(
            'Error parsing download link: ' + str(e)) from e
    raise ParseResponseError('Download link not found')
def _get_archive(self, content):
    """Wrap downloaded bytes in an archive reader.

    :param content: raw bytes of the downloaded archive.
    :return: an opened :class:`rarfile.RarFile` or :class:`zipfile.ZipFile`.
    :raise ParseResponseError: when the payload is neither rar nor zip.
    """
    # open the archive from an in-memory stream
    stream = io.BytesIO(content)
    if rarfile.is_rarfile(stream):
        logger.debug('Identified rar archive')
        return rarfile.RarFile(stream)
    if zipfile.is_zipfile(stream):
        logger.debug('Identified zip archive')
        return zipfile.ZipFile(stream)
    raise ParseResponseError('Unsupported compressed format')
def _get_subtitle_from_archive(self, archive):
    """Return the bytes of the first subtitle file found in *archive*.

    Hidden files and files without a known subtitle extension are skipped.

    :raise ParseResponseError: when no subtitle file is present.
    """
    for member in archive.namelist():
        # skip hidden files (basename starts with a dot)
        basename = os.path.split(member)[-1]
        if basename.startswith('.'):
            continue
        # keep only files with a recognized subtitle extension
        if member.lower().endswith(SUBTITLE_EXTENSIONS):
            return archive.read(member)
    raise ParseResponseError(
        'Can not find the subtitle in the compressed file')
def _get_subtitle_from_archive(self, archive, subtitle):
    """Pick the best matching subtitle file in *archive* for *subtitle*.

    Each candidate file name is run through guessit and scored against
    ``subtitle.video``; the highest-scoring file's bytes are returned.

    :param archive: opened zip/rar archive.
    :param subtitle: subtitle whose ``video`` the candidates are scored against.
    :raise ParseResponseError: when no candidate scores above zero.
    """
    # some files have a non subtitle with .txt extension
    _tmp = list(SUBTITLE_EXTENSIONS)
    _tmp.remove('.txt')
    _subtitle_extensions = tuple(_tmp)
    _max_score = 0
    _max_name = None
    _scores = get_scores(subtitle.video)
    for name in archive.namelist():
        # discard hidden files
        if os.path.split(name)[-1].startswith('.'):
            continue
        # discard non-subtitle files
        if not name.lower().endswith(_subtitle_extensions):
            continue
        _guess = guessit(name)
        if isinstance(subtitle.video, Episode):
            logger.debug("guessing %s", name)
            # BUGFIX: use .get() — guessit may not detect a season/episode
            # in the file name at all, and indexing with [] raised KeyError
            # instead of simply skipping the non-matching file.
            logger.debug("subtitle S%sE%s video S%sE%s",
                         _guess.get('season'), _guess.get('episode'),
                         subtitle.video.season, subtitle.video.episode)
            if (subtitle.video.episode != _guess.get('episode')
                    or subtitle.video.season != _guess.get('season')):
                logger.debug('subtitle does not match video, skipping')
                continue
        matches = set()
        matches |= guess_matches(subtitle.video, _guess)
        logger.debug('srt matches: %s', matches)
        _score = sum(_scores.get(match, 0) for match in matches)
        if _score > _max_score:
            _max_name = name
            _max_score = _score
            logger.debug("new max: %s %s", name, _score)
    if _max_score > 0:
        logger.debug("returning from archive: %s scored %s",
                     _max_name, _max_score)
        return archive.read(_max_name)
    raise ParseResponseError(
        'Can not find the subtitle in the compressed file')
def _get_subtitle_from_archive(self, archive):
    """Return the bytes of the first subtitle file found in *archive*.

    ``.txt`` is excluded because some archives ship a plain-text info
    file with that extension.

    :raise ParseResponseError: when no subtitle file is present.
    """
    # some files have a non subtitle with .txt extension
    extensions = list(SUBTITLE_EXTENSIONS)
    extensions.remove('.txt')
    allowed_extensions = tuple(extensions)
    for member in archive.namelist():
        basename = os.path.split(member)[-1]
        # discard hidden files
        if basename.startswith('.'):
            continue
        # discard non-subtitle files
        if not member.lower().endswith(allowed_extensions):
            continue
        logger.debug("returning from archive: %s", member)
        return archive.read(member)
    raise ParseResponseError(
        'Can not find the subtitle in the compressed file')
def query(self, keyword, season=None, episode=None, year=None):
    """Search the provider for *keyword* and return all found subtitles.

    When *season* and *episode* are given, an ``SxxEyy`` tag is appended
    to the query; otherwise *year* is appended when given. Result pages
    hold up to 20 entries; fetching continues until a short page is seen.
    """
    query = keyword
    if season and episode:
        query += ' S{season:02d}E{episode:02d}'.format(season=season,
                                                       episode=episode)
    elif year:
        query += ' {:4d}'.format(year)

    params = {
        'buscar': query,  # search string
        'accion': 5,      # action search
        'oxdown': 1,      # order by downloads descending
        'pg': 1,          # page 1
    }

    logger.debug('Searching subtitles %r', query)
    subtitles = []
    language = self.language_list[0]
    search_link = self.server_url + 'index.php'
    while True:
        response = self.session.get(search_link, params=params, timeout=10)
        self._check_response(response)
        try:
            page_subtitles = self._parse_subtitles_page(response, language)
        except Exception as e:
            raise ParseResponseError('Error parsing subtitles list: ' +
                                     str(e))
        subtitles += page_subtitles
        # a short page means this was the last page of results
        if len(page_subtitles) < 20:
            break
        params['pg'] += 1  # search next page
        time.sleep(self.multi_result_throttle)
    return subtitles
def query(self,
          language,
          video_names,
          type,
          keyword=None,
          year=None,
          season=None,
          episode=None,
          imdb_id=None):
    """Search Titulky.com and return a list of subtitle instances.

    Builds a search URL from the given filters, parses the results table
    and processes each row either sequentially or in parallel batches of
    ``self.max_threads`` threads (per ``self.multithreading``).

    :param language: wanted ``Language`` — only 'ces'/'slk'/None are supported;
        any other language short-circuits to an empty result.
    :param video_names: names of the video, forwarded to ``process_row``.
    :param type: 'episode' for series, anything else searched as a film.
    :param keyword: optional full-text search string.
    :param year, season, episode, imdb_id: optional additional filters.
    :return: list of ``self.subtitle_class`` instances.
    :raise ParseResponseError: when an expected page element is missing.
    :raise ProviderError: when a worker thread produced no data.
    """
    ## Build the search URL
    params = {}

    # Keyword
    if keyword:
        params['Fulltext'] = keyword
    # Video type
    if type == 'episode':
        params['Serial'] = 'S'
    else:
        params['Serial'] = 'F'
    # Season / Episode
    if season:
        params['Sezona'] = season
    if episode:
        params['Epizoda'] = episode
    # IMDB ID
    if imdb_id:
        params['IMDB'] = imdb_id[2:]  # Remove the tt from the imdb id
    # Year
    if year:
        params['Rok'] = year
    # Language
    if language == Language('ces'):
        params['Jazyk'] = 'CZ'
    elif language == Language('slk'):
        params['Jazyk'] = 'SK'
    elif language == None:
        params['Jazyk'] = ''
    else:
        # Unsupported language: nothing to search for on this provider.
        return []
    # Status
    if self.approved_only:
        logger.debug(f"Titulky.com: Searching only for approved subtitles")
        params['ASchvalene'] = '1'
    else:
        params['ASchvalene'] = ''

    search_url = self.build_search_url(params)

    ## Search results page parsing
    html_src = self.fetch_page(search_url)
    search_page_soup = ParserBeautifulSoup(html_src,
                                           ['lxml', 'html.parser'])

    # If there is a message containing "Žádny odpovídající záznam", it means
    # that there are no results. If that's the case, return an empty list.
    error_message = search_page_soup.select('.panel-body > strong')
    if len(error_message) > 0 and 'Žádný odpovídající záznam' in \
            error_message[0].get_text(strip=True):
        logger.info("Titulky.com: No results found")
        return []

    # Get the table containing the search results
    table = search_page_soup.find('table', class_='table')
    if not table:
        logger.debug("Titulky.com: Could not find table")
        raise ParseResponseError(
            "Could not find table. Did the HTML source change?")

    # Get table body containing rows of subtitles
    table_body = table.find('tbody')
    if not table_body:
        logger.debug("Titulky.com: Could not find table body")
        raise ParseResponseError(
            "Could not find table body. Did the HTML source change?")

    ## Loop over all subtitles on the first page and put them in a list
    subtitles = []
    rows = table_body.find_all('tr')

    if not self.multithreading:
        # Process the rows sequentially
        logger.info("Titulky.com: processing results in sequence")
        for i, row in enumerate(rows):
            sub_info = self.process_row(row, video_names, search_url)

            # If subtitle info was returned, then everything was okay
            # and we can instationate it and add it to the list
            if sub_info:
                logger.debug(
                    f"Titulky.com: Sucessfully retrieved subtitle info, row: {i}"
                )

                # If we found the subtitle by IMDB ID, no need to get it from details page
                sub_imdb_id = imdb_id or sub_info['imdb_id']

                subtitle_instance = self.subtitle_class(
                    sub_info['id'],
                    sub_imdb_id,
                    sub_info['language'],
                    sub_info['names'],
                    season,
                    episode,
                    sub_info['year'],
                    sub_info['releases'],
                    sub_info['fps'],
                    sub_info['uploader'],
                    sub_info['approved'],
                    sub_info['details_link'],
                    sub_info['download_link'],
                    skip_wrong_fps=self.skip_wrong_fps,
                    asked_for_episode=(type == 'episode'))
                subtitles.append(subtitle_instance)
            else:
                # No subtitle info was returned, i. e. something unexpected
                # happend during subtitle details page fetching and processing.
                logger.debug(
                    f"Titulky.com: No subtitle info retrieved, row: {i}")
    else:
        # Process the rows in paralell
        logger.info(
            f"Titulky.com: processing results in parelell, {self.max_threads} rows at a time."
        )

        # One slot per row; worker threads write their result into
        # threads_data[thread_id] (shared list, index-disjoint writes).
        threads = [None] * len(rows)
        threads_data = [None] * len(rows)

        # Process rows in parallel, self.max_threads at a time.
        cycles = math.ceil(len(rows) / self.max_threads)
        for i in range(cycles):
            # Batch number i
            starting_index = i * self.max_threads  # Inclusive
            ending_index = starting_index + self.max_threads  # Non-inclusive

            # Create threads for all rows in this batch
            for j in range(starting_index, ending_index):
                # Check if j-th row exists (last batch may be partial)
                if j < len(rows):
                    # Row number j
                    logger.debug(
                        f"Titulky.com: Creating thread {j} (batch: {i})")
                    # Create a thread for row j and start it
                    threads[j] = Thread(
                        target=self.process_row,
                        args=[rows[j], video_names, search_url],
                        kwargs={
                            'thread_id': j,
                            'threads_data': threads_data
                        })
                    threads[j].start()

            # Wait for all created threads to finish before moving to another batch of rows
            for j in range(starting_index, ending_index):
                # Check if j-th row exists
                if j < len(rows):
                    threads[j].join()

        # Process the resulting data from all threads
        for i in range(len(threads_data)):
            thread_data = threads_data[i]

            # If the thread returned didn't return anything, but expected a dict object
            if not thread_data:
                raise ProviderError(f"No data returned from thread ID: {i}")

            # If an exception was raised in a thread, raise it again here
            if 'exception' in thread_data and thread_data['exception']:
                logger.debug(
                    f"Titulky.com: An error occured while processing a row in the thread ID {i}"
                )
                raise thread_data['exception']

            # If the thread returned a subtitle info, great, instantiate it and add it to the list
            if 'sub_info' in thread_data and thread_data['sub_info']:
                # Instantiate the subtitle object
                logger.debug(
                    f"Titulky.com: Sucessfully retrieved subtitle info, thread ID: {i}"
                )
                sub_info = thread_data['sub_info']

                # If we found the subtitle by IMDB ID, no need to get it from details page
                sub_imdb_id = imdb_id or sub_info['imdb_id']

                subtitle_instance = self.subtitle_class(
                    sub_info['id'],
                    sub_imdb_id,
                    sub_info['language'],
                    sub_info['names'],
                    season,
                    episode,
                    sub_info['year'],
                    sub_info['releases'],
                    sub_info['fps'],
                    sub_info['uploader'],
                    sub_info['approved'],
                    sub_info['details_link'],
                    sub_info['download_link'],
                    skip_wrong_fps=self.skip_wrong_fps,
                    asked_for_episode=(type == 'episode'))
                subtitles.append(subtitle_instance)
            else:
                # The thread returned data, but it didn't contain a subtitle info, i. e. something unexpected
                # happend during subtitle details page fetching and processing.
                logger.debug(
                    f"Titulky.com: No subtitle info retrieved, thread ID: {i}"
                )

    # Clean up: free the parsed DOM tree early
    search_page_soup.decompose()
    search_page_soup = None

    logger.debug(f"Titulky.com: Found subtitles: {subtitles}")

    return subtitles
def parse_details(self, details_url, search_url):
    """Fetch a Titulky.com subtitle details page and extract its metadata.

    :param details_url: URL of the subtitle details page.
    :param search_url: search results URL, sent as the Referer header.
    :return: dict with keys ``releases``, ``language``, ``uploader``,
        ``fps``, ``year`` and ``imdb_id``, or ``False`` when the details
        container is missing (e.g. the subtitle was removed and the
        request got redirected).
    :raise ParseResponseError: when an expected page element is absent.
    """
    html_src = self.fetch_page(details_url, ref=search_url)
    details_page_soup = ParserBeautifulSoup(html_src,
                                            ['lxml', 'html.parser'])

    details_container = details_page_soup.find('div', class_='detail')
    if not details_container:
        # The subtitles could be removed and got redirected to a different
        # page. Better treat this silently.
        logger.info(
            "Titulky.com: Could not find details div container. Skipping.")
        return False

    ### IMDB ID
    imdb_id = None
    imdb_tag = details_container.find('a', attrs={'target': 'imdb'})
    if imdb_tag:
        imdb_url = imdb_tag.get('href')
        # BUGFIX: guard the findall result — an href without a ttXXXXXXX
        # id used to raise IndexError on [0].
        imdb_matches = re.findall(r'tt(\d+)', imdb_url)
        imdb_id = imdb_matches[0] if imdb_matches else None

    if not imdb_id:
        logger.debug("Titulky.com: No IMDB ID supplied on details page.")

    ### RELEASE
    release = None
    release_tag = details_container.find('div', class_='releas')
    if not release_tag:
        raise ParseResponseError(
            "Could not find release tag. Did the HTML source change?")
    release = release_tag.get_text(strip=True)
    if not release:
        logger.debug(
            "Titulky.com: No release information supplied on details page."
        )

    ### LANGUAGE
    # Language is signalled by a flag image; ambiguous pages (both or
    # neither flag) yield None.
    language = None
    czech_flag = details_container.select('img[src*=\'flag-CZ\']')
    slovak_flag = details_container.select('img[src*=\'flag-SK\']')
    if czech_flag and not slovak_flag:
        language = Language('ces')
    elif slovak_flag and not czech_flag:
        language = Language('slk')
    if not language:
        logger.debug(
            "Titulky.com: No language information supplied on details page."
        )

    ### UPLOADER
    uploader = None
    uploader_tag = details_container.find('div', class_='ulozil')
    if not uploader_tag:
        raise ParseResponseError(
            "Could not find uploader tag. Did the HTML source change?")
    uploader_anchor_tag = uploader_tag.find('a')
    if not uploader_anchor_tag:
        raise ParseResponseError(
            "Could not find uploader anchor tag. Did the HTML source change?"
        )
    # BUGFIX: check .string (None for anchors with nested markup) instead
    # of the already-verified tag, avoiding AttributeError on .strip().
    uploader = uploader_anchor_tag.string.strip(
    ) if uploader_anchor_tag.string else None
    if not uploader:
        logger.debug(
            "Titulky.com: No uploader name supplied on details page.")

    ### FPS
    fps = None
    fps_icon_tag_selection = details_container.select(
        'img[src*=\'Movieroll\']')
    # BUGFIX: the original condition used 'and', which indexed an EMPTY
    # selection ([0] -> IndexError) precisely in the case the guard was
    # meant to catch; 'or' raises the intended ParseResponseError.
    if not fps_icon_tag_selection or not hasattr(
            fps_icon_tag_selection[0], 'parent'):
        raise ParseResponseError(
            "Could not find parent of the fps icon tag. Did the HTML source change?"
        )
    fps_icon_tag = fps_icon_tag_selection[0]
    parent_text = fps_icon_tag.parent.get_text(strip=True)
    match = re.findall(r'(\d+,\d+) fps', parent_text)
    # If the match is found, change the decimal separator to a dot and
    # convert to float
    fps = float(match[0].replace(',', '.')) if len(match) > 0 else None
    if not fps:
        logger.debug("Titulky.com: No fps supplied on details page.")

    ### YEAR
    year = None
    h1_tag = details_container.find('h1', id='titulky')
    if not h1_tag:
        raise ParseResponseError(
            "Could not find h1 tag. Did the HTML source change?")
    # The h1 tag contains the name of the subtitle and a year
    h1_texts = [text for text in h1_tag.stripped_strings]
    year = int(h1_texts[1]) if len(h1_texts) > 1 else None
    if not year:
        logger.debug("Titulky.com: No year supplied on details page.")

    # Clean up: free the parsed DOM tree early
    details_page_soup.decompose()
    details_page_soup = None

    # Return the subtitle details
    return {
        'releases': [release],
        'language': language,
        'uploader': uploader,
        'fps': fps,
        'year': year,
        'imdb_id': imdb_id
    }