def download_subtitle(self, subtitle):
    """Download *subtitle* and store its data on ``subtitle.content``.

    Only :class:`Subs4FreeSubtitle` instances are handled; anything else is
    silently ignored. The provider serves a page with a hidden subtitle id
    and an image button; the real download is a POST that mimics a click at
    a random coordinate on that image.
    """
    if not isinstance(subtitle, Subs4FreeSubtitle):
        return

    # download the subtitle
    logger.info('Downloading subtitle %r', subtitle)
    response = self.session.get(subtitle.download_link,
                                headers={'Referer': subtitle.page_link},
                                timeout=10)
    response.raise_for_status()

    if not response.content:
        logger.debug('Unable to download subtitle. No data returned from provider')
        return

    page = ParserBeautifulSoup(response.content, ['lxml', 'html.parser'])
    id_input = page.select_one('input[name="id"]')
    image_input = page.select_one('input[type="image"]')

    subtitle_id = id_input['value'] if id_input else None
    # image dimensions bound the fake click coordinates sent below;
    # the page reports them as e.g. '120px', hence the strip
    width = int(str(image_input['width']).strip('px')) if image_input else 0
    height = int(str(image_input['height']).strip('px')) if image_input else 0

    if not subtitle_id:
        logger.debug('Unable to download subtitle. No download link found')
        return

    self.apply_anti_block(subtitle)

    # simulate clicking somewhere on the image button
    download_url = self.server_url + self.download_url
    response = self.session.post(download_url,
                                 data={'id': subtitle_id,
                                       'x': random.randint(0, width),
                                       'y': random.randint(0, height)},
                                 headers={'Referer': subtitle.download_link},
                                 timeout=10)
    response.raise_for_status()

    if not response.content:
        logger.debug('Unable to download subtitle. No data returned from provider')
        return

    # the payload may be an archive or a bare subtitle file
    archive = _get_archive(response.content)
    payload = _get_subtitle_from_archive(archive) if archive else response.content
    if payload:
        subtitle.content = fix_line_ending(payload)
    else:
        logger.debug('Could not extract subtitle from %r', archive)
def download_subtitle(self, subtitle):
    """Download *subtitle* and store its data on ``subtitle.content``.

    Only :class:`Subs4SeriesSubtitle` instances are handled; anything else
    is silently ignored. The download page either carries a direct anchor
    (``a.style55ws``) or a POST form whose ``action`` points at the file.
    """
    if not isinstance(subtitle, Subs4SeriesSubtitle):
        return

    # download the subtitle
    logger.info('Downloading subtitle %r', subtitle)
    response = self.session.get(subtitle.download_link,
                                headers={'Referer': subtitle.page_link},
                                timeout=10)
    response.raise_for_status()

    if not response.content:
        logger.debug('Unable to download subtitle. No data returned from provider')
        return

    page = ParserBeautifulSoup(response.content, ['lxml', 'html.parser'])

    # prefer the direct link; fall back to the POST form's action URL
    anchor = page.select_one('a.style55ws')
    if anchor:
        target = anchor['href']
    else:
        form = page.select_one('form[method="post"]')
        target = form['action'] if form else None

    if not target:
        logger.debug('Unable to download subtitle. No download link found')
        return

    self.apply_anti_block(subtitle)

    download_url = self.server_url + target
    response = self.session.get(download_url,
                                headers={'Referer': subtitle.download_link},
                                timeout=10)
    response.raise_for_status()

    if not response.content:
        logger.debug('Unable to download subtitle. No data returned from provider')
        return

    # the payload may be an archive or a bare subtitle file
    archive = _get_archive(response.content)
    payload = _get_subtitle_from_archive(archive) if archive else response.content
    if payload:
        subtitle.content = fix_line_ending(payload)
    else:
        logger.debug('Could not extract subtitle from %r', archive)
def query(self, title):
    """Search the provider for *title* and return the matching subtitles.

    Runs the search, then fetches every result's detail page to collect
    the release title, year, IMDB id and download link.

    :param str title: title to search for.
    :return: list of found subtitles.

    Fixes over the previous version: the detail-page response is checked
    with ``raise_for_status()`` (an HTTP error page was previously parsed
    as if it were a result), the detail soup no longer shadows the search
    soup, the parsed title no longer shadows the ``title`` parameter, and
    result cells without a link are skipped instead of crashing.
    """
    r = self.session.get(self.search_url, params={'q': title}, timeout=10)
    r.raise_for_status()

    if not r.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                               ['lxml', 'html.parser'])

    subtitles = []
    # loop over subtitle cells
    for row in soup.select('.eBlock'):
        result_anchor_el = row.select_one('.eTitle > a')
        if not result_anchor_el:
            # malformed result cell: nothing to follow
            continue

        # page link
        page_link = result_anchor_el.get('href')

        # fetch/parse additional info
        r = self.session.get(page_link, timeout=10)
        r.raise_for_status()
        page_soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                        ['lxml', 'html.parser'])

        # title: last entry of the ' / '-separated header
        movie_titles_string = page_soup.select_one('.main-header').text.strip()
        result_title = movie_titles_string.split(' / ')[-1]

        # year
        year = page_soup.select_one('#film-page-year').text.strip()

        # imdb id
        imdb_link = page_soup.select_one('#actors-page > a').get('href')
        imdb_id = imdb_link.split('/')[-2]

        # download link
        download_link = self.server_url + page_soup.select_one('.hvr').get('href')

        # create/add the subtitle
        subtitle = self.subtitle_class(Language.fromalpha2('lv'), page_link,
                                       download_link, result_title, year, imdb_id)
        logger.debug('subtitri.id.lv: Found subtitle %r', subtitle)
        subtitles.append(subtitle)

    return subtitles
def query(self, movie_id, title, year):
    """Return the subtitles listed on a movie page.

    When *movie_id* is given its page is fetched directly; otherwise the
    search URL is built from *title* and *year*. The year attached to the
    returned subtitles is the one parsed from the page header, not the
    *year* argument.

    Fix: the page-parsed year is stored in ``year_num`` so it no longer
    shadows the ``year`` parameter (matching the sibling provider's query).
    """
    # get the season list of the show
    logger.info('Getting the subtitle list of show id %s', movie_id)
    if movie_id:
        page_link = self.server_url + '/' + movie_id
    else:
        page_link = self.server_url + self.search_url.format(' '.join([title, str(year)]))

    r = self.session.get(page_link, timeout=10)
    r.raise_for_status()

    if not r.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(r.content, ['html.parser'])

    # year as printed in the page header, if present
    year_num = None
    year_element = soup.select_one('td#dates_header > table div')
    matches = False
    if year_element:
        matches = year_re.match(str(year_element.contents[2]).strip())
    if matches:
        year_num = int(matches.group(1))

    title_tag = soup.select_one('td#dates_header > table u')
    show_title = str(title_tag.contents[0]).strip() if title_tag else None

    subtitles = []
    # loop over subtitle rows
    for subs_tag in soup.select('.movie-details'):
        # read common info
        version = subs_tag.find('span').text
        download_link = self.server_url + subs_tag.find('a')['href']
        uploader = subs_tag.select_one('.movie-info').find('p').find('a').text
        # language code is embedded in the sprite class name before 'gif'
        language_code = subs_tag.select_one('.sprite')['class'][1].split('gif')[0]
        language = Language.fromietf(language_code)

        subtitle = self.subtitle_class(language, page_link, show_title, year_num,
                                       version, download_link, uploader)
        logger.debug('Found subtitle {!r}'.format(subtitle))
        subtitles.append(subtitle)

    return subtitles
def query(self, movie_id, title, year):
    """Return the subtitles listed on a movie page.

    When *movie_id* is given its page is fetched directly; otherwise the
    search URL is built from *title* and *year*. The year attached to the
    returned subtitles is the one parsed from the page header (``year_num``),
    not the *year* argument.

    Fix: the loop variable no longer reuses the name ``subtitle`` for both
    the HTML tag and the constructed subtitle object; the tag is ``subs_tag``
    as in the sibling providers.
    """
    # get the season list of the show
    logger.info('Getting the subtitle list of show id %s', movie_id)
    if movie_id:
        page_link = self.server_url + '/' + movie_id
    else:
        page_link = self.server_url + text_type(self.search_url).format(
            ' '.join([title, str(year)]))

    r = self.session.get(page_link, timeout=10)
    r.raise_for_status()

    if not r.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(r.content, ['html.parser'])

    # year as printed in the page header, if present
    year_num = None
    year_element = soup.select_one('td#dates_header > table div')
    matches = False
    if year_element:
        matches = year_re.match(str(year_element.contents[2]).strip())
    if matches:
        year_num = int(matches.group(1))

    title_element = soup.select_one('td#dates_header > table u')
    show_title = str(title_element.contents[0]).strip() if title_element else None

    subtitles = []
    # loop over subtitle rows
    for subs_tag in soup.select('table.table_border div[align="center"] > div'):
        # read common info
        version = subs_tag.find('b').text
        download_link = self.server_url + subs_tag.find('a')['href']
        # language code is the flag image's file name (without extension)
        language = Language.fromalpha2(
            subs_tag.find('img')['src'].split('/')[-1].split('.')[0])

        subtitle = self.subtitle_class(language, page_link, show_title, year_num,
                                       version, download_link)
        logger.debug('Found subtitle {!r}'.format(subtitle))
        subtitles.append(subtitle)

    return subtitles
def query(self, show_id, series, season, episode, title):
    """Fetch the episode page of *show_id* and return its subtitles.

    *show_id*, *season* and *episode* are all required; without them an
    empty list is returned immediately. The show title and year are read
    from the page header and attached to every subtitle.
    """
    # get the season list of the show
    logger.info('Getting the subtitle list of show id %s', show_id)
    if not all((show_id, season, episode)):
        return []
    page_link = self.server_url + self.episode_link.format(
        show_id=show_id, season=season, episode=episode)

    response = self.session.get(page_link, timeout=10)
    response.raise_for_status()

    if not response.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(response.content, ['lxml', 'html.parser'])

    # year of the show as printed in the page header (may be absent)
    header_div = soup.select_one('#dates_header_br > table div')
    matches = year_re.match(str(header_div.contents[2]).strip())
    year = int(matches.group(1)) if matches else None

    show_title = str(soup.select_one('#dates_header_br > table div u').string).strip()

    subtitles = []
    # loop over subtitle rows
    for row in soup.select('table .seeDark,.seeMedium'):
        bold_tags = row.find_all('b')
        version = bold_tags[0].text
        uploader = bold_tags[1].text
        download_link = self.server_url + row.find('a')['href']
        # language code is the flag image's file name (without extension)
        language = Language.fromalpha2(
            row.find('img')['src'].split('/')[-1].split('.')[0])

        subtitle = self.subtitle_class(language, page_link, show_title, year,
                                       version, download_link, uploader)
        logger.debug('Found subtitle %r', subtitle)
        subtitles.append(subtitle)

    return subtitles
def query(self, show_id, series, season, episode, title):
    """Return subtitles from an episode or movie page of *show_id*.

    With *show_id*, *season* and *episode* the episode page is queried;
    with *show_id* and *title* the movie page is queried; otherwise an
    empty list is returned. A 404 from the provider also yields ``[]``.

    Improvement: the page-level metadata (series/season/episode/title and
    the language) is identical for every subtitle row, so it is now parsed
    once before the loop instead of once per row. The ``if rows:`` guard
    keeps the empty-result behavior exactly as before.
    """
    # get the season list of the show
    logger.info('Getting the subtitle list of show id %s', show_id)
    is_episode = False
    if all((show_id, season, episode)):
        is_episode = True
        page_link = self.server_url + self.episode_link.format(
            show_id=show_id, season=season, episode=episode)
    elif all((show_id, title)):
        page_link = self.server_url + self.movie_link.format(show_id)
    else:
        return []

    r = self.session.get(page_link, timeout=10)
    if r.status_code == 404:
        return []
    r.raise_for_status()

    if not r.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    year = None
    if not is_episode:
        year = int(soup.select_one('span.year').text)

    rows = soup.select('div[id="subtitles"] tr[data-id]')
    if rows:
        # page metadata is loop-invariant: parse it once
        language = Language.fromalpha2('el')
        if is_episode:
            # read the episode info
            episode_numbers = soup.select_one(
                '#summary-wrapper > div.container.summary span.main-title-sxe'
            ).text
            season = None
            episode = None
            matches = episode_re.match(episode_numbers.strip())
            if matches:
                season = int(matches.group(1))
                episode = int(matches.group(2))
            series = soup.select_one(
                '#summary-wrapper > div.summary h2 > a').string.strip()
            title = soup.select_one(
                '#summary-wrapper > div.container.summary span.main-title').text
        else:
            # read the movie info
            series = None
            season = None
            episode = None
            title = str(soup.select_one(
                '#summary-wrapper > div.summary h1').contents[0]).strip()

    subtitles = []
    # loop over subtitle rows
    for subs_tag in rows:
        version = subs_tag.find('td', {'class': 'name'}).text
        download_link = subs_tag.find(
            'a', {'class': 'btn-success'})['href'].strip('\'')

        subtitle = self.subtitle_class(language, page_link, series, season,
                                       episode, title, year, version,
                                       download_link)
        logger.debug('Found subtitle %r', subtitle)
        subtitles.append(subtitle)

    return subtitles