Python order_dict Examples, collections.order_dict Python Examples

Example #1

0

Show file

    def get_subtitles(self, video_name, sub_num=5):
        """Search ZIMUZU for subtitles matching a video name.

        The first query uses every keyword returned by
        ``Downloader.get_keywords``. While fewer than ``sub_num`` results
        have been collected and more than one keyword is left, the last
        keyword is dropped and the search is repeated.

        Args:
            video_name: Video name handed to ``Downloader.get_keywords``.
            sub_num: Number of results at which searching stops (default 5).

        Returns:
            An ``order_dict`` mapping subtitle name to
            ``{'lan': score, 'link': url, 'session': None}``. ``lan`` is a
            sum of flags taken from the link text: '英文' (English) +1,
            '繁体' (Traditional) +2, '简体' (Simplified) +4, '中英'
            (Chinese-English) +8. If the first result scores below 8 the
            mapping is re-sorted by ``lan`` in descending order.
        """

        print(prefix + ' Searching ZIMUZU...', end='\r')

        keywords, info_dict = Downloader.get_keywords(video_name)
        keyword = ' '.join(keywords)

        sub_dict = order_dict()
        limit_reached = False
        # Use the session as a context manager so its pooled connections are
        # released even when a request or the parsing raises.
        with requests.session() as s:
            while True:
                # Query with the current keyword string.
                r = s.get(ZimuzuDownloader.search_url.format(keyword),
                          headers=Downloader.header,
                          timeout=10)
                bs_obj = BeautifulSoup(r.text, 'html.parser')
                tab_text = bs_obj.find('div', {'class': 'article-tab'}).text
                tab_text = tab_text.encode('utf8') if py == 2 else tab_text
                # '字幕(0)' in the tab header means zero subtitle hits.
                if '字幕(0)' not in tab_text:
                    for one_box in bs_obj.find_all('div',
                                                   {'class': 'search-item'}):
                        title_tag = one_box.find('strong',
                                                 {'class': 'list_title'})
                        sub_name = ZimuzuDownloader.choice_prefix + \
                            title_tag.text
                        if py == 2:
                            sub_name = sub_name.encode('utf8')

                        # Movies skip entries labelled '美剧字幕'
                        # (US TV series subtitles).
                        if (info_dict['type'] == 'movie'
                                and '美剧字幕' in sub_name):
                            continue

                        a = one_box.find('a')
                        text = a.text.encode('utf8') if py == 2 else a.text
                        sub_url = ZimuzuDownloader.site_url + a.attrs['href']
                        type_score = 0
                        type_score += ('英文' in text) * 1
                        type_score += ('繁体' in text) * 2
                        type_score += ('简体' in text) * 4
                        type_score += ('中英' in text) * 8
                        sub_dict[sub_name] = {
                            'lan': type_score,
                            'link': sub_url,
                            'session': None
                        }
                        if len(sub_dict) >= sub_num:
                            # Enough results collected; stop searching.
                            limit_reached = True
                            break

                if limit_reached or len(keywords) <= 1:
                    break

                # Not enough results: drop the last keyword and rebuild the
                # query from the remaining ones. A plain str.replace() on the
                # query would also delete the same substring inside other
                # keywords.
                keywords.pop()
                keyword = ' '.join(keywords)

        # The first candidate is not bilingual: order by language score.
        if sub_dict and next(iter(sub_dict.values()))['lan'] < 8:
            sub_dict = order_dict(
                sorted(sub_dict.items(),
                       key=lambda e: e[1]['lan'],
                       reverse=True))
        return sub_dict

Example #2

0

Show file

File: zimuzu.py Project: vcan/GetSubtitles

    def get_subtitles(self, video_name, sub_num=5):
        """Search ZIMUZU for subtitles matching a video name.

        Starts with all keywords from ``Downloader.get_keywords`` and drops
        the last keyword after each round until ``sub_num`` results are
        collected or only one keyword is left.

        Args:
            video_name: Video name handed to ``Downloader.get_keywords``.
            sub_num: Number of results at which searching stops (default 5).

        Returns:
            An ``order_dict`` mapping subtitle name to
            ``{"lan": score, "link": url, "session": None}``. ``lan`` adds
            1 for "英文", 2 for "繁体", 4 for "简体" and 8 for "中英" found in
            the link text. If the first entry scores below 8 the mapping is
            re-sorted by ``lan`` in descending order.
        """

        print(PREFIX + " Searching ZIMUZU...", end="\r")

        keywords, info_dict = Downloader.get_keywords(video_name)
        keyword = " ".join(keywords)

        sub_dict = order_dict()
        limit_reached = False
        # Close the session (and its pooled connections) on every exit path.
        with requests.session() as s:
            while True:
                # Query with the current keyword string.
                r = s.get(
                    ZimuzuDownloader.search_url.format(keyword),
                    headers=Downloader.header,
                    timeout=10,
                )
                bs_obj = BeautifulSoup(r.text, "html.parser")
                tab_text = bs_obj.find("div", {"class": "article-tab"}).text
                # "字幕(0)" in the tab header means zero subtitle hits.
                if "字幕(0)" not in tab_text:
                    for one_box in bs_obj.find_all(
                        "div", {"class": "search-item"}
                    ):
                        sub_name = (
                            ZimuzuDownloader.choice_prefix
                            + one_box.find(
                                "strong", {"class": "list_title"}
                            ).text
                        )

                        # Movies skip entries labelled "美剧字幕"
                        # (US TV series subtitles).
                        if (
                            info_dict["type"] == "movie"
                            and "美剧字幕" in sub_name
                        ):
                            continue

                        a = one_box.find("a")
                        text = a.text
                        sub_url = ZimuzuDownloader.site_url + a.attrs["href"]
                        type_score = 0
                        type_score += ("英文" in text) * 1
                        type_score += ("繁体" in text) * 2
                        type_score += ("简体" in text) * 4
                        type_score += ("中英" in text) * 8
                        sub_dict[sub_name] = {
                            "lan": type_score,
                            "link": sub_url,
                            "session": None,
                        }
                        if len(sub_dict) >= sub_num:
                            # Enough results collected; stop searching.
                            limit_reached = True
                            break

                if limit_reached or len(keywords) <= 1:
                    break

                # Too few results: drop the last keyword and rebuild the
                # query. str.replace() on the joined query would also strip
                # the same substring out of the remaining keywords.
                keywords.pop()
                keyword = " ".join(keywords)

        # The first candidate is not bilingual: order by language score.
        if sub_dict and next(iter(sub_dict.values()))["lan"] < 8:
            sub_dict = order_dict(
                sorted(
                    sub_dict.items(), key=lambda e: e[1]["lan"], reverse=True
                )
            )
        return sub_dict

Example #3

0

Show file

File: zimuzu.py Project: plmxs2017/GetSubtitles

    def get_subtitles(self, keywords, sub_num=5):
        """Search ZIMUZU with a list of keywords and return the hits.

        The query is the space-joined keyword list. While fewer than
        ``sub_num`` results have been collected and more than one keyword is
        left, the last keyword is dropped and the search is repeated.

        Args:
            keywords: Iterable of keyword strings. It is copied, so the
                caller's object is not modified.
            sub_num: Number of results at which searching stops (default 5).

        Returns:
            An ``order_dict`` mapping ``'[ZMZ]' + title`` to
            ``{'lan': score, 'link': url}``. ``lan`` adds 1 for '英文', 2 for
            '繁体', 4 for '简体' and 8 for '中英' found in the link text. If
            the first entry scores below 8 the mapping is re-sorted by
            ``lan`` in descending order.
        """

        print(prefix + ' Searching ZIMUZU...', end='\r')

        keywords = list(keywords)
        keyword = ' '.join(keywords)

        sub_dict = order_dict()
        limit_reached = False
        # Close the session (and its pooled connections) on every exit path.
        with requests.session() as s:
            while True:
                # Query with the current keyword string.
                r = s.get(self.search_url.format(keyword),
                          headers=self.headers,
                          timeout=10)
                bs_obj = BeautifulSoup(r.text, 'html.parser')
                tab_text = bs_obj.find('div', {'class': 'article-tab'}).text
                tab_text = tab_text.encode('utf8') if py == 2 else tab_text
                # '字幕(0)' in the tab header means zero subtitle hits.
                if '字幕(0)' not in tab_text:
                    for one_box in bs_obj.find_all('div',
                                                   {'class': 'search-item'}):
                        sub_name = '[ZMZ]' + \
                            one_box.find('p').find('font').text
                        if py == 2:
                            sub_name = sub_name.encode('utf8')
                        a = one_box.find('a')
                        text = a.text.encode('utf8') if py == 2 else a.text
                        sub_url = self.site_url + a.attrs['href']
                        type_score = 0
                        type_score += ('英文' in text) * 1
                        type_score += ('繁体' in text) * 2
                        type_score += ('简体' in text) * 4
                        type_score += ('中英' in text) * 8
                        sub_dict[sub_name] = {'lan': type_score,
                                              'link': sub_url}
                        if len(sub_dict) >= sub_num:
                            # Enough results collected; stop searching.
                            limit_reached = True
                            break

                if limit_reached or len(keywords) <= 1:
                    break

                # Too few results: drop the last keyword and rebuild the
                # query. str.replace() on the joined query would also strip
                # the same substring out of the remaining keywords.
                keywords.pop()
                keyword = ' '.join(keywords)

        # The first candidate is not bilingual: order by language score.
        if sub_dict and next(iter(sub_dict.values()))['lan'] < 8:
            sub_dict = order_dict(
                sorted(sub_dict.items(),
                       key=lambda e: e[1]['lan'],
                       reverse=True))
        return sub_dict

Example #4

0

Show file

File: subhd.py Project: awesome-archive/GetSubtitles

    def get_subtitles(self, keywords, sub_num=5):
        """Search SUBHD with a list of keywords and return an ordered dict.

        Args:
            keywords: Keyword list in descending order of importance. It is
                copied, so the caller's object is not modified.
            sub_num: Number of subtitle results wanted, 5 by default.

        Returns:
            An ``order_dict`` of the form
            ``{'[SUBHD]' + name: {'lan': score, 'link': url}}``. ``lan`` adds
            1 for English ('英文'), 2 for Traditional ('繁体'), 4 for
            Simplified ('简体') and 8 for bilingual ('双语'). If the first
            entry scores below 8 the mapping is re-sorted by ``lan`` in
            descending order.
        """

        print('├ Searching SUBHD...', end='\r')

        keywords = list(keywords)
        keyword = ' '.join(keywords)

        sub_dict = order_dict()
        limit_reached = False
        # Close the session (and its pooled connections) on every exit path.
        with requests.session() as s:
            while True:
                # Query with the current keyword string. The timeout keeps an
                # unresponsive server from blocking this call indefinitely.
                r = s.get(self.search_url + keyword,
                          headers=self.headers,
                          timeout=10)
                bs_obj = BeautifulSoup(r.text, 'html.parser')
                # '总共 0 条' ("0 items in total") marks an empty result page.
                if '总共 0 条' not in bs_obj.find('small').text:
                    for one_box in bs_obj.find_all('div', {'class': 'box'}):
                        a = one_box.find('div', {'class': 'd_title'}).find('a')
                        sub_url = self.site_url + a.attrs['href']
                        sub_name = '[SUBHD]' + a.text
                        # Only links whose href contains '/ar1/' are kept.
                        if '/ar1/' in a.attrs['href']:
                            box_text = one_box.text
                            type_score = 0
                            type_score += ('英文' in box_text) * 1
                            type_score += ('繁体' in box_text) * 2
                            type_score += ('简体' in box_text) * 4
                            type_score += ('双语' in box_text) * 8
                            sub_dict[sub_name] = {'lan': type_score,
                                                  'link': sub_url}
                        if len(sub_dict) >= sub_num:
                            # Enough results collected; stop searching.
                            limit_reached = True
                            break

                if limit_reached or len(keywords) <= 1:
                    break

                # Too few results: drop the last keyword and rebuild the
                # query. str.replace() on the joined query would also strip
                # the same substring out of the remaining keywords.
                keywords.pop()
                keyword = ' '.join(keywords)

        # The first candidate is not bilingual: order by language score.
        if sub_dict and next(iter(sub_dict.values()))['lan'] < 8:
            sub_dict = order_dict(
                sorted(sub_dict.items(),
                       key=lambda e: e[1]['lan'],
                       reverse=True))
        return sub_dict

Example #5

0

Show file

File: zimuzu.py Project: dindom999/getsubtitle

    def get_subtitles(self, keywords, sub_num=5):
        """Search ZIMUZU with a list of keywords and return the hits.

        The query is the space-joined keyword list. While fewer than
        ``sub_num`` results have been collected and more than one keyword is
        left, the last keyword is dropped and the search is repeated.

        Args:
            keywords: Iterable of keyword strings. It is copied, so the
                caller's object is not modified.
            sub_num: Number of results at which searching stops (default 5).

        Returns:
            An ``order_dict`` mapping ``"[ZMZ]" + title`` to
            ``{"lan": ..., "link": url, "version": text}``, where ``lan`` is
            whatever ``get_type_score`` returns for the link text. If the
            first entry's ``lan`` is below 8 the mapping is re-sorted by
            ``lan`` in descending order.
        """

        print(prefix + " Searching ZIMUZU...", end="\r")

        keywords = list(keywords)
        keyword = " ".join(keywords)

        sub_dict = order_dict()
        limit_reached = False
        # Close the session (and its pooled connections) on every exit path.
        with requests.session() as s:
            while True:
                # Query with the current keyword string.
                r = s.get(
                    self.search_url.format(keyword),
                    headers=self.headers,
                    timeout=10,
                )
                bs_obj = BeautifulSoup(r.text, "html.parser")
                tab_text = bs_obj.find("div", {"class": "article-tab"}).text
                # "字幕(0)" in the tab header means zero subtitle hits.
                if "字幕(0)" not in tab_text:
                    for one_box in bs_obj.find_all(
                        "div", {"class": "search-item"}
                    ):
                        sub_name = "[ZMZ]" + one_box.find("p").find("font").text
                        a = one_box.find("a")
                        sub_dict[sub_name] = {
                            "lan": get_type_score(a.text),
                            "link": self.site_url + a.attrs["href"],
                            "version": one_box.find("font", "f4").text,
                        }
                        if len(sub_dict) >= sub_num:
                            # Enough results collected; stop searching.
                            limit_reached = True
                            break

                if limit_reached or len(keywords) <= 1:
                    break

                # Too few results: drop the last keyword and rebuild the
                # query. str.replace() on the joined query would also strip
                # the same substring out of the remaining keywords.
                keywords.pop()
                keyword = " ".join(keywords)

        # The first candidate is not bilingual: order by language score.
        if sub_dict and next(iter(sub_dict.values()))["lan"] < 8:
            sub_dict = order_dict(
                sorted(
                    sub_dict.items(), key=lambda e: e[1]["lan"], reverse=True
                )
            )
        return sub_dict

Example #6

0

Show file

    def get_path_name(self, args):
        """Map each video named by ``args`` to its folder and subtitle state.

        ``args`` (with any double quotes removed) is interpreted as:

        * a directory: it is walked recursively and every file whose
          extension is in ``self.video_format_list`` is recorded;
        * an absolute path: treated as a single video file;
        * anything else: treated as a bare video name located in the current
          working directory, with no subtitle check.

        A video "has a subtitle" when a file with the same stem and one of
        the extensions in ``self.sub_format_list`` sits in the same folder.

        Args:
            args: Video name, video path or directory path.

        Returns:
            An ``order_dict`` of
            ``{video_name: {'path': folder, 'have_subtitle': 0 or 1}}``.
        """

        mix_str = args.replace('"', '')
        video_dict = order_dict()
        if os.path.isdir(mix_str):  # a directory
            for root, _dirs, files in os.walk(mix_str):
                # Set lookup keeps the per-video subtitle check cheap in
                # folders with many files.
                names_here = set(files)
                for one_name in files:
                    v_name_no_format, suffix = os.path.splitext(one_name)
                    # Only files with a known video extension are recorded.
                    if suffix not in self.video_format_list:
                        continue
                    # any() is False for an empty sub_format_list, where
                    # max() over an empty list would raise ValueError.
                    sub_exists = int(any(
                        v_name_no_format + sub_type in names_here
                        for sub_type in self.sub_format_list
                    ))
                    video_dict[one_name] = {'path': root,
                                            'have_subtitle': sub_exists}

        elif os.path.isabs(mix_str):  # absolute path of one video
            v_path, v_name = os.path.split(mix_str)
            v_name_no_format = os.path.splitext(v_name)[0]
            # int() keeps the flag type identical to the directory branch.
            sub_exists = int(any(
                os.path.exists(os.path.join(v_path,
                                            v_name_no_format + sub_type))
                for sub_type in self.sub_format_list
            ))
            video_dict[v_name] = {'path': v_path,
                                  'have_subtitle': sub_exists}
        else:  # a bare video name without any path
            video_dict[mix_str] = {'path': os.getcwd(), 'have_subtitle': 0}
        return video_dict

Example #7

0

Show file

File: main.py Project: fakegit/GetSubtitles

    def start(self):
        """Search, download and extract subtitles for every requested video.

        For each video returned by ``self.get_path_name`` the method queries
        the downloaders in ``self.downloader`` in order until
        ``self.sub_num`` results are collected, lets ``self.choose_subtitle``
        pick archives, and passes each pick to ``self.process_archive`` until
        at least one subtitle has been extracted or no candidates remain.
        Per-video failures are appended to ``self.failed_list`` and a summary
        is printed at the end.

        Returns:
            A dict with the keys ``'total'``, ``'success'``, ``'fail'`` and
            ``'fail_videos'`` (the accumulated ``self.failed_list``).
        """

        all_video_dict = self.get_path_name(self.arg_name, self.sub_store_path)

        for one_video, video_info in all_video_dict.items():

            self.s_error = ''  # reset the error records
            self.f_error = ''
            # Reset the per-video state before the try block so the
            # ``finally`` clause never evaluates values left over from the
            # previous video.
            sub_dict = order_dict()
            extract_sub_names = None  # None: extraction stage not reached

            try:
                # print the current video and its path
                print('\n' + prefix + ' ' + one_video)
                print(prefix + ' ' + video_info['path'] + '\n' + prefix)

                if video_info['have_subtitle'] and not self.over:
                    print(prefix +
                          " subtitle already exists, add '-o' to replace it.")
                    continue

                for i, downloader in enumerate(self.downloader):
                    try:
                        sub_dict.update(
                            downloader.get_subtitles(one_video,
                                                     sub_num=self.sub_num))
                    except ValueError as e:
                        # Only the known "unknown page structure" error from
                        # Zimuku is downgraded to a warning.
                        if str(e) == 'Zimuku搜索结果出现未知结构页面':
                            print(prefix + ' warn: ' + str(e))
                        else:
                            raise
                    except (exceptions.Timeout, exceptions.ConnectionError):
                        print(prefix + ' connect timeout, search next site.')
                        if i < (len(self.downloader) - 1):
                            continue
                        else:
                            print(prefix + ' PLEASE CHECK YOUR NETWORK STATUS')
                            # SystemExit is not an Exception subclass, so it
                            # skips the handlers below; ``finally`` still runs.
                            sys.exit(0)
                    if len(sub_dict) >= self.sub_num:
                        break
                if len(sub_dict) == 0:
                    self.s_error += 'no search results. '
                    continue

                extract_sub_names = []
                # go through the archives until a subtitle has been guessed
                while not extract_sub_names and len(sub_dict) > 0:
                    should_exit, sub_choices = self.choose_subtitle(sub_dict)
                    if should_exit:
                        break
                    for i, choice in enumerate(sub_choices):
                        sub_choice, link, session = choice
                        sub_dict.pop(sub_choice)
                        try:
                            if i == 0:
                                error, n_extract_sub_names = \
                                    self.process_archive(
                                        one_video, video_info, sub_choice,
                                        link, session)
                            else:
                                # Choices after the first are processed with
                                # renaming and deletion switched off.
                                error, n_extract_sub_names = \
                                    self.process_archive(
                                        one_video,
                                        video_info,
                                        sub_choice,
                                        link,
                                        session,
                                        rename=False,
                                        delete=False)
                            if error:
                                print(prefix + ' error: ' + error)
                                print(prefix)
                                continue
                            elif not n_extract_sub_names:
                                print(prefix +
                                      ' no matched subtitle in this archive')
                                continue
                            else:
                                extract_sub_names += n_extract_sub_names
                        except TypeError:
                            print(format_exc())
                            continue
                        except rarfile.BadRarFile as e:
                            print(prefix + ' Error:' + str(e))
                            continue
            except rarfile.RarCannotExec:
                self.s_error += 'Unrar not installed?'
            except AttributeError:
                self.s_error += 'unknown error. try again.'
                self.f_error += format_exc()
            except Exception as e:
                self.s_error += str(e) + '. '
                self.f_error += format_exc()
            finally:
                if (extract_sub_names is not None and not extract_sub_names
                        and len(sub_dict) == 0):
                    # automatic mode: no archive yielded a guessed subtitle
                    self.s_error += " failed to guess one subtitle,"
                    self.s_error += "use '-q' to try query mode."

                if self.s_error and not self.debug:
                    self.s_error += "add --debug to get more info of the error"

                if self.s_error:
                    self.failed_list.append({
                        'name': one_video,
                        'path': video_info['path'],
                        'error': self.s_error,
                        'trace_back': self.f_error
                    })
                    print(prefix + ' error:' + self.s_error)

        if len(self.failed_list):
            print('\n===============================', end='')
            print('FAILED LIST===============================\n')
            for i, one in enumerate(self.failed_list):
                print('%2s. name: %s' % (i + 1, one['name']))
                print('%3s path: %s' % ('', one['path']))
                print('%3s info: %s' % ('', one['error']))
                if self.debug:
                    print('%3s TRACE_BACK: %s' % ('', one['trace_back']))

        print('\ntotal: %s  success: %s  fail: %s\n' %
              (len(all_video_dict), len(all_video_dict) -
               len(self.failed_list), len(self.failed_list)))

        return {
            'total': len(all_video_dict),
            'success': len(all_video_dict) - len(self.failed_list),
            'fail': len(self.failed_list),
            'fail_videos': self.failed_list
        }

Example #8

0

Show file

File: zimuku.py Project: sty001/GetSubtitles

    def get_subtitles(self, keywords, sub_num=10):
        """Search ZIMUKU and return candidate subtitles with download info.

        The first query is the space-joined keyword list. While fewer than
        ``sub_num`` results have been collected and more than one keyword is
        left, the last keyword is removed from the query and the search is
        repeated. Every collected entry is then resolved: its detail page is
        fetched to compute the language score and the download link.

        Args:
            keywords: Iterable of keyword strings. It is copied, so the
                caller's object is not modified.
            sub_num: Number of results at which searching stops (default 10).

        Returns:
            An ``order_dict`` mapping ``'[ZIMUKU]' + title`` to a dict with
            the keys ``'type'`` (``'default'`` or ``'shooter'``), ``'link'``
            (download link), ``'lan'`` (language score: 1, 2, 4 and 8 are
            added per detected language flag) and ``'session'`` (a
            ``requests`` session carrying the search session's headers and
            cookies plus a ``Referer`` set to the link).

        Raises:
            ValueError: If a non-empty result page has neither the
                ``div.item`` nor the ``div.persub`` layout.
        """

        print(prefix + ' Searching ZIMUKU...', end='\r')

        keywords = list(keywords)
        keyword = ' '.join(keywords)
        info = guessit(keyword)
        # The first keyword is replaced by the title extracted by guessit;
        # the season tag, when present, goes right after it.
        keywords[0] = info['title']
        if info.get('season'):
            season = str(info['season']).zfill(2)
            keywords.insert(1, 's' + season)

        sub_dict = order_dict()
        s = requests.session()
        s.headers.update(self.headers)

        while True:
            # search with the current keyword string
            r = s.get(self.search_url + keyword, timeout=10)
            html = r.text.encode('utf8') if py == 2 else r.text

            # '搜索不到相关字幕' is the site's "no subtitles found" message.
            if '搜索不到相关字幕' not in html:
                bs_obj = BeautifulSoup(r.text, 'html.parser')

                if bs_obj.find('div', {'class': 'item'}):
                    # combined search result page
                    for item in bs_obj.find_all('div', {'class': 'item'}):
                        title_boxes = item.find(
                            'div', {'class': 'title'}).find_all('p')
                        title_box = title_boxes[0]
                        sub_title_box = title_boxes[1]
                        if py == 2:
                            item_title = title_box.text.encode('utf8')
                            item_sub_title = sub_title_box.text.encode('utf8')
                        else:
                            item_title = title_box.text
                            item_sub_title = sub_title_box.text
                        item_info = guessit(item_title)
                        if info.get('year') and item_info.get('year'):
                            if info['year'] != item_info['year']:
                                # the years differ: skip this item
                                continue
                        item_titles = [
                            item_info.get('title', '').lower(),
                            item_info.get('alternative_title', '').lower()
                        ] + item_sub_title.lower().strip().split(',')
                        # NOTE(review): this iterates over the characters of
                        # item_sub_title, not over item_titles, so the count
                        # is 0 only for an empty sub-title (or when every
                        # character contains the title). It looks like the
                        # intent was to test item_titles; left unchanged
                        # because fixing it would start filtering results --
                        # confirm the intended rule first.
                        title_included = sum([
                            1 for _ in item_sub_title
                            if info['title'].lower() not in _
                        ])
                        if title_included == 0:
                            # title extracted by guessit does not match:
                            # fall back to a word-overlap check
                            item_title_split = \
                                [one.split() for one in item_titles]
                            info_title_split = info['title'].lower().split()
                            sum1 = sum([1 for _ in info_title_split
                                        if _ in item_title_split[0]])
                            sum2 = sum([1 for _ in info_title_split
                                        if _ in item_title_split[1]])
                            # Integer form of "ratio >= 0.5": under Python 2
                            # ``/`` on two ints truncates, and an empty title
                            # would divide by zero.
                            word_count = len(info_title_split)
                            if not (2 * sum1 >= word_count
                                    or 2 * sum2 >= word_count):
                                # titles do not match: skip this item
                                continue
                        # at most the first three subtitle rows per item
                        for a in item.find_all('td', {'class': 'first'})[:3]:
                            a = a.a
                            a_link = self.site_url + a.attrs['href']
                            if py == 2:
                                a_title = a.text.encode('utf8')
                            else:
                                a_title = a.text
                            a_title = '[ZIMUKU]' + a_title
                            sub_dict[a_title] = {'type': 'default',
                                                 'link': a_link}
                elif bs_obj.find('div', {'class': 'persub'}):
                    # shooter-style subtitle page
                    for persub in bs_obj.find_all('div', {'class': 'persub'}):
                        if py == 2:
                            a_title = persub.h1.text.encode('utf8')
                        else:
                            a_title = persub.h1.text
                        a_link = self.site_url + persub.h1.a.attrs['href']
                        a_title = '[ZIMUKU]' + a_title
                        sub_dict[a_title] = {'type': 'shooter', 'link': a_link}
                else:
                    raise ValueError('Zimuku搜索结果出现未知结构页面')

            if len(sub_dict) >= sub_num:
                break

            if len(keywords) > 1:
                keyword = keyword.replace(keywords[-1], '').strip()
                keywords.pop(-1)
                continue

            break

        for sub_info in sub_dict.values():
            # Both page types expose the language box the same way.
            r = s.get(sub_info['link'], timeout=60)
            bs_obj = BeautifulSoup(r.text, 'html.parser')
            lang_box = bs_obj.find('ul', {'class': 'subinfo'}).find('li')
            type_score = 0
            if sub_info['type'] == 'default':
                # combined search subtitle page: one flag image per language
                for lang in lang_box.find_all('img'):
                    if 'uk' in lang.attrs['src']:
                        type_score += 1
                    elif 'hongkong' in lang.attrs['src']:
                        type_score += 2
                    elif 'china' in lang.attrs['src']:
                        type_score += 4
                    elif 'jollyroger' in lang.attrs['src']:
                        type_score += 8
                sub_info['lan'] = type_score
                # The detail page links to an intermediate page that holds
                # the real download link.
                download_link = bs_obj.find('a', {'id': 'down1'}).attrs['href']
                download_link = urljoin(self.site_url, download_link)
                r = s.get(download_link, timeout=60)
                bs_obj = BeautifulSoup(r.text, 'html.parser')
                download_link = bs_obj.find('a', {'rel': 'nofollow'})
                download_link = download_link.attrs['href']
                download_link = urljoin(self.site_url, download_link)
                sub_info['link'] = download_link
            else:
                # shooter-style subtitle page: languages are plain text
                if py == 2:
                    text = lang_box.text.encode('utf8')
                else:
                    text = lang_box.text
                # Each language contributes independently; an if/elif chain
                # on the same text would count only the first match.
                type_score += ('英' in text) * 1
                type_score += ('繁' in text) * 2
                type_score += ('简' in text) * 4
                type_score += ('双语' in text) * 8
                sub_info['lan'] = type_score
                download_link = bs_obj.find('a', {'id': 'down1'}).attrs['href']
                sub_info['link'] = download_link
            # Give every entry its own session that copies the search
            # session's headers and cookies and sets the Referer header.
            backup_session = requests.session()
            backup_session.headers.update(s.headers)
            backup_session.headers['Referer'] = sub_info['link']
            backup_session.cookies.update(s.cookies)
            sub_info['session'] = backup_session

        return sub_dict

Example #9

0

Show file

File: main.py Project: vsmawoex/GetSubtitles

    def start(self):
        """Search, download and extract a subtitle for every requested video.

        For each video returned by ``self.get_path_name`` the method sorts
        its keywords, queries the downloaders in ``self.downloader`` in order
        until ``self.sub_num`` results are collected, then repeatedly lets
        ``self.choose_subtitle`` pick an archive, downloads it with the
        matching site downloader and tries to extract a subtitle from it.
        Per-video failures are appended to ``self.failed_list`` and a summary
        is printed at the end. Nothing is returned.
        """

        all_video_dict = self.get_path_name(self.arg_name)

        for one_video, video_info in all_video_dict.items():

            self.s_error = ''  # reset the error records
            self.f_error = ''

            try:
                keywords, info_dict = self.sort_keyword(one_video)
                # print the current video and its path
                print('\n' + prefix + ' ' + one_video)
                print(prefix + ' ' + video_info['path'] + '\n' + prefix)

                if video_info['have_subtitle'] and not self.over:
                    print(prefix +
                          " subtitle already exists, add '-o' to replace it.")
                    continue

                sub_dict = order_dict()
                for i, downloader in enumerate(self.downloader):
                    try:
                        sub_dict.update(
                            downloader.get_subtitles(tuple(keywords)))
                    except (exceptions.Timeout,
                            exceptions.ConnectionError) as e:
                        print(prefix + ' connect timeout, search next site.')
                        if i < (len(self.downloader) - 1):
                            continue
                        else:
                            print(prefix + ' PLEASE CHECK YOUR NETWORK STATUS')
                            sys.exit(0)
                    if len(sub_dict) >= self.sub_num:
                        break
                if len(sub_dict) == 0:
                    self.s_error += 'no search results. '
                    continue

                extract_sub_name = None
                # go through the archives until a subtitle has been guessed
                while not extract_sub_name and len(sub_dict) > 0:
                    sub_choice, link = self.choose_subtitle(sub_dict)
                    sub_dict.pop(sub_choice)
                    if py == 2:
                        # Python 2 only: re-encode the chosen name to the
                        # configured output encoding.
                        encoding = chardet.detect(sub_choice)['encoding']
                        if isinstance(sub_choice, str):
                            sub_choice = sub_choice.decode(encoding)
                        try:
                            sub_choice = sub_choice.encode(
                                GetSubtitles.output_encode)
                        # NOTE(review): bare except also traps
                        # KeyboardInterrupt/SystemExit -- consider narrowing.
                        except:
                            if isinstance(sub_choice, str):
                                sub_choice = sub_choice.encode(encoding)
                            sub_choice = sub_choice.decode('utf8')
                            sub_choice = sub_choice.encode(
                                GetSubtitles.output_encode)
                    if self.query:
                        print(prefix + ' ')
                    # The site tag inside the name selects the downloader.
                    # NOTE(review): ``datatype`` is only assigned in these
                    # three branches; a name without any tag would reuse the
                    # previous value or hit an unbound local -- confirm that
                    # every name carries a tag.
                    if '[ZMZ]' in sub_choice:
                        datatype, sub_data_bytes = self.zimuzu.download_file(
                            sub_choice, link)
                    elif '[SUBHD]' in sub_choice:
                        datatype, sub_data_bytes, msg = self.subhd.\
                            download_file(sub_choice, link)
                        if msg == 'false':
                            print(prefix + ' error: '
                                  'download too frequently '
                                  'with subhd downloader, '
                                  'please change to other downloaders')
                            # NOTE(review): returning here leaves the method
                            # (after ``finally``) without printing the
                            # summary for the remaining videos.
                            return
                    elif '[ZIMUKU]' in sub_choice:
                        datatype, sub_data_bytes = self.zimuku.download_file(
                            sub_choice, link)

                    if datatype in self.support_file_list:
                        # get the name of the guessed subtitle;
                        # query mode always returns a value, automatic mode
                        # returns None when nothing could be guessed
                        try:
                            extract_sub_name = self.extract_subtitle(
                                one_video, video_info['path'], datatype,
                                sub_data_bytes, info_dict, self.single)
                        except rarfile.BadRarFile:
                            continue
                        if extract_sub_name:
                            # keep only the last path component
                            extract_sub_name = extract_sub_name.split('/')[-1]
                            try:
                                # zipfile: Historical ZIP filename encoding
                                # try cp437 encoding
                                extract_sub_name = extract_sub_name.\
                                    encode('cp437').decode('gbk')
                            # best effort: keep the name as is on failure
                            except:
                                pass
                            try:
                                if py == 2:
                                    if isinstance(extract_sub_name, str):
                                        encoding = chardet.\
                                                detect(extract_sub_name)
                                        encoding = encoding['encoding']
                                        # detected ISO-* encodings are
                                        # decoded as gbk instead
                                        if 'ISO' in encoding:
                                            encoding = 'gbk'
                                        extract_sub_name = extract_sub_name.\
                                            decode(encoding)
                                        extract_sub_name = extract_sub_name.\
                                            encode(GetSubtitles.output_encode)
                                    else:
                                        extract_sub_name = extract_sub_name.\
                                            encode(GetSubtitles.output_encode)
                                print(prefix + ' ' + extract_sub_name + '\n')
                            except UnicodeDecodeError:
                                print(prefix + ' ' +
                                      extract_sub_name.encode('gbk') + '\n')
                    # query mode: the downloaded archive has an unsupported
                    # type
                    elif self.query:
                        print(prefix +
                              '  unsupported file type %s' % datatype[1:])

            except rarfile.RarCannotExec:
                self.s_error += 'Unrar not installed?'
            except AttributeError:
                self.s_error += 'unknown error. try again.'
                self.f_error += format_exc()
            except Exception as e:
                self.s_error += str(e) + '. '
                self.f_error += format_exc()
            finally:
                # NOTE(review): ``extract_sub_name`` and ``sub_dict`` are
                # function locals that survive across loop iterations, so
                # when this video left the try block early the check below
                # may read values from the previous video -- confirm.
                if ('extract_sub_name' in dir() and not extract_sub_name
                        and len(sub_dict) == 0):
                    # automatic mode: no archive yielded a guessed subtitle
                    self.s_error += " failed to guess one subtitle,"
                    self.s_error += "use '-q' to try query mode."

                if self.s_error and not self.debug:
                    self.s_error += "add --debug to get more info of the error"

                if self.s_error:
                    self.failed_list.append({
                        'name': one_video,
                        'path': video_info['path'],
                        'error': self.s_error,
                        'trace_back': self.f_error
                    })
                    print(prefix + ' error:' + self.s_error)

        # Print the failure report (if any) followed by the totals.
        if len(self.failed_list):
            print('\n===============================', end='')
            print('FAILED LIST===============================\n')
            for i, one in enumerate(self.failed_list):
                print('%2s. name: %s' % (i + 1, one['name']))
                print('%3s path: %s' % ('', one['path']))
                print('%3s info: %s' % ('', one['error']))
                if self.debug:
                    print('%3s TRACE_BACK: %s' % ('', one['trace_back']))

        print('\ntotal: %s  success: %s  fail: %s\n' %
              (len(all_video_dict), len(all_video_dict) -
               len(self.failed_list), len(self.failed_list)))

Example #10

0

Show file

    def get_subtitles(self, video_name, sub_num=5):
        """Search SUBHD for subtitle packages matching ``video_name``.

        The query starts with every keyword returned by
        ``Downloader.get_keywords`` and drops the last keyword after each
        round, until ``sub_num`` candidates were collected or only one
        keyword is left.

        Returns an ordered dict mapping the prefixed subtitle name to
        ``{'lan': score, 'link': url, 'session': None}``.  ``score`` adds
        1 / 2 / 4 / 8 when the text of the result box contains the markers
        '英文' / '繁体' / '简体' / '双语'.  If the first candidate scores
        below 8, the dict is re-ordered by descending score.
        """
        print(prefix + ' Searching SUBHD...', end='\r')

        keywords, info_dict = Downloader.get_keywords(video_name)
        keyword = ' '.join(keywords)

        # (marker found in the result box text, weight added to the score)
        language_weights = (('英文', 1), ('繁体', 2), ('简体', 4), ('双语', 8))
        char_error = 'The URI you submitted has disallowed characters'

        sub_dict = order_dict()
        session = requests.session()
        while True:
            # Query with the current keyword string.
            response = session.get(SubHDDownloader.search_url + keyword,
                                   headers=Downloader.header,
                                   timeout=10)
            page = BeautifulSoup(response.text, 'html.parser')

            summary = page.find('small')
            if summary is None:
                if char_error in page.text:
                    print(prefix + ' [SUBHD ERROR] ' + char_error + ': ' +
                          keyword)
                    return sub_dict
                # No summary element (search verification button page):
                # wait a little and send the same query again.
                time.sleep(2)
                continue

            if "总共 0 条" not in summary.text:
                boxes = page.find_all(
                    "div", class_="mb-4 bg-white rounded shadow-sm")
                for one_box in boxes:
                    # For movies, skip boxes lacking the badge element.
                    if info_dict['type'] == 'movie' and not one_box.find(
                            'div',
                            class_="px-1 rounded-sm bg-danger text-white"):
                        continue

                    anchor = one_box.find('div', class_="f12 pt-1").find('a')
                    href = anchor.attrs['href']
                    sub_url = SubHDDownloader.site_url + href
                    sub_name = SubHDDownloader.choice_prefix + anchor.text
                    if '/a' in href:
                        box_text = one_box.text
                        sub_dict[sub_name] = {
                            'lan': sum(weight
                                       for marker, weight in language_weights
                                       if marker in box_text),
                            'link': sub_url,
                            'session': None
                        }
                    if len(sub_dict) >= sub_num:
                        # Enough candidates: empty the keyword list so the
                        # outer loop terminates as well.
                        del keywords[:]
                        break

            if len(keywords) <= 1:
                break
            # Not enough candidates yet: drop the last keyword and retry.
            keyword = keyword.replace(keywords.pop(), '')

        if sub_dict and next(iter(sub_dict.values()))['lan'] < 8:
            # The first candidate is not bilingual: order by score instead.
            ranked = sorted(sub_dict.items(),
                            key=lambda item: item[1]['lan'],
                            reverse=True)
            sub_dict = order_dict(ranked)
        return sub_dict

Example #11

0

Show file

    def get_subtitles(self, video_name, sub_num=5):
        """Search SUBHD for subtitle packages matching ``video_name``.

        Works on both Python 2 and 3: scraped text is encoded to UTF-8
        byte strings when the module-level ``py`` equals 2.

        The query starts with every keyword returned by
        ``Downloader.get_keywords`` and drops the last keyword after each
        round, until ``sub_num`` candidates were collected or only one
        keyword is left.

        Returns an ordered dict mapping the prefixed subtitle name to
        ``{'lan': score, 'link': url, 'session': None}``.  ``score`` adds
        1 / 2 / 4 / 8 when the text of the result box contains the markers
        '英文' / '繁体' / '简体' / '双语'.  If the first candidate scores
        below 8, the dict is re-ordered by descending score.
        """
        print(prefix + ' Searching SUBHD...', end='\r')

        keywords, info_dict = Downloader.get_keywords(video_name)
        keyword = ' '.join(keywords)

        # Scraped text is unicode; Python 2 needs UTF-8 byte strings so the
        # comparisons against the literals below work.
        if py == 2:
            def native(value):
                return value.encode('utf8')
        else:
            def native(value):
                return value

        # (marker found in the result box text, weight added to the score)
        language_weights = (('英文', 1), ('繁体', 2), ('简体', 4), ('双语', 8))
        char_error = 'The URI you submitted has disallowed characters'

        sub_dict = order_dict()
        session = requests.session()
        while True:
            # Query with the current keyword string.
            response = session.get(SubHDDownloader.search_url + keyword,
                                   headers=Downloader.header,
                                   timeout=10)
            page = BeautifulSoup(response.text, 'html.parser')

            summary = page.find('small')
            if summary is None:
                if char_error in page.text:
                    print(prefix + ' [SUBHD ERROR] ' + char_error + ': ' +
                          keyword)
                    return sub_dict
                # No summary element (search verification button page):
                # wait a little and send the same query again.
                time.sleep(2)
                continue

            if '总共 0 条' not in native(summary.text):
                for one_box in page.find_all('div', {'class': 'box'}):
                    # For movies, skip boxes without a 'movielist' element.
                    if info_dict['type'] == 'movie' and not one_box.find(
                            'div', {'class': 'movielist'}):
                        continue

                    anchor = one_box.find('div', {'class': 'd_title'}).find('a')
                    href = anchor.attrs['href']
                    sub_url = SubHDDownloader.site_url + href
                    sub_name = SubHDDownloader.choice_prefix + native(anchor.text)
                    box_text = native(one_box.text)
                    if '/ar' in href:
                        sub_dict[sub_name] = {
                            'lan': sum(weight
                                       for marker, weight in language_weights
                                       if marker in box_text),
                            'link': sub_url,
                            'session': None
                        }

                    if len(sub_dict) >= sub_num:
                        # Enough candidates: empty the keyword list so the
                        # outer loop terminates as well.
                        del keywords[:]
                        break

            if len(keywords) <= 1:
                break
            # Not enough candidates yet: drop the last keyword and retry.
            keyword = keyword.replace(keywords.pop(), '')

        if sub_dict and next(iter(sub_dict.values()))['lan'] < 8:
            # The first candidate is not bilingual: order by score instead.
            ranked = sorted(sub_dict.items(),
                            key=lambda item: item[1]['lan'],
                            reverse=True)
            sub_dict = order_dict(ranked)
        return sub_dict

Example #12

0

Show file

File: zimuku.py Project: dindom999/getsubtitle

    def get_subtitles(self, keywords, sub_num=10):
        """Search ZIMUKU and return candidate subtitles for ``keywords``.

        Args:
            keywords: iterable of keyword strings; joined with spaces they
                form the first query and are also fed to ``guessit``.
            sub_num: stop searching once this many candidates were found.

        Returns:
            An ordered dict mapping ``"[ZIMUKU]" + title`` to a dict with
            the keys ``type`` (``"default"`` or ``"shooter"``), ``link``
            (download link resolved from the detail page), ``lan``
            (language score) and ``session`` (a ``requests`` session
            carrying the search session's headers and cookies plus a
            ``Referer`` header).

        Raises:
            ValueError: when a result page has neither of the two known
                layouts.
        """
        print(prefix + " Searching ZIMUKU...", end="\r")

        keywords = list(keywords)
        keyword = " ".join(keywords)
        info = guessit(keyword)
        # Replace the first keyword with the title guessit extracted and,
        # if a season was recognised, insert it right after as "sNN".
        keywords.pop(0)
        keywords.insert(0, info["title"])
        if info.get("season"):
            season = str(info["season"]).zfill(2)
            keywords.insert(1, "s" + season)

        sub_dict = order_dict()
        s = requests.session()
        s.headers.update(self.headers)

        while True:
            # Search with the current keyword string.
            r = s.get(self.search_url + keyword, timeout=10)
            html = r.text

            if "搜索不到相关字幕" not in html:
                bs_obj = BeautifulSoup(r.text, "html.parser")

                if bs_obj.find("div", {"class": "item"}):
                    # Combined search result page.
                    for item in bs_obj.find_all("div", {"class": "item"}):
                        title_boxes = item.find("div", {
                            "class": "title"
                        }).find_all("p")
                        item_title = title_boxes[0].text
                        item_sub_title = title_boxes[1].text
                        item_info = guessit(item_title)
                        if info.get("year") and item_info.get("year"):
                            if info["year"] != item_info["year"]:
                                # Release years differ: skip this item.
                                continue

                        wanted_title = info["title"].lower()
                        item_titles = [
                            item_info.get("title", "").lower(),
                            item_info.get("alternative_title", "").lower(),
                        ] + item_sub_title.lower().strip().split(",")
                        # Direct match: the wanted title is contained in one
                        # of the item's titles.  (The previous code walked
                        # the characters of the sub-title string with an
                        # inverted test, so the fallback below never ran.)
                        if not any(wanted_title in one
                                   for one in item_titles):
                            # Fallback: accept the item when at least half
                            # of the wanted title's words occur in the
                            # item's guessit title or alternative title.
                            wanted_words = wanted_title.split()
                            # Avoid dividing by zero for a blank title.
                            word_total = len(wanted_words) or 1
                            main_words = item_titles[0].split()
                            alt_words = item_titles[1].split()
                            main_hits = sum(
                                1 for word in wanted_words
                                if word in main_words)
                            alt_hits = sum(
                                1 for word in wanted_words
                                if word in alt_words)
                            if not (main_hits / word_total >= 0.5
                                    or alt_hits / word_total >= 0.5):
                                # Titles do not match: skip this item.
                                continue

                        # Keep at most the first three subtitles per item.
                        for a in item.find_all("td", {"class": "first"})[:3]:
                            a = a.a
                            a_link = self.site_url + a.attrs["href"]
                            a_title = a.text
                            a_title = "[ZIMUKU]" + a_title
                            sub_dict[a_title] = {
                                "type": "default",
                                "link": a_link
                            }
                elif bs_obj.find("div", {"class": "persub"}):
                    # "Shooter" style subtitle list page.
                    for persub in bs_obj.find_all("div", {"class": "persub"}):
                        a_title = persub.h1.text
                        a_link = self.site_url + persub.h1.a.attrs["href"]
                        a_title = "[ZIMUKU]" + a_title
                        sub_dict[a_title] = {"type": "shooter", "link": a_link}
                else:
                    raise ValueError("Zimuku搜索结果出现未知结构页面")

            if len(sub_dict) >= sub_num:
                del keywords[:]
                break

            if len(keywords) > 1:
                # Too few candidates: drop the last keyword and search again.
                keyword = keyword.replace(keywords[-1], "").strip()
                keywords.pop(-1)
                continue

            break

        # Visit every candidate's detail page to obtain its language score
        # and its real download link.
        for sub_name, sub_info in sub_dict.items():
            if sub_info["type"] == "default":
                # Detail page reached from the combined search page.
                r = s.get(sub_info["link"], timeout=60)
                bs_obj = BeautifulSoup(r.text, "html.parser")
                lang_box = bs_obj.find("ul", {"class": "subinfo"}).find("li")
                # The score is derived from the icon file names in the
                # language box.
                type_score = 0
                for lang in lang_box.find_all("img"):
                    if "uk" in lang.attrs["src"]:
                        type_score += 1
                    elif "hongkong" in lang.attrs["src"]:
                        type_score += 2
                    elif "china" in lang.attrs["src"]:
                        type_score += 4
                    elif "jollyroger" in lang.attrs["src"]:
                        type_score += 8
                sub_info["lan"] = type_score
                # Follow the "down1" link to the download page and take the
                # rel="nofollow" link found there as the final link.
                download_link = bs_obj.find("a", {"id": "down1"}).attrs["href"]
                download_link = urljoin(self.site_url, download_link)
                r = s.get(download_link, timeout=60)
                bs_obj = BeautifulSoup(r.text, "html.parser")
                download_link = bs_obj.find("a", {"rel": "nofollow"})
                download_link = download_link.attrs["href"]
                download_link = urljoin(self.site_url, download_link)
                sub_info["link"] = download_link
            else:
                # Detail page reached from the "shooter" style page.
                r = s.get(sub_info["link"], timeout=60)
                bs_obj = BeautifulSoup(r.text, "html.parser")
                lang_box = bs_obj.find("ul", {"class": "subinfo"}).find("li")
                text = lang_box.text
                sub_info["lan"] = get_type_score(text)
                download_link = bs_obj.find("a", {"id": "down1"}).attrs["href"]
                sub_info["link"] = download_link
            # Hand out a separate session per subtitle that reuses the
            # search session's headers and cookies.
            backup_session = requests.session()
            backup_session.headers.update(s.headers)
            backup_session.headers["Referer"] = sub_info["link"]
            backup_session.cookies.update(s.cookies)
            sub_info["session"] = backup_session

        return sub_dict

Example #13

0

Show file

    def start(self):
        """Search, download and extract a subtitle for every input video.

        Iterates over the videos returned by
        ``self.get_path_name(self.arg_name)``.  Videos that already have a
        subtitle are skipped unless ``self.over`` is set.  For the others
        every downloader in ``self.downloader`` is queried until
        ``self.sub_num`` candidates are known; candidates are then
        downloaded and extracted one by one until
        ``self.extract_subtitle`` returns a name.

        Failures are recorded in ``self.failed_list`` and a summary is
        printed at the end.  Nothing is returned.
        """
        all_video_dict = self.get_path_name(self.arg_name)

        for one_video, video_info in all_video_dict.items():

            self.s_error = ''  # reset the error records for this video
            self.f_error = ''

            print('\n├ ' + one_video)  # print the video and its path
            print('├ ' + video_info['path'] + '\n├')

            if video_info['have_subtitle'] and not self.over:
                print("├ subtitle already exists, add '-o' to replace it.")
                continue

            # Per-video state.  It is initialised before the ``try`` so the
            # ``finally`` clause never looks at values left over from the
            # previous video.
            sub_dict = order_dict()
            extract_sub_name = None
            extraction_attempted = False
            try:
                keywords, info_dict = self.sort_keyword(one_video)
                for downloader in self.downloader:
                    sub_dict.update(
                        downloader.get_subtitles(tuple(keywords),
                                                 sub_num=self.sub_num))
                    if len(sub_dict) >= self.sub_num:
                        break
                if len(sub_dict) == 0:
                    self.s_error += 'no search results'
                    continue

                extraction_attempted = True
                # Walk through the packages until one yields a subtitle.
                while not extract_sub_name and len(sub_dict) > 0:
                    sub_choice = self.choose_subtitle(sub_dict)
                    if self.query:
                        print('├ ')
                    if '[ZMZ]' in sub_choice:
                        datatype, sub_data_bytes = self.zimuzu.download_file(
                            sub_choice, sub_dict[sub_choice]['link'])
                    elif '[SUBHD]' in sub_choice:
                        datatype, sub_data_bytes = self.subhd.download_file(
                            sub_choice, sub_dict[sub_choice]['link'])

                    if datatype in self.support_file_list:
                        # Name of the guessed subtitle: query mode always
                        # returns one, automatic mode returns None when
                        # nothing could be guessed.
                        extract_sub_name = self.extract_subtitle(
                            one_video, video_info['path'], datatype,
                            sub_data_bytes, info_dict)
                        if extract_sub_name:
                            print('├ ' + extract_sub_name + '\n')
                    elif self.query:
                        # Query mode: the package type is not supported.
                        print('├  unsupported file type %s' % datatype[1:])
                    sub_dict.pop(sub_choice)
            # A tuple is required here: ``Timeout or ConnectionError``
            # evaluates to ``Timeout`` alone and never matched the latter.
            except (exceptions.Timeout, exceptions.ConnectionError):
                self.s_error += 'connect failed, check network status.'
            except rarfile.RarCannotExec:
                self.s_error += 'Unrar not installed?'
            except AttributeError:
                self.s_error += 'unknown error. try again.'
                self.f_error += format_exc()
            except Exception as e:
                self.s_error += str(e) + '. '
                self.f_error += format_exc()
            finally:
                if (extraction_attempted and not extract_sub_name
                        and len(sub_dict) == 0):
                    # Automatic mode: no package produced a guessed subtitle.
                    self.s_error += " failed to guess one subtitle, use '-q' to try query mode."

                if self.s_error and not self.debug:
                    self.s_error += "add --debug to get more info of the error"

                if self.s_error:
                    self.failed_list.append({
                        'name': one_video,
                        'path': video_info['path'],
                        'error': self.s_error,
                        'trace_back': self.f_error
                    })
                    print('├ error:' + self.s_error)

        if len(self.failed_list):
            print(
                '\n===============================FAILED LIST===============================\n'
            )
            for i, one in enumerate(self.failed_list):
                print('%2s. name: %s' % (i + 1, one['name']))
                print('%3s path: %s' % ('', one['path']))
                print('%3s info: %s' % ('', one['error']))
                if self.debug:
                    print('%3s TRACE_BACK: %s' % ('', one['trace_back']))

        print('\ntotal: %s  success: %s  fail: %s\n' %
              (len(all_video_dict), len(all_video_dict) -
               len(self.failed_list), len(self.failed_list)))

Example #14

0

Show file

    def get_subtitles(self, keywords, sub_num=5):
        """Search SUBHD with a keyword list and return an ordered dict.

        Args:
            keywords: keywords ordered by descending importance.
            sub_num: number of subtitle results wanted, 5 by default.

        Returns:
            An ordered dict ``{subtitle name: {'lan': language score,
            'link': subtitle link, 'version': title attribute of the
            result link}}``.  When the first candidate scores below 8 the
            entries are ordered by descending language score.  The score
            is computed by ``get_type_score``; the original docstring
            describes it as English +1, Traditional Chinese +2,
            Simplified Chinese +4, bilingual +8.  The dict collected so
            far is also returned when the site rejects the query because
            of disallowed characters.
        """

        print(prefix + " Searching SUBHD...", end="\r")

        keywords = list(keywords)
        # Every keyword is followed by a space (including the last one).
        keyword = "".join(one + " " for one in keywords)

        sub_dict = order_dict()
        s = requests.session()
        while True:
            # Query with the current keyword string.
            r = s.get(self.search_url + keyword,
                      headers=self.headers,
                      timeout=10)
            bs_obj = BeautifulSoup(r.text, "html.parser")
            try:
                small_text = bs_obj.find("small").text
            except AttributeError:
                char_error = "The URI you submitted has disallowed characters"
                if char_error in bs_obj.text:
                    print(prefix + " [SUBHD ERROR] " + char_error + ": " +
                          keyword)
                    # Return a dict, as documented, rather than None:
                    # callers feed the result straight to dict.update().
                    return sub_dict
                # No summary element (search verification button page):
                # wait a little and send the same query again.
                time.sleep(2)
                continue

            if "总共 0 条" not in small_text:
                for one_box in bs_obj.find_all("div", {"class": "box"}):
                    a = one_box.find("div", {"class": "d_title"}).find("a")
                    sub_url = self.site_url + a.attrs["href"]
                    sub_name = "[SUBHD]" + a.text
                    text = one_box.text
                    if "/ar" in a.attrs["href"]:
                        sub_dict[sub_name] = {
                            "lan": get_type_score(text),
                            "link": sub_url,
                            "version": a.attrs["title"],
                        }
                    if len(sub_dict) >= sub_num:
                        # Enough candidates: empty the keyword list so the
                        # outer loop terminates as well.
                        del keywords[:]
                        break

            if len(keywords) > 1:
                # Too few candidates: drop the last keyword and retry.
                keyword = keyword.replace(keywords[-1], "")
                keywords.pop(-1)
                continue

            break

        if sub_dict and next(iter(sub_dict.values()))["lan"] < 8:
            # The first candidate is not bilingual: put the highest
            # language score first (descending order, as documented).
            sub_dict = order_dict(
                sorted(sub_dict.items(),
                       key=lambda e: e[1]["lan"],
                       reverse=True))
        return sub_dict

Example #15

0

Show file

File: main.py Project: fakegit/GetSubtitles

    def get_path_name(self, args, args1):
        """Build the dict of videos to process.

        Args:
            args: a directory, an absolute video path or a bare video
                name; double quotes are stripped.
            args1: optional directory in which subtitles are stored;
                double quotes are stripped.  When it is empty or not an
                existing directory, subtitles go next to the video.

        Returns:
            An ordered dict ``{video name: {'path': directory,
            'have_subtitle': truthy when a subtitle already exists}}``.
            For a directory every file below it whose extension is in
            ``self.video_format_list`` gets an entry.  A subtitle counts
            as existing when ``<name><ext>`` or ``<name>.zh<ext>`` is
            found for any ``ext`` in ``self.sub_format_list``.
        """
        mix_str = args.replace('"', '')
        store_path = args1.replace('"', '') if args1 else ''

        store_path_files = []
        if not os.path.isdir(store_path):
            print(
                'no valid path specfied,download sub file to video file location.'
            )
            store_path = ''
        else:
            for _root, _dirs, files in os.walk(store_path):
                store_path_files.extend(files)

        video_dict = order_dict()
        if os.path.isdir(mix_str):  # a directory
            for root, _dirs, files in os.walk(mix_str):
                # File names of this directory plus those of the store
                # path, built once per directory for fast membership tests.
                known_files = set(files)
                known_files.update(store_path_files)
                for one_name in files:
                    v_name_no_format, suffix = os.path.splitext(one_name)
                    # Only keep files with a video extension.
                    if suffix not in self.video_format_list:
                        continue
                    sub_exists = int(
                        any(v_name_no_format + sub_type in known_files
                            or v_name_no_format + '.zh' +
                            sub_type in known_files
                            for sub_type in self.sub_format_list))
                    video_dict[one_name] = {
                        'path': store_path or os.path.abspath(root),
                        'have_subtitle': sub_exists
                    }

        elif os.path.isabs(mix_str):  # absolute path of one video
            v_path, v_name = os.path.split(mix_str)
            v_name_no_format = os.path.splitext(v_name)[0]
            s_path = os.path.abspath(store_path) if store_path else v_path
            # Accept the '.zh' variant as well, like the directory branch.
            sub_exists = any(
                os.path.exists(
                    os.path.join(s_path, v_name_no_format + infix + sub_type))
                for sub_type in self.sub_format_list
                for infix in ('', '.zh'))
            video_dict[v_name] = {'path': s_path, 'have_subtitle': sub_exists}
        else:  # a bare video name without a path
            video_dict[mix_str] = {
                'path':
                os.path.abspath(store_path) if store_path else os.getcwd(),
                'have_subtitle': 0
            }
        return video_dict

Example #16

0

Show file

File: subhd.py Project: zll2/GetSubtitles

    def get_subtitles(self, video_name, sub_num=5):
        """Search SUBHD for subtitle packages matching ``video_name``.

        The query starts with every keyword returned by
        ``Downloader.get_keywords`` and drops the last keyword after each
        round, until ``sub_num`` candidates were collected or only one
        keyword is left.

        Returns an ordered dict mapping the prefixed subtitle name to
        ``{"lan": score, "link": url, "session": None}``.  ``score`` adds
        1 / 2 / 4 / 8 when the text of the result box contains the markers
        "英文" / "繁体" / "简体" / "双语".  If the first candidate scores
        below 8, the dict is re-ordered by descending score.
        """
        print("Searching SUBHD...", end="\r")

        keywords, info_dict = Downloader.get_keywords(video_name)
        keyword = " ".join(keywords)

        # (marker found in the result box text, weight added to the score)
        language_weights = (("英文", 1), ("繁体", 2), ("简体", 4), ("双语", 8))
        char_error = "The URI you submitted has disallowed characters"

        sub_dict = order_dict()
        session = requests.session()
        session.headers.update(Downloader.header)
        while True:
            # Query with the current keyword string.
            response = session.get(SubHDDownloader.search_url + keyword,
                                   timeout=10)
            page = BeautifulSoup(response.text, "html.parser")

            summary = page.find("small")
            if summary is None:
                if char_error in page.text:
                    print("[SUBHD ERROR] " + char_error + ": " + keyword)
                    return sub_dict
                # No summary element (search verification button page):
                # wait a little and send the same query again.
                time.sleep(2)
                continue

            if "总共 0 条" not in summary.text:
                boxes = page.find_all(
                    "div", class_="mb-4 bg-white rounded shadow-sm")
                for one_box in boxes:
                    # For movies, skip boxes lacking the badge element.
                    if info_dict["type"] == "movie" and not one_box.find(
                            "div",
                            class_="px-1 rounded-sm bg-danger text-white"):
                        continue

                    anchor = one_box.find("div", class_="f12 pt-1").find("a")
                    href = anchor.attrs["href"]
                    sub_url = SubHDDownloader.site_url + href
                    sub_name = SubHDDownloader.choice_prefix + anchor.text
                    if "/a" in href:
                        box_text = one_box.text
                        sub_dict[sub_name] = {
                            "lan": sum(weight
                                       for marker, weight in language_weights
                                       if marker in box_text),
                            "link": sub_url,
                            "session": None,
                        }
                    if len(sub_dict) >= sub_num:
                        # Enough candidates: empty the keyword list so the
                        # outer loop terminates as well.
                        del keywords[:]
                        break

            if len(keywords) <= 1:
                break
            # Not enough candidates yet: drop the last keyword and retry.
            keyword = keyword.replace(keywords.pop(), "")

        if sub_dict and next(iter(sub_dict.values()))["lan"] < 8:
            # The first candidate is not bilingual: order by score instead.
            ranked = sorted(sub_dict.items(),
                            key=lambda item: item[1]["lan"],
                            reverse=True)
            sub_dict = order_dict(ranked)
        return sub_dict

Example #17

0

Show file

File: subhd.py Project: ahgan/GetSubtitles

    def get_subtitles(self, keywords, sub_num=5):
        """Search SUBHD with a keyword list and return an ordered dict.

        Works on both Python 2 and 3: scraped text is encoded to UTF-8
        byte strings when the module-level ``py`` equals 2.

        Args:
            keywords: keywords ordered by descending importance.
            sub_num: number of subtitle results wanted, 5 by default.

        Returns:
            An ordered dict ``{subtitle name: {'lan': language score,
            'link': subtitle link, 'ref': search URL that produced the
            result}}``.  The language score adds 1 / 2 / 4 / 8 when the
            text of the result box contains '英文' / '繁体' / '简体' /
            '双语'.  When the first candidate scores below 8 the entries
            are ordered by descending score.  The dict collected so far is
            also returned when the site rejects the query because of
            disallowed characters.
        """

        print(prefix + ' Searching SUBHD...', end='\r')

        keywords = list(keywords)
        # Every keyword is followed by a space (including the last one).
        keyword = ''.join(one + ' ' for one in keywords)

        sub_dict = order_dict()
        s = requests.session()
        while True:
            # Query with the current keyword string.
            r = s.get(self.search_url + keyword,
                      headers=self.headers,
                      timeout=10)
            bs_obj = BeautifulSoup(r.text, 'html.parser')

            small = bs_obj.find('small')
            if small is None:
                char_error = 'The URI you submitted has disallowed characters'
                if char_error in bs_obj.text:
                    print(prefix + ' [SUBHD ERROR] ' + char_error + ': ' +
                          keyword)
                    # Return a dict, as documented, rather than None.
                    return sub_dict
                # The page has no result summary.  Previously execution fell
                # through with ``small_text`` unbound (or holding the text
                # of the previous page); now the page is skipped and the
                # next, shorter keyword string is tried.
                small_text = None
            elif py == 2:
                small_text = small.text.encode('utf8')
            else:
                small_text = small.text

            if small_text is not None and '总共 0 条' not in small_text:
                for one_box in bs_obj.find_all('div', {'class': 'box'}):
                    a = one_box.find('div', {'class': 'd_title'}).find('a')
                    sub_url = self.site_url + a.attrs['href']
                    sub_name = '[SUBHD]' + a.text.encode('utf8') if py == 2 \
                               else '[SUBHD]' + a.text
                    if py == 2:
                        text = one_box.text.encode('utf8')
                    else:
                        text = one_box.text
                    if '/ar' in a.attrs['href']:
                        type_score = 0
                        type_score += ('英文' in text) * 1
                        type_score += ('繁体' in text) * 2
                        type_score += ('简体' in text) * 4
                        type_score += ('双语' in text) * 8
                        sub_dict[sub_name] = {
                            'lan': type_score,
                            'link': sub_url,
                            'ref': self.search_url + keyword
                        }
                    if len(sub_dict) >= sub_num:
                        # Enough candidates: empty the keyword list so the
                        # outer loop terminates as well.
                        del keywords[:]
                        break

            if len(keywords) > 1:
                # Too few candidates: drop the last keyword and retry.
                keyword = keyword.replace(keywords[-1], '')
                keywords.pop(-1)
                continue

            break

        if sub_dict and next(iter(sub_dict.values()))['lan'] < 8:
            # The first candidate is not bilingual: order by score instead.
            sub_dict = order_dict(
                sorted(sub_dict.items(),
                       key=lambda e: e[1]['lan'],
                       reverse=True))
        return sub_dict