def test_compute_subtitle_score(self):
    """Scores for episode and movie subtitle names match the expected fixtures."""
    cases = (
        (TestGuessSubtitle.test_episode_info, TestGuessSubtitle.test_episode_subs),
        (TestGuessSubtitle.test_movie_info, TestGuessSubtitle.test_movie_subs),
    )
    for video_info, sub_fixtures in cases:
        for sub_name, expected_score in sub_fixtures:
            self.assertEqual(
                compute_subtitle_score(video_info, sub_name),
                expected_score,
            )
def test_compute_subtitle_score_not_match_episode(self):
    """With match_episode=False, subtitle packages still score as expected."""
    for package_name, expected_score in TestGuessSubtitle.test_episode_package:
        actual = compute_subtitle_score(
            TestGuessSubtitle.test_episode_info,
            package_name,
            match_episode=False,
        )
        self.assertEqual(actual, expected_score)
def _parse_episode_page(self, session, link, info, match_episode=True):
    """
    Compute scores for each subtitle listed on an episode page.

    params:
        session: requests.Session used for all page fetches
        link: str, episode page link
        info: dict, result of guessit
        match_episode: bool, forwarded to compute_subtitle_score; when
            False, subtitles that do not match the episode number are
            still considered
    return:
        subs: dict, entry format same as get_subtitles; subtitles whose
            score is -1 (no match) are skipped.  Note: the dict is NOT
            sorted here — sorting by score happens in get_subtitles.
    """

    def _get_archive_download_link(sub_page_link):
        # The subtitle page links to an intermediate download page,
        # which in turn holds the actual archive link.
        r = session.get(sub_page_link)
        bs_obj = BeautifulSoup(r.text, "html.parser")
        down_page_link = bs_obj.find("a", {"id": "down1"}).attrs["href"]
        down_page_link = urljoin(ZimukuDownloader.site_url, down_page_link)
        r = session.get(down_page_link)
        bs_obj = BeautifulSoup(r.text, "html.parser")
        download_link = bs_obj.find("a", {"rel": "nofollow"})
        download_link = download_link.attrs["href"]
        download_link = urljoin(ZimukuDownloader.site_url, download_link)
        return download_link

    r = session.get(link)
    bs_obj = BeautifulSoup(r.text, "html.parser")
    subs_body = bs_obj.find("div", class_="subs box clearfix").find("tbody")
    subs = {}
    for sub in subs_body.find_all("tr"):
        a = sub.find("a")
        name = extract_name(a.text, en=True)
        score = compute_subtitle_score(info, name, match_episode=match_episode)
        if score == -1:  # subtitle does not match the video at all
            continue
        # Encode available languages as a bitmask derived from flag icons.
        type_score = 0
        for img in sub.find("td", class_="tac lang").find_all("img"):
            if "uk" in img.attrs["src"]:
                type_score += 1
            elif "hongkong" in img.attrs["src"]:
                type_score += 2
            elif "china" in img.attrs["src"]:
                type_score += 4
            elif "jollyroger" in img.attrs["src"]:
                type_score += 8
        sub_page_link = urljoin(ZimukuDownloader.site_url, a.attrs["href"])
        download_link = _get_archive_download_link(sub_page_link)
        # Keep a private session copy carrying the episode page as Referer
        # (presumably required by the download endpoint — TODO confirm).
        backup_session = copy.deepcopy(session)
        backup_session.headers["Referer"] = link
        # TODO: consider download times when computing scores
        subs[ZimukuDownloader.choice_prefix + name] = {
            "link": download_link,
            "lan": type_score,
            "session": backup_session,
            "score": score,
        }
    return subs
def get_subtitles(self, video, sub_num=10):
    """
    Search ZIMUKU for subtitles matching *video*.

    params:
        video: video object exposing an ``info`` dict (guessit result)
        sub_num: int, maximum number of subtitles to return
    return:
        dict mapping prefixed subtitle names to entries with keys
        "link", "lan", "session" and "score"; at most sub_num items,
        ordered by descending score
    raises:
        ValueError: when the search result page has an unrecognized layout
    """
    print("Searching ZIMUKU...", end="\r")
    keywords = self.get_keywords(video)
    info_dict = video.info
    s = requests.session()
    s.headers.update(Downloader.header)
    sub_dict = dict()
    # Start from the most specific keyword combination and drop the last
    # keyword on every miss until enough subtitles are collected.
    for i in range(len(keywords), 1, -1):
        keyword = ".".join(keywords[:i])
        r = s.get(ZimukuDownloader.search_url + keyword, timeout=10)
        html = r.text
        if "搜索不到相关字幕" in html:  # "no matching subtitles found"
            continue
        bs_obj = BeautifulSoup(r.text, "html.parser")
        # Combined search-result page
        if bs_obj.find("div", {"class": "item"}):
            for item in bs_obj.find_all("div", {"class": "item"}):
                title_a = item.find("p", class_="tt clearfix").find("a")
                if info_dict["type"] == "episode":
                    title = title_a.text
                    # Guard: a result title may lack the "第X季" (season X)
                    # marker; skip it instead of crashing on .group() of None.
                    season_match = re.search("第(.*)季", title)
                    if season_match is None:
                        continue
                    season_cn1 = season_match.group(1).strip()
                    season_cn2 = num_to_cn(str(info_dict["season"]))
                    if season_cn1 != season_cn2:
                        continue
                episode_link = ZimukuDownloader.site_url + title_a.attrs[
                    "href"]
                new_subs = self._parse_episode_page(
                    s, episode_link, info_dict)
                if not new_subs:
                    # Retry without episode matching so that season
                    # packages are still picked up.
                    new_subs = self._parse_episode_page(
                        s, episode_link, info_dict, match_episode=False)
                sub_dict.update(new_subs)
        # Shooter subtitle page
        elif bs_obj.find("div", {"class": "persub"}):
            for persub in bs_obj.find_all("div", {"class": "persub"}):
                title = persub.h1.text.split("/")[-1]
                # NOTE: this will filter out all subtitle packages
                score = compute_subtitle_score(info_dict, title)
                if score == -1:
                    continue
                link = ZimukuDownloader.site_url + persub.h1.a.attrs["href"]
                sub = self._parse_shooter_episode_page(s, title, link)
                sub[list(sub.keys())[0]]["score"] = score
                sub_dict.update(sub)
        else:
            raise ValueError("zimuku downloader needs updates")
        if len(sub_dict) >= sub_num:
            del keywords[:]  # clear remaining keywords; search is done
            break
    # Highest-scoring subtitles first, trimmed to sub_num entries.
    sub_dict = OrderedDict(
        sorted(sub_dict.items(), key=lambda e: e[1]["score"], reverse=True))
    keys = list(sub_dict.keys())[:sub_num]
    return {key: sub_dict[key] for key in keys}