async def search_danmaku(
        self,
        keyword: str,
        *,
        callback: Callable[[DanmakuMeta], None] = None,
        co_callback: Callable[[DanmakuMeta], Coroutine] = None
) -> None:
    """Search the enabled danmaku engines for the given keyword."""

    async def run(searcher: DanmakuSearcher):
        logger.info(f"{searcher.__class__.__name__} is searching for [{keyword}]")
        if callback is not None:
            async for item in searcher._search(keyword):
                callback(item)
            return
        if co_callback is not None:
            async for item in searcher._search(keyword):
                await co_callback(item)

    searchers = self._loader.get_danmaku_searcher()
    if not searchers:
        logger.warning("No danmaku searcher enabled")
        return

    logger.info(f"Searching Danmaku -> [{keyword}], enabled engines: {len(searchers)}")
    start_time = perf_counter()
    await asyncio.wait([run(s) for s in searchers])
    end_time = perf_counter()
    logger.info(f"Searching danmaku finished in {end_time - start_time:.2f}s")
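# Hedged usage sketch for search_danmaku above. The enclosing object here
# ("scheduler") and its construction are assumptions -- only the keyword-only
# callback / co_callback signature comes from the snippet itself.
import asyncio

async def demo_danmaku_search(scheduler) -> None:
    results = []

    def on_meta(meta) -> None:
        # Synchronous callback, invoked once per DanmakuMeta as soon as it is produced.
        results.append(meta)

    await scheduler.search_danmaku("some keyword", callback=on_meta)
    print(f"Collected {len(results)} danmaku sources")

# asyncio.run(demo_danmaku_search(scheduler))  # 'scheduler' is a hypothetical instance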
def parse_one_page(self, keyword: str, page: int):
    """Parse all anime summaries on one page and also return the page HTML."""
    logger.info(f"Searching for: {keyword}, page: {page}")
    resp = self.get(self._search_api, params={'query': keyword, 'page': page})
    if resp.status_code != 200 or "0纪录" in resp.text:
        logger.info(f"No search result for {keyword}")
        return [], ""
    ret = []
    anime_meta_list = self.xpath(
        resp.text, '//div[@class="cell blockdiff2"] | //div[@class="cell blockdiff"]')
    for meta in anime_meta_list:
        anime = AnimeMetaInfo()
        anime.title = meta.xpath('.//a[@class="cell_imform_name"]/text()')[0]
        anime.cover_url = "https:" + meta.xpath('.//a[@class="cell_poster"]/img/@src')[0]
        anime.category = meta.xpath('.//div[@class="cell_imform_kv"][7]/span[2]/text()')[0]
        anime.detail_page_url = meta.xpath("a/@href")[0]  # e.g. "/detail/20170172"
        ret.append(anime)
    return ret, resp.text
def enable_danmaku(self, danmaku: str) -> bool:
    """Enable a danmaku search engine."""
    if danmaku in self._danmaku_engine:
        logger.info(f"Danmaku Engine {danmaku} has already been loaded")
        return True  # already enabled
    self._load_danmaku(danmaku)  # load the engine dynamically
    return GLOBAL_CONFIG.enable_danmaku(danmaku)  # update the config file
def get_detail(self, detail_page_url: str):
    url = self._base_url + detail_page_url
    logger.info(f"Parsing detail page: {url}")
    resp = self.get(url)
    if resp.status_code != 200:
        logger.warning(f"Response error: {resp.status_code} {url}")
        return AnimeDetailInfo()
    body = self.xpath(resp.text, '//div[@class="fire l"]')[0]
    anime_detail = AnimeDetailInfo()
    anime_detail.title = body.xpath("./div/h1/text()")[0]
    anime_detail.category = " ".join(body.xpath('.//div[@class="sinfo"]/span[3]/a/text()'))
    anime_detail.desc = body.xpath('.//div[@class="info"]/text()')[0].replace("\r\n", "").strip()
    anime_detail.cover_url = body.xpath('.//div[@class="thumb l"]/img/@src')[0]
    vc = VideoCollection()
    vc.name = "播放列表"
    video_blocks = body.xpath('.//div[@class="movurl"]//li')
    for block in video_blocks:
        video = Video()
        video.name = block.xpath("./a/text()")[0]
        video.raw_url = block.xpath("./a/@href")[0]  # e.g. '/v/3849-162.html'
        video.handler = "YHDMVideoHandler"
        vc.append(video)
    anime_detail.append(vc)
    return anime_detail
def load_utils_module(self, module: str):
    """Load only the utility classes of a module, used for parsing resource data."""
    py_module = import_module(module)
    for name, cls in getmembers(py_module, isclass):
        if issubclass(cls, AnimeDetailParser) and cls != AnimeDetailParser \
                and module not in self._anime_detail_parsers:
            self._anime_detail_parsers[module] = cls()
            logger.info(f"Loading {name}: {cls}")
        if issubclass(cls, AnimeUrlParser) and cls != AnimeUrlParser \
                and module not in self._anime_url_parsers:
            self._anime_url_parsers[module] = cls()
            logger.info(f"Loading {name}: {cls}")
        if issubclass(cls, StreamProxy) and cls != StreamProxy \
                and module not in self._anime_proxy_cls:
            self._anime_proxy_cls[module] = cls  # store the class only; instantiate on demand
            logger.info(f"Loading {name}: {cls}")
        if issubclass(cls, DanmakuDetailParser) and cls != DanmakuDetailParser \
                and module not in self._danmaku_detail_parsers:
            self._danmaku_detail_parsers[module] = cls()
            logger.info(f"Loading {name}: {cls}")
        if issubclass(cls, DanmakuDataParser) and cls != DanmakuDataParser \
                and module not in self._danmaku_data_parsers:
            self._danmaku_data_parsers[module] = cls()
            logger.info(f"Loading {name}: {cls}")
def enable_engine(self, engine: str) -> bool:
    """Enable an anime search engine."""
    if engine in self._engines:
        logger.info(f"Anime Engine {engine} has already been loaded")
        return True  # already enabled
    self._load_engine(engine)  # load the engine dynamically
    return GLOBAL_CONFIG.enable_engine(engine)  # update the config file
def enable_danmaku(self, danmaku: str) -> bool:
    """Enable a danmaku engine in the config."""
    if danmaku in self.get_all_danmaku():
        logger.info(f"Danmaku {danmaku} enabled")
        self._dict["danmaku"][danmaku] = True
        self._save()
        return True
    return False
def enable_engine(self, engine: str) -> bool:
    """Enable an engine in the config."""
    if engine in self.get_all_engines():
        logger.info(f"Engine {engine} enabled")
        self._dict["engines"][engine] = True
        self._save()
        return True
    return False
def disable_engine(self, engine: str) -> bool:
    """Disable an anime search engine, engine: api.engines.xxx"""
    if engine not in self._engines:
        logger.info(f"Anime Engine {engine} is already disabled")
        return True  # it was never enabled in the first place
    self._engines.pop(engine)
    logger.info(f"Disabled Anime Engine: {engine}")
    return GLOBAL_CONFIG.disable_engine(engine)
def disable_danmaku(self, danmaku: str) -> bool:
    """Disable a danmaku search engine, danmaku: api.danmaku.xxx"""
    if danmaku not in self._danmaku_engine:
        logger.info(f"Danmaku Engine {danmaku} is already disabled")
        return True  # it was never enabled in the first place
    self._danmaku_engine.pop(danmaku)
    logger.info(f"Disabled Danmaku Engine: {danmaku}")
    return GLOBAL_CONFIG.disable_danmaku(danmaku)
def __init__(self):
    self._file = os.path.dirname(__file__) + os.sep + "config.json"
    self._dict = {}
    logger.info(f"Loading config from {self._file}")
    with open(self._file, "r") as f:
        self._dict = json.load(f)
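# Hedged illustration of the config.json shape implied by enable_engine /
# enable_danmaku above: two mappings from module path to an enabled flag.
# The concrete module names below are made up for illustration only.
EXAMPLE_CONFIG = {
    "engines": {
        "api.engines.example_site": True,
    },
    "danmaku": {
        "api.danmaku.example_source": False,
    },
}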
def set_proxy_headers(self):
    # Some videos are hosted on the Chaoxing (超星学习通) cloud drive; they must be
    # requested with the client's UA, otherwise direct access returns 403.
    real_url = self.get_cached_real_url()
    if "chaoxing.com" in real_url:
        logger.info(f"Set proxy headers for {real_url}")
        return {
            "User-Agent": "Dalvik/2.1.0 (Linux; U; Android 8.1.0; 16th Build/OPM1.171019.026)"
        }
async def run(searcher: AnimeSearcher):
    logger.info(f"{searcher.__class__.__name__} is searching for [{keyword}]")
    if callback is not None:
        async for item in searcher._search(keyword):
            callback(item)  # handle each search result via the callback as soon as it is produced
        return
    if co_callback is not None:
        async for item in searcher._search(keyword):
            await co_callback(item)
async def run(searcher: DanmakuSearcher):
    logger.info(f"{searcher.__class__.__name__} is searching for [{keyword}]")
    if callback is not None:
        async for item in searcher._search(keyword):
            callback(item)
        return
    if co_callback is not None:
        async for item in searcher._search(keyword):
            await co_callback(item)
async def parse_anime_real_url(self, anime: Anime) -> AnimeInfo:
    """Resolve the direct URL of a single video episode."""
    url_parser = self._loader.get_anime_url_parser(anime.module)
    logger.info(f"{url_parser.__class__.__name__} parsing {anime.raw_url}")
    url = await url_parser._parse(anime.raw_url)
    if url.is_available():
        return url
    logger.warning("Parse real url failed")
    return AnimeInfo()
async def parse_danmaku_detail(self, meta: DanmakuMeta) -> DanmakuDetail:
    """Parse the detail information of a danmaku source."""
    detail_parser = self._loader.get_danmaku_detail_parser(meta.module)
    if not detail_parser:
        self._loader.load_utils_module(meta.module)
        detail_parser = self._loader.get_danmaku_detail_parser(meta.module)
    if detail_parser is not None:
        logger.info(f"{detail_parser.__class__.__name__} parsing {meta.play_url}")
        return await detail_parser._parse(meta.play_url)
    return DanmakuDetail()
def parse_response(data: str) -> dict:
    """Process the API response data."""
    try:
        # Searching again within 3 seconds makes the API return a rate-limit notice,
        # and sometimes partially loaded data breaks the JSON format.
        data = data.replace("&comments&", "").replace("&is_fav&", "0")
        return json.loads(data)
    except JSONDecodeError:
        logger.info("Please wait for 3s...")
        return {}
def _get_stream_with_range(self):
    """Fetch the video stream according to the Range header sent by the client."""
    byte_start = 0
    range_header = request.headers.get("Range", None)
    logger.info(f"Client header: Range={range_header}")
    if range_header:
        result = re.search(r"(\d+)-\d*", range_header)
        if result:
            byte_start = int(result.group(1))  # start offset requested by the client
    return self._get_stream_from_server(byte_start)
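# Minimal standalone sketch of the Range-header parsing used by
# _get_stream_with_range above; the helper name is an assumption.
import re

def parse_range_start(range_header: str) -> int:
    """Return the byte offset requested by the client, or 0 if absent/unparsable."""
    if not range_header:
        return 0
    match = re.search(r"(\d+)-\d*", range_header)
    return int(match.group(1)) if match else 0

assert parse_range_start("bytes=1024-") == 1024
assert parse_range_start("") == 0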
async def _detect_lifetime(self):
    """Try to find an expiry timestamp in the direct URL and compute its remaining lifetime."""
    ts_start = int(time() / 1e5)  # first 5 digits of the current unix timestamp
    stamps = re.findall(rf"{ts_start}\d{{5}}", self._url)
    for stamp in stamps:
        lifetime = int(stamp) - int(time())
        if lifetime > 60:  # treat anything valid for more than 1 minute as usable
            return lifetime
    return self._lifetime
def _load_danmaku(self, danmaku: str):
    """Load a danmaku engine according to the config.

    @danmaku: api.danmaku.xxx
    """
    module = import_module(danmaku)
    for _, cls in getmembers(module, isclass):
        if issubclass(cls, DanmakuEngine) and cls != DanmakuEngine:
            # 'api.danmaku.xxx': <class 'api.danmaku.xx.xxEngine'>
            self._danmaku_engine.setdefault(cls.__module__, cls)
            logger.info(f"Loading DanmakuEngine {cls.__module__}.{cls.__name__}: {cls}")
async def parse_anime_detail(self, meta: AnimeMeta) -> AnimeDetail:
    """Parse the detail page of an anime."""
    detail_parser = self._loader.get_anime_detail_parser(meta.module)
    if not detail_parser:
        # The direct link is being accessed while the module is disabled in the
        # config; load the utility classes so parsing can still be done.
        self._loader.load_utils_module(meta.module)
        detail_parser = self._loader.get_anime_detail_parser(meta.module)
    if detail_parser is not None:
        logger.info(f"{detail_parser.__class__.__name__} parsing {meta.detail_url}")
        return await detail_parser._parse(meta.detail_url)
    return AnimeDetail()
async def parse_danmaku_data(self, danmaku: Danmaku) -> DanmakuData:
    """Parse the danmaku data of one episode."""
    data_parser = self._loader.get_danmaku_data_parser(danmaku.module)
    if data_parser is not None:
        logger.debug(f"{data_parser.__class__.__name__} parsing {danmaku.cid}")
        start_time = perf_counter()
        data = await data_parser._parse(danmaku.cid)
        end_time = perf_counter()
        logger.info(f"Reading danmaku data finished in {end_time - start_time:.2f}s")
        return data
    return DanmakuData()
def unload_full_module(self, module: str) -> None:
    """Unload every engine component registered under the given module name."""
    if module.startswith("api.anime"):
        self._anime_searchers.pop(module, None)
        self._anime_detail_parsers.pop(module, None)
        self._anime_url_parsers.pop(module, None)
        self._anime_proxy_cls.pop(module, None)
    if module.startswith("api.danmaku"):
        self._danmaku_searchers.pop(module, None)
        self._danmaku_detail_parsers.pop(module, None)
        self._danmaku_data_parsers.pop(module, None)
    logger.info(f"Unloaded <module {module}>")
def fetch_rss() -> Dict[str, int]:
    collection = RssFetcher.from_file()
    logger.info(f"Fetch RSS feeds from {len(collection.sources)} sources")
    rss_results = OrderedDict({'total': 0, 'successful': 0, 'failed': 0})
    for article in collection.fetch_all():
        rss_results = save_resource(article, rss_results)
    logger.info(
        f"Added {rss_results['successful']} resources, {rss_results['failed']} errors"
    )
    return rss_results
def fetch_newsapi() -> Dict[str, int]:
    newsapi = NewsAPIClient()
    logger.info(f"Getting articles from {len(newsapi.DEFAULT_SOURCES)} sources")
    api_results = OrderedDict({'total': 0, 'successful': 0, 'failed': 0})
    for article in newsapi.get_top_headlines():
        api_results = save_resource(article, api_results)
    logger.info(
        f"Added {api_results['successful']} resources, {api_results['failed']} errors"
    )
    return api_results
async def parse_anime_real_url(self, anime: Anime) -> AnimeInfo:
    """Resolve the direct URL of a single video episode."""
    url_parser = self._loader.get_anime_url_parser(anime.module)
    logger.info(f"{url_parser.__class__.__name__} parsing {anime.raw_url}")
    for _ in range(3):  # 3 attempts; if it still fails, the resource is likely dead
        url = await url_parser._parse(anime.raw_url)
        if url.is_available():
            return url
        logger.warning("Parse real url failed, retry...")
    logger.warning("Parse real url failed 3 times, maybe this resource is not available")
    return AnimeInfo()
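# Generic sketch of the bounded-retry pattern used by parse_anime_real_url
# above; the helper name and parameters are assumptions, not part of the API.
from typing import Awaitable, Callable, Optional, TypeVar

T = TypeVar("T")

async def retry_async(
        attempt: Callable[[], Awaitable[T]],
        ok: Callable[[T], bool],
        attempts: int = 3
) -> Optional[T]:
    """Run `attempt` up to `attempts` times and return the first result that `ok` accepts."""
    for _ in range(attempts):
        result = await attempt()
        if ok(result):
            return result
    return None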
def get_anime_detail(self, meta: AnimeMetaInfo) -> AnimeDetailInfo:
    """Parse the detail page of an anime and return detailed info including the video list."""
    if not meta:
        logger.error("Invalid request")
        return AnimeDetailInfo()
    target_engine = self._engines.get(meta.engine)
    if target_engine is not None:
        return target_engine()._get_detail(meta.detail_page_url)
    # The engine is not loaded; load it temporarily for this request only.
    logger.info(f"Engine not found: {meta.engine}, it will be loaded temporarily.")
    self._load_engine(meta.engine)
    target_engine = self._engines.pop(meta.engine)
    logger.info(f"Unloading engine: {target_engine}")
    return target_engine()._get_detail(meta.detail_page_url)
async def detect_more_info(self):
    await self.init_session()
    logger.info("Detect information of video...")
    self._lifetime = await self._detect_lifetime()
    for _ in range(3):
        resp = await self.get(self._url, allow_redirects=True)
        if not resp or resp.status != 200:
            continue
        self._format = self._detect_format(resp.content_type)
        self._size = resp.content_length
        chunk = await resp.content.read(512)
        self._resolution = self._detect_resolution(chunk)
        break
    await self.close_session()
def get_category_feed(self, category_name: str) -> None:
    if category_name not in self.available_categories:
        return
    url = self.get_url(category_name)
    logger.info(f"Request feed from {url}")
    result = feedparser.parse(url)
    if hasattr(result, "status"):
        if result.status >= 400:
            raise requests.HTTPError(result.status)
        else:
            logger.warning(result)
    with Lock():
        self._results[category_name] = result
def _load_engine(self, engine: str):
    """Load an engine and its corresponding VideoHandler according to the config.

    @engine: api.engine.xxx
    """
    module = import_module(engine)
    for cls_name, cls in getmembers(module, isclass):
        if issubclass(cls, VideoHandler):
            if cls_name not in self._handlers:
                # 'xxHandler': <class 'api.engines.xx.xxHandler'>
                self._handlers[cls_name] = cls
                logger.info(f"Loading VideoHandler: {cls_name}: {cls}")
        if issubclass(cls, BaseEngine) and cls != BaseEngine:
            engine_name = cls.__module__
            if engine_name not in self._engines:
                # 'api.engines.xx': <class 'api.engines.xx.xxEngine'>
                self._engines[engine_name] = cls
                logger.info(f"Loading Engine: {cls.__name__}: {cls}")
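# Minimal, generic sketch of the import_module + getmembers pattern used by
# _load_engine / _load_danmaku above: import a module by dotted path and
# register every concrete subclass of a base class. Names are illustrative only.
from importlib import import_module
from inspect import getmembers, isclass

def load_subclasses(module_path: str, base_cls: type) -> dict:
    registry = {}
    module = import_module(module_path)
    for _, cls in getmembers(module, isclass):
        if issubclass(cls, base_cls) and cls is not base_cls:
            registry[cls.__module__] = cls  # keyed by module path, like _load_danmaku
    return registry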