def simple_script(url: str,
                  rule: Optional[Union[str, int]],
                  script_task: ScriptTask,
                  *,
                  prevent: bool = False,
                  **kwargs):
    """Run ``script_task`` against ``url`` and return the data it uploaded.

    ``rule`` is converted with ``int()`` and mapped onto
    ``script_task.quality_ranking``: 100 selects the first entry and 0 the
    last one. Values outside that range are clamped to the nearest end of
    the ranking instead of raising ``IndexError``.

    Args:
        url: Target URL handed to the script.
        rule: Selection rule; when ``None`` the script's ``default_rule``
            config value is used.
        script_task: Callable that builds the script object; its ``name``,
            ``config``, ``version`` and ``quality_ranking`` are uploaded.
        prevent: Forwarded unchanged to ``script_task``.
        **kwargs: Forwarded unchanged to ``script_task``.

    Returns:
        ``dict(ctx.iterdata())`` collected after the script has run.
    """
    if rule is None:
        rule = script_task.config.get('default_rule')

    qn = script_task.quality_ranking
    # Clamp on both ends: the original only guarded the lower bound, so a
    # rule below 0 produced an index past the end of the ranking.
    index = round((100 - int(rule)) * len(qn) / 100) - 1
    index = min(len(qn) - 1, max(0, index))
    quality = qn[index]

    # Record where the request came from and which script handles it.
    ctx.upload(
        url=url,
        name=script_task.name,
        script={
            'name': script_task.name,
            'config': script_task.config,
            'version': script_task.version,
            'quality_ranking': script_task.quality_ranking
        },
        config=script_task.config,
        rule=rule,
        quality=quality,
    )

    # Build the script and run it.
    script_task(url=url, quality=quality, prevent=prevent, **kwargs).run()
    return dict(ctx.iterdata())
def _apply(self, *args, **kwargs):
    """Call the remote function identified by ``self.callerid``.

    The call goes through the 'script' client's ``remote_apply``. Whatever
    the response carries under ``'context'`` is uploaded into the local
    ``ctx`` (nothing is uploaded when it is empty or ``None``).

    Returns:
        The ``'ret'`` entry of the remote response.
    """
    response = get_client('script').remote_apply(
        funcid=self.callerid,
        args=args,
        kwargs=kwargs,
    )

    # Synchronise the context reported by the remote side.
    remote_context = response['context']
    if not remote_context:
        remote_context = {}
    ctx.upload(**remote_context)

    return response['ret']
def run(self):
    """Resolve the play-URL results for the video page at ``self.url``.

    Steps: look up the video detail for the last path segment of the URL,
    fetch the page, report the HTTP status, upload the page title, extract
    ``aid``/``cid`` from the parsed initial state, and query ``api_playurl``
    once per page. The collected results are uploaded as ``items``.

    Raises:
        ValueError: If the initial state contains neither ``videoData`` nor
            ``epInfo``.
    """
    bvid = urlsplit(self.url).path.strip('/').split('/')[-1]
    self.view_detail(bvid)

    html_res = self.request_get(self.url, headers=dict(HEADERS))

    # Report how the page request went.
    status_line = '%s %s: %s' % (html_res.status_code, html_res.reason,
                                 html_res.url)
    if html_res.status_code != 200:
        ctx.error(status_line)
    else:
        ctx.success(status_line)

    # Upload the title. ``find`` returns None when the page has no <h1>;
    # skip the upload in that case instead of failing with AttributeError.
    html_parse = bs4.BeautifulSoup(html_res.text, features='html.parser')
    heading = html_parse.find('h1')
    if heading is not None:
        ctx.upload(title=heading.text)

    playinfo = self.parse_playinfo(html_res) or {}
    initial_state = self.parse_initial_state(html_res) or {}

    video_data = initial_state.get('videoData')
    ep_info = initial_state.get('epInfo')
    if video_data:
        aid, cid = video_data['aid'], video_data['cid']
    elif ep_info:
        aid, cid = ep_info['aid'], ep_info['cid']
    else:
        raise ValueError('参数aid, cid未找到。')

    # Multi-page check. The page count is only read from ``videoData``;
    # the original indexed it unconditionally, so the ``epInfo`` branch
    # always ended in KeyError. Without it, treat the video as one page.
    videos_p = (video_data or {}).get('videos', 1)
    if videos_p > 1:
        # Fetch the list of parts and collect their cids.
        pagelist_res = self.api_pagelist(aid)
        page_cids = [d['cid'] for d in pagelist_res['data']]
    else:
        page_cids = [cid]

    session = playinfo.get('session', '')
    results = []
    for page_cid in page_cids:
        # A fresh dict per call, so one request cannot leak into the next.
        request_params = {
            'avid': aid,
            'cid': page_cid,
            'qn': self.quality,
            'session': session,
        }
        # api: playurl
        results.append(optional(self.api_playurl(request_params)))

    ctx.upload(items=results)
async def start_task(url: str, rule: Union[int, str] = None, **options):
    """Entry point that creates and runs a task for ``url``.

    A script request is built for ``url``/``rule`` and wrapped in a
    ``ScriptLayer``. Executing it yields the sub-script layers. Every layer
    (the root first) is then run in its own asyncio task, with at most three
    layers running at once.

    Args:
        url: Target URL.
        rule: Selection rule passed to ``script_request``.
        **options: Not referenced in this function body.

    Returns:
        The ``(done, pending)`` pair returned by ``asyncio.wait`` over the
        layer tasks.
    """

    async def _worker(index, layer):
        """Run one layer inside its context values, limited by ``sema``."""
        ctxmgr_value = {
            a: index,
            script['key']: f'{id(script_req):x}',
            script['config']: script_req.getdata('config', {}),
            script['basecnf']: dict(get_conf('script')['base']),
            # Convenient access to the script's data.
            script['__getitem__']: script_req.__getitem__,
        }
        with ExitStack() as stack:
            for ctxmgr, value in ctxmgr_value.items():
                stack.enter_context(ctxmgr.apply(value))
            stack.enter_context(layer)
            async with sema:
                return await layer.run()

    async def _stop():
        """Stop every layer and wait for all of the stop calls to finish."""
        # asyncio.wait() rejects bare coroutine objects since Python 3.11
        # (deprecated since 3.8); wrap each awaitable in a future first.
        stoppers = [
            asyncio.ensure_future(layer.stop())
            for layer in [scriptlay] + subscripts
        ]
        return await asyncio.wait(stoppers)

    script_req = script_request(
        url=url,
        rule=rule,
        prevent=False,
    )
    scriptlay = ScriptLayer(script_req)
    with a.apply(0):
        subscripts = await scriptlay.execute_script()

    layers = [scriptlay] + subscripts
    ctx.upload(title=script_req.getdata('title'),
               url=script_req.getdata('url'),
               name=script_req.getdata('name'),
               roots=[scriptlay.script] + [s.script for s in subscripts],
               root_layers=layers)

    max_workers = 3
    sema = asyncio.Semaphore(max_workers)
    tasks = [
        asyncio.create_task(_worker(i, s)) for i, s in enumerate(layers)
    ]

    # Stopping is done on the event loop of the current task coroutine.
    ctx.add_stopper(_stop)
    return await asyncio.wait(tasks)
def run(self):
    """Start persistent stream fetching for the live room in ``self.url``.

    The room id is the URL path with surrounding slashes removed. The
    room's info is fetched first, then a ``live_daemon`` wrapping
    ``get_live`` for this room is uploaded as ``item``.

    Raises:
        TypeError: If the extracted room id is not numeric.
    """
    room_id = urlsplit(self.url).path.strip('/')
    if not room_id.isnumeric():
        raise TypeError(f'url输入不正确,得到room_id为:{room_id}')

    # Fetch the live room's information.
    self.live_room(room_id)

    # Persistent live-stream fetching.
    stream_source = export_func(lambda: self.get_live(room_id))
    ctx.upload(item=live_daemon(stream_source))
def live_room(self, room_id):
    """Fetch the live room's info and upload its key fields to ``ctx``.

    Uploaded fields: ``title``, ``uid``, ``live_start_time`` and
    ``area_name`` from ``room_info``, plus ``upname`` from the anchor's
    ``base_info.uname``.

    Args:
        room_id: Room id sent as the ``room_id`` query parameter.
    """
    api = 'https://api.live.bilibili.com/xlive/web-room/v1/index/getInfoByRoom'

    # Copy the shared headers so the Referer is only set for this request.
    headers = dict(HEADERS)
    headers['Referer'] = self.url

    resp = self.request_get(api, params={'room_id': room_id}, headers=headers)
    payload = resp.json()['data']
    room = payload['room_info']
    anchor = payload['anchor_info']

    ctx.upload(
        title=room['title'],
        uid=room['uid'],
        live_start_time=room['live_start_time'],
        area_name=room['area_name'],
        upname=anchor['base_info']['uname'],
    )
def script_request(url: str,
                   rule: Optional[Union[str, int]] = None,
                   *,
                   prevent: bool = False,
                   **kwargs):
    """Execute the script for ``url`` and return its processing items.

    The result of the 'script' client's ``exec_script`` is uploaded to
    ``ctx``. The items are taken from ``items`` or, when that is empty,
    from a single ``item``. The sanitised title, the temp directory
    ``<tempdir>/<script name>/<title>`` and the item count are uploaded.

    Args:
        url: Target URL.
        rule: Selection rule.
        prevent: Whether sub-script requests are allowed (per the original
            docstring); not referenced in this function body.
        **kwargs: Not referenced in this function body.

    Returns:
        The list of items.

    Raises:
        ValueError: If the script uploaded neither ``items`` nor ``item``.
    """
    outcome = get_client('script').exec_script(url=url, rule=rule)
    ctx.upload(**outcome)

    title = safety_filename(ctx.getdata('title', ''))
    srp = ctx.getdata('script', {})

    # Work out the temporary directory for this request.
    joined = os.path.join(ctx.glb.config['tempdir'], srp['name'], title)
    tempdir = os.path.realpath(joined)

    items = ctx.getdata('items', [])
    if not items:
        # Fall back to a single uploaded item.
        single = ctx.getdata('item', None)
        if single is None:
            raise ValueError('没有上传有效的处理流程。')
        items = [single]

    ctx.upload(
        items=items,
        title=title,
        tempdir=tempdir,
        n=len(items),
    )
    return items
def fake_script(request_items: List[Requester], rule: Union[str, int],
                **options):
    """(Debug mode) Fake script-request root used when debugging.

    Fills ``ctx`` with placeholder script data so that ``request_items``
    can be processed without running a real script.

    Args:
        request_items: Requesters to expose; the list is wrapped as-is, so
            the uploaded ``items`` holds exactly one element.
        rule: Selection rule stored in the context. (The original
            annotation ``str or int`` evaluated to plain ``str``.)
        **options: Extra values uploaded after the defaults, so they can
            override them.

    Returns:
        The ``items`` value read back from ``ctx``.
    """
    from app.script.manager import ScriptTask
    from app.script import ScriptBaseClass

    url = 'http://fake.script'
    script = ScriptTask(ScriptBaseClass)('')
    ctx.upload(
        url=url,
        name=script.name,
        script=script,
        rule=rule,
        quality=100,
        title=f'debug_{time.time() * 1000}',
        tempdir=ctx.glb.config['tempdir'],
        n=1,
        config=script.config,
    )
    ctx.upload(**options)
    ctx.upload(items=[request_items])
    return ctx.getdata('items')
def get_live(self, room_id):
    """Query the room's play info and build the selectable stream options.

    API: https://api.live.bilibili.com/xlive/web-room/v2/index/getRoomPlayInfo
    Example query string:
        room_id=910819&protocol=0%2C1&format=0%2C2&codec=0&qn=10000
        &platform=web&ptype=16

    One option is produced per stream/format/codec combination, described
    by its format name, quality label and ``qn``. The last URL listed for
    the codec is the one downloaded.

    Args:
        room_id: Room id sent as the ``room_id`` query parameter.

    Returns:
        ``optional(options)`` built from the collected options.

    Raises:
        ValueError: If ``live_status`` is 0 (the room is not broadcasting).
    """
    api = 'https://api.live.bilibili.com/xlive/web-room/v2/index/getRoomPlayInfo'
    params = {
        'room_id': room_id,
        'protocol': '0,1,2',
        'format': '0,1',
        'codec': '0',
        'qn': '10000',
        'platform': 'web',
        'ptype': '16',
    }
    headers = dict(HEADERS)
    headers.update({'Referer': self.url})

    resp = self.request_get(api, params=params, headers=headers)
    data = resp.json()['data']
    ctx.upload(
        room_id=room_id,
        live_status=data['live_status'],
        live_time=data['live_time'],
    )
    self.live_room(room_id)

    # Live status 0 means the broadcast has not started.
    if data['live_status'] == 0:
        raise ValueError('直播未开。')

    playurl = data['playurl_info']['playurl']

    options = []
    for stream in playurl['stream']:
        for fmt in stream['format']:
            format_name = fmt['format_name']
            for codec in fmt['codec']:
                current_qn = codec['current_qn']
                # First matching quality label; fall back to the numeric
                # qn rather than failing with IndexError when none matches.
                qn_desc = next(
                    (entry['desc'] for entry in playurl['g_qn_desc']
                     if entry['qn'] == current_qn),
                    str(current_qn),
                )
                uris = [
                    urljoin(url_info['host'],
                            codec['base_url'] + url_info['extra'])
                    for url_info in codec['url_info']
                ]
                if not uris:
                    # Nothing to download for this codec; the original
                    # raised IndexError from ``uris.pop()`` here.
                    continue
                desc = {
                    'format': format_name,
                    'quality': qn_desc,
                    'qn': current_qn,
                }
                options.append(
                    option(stream_download(uris[-1], headers=headers),
                           descriptions=desc))
    return optional(options)
async def download(uri: str = None,
                   headers: Dict = None,
                   *,
                   multi_sources: List[Dict] = None,
                   **kwargs):
    """Download request.

    Every source is added to a ``DlRequest`` that writes to a fresh temp
    file. The download is then opened, started and awaited. Progress
    callbacks (percent, time left, speed) and size/path information are
    registered on ``ctx``.

    Args:
        uri: Download URI; together with ``headers`` and ``**kwargs`` it
            forms the first source when given.
        headers: Request headers for ``uri``.
        multi_sources: Additional sources, e.g.
            ``[{'uri': 'http://xxx', 'headers': headers}, ...]``.

    Raises:
        The ``HandlerError`` reported by the downloader, chained from its
        ``exception`` attribute.
    """

    def speed():
        """Current transfer rate formatted as '<size>/s'."""
        return f'{readable_file_size(dl.transfer_rate())}/s'

    # Build the download request object on top of a temp file.
    tempf = ctx.tempdir.mktemp()
    dlr = DlRequest(file_path=tempf.filepath)

    sources = [{'uri': uri, 'headers': headers, **kwargs}] if uri else []
    sources.extend(multi_sources or [])
    for entry in sources:
        dlr.put(**entry)

    fatal = None
    async with dlopen(dlr) as dl:
        ctx.upload(
            filesize=dl.file.size,
            dstpath=dl.file.pathname,
            downloadSize=lambda: readable_file_size(dl.walk_length()),
            writeSize=lambda: readable_file_size(dl.done_length()),
        )
        ctx.set_percent(dl.percent_complete)
        ctx.set_timeleft(dl.remaining_time)
        ctx.set_speed(speed)

        dl.start(loop=asyncio.get_running_loop())
        # FIX: workaround for an Nbdler downloader issue under coroutines;
        # poll until the downloader has created its future.
        while not dl._future:
            await asyncio.sleep(0.01)

        # Register the download stopper.
        ctx.add_stopper(dl.pause)

        # Every reported exception is logged as a warning; the first
        # HandlerError pauses the download and is remembered as fatal.
        async for reported in dl.aexceptions():
            ctx.warning(reported.exc_info)
            if isinstance(reported, HandlerError):
                await dl.apause()
                fatal = reported
                break
        await dl.ajoin()

    if fatal:
        # An error occurred: re-raise it with its underlying cause.
        raise fatal from fatal.exception

    # Refresh the file information.
    ctx.upload(
        dstpath=dl.file.pathname,
        filesize=dl.file.size,
        downloadSize=lambda: readable_file_size(dl.walk_length()),
        writeSize=lambda: readable_file_size(dl.done_length()),
    )
async def stream_download(uri: str = None,
                          headers: Dict = None,
                          buffsize: float = 1024 * 1024,
                          timeout: float = None,
                          **kwargs):
    """Stream a response body into a temp file; meant for real-time data.

    The body is read in 4 KiB chunks, buffered in memory and written out
    whenever at least ``buffsize`` bytes have accumulated. Speed, size and
    (when the content length is known) percentage callbacks are registered
    on ``ctx``.

    Args:
        uri: URL to fetch.
        headers: Request headers.
        buffsize: Number of buffered bytes that triggers a write to disk.
        timeout: Not referenced in this function body.
        **kwargs: Forwarded to ``sess.get``.

    Raises:
        ConnectionAbortedError: If the response status is not 200 or 206.
        Warning: Once the downloaded size reaches the script's ``maxsize``
            config value (the original comment labels this "split the
            video").
    """

    def stop():
        # Ask the download loop to stop, then block until it acknowledges.
        # NOTE(review): ``stop_event.wait()`` blocks the calling thread;
        # presumably stoppers run off the event-loop thread — confirm.
        nonlocal stop_flag
        stop_flag = True
        stop_event.wait()

    def size():
        # Bytes received so far.
        nonlocal sizecnt
        return sizecnt

    def speed():
        # Average speed formatted as '<size>/s'.
        nonlocal avgspeed
        return f'{readable_file_size(avgspeed)}/s'

    def percent():
        # Fraction received; only registered when the length is known.
        nonlocal total_size
        return sizecnt / total_size

    maxsize = ctx.script.config['maxsize']
    stop_event = threading.Event()
    stop_flag = False
    ctx.add_stopper(stop)
    try:
        tempf = ctx.tempdir.mktemp()
        async with aiohttp.ClientSession() as sess:
            resp = await sess.get(url=uri, headers=headers, **kwargs)
            if resp.status not in (200, 206):
                raise ConnectionAbortedError()
            chunksize = 1024 * 4
            sizecnt = 0
            avgspeed = 0
            donesize = 0
            starttime = time.time()
            if resp.content_length is None:
                # Progress cannot be determined.
                ctx.set_percent(None)
            else:
                total_size = resp.content_length
                ctx.set_percent(percent)
            buffcnt = 0
            buff_lst = []
            ctx.set_speed(speed)
            ctx.upload(
                filesize=size,
                dstpath=tempf.filepath,
                downloadSize=lambda: readable_file_size(sizecnt),
                writeSize=lambda: readable_file_size(donesize),
            )
            with tempf('wb') as f:
                try:
                    async for chunk in resp.content.iter_chunked(chunksize):
                        # Track the downloaded size.
                        chunklen = len(chunk)
                        sizecnt += chunklen
                        buffcnt += chunklen
                        buff_lst.append(chunk)
                        # Flush to the file once the buffer overflows.
                        if buffcnt >= buffsize:
                            f.writelines(buff_lst)
                            donesize = sum([len(buff) for buff in buff_lst],
                                           donesize)
                            buffcnt = 0
                            buff_lst = []
                        # Average speed; a zero elapsed time divides by
                        # infinity, giving 0 rather than an error.
                        avgspeed = sizecnt / (
                            (time.time() - starttime) or float('inf'))
                        if stop_flag:
                            # Acknowledge the stop request and leave.
                            stop_event.set()
                            break
                        # Split the video: size limit reached.
                        if maxsize <= sizecnt:
                            raise Warning()
                finally:
                    # Write whatever is still buffered, on error too.
                    # (``donesize`` is not updated for this final write.)
                    if buff_lst:
                        f.writelines(buff_lst)
    finally:
        # Always release a ``stop()`` caller that may be waiting.
        stop_flag = True
        stop_event.set()
async def ffmpeg(inputs: Union[List[str], str], cmd_operator: str, cal_len,
                 **kwargs):
    """ffmpeg stream-processing engine.

    The command is built by the function registered on ``ffmpeg`` under
    ``cmd_operator``, run as a shell subprocess, and tracked through a
    ``FfmpegStreamHandler``.

    Args:
        inputs: One input or a list/tuple/set of inputs; each is a file
            path string or an object whose ``getdata('dstpath')`` gives
            the path.
        cmd_operator: Name of the attribute on ``ffmpeg`` whose
            ``__wrapped__`` function builds the command.
        cal_len: When truthy and no length is known yet, the total length
            is computed with ``cal_total_length`` and uploaded.
        **kwargs: Forwarded to the command builder.
    """

    def get_input_filepath(inp) -> str:
        """Return the file path behind one input."""
        if isinstance(inp, str):
            return inp
        if isinstance(inp, (Requester, Optional, Option)):
            return inp.getdata('dstpath')
        assert inp

    def percent():
        """Completed length as a percentage of the total length."""
        total = time_length or float('inf')
        return f.complete_length() * 100 / total

    time_length = ctx.glb.script['length'] or float('inf')
    temp = ctx.tempdir.mktemp(ctx.glb.config['to_format'][0])

    if not isinstance(inputs, (list, tuple, set)):
        inputs = [inputs]

    # Look up the undecorated command builder by its operator name and let
    # it generate the ffmpeg command.
    build_cmd = getattr(ffmpeg, cmd_operator).__wrapped__
    cmd = await build_cmd(
        inputs=[get_input_filepath(entry) for entry in inputs],
        output=temp.filepath,
        **kwargs)

    if cal_len and time_length in (float('inf'), None):
        # Compute the total length.
        time_length = await cal_total_length(inputs)
        ctx.upload(length=time_length)

    source = os.path.join(ctx.config['source'], ctx.config['name'])
    if isinstance(cmd, (list, tuple)):
        cmd = list2cmdline([source] + list(cmd))
    else:
        cmd = f'{source} ' + cmd

    if ctx.config['overwrite']:
        cmd += ' -y'
    print(cmd)

    # NOTE(review): the command string is executed through the shell;
    # confirm that every part of it comes from trusted input.
    process = await asyncio.create_subprocess_shell(
        cmd,
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    f = FfmpegStreamHandler(process)

    ctx.set_speed(f.speed)
    ctx.set_percent(percent)
    ctx.upload(
        cmd=cmd,
        dstpath=temp.filepath,
        input=f.get_inputs,
        output=f.get_outputs,
    )
    ctx.add_stopper(f.stop_threadsafe)

    await f.run(timeout=ctx.config.get('timeout', None), close_stdin=False)