Python upload 예제들, helper.ctxtools.ctx.upload Python 예제들

예제 #1

0

파일 보기

파일: script.py 프로젝트: zerosetall/VideoCrawlerEngine

def simple_script(url: str,
                  rule: Optional[Union[str, int]],
                  script_task: ScriptTask,
                  *,
                  prevent: bool = False,
                  **kwargs):
    """Run a script task against ``url`` and return the collected context data.

    Args:
        url: Target URL handed to the script.
        rule: Selection rule, converted with ``int()``. When ``None`` the
            ``'default_rule'`` entry of ``script_task.config`` is used.
        script_task: Script task; it is called with ``url``, ``quality``,
            ``prevent`` and ``kwargs`` and ``run()`` is invoked on the result.
        prevent: Forwarded unchanged to the script.
        **kwargs: Extra keyword arguments forwarded to the script.

    Returns:
        A dict built from ``ctx.iterdata()`` after the script has run.
    """
    if rule is None:
        rule = script_task.config.get('default_rule')

    # Translate the rule into an index of the quality ranking: a rule of 100
    # selects the first entry, smaller rules select later entries.
    ranking = script_task.quality_ranking
    position = round((100 - int(rule)) * len(ranking) / 100) - 1
    quality = ranking[max(0, position)]

    # Publish the request that originated this script run.
    script_info = {
        'name': script_task.name,
        'config': script_task.config,
        'version': script_task.version,
        'quality_ranking': script_task.quality_ranking
    }
    ctx.upload(
        url=url,
        name=script_task.name,
        script=script_info,
        config=script_task.config,
        rule=rule,
        quality=quality,
    )

    # Create the script and run it.
    runner = script_task(url=url, quality=quality, prevent=prevent, **kwargs)
    runner.run()

    return dict(ctx.iterdata())

예제 #2

0

파일 보기

 def _apply(self, *args, **kwargs):
     """Apply this caller remotely through the 'script' client.

     The positional and keyword arguments are sent as ``args``/``kwargs``
     together with ``self.callerid``. The ``'context'`` entry of the reply
     (if truthy) is uploaded into ``ctx`` and the ``'ret'`` entry is returned.
     """
     client = get_client('script')
     reply = client.remote_apply(funcid=self.callerid,
                                 args=args,
                                 kwargs=kwargs)
     # Synchronise the local context with the one returned by the remote side.
     remote_context = reply['context'] or {}
     ctx.upload(**remote_context)
     return reply['ret']

예제 #3

0

파일 보기

파일: bilibili.py 프로젝트: zerosetall/VideoCrawlerEngine

    def run(self):
        """Resolve the play-url results for the video page at ``self.url``.

        The page is fetched, its ``<h1>`` text is uploaded as the title, the
        ``aid``/``cid`` pair is read from the parsed initial state, and one
        ``api_playurl`` result per page cid is uploaded as ``items``.

        Raises:
            ValueError: If the initial state cannot be parsed, or if it holds
                neither ``videoData`` nor ``epInfo``.
        """
        # The last path segment of the URL is used as the video id.
        bvid = urlsplit(self.url).path.strip('/').split('/')[-1]
        self.view_detail(bvid)

        html_res = self.request_get(self.url, headers=dict(HEADERS))
        # Report how the page request went.
        status_line = '%s %s: %s' % (html_res.status_code, html_res.reason,
                                     html_res.url)
        if html_res.status_code != 200:
            ctx.error(status_line)
        else:
            ctx.success(status_line)

        # Upload the title.
        html_parse = bs4.BeautifulSoup(html_res.text, features='html.parser')
        ctx.upload(title=html_parse.find('h1').text)

        playinfo = self.parse_playinfo(html_res) or {}
        initial_state = self.parse_initial_state(html_res) or {}

        if not initial_state:
            raise ValueError('initial_state参数无法正确解析。')

        video_data = initial_state.get('videoData')
        ep_info = initial_state.get('epInfo')
        if video_data:
            aid = video_data['aid']
            cid = video_data['cid']
        elif ep_info:
            aid = ep_info['aid']
            cid = ep_info['cid']
        else:
            raise ValueError('参数aid, cid未找到。')

        # Is the video split into several pages?  The page count is read from
        # videoData; when aid/cid came from epInfo there is no usable
        # videoData, so fall back to a single page instead of raising.
        videos_p = (video_data or {}).get('videos', 1)
        if videos_p > 1:
            # Fetch the list of pages.
            pagelist_res = self.api_pagelist(aid)
            page_cids = [d['cid'] for d in pagelist_res['data']]
        else:
            page_cids = [cid]

        request_params = {
            'avid': aid,
            'cid': cid,
            'qn': self.quality,
            'session': playinfo.get('session', '')
        }

        results = []
        for page_cid in page_cids:
            # The same parameter dict is reused; only the cid changes.
            request_params['cid'] = page_cid
            # api: playurl
            result = self.api_playurl(request_params)
            results.append(optional(result))

        ctx.upload(items=results)

예제 #4

0

파일 보기

async def start_task(url: str, rule: Union[int, str] = None, **options):
    """Starting point for creating and running a task for ``url``.

    A script request is built for ``url``/``rule``, wrapped in a
    ``ScriptLayer`` and executed to obtain its sub-script layers. Every layer
    is then run in its own asyncio task, with at most three running
    concurrently.

    Args:
        url: Target URL passed to ``script_request``.
        rule: Selection rule passed to ``script_request``.
        **options: Accepted but not used in this function body.

    Returns:
        The result of ``asyncio.wait`` over the layer tasks.
    """
    async def _worker(index, layer):
        """Run one layer inside the context values it needs."""
        ctxmgr_value = {
            a: index,
            script['key']: f'{id(script_req):x}',
            script['config']: script_req.getdata('config', {}),
            script['basecnf']: dict(get_conf('script')['base']),
            # Convenient access to the script data.
            script['__getitem__']: script_req.__getitem__,
        }
        with ExitStack() as stack:
            for ctxmgr, value in ctxmgr_value.items():
                stack.enter_context(ctxmgr.apply(value))

            stack.enter_context(layer)
            async with sema:
                return await layer.run()

    async def _stop():
        """Stop every layer and wait until all of them have stopped."""
        # asyncio.wait() rejects bare coroutine objects on Python 3.11+
        # (deprecated since 3.8), so schedule each awaitable explicitly.
        return await asyncio.wait([
            asyncio.ensure_future(layer.stop())
            for layer in [scriptlay] + subscripts
        ])

    script_req = script_request(
        url=url,
        rule=rule,
        prevent=False,
    )

    scriptlay = ScriptLayer(script_req)

    with a.apply(0):
        subscripts = await scriptlay.execute_script()

    ctx.upload(title=script_req.getdata('title'),
               url=script_req.getdata('url'),
               name=script_req.getdata('name'),
               roots=[scriptlay.script] + [s.script for s in subscripts],
               root_layers=[scriptlay] + subscripts)

    max_workers = 3
    sema = asyncio.Semaphore(max_workers)
    tasks = [
        asyncio.create_task(_worker(i, s))
        for i, s in enumerate([scriptlay] + subscripts)
    ]
    # Use the event loop of the current task coroutine to stop the task.
    ctx.add_stopper(_stop)
    return await asyncio.wait(tasks)

예제 #5

0

파일 보기

파일: bilibili.py 프로젝트: zerosetall/VideoCrawlerEngine

    def run(self):
        """Upload a persistent live-stream item for the room named in the URL.

        The room id is the URL path with surrounding slashes removed.

        Raises:
            TypeError: If that path is not purely numeric.
        """
        room_id = urlsplit(self.url).path.strip('/')
        if not room_id.isnumeric():
            raise TypeError(f'url输入不正确，得到room_id为：{room_id}')

        # Fetch the live room information.
        self.live_room(room_id)

        # Keep fetching the live stream persistently.
        stream_getter = export_func(lambda: self.get_live(room_id))
        ctx.upload(item=live_daemon(stream_getter))

예제 #6

0

파일 보기

파일: bilibili.py 프로젝트: zerosetall/VideoCrawlerEngine

    def live_room(self, room_id):
        """Fetch the live room information and upload it to the context.

        Uploads the room title, uid, live start time and area name from
        ``room_info`` and the uploader name from ``anchor_info``.
        """
        endpoint = 'https://api.live.bilibili.com/xlive/web-room/v1/index/getInfoByRoom'
        # Base headers plus a Referer pointing at the page being processed.
        request_headers = dict(HEADERS, Referer=self.url)

        response = self.request_get(endpoint,
                                    params={'room_id': room_id},
                                    headers=request_headers)
        payload = response.json()['data']
        room = payload['room_info']
        anchor = payload['anchor_info']
        ctx.upload(title=room['title'],
                   uid=room['uid'],
                   live_start_time=room['live_start_time'],
                   area_name=room['area_name'],
                   upname=anchor['base_info']['uname'])

예제 #7

0

파일 보기

파일: script.py 프로젝트: zerosetall/VideoCrawlerEngine

def script_request(url: str,
                   rule: Optional[Union[str, int]] = None,
                   *,
                   prevent: bool = False,
                   **kwargs):
    """Execute the script for ``url`` remotely and publish its processing items.

    Args:
        url: Target URL.
        rule: Selection rule.
        prevent: Whether sub-script requests are allowed (per the original
            documentation); not used in this function body.
        **kwargs: Accepted but not used in this function body.

    Returns:
        The list of items uploaded by the script; a single uploaded ``item``
        is wrapped into a one-element list.

    Raises:
        ValueError: If the script uploaded neither ``items`` nor ``item``.
    """
    client = get_client('script')
    ctx.upload(**client.exec_script(url=url, rule=rule))
    title = safety_filename(ctx.getdata('title', ''))

    script_info = ctx.getdata('script', {})
    # Temporary directory: <configured tempdir>/<script name>/<title>.
    tempdir = os.path.realpath(
        os.path.join(ctx.glb.config['tempdir'], script_info['name'], title))

    items = ctx.getdata('items', [])
    if not items:
        single = ctx.getdata('item', None)
        if single is None:
            raise ValueError('没有上传有效的处理流程。')
        items = [single]

    ctx.upload(items=items, title=title, tempdir=tempdir, n=len(items))
    return items

예제 #8

0

파일 보기

파일: script.py 프로젝트: zerosetall/VideoCrawlerEngine

def fake_script(request_items: List[Requester], rule: Union[str, int],
                **options):
    """(Debug mode) Fake root script request used when debugging.

    Uploads a placeholder script description to the context, then the extra
    ``options``, then ``request_items`` wrapped as the single entry of
    ``items``.

    Args:
        request_items: Requesters forming the one processing flow.
        rule: Selection rule stored in the context.
        **options: Extra context values; uploaded after the defaults, so they
            are applied later than the placeholder values.

    Returns:
        The ``items`` value read back from the context.
    """
    from app.script.manager import ScriptTask
    from app.script import ScriptBaseClass

    url = 'http://fake.script'
    script = ScriptTask(ScriptBaseClass)('')
    ctx.upload(
        url=url,
        name=script.name,
        script=script,
        rule=rule,
        quality=100,
        title=f'debug_{time.time() * 1000}',
        tempdir=ctx.glb.config['tempdir'],
        n=1,
        config=script.config,
    )
    ctx.upload(**options)
    ctx.upload(items=[request_items])
    return ctx.getdata('items')

예제 #9

0

파일 보기

파일: bilibili.py 프로젝트: zerosetall/VideoCrawlerEngine

    def get_live(self, room_id):
        """Query the play info of a live room and build its download options.

        API: https://api.live.bilibili.com/xlive/web-room/v2/index/getRoomPlayInfo
        Example query string documented with the original code::

            room_id=910819
            &protocol=0%2C1
            &format=0%2C2
            &codec=0
            &qn=10000
            &platform=web
            &ptype=16

        The room id, live status and live time are uploaded to the context and
        ``live_room`` is called before the streams are inspected. Codecs that
        list no URL are skipped.

        Returns:
            ``optional(options)`` with one ``option`` per stream/format/codec
            combination, each wrapping a ``stream_download`` of that codec's
            last listed URL.

        Raises:
            ValueError: If the room is not live (``live_status == 0``).
        """
        api = 'https://api.live.bilibili.com/xlive/web-room/v2/index/getRoomPlayInfo'
        params = {
            'room_id': room_id,
            'protocol': '0,1,2',
            'format': '0,1',
            'codec': '0',
            'qn': '10000',
            'platform': 'web',
            'ptype': '16'
        }
        headers = dict(HEADERS)
        headers.update({'Referer': self.url})

        resp = self.request_get(api, params=params, headers=headers)
        data = resp.json()['data']
        ctx.upload(
            room_id=room_id,
            live_status=data['live_status'],
            live_time=data['live_time'],
        )

        self.live_room(room_id)

        # Live status: 0 means the broadcast has not started.
        if data['live_status'] == 0:
            raise ValueError('直播未开。')

        playurl = data['playurl_info']['playurl']

        # Build the qn -> description lookup once. setdefault keeps the first
        # description for a qn, like taking the first match of a scan.
        qn_descriptions = {}
        for entry in playurl['g_qn_desc']:
            qn_descriptions.setdefault(entry['qn'], entry['desc'])

        options = []
        for stream in playurl['stream']:
            for stream_format in stream['format']:
                format_name = stream_format['format_name']
                for codec in stream_format['codec']:
                    url_infos = codec['url_info']
                    if not url_infos:
                        # Nothing to download for this codec.
                        continue

                    current_qn = codec['current_qn']
                    desc = {
                        'format': format_name,
                        # Fall back to the raw qn when no description exists.
                        'quality': qn_descriptions.get(current_qn,
                                                       str(current_qn)),
                        'qn': current_qn,
                    }

                    # Only the last listed URL is used for the download.
                    url_info = url_infos[-1]
                    urlpath = codec['base_url'] + url_info['extra']
                    url = urljoin(url_info['host'], urlpath)

                    options.append(
                        option(stream_download(url, headers=headers),
                               descriptions=desc))

        return optional(options)

예제 #10

0

파일 보기

async def download(uri: str = None,
                   headers: Dict = None,
                   *,
                   multi_sources: List[Dict] = None,
                   **kwargs):
    """Download request.

    Args:
        uri: URI to download from.
        headers: Request headers used for ``uri``.
        multi_sources: Additional download sources, given as
            [{'uri': 'http://xxx', 'headers': headers}, ...]
        **kwargs: Extra entries merged into the source built from ``uri``.

    Raises:
        The ``HandlerError`` reported by the downloader, chained from its
        ``exception`` attribute.
    """
    def speed():
        # ``dl`` is bound by the ``async with`` block further down.
        return f'{readable_file_size(dl.transfer_rate())}/s'

    # Create the download request object, targeting a temporary file.
    target = ctx.tempdir.mktemp()
    request = DlRequest(file_path=target.filepath)

    sources = []
    if uri:
        sources.append({'uri': uri, 'headers': headers, **kwargs})
    sources.extend(multi_sources or [])
    for source in sources:
        request.put(**source)

    fatal = None
    async with dlopen(request) as dl:
        ctx.upload(
            filesize=dl.file.size,
            dstpath=dl.file.pathname,
            downloadSize=lambda: readable_file_size(dl.walk_length()),
            writeSize=lambda: readable_file_size(dl.done_length()),
        )
        ctx.set_percent(dl.percent_complete)
        ctx.set_timeleft(dl.remaining_time)
        ctx.set_speed(speed)

        dl.start(loop=asyncio.get_running_loop())
        # FIX: works around a problem of the Nbdler downloader under
        # coroutines by waiting until its future exists.
        while not dl._future:
            await asyncio.sleep(0.01)

        # Register the download stopper.
        ctx.add_stopper(dl.pause)

        # Every reported exception is logged as a warning; a HandlerError
        # additionally pauses the download and is remembered for re-raising.
        async for reported in dl.aexceptions():
            ctx.warning(reported.exc_info)
            if isinstance(reported, HandlerError):
                await dl.apause()
                fatal = reported
                break
        await dl.ajoin()

    if fatal:
        # An error occurred: raise it.
        raise fatal from fatal.exception

    # Refresh the file information.
    ctx.upload(
        dstpath=dl.file.pathname,
        filesize=dl.file.size,
        downloadSize=lambda: readable_file_size(dl.walk_length()),
        writeSize=lambda: readable_file_size(dl.done_length()),
    )

예제 #11

0

파일 보기

async def stream_download(uri: str = None,
                          headers: Dict = None,
                          buffsize: float = 1024 * 1024,
                          timeout: float = None,
                          **kwargs):
    """File stream download, typically used for data with real-time character.

    The response body is read in 4 KiB chunks, buffered in memory and written
    to a temporary file whenever the buffer reaches ``buffsize`` bytes.

    Args:
        uri: URL to request.
        headers: Request headers.
        buffsize: Number of buffered bytes that triggers a write to the file.
        timeout: NOTE(review): accepted but not used anywhere in this body;
            confirm the intended semantics before wiring it up.
        **kwargs: Extra keyword arguments forwarded to the session's ``get``.

    Raises:
        ConnectionAbortedError: If the response status is neither 200 nor 206.
        Warning: When the downloaded size reaches the configured ``maxsize``
            (the original comment labels this as splitting the video).
    """
    def stop():
        # Request the stop, then block until the download loop confirms it.
        nonlocal stop_flag
        stop_flag = True
        stop_event.wait()

    def size():
        return sizecnt

    def speed():
        return f'{readable_file_size(avgspeed)}/s'

    def percent():
        return sizecnt / total_size

    maxsize = ctx.script.config['maxsize']
    stop_event = threading.Event()
    stop_flag = False
    ctx.add_stopper(stop)
    try:
        tempf = ctx.tempdir.mktemp()
        async with aiohttp.ClientSession() as sess:
            resp = await sess.get(url=uri, headers=headers, **kwargs)
            if resp.status not in (200, 206):
                raise ConnectionAbortedError()

            chunksize = 1024 * 4
            sizecnt = 0    # bytes received so far
            avgspeed = 0   # average bytes per second since the start
            donesize = 0   # bytes already written to the file
            starttime = time.time()
            if resp.content_length is None:
                # Progress is indeterminate.
                ctx.set_percent(None)
            else:
                total_size = resp.content_length
                ctx.set_percent(percent)

            buffcnt = 0    # bytes currently held in buff_lst
            buff_lst = []
            ctx.set_speed(speed)
            ctx.upload(
                filesize=size,
                dstpath=tempf.filepath,
                downloadSize=lambda: readable_file_size(sizecnt),
                writeSize=lambda: readable_file_size(donesize),
            )
            with tempf('wb') as f:
                try:
                    async for chunk in resp.content.iter_chunked(chunksize):
                        # Track the downloaded size.
                        chunklen = len(chunk)
                        sizecnt += chunklen
                        buffcnt += chunklen
                        buff_lst.append(chunk)
                        # Write to the file once the buffer overflows.
                        if buffcnt >= buffsize:
                            f.writelines(buff_lst)
                            donesize += buffcnt
                            buffcnt = 0
                            buff_lst = []
                        # Compute the average download speed.
                        avgspeed = sizecnt / (
                            (time.time() - starttime) or float('inf'))

                        if stop_flag:
                            stop_event.set()
                            break

                        # Split the video.
                        if maxsize <= sizecnt:
                            raise Warning()
                finally:
                    if buff_lst:
                        # Flush the tail and count it as written, so the
                        # writeSize callback reports the real total.
                        f.writelines(buff_lst)
                        donesize += buffcnt
    finally:
        # Always release a stop() caller that may be waiting.
        stop_flag = True
        stop_event.set()

예제 #12

0

파일 보기

파일: ffmpeg.py 프로젝트: zerosetall/VideoCrawlerEngine

async def ffmpeg(inputs: Union[List[str], str], cmd_operator: str, cal_len,
                 **kwargs):
    """ffmpeg data stream processing engine.

    Args:
        inputs: One input or a list/tuple/set of inputs; each is either a
            path string or an object whose ``getdata('dstpath')`` gives one.
        cmd_operator: Name of the attribute on ``ffmpeg`` whose
            ``__wrapped__`` function generates the command.
        cal_len: When truthy and no length is known yet, the total length is
            computed with ``cal_total_length`` and uploaded as ``length``.
        **kwargs: Forwarded to the command generator.
    """
    def get_input_filepath(inp) -> str:
        if isinstance(inp, str):
            return inp
        if isinstance(inp, (Requester, Optional, Option)):
            return inp.getdata('dstpath')
        assert inp

    def percent():
        # ``handler`` is bound once the subprocess has been started.
        return handler.complete_length() * 100 / (time_length
                                                  or float('inf'))

    time_length = ctx.glb.script['length'] or float('inf')
    out_file = ctx.tempdir.mktemp(ctx.glb.config['to_format'][0])

    if not isinstance(inputs, (list, tuple, set)):
        inputs = [inputs]

    # Look up the decorated function by its command operator name and use
    # the wrapped original to generate the ffmpeg command.
    build_command = getattr(ffmpeg, cmd_operator).__wrapped__
    cmd = await build_command(
        inputs=[get_input_filepath(item) for item in inputs],
        output=out_file.filepath,
        **kwargs)

    if cal_len and time_length in (float('inf'), None):
        # Compute the total length.
        time_length = await cal_total_length(inputs)
        ctx.upload(length=time_length)

    executable = os.path.join(ctx.config['source'], ctx.config['name'])
    if isinstance(cmd, (list, tuple)):
        cmd = list2cmdline([executable] + list(cmd))
    else:
        cmd = f'{executable} ' + cmd

    if ctx.config['overwrite']:
        cmd += ' -y'
    print(cmd)

    process = await asyncio.create_subprocess_shell(
        cmd,
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )

    handler = FfmpegStreamHandler(process)
    ctx.set_speed(handler.speed)
    ctx.set_percent(percent)

    ctx.upload(
        cmd=cmd,
        dstpath=out_file.filepath,
        input=handler.get_inputs,
        output=handler.get_outputs,
    )
    ctx.add_stopper(handler.stop_threadsafe)
    await handler.run(timeout=ctx.config.get('timeout', None),
                      close_stdin=False)