Example #1
File: raven.py Project: aiokitchen/aiomisc
class QueuedKeepaliveAioHttpTransport(QueuedPatchedAioHttpTransport):
    DNS_CACHE_TTL = 600
    DNS_CACHE = True
    TCP_CONNECTION_LIMIT = 32
    TCP_CONNECTION_LIMIT_HOST = 8
    WORKERS = 1
    QUEUE_SIZE = 1000

    def __init__(self,
                 *args: Any,
                 family: int = socket.AF_UNSPEC,
                 loop: asyncio.AbstractEventLoop = None,
                 dns_cache: bool = DNS_CACHE,
                 dns_cache_ttl: int = DNS_CACHE_TTL,
                 connection_limit: int = TCP_CONNECTION_LIMIT,
                 connection_limit_host: int = TCP_CONNECTION_LIMIT_HOST,
                 workers: int = WORKERS,
                 qsize: int = QUEUE_SIZE,
                 **kwargs: Any):
        self.connection_limit = connection_limit
        self.connection_limit_host = connection_limit_host
        self.dns_cache = dns_cache
        self.dns_cache_ttl = dns_cache_ttl

        super().__init__(*args,
                         family=family,
                         loop=loop,
                         keepalive=True,
                         workers=workers,
                         qsize=qsize,
                         **kwargs)

    def _client_session_factory(self) -> ClientSession:
        self.connector = TCPConnector(
            family=self.family,
            limit=self.connection_limit,
            limit_per_host=self.connection_limit_host,
            ttl_dns_cache=self.dns_cache_ttl,
            use_dns_cache=self.dns_cache,
            verify_ssl=self.verify_ssl,
        )

        return ClientSession(
            connector=self.connector,
            connector_owner=False,
        )

    async def _close(self) -> Transport:
        transport = await super()._close()
        # TCPConnector.close() is a plain method in older aiohttp releases and
        # a coroutine in newer ones; handle both without calling it twice.
        if inspect.iscoroutinefunction(self.connector.close):
            await self.connector.close()
        else:
            self.connector.close()
        return transport
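
This transport is meant to be handed to the legacy raven client as its transport class. A rough usage sketch, assuming the raven-aiohttp convention of binding extra options with functools.partial (the DSN below is a placeholder, not part of the excerpt above):

from functools import partial

from raven import Client

# Placeholder DSN; raven instantiates the transport class itself, so any
# extra options are bound with partial() rather than passed directly.
client = Client(
    dsn="https://public_key@sentry.example.com/1",
    transport=partial(
        QueuedKeepaliveAioHttpTransport,
        connection_limit=16,
        workers=2,
    ),
)
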
Example #2
class DownloadDispatcher:
    def __init__(self, config):
        self.config = config
        self.pageAdviser = parsers.PageAdviser(config)
        self._loop = asyncio.get_event_loop()
        self.pages = []
        self.pic_page_urls = []
        self.conn = TCPConnector(ssl=False, limit=10, use_dns_cache=True)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        self._loop.run_until_complete(self.conn.close())
        self._loop.close()

    def dispatch(self):
        self.pages = self.pageAdviser.get_pages()
        print("%d images are expected to be downloaded." %
              self.pageAdviser.get_pic_number())
        print("parsing list page, %d list pages found" % len(self.pages))
        work_limit = min(len(self.pages), self.config.maxr)
        self._start_tasks(work_limit, self._handle_list_page)
        self._check_pic_number()
        work_limit = min(len(self.pic_page_urls), self.config.maxr)
        print("started to download images")
        self._start_tasks(work_limit, self._handle_pic_page)

    def _start_tasks(self, work_limit, coro_factory):
        # Create a fresh coroutine per worker: a single coroutine object
        # cannot be scheduled or awaited more than once.
        tasks = [coro_factory() for _ in range(work_limit)]
        self._loop.run_until_complete(asyncio.wait(tasks))

    def _check_pic_number(self):
        print("parsing list page finished. %d pic urls found" %
              len(self.pic_page_urls))
        missed_number = self.pageAdviser.get_pic_number() - len(
            self.pic_page_urls)
        if missed_number > 0:
            print("there is(are) %d image(s) can't be found." % missed_number)

    async def _handle_list_page(self):
        while len(self.pages) > 0:
            page = self.pages.pop()
            content = await self._get_request_text(
                "%s%s&page=%d" % (_URL_ROOT, _LIST_PAGE, page))
            parser = parsers.ListPageParser()
            parsed_urls = parser.parse_img_urls(content)
            parser.close()
            for date_str, url in parsed_urls:
                if self.pageAdviser.is_required_img(date_str):
                    self.pic_page_urls.append((date_str, url))
            await asyncio.sleep(0.1)

    async def _handle_pic_page(self):
        while len(self.pic_page_urls) > 0:
            img_name, url = self.pic_page_urls.pop()
            content = await self._get_request_text(_URL_ROOT + url)
            parser = parsers.ImgViewPageParser()
            src = parser.get_img_src(content)
            parser.close()
            if src:
                await self._download_img(src, img_name)
            await asyncio.sleep(0.1)

    async def _download_img(self, src, img_name):
        file_path = "%s%s.png" % (self.config.directory, img_name)
        content = None
        async with client.request("GET", src, connector=self.conn) as response:
            content = await response.read()
        if content:
            with open(file_path, 'wb') as fd:
                fd.write(content)
        print(" :=> downloaded wallpaper of date: %s" % img_name)

    async def _get_request_text(self, url):
        async with client.request("GET", url, connector=self.conn) as response:
            text = await response.text()
            return text
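
DownloadDispatcher is written as a synchronous context manager that owns its own event loop and TCP connector, so a driver would plausibly look like this (a sketch; how the config object with its maxr and directory attributes is built is project-specific and assumed here):

config = load_config()  # assumed helper, not part of the excerpt
with DownloadDispatcher(config) as dispatcher:
    dispatcher.dispatch()  # __exit__ closes the connector and the event loop
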
Example #3
File: replay.py Project: marin-m/peristop
def downloadReplay(bcst, info):
    resp = call('getAccessPublic', {'broadcast_id': bcst})
    
    # 1. Download replay info
    
    if 'cookies' not in resp:
        print('==> "%s" (@%s) has been deleted!' % (info['status'], info['username']))
        return
    cookies = {i['Name']: i['Value'] for i in resp['cookies']}
    baseUrl = '/'.join(resp['replay_url'].split('/')[:-1]) + '/chunk_%d.ts'
    
    m3u8 = get(resp['replay_url'], cookies=cookies).text
    nbChunks = int(m3u8.split('chunk_')[-1].split('.')[0]) + 1
    
    # 2. Download the TS chunks in parallel and save them into an mp4
    
    set_event_loop(new_event_loop())
    
    connector = TCPConnector(conn_timeout=10, limit=6)
    resps = [None] * nbChunks
    with ClientSession(connector=connector, cookies=cookies, headers=headers2) as client:
        get_event_loop().run_until_complete(wait([replayCoro(baseUrl % i, resps, i, client) for i in range(nbChunks)]))
    connector.close()
    
    if None in resps:
        print('==> "%s" (@%s) has been deleted!' % (info['status'], info['username']))
        return
    
    with NamedTemporaryFile(suffix='.ts') as tmp:
        for chunk in resps:
            tmp.write(chunk)
        
        tmp.flush()
        run(['ffmpeg', '-y', '-v', 'fatal', '-i', tmp.name, '-bsf:a', 'aac_adtstoasc', '-c', 'copy', 'storage/live/' + bcst + '.mp4'], check=True)
    
    del resps
    
    Popen(['ffmpeg', '-y', '-v', '-8', '-i', 'storage/live/' + bcst + '.mp4', '-vframes', '1', '-ss', '5', '-vf', 'crop=in_w:1/PHI*in_w, scale=-1:65', 'storage/thumb/' + bcst + '.jpg'])
    postProcessChat(bcst)
    
    # 4. Download chat info (other thread?)
    
    cursor = None
    retries = 0
    chat = ChatEngine(bcst, info)
    
    while cursor != '':
        hist = post(resp['endpoint'] + '/chatapi/v1/history', json={
            'access_token': resp['access_token'],
            'cursor': cursor,
            'duration': 9999999,
            'since': 0
        })
        hist.encoding = 'utf-8'
        
        if hist.text.strip() == 'list room events in progress' and retries < 20:
            sleep(5)
            retries += 1
            continue
        retries = 0
        
        try:
            hist = loads(hist.text)
        except JSONDecodeError:
            try:
                hist = loads(sub(r'([\u007f-\uffff])\\*("\}?,)(\\+)', r'\1\3\2\3', hist.text))
            except JSONDecodeError:
                print('=> Retrieval of "%s" (@%s) failed with: %s (%d)' % (info['status'], info['username'], repr(hist.text), hist.status_code))
                return
        
        for msg in hist['messages']:
            chat.parse(msg=msg)
        
        cursor = hist['cursor']
    
    chat.save()
    print('=> Ended up downloading: "%s" (@%s)' % (info['status'], info['username']))
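
The coroutine replayCoro is referenced above but not part of this excerpt. A minimal sketch of what it plausibly does, assuming it simply fetches one chunk with the shared ClientSession and records the bytes at its index (the real helper may also retry failed chunks):

async def replayCoro(url, resps, index, client):
    # Assumed implementation: download a single TS chunk and store its bytes,
    # leaving None in place when the request does not return HTTP 200.
    async with client.get(url) as response:
        if response.status == 200:
            resps[index] = await response.read()
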
Example #4
class MarkdownFormatter:
    def __init__(self, base_url, redis=None):
        self.base_url = base_url
        self.matcher = {}
        self.re = re.compile(r"(\(https?://\S+\))")
        self.connector = TCPConnector()
        self.redis: Redis = redis

    def __del__(self):
        self.connector.close()

    @cache
    async def get_shorten_link(self, url) -> Tuple[str, str]:
        if '@' in url:
            return url, url

        res = await self.get_click(url)

        # The URL was banned (the shortener did not return a short link)
        if len(res) > 30:
            try:
                res = await self.get_isgd(url)
                return url, res
            except ClientResponseError:
                return url, url

        return url, res

    async def get_click(self, url):
        try:
            chatbase = quote(get_chatbase_url(url), encoding="ascii")
            req_url = (f'https://clck.ru/--?url={chatbase}')
        except UnicodeEncodeError:
            log.exception('Failed to encode url %r', url)
            req_url = url
        async with ClientSession(raise_for_status=True,
                                 connector=self.connector,
                                 connector_owner=False) as session:
            async with session.get(req_url) as r:  # type: ClientResponse
                return await r.text()

    async def get_isgd(self, url):
        try:
            chatbase = quote(get_chatbase_url(url), encoding="ascii")
            req_url = f'https://is.gd/create.php?format=simple&url={chatbase}'
        except UnicodeEncodeError:
            log.exception('Failed to encode url %r', url)
            req_url = url
        async with ClientSession(raise_for_status=True,
                                 connector=self.connector,
                                 connector_owner=False) as session:
            async with session.get(req_url) as r:  # type: ClientResponse
                return await r.text()

    def collect_matches(self, match: Match[str]):
        self.matcher[match.group(0)[1:-1]] = ""

    async def convert_links(self, markdown: str) -> str:
        self.matcher = {}
        # noinspection PyTypeChecker
        self.re.sub(partial(self.collect_matches), markdown)
        tasks = [self.get_shorten_link(url) for url in self.matcher.keys()]
        res = await gather(*tasks)
        return reduce(lambda md, b: md.replace(f'({b[0]})', f'({b[1]})'), res,
                      markdown)

    async def parse_markdown(self, html, width=269, max_length=4096) -> str:
        h = html2text.HTML2Text(baseurl=self.base_url, bodywidth=width)
        # Minor contortions with the li tags
        html_to_parse = str(html).replace('<li',
                                          '<div').replace("</li>", "</div>")
        if '[' in html_to_parse and ']' in html_to_parse:
            html_to_parse = html_to_parse.replace('[', '{').replace(']', '}')

        html_to_parse = html_to_parse.replace('lite.mfd.ru', 'forum.mfd.ru')

        html_to_parse = utils.transform_emoji(html_to_parse)
        md = await self.convert_links(h.handle(html_to_parse).strip())
        if len(md) > max_length:
            md = f'{md[:max_length - 60]}...'
        return md
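
A short call-site sketch for the formatter (the base URL and HTML snippet are placeholders; the real project may also pass in a Redis instance):

import asyncio

async def main():
    # Placeholder inputs; run inside an event loop because parse_markdown
    # awaits network calls when shortening links.
    formatter = MarkdownFormatter(base_url="https://forum.mfd.ru")
    md = await formatter.parse_markdown(
        "<p>See <a href='https://forum.mfd.ru/forum/'>the forum</a></p>")
    print(md)

asyncio.run(main())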