Code Example #1
async def mock_async_get(status, text, lag):
    """Fake HTTP GET: sleep `lag` seconds, then return a MockResponse
    or raise the exception matching `status`."""
    await asyncio.sleep(lag)

    if status == 'Timed out':
        raise asyncio.TimeoutError
    elif status == 'Invalid URL':
        raise aiohttp.InvalidURL()
    elif status == "Can't connect: blah":
        raise aiohttp.client_exceptions.ClientConnectionError('blah')

    return MockResponse(status, text)
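The mock above can be driven directly in a test. A minimal sketch using the pytest-asyncio plugin (the plugin, the test name, and the chosen arguments are assumptions, not from the original suite):

import asyncio
from functools import partial

import pytest

@pytest.mark.asyncio  # assumes pytest-asyncio is installed
async def test_mock_raises_timeout():
    # Pre-bind status/text/lag so the mock behaves like a zero-argument GET
    get = partial(mock_async_get, 'Timed out', '', 0.01)
    with pytest.raises(asyncio.TimeoutError):
        await get()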
Code Example #2
@asynccontextmanager  # from contextlib; the docstring below implies `async with` usage
async def spooled_data_from_url(
    url: str,
    headers: Optional[Dict[str, str]] = None,
    timeout: Optional[aiohttp.ClientTimeout] = None,
    *,
    ssl: Optional[ssl.SSLContext] = None,
):
    """
    Download `url` to a tempfile and yield `(bytesio, headers, charset)`.

    `bytesio` is backed by a temporary file: the file at path `bytesio.name`
    will exist within this context.

    Raise aiohttp.ClientError on generic error. Subclasses of note:
    * aiohttp.InvalidURL on invalid URL
    * aiohttp.ClientResponseError when HTTP status is not 200
    * aiohttp.ClientPayloadError when server closes connection prematurely
    * aiohttp.ClientConnectionError (OSError) when connection fails

    Raise asyncio.TimeoutError when `timeout` seconds have expired.
    """

    # aiohttp internally performs URL canonicalization before sending
    # the request. DISABLE THIS: it breaks OAuth and users' expectations.
    #
    # https://github.com/aio-libs/aiohttp/issues/3424
    url = yarl.URL(url, encoded=True)  # prevent magic
    if url.scheme not in ("http", "https"):
        raise aiohttp.InvalidURL("URL must start with http:// or https://")

    with tempfile_context(prefix="loadurl") as spool_path:
        async with aiohttp.ClientSession() as session:
            # raise aiohttp.ClientError, asyncio.TimeoutError
            async with session.get(url,
                                   headers=headers,
                                   timeout=timeout,
                                   ssl=ssl) as response:
                # raise aiohttp.ClientResponseError
                response.raise_for_status()
                headers = response.headers
                charset = response.charset

                with spool_path.open("wb") as spool:
                    # raise aiohttp.ClientPayloadError
                    async for blob in response.content.iter_chunked(
                            _ChunkSize):
                        spool.write(blob)

        yield spool_path.open("rb"), headers, charset
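A caller-side sketch of consuming this context manager and mapping the exceptions the docstring enumerates to messages (the handling policy and the function name are illustrative, not from the original project):

async def load_or_report(url: str) -> str:
    try:
        async with spooled_data_from_url(url) as (bytesio, headers, charset):
            return bytesio.read().decode(charset or 'utf-8')
    except asyncio.TimeoutError:
        return 'Timed out'
    except aiohttp.InvalidURL:
        return 'Invalid URL'
    except aiohttp.ClientResponseError as err:
        return f'HTTP error {err.status}'
    except aiohttp.ClientError as err:
        # Remaining subclasses, e.g. ClientConnectionError
        return f"Can't connect: {err}"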
Code Example #3
File: agent.py Project: mdheller/rasa
async def _update_model_from_server(model_server: EndpointConfig,
                                    agent: "Agent") -> None:
    """Load a zipped Rasa Core model from a URL and update the passed agent."""

    if not is_url(model_server.url):
        raise aiohttp.InvalidURL(model_server.url)

    model_directory_and_fingerprint = await _pull_model_and_fingerprint(
        model_server, agent.fingerprint)
    if model_directory_and_fingerprint:
        model_directory, new_model_fingerprint = model_directory_and_fingerprint
        _load_and_set_updated_model(agent, model_directory,
                                    new_model_fingerprint)
    else:
        logger.debug(f"No new model found at URL {model_server.url}")
Code Example #4
File: agent.py Project: lsx0930/rasa_usage
async def _update_model_from_server(model_server: EndpointConfig,
                                    agent: 'Agent') -> None:
    """Load a zipped Rasa Core model from a URL and update the passed agent."""

    if not is_url(model_server.url):
        raise aiohttp.InvalidURL(model_server.url)

    model_directory = tempfile.mkdtemp()

    new_model_fingerprint = await _pull_model_and_fingerprint(
        model_server, model_directory, agent.fingerprint)
    if new_model_fingerprint:
        _load_and_set_updated_model(agent, model_directory,
                                    new_model_fingerprint)
    else:
        logger.debug("No new model found at "
                     "URL {}".format(model_server.url))
Code Example #5
async def get_next_page(session, csrf_token, insta_gis, query_hash, variables):
    """ Returns edges with media info and cursor for further query
    """
    cookies = {'csrftoken': csrf_token}
    # Add cookies to given session (aiohttp.ClientSession)
    session.cookie_jar.update_cookies(cookies)
    # And make headers
    headers = prepare_headers(insta_gis)
    # Create url
    url = INSTAGRAM_URL + NEXT_PAGE_URL.format(query_hash=query_hash,
                                               variables=variables)
    # `async with` releases the connection back to the pool when done
    async with session.get(url, headers=headers) as response:
        if response.status == 200:
            json_obj = await response.json()
            data = json_obj['data']['user']['edge_owner_to_timeline_media']
            end_cursor = data['page_info']['end_cursor']
            next_edges = data['edges']
            return next_edges, end_cursor
        elif response.status == 404:
            raise aiohttp.InvalidURL(url=url)
        # Any other status falls through and returns None
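Raising InvalidURL on a 404 is unconventional (the URL is well formed; the resource is simply missing), so a caller has to translate it. A hedged wrapper sketch (the function name and the `[], None` fallback are illustrative choices, not from the original project):

async def next_page_or_end(session, csrf_token, insta_gis, query_hash,
                           variables):
    try:
        return await get_next_page(session, csrf_token, insta_gis,
                                   query_hash, variables)
    except aiohttp.InvalidURL:
        # The endpoint answered 404: treat it as "no further pages"
        return [], None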
Code Example #6
        async def session_get(url, *, timeout=None):
            # Silly mock HTTP GET computes the test's input based on its
            # expected output. This defeats the purpose of a test.
            row = results[results['url'] == url]
            if row.empty:
                raise ValueError('called with URL we did not expect')
            index = row.index[0]
            delay = response_times[index]
            await asyncio.sleep(delay)

            status = row.at[index, 'status']
            text = row.at[index, 'html']

            if status == 'Timed out':
                raise asyncio.TimeoutError
            elif status == 'Invalid URL':
                raise aiohttp.InvalidURL(url)
            elif status == "Can't connect: blah":
                raise aiohttp.client_exceptions.ClientConnectionError('blah')
            else:
                return MockResponse(int(status), text)
Code Example #7
File: agent.py Project: zoovu/rasa
async def _update_model_from_server(model_server: EndpointConfig, agent: Agent) -> None:
    """Load a zipped Rasa Core model from a URL and update the passed agent."""
    if not is_url(model_server.url):
        raise aiohttp.InvalidURL(model_server.url)

    with tempfile.TemporaryDirectory() as temporary_directory:
        try:
            new_fingerprint = await _pull_model_and_fingerprint(
                model_server, agent.fingerprint, temporary_directory
            )

            if new_fingerprint:
                _load_and_set_updated_model(agent, temporary_directory, new_fingerprint)
            else:
                logger.debug(f"No new model found at URL {model_server.url}")
        except Exception:  # skipcq: PYL-W0703
            # TODO: Make this exception more specific, possibly print different log
            # for each one.
            logger.exception(
                "Failed to update model. The previous model will stay loaded instead."
            )
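The TODO above asks for more specific exception handling. One sketch of how the except block might be split, distinguishing timeouts from HTTP errors (the function name and log messages are illustrative, not the project's actual follow-up):

async def _update_model_from_server_specific(
    model_server: EndpointConfig, agent: Agent
) -> None:
    """Variant of the function above with per-exception logging."""
    if not is_url(model_server.url):
        raise aiohttp.InvalidURL(model_server.url)

    with tempfile.TemporaryDirectory() as temporary_directory:
        try:
            new_fingerprint = await _pull_model_and_fingerprint(
                model_server, agent.fingerprint, temporary_directory
            )
            if new_fingerprint:
                _load_and_set_updated_model(
                    agent, temporary_directory, new_fingerprint
                )
            else:
                logger.debug(f"No new model found at URL {model_server.url}")
        except asyncio.TimeoutError:
            logger.exception("Timed out pulling the model; keeping the old one.")
        except aiohttp.ClientError:
            # Covers InvalidURL, ClientResponseError, ClientConnectionError
            logger.exception("HTTP error pulling the model; keeping the old one.")
        except Exception:  # skipcq: PYL-W0703
            logger.exception("Failed to update model; keeping the old one.")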
Code Example #8
        async def session_get(url, *, timeout=None):
            url = str(url)  # undo yarl un-magick-ing

            # Silly mock HTTP GET computes the test's input based on its
            # expected output. This defeats the purpose of a test.
            row = results[results["url"] == url]
            if row.empty:
                raise ValueError("called with URL we did not expect")
            index = row.index[0]
            delay = response_times[index]
            await asyncio.sleep(delay)

            status = row.at[index, "status"]
            text = row.at[index, "html"]

            if status == "Timed out":
                raise asyncio.TimeoutError
            elif status == "Invalid URL":
                raise aiohttp.InvalidURL(url)
            elif status == "Can't connect: blah":
                raise aiohttp.client_exceptions.ClientConnectionError("blah")
            else:
                return MockResponse(int(status), text)
Code Example #9
@asynccontextmanager  # from contextlib; the single yield implies `async with` usage
async def spooled_data_from_url(url: str,
                                headers: Optional[Dict[str, str]] = None,
                                timeout: Optional[aiohttp.ClientTimeout] = None):
    """
    Download `url` to a tempfile and yield `(bytesio, headers, charset)`.

    Raise aiohttp.ClientError on generic error. Subclasses of note:
    * aiohttp.InvalidURL on invalid URL
    * aiohttp.ClientResponseError when HTTP status is not 200

    Raise asyncio.TimeoutError when `timeout` seconds have expired.
    """

    # aiohttp internally performs URL canonicalization before sending
    # the request. DISABLE THIS: it breaks OAuth and users' expectations.
    #
    # https://github.com/aio-libs/aiohttp/issues/3424
    url = yarl.URL(url, encoded=True)  # prevent magic
    if url.scheme not in ("http", "https"):
        raise aiohttp.InvalidURL("URL must start with http:// or https://")

    with tempfile.TemporaryFile(prefix="loadurl") as spool:
        async with aiohttp.ClientSession() as session:
            async with session.get(url,
                                   headers=headers,
                                   timeout=timeout,
                                   raise_for_status=True) as response:
                # raise_for_status=True already raises
                # aiohttp.ClientResponseError on non-2xx statuses, so an
                # explicit raise_for_status() call here would be redundant

                async for blob in response.content.iter_chunked(_ChunkSize):
                    spool.write(blob)

                headers = response.headers
                charset = response.charset

        spool.seek(0)
        yield spool, headers, charset
Code Example #10
async def scraper(account, session, ws):
    """ Connects to instagram account, parses it
    and returns list of media urls
    """

    # Trying to connect notify
    await ws.send_json({'state': 'connection'})

    url = f'{INSTAGRAM_URL}/{account}/'
    async with session.get(url, headers={"user-agent":
                                         USER_AGENT}) as response:
        if response.status == 200:
            # Notify: connection succeeded and parsing is beginning
            await ws.send_json({'state': 'connection-completed'})
            await ws.send_json({'state': 'parsing'})

            html = await response.text()
            # Find info about account from script in html
            json_str = re.search(r'window._sharedData = (.*);</script>',
                                 html).group(1)
            page_info = json.loads(json_str)
            # Extract user from JSON
            user_ = page_info['entry_data']['ProfilePage'][0]['graphql'][
                'user']
            user_id = user_['id']
            # ...and other fields needed for the next query
            end_cursor = user_['edge_owner_to_timeline_media']['page_info'][
                'end_cursor']
            rhx_gis = page_info['rhx_gis']
            # Extract csrf token from cookie
            csrf_token = response.cookies.get('csrftoken')
            # Send user avatar and amount of media
            avatar = user_['profile_pic_url']
            total_media = user_['edge_owner_to_timeline_media']['count']
            user_info = {'avatar': avatar, 'total_media': total_media}
            await ws.send_json({'info': ('user-info', user_info)})

            # Get first 12 posts from home page
            edges = user_['edge_owner_to_timeline_media']['edges']
            # ...and report progress by bumping the parsing counter
            await ws.send_json({'state': 'parsing-increase'})

            # Fetch the remaining posts with a slightly tricky algorithm
            while end_cursor:
                # First, fill in the right QUERY_PARAMETERS
                variables = QUERY_PARAMETERS.format(user_id=user_id,
                                                    end_cursor=end_cursor)
                # ...which is joined with rhx_gis and md5-hashed
                insta_gis = md5(
                    (rhx_gis + ':' + variables).encode('utf-8')).hexdigest()
                # Then pass it, with the rest of the query data, to
                # get_next_page, which returns edges holding the media info
                # and the end_cursor for the next query
                next_edges, end_cursor = await get_next_page(
                    session, csrf_token, insta_gis, QUERY_HASH, variables)
                edges.extend(next_edges)
                # Report progress again
                await ws.send_json({'state': 'parsing-increase'})

                # Pause briefly to be polite to the server
                await asyncio.sleep(2)

            # Notify: parsing completed
            await ws.send_json({'state': 'parsing-completed'})
            # Extract media urls
            urls = [edge['node']['display_url'] for edge in edges]
            return urls

        elif response.status == 404:
            raise aiohttp.InvalidURL(url=url)
        # Any other status falls through and returns None
Code Example #11
    async def test_async_open_Exception(self, web_monitor_app, response_mock):
        response_mock.get(url_list['url'], exception=aiohttp.InvalidURL(''))
        result = await web_monitor_app.async_open(url_list)
        assert 'Unknown' == result['err_status']
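A minimal sketch of the kind of `async_open` this test implies: any aiohttp failure, including the mocked InvalidURL, is folded into an 'Unknown' error status. Only the names used by the test are taken from the source; the class body is assumed:

import aiohttp

class WebMonitorApp:
    async def async_open(self, url_entry):
        result = {'err_status': None}
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(url_entry['url']) as response:
                    result['status'] = response.status
        except aiohttp.ClientError:
            # aiohttp.InvalidURL subclasses ClientError, so the
            # exception injected by response_mock lands here
            result['err_status'] = 'Unknown'
        return result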