예제 #1
0
def test_request_ua():
    """Check that a custom User-Agent header shows up in the JSON response."""
    user_agent = "Python3.5"
    request = Request(
        'http://www.httpbin.org/get',
        method='GET',
        res_type='json',
        headers={"User-Agent": user_agent},
    )
    loop = asyncio.get_event_loop()
    response = loop.run_until_complete(request.fetch())
    # With res_type='json' the test reads ``response.html`` as a mapping.
    echoed_headers = response.html['headers']
    assert echoed_headers['User-Agent'] == user_agent
예제 #2
0
    async def async_fetch(
        self,
        url_or_request: Union[Request, str],
        response: Response = None,
    ):
        """
        Fetch a target inside a fresh aiohttp session and refresh the user namespace.

        :param url_or_request: either a ready ``Request`` (its
            ``request_session`` is replaced by the new session) or a value
            passed as ``url`` to a newly built ``Request``
        :param response: an already available response to process; when it is
            ``None`` the request is fetched to obtain one
        :return: None
        """
        async with aiohttp.ClientSession() as session:
            if not isinstance(url_or_request, Request):
                # Wrap the bare URL in a request bound to this session.
                request = Request(url=url_or_request, request_session=session)
            else:
                request = url_or_request
                request.request_session = session

            if response is None:
                response = await request.fetch()

            # Attach the decoded body and its parsed tree to the response.
            response.html = await response.text()
            response.etree = response.html_etree(response.html)

            self.refresh_user_ns(request, response)
예제 #3
0
def test_request_params():
    """Check that query parameters passed to ``Request`` appear in the JSON response."""
    request = Request(
        'http://www.httpbin.org/get',
        method='GET',
        res_type='json',
        params={"name": "ruia"},
    )
    loop = asyncio.get_event_loop()
    response = loop.run_until_complete(request.fetch())
    echoed_args = response.html['args']
    assert echoed_args['name'] == "ruia"
예제 #4
0
파일: test_request.py 프로젝트: Ma233/ruia
def test_method_error_request():
    """Check that a ``PUT`` request yields an empty body or ``InvalidRequestMethod``.

    The test passes when the fetched response text is empty, or when the
    exception raised along the way is an ``InvalidRequestMethod``.
    """
    try:
        request = Request("https://httpbin.org/", method="PUT")
        loop = asyncio.get_event_loop()
        response = loop.run_until_complete(request.fetch())
        # ``await`` is a SyntaxError inside a plain ``def``; drive the
        # ``response.text()`` coroutine on the event loop instead.
        body = loop.run_until_complete(response.text())
        assert body == ""
    except Exception as e:
        assert isinstance(e, InvalidRequestMethod)
예제 #5
0
 async def parse(self, response):
     """Yield one POST ``Request`` per entry of ``self.start_urls``.

     Each request carries ``self.body`` as data, ``self.parse_item`` as the
     callback and the URL's position under the ``'index'`` metadata key.
     """
     for position, start_url in enumerate(self.start_urls):
         post_request = Request(
             start_url,
             method='POST',
             data=self.body,
             callback=self.parse_item,
             metadata={'index': position},
         )
         yield post_request
예제 #6
0
def test_method_error_request():
    """Check that a ``PUT`` request yields empty ``html`` or ``InvalidRequestMethod``."""
    try:
        put_request = Request('https://httpbin.org/', method='PUT')
        loop = asyncio.get_event_loop()
        result = loop.run_until_complete(put_request.fetch())
        assert result.html == ''
    except Exception as exc:
        # Any failure above, including the assertion, must be the
        # invalid-method error for the test to pass.
        assert isinstance(exc, InvalidRequestMethod)
예제 #7
0
async def request_example():
    """Fetch the student page and collect the data needed for a follow-up form post.

    :return: a ``(form_data, must_cookies, history)`` tuple: the form fields
        (token value read from the page, admission number left as ``None``),
        the required cookies taken from the response, and ``response.history``
    """
    request = Request(
        url='http://portal.neaea.gov.et/Home/Student',
        method='GET',
        params={'name': 'ruia'},
        headers={'User-Agent': 'Mozilla/5.0'},
    )
    required_cookie_names = ['__RequestVerificationToken']

    response = await request.fetch()
    must_cookies = {
        name: response.cookies.get(name) for name in required_cookie_names
    }
    history = response.history

    # Pull the hidden form input's value out of the page markup.
    page = Selector(text=await response.text())
    csrf_token = page.xpath("/html/body/div[2]/div/form/input/@value").get()

    form_data = {
        '__RequestVerificationToken': csrf_token,
        'admissionNumber': None,  # to be set
    }
    return form_data, must_cookies, history
예제 #8
0
async def make_post_request(sem, callback):
    """Build a POST request to httpbin and run it through ``fetch_callback``.

    :param sem: passed straight to ``Request.fetch_callback``
    :param callback: callback handed to the ``Request``
    :return: whatever ``fetch_callback`` returns
    """
    post_request = Request(
        'https://httpbin.org/post',
        method='POST',
        headers={'Content-Type': 'application/json'},
        data=params,  # ``params`` comes from the enclosing module
        callback=callback,
    )
    outcome = await post_request.fetch_callback(sem)
    return outcome
예제 #9
0
 async def parse(self, res):
     """Yield two requests for the httpbin ``/get`` page, tagged with their index."""
     target = 'http://www.httpbin.org/get'
     for position, page_url in enumerate([target, target]):
         follow_up = Request(
             page_url,
             callback=self.parse_item,
             metadata={'index': position},
         )
         yield follow_up
예제 #10
0
 async def parse(self, response):
     """Yield a single request for ``response.url`` handled by ``self.parse_item``.

     The spider's ``headers``, ``request_config`` and extra ``kwargs`` are
     forwarded to the new ``Request``.
     """
     follow_up = Request(
         url=response.url,
         callback=self.parse_item,
         headers=self.headers,
         request_config=self.request_config,
         **self.kwargs
     )
     yield follow_up
예제 #11
0
 async def parse(self, res):
     """Yield one request per archive item found in ``res.html``.

     Also stores a database handle on ``self.mongo_db`` before iterating.
     """
     archive_items = await alist(ArchivesItem.get_items(html=res.html))
     self.mongo_db = MotorBase(loop=self.loop).get_db()
     for archive in archive_items:
         # Random sleep: pick a new DELAY between 5 and 10 for each request.
         # NOTE(review): the same config dict object is handed to every
         # Request; confirm Request does not rely on it staying unchanged.
         self.request_config['DELAY'] = random.randint(5, 10)
         follow_up = Request(
             archive.href,
             callback=self.parse_item,
             request_config=self.request_config,
         )
         yield follow_up
예제 #12
0
 async def parse(self, response):
     """Store the ``ruia_test`` database handle and yield requests for two news pages."""
     self.mongo_db = MotorBase().get_db('ruia_test')
     page_urls = [
         'https://news.ycombinator.com/news?p=1',
         'https://news.ycombinator.com/news?p=2',
     ]
     for position, page_url in enumerate(page_urls):
         follow_up = Request(
             page_url,
             callback=self.parse_item,
             metadata={'index': position},
         )
         yield follow_up
예제 #13
0
파일: test_request.py 프로젝트: Ma233/ruia
def test_delay_false():
    """Check that ``fetch(delay=False)`` finishes in under the configured 10s DELAY."""
    delayed_config = {"DELAY": 10}
    request = Request("https://httpbin.org/", request_config=delayed_config)

    # Record the start time so the elapsed duration can be measured
    started_at = time.time()
    loop = asyncio.get_event_loop()
    loop.run_until_complete(request.fetch(delay=False))
    elapsed = time.time() - started_at

    # The delay option must have been ignored (time taken is less than 10s)
    assert elapsed < 10
예제 #14
0
 async def parse(self, response):
     """Store the ``ruia_test`` database handle and yield requests for two news pages."""
     self.mongo_db = MotorBase().get_db("ruia_test")
     base = "https://news.ycombinator.com/news?p="
     page_urls = [base + "1", base + "2"]
     for position, page_url in enumerate(page_urls):
         follow_up = Request(
             page_url,
             callback=self.parse_item,
             metadata={"index": position},
         )
         yield follow_up
예제 #15
0
 async def parse(self, response):
     """Store the ``hacknews`` database handle and yield requests for two article pages."""
     self.mongo_db = MotorBase().get_db('hacknews')
     article_urls = [
         'https://baijiahao.baidu.com/s?id=1553475025395018',
         'https://baijiahao.baidu.com/s?id=1570895803249513',
     ]
     for position, article_url in enumerate(article_urls):
         follow_up = Request(
             article_url,
             callback=self.parse_item,
             metadata={'index': position},
         )
         yield follow_up
예제 #16
0
파일: request.py 프로젝트: ziux/ruia
async def request_example():
    """Fetch httpbin ``/get`` as JSON and assert the params and User-Agent come back."""
    user_agent = "Python3.6"
    request = Request(
        url="https://httpbin.org/get",
        method="GET",
        res_type="json",
        params={"name": "ruia"},
        headers={"User-Agent": user_agent},
    )
    response = await request.fetch()
    # Both checks read the JSON payload exposed as ``response.html``.
    assert response.html["args"]["name"] == "ruia"
    assert response.html["headers"]["User-Agent"] == user_agent
예제 #17
0
파일: test_request.py 프로젝트: Ma233/ruia
async def make_post_request(sem, callback):
    """Build a POST request to httpbin and run it through ``fetch_callback``.

    :param sem: passed straight to ``Request.fetch_callback``
    :param callback: callback handed to the ``Request``
    :return: whatever ``fetch_callback`` returns
    """
    json_headers = {"Content-Type": "application/json"}
    post_request = Request(
        "https://httpbin.org/post",
        method="POST",
        headers=json_headers,
        data=params,  # ``params`` comes from the enclosing module
        callback=callback,
    )
    outcome = await post_request.fetch_callback(sem)
    return outcome
예제 #18
0
 async def parse(self, res):
     """Yield a request for the first page plus every paginator link.

     Each page suffix is appended to ``self.start_urls[0]``; the position is
     stored under the ``'index'`` metadata key.
     """
     tree = res.html_etree
     paginator_links = tree.cssselect('.paginator>a')
     # The first page has no paginator link, so its query string is hard-coded.
     suffixes = ['?start=0&filter=', *(link.get('href') for link in paginator_links)]
     for position, suffix in enumerate(suffixes):
         page_url = self.start_urls[0] + suffix
         follow_up = Request(
             page_url,
             callback=self.parse_item,
             metadata={'index': position},
             request_config=self.request_config,
             **self.kwargs
         )
         yield follow_up
예제 #19
0
 async def parse(self, res):
     """Yield one request per archive item parsed from the response text.

     A failure to obtain the database handle is logged and does not stop
     the parsing.
     """
     try:
         self.mongo_db = MotorBase(loop=self.loop).get_db()
     except Exception as exc:
         self.logger.exception(exc)

     page_html = await res.text()
     async for archive in ArchivesItem.get_items(html=page_html):
         follow_up = Request(
             archive.href,
             callback=self.parse_item,
             request_config=self.request_config,
         )
         yield follow_up
예제 #20
0
파일: test_request.py 프로젝트: Ma233/ruia
 async def timeout_request(sem):
     """Run a GET request configured with a 0.1 ``TIMEOUT`` through ``fetch_callback``.

     :param sem: passed straight to ``Request.fetch_callback``
     :return: whatever ``fetch_callback`` returns
     """
     short_timeout_config = {"RETRIES": 1, "DELAY": 1, "TIMEOUT": 0.1}
     get_request = Request(
         "https://httpbin.org/get",
         method="GET",
         metadata={"hello": "ruia"},
         encoding="utf-8",
         request_config=short_timeout_config,
         params=params,  # ``params`` and ``hi`` come from the enclosing scope
         callback=hi,
     )
     outcome = await get_request.fetch_callback(sem)
     return outcome
예제 #21
0
def test_retry_delay():
    """Check that two retries with ``RETRY_DELAY`` of 1 take longer than 2 seconds."""
    # An invalid local URL triggers the retries, with 1s delay between them
    retry_config = {"RETRIES": 2, "RETRY_DELAY": 1}
    request = Request("http://127.0.0.1:5999/", request_config=retry_config)

    # Record the start time so the retries can be timed
    started_at = time.time()
    loop = asyncio.get_event_loop()
    _, _response = loop.run_until_complete(request.fetch_callback(sem=sem))
    elapsed = time.time() - started_at

    # For 2 retries the time taken must be > 2s (1s between each retry)
    assert elapsed > 2
예제 #22
0
async def sec_request():
    """Build the follow-up POST request from ``request_example()`` and start its fetch.

    The form data is sent as the request's ``metadata`` and the collected
    cookies as its ``cookies``; the request is printed before fetching.

    :return: the un-awaited result of ``request.fetch()``
    """
    form_data, must_cookies, _history = await request_example()
    request = Request(
        url='http://portal.neaea.gov.et/Student/StudentDetailsx',
        method='POST',
        headers={'User-Agent': 'Mozilla/5.0'},
        metadata=form_data,
        cookies=must_cookies,
    )
    print(request)
    # NOTE(review): fetch() is returned without ``await``, so callers get
    # an awaitable rather than a response. Confirm this is intended.
    pending_fetch = request.fetch()
    return pending_fetch
예제 #23
0
def test_request_config():
    """Exercise the GET and POST helper requests and check their JSON payloads."""
    assert str(Request('https://httpbin.org/')) == '<GET https://httpbin.org/>'

    loop = asyncio.get_event_loop()

    # GET: callback result, metadata and echoed query args.
    _, get_response = loop.run_until_complete(
        make_get_request(sem=sem, callback=hello))
    assert get_response.callback_result == 'hello ruia'
    assert get_response.metadata == {'hello': 'ruia'}
    get_payload = loop.run_until_complete(get_response.json())
    assert get_payload['args']['name'] == "ruia"

    # POST: echoed request body.
    _, post_response = loop.run_until_complete(
        make_post_request(sem=sem, callback=None))
    post_payload = loop.run_until_complete(post_response.json())
    assert post_payload['data'] == "name=ruia"
예제 #24
0
def test_request_config():
    """Exercise the GET and POST helper requests and check their JSON payloads."""
    assert str(Request("https://httpbin.org/")) == "<GET https://httpbin.org/>"

    loop = asyncio.get_event_loop()

    # GET: metadata and echoed query args (callback_result check is disabled).
    _, get_response = loop.run_until_complete(
        make_get_request(sem=sem, callback=hello))
    # assert get_response.callback_result == "hello ruia"
    assert get_response.metadata == {"hello": "ruia"}
    get_payload = loop.run_until_complete(get_response.json())
    assert get_payload["args"]["name"] == "ruia"

    # POST: echoed request body.
    _, post_response = loop.run_until_complete(
        make_post_request(sem=sem, callback=None))
    post_payload = loop.run_until_complete(post_response.json())
    assert post_payload["data"] == "name=ruia"
예제 #25
0
 async def parse(self, res):
     """Yield one request per link matched by ``.content_list .dd_bt a``.

     Each ``href`` is prefixed with ``'http:'`` before being requested.
     """
     tree = res.html_etree
     anchors = tree.cssselect('.content_list .dd_bt a')
     hrefs = [anchor.get('href') for anchor in anchors]
     for position, href in enumerate(hrefs):
         absolute_url = 'http:' + href
         follow_up = Request(
             absolute_url,
             callback=self.parse_item,
             metadata={'index': position},
             request_config=self.request_config,
         )
         yield follow_up
예제 #26
0
    async def parse_item(self, res):
        """Yield a save request for every listed article without stored ``html``.

        Each article link is looked up in ``self.mongo_db.source_docs`` by
        URL; a request is yielded only when no record exists or the record
        has a falsy ``"html"`` value.
        """
        page_html = await res.text()
        async for article in ArticleListItem.get_items(html=page_html):
            # Do not re-request links that were already crawled
            stored = await self.mongo_db.source_docs.find_one(
                {"url": article.href})
            record = stored or {}

            if record.get("html"):
                continue

            yield Request(
                article.href,
                callback=self.save,
                metadata={"title": article.title},
                request_config=self.request_config,
            )
예제 #27
0
 async def parse_item(self, res):
     """Yield a save request for every listed article with no stored record.

     Each article link is looked up in ``self.mongo_db.source_docs`` by URL;
     links that already have a record are skipped.
     """
     articles = await alist(ArticleListItem.get_items(html=res.html))
     for article in articles:
         # Do not re-request links that were already crawled
         stored = await self.mongo_db.source_docs.find_one(
             {'url': article.href})
         if stored:
             continue

         # Random sleep: pick a new DELAY between 5 and 10 for each request
         self.request_config['DELAY'] = random.randint(5, 10)
         yield Request(
             article.href,
             callback=self.save,
             metadata={'title': article.title},
             request_config=self.request_config,
         )
예제 #28
0
    async def parse_item(self, res):
        """Yield a save request for every listed article without stored ``html``.

        Each article link is looked up in ``self.mongo_db.source_docs`` by
        URL; a request is yielded only when no record exists or the record
        has a falsy ``'html'`` value.
        """
        articles = await ArticleListItem.get_items(html=res.html)
        for article in articles:
            # Do not re-request links that were already crawled
            stored = await self.mongo_db.source_docs.find_one(
                {'url': article.href})
            record = stored or {}

            if record.get('html'):
                continue

            yield Request(
                article.href,
                callback=self.save,
                metadata={'title': article.title},
                request_config=self.request_config,
            )
예제 #29
0
 async def timeout_request(sem):
     """Run a GET request configured with a 0.1 ``TIMEOUT`` through ``fetch_callback``.

     :param sem: passed straight to ``Request.fetch_callback``
     :return: whatever ``fetch_callback`` returns
     """
     short_timeout_config = {'RETRIES': 1, 'DELAY': 1, 'TIMEOUT': 0.1}
     get_request = Request(
         'https://httpbin.org/get',
         method='GET',
         metadata={'hello': 'ruia'},
         encoding='utf-8',
         request_config=short_timeout_config,
         params=params,  # ``params`` and ``hi`` come from the enclosing scope
         callback=hi,
     )
     outcome = await get_request.fetch_callback(sem)
     return outcome
예제 #30
0
async def make_get_request(sem, callback=None):
    """Build a GET request to httpbin and run it through ``fetch_callback``.

    The request config supplies the module's ``valid_response`` and
    ``retry_func`` under the ``'VALID'`` and ``'RETRY_FUNC'`` keys.

    :param sem: passed straight to ``Request.fetch_callback``
    :param callback: optional callback handed to the ``Request``
    :return: whatever ``fetch_callback`` returns
    """
    get_request = Request(
        'https://httpbin.org/get',
        method='GET',
        metadata={'hello': 'ruia'},
        request_config={
            'RETRIES': 3,
            'DELAY': 1,
            'TIMEOUT': 0.1,
            'VALID': valid_response,
            'RETRY_FUNC': retry_func,
        },
        params=params,
        callback=callback,
    )
    outcome = await get_request.fetch_callback(sem)
    return outcome