Esempio n. 1
0
    def test_dynamic_request_browser_actions(self):
        """Run a sign-in form through ``browser_actions``, then re-fetch.

        The first dynamic request targets the sign-in page and passes
        ``_actions`` as its only browser action.  The second dynamic request
        uses the same ``'cookiejar': 'test'`` meta key.  Both requests are
        passed through the cookies middleware before and after the fetch.

        Nothing is asserted: the final response body is printed for manual
        inspection only.
        """
        cm = CookiesMiddleware(self.spider, self.spider.settings)
        self.driver = webdriver.Chrome()
        # Close the browser window even when a fetch raises part-way through,
        # otherwise a failing run leaves the Chrome instance behind.
        try:
            dh = DownloadHandler(self.spider, self.driver, self.driver_sem)

            def _actions(driver):
                """Fill in the form fields, click the button, wait 5 s."""
                driver.find_element_by_name('account').send_keys("username")
                driver.find_element_by_name('password').send_keys("pwd")
                # NOTE(review): absolute XPath, tied to the page's current
                # layout -- will break if the markup changes.
                driver.find_element_by_xpath(
                    '/html/body/div[1]/div/div[2]/div[2]/form/div[2]/button'
                ).click()
                # Presumably gives the page time to finish signing in.
                gevent.sleep(5)

            request = Request(
                'https://www.zhihu.com/#signin',
                dynamic=True,
                meta={'cookiejar': 'test'},
                browser_actions=[_actions],
            )
            cm.process_request(request)
            response = dh.fetch(request)
            cm.process_response(request, response)

            request = Request('https://www.zhihu.com',
                              dynamic=True,
                              meta={'cookiejar': 'test'})
            cm.process_request(request)
            response = dh.fetch(request)
            cm.process_response(request, response)
            # Single-argument call form prints the same on Python 2 and 3.
            print(response.body)
        finally:
            self.driver.close()
 def test_process_request(self):
     """Fetch a URL after ProxyMiddleware processed the request.

     ``PROXY_LIST`` is set to a single address before the middleware is
     built; the test passes when the fetched response body is truthy.
     """
     settings = self.spider.settings
     settings.set("PROXY_LIST", ['124.88.67.54:80'])
     req = Request('http://httpbin.org/get')
     proxy_mw = ProxyMiddleware(settings, self.spider.logger)
     handler = DownloadHandler(self.spider, None, BoundedSemaphore(1))
     proxy_mw.process_request(req)
     fetched = handler.fetch(req)
     assert fetched.body
 def test_process_request(self):
     """Compare the echoed user agent with the request's own header.

     The request starts without a ``User-Agent`` header; after
     UserAgentMiddleware has processed it, the ``'user-agent'`` value in
     the JSON response must equal the header stored on the request.
     """
     spider = self.spider
     req = Request('http://httpbin.org/user-agent')
     # No header is present before the middleware runs.
     self.assertIs(req.headers.get("User-Agent"), None)
     ua_mw = UserAgentMiddleware(spider.settings, spider.logger)
     handler = DownloadHandler(spider, None, BoundedSemaphore(1))
     ua_mw.process_request(req)
     fetched = handler.fetch(req)
     echoed_agent = json.loads(fetched.body)['user-agent']
     self.assertEqual(echoed_agent, req.headers['User-Agent'])
    def test_process_request_interval(self):
        """Check the delay between two requests processed by the middleware.

        The clock starts right after the first request has been processed
        and stops right after the second one has; more than 3 seconds must
        have passed in between.
        """
        settings = self.spider.settings
        settings.set("PROXY_LIST", ['218.76.106.78:3128'])
        first = Request('http://httpbin.org/get')
        proxy_mw = ProxyMiddleware(settings, self.spider.logger)
        handler = DownloadHandler(self.spider, None, BoundedSemaphore(1))
        proxy_mw.process_request(first)
        started_at = time.time()
        handler.fetch(first)

        second = Request('http://httpbin.org/get')
        proxy_mw.process_request(second)
        elapsed = time.time() - started_at
        self.assertGreater(elapsed, 3)
Esempio n. 5
0
 def test_timeout_dynamic(self):
     """Expect TimeoutException for a dynamic fetch of ``/delay/10``.

     ``TIMEOUT`` is set to 5 on the spider settings before the handler is
     created.
     """
     self.driver = webdriver.PhantomJS()
     # Close the driver even if the assertion fails, so a failing run does
     # not leave the PhantomJS instance behind.
     try:
         self.spider.settings.set('TIMEOUT', 5)
         dh = DownloadHandler(self.spider, self.driver, self.driver_sem)
         self.assertRaises(TimeoutException, dh.fetch,
                           Request(HTTPBIN_URL + '/delay/10', dynamic=True))
     finally:
         self.driver.close()
Esempio n. 6
0
    def test_post_data_content_static(self):
        """POST three kinds of body and check what the endpoint echoes back.

        A dict body is compared against the ``'form'`` field of the JSON
        response; the two string bodies are compared against ``'data'``.
        The status code is only checked on the last response.
        """
        handler = DownloadHandler(self.spider, self.driver, self.driver_sem)
        post_url = HTTPBIN_URL + '/post'

        # Dict body.
        form_request = Request(post_url,
                               method='POST',
                               body={'text': 'pycreeper'})
        form_response = handler.fetch(form_request)
        self.assertIsInstance(form_response, Response)
        form_echo = json.loads(form_response.body)['form']
        self.assertEqual(form_echo, {'text': 'pycreeper'})

        # Unicode string body.
        unicode_request = Request(post_url, method='POST', body=u'Unicode测试')
        unicode_response = handler.fetch(unicode_request)
        unicode_echo = json.loads(unicode_response.body)['data']
        self.assertEqual(unicode_echo, 'Unicode测试')

        # Plain string body with non-ASCII characters.
        plain_request = Request(post_url, method='POST', body='中文测试')
        plain_response = handler.fetch(plain_request)
        plain_echo = json.loads(plain_response.body)['data']
        self.assertEqual(plain_echo, '中文测试')
        self.assertEqual(plain_response.status, 200)
Esempio n. 7
0
 def test_dynamic_request_cookie_between_static_and_dynamic(self):
     """Check cookies from a dynamic fetch show up on a later static one.

     A dynamic request hits ``/cookies/set`` with two key/value pairs and
     its response is handed to the cookies middleware.  A second request
     without ``dynamic=True``, using the same ``'cookiejar': 'test'`` meta
     key, is processed by the middleware and fetched; the ``'cookies'``
     field of its JSON body must contain both pairs.
     """
     cm = CookiesMiddleware(self.spider, self.spider.settings)
     self.driver = webdriver.PhantomJS()
     # Close the driver even if a fetch or the assertion fails, so a
     # failing run does not leave the PhantomJS instance behind.
     try:
         dh = DownloadHandler(self.spider, self.driver, self.driver_sem)
         request = Request(HTTPBIN_URL + '/cookies/set?key1=val1&key2=val2',
                           dynamic=True,
                           meta={'cookiejar': 'test'})
         response = dh.fetch(request)
         cm.process_response(request, response)
         request = Request(HTTPBIN_URL + '/cookies',
                           meta={'cookiejar': 'test'})
         cm.process_request(request)
         response = dh.fetch(request)
         self.assertEqual(
             json.loads(response.body)['cookies'], {
                 u'key1': u'val1',
                 u'key2': u'val2'
             })
     finally:
         self.driver.close()
Esempio n. 8
0
 def test_concurrency_with_delayed_url(self):
     """Fetch ``n`` copies of ``/delay/1`` through a pool of size ``n``.

     The whole batch must finish in fewer than ``n`` seconds.
     Presumably each ``/delay/1`` response takes about one second, so a
     strictly sequential run would not pass -- confirm against the server
     behind ``HTTPBIN_URL``.
     """
     dh = DownloadHandler(self.spider, self.driver, self.driver_sem)
     n = 5
     pool = Pool(n)
     requests = [Request(HTTPBIN_URL + '/delay/1') for _ in range(n)]
     time_start = time.time()
     pool.map(dh.fetch, requests)
     time_total = time.time() - time_start
     self.assertLess(time_total, n)
Esempio n. 9
0
 def test_dynamic_request_concurrency(self):
     """Fetch ``n`` dynamic requests through a pool and time the batch.

     Each request is created with ``dynamic=True, wait=5``.  The batch
     must take more than ``n`` seconds.
     NOTE(review): this presumably relies on dynamic fetches sharing the
     single driver and therefore not overlapping -- confirm against
     DownloadHandler.
     """
     self.driver = webdriver.PhantomJS()
     # Close the driver even if a fetch or the assertion fails, so a
     # failing run does not leave the PhantomJS instance behind.
     try:
         dh = DownloadHandler(self.spider, self.driver, self.driver_sem)
         n = 5
         pool = Pool(n)
         requests = [Request(HTTPBIN_URL + '/delay/1', dynamic=True, wait=5)
                     for _ in range(n)]
         time1 = time.time()
         pool.map(dh.fetch, requests)
         self.assertGreater(time.time() - time1, n)
     finally:
         self.driver.close()
Esempio n. 10
0
 def test_dynamic_request_wait(self):
     """Fetch ``/get`` as a dynamic request created with ``wait=3``.

     Nothing is asserted; the test only fails if the fetch raises.
     """
     self.driver = webdriver.PhantomJS()
     # Close the driver even if the fetch raises, so a failing run does
     # not leave the PhantomJS instance behind.
     try:
         dh = DownloadHandler(self.spider, self.driver, self.driver_sem)
         request = Request(HTTPBIN_URL + '/get', dynamic=True, wait=3)
         dh.fetch(request)
     finally:
         self.driver.close()
Esempio n. 11
0
 def test_get_data(self):
     """Fetch ``/get`` and expect a ``Response`` with status 200."""
     handler = DownloadHandler(self.spider, self.driver, self.driver_sem)
     get_request = Request(HTTPBIN_URL + '/get')
     fetched = handler.fetch(get_request)
     self.assertIsInstance(fetched, Response)
     self.assertEqual(fetched.status, 200)
Esempio n. 12
0
 def test_post_data_static(self):
     """POST to ``/post`` without a body; expect a ``Response`` with 200."""
     handler = DownloadHandler(self.spider, self.driver, self.driver_sem)
     post_request = Request(HTTPBIN_URL + '/post', method='POST')
     fetched = handler.fetch(post_request)
     self.assertIsInstance(fetched, Response)
     self.assertEqual(fetched.status, 200)
Esempio n. 13
0
 def test_timeout_static(self):
     """Expect TimeoutException when fetching ``/delay/10``.

     ``TIMEOUT`` is set to 5 on the spider settings before the handler is
     created.
     """
     self.spider.settings.set('TIMEOUT', 5)
     handler = DownloadHandler(self.spider, self.driver, self.driver_sem)
     slow_request = Request(HTTPBIN_URL + '/delay/10')
     with self.assertRaises(TimeoutException):
         handler.fetch(slow_request)