Beispiel #1
0
 def test_constructor_meta_argument(self):
     """Check the ``meta`` attribute for default, non-dict and dict arguments."""
     # Without a ``meta`` argument the attribute must equal an empty dict.
     default_req = Request()
     self.assertEqual(default_req.meta, {})

     # A non-dict value is expected to be stored unchanged.
     int_req = Request(meta=1)
     self.assertEqual(int_req.meta, 1)

     # A dict value must compare equal to what was passed in.
     dict_req = Request(meta={'place': 'hell'})
     self.assertEqual(dict_req.meta, {'place': 'hell'})
Beispiel #2
0
 def task_generator(self):
     """Yield two 'test' requests to the test server, tagged with ids 1 and 2.

     The first request carries ``timeout=0.1`` and the second
     ``timeout=0.5`` (presumably seconds -- confirm against ``Request``).
     """
     for task_id, timeout in ((1, 0.1), (2, 0.5)):
         yield Request('test',
                       url=server.get_url(),
                       timeout=timeout,
                       meta={'id': task_id})
Beispiel #3
0
    def test_constructor_arguments(self):
        """Check that ``Request`` accepts tag and url positionally and by keyword."""
        # Only the tag is given: url is expected to be None.
        tag_only = Request('TAG')
        self.assertEqual(tag_only.tag, 'TAG')
        self.assertEqual(tag_only.url, None)

        # Positional form: tag first, url second.
        positional = Request('TAG', 'URL')
        self.assertEqual(positional.tag, 'TAG')
        self.assertEqual(positional.url, 'URL')

        # Keyword form, deliberately given in the opposite order.
        by_keyword = Request(url='URL', tag='TAG')
        self.assertEqual(by_keyword.tag, 'TAG')
        self.assertEqual(by_keyword.url, 'URL')
 def click_button(self):
     """Run a download and mirror its progress on ``self.saveButton``.

     A fresh queue is handed to ``Request().createDownTxt`` together with
     ``self.web`` and ``self.book``; this method then polls that queue for
     ``(marker, chapter)`` pairs until a terminating message arrives:

     * ``'over'`` in either position ends the loop (success);
     * a marker of ``'404'`` ends the loop and flags failure;
     * a marker below -1 shows the negated value as a "remaining" count;
     * anything else shows the "saving" label.

     The outcome label stays visible for five seconds before the button
     is reset to the "download again" label.
     """
     self.saveButton.setText(_translate("MainWindow", "下载中..."))
     events = Queue()
     print('aaaa' + self.web)
     Request().createDownTxt(self.web, self.book, events)

     succeeded = True
     while True:
         if events.empty():
             # Nothing to read yet: keep the Qt event loop serviced
             # while waiting for the next message.
             QApplication.processEvents()
             continue

         marker, chapter = events.get()
         if marker == 'over' or chapter == 'over':
             print(chapter)
             break
         if marker == '404':
             succeeded = False
             break

         if marker < -1:
             label = "剩余{}".format(-marker)
         else:
             label = "正在保存.."
         self.saveButton.setText(_translate("MainWindow", label))

     # Show the outcome, flush it to the screen, and hold it for a while.
     outcome = "下载完成" if succeeded else "下载失败"
     self.saveButton.setText(_translate("MainWindow", outcome))
     QApplication.processEvents()
     time.sleep(5)

     self.saveButton.setText(_translate("MainWindow", "重新下载"))
Beispiel #5
0
 def task_generator(self):
     """Yield one 'page' request per host listed in ``var/host.txt``.

     Every line is stripped of surrounding whitespace first. Blank lines
     and lines whose first non-blank character is ``#`` are skipped, so no
     request is ever built for an empty host name (``https:///``).

     Each request targets ``https://<host>/`` and carries the host name in
     ``meta['host']``.
     """
     with open('var/host.txt') as inp:
         for line in inp:
             host = line.strip()
             # Strip before testing so indented comments and
             # whitespace-only lines are recognised as such.
             if not host or host.startswith('#'):
                 continue
             yield Request('page',
                           'https://%s/' % host,
                           meta={'host': host})
Beispiel #6
0
 def click_button(self):
     """Start a search on every checked site for the text in the search box.

     At most the first ten entries of ``self.checkBoxList`` are inspected
     (fewer when ``self.webs`` is shorter). For each checked box a search
     is started via ``Request.createSearchThread`` and the returned queue
     is handed to the result tab widget.
     """
     self.searchButton.setText(_translate("MainWindow", "搜索中..."))
     search_word = self.searchEdit.text().strip()
     print('点击了按钮,开始搜索:{}'.format(search_word))
     searcher = Request()
     result_tabs = self.tabtab
     # Walk over the selected sites.
     limit = min(len(self.webs), 10)
     for index in range(limit):
         box = self.checkBoxList[index]
         if not box.isChecked():
             continue
         print(box.text(), '开始搜索')
         result_queue = searcher.createSearchThread(box.text(), search_word)
         result_tabs.add_ResultWidget_fuck(box.text(), result_queue)
Beispiel #7
0
    def test_single_request(self):
        """A single 'test' task must hand the server's response body to the handler."""
        class SimpleCrawler(Crawler):
            # Dict the handler stores the received body in.
            data = {}

            def handler_test(self, req, res):
                self.data['response'] = res.body

        token = 'Python'
        self.server.response['data'] = token

        bot = SimpleCrawler()
        bot.add_task(Request('test', url=self.server.get_url()))
        bot.run()
        # assertEqual replaces the deprecated assertEquals alias, which
        # was removed from unittest in Python 3.12.
        self.assertEqual(token, bot.data['response'])
Beispiel #8
0
    def test_required_data_not_found(self):
        """The handler raises ``RequiredDataNotFound`` once, then must succeed.

        The first handler call raises; the test expects a later call to
        append ``'ok'``, which implies the task is processed again after
        the exception.
        """
        class TestCrawler(Crawler):
            def init_hook(self):
                # First handler call sees False (raise), second sees True.
                self.gen = iter([False, True])
                self.result = []

            def handler_test(self, req, res):
                if not next(self.gen):
                    raise RequiredDataNotFound
                self.result.append('ok')

        bot = TestCrawler()
        bot.add_task(Request('test', url=self.server.get_url()))
        bot.run()
        # assertEqual replaces the deprecated assertEquals alias, which
        # was removed from unittest in Python 3.12.
        self.assertEqual('ok', bot.result[0])
Beispiel #9
0
    def test_too_many_redirects(self):
        """An endless redirect loop must be counted as request errors.

        The server always answers 301 with a ``location`` header pointing
        back at itself. The test expects the
        ``network:request-error-test`` counter to reach 10.
        NOTE(review): 10 presumably matches the crawler's retry limit --
        confirm against the crawler configuration.
        """
        class TestCrawler(Crawler):
            def init_hook(self):
                self.data = None

            def handler_test(self, req, res):
                self.data = res.body

        self.server.response['data'] = 'foo'
        self.server.response['code'] = 301
        self.server.response['headers'] = [('location', self.server.get_url())]

        bot = TestCrawler()
        bot.add_task(Request('test', url=self.server.get_url()))
        bot.run()
        # assertEqual replaces the deprecated assertEquals alias, which
        # was removed from unittest in Python 3.12.
        self.assertEqual(10, bot.stat.counters['network:request-error-test'])
Beispiel #10
0
    def test_required_data_not_found_implicit(self):
        """A failing ``.require()`` in the handler must lead to a second fetch.

        The server serves ``<h2>test</h2>`` once and ``<h1>test</h1>``
        afterwards, while the handler requires an ``//h1`` node. The test
        expects ``'ok'`` to be recorded and two successful network
        requests to be counted.
        """
        class TestCrawler(Crawler):
            def init_hook(self):
                # NOTE(review): self.gen is not read anywhere in this
                # test; it looks like a leftover -- confirm before removing.
                self.gen = iter([False, True])
                self.result = []

            def handler_test(self, req, res):
                res.xpath('//h1').require()
                self.result.append('ok')

        self.server.response_once['data'] = '<h2>test</h2>'
        self.server.response['data'] = '<h1>test</h1>'
        bot = TestCrawler()
        bot.add_task(Request('test', url=self.server.get_url()))
        bot.run()
        # assertEqual replaces the deprecated assertEquals alias, which
        # was removed from unittest in Python 3.12.
        self.assertEqual('ok', bot.result[0])
        self.assertEqual(2, bot.stat.counters['network:request-ok-test'])
Beispiel #11
0
    def test_handler_error(self):
        """An exception raised inside a handler must be recorded in the stats.

        The handler divides by zero; the test expects exactly one
        ``handler_error`` item, formatted as
        ``tag|exception name|message|url``.
        """
        class SimpleCrawler(Crawler):
            data = {}

            def handler_test(self, req, res):
                # Deliberately raises ZeroDivisionError.
                1 / 0

        bot = SimpleCrawler()
        bot.add_task(Request('test', url=self.server.get_url()))
        bot.run()

        recorded = bot.stat.items['handler_error']
        self.assertEqual(1, len(recorded))

        expected_fields = (
            'test',
            'ZeroDivisionError',
            'division by zero',
            self.server.get_url(),
        )
        self.assertEqual(recorded[0], '%s|%s|%s|%s' % expected_fields)
Beispiel #12
0
    def test_redirect_by_default(self):
        """A 301 response must be followed without extra configuration.

        The server answers once with a 301 (body ``'foo'``) whose
        ``location`` header points back at the server, and with body
        ``'bar'`` afterwards. The handler must end up with ``b'bar'``.
        """
        class TestCrawler(Crawler):
            def init_hook(self):
                self.data = None

            def handler_test(self, req, res):
                self.data = res.body

        self.server.response_once['data'] = 'foo'
        self.server.response_once['code'] = 301
        self.server.response_once['headers'] = [('location',
                                                 self.server.get_url())]

        self.server.response['data'] = 'bar'

        bot = TestCrawler()
        bot.add_task(Request('test', url=self.server.get_url()))
        bot.run()
        # assertEqual replaces the deprecated assertEquals alias, which
        # was removed from unittest in Python 3.12.
        self.assertEqual(b'bar', bot.data)
Beispiel #13
0
 def task_generator(self):
     """Yield one 'test' request for every URL in ``self._urls_todo``."""
     for pending_url in self._urls_todo:
         task = Request('test', url=pending_url)
         yield task
Beispiel #14
0
 def task_generator(self):
     """Yield a 'page'-tagged request for each non-blank host in a fixed list.

     Note that this ``Request`` takes the URL as its first argument and
     the tag as a keyword.
     """
     for raw_host in ['google.com']:
         host = raw_host.strip()
         if not host:
             continue
         yield Request('http://%s/' % host, tag='page')
Beispiel #15
0
 def task_generator(self):
     """Yield ten 'test' requests, each aimed at the test server's URL."""
     for _ in range(10):
         target = server.get_url()
         yield Request('test', url=target)
Beispiel #16
0
 def handler_page(self, req, res):
     """Yield a follow-up 'page2' request that goes through ``proxy_server``.

     Neither ``req`` nor ``res`` is inspected.
     """
     follow_up = Request('page2',
                         url=server.get_url(),
                         proxy=proxy_server.address())
     yield follow_up
Beispiel #17
0
 def check_url(self, url):
     """Wrap ``url`` in a ``Request`` and delegate to ``self.check_request``.

     Everything produced by ``check_request`` is passed through unchanged.
     """
     checker = self.check_request
     yield from checker(Request(url))
Beispiel #18
0
 def handler_test2(self, req, res):
     """Record the handled request's id, then chain a 'test3' request with id 3."""
     handled_id = req.meta['id']
     self.points.append(handled_id)
     next_task = Request('test3', url=server.get_url(), meta={'id': 3})
     yield next_task
Beispiel #19
0
 def task_generator(self):
     """Yield 2000 'page' requests whose URLs differ only in the query number (0-1999)."""
     url_template = 'http://127.0.0.1/awesome_python.html?%d'
     for number in range(2000):
         yield Request('page', url=url_template % number)
Beispiel #20
0
 def task_generator(self):
     """Yield ``self._meta['num_req']`` 'page' requests for one fixed local URL."""
     total = self._meta['num_req']
     for _ in range(total):
         yield Request('page', 'http://127.0.0.1/awesome_python.html')
Beispiel #21
0
 def task_generator(self):
     """Yield one 'page' request, then wait on ``self.unfreeze`` when resumed."""
     first_task = Request('page', url=server.get_url())
     yield first_task
     # Reached only when the consumer asks for a second task: the
     # generator then blocks here until ``unfreeze.wait()`` returns.
     self.unfreeze.wait()
Beispiel #22
0
 def task_generator(self):
     """Yield a single 'page' request aimed at the test server's URL."""
     target = server.get_url()
     yield Request('page', url=target)
Beispiel #23
0
 def task_generator(self):
     """Yield 'page' requests forever, sleeping 0.1 s after each one is consumed."""
     while True:
         task = Request('page', url=server.get_url())
         yield task
         # Pause before building the next request.
         time.sleep(0.1)
Beispiel #24
0
 def task_generator(self):
     """Yield a single 'test' request carrying ``{'id': 1}`` as its meta."""
     first_task = Request('test', url=server.get_url(), meta={'id': 1})
     yield first_task
Beispiel #25
0
 def handler_page(self, req, res):
     """Yield another 'page' request to the test server; ``req`` and ``res`` are unused."""
     target = server.get_url()
     yield Request('page', url=target)
Beispiel #26
0
 def task_generator(self):
     """Yield one 'test' request with ``self.handler_test`` passed as its ``callback``."""
     target = server.get_url()
     on_response = self.handler_test
     yield Request('test', url=target, callback=on_response)
Beispiel #27
0
 def task_generator(self):
     """Yield a 'page'-tagged request per host in ``docs/domains.txt``.

     Only the first 100 lines of the file are read. Each line is stripped
     and blank lines are skipped. The request URL is ``http://<host>/``.
     """
     # The context manager closes the file when the generator finishes
     # or is closed, instead of leaving the handle to the garbage
     # collector.
     with open('docs/domains.txt') as domains:
         for line in islice(domains, 100):
             host = line.strip()
             if host:
                 yield Request('http://%s/' % host, tag='page')
Beispiel #28
0
 def task_generator(self):
     """Write ``proxy_server2`` into ``pl_file``, pause, then yield one 'page' request."""
     with open(pl_file, 'w') as proxy_list:
         proxy_list.write(proxy_server2)
     # Half-second pause before the request is issued -- presumably so
     # the rewritten file can be picked up; confirm against the caller.
     time.sleep(0.5)
     target = server.get_url()
     yield Request('page', url=target)