Example #1
0
    def test_request_counter(self):
        import threading

        reset_request_counter()
        grab = build_grab()
        grab.go(self.server.get_url())
        self.assertEqual(grab.request_counter, 1)

        grab.go(self.server.get_url())
        self.assertEqual(grab.request_counter, 2)

        def func():
            grab = build_grab()
            grab.go(self.server.get_url())

        # Make 10 requests in concurrent threads
        threads = []
        for _ in six.moves.range(10):
            thread = threading.Thread(target=func)
            threads.append(thread)
            thread.start()
        for thread in threads:
            thread.join()

        grab.go(self.server.get_url())
        self.assertEqual(grab.request_counter, 13)
Example #2
0
    def test_log_option(self):
        with temp_dir() as tmp_dir:
            reset_request_counter()

            log_file_path = os.path.join(tmp_dir, 'lograb.html')
            grab = build_grab()
            grab.setup(log_file=log_file_path)
            self.server.response['get.data'] = 'omsk'

            self.assertEqual(os.listdir(tmp_dir), [])
            grab.go(self.server.get_url())
            self.assertEqual(os.listdir(tmp_dir), ['lograb.html'])
            with open(log_file_path) as inp:
                self.assertEqual(inp.read(), 'omsk')
Example #3
0
    def test_log_option(self):
        with temp_dir() as tmp_dir:
            reset_request_counter()

            log_file_path = os.path.join(tmp_dir, 'lograb.html')
            grab = build_grab()
            grab.setup(log_file=log_file_path)
            self.server.response['get.data'] = 'omsk'

            self.assertEqual(os.listdir(tmp_dir), [])
            grab.go(self.server.get_url())
            self.assertEqual(os.listdir(tmp_dir), ['lograb.html'])
            with open(log_file_path) as inp:
                self.assertEqual(inp.read(), 'omsk')
Example #4
0
    def test_log_dir_response_content(self):
        with temp_dir() as tmp_dir:
            reset_request_counter()

            grab = build_grab()
            grab.setup(log_dir=tmp_dir)
            self.server.response['get.data'] = 'omsk'
            self.server.response['headers'] = [('X-Engine', 'PHP')]

            self.assertEqual(os.listdir(tmp_dir), [])
            grab.go(self.server.get_url())
            self.assertEqual(sorted(os.listdir(tmp_dir)),
                             ['01.html', '01.log'])
            with open(os.path.join(tmp_dir, '01.log')) as inp:
                log_file_content = inp.read()
            self.assertTrue('x-engine' in log_file_content.lower())
Example #5
0
    def test_log_dir_request_content_is_empty(self):
        with temp_dir() as tmp_dir:
            reset_request_counter()

            grab = build_grab()
            grab.setup(log_dir=tmp_dir)
            grab.setup(headers={'X-Name': 'spider'}, post='xxxPost')

            self.assertEqual(os.listdir(tmp_dir), [])
            grab.go(self.server.get_url())
            self.assertEqual(sorted(os.listdir(tmp_dir)),
                             ['01.html', '01.log'])
            with open(os.path.join(tmp_dir, '01.log')) as inp:
                log_file_content = inp.read()
            self.assertFalse('X-Name' in log_file_content)
            self.assertFalse('xxxPost' in log_file_content)
Example #6
0
    def test_log_dir_response_content(self):
        with temp_dir() as tmp_dir:
            reset_request_counter()

            grab = build_grab()
            grab.setup(log_dir=tmp_dir)
            self.server.response['get.data'] = 'omsk'
            self.server.response['headers'] = [('X-Engine', 'PHP')]

            self.assertEqual(os.listdir(tmp_dir), [])
            grab.go(self.server.get_url())
            self.assertEqual(sorted(os.listdir(tmp_dir)),
                             ['01.html', '01.log'])
            with open(os.path.join(tmp_dir, '01.log')) as inp:
                log_file_content = inp.read()
            self.assertTrue('x-engine' in log_file_content.lower())
Example #7
0
    def test_log_dir_request_content_is_empty(self):
        with temp_dir() as tmp_dir:
            reset_request_counter()

            grab = build_grab()
            grab.setup(log_dir=tmp_dir)
            grab.setup(headers={'X-Name': 'spider'}, post='xxxPost')

            self.assertEqual(os.listdir(tmp_dir), [])
            grab.go(self.server.get_url())
            self.assertEqual(sorted(os.listdir(tmp_dir)),
                             ['01.html', '01.log'])
            with open(os.path.join(tmp_dir, '01.log')) as inp:
                log_file_content = inp.read()
            self.assertFalse('X-Name' in log_file_content)
            self.assertFalse('xxxPost' in log_file_content)
Example #8
0
    def test_log_dir_option(self):
        with temp_dir() as tmp_dir:
            reset_request_counter()

            grab = build_grab()
            grab.setup(log_dir=tmp_dir)
            self.server.response_once['get.data'] = 'omsk1'
            self.server.response['get.data'] = 'omsk2'

            self.assertEqual(os.listdir(tmp_dir), [])
            grab.go(self.server.get_url())
            grab.go(self.server.get_url())
            self.assertEqual(sorted(os.listdir(tmp_dir)),
                             ['01.html', '01.log', '02.html', '02.log'])
            with open(os.path.join(tmp_dir, '01.html')) as inp:
                self.assertEqual(inp.read(), 'omsk1')
            with open(os.path.join(tmp_dir, '02.html')) as inp:
                self.assertEqual(inp.read(), 'omsk2')
Example #9
0
    def test_log_dir_option(self):
        with temp_dir() as tmp_dir:
            reset_request_counter()

            grab = build_grab()
            grab.setup(log_dir=tmp_dir)
            self.server.response_once['get.data'] = 'omsk1'
            self.server.response['get.data'] = 'omsk2'

            self.assertEqual(os.listdir(tmp_dir), [])
            grab.go(self.server.get_url())
            grab.go(self.server.get_url())
            self.assertEqual(sorted(os.listdir(tmp_dir)),
                             ['01.html', '01.log', '02.html', '02.log'])
            with open(os.path.join(tmp_dir, '01.html')) as inp:
                self.assertEqual(inp.read(), 'omsk1')
            with open(os.path.join(tmp_dir, '02.html')) as inp:
                self.assertEqual(inp.read(), 'omsk2')
Example #10
0
    def test_log_dir_request_content_headers_and_post(self):
        with temp_dir() as tmp_dir:
            reset_request_counter()

            grab = build_grab()
            grab.setup(log_dir=tmp_dir, debug=True)
            grab.setup(headers={'X-Name': 'spider'}, post={'xxx': 'Post'})

            self.assertEqual(os.listdir(tmp_dir), [])
            grab.go(self.server.get_url())
            self.assertEqual(sorted(os.listdir(tmp_dir)),
                             ['01.html', '01.log'])
            with open(os.path.join(tmp_dir, '01.log')) as inp:
                log_file_content = inp.read()
            #if not 'x-name' in log_file_content.lower():
            #    print('CONTENT OF 01.log:')
            #    print(log_file_content)
            self.assertTrue('x-name' in log_file_content.lower())
            self.assertTrue('xxx=post' in log_file_content.lower())
Example #11
0
    def test_log_dir_request_content_headers_and_post(self):
        with temp_dir() as tmp_dir:
            reset_request_counter()

            grab = build_grab()
            grab.setup(log_dir=tmp_dir, debug=True)
            grab.setup(headers={'X-Name': 'spider'}, post={'xxx': 'Post'})

            self.assertEqual(os.listdir(tmp_dir), [])
            grab.go(self.server.get_url())
            self.assertEqual(sorted(os.listdir(tmp_dir)),
                             ['01.html', '01.log'])
            with open(os.path.join(tmp_dir, '01.log')) as inp:
                log_file_content = inp.read()
            #if not 'x-name' in log_file_content.lower():
            #    print('CONTENT OF 01.log:')
            #    print(log_file_content)
            self.assertTrue('x-name' in log_file_content.lower())
            self.assertTrue('xxx=post' in log_file_content.lower())
Example #12
0
    def test_log_dir_response_network_error(self):
        with temp_dir() as tmp_dir:
            reset_request_counter()

            grab = build_grab()
            grab.setup(log_dir=tmp_dir, timeout=1, user_agent='Perl',
                       debug=True)
            self.server.response['get.data'] = 'omsk'
            self.server.response['headers'] = [('X-Engine', 'PHP')]
            self.server.response['sleep'] = 2

            self.assertEqual(os.listdir(tmp_dir), [])
            try:
                grab.go(self.server.get_url())
            except GrabTimeoutError:
                pass

            self.assertEqual(sorted(os.listdir(tmp_dir)),
                             ['01.html', '01.log'])
            with open(os.path.join(tmp_dir, '01.log')) as inp:
                log_file_content = inp.read()
            self.assertTrue('user-agent: perl' in log_file_content.lower())
Example #13
0
    def test_log_dir_response_content_thread(self):
        with temp_dir() as tmp_dir:
            reset_request_counter()

            grab = build_grab()
            grab.setup(log_dir=tmp_dir)
            self.server.response['get.data'] = 'omsk'
            self.server.response['headers'] = [('X-Engine', 'PHP')]

            self.assertEqual(os.listdir(tmp_dir), [])

            def func():
                grab.go(self.server.get_url())
            thread = threading.Thread(target=func)
            thread.start()
            thread.join()

            files = os.listdir(tmp_dir)
            self.assertEqual(2, len([x for x in files if '01-thread' in x]))
            fname = [x for x in files if x.endswith('.log')][0]
            with open(os.path.join(tmp_dir, fname)) as inp:
                log_file_content = inp.read()
            self.assertTrue('x-engine' in log_file_content.lower())
Example #14
0
    def test_log_dir_response_network_error(self):
        with temp_dir() as tmp_dir:
            reset_request_counter()

            grab = build_grab()
            grab.setup(log_dir=tmp_dir,
                       timeout=1,
                       user_agent='Perl',
                       debug=True)
            self.server.response['get.data'] = 'omsk'
            self.server.response['headers'] = [('X-Engine', 'PHP')]
            self.server.response['sleep'] = 2

            self.assertEqual(os.listdir(tmp_dir), [])
            try:
                grab.go(self.server.get_url())
            except GrabTimeoutError:
                pass

            self.assertEqual(sorted(os.listdir(tmp_dir)),
                             ['01.html', '01.log'])
            with open(os.path.join(tmp_dir, '01.log')) as inp:
                log_file_content = inp.read()
            self.assertTrue('user-agent: perl' in log_file_content.lower())
Example #15
0
    def test_log_dir_response_content_thread(self):
        with temp_dir() as tmp_dir:
            reset_request_counter()

            grab = build_grab()
            grab.setup(log_dir=tmp_dir)
            self.server.response['get.data'] = 'omsk'
            self.server.response['headers'] = [('X-Engine', 'PHP')]

            self.assertEqual(os.listdir(tmp_dir), [])

            def func():
                grab.go(self.server.get_url())

            thread = threading.Thread(target=func)
            thread.start()
            thread.join()

            files = os.listdir(tmp_dir)
            self.assertEqual(2, len([x for x in files if '01-thread' in x]))
            fname = [x for x in files if x.endswith('.log')][0]
            with open(os.path.join(tmp_dir, fname)) as inp:
                log_file_content = inp.read()
            self.assertTrue('x-engine' in log_file_content.lower())