Ejemplo n.º 1
0
    def test_request_counter(self):
        """Check that requests made by any Grab instance are counted.

        Two sequential requests are followed by ten requests issued from
        separate threads, each through its own Grab instance, and by one
        final request.  The counter seen by the first instance is expected
        to reflect all of them: 2 + 10 + 1 = 13.
        """
        import threading

        def fetch_in_thread():
            # Every worker uses its own, freshly built Grab instance.
            worker_grab = build_grab()
            worker_grab.go(self.server.get_url())

        reset_request_counter()
        grab = build_grab()
        for expected in (1, 2):
            grab.go(self.server.get_url())
            self.assertEqual(grab.request_counter, expected)

        # Make 10 requests in concurrent threads
        workers = [threading.Thread(target=fetch_in_thread)
                   for _ in six.moves.range(10)]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        grab.go(self.server.get_url())
        self.assertEqual(grab.request_counter, 13)
Ejemplo n.º 2
0
    def test_request_counter(self):
        """Verify the request counter across instances and threads.

        The first Grab instance performs two requests, ten more requests
        are performed by independent instances running in their own
        threads, and then the first instance performs one more.  Its
        ``request_counter`` must read 1, 2 and finally 13.
        """
        import threading

        def run_single_request():
            # A separate Grab object per thread; nothing is shared with
            # the instance created in the test body.
            thread_grab = build_grab()
            thread_grab.go(self.server.get_url())

        reset_request_counter()
        main_grab = build_grab()

        main_grab.go(self.server.get_url())
        self.assertEqual(main_grab.request_counter, 1)
        main_grab.go(self.server.get_url())
        self.assertEqual(main_grab.request_counter, 2)

        # Make 10 requests in concurrent threads
        pool = [threading.Thread(target=run_single_request)
                for _ in six.moves.range(10)]
        for thread in pool:
            thread.start()
        for thread in pool:
            thread.join()

        main_grab.go(self.server.get_url())
        self.assertEqual(main_grab.request_counter, 13)
Ejemplo n.º 3
0
    def test_log_option(self):
        """Check that the ``log_file`` option saves the response body.

        After a single request the temporary directory must contain only
        ``log.html`` and that file must hold the body served by the test
        server.
        """
        clear_directory(TMP_DIR)
        reset_request_counter()

        log_file_path = os.path.join(TMP_DIR, 'log.html')
        g = build_grab()
        g.setup(log_file=log_file_path)
        self.server.response['get.data'] = 'omsk'

        # Nothing may be written before the request is made.
        self.assertEqual(os.listdir(TMP_DIR), [])
        g.go(self.server.get_url())
        self.assertEqual(os.listdir(TMP_DIR), ['log.html'])
        # Use a context manager so the file handle is closed right away
        # instead of being left for the garbage collector.
        with open(log_file_path) as inp:
            self.assertEqual(inp.read(), 'omsk')
Ejemplo n.º 4
0
    def test_log_option(self):
        """Check that ``log_file`` stores the body of the last response.

        The shared temporary directory is emptied first; after one request
        it must contain exactly ``log.html`` with the served body inside.
        """
        clear_directory(TMP_DIR)
        reset_request_counter()

        log_file_path = os.path.join(TMP_DIR, 'log.html')
        g = build_grab()
        g.setup(log_file=log_file_path)
        self.server.response['get.data'] = 'omsk'

        # The log file must not exist until a request has been made.
        self.assertEqual(os.listdir(TMP_DIR), [])
        g.go(self.server.get_url())
        self.assertEqual(os.listdir(TMP_DIR), ['log.html'])
        # Close the handle deterministically; a bare open().read() leaks it.
        with open(log_file_path) as inp:
            self.assertEqual(inp.read(), 'omsk')
Ejemplo n.º 5
0
    def test_log_option(self):
        """Check that the ``log_file`` option saves the response body.

        The test server is configured to answer GET requests with
        ``'omsk'``; after one request ``log.html`` must be the only file in
        the temporary directory and must contain that body.
        """
        clear_directory(TMP_DIR)
        reset_request_counter()

        log_file_path = os.path.join(TMP_DIR, 'log.html')
        g = build_grab()
        g.setup(log_file=log_file_path)
        SERVER.RESPONSE['get'] = 'omsk'

        # Nothing may be written before the request is made.
        self.assertEqual(os.listdir(TMP_DIR), [])
        g.go(SERVER.BASE_URL)
        self.assertEqual(os.listdir(TMP_DIR), ['log.html'])
        # Use a context manager so the file handle is closed right away
        # instead of being left for the garbage collector.
        with open(log_file_path) as inp:
            self.assertEqual(inp.read(), 'omsk')
Ejemplo n.º 6
0
    def test_log_option(self):
        """Check that ``log_file`` stores the body of the last response.

        The shared temporary directory is emptied first; after one request
        to the test server it must contain exactly ``log.html`` holding the
        served body.
        """
        clear_directory(TMP_DIR)
        reset_request_counter()

        log_file_path = os.path.join(TMP_DIR, 'log.html')
        g = build_grab()
        g.setup(log_file=log_file_path)
        SERVER.RESPONSE['get'] = 'omsk'

        # The log file must not exist until a request has been made.
        self.assertEqual(os.listdir(TMP_DIR), [])
        g.go(SERVER.BASE_URL)
        self.assertEqual(os.listdir(TMP_DIR), ['log.html'])
        # Close the handle deterministically; a bare open().read() leaks it.
        with open(log_file_path) as inp:
            self.assertEqual(inp.read(), 'omsk')
Ejemplo n.º 7
0
    def test_log_option(self):
        """Check that the ``log_file`` option saves the response body.

        Runs inside a fresh temporary directory: after one request that
        directory must contain only ``log.html`` and the file must hold the
        body served by the test server.
        """
        with temp_dir() as tmp_dir:
            reset_request_counter()

            log_file_path = os.path.join(tmp_dir, 'log.html')
            g = build_grab()
            g.setup(log_file=log_file_path)
            self.server.response['get.data'] = 'omsk'

            # Nothing may be written before the request is made.
            self.assertEqual(os.listdir(tmp_dir), [])
            g.go(self.server.get_url())
            self.assertEqual(os.listdir(tmp_dir), ['log.html'])
            # Close the file before leaving the temp_dir block; a leaked
            # handle may get in the way of cleaning the directory up.
            with open(log_file_path) as inp:
                self.assertEqual(inp.read(), 'omsk')
Ejemplo n.º 8
0
    def test_log_option(self):
        """Check that ``log_file`` stores the body of the last response.

        A fresh temporary directory is used; it must be empty before the
        request and contain exactly ``log.html`` with the served body
        afterwards.
        """
        with temp_dir() as tmp_dir:
            reset_request_counter()

            log_file_path = os.path.join(tmp_dir, 'log.html')
            g = build_grab()
            g.setup(log_file=log_file_path)
            self.server.response['get.data'] = 'omsk'

            # The log file must not exist until a request has been made.
            self.assertEqual(os.listdir(tmp_dir), [])
            g.go(self.server.get_url())
            self.assertEqual(os.listdir(tmp_dir), ['log.html'])
            # Read via a context manager so no open handle outlives the
            # temporary directory.
            with open(log_file_path) as inp:
                self.assertEqual(inp.read(), 'omsk')
Ejemplo n.º 9
0
    def test_log_dir_request_content_is_empty(self):
        """Check that request details are absent from the ``log_dir`` log.

        With ``log_dir`` set and no ``debug`` option, one request must
        produce ``01.html`` and ``01.log``, and ``01.log`` must mention
        neither the custom request header nor the POST payload.
        """
        clear_directory(TMP_DIR)
        reset_request_counter()

        g = build_grab()
        g.setup(log_dir=TMP_DIR)
        g.setup(headers={'X-Name': 'spider'}, post='xxxPost')

        # Nothing may be written before the request is made.
        self.assertEqual(os.listdir(TMP_DIR), [])
        g.go(self.server.get_url())
        self.assertEqual(sorted(os.listdir(TMP_DIR)), ['01.html', '01.log'])
        # Use a context manager so the file handle is closed right away.
        with open(os.path.join(TMP_DIR, '01.log')) as inp:
            log_file_content = inp.read()
        self.assertFalse('X-Name' in log_file_content)
        self.assertFalse('xxxPost' in log_file_content)
Ejemplo n.º 10
0
    def test_log_dir_request_content_is_empty(self):
        """Check that ``01.log`` omits request data when debug is not set.

        A request carrying a custom header and a POST body is made with
        only ``log_dir`` configured.  The directory must then hold
        ``01.html`` and ``01.log``, and the log must contain neither the
        header name nor the POST body.
        """
        clear_directory(TMP_DIR)
        reset_request_counter()

        g = build_grab()
        g.setup(log_dir=TMP_DIR)
        g.setup(headers={'X-Name': 'spider'}, post='xxxPost')

        # The directory must stay empty until a request has been made.
        self.assertEqual(os.listdir(TMP_DIR), [])
        g.go(self.server.get_url())
        self.assertEqual(sorted(os.listdir(TMP_DIR)), ['01.html', '01.log'])
        # Close the handle deterministically; a bare open().read() leaks it.
        with open(os.path.join(TMP_DIR, '01.log')) as inp:
            log_file_content = inp.read()
        self.assertFalse('X-Name' in log_file_content)
        self.assertFalse('xxxPost' in log_file_content)
Ejemplo n.º 11
0
    def test_log_dir_response_content(self):
        """Check that response headers are written to the ``log_dir`` log.

        The test server answers with an ``X-Engine`` header; after one
        request ``01.html`` and ``01.log`` must exist and ``01.log`` must
        contain that header name.
        """
        clear_directory(TMP_DIR)
        reset_request_counter()

        g = build_grab()
        g.setup(log_dir=TMP_DIR)
        SERVER.RESPONSE['get'] = 'omsk'
        SERVER.RESPONSE['headers'] = [('X-Engine', 'PHP')]

        # Nothing may be written before the request is made.
        self.assertEqual(os.listdir(TMP_DIR), [])
        g.go(SERVER.BASE_URL)
        self.assertEqual(sorted(os.listdir(TMP_DIR)), ['01.html', '01.log'])
        # Use a context manager so the file handle is closed right away.
        with open(os.path.join(TMP_DIR, '01.log')) as inp:
            log_file_content = inp.read()
        self.assertTrue('X-Engine' in log_file_content)
Ejemplo n.º 12
0
    def test_log_dir_response_content(self):
        """Check that ``01.log`` records the headers of the response.

        One request is made with ``log_dir`` configured while the test
        server sends an ``X-Engine`` header.  The directory must then hold
        ``01.html`` and ``01.log``, and the log must mention ``X-Engine``.
        """
        clear_directory(TMP_DIR)
        reset_request_counter()

        g = build_grab()
        g.setup(log_dir=TMP_DIR)
        SERVER.RESPONSE['get'] = 'omsk'
        SERVER.RESPONSE['headers'] = [('X-Engine', 'PHP')]

        # The directory must stay empty until a request has been made.
        self.assertEqual(os.listdir(TMP_DIR), [])
        g.go(SERVER.BASE_URL)
        self.assertEqual(sorted(os.listdir(TMP_DIR)), ['01.html', '01.log'])
        # Close the handle deterministically; a bare open().read() leaks it.
        with open(os.path.join(TMP_DIR, '01.log')) as inp:
            log_file_content = inp.read()
        self.assertTrue('X-Engine' in log_file_content)
Ejemplo n.º 13
0
    def test_log_dir_request_content_is_empty(self):
        """Check that request details are absent from the ``log_dir`` log.

        Inside a fresh temporary directory, a request with a custom header
        and a POST body is made with only ``log_dir`` configured.
        ``01.html`` and ``01.log`` must be created, and ``01.log`` must
        contain neither the header name nor the POST body.
        """
        with temp_dir() as tmp_dir:
            reset_request_counter()

            g = build_grab()
            g.setup(log_dir=tmp_dir)
            g.setup(headers={'X-Name': 'spider'}, post='xxxPost')

            # Nothing may be written before the request is made.
            self.assertEqual(os.listdir(tmp_dir), [])
            g.go(self.server.get_url())
            self.assertEqual(sorted(os.listdir(tmp_dir)),
                             ['01.html', '01.log'])
            # Close the file before leaving the temp_dir block; a leaked
            # handle may get in the way of cleaning the directory up.
            with open(os.path.join(tmp_dir, '01.log')) as inp:
                log_file_content = inp.read()
            self.assertFalse('X-Name' in log_file_content)
            self.assertFalse('xxxPost' in log_file_content)
Ejemplo n.º 14
0
    def test_log_dir_response_content(self):
        """Check that response headers are written to the ``log_dir`` log.

        Inside a fresh temporary directory, one request is made while the
        test server sends an ``X-Engine`` header.  ``01.html`` and
        ``01.log`` must be created and the log must mention the header
        name (compared case-insensitively).
        """
        with temp_dir() as tmp_dir:
            reset_request_counter()

            g = build_grab()
            g.setup(log_dir=tmp_dir)
            self.server.response['get.data'] = 'omsk'
            self.server.response['headers'] = [('X-Engine', 'PHP')]

            # Nothing may be written before the request is made.
            self.assertEqual(os.listdir(tmp_dir), [])
            g.go(self.server.get_url())
            self.assertEqual(sorted(os.listdir(tmp_dir)),
                             ['01.html', '01.log'])
            # Close the file before leaving the temp_dir block; a leaked
            # handle may get in the way of cleaning the directory up.
            with open(os.path.join(tmp_dir, '01.log')) as inp:
                log_file_content = inp.read()
            self.assertTrue('x-engine' in log_file_content.lower())
Ejemplo n.º 15
0
    def test_log_dir_request_content_headers_and_post(self):
        """Check that ``debug=True`` adds request data to the log file.

        With ``log_dir`` and ``debug`` both set, a request carrying a
        custom header and POST data must produce ``01.html`` and
        ``01.log``, and ``01.log`` must contain the header name and the
        encoded POST pair.
        """
        clear_directory(TMP_DIR)
        reset_request_counter()

        g = build_grab()
        g.setup(log_dir=TMP_DIR, debug=True)
        g.setup(headers={'X-Name': 'spider'}, post={'xxx': 'Post'})

        # Nothing may be written before the request is made.
        self.assertEqual(os.listdir(TMP_DIR), [])
        g.go(SERVER.BASE_URL)
        self.assertEqual(sorted(os.listdir(TMP_DIR)), ['01.html', '01.log'])
        # Use a context manager so the file handle is closed right away.
        with open(os.path.join(TMP_DIR, '01.log')) as inp:
            log_file_content = inp.read()
        self.assertTrue('X-Name' in log_file_content)
        self.assertTrue('xxx=Post' in log_file_content)
Ejemplo n.º 16
0
    def test_log_dir_request_content_headers_and_post(self):
        """Check that the debug log records request headers and POST data.

        A request with a custom ``X-Name`` header and POST data is made
        with ``log_dir`` and ``debug=True``.  The directory must then hold
        ``01.html`` and ``01.log``, and the log must contain ``X-Name`` and
        ``xxx=Post``.
        """
        clear_directory(TMP_DIR)
        reset_request_counter()

        g = build_grab()
        g.setup(log_dir=TMP_DIR, debug=True)
        g.setup(headers={'X-Name': 'spider'}, post={'xxx': 'Post'})

        # The directory must stay empty until a request has been made.
        self.assertEqual(os.listdir(TMP_DIR), [])
        g.go(SERVER.BASE_URL)
        self.assertEqual(sorted(os.listdir(TMP_DIR)), ['01.html', '01.log'])
        # Close the handle deterministically; a bare open().read() leaks it.
        with open(os.path.join(TMP_DIR, '01.log')) as inp:
            log_file_content = inp.read()
        self.assertTrue('X-Name' in log_file_content)
        self.assertTrue('xxx=Post' in log_file_content)
Ejemplo n.º 17
0
    def test_log_dir_option(self):
        """Check that ``log_dir`` creates numbered files for each request.

        The test server answers the first request with ``'omsk1'`` and the
        following one with ``'omsk2'``.  After two requests the directory
        must hold ``01``/``02`` ``.html`` and ``.log`` files, and each
        ``.html`` file must contain the matching response body.
        """
        clear_directory(TMP_DIR)
        reset_request_counter()

        g = build_grab()
        g.setup(log_dir=TMP_DIR)
        SERVER.RESPONSE_ONCE['get'] = 'omsk1'
        SERVER.RESPONSE['get'] = 'omsk2'

        # Nothing may be written before the first request is made.
        self.assertEqual(os.listdir(TMP_DIR), [])
        g.go(SERVER.BASE_URL)
        g.go(SERVER.BASE_URL)
        self.assertEqual(sorted(os.listdir(TMP_DIR)),
                         ['01.html', '01.log', '02.html', '02.log'])
        # Use context managers so both file handles are closed right away.
        with open(os.path.join(TMP_DIR, '01.html')) as inp:
            self.assertEqual(inp.read(), 'omsk1')
        with open(os.path.join(TMP_DIR, '02.html')) as inp:
            self.assertEqual(inp.read(), 'omsk2')
Ejemplo n.º 18
0
    def test_log_dir_option(self):
        """Check the files that ``log_dir`` produces for two requests.

        Two consecutive requests receive different bodies (``'omsk1'``
        once, then ``'omsk2'``).  The directory must end up with
        ``01.html``, ``01.log``, ``02.html`` and ``02.log``, where the
        ``.html`` files hold the respective bodies.
        """
        clear_directory(TMP_DIR)
        reset_request_counter()

        g = build_grab()
        g.setup(log_dir=TMP_DIR)
        SERVER.RESPONSE_ONCE['get'] = 'omsk1'
        SERVER.RESPONSE['get'] = 'omsk2'

        # The directory must stay empty until a request has been made.
        self.assertEqual(os.listdir(TMP_DIR), [])
        g.go(SERVER.BASE_URL)
        g.go(SERVER.BASE_URL)
        self.assertEqual(sorted(os.listdir(TMP_DIR)),
                         ['01.html', '01.log', '02.html', '02.log'])
        # Close each handle deterministically; open().read() leaks them.
        with open(os.path.join(TMP_DIR, '01.html')) as inp:
            self.assertEqual(inp.read(), 'omsk1')
        with open(os.path.join(TMP_DIR, '02.html')) as inp:
            self.assertEqual(inp.read(), 'omsk2')
Ejemplo n.º 19
0
    def test_log_dir_request_content_headers_and_post(self):
        """Check that ``debug=True`` adds request data to the log file.

        Inside a fresh temporary directory, a request carrying a custom
        header and POST data is made with ``log_dir`` and ``debug`` set.
        ``01.html`` and ``01.log`` must be created, and ``01.log`` must
        contain the header name and the encoded POST pair (both compared
        case-insensitively).
        """
        with temp_dir() as tmp_dir:
            reset_request_counter()

            g = build_grab()
            g.setup(log_dir=tmp_dir, debug=True)
            g.setup(headers={'X-Name': 'spider'}, post={'xxx': 'Post'})

            # Nothing may be written before the request is made.
            self.assertEqual(os.listdir(tmp_dir), [])
            g.go(self.server.get_url())
            self.assertEqual(sorted(os.listdir(tmp_dir)),
                             ['01.html', '01.log'])
            # Close the file before leaving the temp_dir block; a leaked
            # handle may get in the way of cleaning the directory up.
            with open(os.path.join(tmp_dir, '01.log')) as inp:
                log_file_content = inp.read()
            self.assertTrue('x-name' in log_file_content.lower())
            self.assertTrue('xxx=post' in log_file_content.lower())
Ejemplo n.º 20
0
    def test_log_dir_option(self):
        """Check that ``log_dir`` creates numbered files for each request.

        The test server answers the first request with ``'omsk1'`` and the
        following one with ``'omsk2'``.  After two requests the directory
        must hold ``01``/``02`` ``.html`` and ``.log`` files, and each
        ``.html`` file must contain the matching response body.
        """
        clear_directory(TMP_DIR)
        reset_request_counter()

        g = build_grab()
        g.setup(log_dir=TMP_DIR)
        self.server.response_once['get.data'] = 'omsk1'
        self.server.response['get.data'] = 'omsk2'

        # Nothing may be written before the first request is made.
        self.assertEqual(os.listdir(TMP_DIR), [])
        g.go(self.server.get_url())
        g.go(self.server.get_url())
        self.assertEqual(sorted(os.listdir(TMP_DIR)),
                         ['01.html', '01.log', '02.html', '02.log'])
        # Use context managers so both file handles are closed right away.
        with open(os.path.join(TMP_DIR, '01.html')) as inp:
            self.assertEqual(inp.read(), 'omsk1')
        with open(os.path.join(TMP_DIR, '02.html')) as inp:
            self.assertEqual(inp.read(), 'omsk2')
Ejemplo n.º 21
0
    def test_log_dir_option(self):
        """Check that ``log_dir`` creates numbered files for each request.

        Inside a fresh temporary directory, two requests receive
        ``'omsk1'`` and then ``'omsk2'``.  The directory must end up with
        ``01.html``, ``01.log``, ``02.html`` and ``02.log``, where the
        ``.html`` files hold the respective bodies.
        """
        with temp_dir() as tmp_dir:
            reset_request_counter()

            g = build_grab()
            g.setup(log_dir=tmp_dir)
            self.server.response_once['get.data'] = 'omsk1'
            self.server.response['get.data'] = 'omsk2'

            # Nothing may be written before the first request is made.
            self.assertEqual(os.listdir(tmp_dir), [])
            g.go(self.server.get_url())
            g.go(self.server.get_url())
            self.assertEqual(sorted(os.listdir(tmp_dir)),
                             ['01.html', '01.log', '02.html', '02.log'])
            # Close the files before leaving the temp_dir block; leaked
            # handles may get in the way of cleaning the directory up.
            with open(os.path.join(tmp_dir, '01.html')) as inp:
                self.assertEqual(inp.read(), 'omsk1')
            with open(os.path.join(tmp_dir, '02.html')) as inp:
                self.assertEqual(inp.read(), 'omsk2')
Ejemplo n.º 22
0
    def test_log_dir_option(self):
        """Check the files that ``log_dir`` produces for two requests.

        Two consecutive requests receive different bodies (``'omsk1'``
        once, then ``'omsk2'``).  The directory must end up with
        ``01.html``, ``01.log``, ``02.html`` and ``02.log``, where the
        ``.html`` files hold the respective bodies.
        """
        clear_directory(TMP_DIR)
        reset_request_counter()

        g = build_grab()
        g.setup(log_dir=TMP_DIR)
        self.server.response_once['get.data'] = 'omsk1'
        self.server.response['get.data'] = 'omsk2'

        # The directory must stay empty until a request has been made.
        self.assertEqual(os.listdir(TMP_DIR), [])
        g.go(self.server.get_url())
        g.go(self.server.get_url())
        self.assertEqual(sorted(os.listdir(TMP_DIR)),
                         ['01.html', '01.log', '02.html', '02.log'])
        # Close each handle deterministically; open().read() leaks them.
        with open(os.path.join(TMP_DIR, '01.html')) as inp:
            self.assertEqual(inp.read(), 'omsk1')
        with open(os.path.join(TMP_DIR, '02.html')) as inp:
            self.assertEqual(inp.read(), 'omsk2')
Ejemplo n.º 23
0
    def test_log_dir_option(self):
        """Check the files that ``log_dir`` produces for two requests.

        A fresh temporary directory is used.  The first request is
        answered with ``'omsk1'`` and the second with ``'omsk2'``; the
        directory must then contain ``01``/``02`` ``.html`` and ``.log``
        files with the bodies stored in the ``.html`` files.
        """
        with temp_dir() as tmp_dir:
            reset_request_counter()

            g = build_grab()
            g.setup(log_dir=tmp_dir)
            self.server.response_once['get.data'] = 'omsk1'
            self.server.response['get.data'] = 'omsk2'

            # The directory must stay empty until a request has been made.
            self.assertEqual(os.listdir(tmp_dir), [])
            g.go(self.server.get_url())
            g.go(self.server.get_url())
            self.assertEqual(sorted(os.listdir(tmp_dir)),
                             ['01.html', '01.log', '02.html', '02.log'])
            # Read via context managers so no open handle outlives the
            # temporary directory.
            with open(os.path.join(tmp_dir, '01.html')) as inp:
                self.assertEqual(inp.read(), 'omsk1')
            with open(os.path.join(tmp_dir, '02.html')) as inp:
                self.assertEqual(inp.read(), 'omsk2')
Ejemplo n.º 24
0
    def test_log_dir_response_network_error(self):
        """Check that log files are written even when the request times out.

        The test server is told to sleep for 2 while the client timeout is
        1 (presumably both in seconds -- confirm against the server and
        Grab docs), so the request is expected to end with
        ``GrabTimeoutError``.  ``01.html`` and ``01.log`` must still be
        created and, because ``debug=True``, ``01.log`` must contain the
        ``User-Agent`` request header.
        """
        with temp_dir() as tmp_dir:
            reset_request_counter()

            g = build_grab()
            g.setup(log_dir=tmp_dir, timeout=1, user_agent='Perl', debug=True)
            self.server.response['get.data'] = 'omsk'
            self.server.response['headers'] = [('X-Engine', 'PHP')]
            self.server.response['sleep'] = 2

            # Nothing may be written before the request is made.
            self.assertEqual(os.listdir(tmp_dir), [])
            try:
                g.go(self.server.get_url())
            except GrabTimeoutError:
                # The timeout is tolerated on purpose: this test only
                # inspects what has been logged.
                pass

            self.assertEqual(sorted(os.listdir(tmp_dir)),
                             ['01.html', '01.log'])
            # Close the file before leaving the temp_dir block; a leaked
            # handle may get in the way of cleaning the directory up.
            with open(os.path.join(tmp_dir, '01.log')) as inp:
                log_file_content = inp.read()
            self.assertTrue('user-agent: perl' in log_file_content.lower())
Ejemplo n.º 25
0
    def test_log_dir_response_content_thread(self):
        """Check the ``log_dir`` output of a request made in another thread.

        The request is performed inside a separate ``threading.Thread``.
        Exactly two of the created files must have ``01-thread`` in their
        names, and the ``.log`` file must contain the ``X-Engine`` response
        header (compared case-insensitively).
        """
        with temp_dir() as tmp_dir:
            reset_request_counter()

            g = build_grab()
            g.setup(log_dir=tmp_dir)
            self.server.response['get.data'] = 'omsk'
            self.server.response['headers'] = [('X-Engine', 'PHP')]

            # Nothing may be written before the request is made.
            self.assertEqual(os.listdir(tmp_dir), [])

            def func():
                g.go(self.server.get_url())

            th = threading.Thread(target=func)
            th.start()
            th.join()

            files = os.listdir(tmp_dir)
            self.assertEqual(2, len([x for x in files if '01-thread' in x]))
            # The full file name is not spelled out here, so the log file
            # is located by its extension.
            fname = [x for x in files if x.endswith('.log')][0]
            # Close the file before leaving the temp_dir block; a leaked
            # handle may get in the way of cleaning the directory up.
            with open(os.path.join(tmp_dir, fname)) as inp:
                log_file_content = inp.read()
            self.assertTrue('x-engine' in log_file_content.lower())