Ejemplo n.º 1
0
    def test_a200_robots_txt(self):
        request = copy.deepcopy(self.sample_task_http)
        request['fetch']['robots_txt'] = False
        request['url'] = self.httpbin+'/deny'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)

        request['fetch']['robots_txt'] = True
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 403, result)
Ejemplo n.º 2
0
    def test_a200_robots_txt(self):
        request = copy.deepcopy(self.sample_task_http)
        request["fetch"]["robots_txt"] = False
        request["url"] = self.httpbin + "/deny"
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)

        request["fetch"]["robots_txt"] = True
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 403, result)
Ejemplo n.º 3
0
    def test_a200_robots_txt(self):
        request = copy.deepcopy(self.sample_task_http)
        request['fetch']['robots_txt'] = False
        request['url'] = self.httpbin + '/deny'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)

        request['fetch']['robots_txt'] = True
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 403, result)
Ejemplo n.º 4
0
def run(project):
    task = json.loads(request.form['task'])
    project_info = {
            'name': project,
            'status': 'DEBUG',
            'script': request.form['script'],
            }

    fetch_result = {}
    start_time = time.time()
    try:
        fetch_result = app.config['fetch'](task)
        response = rebuild_response(fetch_result)
        module = build_module(project_info, {
            'debugger': True
            })
        ret = module['instance'].run(module['module'], task, response)
    except Exception, e:
        type, value, tb = sys.exc_info()
        tb = hide_me(tb, globals())
        logs = ''.join(traceback.format_exception(type, value, tb))
        result = {
                'fetch_result': fetch_result,
                'logs': logs,
                'follows': [],
                'messages': [],
                'result': None,
                'time': time.time() - start_time,
                }
Ejemplo n.º 5
0
    def test_e020_too_much_redirect(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin + '/redirect/10'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 599, result)
        self.assertIn('redirects followed', response.error)
Ejemplo n.º 6
0
    def test_20_dataurl_get(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = 'data:,hello'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.text, 'hello')
Ejemplo n.º 7
0
    def test_40_with_rpc(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = 'data:,hello'
        result = umsgpack.unpackb(self.rpc.fetch(request).data)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.text, 'hello')
Ejemplo n.º 8
0
    def test_a180_max_redirects(self):
        request = copy.deepcopy(self.sample_task_http)
        request["fetch"]["max_redirects"] = 10
        request["url"] = self.httpbin + "/redirect/10"
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
Ejemplo n.º 9
0
    def test_a170_validate_cert(self):
        request = copy.deepcopy(self.sample_task_http)
        request["fetch"]["validate_cert"] = False
        request["url"] = self.httpbin + "/get"
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
Ejemplo n.º 10
0
    def test_a160_cookie(self):
        request = copy.deepcopy(self.sample_task_http)
        request["url"] = self.httpbin + "/cookies/set?k1=v1&k2=v2"
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
        self.assertEqual(response.cookies, {"a": "b", "k1": "v1", "k2": "v2", "c": "d"}, result)
Ejemplo n.º 11
0
    def test_a150_too_much_redirect(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin+'/redirect/10'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 599, result)
        self.assertIn('redirects followed', response.error)
Ejemplo n.º 12
0
    def test_a170_validate_cert(self):
        request = copy.deepcopy(self.sample_task_http)
        request['fetch']['validate_cert'] = False
        request['url'] = self.httpbin+'/get'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
Ejemplo n.º 13
0
    def test_40_with_rpc(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = 'data:,hello'
        result = umsgpack.unpackb(self.rpc.fetch(request).data)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.text, 'hello')
Ejemplo n.º 14
0
    def test_a160_cookie(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin+'/cookies/set?k1=v1&k2=v2'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
        self.assertEqual(response.cookies, {'a': 'b', 'k1': 'v1', 'k2': 'v2', 'c': 'd'}, result)
Ejemplo n.º 15
0
    def test_20_dataurl_get(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = 'data:,hello'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.text, 'hello')
Ejemplo n.º 16
0
    def test_a170_validate_cert(self):
        request = copy.deepcopy(self.sample_task_http)
        request['fetch']['validate_cert'] = False
        request['url'] = self.httpbin + '/get'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
Ejemplo n.º 17
0
    def test_a180_max_redirects(self):
        request = copy.deepcopy(self.sample_task_http)
        request['fetch']['max_redirects'] = 10
        request['url'] = self.httpbin+'/redirect/10'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
Ejemplo n.º 18
0
    def test_a180_max_redirects(self):
        request = copy.deepcopy(self.sample_task_http)
        request['fetch']['max_redirects'] = 10
        request['url'] = self.httpbin + '/redirect/10'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
Ejemplo n.º 19
0
    def test_a160_cookie(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin+'/cookies/set?k1=v1&k2=v2'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
        self.assertEqual(response.cookies, {'a': 'b', 'k1': 'v1', 'k2': 'v2', 'c': 'd'}, result)
Ejemplo n.º 20
0
    def test_30_with_queue(self):
        request= copy.deepcopy(self.sample_task_http)
        request['url'] = 'data:,hello'
        self.inqueue.put(request)
        task, result = self.outqueue.get()
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.text, 'hello')
Ejemplo n.º 21
0
def run(project):
    task = utils.decode_unicode_obj(json.loads(request.form['task']))
    project_info = {
        'name': project,
        'status': 'DEBUG',
        'script': request.form['script'],
    }

    fetch_result = {}
    start_time = time.time()
    try:
        fetch_result = app.config['fetch'](task)
        response = rebuild_response(fetch_result)
        module = build_module(project_info, {
            'debugger': True
        })
        ret = module['instance'].run(module['module'], task, response)
    except Exception:
        type, value, tb = sys.exc_info()
        tb = utils.hide_me(tb, globals())
        logs = ''.join(traceback.format_exception(type, value, tb))
        result = {
            'fetch_result': fetch_result,
            'logs': logs,
            'follows': [],
            'messages': [],
            'result': None,
            'time': time.time() - start_time,
        }
    else:
        result = {
            'fetch_result': fetch_result,
            'logs': ret.logstr(),
            'follows': ret.follows,
            'messages': ret.messages,
            'result': ret.result,
            'time': time.time() - start_time,
        }
        result['fetch_result']['content'] = response.text

    try:
        # binary data can't encode to JSON, encode result as unicode obj
        # before send it to frontend
        return json.dumps(utils.unicode_obj(result)), 200, {'Content-Type': 'application/json'}
    except Exception:
        type, value, tb = sys.exc_info()
        tb = utils.hide_me(tb, globals())
        logs = ''.join(traceback.format_exception(type, value, tb))
        result = {
            'fetch_result': "",
            'logs': logs,
            'follows': [],
            'messages': [],
            'result': None,
            'time': time.time() - start_time,
        }
        return json.dumps(utils.unicode_obj(result)), 200, {'Content-Type': 'application/json'}
Ejemplo n.º 22
0
 def get(self, url, **kwargs):
     if not url.startswith('http://'):
         url = self.httpbin + url
     request = copy.deepcopy(self.sample_task_http)
     request['url'] = url
     request.update(kwargs)
     result = self.fetcher.fetch(request)
     response = rebuild_response(result)
     return response
Ejemplo n.º 23
0
    def test_65_418(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin+'/status/418'
        self.inqueue.put(request)
        task, result = self.outqueue.get()
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 418)
        self.assertIn('teapot', response.text)
Ejemplo n.º 24
0
 def get(self, url, **kwargs):
     if not url.startswith('http://'):
         url = self.httpbin + url
     request = copy.deepcopy(self.sample_task_http)
     request['url'] = url
     request.update(kwargs)
     result = self.fetcher.fetch(request)
     response = rebuild_response(result)
     return response
Ejemplo n.º 25
0
    def test_65_418(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin + '/status/418'
        self.inqueue.put(request)
        task, result = self.outqueue.get()
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 418)
        self.assertIn('teapot', response.text)
Ejemplo n.º 26
0
    def test_75_splash_robots(self):
        request = self.sample_task_http
        request['url'] = self.httpbin + '/deny'
        request['fetch']['fetch_type'] = 'splash'
        request['fetch']['robots_txt'] = True
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 403, result)
Ejemplo n.º 27
0
    def test_a120_http_get_with_proxy_fail(self):
        self.fetcher.proxy = self.proxy
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin+'/get'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 403, result)
        self.fetcher.proxy = None
Ejemplo n.º 28
0
    def test_a120_http_get_with_proxy_fail_1(self):
        self.fetcher.proxy = self.proxy
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin + '/get'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 403, result)
        self.fetcher.proxy = None
Ejemplo n.º 29
0
    def test_30_with_queue(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = 'data:,hello'
        self.inqueue.put(request)
        task, result = self.outqueue.get()
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.text, 'hello')
Ejemplo n.º 30
0
    def test_a140_redirect(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin+'/redirect-to?url=/get'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
        self.assertEqual(response.orig_url, request['url'])
        self.assertEqual(response.url, self.httpbin+'/get')
Ejemplo n.º 31
0
    def test_75_splash_robots(self):
        request = self.sample_task_http
        request['url'] = self.httpbin + '/deny'
        request['fetch']['fetch_type'] = 'splash'
        request['fetch']['robots_txt'] = True
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 403, result)
Ejemplo n.º 32
0
    def test_e010_redirect(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin + '/redirect-to?url=/get'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
        self.assertEqual(response.orig_url, request['url'])
        self.assertEqual(response.url, self.httpbin + '/get')
Ejemplo n.º 33
0
    def test_75_phantomjs_robots(self):
        if not self.phantomjs:
            raise unittest.SkipTest('no phantomjs')
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin + '/deny'
        request['fetch']['fetch_type'] = 'phantomjs'
        request['fetch']['robots_txt'] = True
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 403, result)
Ejemplo n.º 34
0
    def test_75_phantomjs_robots(self):
        if not self.phantomjs:
            raise unittest.SkipTest('no phantomjs')
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin + '/deny'
        request['fetch']['fetch_type'] = 'phantomjs'
        request['fetch']['robots_txt'] = True
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 403, result)
Ejemplo n.º 35
0
    def test_55_base64_data(self):
        request = copy.deepcopy(self.sample_task_http)
        request["url"] = self.httpbin + "/post"
        request["fetch"]["method"] = "POST"
        # gbk encoding 中文
        request["fetch"]["data"] = "[BASE64-DATA]1tDOxA==[/BASE64-DATA]"
        self.inqueue.put(request)
        task, result = self.outqueue.get()
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, response.error)
        self.assertIsNotNone(response.json, response.content)
Ejemplo n.º 36
0
    def test_55_base64_data(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin + '/post'
        request['fetch']['method'] = 'POST'
        # gbk encoding 中文
        request['fetch']['data'] = "[BASE64-DATA]1tDOxA==[/BASE64-DATA]"
        self.inqueue.put(request)
        task, result = self.outqueue.get()
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, response.error)
        self.assertIsNotNone(response.json, response.content)
Ejemplo n.º 37
0
    def test_55_base64_data(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin+'/post'
        request['fetch']['method'] = 'POST'
        # gbk encoding 中文
        request['fetch']['data'] = "[BASE64-DATA]1tDOxA==[/BASE64-DATA]"
        self.inqueue.put(request)
        task, result = self.outqueue.get()
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, response.error)
        self.assertIsNotNone(response.json, response.content)
Ejemplo n.º 38
0
    def test_10_http_get(self):
        request = copy.deepcopy(self.sample_task_http)
        request["url"] = self.httpbin + "/get"
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
        self.assertEqual(response.orig_url, request["url"])
        self.assertEqual(response.save, request["fetch"]["save"])
        self.assertIsNotNone(response.json, response.content)
        self.assertEqual(response.json["headers"].get("A"), "b", response.json)
        self.assertIn("c=d", response.json["headers"].get("Cookie"), response.json)
        self.assertIn("a=b", response.json["headers"].get("Cookie"), response.json)
Ejemplo n.º 39
0
    def _run(self, task, response):
        self._reset()
        if isinstance(response, dict):
            response = rebuild_response(response)
        process = task.get('process', {})
        callback = process.get('callback', '__call__')
        if not hasattr(self, callback):
            raise NotImplementedError("self.%s() not implemented!" % callback)

        function = getattr(self, callback)
        if not getattr(function, '_catch_status_code_error', False):
            response.raise_for_status()
        return self._run_func(function, response, task)
Ejemplo n.º 40
0
    def _run(self, task, response):
        self._reset()
        if isinstance(response, dict):
            response = rebuild_response(response)
        process = task.get('process', {})
        callback = process.get('callback', '__call__')
        if not hasattr(self, callback):
            raise NotImplementedError("self.%s() not implemented!" % callback)

        function = getattr(self, callback)
        if not getattr(function, '_catch_status_code_error', False):
            response.raise_for_status()
        return self._run_func(function, response, task)
Ejemplo n.º 41
0
    def test_10_http_get(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin + '/get'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
        self.assertEqual(response.orig_url, request['url'])
        self.assertEqual(response.save, request['fetch']['save'])
        self.assertIsNotNone(response.json, response.content)
        self.assertEqual(response.json['headers'].get('A'), 'b', response.json)
        self.assertEqual(response.json['headers'].get('Cookie'), 'c=d',
                         response.json)
Ejemplo n.º 42
0
    def test_69_no_splash(self):
        splash_endpoint = self.fetcher.splash_endpoint
        self.fetcher.splash_endpoint = None

        request = self.sample_task_http
        request['url'] = self.httpbin + '/get'
        request['fetch']['fetch_type'] = 'splash'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 501, result)

        self.fetcher.splash_endpoint = splash_endpoint
Ejemplo n.º 43
0
    def test_10_http_get(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin+'/get'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
        self.assertEqual(response.orig_url, request['url'])
        self.assertEqual(response.save, request['fetch']['save'])
        self.assertIsNotNone(response.json, response.content)
        self.assertEqual(response.json['headers'].get('A'), 'b', response.json)
        self.assertIn('c=d', response.json['headers'].get('Cookie'), response.json)
        self.assertIn('a=b', response.json['headers'].get('Cookie'), response.json)
Ejemplo n.º 44
0
    def test_69_no_splash(self):
        splash_endpoint = self.fetcher.splash_endpoint
        self.fetcher.splash_endpoint = None

        request = self.sample_task_http
        request['url'] = self.httpbin + '/get'
        request['fetch']['fetch_type'] = 'splash'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 501, result)

        self.fetcher.splash_endpoint = splash_endpoint
Ejemplo n.º 45
0
    def run_task(self, module, task, response):
        """
        处理task,捕捉错误,返回ProcessorResult object
        Processing the task, catching exceptions and logs, return a `ProcessorResult` object
        """
        # TODO module是啥
        self.logger = logger = module.logger
        result = None
        exception = None
        stdout = sys.stdout
        self.task = task
        # deep copy response
        if isinstance(response, dict):
            response = rebuild_response(response)
        self.response = response
        self.save = (task.get('track') or {}).get('save', {})

        try:
            if self.__env__.get('enable_stdout_capture', True):
                # 一个具有writelines等方法的基于list的obj
                sys.stdout = ListO(module.log_buffer)
            self._reset()
            # 运行task
            result = self._run_task(task, response)
            # TODO
            # on_result 用于处理结果
            if inspect.isgenerator(result):
                for r in result:
                    self._run_func(self.on_result, r, response, task)
            else:
                self._run_func(self.on_result, result, response, task)

        except Exception as e:
            logger.exception(e)
            exception = e
        finally:
            follows = self._follows
            messages = self._messages
            logs = list(module.log_buffer)
            extinfo = self._extinfo
            save = self.save

            sys.stdout = stdout
            self.task = None
            self.response = None
            self.save = None

        # 清空buffer
        module.log_buffer[:] = []
        return ProcessorResult(result, follows, messages, logs, exception,
                               extinfo, save)
Ejemplo n.º 46
0
    def test_60_timeout(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin + '/delay/5'
        request['fetch']['timeout'] = 3
        start_time = time.time()
        self.inqueue.put(request)
        task, result = self.outqueue.get()
        end_time = time.time()
        self.assertGreater(end_time - start_time, 1.5)
        self.assertLess(end_time - start_time, 4.5)

        response = rebuild_response(result)
        self.assertEqual(response.orig_url, request['url'])
        self.assertEqual(response.save, request['fetch']['save'])
Ejemplo n.º 47
0
    def test_60_timeout(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin+'/delay/5'
        request['fetch']['timeout'] = 3
        start_time = time.time()
        self.inqueue.put(request)
        task, result = self.outqueue.get()
        end_time = time.time()
        self.assertGreater(end_time - start_time, 1.5)
        self.assertLess(end_time - start_time, 4.5)

        response = rebuild_response(result)
        self.assertEqual(response.orig_url, request['url'])
        self.assertEqual(response.save, request['fetch']['save'])
Ejemplo n.º 48
0
 def on_task(self, task, response):
     start_time = time.time()
     try:
         response = rebuild_response(response)
         assert 'taskid' in task, 'need taskid in task'
         project = task['project']
         if project not in self.projects:
             raise LookupError("no such project: %s" % project)
         project_data = self.projects[project]
         ret = project_data['instance'].run(
                 project_data['module'], task, response)
     except Exception, e:
         logger.exception(e)
         return False
Ejemplo n.º 49
0
    def test_70_splash_url(self):
        request = self.sample_task_http
        request['url'] = self.httpbin + '/get'
        request['fetch']['fetch_type'] = 'splash'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
        self.assertEqual(response.orig_url, request['url'])
        self.assertEqual(response.save, request['fetch']['save'])
        data = json.loads(response.doc('pre').text())
        self.assertIsNotNone(data, response.content)
        self.assertEqual(data['headers'].get('A'), 'b', response.json)
        self.assertEqual(data['headers'].get('Cookie'), 'c=d', response.json)
Ejemplo n.º 50
0
    def test_70_splash_url(self):
        request = self.sample_task_http
        request['url'] = self.httpbin + '/get'
        request['fetch']['fetch_type'] = 'splash'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
        self.assertEqual(response.orig_url, request['url'])
        self.assertEqual(response.save, request['fetch']['save'])
        data = json.loads(response.doc('pre').text())
        self.assertIsNotNone(data, response.content)
        self.assertEqual(data['headers'].get('A'), 'b', response.json)
        self.assertEqual(data['headers'].get('Cookie'), 'c=d', response.json)
Ejemplo n.º 51
0
 def on_task(self, task, response):
     start_time = time.time()
     try:
         response = rebuild_response(response)
         assert 'taskid' in task, 'need taskid in task'
         project = task['project']
         if project not in self.projects:
             raise LookupError("no such project: %s" % project)
         project_data = self.projects[project]
         ret = project_data['instance'].run(project_data['module'], task,
                                            response)
     except Exception, e:
         logger.exception(e)
         return False
Ejemplo n.º 52
0
    def test_a130_http_get_with_proxy_ok_1(self):
        self.fetcher.proxy = 'http://*****:*****@%s/' % self.proxy
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin+'/get'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
        self.assertEqual(response.orig_url, request['url'])
        self.assertEqual(response.save, request['fetch']['save'])
        self.assertIsNotNone(response.json, response.content)
        self.assertEqual(response.json['headers'].get('A'), 'b', response.json)
        self.assertIn('c=d', response.json['headers'].get('Cookie'), response.json)
        self.assertIn('a=b', response.json['headers'].get('Cookie'), response.json)
        self.fetcher.proxy = None
Ejemplo n.º 53
0
    def test_zzzz_issue375(self):
        phantomjs_proxy = self.fetcher.phantomjs_proxy
        self.fetcher.phantomjs_proxy = '127.0.0.1:20000'

        if not self.phantomjs:
            raise unittest.SkipTest('no phantomjs')
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin + '/get'
        request['fetch']['fetch_type'] = 'phantomjs'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 599, result)

        self.fetcher.phantomjs_proxy = phantomjs_proxy
Ejemplo n.º 54
0
    def test_zzzz_issue375(self):
        phantomjs_proxy = self.fetcher.phantomjs_proxy
        self.fetcher.phantomjs_proxy = '127.0.0.1:20000'

        if not self.phantomjs:
            raise unittest.SkipTest('no phantomjs')
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin + '/get'
        request['fetch']['fetch_type'] = 'phantomjs'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 599, result)

        self.fetcher.phantomjs_proxy = phantomjs_proxy
Ejemplo n.º 55
0
    def test_69_no_phantomjs(self):
        phantomjs_proxy = self.fetcher.phantomjs_proxy
        self.fetcher.phantomjs_proxy = None

        if not self.phantomjs:
            raise unittest.SkipTest('no phantomjs')
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin + '/get'
        request['fetch']['fetch_type'] = 'js'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 501, result)

        self.fetcher.phantomjs_proxy = phantomjs_proxy
Ejemplo n.º 56
0
    def test_70_phantomjs_url(self):
        if not self.phantomjs:
            raise unittest.SkipTest('no phantomjs')
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin + '/get'
        request['fetch']['fetch_type'] = 'js'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
        self.assertEqual(response.orig_url, request['url'])
        self.assertEqual(response.save, request['fetch']['save'])
        data = json.loads(response.doc('pre').text())
        self.assertIsNotNone(data, response.content)
        self.assertEqual(data['headers'].get('A'), 'b', response.json)
        self.assertEqual(data['headers'].get('Cookie'), 'c=d', response.json)
Ejemplo n.º 57
0
    def test_a130_http_get_with_proxy_ok(self):
        self.fetcher.proxy = 'http://*****:*****@%s/' % self.proxy
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin+'/get'
        request['fetch']['fetch_type'] = 'splash'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
        self.assertEqual(response.orig_url, request['url'])
        self.assertEqual(response.save, request['fetch']['save'])

        data = json.loads(response.doc('pre').text())
        self.assertEqual(data['headers'].get('A'), 'b', response.content)
        self.assertIn('c=d', data['headers'].get('Cookie'), response.content)
        self.assertIn('a=b', data['headers'].get('Cookie'), response.content)
        self.fetcher.proxy = None
Ejemplo n.º 58
0
    def test_15_http_post(self):
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin + '/post'
        request['fetch']['method'] = 'POST'
        request['fetch']['data'] = 'binux'
        request['fetch']['cookies'] = {'c': 'd'}
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.orig_url, request['url'])
        self.assertEqual(response.save, request['fetch']['save'])
        self.assertIsNotNone(response.json, response.content)

        self.assertEqual(response.json['form'].get('binux'), '')
        self.assertEqual(response.json['headers'].get('A'), 'b', response.json)
        self.assertEqual(response.json['headers'].get('Cookie'), 'c=d',
                         response.json)
Ejemplo n.º 59
0
    def run_task(self, module, task, response):
        """
        Processing the task, catching exceptions and logs, return a `ProcessorResult` object
        """
        logger = module.logger
        result = None
        exception = None
        stdout = sys.stdout
        self.task = task
        if isinstance(response, dict):
            response = rebuild_response(response)
        self.response = response
        self.save = (task.get('track') or {}).get('save', {})

        try:
            if self.__env__.get('enable_stdout_capture', True):
                sys.stdout = ListO(module.log_buffer)
            self._reset()
            #执行脚本
            result = self._run_task(task, response)
            if inspect.isgenerator(result):
                for r in result:
                    self._run_func(self.on_result, r, response, task)
            else:
                #调用on_result
                self._run_func(self.on_result, result, response, task)
        except Exception as e:
            logger.exception(e)
            exception = e
        finally:
            follows = self._follows
            messages = self._messages
            logs = list(module.log_buffer)
            extinfo = self._extinfo
            save = self.save

            sys.stdout = stdout
            self.task = None
            self.response = None
            self.save = None

        module.log_buffer[:] = []
        return ProcessorResult(result, follows, messages, logs, exception, extinfo, save)