def setUpClass(self):
    """Create the shared fetcher fixture: queues, fetcher, RPC proxy, threads.

    Builds two bounded queues (capacity 10), wires them into a ``Fetcher``,
    creates an XML-RPC client for localhost:24444 and hands both
    ``fetcher.xmlrpc_run`` (same port) and ``fetcher.run`` to
    ``utils.run_in_thread``.
    """
    # One place for the port so the client and the server cannot drift apart.
    rpc_port = 24444

    incoming, outgoing = Queue(10), Queue(10)
    self.inqueue = incoming
    self.outqueue = outgoing

    fetcher = Fetcher(incoming, outgoing)
    self.fetcher = fetcher

    self.rpc = xmlrpclib.ServerProxy('http://localhost:%d' % rpc_port)
    self.xmlrpc_thread = utils.run_in_thread(fetcher.xmlrpc_run, port=rpc_port)
    self.thread = utils.run_in_thread(fetcher.run)
def run_fetcher(g=g):
    """Build the tornado fetcher from the settings on ``g`` and run it.

    The fetcher reads from ``g.scheduler2fetcher`` and writes to
    ``g.fetcher2processor``. Its XML-RPC entry point is handed to
    ``run_in_thread`` (port ``g.fetcher_xmlrpc_port``, bound to
    ``g.webui_host``) before ``run()`` is called directly.
    """
    # Imported lazily, as in the rest of this launcher.
    from fetcher.tornado_fetcher import Fetcher

    worker = Fetcher(inqueue=g.scheduler2fetcher,
                     outqueue=g.fetcher2processor)
    worker.phantomjs_proxy = g.phantomjs_proxy

    run_in_thread(worker.xmlrpc_run,
                  port=g.fetcher_xmlrpc_port,
                  bind=g.webui_host)
    worker.run()
class TestTaskDB(unittest.TestCase):
    """Exercise ``Fetcher.sync_fetch`` against an HTTP URL and a data URL."""

    # Template task shared by all tests; tests copy it before changing the URL.
    sample_task_http = {
        'taskid': 'taskid',
        'project': 'project',
        'url': 'http://httpbin.org/get',
        'fetch': {
            'method': 'GET',
            'headers': {
                'Cookie': 'a=b',
                'a': 'b'
            },
            'timeout': 60,
            'save': 'abc',
        },
        'process': {
            'callback': 'callback',
            'save': [1, 2, 3],
        },
    }

    def setUp(self):
        """Start a queue-less fetcher via ``utils.run_in_thread``."""
        fetcher = Fetcher(None, None)
        self.fetcher = fetcher
        self.thread = utils.run_in_thread(fetcher.run)

    def tearDown(self):
        """Ask the fetcher to quit, then wait for its thread."""
        self.fetcher.quit()
        self.thread.join()

    def test_http_get(self):
        """A GET returns 200 and a JSON body listing the sent headers."""
        task = self.sample_task_http
        result = self.fetcher.sync_fetch(task)

        self.assertEqual(result['status_code'], 200)
        self.assertEqual(result['orig_url'], task['url'])
        self.assertEqual(result['save'], task['fetch']['save'])
        self.assertIn('content', result)

        body = json.loads(result['content'])
        self.assertIn('headers', body)
        echoed = body['headers']
        self.assertIn('A', echoed)
        self.assertIn('Cookie', echoed)
        self.assertEqual(echoed['Cookie'], 'a=b')

    def test_dataurl_get(self):
        """A ``data:`` URL yields its payload as content with status 200."""
        # Shallow copy with only the top-level URL replaced.
        task = dict(self.sample_task_http, url='data:,hello')
        result = self.fetcher.sync_fetch(task)

        self.assertEqual(result['status_code'], 200)
        self.assertIn('content', result)
        self.assertEqual(result['content'], 'hello')
def setUpClass(self):
    """Prepare the class-wide fetcher, its queues, RPC client and threads.

    Two ``Queue(10)`` instances feed a ``Fetcher``. An ``xmlrpclib`` proxy
    for localhost:24444 is created, then ``fetcher.xmlrpc_run`` (same port)
    and ``fetcher.run`` are each passed to ``utils.run_in_thread``.
    """
    xmlrpc_port = 24444  # shared by the proxy URL and the XML-RPC server

    self.inqueue = Queue(10)
    self.outqueue = Queue(10)

    fetcher = Fetcher(self.inqueue, self.outqueue)
    self.fetcher = fetcher

    endpoint = "http://localhost:%d" % xmlrpc_port
    self.rpc = xmlrpclib.ServerProxy(endpoint)

    self.xmlrpc_thread = utils.run_in_thread(fetcher.xmlrpc_run, port=xmlrpc_port)
    self.thread = utils.run_in_thread(fetcher.run)
class TestTaskDB(unittest.TestCase):
    """Checks ``Fetcher.sync_fetch`` results for an HTTP GET and a data URL."""

    # Base task description; individual tests copy it and override the URL.
    sample_task_http = {
        'taskid': 'taskid',
        'project': 'project',
        'url': 'http://httpbin.org/get',
        'fetch': {
            'method': 'GET',
            'headers': {
                'Cookie': 'a=b',
                'a': 'b'
            },
            'timeout': 60,
            'save': 'abc',
        },
        'process': {
            'callback': 'callback',
            'save': [1, 2, 3],
        },
    }

    def setUp(self):
        """Create a fetcher without queues and pass its loop to run_in_thread."""
        self.fetcher = Fetcher(None, None)
        run_loop = self.fetcher.run
        self.thread = utils.run_in_thread(run_loop)

    def tearDown(self):
        """Stop the fetcher and join the thread started in ``setUp``."""
        self.fetcher.quit()
        self.thread.join()

    def test_http_get(self):
        """Fetch the sample task and verify status, metadata and headers."""
        expected_url = self.sample_task_http['url']
        expected_save = self.sample_task_http['fetch']['save']

        result = self.fetcher.sync_fetch(self.sample_task_http)

        self.assertEqual(result['status_code'], 200)
        self.assertEqual(result['orig_url'], expected_url)
        self.assertEqual(result['save'], expected_save)
        self.assertIn('content', result)

        decoded = json.loads(result['content'])
        self.assertIn('headers', decoded)
        for header_name in ('A', 'Cookie'):
            self.assertIn(header_name, decoded['headers'])
        self.assertEqual(decoded['headers']['Cookie'], 'a=b')

    def test_dataurl_get(self):
        """Fetching ``data:,hello`` returns status 200 and content 'hello'."""
        task = dict(self.sample_task_http)
        task.update(url='data:,hello')

        result = self.fetcher.sync_fetch(task)

        self.assertEqual(result['status_code'], 200)
        self.assertIn('content', result)
        self.assertEqual(result['content'], 'hello')
class TestTaskDB(unittest.TestCase):
    """Runs a ``Fetcher`` on a daemon thread and checks one HTTP GET."""

    # Task handed to sync_fetch; note it carries a 'data' field with a GET.
    sample_task_http = {
        'taskid': 'taskid',
        'project': 'project',
        'url': 'http://httpbin.org/get',
        'fetch': {
            'method': 'GET',
            'headers': {
                'Cookie': 'a=b',
                'a': 'b'
            },
            'data': 'a=b&c=d',
            'timeout': 60,
        },
        'process': {
            'callback': 'callback',
            'save': [1, 2, 3],
        },
    }

    def setUp(self):
        """Start ``fetcher.run`` on a daemon thread."""
        fetcher = Fetcher(None, None)
        self.fetcher = fetcher

        worker = threading.Thread(target=fetcher.run)
        worker.daemon = True
        self.thread = worker
        worker.start()

    def tearDown(self):
        """Tell the fetcher to quit and wait for the worker thread."""
        self.fetcher.quit()
        self.thread.join()

    def test_http_get(self):
        """The GET succeeds and the JSON body lists the sent headers."""
        task = self.sample_task_http
        result = self.fetcher.sync_fetch(task)

        self.assertEqual(result['status_code'], 200)
        self.assertEqual(result['orig_url'], task['url'])
        self.assertIn('content', result)

        body = json.loads(result['content'])
        self.assertIn('headers', body)
        echoed = body['headers']
        self.assertIn('A', echoed)
        self.assertIn('Cookie', echoed)
        self.assertEqual(echoed['Cookie'], 'a=b')
def run_webui(g=g): import cPickle as pickle from fetcher.tornado_fetcher import Fetcher fetcher = Fetcher(inqueue=None, outqueue=None, async=False) fetcher.phantomjs_proxy = g.phantomjs_proxy from webui.app import app app.config['taskdb'] = g.taskdb app.config['projectdb'] = g.projectdb app.config['resultdb'] = g.resultdb app.config['fetch'] = lambda x: fetcher.fetch(x)[1] app.config['scheduler_rpc'] = g.scheduler_rpc #app.config['cdn'] = '//cdnjs.cloudflare.com/ajax/libs/' if g.demo_mode: app.config['max_rate'] = 0.2 app.config['max_burst'] = 3.0 if 'WEBUI_USERNAME' in os.environ: app.config['webui_username'] = os.environ['WEBUI_USERNAME'] app.config['webui_password'] = os.environ.get('WEBUI_PASSWORD', '') if not getattr(g, 'all_in_one', False): app.debug = g.debug app.run(host=g.webui_host, port=g.webui_port)
def run_fetcher():
    """Create the fetcher from module-level settings and run it.

    Uses the module-level ``scheduler2fetcher`` / ``fetcher2processor``
    queues, passes the XML-RPC entry point to ``run_in_thread`` on
    ``fetcher_xmlrpc_port``, then calls ``run()`` directly.
    """
    from fetcher.tornado_fetcher import Fetcher

    worker = Fetcher(
        inqueue=scheduler2fetcher,
        outqueue=fetcher2processor,
    )
    run_in_thread(worker.xmlrpc_run, port=fetcher_xmlrpc_port)
    worker.run()
def setUp(self):
    """Create a queue-less ``Fetcher`` and pass its ``run`` to run_in_thread."""
    fetcher = Fetcher(None, None)
    self.fetcher = fetcher
    self.thread = utils.run_in_thread(fetcher.run)
class TestFetcher(unittest.TestCase):
    """Tests for ``Fetcher`` via ``sync_fetch``, the queues and XML-RPC.

    One fetcher is shared by the whole class (see ``setUpClass``). Test names
    carry numeric prefixes; unittest runs methods in alphabetical order.
    """

    # Shared template task. Tests must copy it before modifying it, and must
    # deep-copy it before touching the nested "fetch" / "process" dicts.
    sample_task_http = {
        "taskid": "taskid",
        "project": "project",
        "url": "http://echo.opera.com/",
        "fetch": {
            "method": "GET",
            "headers": {"Cookie": "a=b", "a": "b"},
            "timeout": 60,
            "save": "abc",
        },
        "process": {"callback": "callback", "save": [1, 2, 3]},
    }

    @classmethod
    def setUpClass(self):
        """Create queues, the fetcher, an RPC proxy and start both threads."""
        self.inqueue = Queue(10)
        self.outqueue = Queue(10)
        self.fetcher = Fetcher(self.inqueue, self.outqueue)
        self.rpc = xmlrpclib.ServerProxy("http://localhost:%d" % 24444)
        self.xmlrpc_thread = utils.run_in_thread(self.fetcher.xmlrpc_run, port=24444)
        self.thread = utils.run_in_thread(self.fetcher.run)

    @classmethod
    def tearDownClass(self):
        """Call ``_quit`` over RPC and wait for the fetcher thread."""
        self.rpc._quit()
        self.thread.join()

    def test_10_http_get(self):
        """GET the sample URL; the returned page must show the sent headers."""
        result = self.fetcher.sync_fetch(self.sample_task_http)
        self.assertEqual(result["status_code"], 200)
        self.assertEqual(result["orig_url"], self.sample_task_http["url"])
        self.assertEqual(result["save"], self.sample_task_http["fetch"]["save"])
        self.assertIn("content", result)
        content = result["content"]
        self.assertIn("..A:", content)
        self.assertIn("..Cookie:", content)
        self.assertIn("a=b", content)

    def test_10_http_post(self):
        """POST with a body and a ``cookies`` dict; check what the page shows."""
        import copy

        # Deep copy: dict() would share the nested "fetch" dict with the class
        # attribute, so the POST settings would leak into every later test.
        request = copy.deepcopy(self.sample_task_http)
        request["fetch"]["method"] = "POST"
        request["fetch"]["data"] = "binux"
        request["fetch"]["cookies"] = {"c": "d"}
        result = self.fetcher.sync_fetch(request)
        self.assertEqual(result["status_code"], 200)
        self.assertEqual(result["orig_url"], self.sample_task_http["url"])
        self.assertEqual(result["save"], self.sample_task_http["fetch"]["save"])
        self.assertIn("content", result)
        content = result["content"]
        self.assertIn("<h2>POST", content)
        self.assertIn("..A:", content)
        self.assertIn("..Cookie:", content)
        # FIXME: cookies in headers not supported
        self.assertNotIn("a=b", content)
        self.assertIn("c=d", content)
        self.assertIn("binux", content)

    def test_20_dataurl_get(self):
        """A ``data:`` URL is returned as content with status 200."""
        # Shallow copy is enough: only the top-level "url" key is replaced.
        data = dict(self.sample_task_http)
        data["url"] = "data:,hello"
        result = self.fetcher.sync_fetch(data)
        self.assertEqual(result["status_code"], 200)
        self.assertIn("content", result)
        self.assertEqual(result["content"], "hello")

    def test_30_with_queue(self):
        """A task put on ``inqueue`` produces a (task, result) on ``outqueue``."""
        data = dict(self.sample_task_http)
        data["url"] = "data:,hello"
        self.inqueue.put(data)
        task, result = self.outqueue.get()
        self.assertEqual(result["status_code"], 200)
        self.assertIn("content", result)
        self.assertEqual(result["content"], "hello")

    def test_40_with_rpc(self):
        """``rpc.fetch`` returns a pickled result in the ``.data`` attribute."""
        data = dict(self.sample_task_http)
        data["url"] = "data:,hello"
        # pickle.loads is tolerable here only because the payload comes from
        # the fetcher started by this test class on localhost.
        result = pickle.loads(self.rpc.fetch(data).data)
        self.assertEqual(result["status_code"], 200)
        self.assertIn("content", result)
        self.assertEqual(result["content"], "hello")
def setUp(self):
    """Create a queue-less ``Fetcher`` and run it on a daemon thread."""
    fetcher = Fetcher(None, None)
    self.fetcher = fetcher

    worker = threading.Thread(target=fetcher.run)
    worker.daemon = True
    self.thread = worker
    worker.start()
class TestFetcher(unittest.TestCase):
    """Tests for ``Fetcher`` via ``sync_fetch``, the queues and XML-RPC.

    One fetcher is shared by the whole class (see ``setUpClass``). Test names
    carry numeric prefixes; unittest runs methods in alphabetical order.
    """

    # Shared template task. Tests must copy it before modifying it, and must
    # deep-copy it before touching the nested 'fetch' / 'process' dicts.
    sample_task_http = {
        'taskid': 'taskid',
        'project': 'project',
        'url': 'http://echo.opera.com/',
        'fetch': {
            'method': 'GET',
            'headers': {
                'Cookie': 'a=b',
                'a': 'b'
            },
            'timeout': 60,
            'save': 'abc',
        },
        'process': {
            'callback': 'callback',
            'save': [1, 2, 3],
        },
    }

    @classmethod
    def setUpClass(self):
        """Create queues, the fetcher, an RPC proxy and start both threads."""
        self.inqueue = Queue(10)
        self.outqueue = Queue(10)
        self.fetcher = Fetcher(self.inqueue, self.outqueue)
        self.rpc = xmlrpclib.ServerProxy('http://localhost:%d' % 24444)
        self.xmlrpc_thread = utils.run_in_thread(self.fetcher.xmlrpc_run, port=24444)
        self.thread = utils.run_in_thread(self.fetcher.run)

    @classmethod
    def tearDownClass(self):
        """Call ``_quit`` over RPC and wait for the fetcher thread."""
        self.rpc._quit()
        self.thread.join()

    def test_10_http_get(self):
        """GET the sample URL; the returned page must show the sent headers."""
        result = self.fetcher.sync_fetch(self.sample_task_http)
        self.assertEqual(result['status_code'], 200)
        self.assertEqual(result['orig_url'], self.sample_task_http['url'])
        self.assertEqual(result['save'], self.sample_task_http['fetch']['save'])
        self.assertIn('content', result)
        content = result['content']
        self.assertIn('..A:', content)
        self.assertIn('..Cookie:', content)
        self.assertIn('a=b', content)

    def test_10_http_post(self):
        """POST with a body and a ``cookies`` dict; check what the page shows."""
        import copy

        # Deep copy: dict() would share the nested 'fetch' dict with the class
        # attribute, so the POST settings would leak into every later test.
        request = copy.deepcopy(self.sample_task_http)
        request['fetch']['method'] = 'POST'
        request['fetch']['data'] = 'binux'
        request['fetch']['cookies'] = {'c': 'd'}
        result = self.fetcher.sync_fetch(request)
        self.assertEqual(result['status_code'], 200)
        self.assertEqual(result['orig_url'], self.sample_task_http['url'])
        self.assertEqual(result['save'], self.sample_task_http['fetch']['save'])
        self.assertIn('content', result)
        content = result['content']
        self.assertIn('<h2>POST', content)
        self.assertIn('..A:', content)
        self.assertIn('..Cookie:', content)
        # FIXME: cookies in headers not supported
        self.assertNotIn('a=b', content)
        self.assertIn('c=d', content)
        self.assertIn('binux', content)

    def test_20_dataurl_get(self):
        """A ``data:`` URL is returned as content with status 200."""
        # Shallow copy is enough: only the top-level 'url' key is replaced.
        data = dict(self.sample_task_http)
        data['url'] = 'data:,hello'
        result = self.fetcher.sync_fetch(data)
        self.assertEqual(result['status_code'], 200)
        self.assertIn('content', result)
        self.assertEqual(result['content'], 'hello')

    def test_30_with_queue(self):
        """A task put on ``inqueue`` produces a (task, result) on ``outqueue``."""
        data = dict(self.sample_task_http)
        data['url'] = 'data:,hello'
        self.inqueue.put(data)
        task, result = self.outqueue.get()
        self.assertEqual(result['status_code'], 200)
        self.assertIn('content', result)
        self.assertEqual(result['content'], 'hello')

    def test_40_with_rpc(self):
        """``rpc.fetch`` returns a pickled result in the ``.data`` attribute."""
        data = dict(self.sample_task_http)
        data['url'] = 'data:,hello'
        # pickle.loads is tolerable here only because the payload comes from
        # the fetcher started by this test class on localhost.
        result = pickle.loads(self.rpc.fetch(data).data)
        self.assertEqual(result['status_code'], 200)
        self.assertIn('content', result)
        self.assertEqual(result['content'], 'hello')