class TestProcessor(unittest.TestCase): projectdb_path = './data/tests/project.db' @classmethod def setUpClass(self): shutil.rmtree('./data/tests/', ignore_errors=True) os.makedirs('./data/tests/') def get_projectdb(): return projectdb.ProjectDB(self.projectdb_path) self.projectdb = get_projectdb() self.in_queue = Queue(10) self.status_queue = Queue(10) self.newtask_queue = Queue(10) self.result_queue = Queue(10) def run_processor(): self.processor = Processor(get_projectdb(), self.in_queue, self.status_queue, self.newtask_queue, self.result_queue) self.processor.project_manager.CHECK_PROJECTS_INTERVAL = 0.1 self.processor.run() self.process = run_in_thread(run_processor) time.sleep(1) @classmethod def tearDownClass(self): if self.process.is_alive(): self.processor.quit() self.process.join(2) assert not self.process.is_alive() shutil.rmtree('./data/tests/', ignore_errors=True) def test_10_update_project(self): self.assertIsNone(self.processor.project_manager.get('test_project')) self.projectdb.insert('test_project', { 'name': 'test_project', 'group': 'group', 'status': 'TODO', 'script': inspect.getsource(sample_handler), 'comments': 'test project', 'rate': 1.0, 'burst': 10, }) self.assertIsNone(self.processor.project_manager.get('not_exists')) self.assertIsNotNone(self.processor.project_manager.get('test_project')) task = { "process": { "callback": "on_start" }, "project": "not_exists", "taskid": "data:,on_start", "url": "data:,on_start" } self.in_queue.put((task, {})) time.sleep(1) self.assertFalse(self.status_queue.empty()) while not self.status_queue.empty(): status = self.status_queue.get() self.assertEqual(status['track']['process']['ok'], False) self.assertIsNone(self.processor.project_manager.get('not_exists')) def test_20_broken_project(self): self.assertIsNone(self.processor.project_manager.get('test_broken_project')) self.projectdb.insert('test_broken_project', { 'name': 'test_broken_project', 'group': 'group', 'status': 'DEBUG', 'script': inspect.getsource(sample_handler)[:10], 'comments': 'test project', 'rate': 1.0, 'burst': 10, }) self.assertIsNone(self.processor.project_manager.get('not_exists')) self.assertIsNotNone(self.processor.project_manager.get('test_broken_project')) project_data = self.processor.project_manager.get('test_broken_project') self.assertIsNotNone(project_data.get('exception')) def test_30_new_task(self): self.assertTrue(self.status_queue.empty()) self.assertTrue(self.newtask_queue.empty()) task = { "process": { "callback": "on_start" }, "project": "test_project", "taskid": "data:,on_start", "url": "data:,on_start" } fetch_result = { "orig_url": "data:,on_start", "content": "on_start", "headers": {}, "status_code": 200, "url": "data:,on_start", "time": 0, } self.in_queue.put((task, fetch_result)) time.sleep(1) self.assertFalse(self.status_queue.empty()) while not self.status_queue.empty(): self.status_queue.get() self.assertFalse(self.newtask_queue.empty()) def test_40_index_page(self): task = None while not self.newtask_queue.empty(): task = self.newtask_queue.get()[0] self.assertIsNotNone(task) fetch_result = { "orig_url": task['url'], "content": ( "<html><body>" "<a href='http://binux.me'>binux</a>" "<a href='http://binux.me/中文'>binux</a>" "<a href='http://binux.me/1'>1</a>" "<a href='http://binux.me/1'>2</a>" "</body></html>" ), "headers": {'a': 'b', 'etag': 'tag'}, "status_code": 200, "url": task['url'], "time": 0, } self.in_queue.put((task, fetch_result)) time.sleep(1) self.assertFalse(self.status_queue.empty()) self.assertFalse(self.newtask_queue.empty()) status = self.status_queue.get() self.assertEqual(status['track']['fetch']['ok'], True) self.assertEqual(status['track']['fetch']['time'], 0) self.assertEqual(status['track']['fetch']['status_code'], 200) self.assertEqual('tag', status['track']['fetch']['headers']['etag']) self.assertIsNone(status['track']['fetch']['content']) self.assertEqual(status['track']['process']['ok'], True) self.assertGreater(status['track']['process']['time'], 0) self.assertEqual(status['track']['process']['follows'], 3) self.assertIsNone(status['track']['process']['result']) self.assertEqual(status['track']['process']['logs'], '') self.assertIsNone(status['track']['process']['exception']) tasks = self.newtask_queue.get() self.assertEqual(len(tasks), 3) self.assertEqual(tasks[0]['url'], 'http://binux.me/') self.assertTrue(tasks[1]['url'].startswith('http://binux.me/%'), task['url']) def test_50_fetch_error(self): # clear new task queue while not self.newtask_queue.empty(): self.newtask_queue.get() # clear status queue while not self.status_queue.empty(): self.status_queue.get() task = { "process": { "callback": "index_page" }, "project": "test_project", "taskid": "data:,test_fetch_error", "url": "data:,test_fetch_error" } fetch_result = { "orig_url": task['url'], "content": "test_fetch_error", "error": "test_fetch_error", "headers": {'a': 'b', 'last-modified': '123'}, "status_code": 598, "url": task['url'], "time": 0, } self.in_queue.put((task, fetch_result)) time.sleep(1) self.assertFalse(self.status_queue.empty()) self.assertTrue(self.newtask_queue.empty()) status = self.status_queue.get() self.assertEqual(status['track']['fetch']['ok'], False) self.assertEqual(status['track']['fetch']['time'], 0) self.assertEqual(status['track']['fetch']['status_code'], 598) self.assertEqual('123', status['track']['fetch']['headers']['last-modified']) self.assertIsNotNone(status['track']['fetch']['content']) self.assertEqual(status['track']['process']['ok'], False) self.assertGreater(status['track']['process']['time'], 0) self.assertEqual(status['track']['process']['follows'], 0) self.assertIsNone(status['track']['process']['result']) self.assertGreater(len(status['track']['process']['logs']), 0) self.assertIsNotNone(status['track']['process']['exception']) def test_60_call_broken_project(self): # clear new task queue while not self.newtask_queue.empty(): self.newtask_queue.get() # clear status queue while not self.status_queue.empty(): self.status_queue.get() task = { "process": { "callback": "on_start" }, "project": "test_broken_project", "taskid": "data:,on_start", "url": "data:,on_start", } fetch_result = { "orig_url": "data:,on_start", "content": "on_start", "headers": {}, "status_code": 200, "url": "data:,on_start", "time": 0, } self.in_queue.put((task, fetch_result)) time.sleep(1) self.assertFalse(self.status_queue.empty()) while not self.status_queue.empty(): status = self.status_queue.get() self.assertEqual(status['track']['fetch']['ok'], True) self.assertEqual(status['track']['process']['ok'], False) self.assertGreater(len(status['track']['process']['logs']), 0) self.assertIsNotNone(status['track']['process']['exception']) self.assertTrue(self.newtask_queue.empty()) def test_70_update_project(self): self.processor.project_manager.CHECK_PROJECTS_INTERVAL = 1000000 self.processor.project_manager._check_projects() self.assertIsNotNone(self.processor.project_manager.get('test_broken_project')) # clear new task queue while not self.newtask_queue.empty(): self.newtask_queue.get() # clear status queue while not self.status_queue.empty(): self.status_queue.get() task = { "process": { "callback": "on_start" }, "project": "test_broken_project", "taskid": "data:,on_start", "url": "data:,on_start" } fetch_result = { "orig_url": "data:,on_start", "content": "on_start", "headers": {}, "status_code": 200, "url": "data:,on_start", "time": 0, } self.projectdb.update('test_broken_project', { 'script': inspect.getsource(sample_handler), }) # not update self.in_queue.put((task, fetch_result)) time.sleep(1) self.assertFalse(self.status_queue.empty()) while not self.status_queue.empty(): status = self.status_queue.get() self.assertEqual(status['track']['fetch']['ok'], True) self.assertEqual(status['track']['process']['ok'], False) # updated task['project_updatetime'] = time.time() self.in_queue.put((task, fetch_result)) time.sleep(1) self.assertFalse(self.status_queue.empty()) while not self.status_queue.empty(): status = self.status_queue.get() self.assertEqual(status['track']['fetch']['ok'], True) self.assertEqual(status['track']['process']['ok'], True) self.projectdb.update('test_broken_project', { 'script': inspect.getsource(sample_handler)[:10], }) # update with md5 task['project_md5sum'] = 'testmd5' del task['project_updatetime'] self.in_queue.put((task, fetch_result)) time.sleep(1) self.assertFalse(self.status_queue.empty()) while not self.status_queue.empty(): status = self.status_queue.get() self.assertEqual(status['track']['fetch']['ok'], True) self.assertEqual(status['track']['process']['ok'], False) self.processor.project_manager.CHECK_PROJECTS_INTERVAL = 0.1 @unittest.skipIf(six.PY3, "deprecated feature, not work for PY3") def test_80_import_project(self): self.projectdb.insert('test_project2', { 'name': 'test_project', 'group': 'group', 'status': 'TODO', 'script': inspect.getsource(sample_handler), 'comments': 'test project', 'rate': 1.0, 'burst': 10, }) self.projectdb.insert('test_project3', { 'name': 'test_project', 'group': 'group', 'status': 'TODO', 'script': inspect.getsource(sample_handler), 'comments': 'test project', 'rate': 1.0, 'burst': 10, }) from projects import test_project self.assertIsNotNone(test_project) self.assertIsNotNone(test_project.Handler) from projects.test_project2 import Handler self.assertIsNotNone(Handler) import projects.test_project3 self.assertIsNotNone(projects.test_project3.Handler)