def test_handle_task(self, get_preps, collect):
    """Check that ``FetchableHandler.handle_task`` delegates to ``collect``.

    ``get_preps`` and ``collect`` are mock objects (presumably injected by
    patch decorators that are not visible in this part of the file). The
    handler is expected to return whatever ``collect`` returns, and to call
    it with the preprocessors from ``get_preps``, the configured output
    directory, and the javascript/extract options translated to keywords.
    """
    task = Task.create(self.job_id, self.target)
    task_options = {'javascript': True, 'extract': True}

    preprocessors = ['prep1']
    get_preps.return_value = preprocessors

    expected_result = {'meta': 'so meta'}
    collect.return_value = expected_result

    settings = {'artexin.out_dir': '/test/out'}
    handler = FetchableHandler()

    with mock_bottle_config('artexinweb.settings.BOTTLE_CONFIG', settings):
        result = handler.handle_task(task, task_options)

    assert result == expected_result

    get_preps.assert_called_once_with(task.target)
    collect.assert_called_once_with(
        task.target,
        prep=preprocessors,
        base_dir=settings['artexin.out_dir'],
        javascript=True,
        do_extract=True,
        meta={},
    )
def test_mark_queued(self):
    """Check that ``mark_queued`` puts a non-queued task back in the queue."""
    queued_again = Task.create(self.job_id, self.task_target)

    # A fresh task is already queued, so first push it into the failed
    # state to make the transition observable.
    queued_again.mark_failed("error")
    assert queued_again.is_queued is False

    queued_again.mark_queued()
    assert queued_again.is_queued is True
def test_mark_finished(self):
    """Check that ``mark_finished`` sets the finished flag and empties notes."""
    finished = Task.create(self.job_id, self.task_target)

    # Store a non-empty note first so that the final assertion on
    # ``notes`` actually proves it was cleared.
    finished.notes = 'test'
    finished.save()
    assert finished.is_finished is False

    finished.mark_finished()

    assert finished.is_finished is True
    assert finished.notes == ''
def test_process_task_invalid_target(self, mark_failed, handle_task):
    """Check that an invalid target fails the task without handling it.

    ``mark_failed`` and ``handle_task`` are mock objects (presumably
    injected by patch decorators outside this view).
    """
    handler = BaseJobHandler()
    task = Task.create(self.job_id, self.targets[0])

    # Force target validation to reject the task.
    reject_target = mock.patch.object(handler,
                                      'is_valid_target',
                                      return_value=False)
    with reject_target:
        handler.process_task(task, {})

    assert mark_failed.call_count == 1
    assert not handle_task.called
def test_handle_task_result_failure(self, mark_failed, mark_finished):
    """Check that a result carrying an ``error`` key marks the task failed.

    ``mark_failed`` and ``mark_finished`` are mock objects (presumably
    injected by patch decorators outside this view).
    """
    failed_result = {'error': 'something went wrong'}
    task = Task.create(self.job_id, self.target)

    FetchableHandler().handle_task_result(task, failed_result, {})

    assert not mark_finished.called
    assert mark_failed.call_count == 1
def test_handle_task_override_title(self, extract_target, read_title,
                                    count_images, create_zipball,
                                    shutil_rmtree):
    """Check that a title supplied in ``options['meta']`` wins.

    All parameters besides ``self`` are mock objects (presumably injected
    by patch decorators outside this view). When the options already carry
    a title, the handler is expected not to call ``read_title`` and to pass
    the overriding title through to ``create_zipball``.
    """
    options = {'origin': self.origin, 'meta': {'title': 'overridden'}}
    task = Task.create(self.job_id, self.target)
    handler = StandaloneHandler()

    origin_domain = urllib.parse.urlparse(self.origin).netloc
    expected_meta = {
        'title': 'overridden',
        'images': 4,
        'url': self.origin,
        'domain': origin_domain
    }

    extract_target.return_value = self.temp_dir
    read_title.return_value = 'title actually found'
    count_images.return_value = expected_meta['images']
    # Echo back the ``meta`` keyword so the handler's return value exposes
    # exactly what was handed to ``create_zipball``.
    create_zipball.side_effect = lambda *args, **kwargs: kwargs['meta']

    settings = {'artexin.out_dir': '/test/out'}
    with mock_bottle_config('artexinweb.settings.BOTTLE_CONFIG', settings):
        result = handler.handle_task(task, options)

    # ``call_args`` holds the positional and keyword arguments of the
    # call; only the dict part (the keywords) is inspected.
    for part in create_zipball.call_args:
        if not isinstance(part, dict):
            continue
        assert part['src_dir'] == self.temp_dir
        assert part['out_dir'] == settings['artexin.out_dir']
        passed_meta = part['meta']
        for name, wanted in expected_meta.items():
            assert passed_meta[name] == wanted
        assert isinstance(passed_meta['timestamp'], datetime.datetime)

    extract_target.assert_called_once_with(task.target)
    assert read_title.called is False
    count_images.assert_called_once_with(self.temp_dir)
    shutil_rmtree.assert_called_once_with(self.temp_dir)

    # The result holds every expected key plus the generated timestamp.
    assert len(result) == len(expected_meta) + 1
    for name, wanted in expected_meta.items():
        assert result[name] == wanted
    assert isinstance(result['timestamp'], datetime.datetime)
def test_process_task_handle_task_failure(self, mark_failed, handle_task,
                                          handle_task_result):
    """Check that an exception in ``handle_task`` marks the task failed.

    The three extra parameters are mock objects (presumably injected by
    patch decorators outside this view). ``handle_task`` is made to raise,
    after which the result handler must not run and ``mark_failed`` must be
    called exactly once.
    """
    handle_task.side_effect = Exception()
    handler = BaseJobHandler()
    task = Task.create(self.job_id, self.targets[0])
    options = {}

    # Let target validation pass so that processing reaches handle_task.
    accept_target = mock.patch.object(handler,
                                      'is_valid_target',
                                      return_value=True)
    with accept_target:
        handler.process_task(task, options)

    handle_task.assert_called_once_with(task, options)
    assert not handle_task_result.called
    assert mark_failed.call_count == 1
def test_is_valid_task(self):
    """Check ``is_valid_task`` across the task's status transitions.

    Queued, processing and failed tasks are expected to be valid; only a
    finished task is rejected.
    """
    checker = BaseJobHandler()
    task = Task.create(self.job_id, self.targets[0])

    # A newly created task starts out queued.
    assert checker.is_valid_task(task) is True

    task.mark_processing()
    assert checker.is_valid_task(task) is True

    task.mark_failed("failed")
    assert checker.is_valid_task(task) is True

    task.mark_finished()
    assert checker.is_valid_task(task) is False
def test_process_task_success(self, mark_failed, handle_task,
                              handle_task_result):
    """Check the happy path of ``BaseJobHandler.process_task``.

    The three extra parameters are mock objects (presumably injected by
    patch decorators outside this view). The value returned by
    ``handle_task`` must be forwarded to ``handle_task_result`` together
    with the task and options, and the task must not be marked failed.
    """
    outcome = {'result': 'OK'}
    handle_task.return_value = outcome

    handler = BaseJobHandler()
    task = Task.create(self.job_id, self.targets[0])
    options = {}

    # Let target validation pass so that processing reaches handle_task.
    accept_target = mock.patch.object(handler,
                                      'is_valid_target',
                                      return_value=True)
    with accept_target:
        handler.process_task(task, options)

    handle_task.assert_called_once_with(task, options)
    handle_task_result.assert_called_once_with(task, outcome, options)
    assert not mark_failed.called
def test_handle_task_result(self, mark_finished):
    """Check that ``StandaloneHandler`` copies result fields onto the task.

    ``mark_finished`` is a mock object (presumably injected by a patch
    decorator outside this view). It must be called once with no
    arguments, and the task must carry the size, hash (as ``md5``), title,
    image count and timestamp from the result.
    """
    task = Task.create(self.job_id, self.temp_dir)
    result = {
        'size': 1234,
        'hash': 'a' * 32,
        'title': 'page title',
        'images': 12,
        'timestamp': datetime.datetime.utcnow()
    }

    StandaloneHandler().handle_task_result(task, result, {})

    mark_finished.assert_called_once_with()

    # (task attribute, result key) pairs; only the hash is stored under a
    # different name on the task.
    field_map = (
        ('size', 'size'),
        ('md5', 'hash'),
        ('title', 'title'),
        ('images', 'images'),
        ('timestamp', 'timestamp'),
    )
    for attribute, key in field_map:
        assert getattr(task, attribute) == result[key]
def test_handle_task_result_success(self, mark_failed, mark_finished):
    """Check that ``FetchableHandler`` stores a successful result.

    ``mark_failed`` and ``mark_finished`` are mock objects (presumably
    injected by patch decorators outside this view). A result without an
    error must finish the task, never fail it, and copy the size, hash (as
    ``md5``), title, image count and timestamp onto the task.
    """
    task = Task.create(self.job_id, self.target)
    result = {
        'size': 1024,
        'hash': self.job_id,
        'title': 'Target title',
        'images': 3,
        'timestamp': datetime.datetime.utcnow()
    }

    FetchableHandler().handle_task_result(task, result, {})

    assert not mark_failed.called
    mark_finished.assert_called_once_with()

    # (task attribute, result key) pairs; only the hash is stored under a
    # different name on the task.
    field_map = (
        ('size', 'size'),
        ('md5', 'hash'),
        ('title', 'title'),
        ('images', 'images'),
        ('timestamp', 'timestamp'),
    )
    for attribute, key in field_map:
        assert getattr(task, attribute) == result[key]
def test_mark_failed(self):
    """Check that ``mark_failed`` switches the task into the failed state."""
    failing = Task.create(self.job_id, self.task_target)
    assert failing.is_failed is False

    failing.mark_failed("error")

    assert failing.is_failed is True
def test_mark_processing(self):
    """Check that ``mark_processing`` switches the task to processing."""
    in_progress = Task.create(self.job_id, self.task_target)
    assert in_progress.is_processing is False

    in_progress.mark_processing()

    assert in_progress.is_processing is True
def test_create(self):
    """Check that ``Task.create`` stores its arguments and starts queued."""
    created = Task.create(self.job_id, self.task_target)

    assert created.job_id == self.job_id
    assert created.target == self.task_target
    assert created.status == Task.QUEUED