def test_count_availability_reset(self, mock_fn):
    """An unavailable result after available ones restarts the counter at 1."""
    # Stored state: the resource was already counted as available twice.
    self.resource.extras = {
        'check:status': 200,
        'check:available': True,
        'check:date': datetime.now(),
        'check:count-availability': 2,
    }
    # The new check flips the availability to False.
    unavailable = {
        'check:status': 200,
        'check:available': False,
        'check:date': datetime.now(),
    }

    class DummyLinkchecker:
        def check(self, _resource):
            return unavailable

    mock_fn.return_value = DummyLinkchecker

    check_resource(self.resource)

    counter = self.resource.extras['check:count-availability']
    self.assertEqual(counter, 1)
def test_count_availability_reset(self, mock_fn):
    """Availability counter goes back to 1 when availability changes.

    The resource starts with a counter of 2 for ``check:available: True``;
    the stubbed linkchecker then reports ``False`` and the counter stored in
    the resource extras is expected to be 1.
    """
    self.resource.extras = {
        'check:status': 200,
        'check:available': True,
        'check:date': datetime.now(),
        'check:count-availability': 2,
    }
    check_res = {
        'check:status': 200,
        'check:available': False,
        'check:date': datetime.now(),
    }

    class DummyLinkchecker:
        def check(self, _):
            return check_res

    mock_fn.return_value = DummyLinkchecker
    check_resource(self.resource)
    # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
    self.assertEqual(
        self.resource.extras['check:count-availability'], 1)
def test_check_resource_ignored_domain(self):
    """A resource on an ignored domain gets a 204 / available result.

    NOTE(review): presumably ``example-ignore.com`` is listed as ignored in
    the test settings -- the configuration is not visible from this test.
    """
    self.resource.extras = {}
    self.resource.url = 'http://example-ignore.com/url'
    self.resource.save()
    res = check_resource(self.resource)
    # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
    self.assertEqual(res.get('check:status'), 204)
    self.assertEqual(res.get('check:available'), True)
def test_check_resource_linkchecker_check_error(self, mock_fn):
    """A ``check:error`` from the linkchecker becomes an (error, 500) tuple."""

    class DummyLinkchecker:
        def check(self, _):
            return {'check:error': 'ERROR'}

    mock_fn.return_value = DummyLinkchecker
    res = check_resource(self.resource)
    # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
    self.assertEqual(res, ({'error': 'ERROR'}, 500))
def test_check_resource_ignored_domain(self):
    """Checking a URL on the ignored domain reports status 204 and available.

    NOTE(review): presumably ``example-ignore.com`` is configured as an
    ignored domain by the test setup -- confirm against the fixtures.
    """
    target = self.resource
    target.extras = {}
    target.url = 'http://example-ignore.com/url'
    target.save()

    outcome = check_resource(target)

    status = outcome.get('check:status')
    available = outcome.get('check:available')
    self.assertEqual(status, 204)
    self.assertEqual(available, True)
def test_check_resource_linkchecker_check_error(self, mock_fn):
    """An error reported by the linkchecker is returned with a 500 status."""
    failure = {'check:error': 'ERROR'}

    class DummyLinkchecker:
        def check(self, _resource):
            return failure

    mock_fn.return_value = DummyLinkchecker

    expected = ({'error': 'ERROR'}, 500)
    self.assertEqual(check_resource(self.resource), expected)
def test_check_resource_creates_no_activity(activity_app, mocker):
    """Running a check as a logged-in user records no dataset activity."""
    checked_resource = ResourceFactory()
    owning_dataset = DatasetFactory(resources=[checked_resource])
    login_user(UserFactory())

    successful_check = {
        'check:status': 200,
        'check:available': True,
        'check:date': datetime.now(),
    }

    class DummyLinkchecker:
        def check(self, _resource):
            return successful_check

    mocker.patch(
        'udata.linkchecker.checker.get_linkchecker',
        return_value=DummyLinkchecker,
    )

    check_resource(checked_resource)

    recorded = Activity.objects.filter(related_to=owning_dataset)
    assert len(recorded) == 0
def test_check_resource_linkchecker_no_status(self, mock_fn):
    """A linkchecker result without ``check:status`` yields an (error, 503)."""

    class DummyLinkchecker:
        def check(self, _):
            return {'check:available': True}

    mock_fn.return_value = DummyLinkchecker
    res = check_resource(self.resource)
    # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
    self.assertEqual(
        res, ({'error': 'No status in response from linkchecker'}, 503))
def test_check_resource_linkchecker_no_status(self, mock_fn):
    """Without a status in the linkchecker response, a 503 error is returned."""
    statusless = {'check:available': True}

    class DummyLinkchecker:
        def check(self, _resource):
            return statusless

    mock_fn.return_value = DummyLinkchecker

    expected = ({'error': 'No status in response from linkchecker'}, 503)
    self.assertEqual(check_resource(self.resource), expected)
def test_check_resource_filter_result(self, mock_fn):
    """Unknown keys of the check result are not stored in resource extras.

    The raw result (including ``dummy``) is still what ``check_resource``
    returns; only the extras are expected to be free of it.
    """
    check_res = {'check:status': 200, 'dummy': 'dummy'}

    class DummyLinkchecker:
        def check(self, _):
            return check_res

    mock_fn.return_value = DummyLinkchecker
    res = check_resource(self.resource)
    # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
    self.assertEqual(res, check_res)
    self.assertNotIn('dummy', self.resource.extras)
def test_check_resource_filter_result(self, mock_fn):
    """The ``dummy`` key is returned to the caller but kept out of extras."""
    raw_result = {
        'check:status': 200,
        'dummy': 'dummy',
    }

    class DummyLinkchecker:
        def check(self, _resource):
            return raw_result

    mock_fn.return_value = DummyLinkchecker

    returned = check_resource(self.resource)

    self.assertEqual(returned, raw_result)
    stored = self.resource.extras
    self.assertNotIn('dummy', stored)
def test_check_resource_linkchecker_ok(self, mock_fn):
    """A successful check is returned and stored with a counter of 1."""
    check_res = {
        'check:status': 200,
        'check:available': True,
        'check:date': datetime.now(),
    }

    class DummyLinkchecker:
        def check(self, _):
            return check_res

    mock_fn.return_value = DummyLinkchecker
    res = check_resource(self.resource)
    # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
    self.assertEqual(res, check_res)
    # The extras are expected to hold the result plus the availability count.
    check_res.update({'check:count-availability': 1})
    self.assertEqual(self.resource.extras, check_res)
def test_check_resource_linkchecker_ok(self, mock_fn):
    """The check result is returned as-is and saved in extras with count 1."""
    ok_result = {
        'check:status': 200,
        'check:available': True,
        'check:date': datetime.now(),
    }

    class DummyLinkchecker:
        def check(self, _resource):
            return ok_result

    mock_fn.return_value = DummyLinkchecker

    returned = check_resource(self.resource)
    self.assertEqual(returned, ok_result)

    # Extend the same dict in place with the expected availability counter.
    ok_result['check:count-availability'] = 1
    self.assertEqual(self.resource.extras, ok_result)
def test_unvalid_cache(self, mock_fn):
    """A check dated one hour ago is not served back; a new check is run.

    NOTE(review): this assumes the cache duration configured for the tests
    is shorter than 3600 seconds -- confirm against the test settings.
    """
    self.resource.extras = {
        'check:date': datetime.now() - timedelta(seconds=3600),
        'check:status': 42,
    }
    check_res = {
        'check:status': 200,
        'check:available': True,
        'check:date': datetime.now(),
    }

    class DummyLinkchecker:
        def check(self, _):
            return check_res

    mock_fn.return_value = DummyLinkchecker
    res = check_resource(self.resource)
    # we get the result from DummyLinkchecker and not from cache
    # (assertEqual: the assertEquals alias is deprecated and gone in 3.12)
    self.assertEqual(res, check_res)
def get(self, dataset, rid):
    '''Checks that a resource's URL exists and returns metadata.'''
    # Resolve the resource first (helper named get_resource_or_404), then
    # hand the check result straight back to the caller.
    return check_resource(self.get_resource_or_404(dataset, rid))
def test_check_resource_linkchecker_no_check(self):
    """With the ``no_check`` checker the result is 204 and available."""
    self.resource.extras['check:checker'] = 'no_check'
    self.resource.save()
    res = check_resource(self.resource)
    # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
    self.assertEqual(res.get('check:status'), 204)
    self.assertEqual(res.get('check:available'), True)
def test_check_resource_linkchecker_in_resource(self, mock_fn):
    """The checker named in the resource extras is the one requested."""
    self.resource.extras['check:checker'] = 'another_linkchecker'
    self.resource.save()
    check_resource(self.resource)
    # Only the positional arguments of the mocked lookup are inspected.
    args, _kwargs = mock_fn.call_args
    # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
    self.assertEqual(args, ('another_linkchecker', ))
def test_check_resource_no_linkchecker(self, mock_fn):
    """When no linkchecker is found, an (error, 503) tuple is returned."""
    mock_fn.return_value = None
    res = check_resource(self.resource)
    # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
    self.assertEqual(res, ({'error': 'No linkchecker configured.'}, 503))
def test_check_resource_no_linkchecker(self, mock_fn):
    """A missing linkchecker results in a 503 'not configured' error."""
    # The mocked lookup yields nothing at all.
    mock_fn.return_value = None

    expected = ({'error': 'No linkchecker configured.'}, 503)
    self.assertEqual(check_resource(self.resource), expected)
def check_resources(self, number=5000):
    '''Check <number> of URLs that have not been (recently) checked

    Resources without any ``check:date`` are picked first; the remaining
    slots are filled with the resources whose ``check:date`` is the oldest.
    Every resource reported as unavailable is collected, grouped by dataset
    and mailed to the users holing the ``admin`` role.

    :param number: maximum number of resources selected for this run.
    '''
    if not current_app.config.get('LINKCHECKING_ENABLED'):
        log.error('Link checking is disabled.')
        return

    # Flatten datasets into one document per embedded resource, keeping
    # only the fields needed to select and locate the resource.
    base_pipeline = [
        {'$match': {'resources': {'$gt': []}}},
        {'$project': {
            'resources._id': True,
            'resources.extras.check:date': True,
        }},
        {'$unwind': '$resources'},
    ]

    # unchecked resources
    pipeline = base_pipeline + [
        {'$match': {'resources.extras.check:date': {'$eq': None}}},
        {'$limit': number},
    ]
    resources = list(Dataset.objects.aggregate(*pipeline))

    # not recently checked resources
    slots_left = number - len(resources)
    if slots_left > 0:
        pipeline = base_pipeline + [
            {'$match': {'resources.extras.check:date': {'$ne': None}}},
            {'$sort': {'resources.extras.check:date': 1}},
            {'$limit': slots_left},
        ]
        resources += list(Dataset.objects.aggregate(*pipeline))

    nb_resources = len(resources)
    log.info('Checking %s resources...', nb_resources)

    resource_check_result = []
    for idx, dataset_resource in enumerate(resources, start=1):
        dataset_obj = Dataset.objects.get(id=dataset_resource['_id'])
        resource_id = dataset_resource['resources']['_id']
        rid = uuid.UUID(resource_id)
        resource_obj = get_by(dataset_obj.resources, 'id', rid)
        log.info('Checking resource %s (%s/%s)',
                 resource_id, idx, nb_resources)

        if not resource_obj.need_check():
            log.info("--> Skipping this resource, cache is fresh enough.")
            continue

        result = check_resource(resource_obj)
        if not isinstance(result, dict):
            # check_resource reports failures as an (error, status) tuple;
            # indexing it by key would raise and abort the whole run.
            log.warning('Could not check resource %s: %s',
                        resource_id, result)
            continue

        # A result without availability information is not reported as
        # broken (and no longer raises KeyError).
        if result.get('check:available', True):
            continue

        resource_check_result.append({
            'dataset': dataset_obj,
            'resource': resource_obj,
            'status': result.get('check:status'),
        })

    # Group resources by dataset and send email
    if resource_check_result:
        # groupby only merges adjacent items, hence the sort beforehand.
        resource_check_result.sort(key=lambda item: item['dataset'].id)
        resource_groups = groupby(resource_check_result,
                                  lambda item: item['dataset'])

        admin_role = Role.objects.filter(name='admin').first()
        recipients = [
            user.email
            for user in User.objects.filter(roles=admin_role).all()
        ]

        subject = 'Relatório de verificação de links do dados.gov.'
        context = {
            'subject': subject,
            'resources': resource_groups,
            'server': current_app.config.get('SERVER_NAME'),
        }
        msg = Message(subject=subject,
                      sender='*****@*****.**',
                      recipients=recipients)
        msg.html = theme.render('mail/link_checker_warning.html', **context)

        mail = current_app.extensions.get('mail')
        try:
            mail.send(msg)
        except Exception:
            # Sending the report stays best-effort, but the failure is now
            # logged instead of being silently discarded.
            log.exception('Unable to send the link checker report.')

    log.info('Done.')
def test_check_resource_linkchecker_in_resource(self, mock_fn):
    """The mocked lookup is called with the checker name set on the resource."""
    checker_name = 'another_linkchecker'
    self.resource.extras['check:checker'] = checker_name
    self.resource.save()

    check_resource(self.resource)

    # call_args unpacks into (positional, keyword); only the former matters.
    positional, _keywords = mock_fn.call_args
    self.assertEqual(positional, ('another_linkchecker', ))
def test_check_resource_linkchecker_no_check(self):
    """A resource flagged with the ``no_check`` checker gets 204 / available."""
    target = self.resource
    target.extras['check:checker'] = 'no_check'
    target.save()

    outcome = check_resource(target)

    status = outcome.get('check:status')
    available = outcome.get('check:available')
    self.assertEqual(status, 204)
    self.assertEqual(available, True)