Beispiel #1
0
    def test_count_availability_reset(self, mock_fn):
        '''The availability counter goes back to 1 when availability flips.'''
        # Stored state: the resource was seen as available, counter at 2.
        self.resource.extras = {
            'check:status': 200,
            'check:available': True,
            'check:date': datetime.now(),
            'check:count-availability': 2,
        }
        # Fresh check: same status code, but the resource is now unavailable.
        unavailable_result = {
            'check:status': 200,
            'check:available': False,
            'check:date': datetime.now(),
        }

        class DummyLinkchecker:
            def check(self, _resource):
                return unavailable_result

        mock_fn.return_value = DummyLinkchecker

        check_resource(self.resource)

        counter = self.resource.extras['check:count-availability']
        self.assertEqual(counter, 1)
Beispiel #2
0
    def test_count_availability_reset(self, mock_fn):
        '''Check that a change of availability resets the counter to 1.

        The resource starts as available with a counter of 2; the stubbed
        link checker then reports it as unavailable.
        '''
        self.resource.extras = {'check:status': 200, 'check:available': True,
                                'check:date': datetime.now(),
                                'check:count-availability': 2}
        check_res = {'check:status': 200, 'check:available': False,
                     'check:date': datetime.now()}

        # Stub backend handed out by the mocked link checker lookup.
        class DummyLinkchecker:
            def check(self, _):
                return check_res
        mock_fn.return_value = DummyLinkchecker

        check_resource(self.resource)
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(self.resource.extras['check:count-availability'], 1)
Beispiel #3
0
 def test_check_resource_ignored_domain(self):
     '''Check the result returned for a resource on an ignored domain.

     NOTE(review): the URL is presumably matched by an ignore list set up
     outside this test - confirm against the test configuration.
     '''
     self.resource.extras = {}
     self.resource.url = 'http://example-ignore.com/url'
     self.resource.save()
     res = check_resource(self.resource)
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(res.get('check:status'), 204)
     self.assertEqual(res.get('check:available'), True)
Beispiel #4
0
 def test_check_resource_linkchecker_check_error(self, mock_fn):
     '''A 'check:error' from the backend becomes an (error, 500) tuple.'''
     # Stub backend whose check always reports an error.
     class DummyLinkchecker:
         def check(self, _):
             return {'check:error': 'ERROR'}
     mock_fn.return_value = DummyLinkchecker
     res = check_resource(self.resource)
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(res, ({'error': 'ERROR'}, 500))
Beispiel #5
0
 def test_check_resource_ignored_domain(self):
     '''Expect a 204 / available result for the 'example-ignore.com' URL.'''
     # Start from empty extras and point the resource at the test URL.
     self.resource.extras = {}
     self.resource.url = 'http://example-ignore.com/url'
     self.resource.save()

     outcome = check_resource(self.resource)

     expectations = (
         ('check:status', 204),
         ('check:available', True),
     )
     for key, expected in expectations:
         self.assertEqual(outcome.get(key), expected)
Beispiel #6
0
 def test_check_resource_linkchecker_check_error(self, mock_fn):
     '''An error reported by the checker is returned with a 500 status.'''
     class DummyLinkchecker:
         def check(self, _resource):
             # Simulate a backend that failed to check the URL.
             return {'check:error': 'ERROR'}

     mock_fn.return_value = DummyLinkchecker

     outcome = check_resource(self.resource)

     expected = ({'error': 'ERROR'}, 500)
     self.assertEqual(outcome, expected)
Beispiel #7
0
def test_check_resource_creates_no_activity(activity_app, mocker):
    '''Checking a resource must not record any activity on its dataset.'''
    checked_resource = ResourceFactory()
    owning_dataset = DatasetFactory(resources=[checked_resource])
    login_user(UserFactory())

    successful_check = {
        'check:status': 200,
        'check:available': True,
        'check:date': datetime.now(),
    }

    class DummyLinkchecker:
        def check(self, _resource):
            return successful_check

    # Make the checker lookup hand back the stub backend.
    mocker.patch(
        'udata.linkchecker.checker.get_linkchecker',
        return_value=DummyLinkchecker,
    )

    check_resource(checked_resource)

    recorded = Activity.objects.filter(related_to=owning_dataset)
    assert len(recorded) == 0
Beispiel #8
0
 def test_check_resource_linkchecker_no_status(self, mock_fn):
     '''A backend answer without 'check:status' yields an (error, 503).'''
     # Stub backend whose answer lacks the 'check:status' key.
     class DummyLinkchecker:
         def check(self, _):
             return {'check:available': True}
     mock_fn.return_value = DummyLinkchecker
     res = check_resource(self.resource)
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(res,
                      ({'error': 'No status in response from linkchecker'},
                       503))
Beispiel #9
0
def test_check_resource_creates_no_activity(activity_app, mocker):
    '''No Activity related to the dataset exists after a resource check.'''
    res = ResourceFactory()
    ds = DatasetFactory(resources=[res])
    current_user = UserFactory()
    login_user(current_user)

    class DummyLinkchecker:
        def check(self, _resource):
            return stub_answer

    # Defined after the class on purpose: it is only read when check() runs.
    stub_answer = {
        'check:status': 200,
        'check:available': True,
        'check:date': datetime.now(),
    }
    patch_target = 'udata.linkchecker.checker.get_linkchecker'
    mocker.patch(patch_target, return_value=DummyLinkchecker)

    check_resource(res)

    related_activities = Activity.objects.filter(related_to=ds)
    assert len(related_activities) == 0
Beispiel #10
0
 def test_check_resource_linkchecker_no_status(self, mock_fn):
     '''A checker answer lacking a status is reported as a 503 error.'''
     class DummyLinkchecker:
         def check(self, _resource):
             # Deliberately omit 'check:status' from the answer.
             return {'check:available': True}

     mock_fn.return_value = DummyLinkchecker

     outcome = check_resource(self.resource)

     expected_payload = {'error': 'No status in response from linkchecker'}
     self.assertEqual(outcome, (expected_payload, 503))
Beispiel #11
0
    def test_check_resource_filter_result(self, mock_fn):
        '''Unknown keys are returned to the caller but not stored in extras.'''
        check_res = {'check:status': 200, 'dummy': 'dummy'}

        # Stub backend returning one extra key named 'dummy'.
        class DummyLinkchecker:
            def check(self, _):
                return check_res
        mock_fn.return_value = DummyLinkchecker

        res = check_resource(self.resource)
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(res, check_res)
        self.assertNotIn('dummy', self.resource.extras)
Beispiel #12
0
    def test_check_resource_filter_result(self, mock_fn):
        '''The 'dummy' key is returned as-is but kept out of the extras.'''
        raw_answer = {
            'check:status': 200,
            'dummy': 'dummy',
        }

        class DummyLinkchecker:
            def check(self, _resource):
                return raw_answer

        mock_fn.return_value = DummyLinkchecker

        outcome = check_resource(self.resource)

        # The caller sees the full answer...
        self.assertEqual(outcome, raw_answer)
        # ...while the resource extras do not receive the 'dummy' key.
        self.assertNotIn('dummy', self.resource.extras)
Beispiel #13
0
    def test_check_resource_linkchecker_ok(self, mock_fn):
        '''A successful check is returned and stored in the resource extras.

        The stored extras are expected to also carry an availability
        counter set to 1.
        '''
        check_res = {'check:status': 200, 'check:available': True,
                     'check:date': datetime.now()}

        # Stub backend handed out by the mocked link checker lookup.
        class DummyLinkchecker:
            def check(self, _):
                return check_res
        mock_fn.return_value = DummyLinkchecker

        res = check_resource(self.resource)
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(res, check_res)
        # The extras hold the check result plus the availability counter.
        check_res.update({'check:count-availability': 1})
        self.assertEqual(self.resource.extras, check_res)
Beispiel #14
0
    def test_check_resource_linkchecker_ok(self, mock_fn):
        '''The checker answer is returned and saved, with a counter of 1.'''
        checker_answer = {
            'check:status': 200,
            'check:available': True,
            'check:date': datetime.now(),
        }

        class DummyLinkchecker:
            def check(self, _resource):
                return checker_answer

        mock_fn.return_value = DummyLinkchecker

        outcome = check_resource(self.resource)
        self.assertEqual(outcome, checker_answer)

        # Extras are expected to hold the answer plus the counter.
        checker_answer['check:count-availability'] = 1
        self.assertEqual(self.resource.extras, checker_answer)
Beispiel #15
0
    def test_unvalid_cache(self, mock_fn):
        '''A one-hour-old stored check is not served instead of a new one.'''
        # Previous check stored on the resource, dated one hour ago.
        self.resource.extras = {
            'check:date': datetime.now() - timedelta(seconds=3600),
            'check:status': 42
        }

        check_res = {
            'check:status': 200,
            'check:available': True,
            'check:date': datetime.now()
        }

        # Stub backend handed out by the mocked link checker lookup.
        class DummyLinkchecker:
            def check(self, _):
                return check_res

        mock_fn.return_value = DummyLinkchecker

        res = check_resource(self.resource)
        # we get the result from DummyLinkchecker and not from cache
        # (assertEqual: the assertEquals alias was removed in Python 3.12)
        self.assertEqual(res, check_res)
Beispiel #16
0
 def get(self, dataset, rid):
     '''Checks that a resource's URL exists and returns metadata.'''
     # Look the resource up on the dataset, then pass it to the checker.
     return check_resource(self.get_resource_or_404(dataset, rid))
Beispiel #17
0
 def get(self, dataset, rid):
     '''Checks that a resource's URL exists and returns metadata.'''
     target = self.get_resource_or_404(dataset, rid)
     # The checker's return value is passed back to the caller untouched.
     check_outcome = check_resource(target)
     return check_outcome
Beispiel #18
0
 def test_check_resource_linkchecker_no_check(self):
     '''With the 'no_check' checker the result is 204 and available.'''
     # Select the checker through the resource's own extras.
     self.resource.extras['check:checker'] = 'no_check'
     self.resource.save()
     res = check_resource(self.resource)
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(res.get('check:status'), 204)
     self.assertEqual(res.get('check:available'), True)
Beispiel #19
0
 def test_check_resource_linkchecker_in_resource(self, mock_fn):
     '''The checker named in the resource extras is the one looked up.'''
     self.resource.extras['check:checker'] = 'another_linkchecker'
     self.resource.save()
     check_resource(self.resource)
     # Only the positional arguments of the last call matter here.
     args, _kwargs = mock_fn.call_args
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(args, ('another_linkchecker', ))
Beispiel #20
0
 def test_check_resource_no_linkchecker(self, mock_fn):
     '''Without any link checker the result is an (error, 503) tuple.'''
     # The mocked lookup finds no link checker.
     mock_fn.return_value = None
     res = check_resource(self.resource)
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(res, ({'error': 'No linkchecker configured.'}, 503))
Beispiel #21
0
 def test_check_resource_no_linkchecker(self, mock_fn):
     '''A missing link checker is reported as a 503 error.'''
     # The mocked lookup returns nothing at all.
     mock_fn.return_value = None

     outcome = check_resource(self.resource)

     expected = ({'error': 'No linkchecker configured.'}, 503)
     self.assertEqual(outcome, expected)
Beispiel #22
0
def check_resources(self, number=5000):
    '''Check <number> of URLs that have not been (recently) checked.

    Resources without any ``check:date`` are selected first; the remaining
    slots are filled with already checked resources, oldest check first.
    Resources reported as unavailable are collected and, if there are any,
    one HTML report grouped by dataset is mailed to the users holding the
    ``admin`` role.

    Nothing is done (apart from logging an error) when the
    ``LINKCHECKING_ENABLED`` setting is falsy.

    :param number: maximum number of resources selected for this run.
    '''
    if not current_app.config.get('LINKCHECKING_ENABLED'):
        log.error('Link checking is disabled.')
        return

    # Common stages: keep datasets having resources, project only the
    # fields needed below and emit one document per resource.
    base_pipeline = [
        {
            '$match': {
                'resources': {
                    '$gt': []
                }
            }
        },
        {
            '$project': {
                'resources._id': True,
                'resources.extras.check:date': True
            }
        },
        {
            '$unwind': '$resources'
        },
    ]
    # unchecked resources
    pipeline = base_pipeline + [{
        '$match': {
            'resources.extras.check:date': {
                '$eq': None
            }
        }
    }, {
        '$limit': number
    }]
    resources = list(Dataset.objects.aggregate(*pipeline))
    # not recently checked resources
    slots_left = number - len(resources)
    if slots_left > 0:
        pipeline = base_pipeline + [{
            '$match': {
                'resources.extras.check:date': {
                    '$ne': None
                }
            }
        }, {
            '$sort': {
                'resources.extras.check:date': 1
            }
        }, {
            '$limit': slots_left
        }]
        resources += list(Dataset.objects.aggregate(*pipeline))

    nb_resources = len(resources)
    log.info('Checking %s resources...', nb_resources)

    resource_check_result = []
    for idx, dataset_resource in enumerate(resources):
        dataset_obj = Dataset.objects.get(id=dataset_resource['_id'])
        resource_id = dataset_resource['resources']['_id']
        rid = uuid.UUID(resource_id)
        resource_obj = get_by(dataset_obj.resources, 'id', rid)
        log.info('Checking resource %s (%s/%s)', resource_id, idx + 1,
                 nb_resources)
        if not resource_obj.need_check():
            log.info("--> Skipping this resource, cache is fresh enough.")
            continue

        result = check_resource(resource_obj)
        if not isinstance(result, dict):
            # check_resource reports its own failures as an
            # (error payload, HTTP status) tuple: there is no availability
            # information to read, so log it and move on.
            log.warning('Could not check resource %s: %s', resource_id,
                        result)
            continue

        available = result.get('check:available')
        if available is None:
            # Without an availability flag the link cannot be called broken.
            log.warning('No availability information for resource %s',
                        resource_id)
            continue
        if not available:
            resource_check_result.append({
                'dataset': dataset_obj,
                'resource': resource_obj,
                'status': result.get('check:status')
            })

    # Group resources by dataset and send email
    if resource_check_result:
        # groupby only merges adjacent items, hence the sort beforehand.
        resource_check_result.sort(key=lambda item: item['dataset'].id)
        resource_groups = groupby(resource_check_result,
                                  lambda item: item['dataset'])

        admin_role = Role.objects.filter(name='admin').first()
        recipients = [
            user.email for user in User.objects.filter(roles=admin_role).all()
        ]

        subject = 'Relatório de verificação de links do dados.gov.'
        context = {
            'subject': subject,
            'resources': resource_groups,
            'server': current_app.config.get('SERVER_NAME')
        }

        msg = Message(subject=subject,
                      sender='*****@*****.**',
                      recipients=recipients)
        msg.html = theme.render('mail/link_checker_warning.html', **context)

        mail = current_app.extensions.get('mail')
        try:
            mail.send(msg)
        except Exception:
            # Sending the report stays best-effort, but the failure is
            # logged instead of being silently discarded.
            log.exception('Unable to send the link checker report e-mail.')

    log.info('Done.')
Beispiel #23
0
 def test_check_resource_linkchecker_in_resource(self, mock_fn):
     '''The lookup is called with the checker named in the extras.'''
     checker_name = 'another_linkchecker'
     self.resource.extras['check:checker'] = checker_name
     self.resource.save()

     check_resource(self.resource)

     # call_args unpacks into (positional args, keyword args).
     positional, _ = mock_fn.call_args
     self.assertEqual(positional, ('another_linkchecker', ))
Beispiel #24
0
 def test_check_resource_linkchecker_no_check(self):
     '''Expect a 204 / available result when the checker is 'no_check'.'''
     self.resource.extras['check:checker'] = 'no_check'
     self.resource.save()

     outcome = check_resource(self.resource)

     expectations = (
         ('check:status', 204),
         ('check:available', True),
     )
     for key, expected in expectations:
         self.assertEqual(outcome.get(key), expected)