Example #1
0
 def test_update_deletes_datasets(self, mock):
     """A second crawl with a shorter package list leaves one dataset."""
     # First crawl: the mocked package_list reports two datasets.
     mock.action.package_list.return_value = {
         'success': True,
         'result': [u"tst-a", u"tst-b"],
     }
     crawler.fetch_dataset_list()
     mock.action.package_list.assert_called_once_with()
     # Second crawl: "tst-b" is no longer listed.
     mock.action.package_list.return_value = {
         'success': True,
         'result': [u"tst-a"],
     }
     datasets = crawler.fetch_dataset_list()
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(1, datasets.count())
Example #2
0
    def test_fetch_package_search_update(self, mock):
        """A dated fetch returns only modified/new datasets and drops deleted ones."""
        # initial call to fetch dataset list sets up 3 datasets inside the
        # datastore
        mock.action.package_list.return_value = [
            u"tst-deleted", u"tst-not-modified", 'tst-modified'
        ]
        crawler.fetch_dataset_list()

        # the second call, we are giving a time delta, this time the registry
        # has 3 modified datasets, 1 deleted, 1 modified, 1 new
        mock.action.package_search.return_value = {
            'count': 3,
            'results': [
                {'name': 'tst-deleted', 'state': 'deleted'},
                {'name': 'tst-modified', 'state': 'active'},
                {'name': 'tst-new', 'state': 'active'},
            ]
        }
        date = datetime.date(2000, 1, 2)
        datasets = {ds.name for ds in crawler.fetch_dataset_list(date)}

        # we want to check that the result returned are only the modified/new
        # datasets as these are the ones that are sent to the job queues
        # normally a call without a time delta results in all the datasets
        # being sent to the job queues
        # (assertEqual: the assertEquals alias is deprecated, gone in 3.12)
        self.assertEqual({'tst-modified', 'tst-new'}, datasets)

        # finally we check over all the datasets to make sure nothing has
        # happened to the not modified dataset, and that tst-deleted was
        # actually deleted.
        all_datasets = {i.name for i in Dataset.query.all()}
        self.assertEqual(
            {'tst-not-modified', 'tst-modified', 'tst-new'},
            all_datasets,
        )
Example #3
0
 def test_update_adds_datasets(self, mock):
     """A second crawl with a longer package list yields two datasets."""
     # First crawl: only "tst-a" is listed.
     mock.action.package_list.return_value = [u"tst-a"]
     crawler.fetch_dataset_list()
     mock.action.package_list.assert_called_once_with()
     # Second crawl: "tst-b" has been added to the listing.
     mock.action.package_list.return_value = [u"tst-a", u"tst-b"]
     datasets = crawler.fetch_dataset_list()
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(2, datasets.count())
 def test_update_deletes_datasets(self, iatikit_mock):
     """Shrinking the mocked iatikit dataset list shrinks the crawl result."""
     data_mock = iatikit_mock.return_value
     data_mock.last_updated = datetime.datetime.utcnow()
     # First crawl: two datasets are exposed by the mocked data object.
     data_mock.datasets = [
         iatikit.Dataset("tst-a.xml"),
         iatikit.Dataset("tst-b.xml"),
     ]
     datasets = crawler.fetch_dataset_list()
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(2, datasets.count())
     # Second crawl: only "tst-a.xml" remains.
     data_mock.datasets = [iatikit.Dataset("tst-a.xml")]
     datasets = crawler.fetch_dataset_list()
     self.assertEqual(1, datasets.count())
Example #5
0
 def test_update_deletes_datasets(self, mock):
     """A second crawl with a shorter package list leaves one dataset."""
     # First crawl: the mocked package_list reports two datasets.
     mock.action.package_list.return_value = {
         'success': True,
         'result': [u"tst-a", u"tst-b"]
     }
     crawler.fetch_dataset_list()
     mock.action.package_list.assert_called_once_with()
     # Second crawl: "tst-b" is no longer listed.
     mock.action.package_list.return_value = {
         'success': True,
         'result': [u"tst-a"]
     }
     datasets = crawler.fetch_dataset_list()
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(1, len(datasets))
Example #6
0
    def test_fetch_package_search_update(self, mock):
        """A dated fetch returns only modified/new datasets and drops deleted ones."""
        # initial call to fetch dataset list sets up 3 datasets inside the
        # datastore
        mock.action.package_list.return_value = {
            'success': True,
            'result': [u"tst-deleted", u"tst-not-modified", 'tst-modified'],
        }
        crawler.fetch_dataset_list()

        # the second call, we are giving a time delta, this time the registry
        # has 3 modified datasets, 1 deleted, 1 modified, 1 new
        mock.action.package_search.return_value = {
            'success': True,
            'result': {
                'count': 3,
                'results': [
                    {'name': 'tst-deleted', 'state': 'deleted'},
                    {'name': 'tst-modified', 'state': 'active'},
                    {'name': 'tst-new', 'state': 'active'},
                ]
            }
        }
        date = datetime.date(2000, 1, 2)
        datasets = {ds.name for ds in crawler.fetch_dataset_list(date)}

        # we want to check that the result returned are only the modified/new
        # datasets as these are the ones that are sent to the job queues
        # normally a call without a time delta results in all the datasets
        # being sent to the job queues
        # (assertEqual: the assertEquals alias is deprecated, gone in 3.12)
        self.assertEqual({'tst-modified', 'tst-new'}, datasets)

        # finally we check over all the datasets to make sure nothing has
        # happened to the not modified dataset, and that tst-deleted was
        # actually deleted.
        all_datasets = {i.name for i in Dataset.query.all()}
        self.assertEqual(
            {'tst-not-modified', 'tst-modified', 'tst-new'},
            all_datasets,
        )
Example #7
0
    def test_fetch_package_search(self, mock):
        """A dated fetch queries package_search and returns its two datasets."""
        mock.action.package_search.return_value = {
            'success': True,
            'result': {
                'count': 2,
                'results': [
                    {'name': 'tst-a', 'state': 'active'},
                    {'name': 'tst-b', 'state': 'active'},
                ]
            }
        }
        date = datetime.date(2000, 1, 2)
        datasets = crawler.fetch_dataset_list(date)
        # check the solr parameters are formed correctly
        mock.action.package_search.assert_any_call(
            fq='metadata_modified:[2000-01-02T00:00:00Z TO NOW]')

        # check that the result of fetch_dataset_list is only the 2 datasets
        # (assertEqual: the assertEquals alias is deprecated, gone in 3.12)
        self.assertEqual({'tst-a', 'tst-b'},
                         {ds.name for ds in datasets})
Example #8
0
 def test_fetch_package_list(self, mock):
     """Both names in the mocked package_list response come back as datasets."""
     listing = {'success': True, 'result': [u"tst-a", u"tst-b"]}
     mock.action.package_list.return_value = listing
     fetched = crawler.fetch_dataset_list()
     mock.action.package_list.assert_called_once_with()
     # The name list is rebuilt for each check, with a fresh pass over the result.
     for expected in ("tst-a", "tst-b"):
         self.assertIn(expected, [entry.name for entry in fetched])
 def test_fetch_package_list(self, iatikit_mock):
     """Datasets exposed by the mocked iatikit data object are all returned."""
     fake_data = iatikit_mock.return_value
     fake_data.last_updated = datetime.datetime.utcnow()
     fake_data.datasets = [
         iatikit.Dataset(path) for path in ("tst-a.xml", "tst-b.xml")
     ]
     fetched = crawler.fetch_dataset_list()
     # The name list is rebuilt for each check, with a fresh pass over the result.
     for expected in ("tst-a", "tst-b"):
         self.assertIn(expected, [entry.name for entry in fetched])
Example #10
0
    def test_fetch_package_search(self, mock):
        """A dated fetch queries package_search and returns its two datasets."""
        mock.action.package_search.return_value = {
            'count': 2,
            'results': [
                {'name': 'tst-a', 'state': 'active'},
                {'name': 'tst-b', 'state': 'active'},
            ]
        }
        date = datetime.date(2000, 1, 2)
        datasets = crawler.fetch_dataset_list(date)
        # check the solr parameters are formed correctly
        mock.action.package_search.assert_any_call(
            fq='metadata_modified:[2000-01-02T00:00:00Z TO NOW]')

        # check that the result of fetch_dataset_list is only the 2 datasets
        # (assertEqual: the assertEquals alias is deprecated, gone in 3.12)
        self.assertEqual({'tst-a', 'tst-b'}, {ds.name for ds in datasets})
 def test_deleted_activities(self, iatikit_mock):
     """A stored dataset absent from the fetch is gone and its activity logged.

     ``registry`` is not defined in this method; it is expected to be
     available from the enclosing module.
     """
     # Build the fixture bottom-up: activity -> resource -> dataset.
     doomed_activity = fac.ActivityFactory.build(
         iati_identifier="deleted_activity",
         title="orig",
     )
     doomed_resource = fac.ResourceFactory.create(
         url="http://yes",
         activities=[doomed_activity],
     )
     fac.DatasetFactory.create(name='deleteme', resources=[doomed_resource])

     stored_names = [ds.name for ds in Dataset.query.all()]
     self.assertIn("deleteme", stored_names)

     iatikit_mock.return_value = registry
     fetched = crawler.fetch_dataset_list()
     fetched_names = [ds.name for ds in fetched]
     self.assertNotIn("deleteme", fetched_names)

     deleted_ids = [da.iati_identifier for da in DeletedActivity.query.all()]
     self.assertIn("deleted_activity", deleted_ids)
Example #12
0
 def test_deleted_activities(self, mock):
     """A stored dataset absent from the listing is gone and its activity logged."""
     # Build the fixture bottom-up: activity -> resource -> dataset.
     doomed_activity = fac.ActivityFactory.build(
         iati_identifier=u"deleted_activity",
         title=u"orig",
     )
     doomed_resource = fac.ResourceFactory.create(
         url=u"http://yes",
         activities=[doomed_activity],
     )
     fac.DatasetFactory.create(name='deleteme', resources=[doomed_resource])

     # The mocked listing does not mention "deleteme".
     mock.action.package_list.return_value = [u"tst-a", u"tst-b"]
     stored_names = [ds.name for ds in Dataset.query.all()]
     self.assertIn("deleteme", stored_names)

     fetched = crawler.fetch_dataset_list()
     fetched_names = [ds.name for ds in fetched]
     self.assertNotIn("deleteme", fetched_names)

     deleted_ids = [da.iati_identifier for da in DeletedActivity.query.all()]
     self.assertIn("deleted_activity", deleted_ids)
Example #13
0
 def test_deleted_activities(self, mock):
     """A stored dataset absent from the listing is gone and its activity logged."""
     # Build the fixture bottom-up: activity -> resource -> dataset.
     doomed_activity = fac.ActivityFactory.build(
         iati_identifier=u"deleted_activity",
         title=u"orig",
     )
     doomed_resource = fac.ResourceFactory.create(
         url=u"http://yes",
         activities=[doomed_activity],
     )
     fac.DatasetFactory.create(name='deleteme', resources=[doomed_resource])

     # The mocked listing does not mention "deleteme".
     listing = {'success': True, 'result': [u"tst-a", u"tst-b"]}
     mock.action.package_list.return_value = listing
     stored_names = [ds.name for ds in Dataset.query.all()]
     self.assertIn("deleteme", stored_names)

     fetched = crawler.fetch_dataset_list()
     fetched_names = [ds.name for ds in fetched]
     self.assertNotIn("deleteme", fetched_names)

     deleted_ids = [da.iati_identifier for da in DeletedActivity.query.all()]
     self.assertIn("deleted_activity", deleted_ids)
Example #14
0
 def test_update_adds_datasets(self, mock):
     """A second crawl with a longer mocked listing yields two datasets."""
     # First crawl: only "tst-a" is listed.
     mock.return_value = [u"tst-a"]
     crawler.fetch_dataset_list()
     # Second crawl: "tst-b" has been added to the listing.
     mock.return_value = [u"tst-a", u"tst-b"]
     datasets = crawler.fetch_dataset_list()
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(2, len(datasets))
Example #15
0
 def test_fetch_package_list(self, mock):
     """Both names returned by the mock come back as datasets."""
     listed = [u"tst-a", u"tst-b"]
     mock.return_value = listed
     fetched = crawler.fetch_dataset_list()
     # The name list is rebuilt for each check, with a fresh pass over the result.
     for expected in ("tst-a", "tst-b"):
         self.assertIn(expected, [entry.name for entry in fetched])
Example #16
0
 def test_fetch_package_list(self, mock):
     """Both names in the mocked package_list response come back as datasets."""
     listing = {
         'success': True,
         'result': [u"tst-a", u"tst-b"],
     }
     mock.action.package_list.return_value = listing
     fetched = crawler.fetch_dataset_list()
     mock.action.package_list.assert_called_once_with()
     # The name list is rebuilt for each check, with a fresh pass over the result.
     for expected in ("tst-a", "tst-b"):
         self.assertIn(expected, [entry.name for entry in fetched])