Exemple #1
0
    def test_catalog_modified_date(self):
        """The ``modified_since`` filter keeps only the newer dataset."""
        # The first dataset exists only to be filtered out. The pause is
        # presumably there so both datasets get distinct modification
        # timestamps -- TODO confirm.
        factories.Dataset(title='First dataset')
        time.sleep(1)
        newer = factories.Dataset(title='Second dataset')

        catalog_url = url_for('dcat_catalog',
                              _format='ttl',
                              modified_since=newer['metadata_modified'])

        response = self._get_test_app().get(catalog_url)

        parser = RDFParser()
        parser.parse(response.body, _format='turtle')
        parsed = list(parser.datasets())

        eq_(len(parsed), 1)
        eq_(parsed[0]['title'], newer['title'])
Exemple #2
0
    def test_catalog_fq_filter(self, app):
        """An ``fq`` filter on a tag returns only datasets with that tag."""
        economy_and_stats = factories.Dataset(
            title='First dataset',
            tags=[{'name': 'economy'}, {'name': 'statistics'}])
        economy_only = factories.Dataset(
            title='Second dataset',
            tags=[{'name': 'economy'}])
        # Carries only the 'statistics' tag, so the 'tags:economy' filter
        # below is expected to leave it out.
        factories.Dataset(
            title='Third dataset',
            tags=[{'name': 'statistics'}])

        catalog_url = url_for('dcat.read_catalog',
                              _format='ttl',
                              fq='tags:economy')
        response = app.get(catalog_url)

        parser = RDFParser()
        parser.parse(response.body, _format='turtle')
        parsed = list(parser.datasets())

        # The order of the results is not asserted, only their membership.
        expected_titles = [economy_and_stats['title'], economy_only['title']]
        assert len(parsed) == 2
        assert parsed[0]['title'] in expected_titles
        assert parsed[1]['title'] in expected_titles
 def _create_datasets(self):
     """Create the organizations, groups and datasets used by the tests.

     Builds two organizations, two groups, one parent dataset carrying the
     ``collection_metadata`` extra and two child datasets that point at
     the parent through the ``collection_package_id`` extra. Everything
     created is stored on ``self``. When ``self.org1`` already exists the
     method returns early and creates nothing.
     """
     log.info('Creating datasets for testing collections')
     # create just one time
     if hasattr(self, 'org1'):
         return
     # This used to be the bare expression ``reset_db``, which only looks
     # the name up and never runs it, so the database was never reset.
     reset_db()
     self.org1 = ckan_factories.Organization()
     log.info('Org1 created {}'.format(self.org1['id']))
     self.org2 = ckan_factories.Organization()
     log.info('Org2 created {}'.format(self.org2['id']))
     self.group1 = ckan_factories.Group()
     log.info('Group1 created {}'.format(self.group1['id']))
     self.group2 = ckan_factories.Group()
     log.info('Group2 created {}'.format(self.group2['id']))

     # The parent belongs to both groups; each child belongs to one.
     self.parent = ckan_factories.Dataset(owner_org=self.org1['id'],
                                          extras=[{'key': 'collection_metadata', 'value': 'true'}],
                                          title='The Father test_collections_unique',
                                          groups=[{'name': self.group1['name']}, {'name': self.group2['name']}])
     log.info('Parent created {}'.format(self.parent['id']))
     # NOTE(review): child1 is titled 'The Child 2 ...', same as child2.
     # It looks like a copy/paste slip but is kept as-is in case other
     # tests depend on the title -- confirm before changing.
     self.child1 = ckan_factories.Dataset(owner_org=self.org1['id'],
                                          extras=[{'key': 'collection_package_id', 'value': self.parent['id']}],
                                          title='The Child 2 test_collections_unique',
                                          groups=[{'name': self.group1['name']}])
     log.info('Child 1 created {}'.format(self.child1['id']))
     self.child2 = ckan_factories.Dataset(owner_org=self.org1['id'],
                                          extras=[{'key': 'collection_package_id', 'value': self.parent['id']}],
                                          title='The Child 2 test_collections_unique',
                                          groups=[{'name': self.group2['name']}])
     log.info('Child 2 created {}'.format(self.child2['id']))
Exemple #4
0
 def test_update_dataset_but_with_same_name(self):
     """A dataset being updated may keep the name it already owns."""
     # Removing a trailing space from a title, for example, makes the
     # harvester believe the title changed and that a new name is needed,
     # although the current name can simply be reused.
     for taken_name in ('trees', 'trees1'):
         factories.Dataset(name=taken_name)
     unique_name = _ensure_name_is_unique('trees', existing_name='trees')
     assert unique_name == 'trees'
Exemple #5
0
    def test_harvest_sources_job_history_clear(self):
        """Clearing the job history of all sources removes jobs and objects.

        Two harvest sources are created, each with one job, one harvest
        object and one dataset. After calling the
        ``harvest_sources_job_history_clear`` action, the jobs and harvest
        objects must be gone while both sources and both datasets remain.
        """
        # prepare
        data_dict = SOURCE_DICT.copy()
        source_1 = factories.HarvestSourceObj(**data_dict)
        data_dict['name'] = 'another-source'
        data_dict['url'] = 'http://another-url'
        source_2 = factories.HarvestSourceObj(**data_dict)

        job_1 = factories.HarvestJobObj(source=source_1)
        dataset_1 = ckan_factories.Dataset()
        object_1_ = factories.HarvestObjectObj(job=job_1,
                                               source=source_1,
                                               package_id=dataset_1['id'])
        job_2 = factories.HarvestJobObj(source=source_2)
        dataset_2 = ckan_factories.Dataset()
        object_2_ = factories.HarvestObjectObj(job=job_2,
                                               source=source_2,
                                               package_id=dataset_2['id'])

        # execute
        context = {
            'model': model,
            'session': model.Session,
            'ignore_auth': True,
            'user': ''
        }
        result = toolkit.get_action('harvest_sources_job_history_clear')(
            context, {})

        # verify
        # Sort on the 'id' value explicitly: dicts have no ordering on
        # Python 3, so a bare sorted() over a list of dicts raises
        # TypeError there.
        def by_id(item):
            return item['id']

        assert_equal(sorted(result, key=by_id),
                     sorted([{
                         'id': source_1.id
                     }, {
                         'id': source_2.id
                     }], key=by_id))
        source_1 = harvest_model.HarvestSource.get(source_1.id)
        assert source_1
        assert_equal(harvest_model.HarvestJob.get(job_1.id), None)
        assert_equal(harvest_model.HarvestObject.get(object_1_.id), None)
        dataset_from_db_1 = model.Package.get(dataset_1['id'])
        assert dataset_from_db_1, 'is None'
        assert_equal(dataset_from_db_1.id, dataset_1['id'])
        # Look the second source up by its own id. This used to pass
        # source_1.id, so the survival of source_2 was never checked.
        source_2 = harvest_model.HarvestSource.get(source_2.id)
        assert source_2
        assert_equal(harvest_model.HarvestJob.get(job_2.id), None)
        assert_equal(harvest_model.HarvestObject.get(object_2_.id), None)
        dataset_from_db_2 = model.Package.get(dataset_2['id'])
        assert dataset_from_db_2, 'is None'
        assert_equal(dataset_from_db_2.id, dataset_2['id'])
Exemple #6
0
    def test_catalog_q_search(self, app):
        """A ``q`` query on the catalog returns only the matching dataset."""
        matching = factories.Dataset(title='First dataset')
        # Second dataset whose title does not contain the search term.
        factories.Dataset(title='Second dataset')

        search_url = url_for('dcat.read_catalog', _format='ttl', q='First')
        body = app.get(search_url).body

        parser = RDFParser()
        parser.parse(body, _format='turtle')
        found = list(parser.datasets())

        assert len(found) == 1
        assert found[0]['title'] == matching['title']
Exemple #7
0
    def test_dataset_ttl(self, app):
        """The Turtle endpoint of a dataset serves parseable RDF."""
        created = factories.Dataset(notes='Test dataset')
        ttl_url = url_for('dcat.read_dataset',
                          _id=created['name'],
                          _format='ttl')

        response = app.get(ttl_url)
        assert response.headers['Content-Type'] == 'text/turtle'

        # Feed the body to the parser to check that it is an actual
        # serialization and not just a response with the right header.
        parser = RDFParser()
        parser.parse(response.body, _format='turtle')
        parsed = list(parser.datasets())
        assert len(parsed) == 1

        for field in ('title', 'notes'):
            assert parsed[0][field] == created[field]
Exemple #8
0
    def test_catalog_pagination_parameters(self, app):
        """The first-page link carries ``modified_since`` but no extras.

        The request is made with both ``modified_since`` and an unrelated
        ``extra_param``; the expected ``firstPage`` URL is built with
        ``modified_since`` only.
        """
        for _index in range(12):
            factories.Dataset()

        request_url = url_for('dcat.read_catalog',
                              _format='rdf',
                              modified_since='2018-03-22',
                              extra_param='test')
        response = app.get(request_url)

        graph = Graph()
        graph.parse(data=response.body, format='xml')

        paged = list(graph.subjects(RDF.type, HYDRA.PagedCollection))[0]

        assert self._object_value(graph, paged, HYDRA.itemsPerPage) == '10'

        actual_first = self._object_value(graph, paged, HYDRA.firstPage)
        expected_first = url_for('dcat.read_catalog',
                                 _format='rdf',
                                 page=1,
                                 _external=True,
                                 modified_since='2018-03-22')
        # Both URLs go through the same helper so that the order of the
        # query parameters does not affect the comparison.
        assert (_sort_query_params(actual_first) ==
                _sort_query_params(expected_first))
Exemple #9
0
    def test_harvest_source_job_history_clear(self):
        """Clearing one source's job history keeps the source and dataset.

        After the ``harvest_source_job_history_clear`` action the job and
        the harvest object can no longer be fetched, while the harvest
        source and the dataset still can.
        """
        # prepare
        harvest_source = factories.HarvestSourceObj(**SOURCE_DICT.copy())
        harvest_job = factories.HarvestJobObj(source=harvest_source)
        package = ckan_factories.Dataset()
        harvest_object = factories.HarvestObjectObj(
            job=harvest_job,
            source=harvest_source,
            package_id=package['id'])

        # execute
        context = {
            'model': model,
            'session': model.Session,
            'ignore_auth': True,
            'user': ''
        }
        clear_history = toolkit.get_action('harvest_source_job_history_clear')
        result = clear_history(context, {'id': harvest_source.id})

        # verify
        assert_equal(result, {'id': harvest_source.id})
        assert harvest_model.HarvestSource.get(harvest_source.id)
        assert_equal(harvest_model.HarvestJob.get(harvest_job.id), None)
        assert_equal(
            harvest_model.HarvestObject.get(harvest_object.id), None)
        stored_package = model.Package.get(package['id'])
        assert stored_package, 'is None'
        assert_equal(stored_package.id, package['id'])
Exemple #10
0
    def test_dataset_ttl(self):
        """The Turtle endpoint of a dataset serves parseable RDF."""
        created = factories.Dataset(notes='Test dataset')
        ttl_url = url_for('dcat_dataset', _id=created['name'], _format='ttl')

        response = self._get_test_app().get(ttl_url)
        eq_(response.headers['Content-Type'], 'text/turtle')

        # Feed the body to the parser to check that it is an actual
        # serialization and not just a response with the right header.
        parser = RDFParser()
        parser.parse(response.body, _format='turtle')
        parsed = list(parser.datasets())
        eq_(len(parsed), 1)

        for field in ('title', 'notes'):
            eq_(parsed[0][field], created[field])
Exemple #11
0
    def test_catalog_pagination(self):
        """The RDF catalog splits 12 datasets into pages of 10.

        Checks the number of datasets on the first page, the Hydra
        ``totalItems`` / ``itemsPerPage`` values and the first, next and
        last page links.
        """
        # ``range`` rather than the Python 2-only ``xrange``, so the test
        # does not fail with NameError on Python 3. For 12 iterations the
        # difference is irrelevant on Python 2.
        for _index in range(12):
            factories.Dataset()

        app = self._get_test_app()

        url = url_for('dcat_catalog', _format='rdf')

        response = app.get(url)

        content = response.body

        g = Graph()
        g.parse(data=content, format='xml')

        eq_(len(list(g.subjects(RDF.type, DCAT.Dataset))), 10)

        pagination = list(g.subjects(RDF.type, HYDRA.PagedCollection))[0]

        eq_(self._object_value(g, pagination, HYDRA.totalItems), '12')

        eq_(self._object_value(g, pagination, HYDRA.itemsPerPage), '10')

        eq_(self._object_value(g, pagination, HYDRA.firstPage),
            url_for('dcat_catalog', _format='rdf', page=1, host='test.ckan.net'))

        # With 12 datasets and 10 per page, page 2 is both next and last.
        eq_(self._object_value(g, pagination, HYDRA.nextPage),
            url_for('dcat_catalog', _format='rdf', page=2, host='test.ckan.net'))

        eq_(self._object_value(g, pagination, HYDRA.lastPage),
            url_for('dcat_catalog', _format='rdf', page=2, host='test.ckan.net'))
Exemple #12
0
    def test_catalog_pagination(self, app):
        """The RDF catalog splits 12 datasets into pages of 10.

        Checks the number of datasets on the first page, the Hydra
        ``totalItems`` / ``itemsPerPage`` values and the first, next and
        last page links.
        """
        for _index in range(12):
            factories.Dataset()

        response = app.get(url_for('dcat.read_catalog', _format='rdf'))

        graph = Graph()
        graph.parse(data=response.body, format='xml')

        dataset_nodes = list(graph.subjects(RDF.type, DCAT.Dataset))
        assert len(dataset_nodes) == 10

        paged = list(graph.subjects(RDF.type, HYDRA.PagedCollection))[0]

        assert self._object_value(graph, paged, HYDRA.totalItems) == '12'
        assert self._object_value(graph, paged, HYDRA.itemsPerPage) == '10'

        # With 12 datasets and 10 per page, page 2 is both next and last.
        expected_pages = (
            (HYDRA.firstPage, 1),
            (HYDRA.nextPage, 2),
            (HYDRA.lastPage, 2),
        )
        for predicate, page_number in expected_pages:
            actual_link = self._object_value(graph, paged, predicate)
            expected_link = url_for('dcat.read_catalog',
                                    _format='rdf',
                                    page=page_number,
                                    _external=True)
            assert (_sort_query_params(actual_link) ==
                    _sort_query_params(expected_link))
    def test_catalog_pagination_parameters(self):
        """The first-page link carries ``modified_since`` but no extras.

        The request is made with both ``modified_since`` and an unrelated
        ``extra_param``; the expected ``firstPage`` URL is built with
        ``modified_since`` only.
        """
        for _index in range(12):
            factories.Dataset()

        test_app = self._get_test_app()

        request_url = url_for('dcat_catalog',
                              _format='rdf',
                              modified_since='2018-03-22',
                              extra_param='test')
        response = test_app.get(request_url)

        graph = Graph()
        graph.parse(data=response.body, format='xml')

        paged = list(graph.subjects(RDF.type, HYDRA.PagedCollection))[0]

        eq_(self._object_value(graph, paged, HYDRA.itemsPerPage), '10')

        actual_first = self._object_value(graph, paged, HYDRA.firstPage)
        expected_first = url_for('dcat_catalog',
                                 _format='rdf',
                                 page=1,
                                 host='test.ckan.net',
                                 modified_since='2018-03-22')
        eq_(actual_first, expected_first)
    def test_catalog_q_search(self):
        """A ``q`` query on the catalog returns only the matching dataset."""
        matching = factories.Dataset(title='First dataset')
        # Second dataset whose title does not contain the search term.
        factories.Dataset(title='Second dataset')

        search_url = url_for('dcat_catalog', _format='ttl', q='First')
        body = self._get_test_app().get(search_url).body

        parser = RDFParser()
        parser.parse(body, _format='turtle')
        found = list(parser.datasets())

        eq_(len(found), 1)
        eq_(found[0]['title'], matching['title'])
Exemple #15
0
 def test_config_override(self):
     """Check that an explicit parameter wins over the config value."""
     # 'trees' is taken, so the generated name needs a suffix.
     factories.Dataset(name='trees')
     generated = HarvesterBase._gen_new_name(
         'Trees', append_type='number-sequence')
     assert_equal(generated, 'trees1')
Exemple #16
0
 def test_update_dataset_to_available_shorter_name(self):
     """A dataset can drop its numeric suffix when the short name is free."""
     # Useful when a reharvest produced duplicates, one set was purged and
     # a minor title change now lets the dataset lose the appended number.
     # Users do not like unnecessary numbers.
     factories.Dataset(name='trees1')
     unique_name = _ensure_name_is_unique('trees', existing_name='trees1')
     assert unique_name == 'trees'
 def test_dataset_endpoint_disabled(self):
     """``url_for`` cannot resolve ``dcat_dataset`` after a plugin reload.

     NOTE(review): whatever disables the endpoint (presumably a config
     override applied to this test) is not visible here -- confirm.
     """
     # Reload the plugin by unloading and loading it again.
     p.unload('dcat')
     p.load('dcat')
     created = factories.Dataset(notes='Test dataset')
     # without the route, url_for returns the given parameters
     unresolved = url_for('dcat_dataset', _id=created['name'], _format='xml')
     assert not unresolved.startswith('/')
     assert unresolved.startswith('dcat_dataset')
Exemple #18
0
    def test_structured_data_not_generated(self, app):
        """The dataset page contains no JSON-LD ``<script>`` element."""
        created = factories.Dataset(notes='test description')
        page = app.get(url_for('dataset.read', id=created['name']))

        json_ld_tag = '<script type="application/ld+json">'
        assert json_ld_tag not in page.body
    def _create_packages_and_tracking(self):
        """Create a dataset and post 12 tracking hits for its page.

        The dataset is stored as ``self.package``. Each hit goes through
        ``self._post_to_tracking`` with a different IP address.
        """
        self.package = ckan_factories.Dataset()
        dataset_url = url_for(controller='package',
                              action='read',
                              id=self.package['name'])
        test_app = self._get_test_app()
        # One visit per address in 199.200.100.0 - 199.200.100.11.
        visitor_ips = ['199.200.100.{}'.format(octet) for octet in range(12)]
        for visitor_ip in visitor_ips:
            self._post_to_tracking(url=dataset_url,
                                   app=test_app,
                                   ip=visitor_ip)
Exemple #20
0
    def test_dataset_no_header_returns_html(self, app):
        """Without an ``Accept`` header the dataset page is served as HTML."""
        created = factories.Dataset()
        page = app.get(url_for('dataset.read', id=created['name']))

        content_type = page.headers['Content-Type']
        assert content_type == 'text/html; charset=utf-8'
Exemple #21
0
    def test_selected_group_with_no_groups(self):
        """``get_selected_group`` returns '' for a dataset without groups."""
        # No ``groups`` value is passed to the factory.
        dataset_fields = {
            'tag_string': 'geography',
            'accessLevel': 'public',
            'contact_name': 'John Smith',
            'contact_email': '*****@*****.**',
            'rights': 'No restrictions on public use',
            'accrualPeriodicity': 'R/P1W',
        }
        created = factories.Dataset(**dataset_fields)

        selected = get_selected_group(created)
        assert selected == ''
    def test_datagovtheme_html_loads(self, app):
        """The dataset page renders the themed notes block and the notes."""
        description = 'Notes for a test dataset'
        # The current timestamp is appended to the name, presumably to
        # avoid clashes with datasets left over from earlier runs.
        unique_name = 'random_test{}'.format(int(time.time()))
        created = factories.Dataset(notes=description, name=unique_name)

        page_body = app.get('/dataset/{}'.format(created['name'])).body

        notes_block = '<div itemprop="description" class="notes embedded-content">'
        assert notes_block in page_body
        assert description in page_body
Exemple #23
0
    def test_package_create_without_any_group_modifier(self):
        """A dataset created without group input ends up with no groups."""
        dataset_fields = {
            'name': 'test-dataset-1',
            'tag_string': 'geography',
            'accessLevel': 'public',
            'contact_name': 'John Smith',
            'contact_email': '*****@*****.**',
            'rights': 'No restrictions on public use',
            'accrualPeriodicity': 'R/P1W',
        }
        created = factories.Dataset(**dataset_fields)

        assert created['groups'] == []
Exemple #24
0
    def test_dataset_no_header_returns_html(self):
        """Without an ``Accept`` header the dataset page is served as HTML."""
        created = factories.Dataset()
        dataset_url = url_for('dataset_read', id=created['name'])

        page = self._get_test_app().get(dataset_url)

        content_type = page.headers['Content-Type']
        eq_(content_type, 'text/html; charset=utf-8')
Exemple #25
0
    def test_dataset_profiles_not_found(self, app):
        """An unknown RDF profile gives a 409 that names the bad profile."""
        created = factories.Dataset(notes='Test dataset')
        jsonld_url = url_for('dcat.read_dataset',
                             _id=created['name'],
                             _format='jsonld',
                             profiles='nope')

        # ``status=409`` tells the test app which status code to expect.
        error_page = app.get(jsonld_url, status=409)

        assert 'Unknown RDF profiles: nope' in error_page.body
Exemple #26
0
    def test_labels_enable_by_config(self, app):
        """With the 'ca' locale the version-notes label is shown in Catalan."""
        version_notes = {'key': 'version_notes', 'value': 'bla'}
        created = factories.Dataset(extras=[version_notes])

        page_url = url_for('dataset.read', id=created['name'], locale='ca')
        page_body = app.get(page_url).body

        # The translated label must be present and the English one absent.
        assert 'Notes de la versió' in page_body
        assert 'Version notes' not in page_body
Exemple #27
0
    def test_labels_default(self, app):
        """Without a locale the English 'Version notes' label is shown."""
        version_notes = {'key': 'version_notes', 'value': 'bla'}
        created = factories.Dataset(extras=[version_notes])

        page = app.get(url_for('dataset.read', id=created['name']))

        assert 'Version notes' in page.body
Exemple #28
0
    def test_dataset_not_supported_returns_html(self, app):
        """An ``Accept: image/gif`` request still gets the HTML page."""
        created = factories.Dataset()
        dataset_url = url_for('dataset.read', id=created['name'])

        page = app.get(dataset_url, headers={'Accept': 'image/gif'})

        content_type = page.headers['Content-Type']
        assert content_type == 'text/html; charset=utf-8'
Exemple #29
0
    def test_dataset_multiple(self, app):
        """With several accepted types the response is Turtle.

        The ``Accept`` header ranks ``text/csv`` highest, then
        ``text/turtle``, then ``application/ld+json``; the test expects
        ``text/turtle`` back.
        """
        created = factories.Dataset()
        dataset_url = url_for('dataset.read', id=created['name'])

        accept = 'text/csv; q=1.0, text/turtle; q=0.6, application/ld+json; q=0.3'
        response = app.get(dataset_url, headers={'Accept': accept})

        assert response.headers['Content-Type'] == 'text/turtle'
Exemple #30
0
    def test_dataset_basic(self, app):
        """An ``Accept: application/ld+json`` request gets JSON-LD back."""
        created = factories.Dataset()
        dataset_url = url_for('dataset.read', id=created['name'])

        response = app.get(dataset_url,
                           headers={'Accept': 'application/ld+json'})

        content_type = response.headers['Content-Type']
        assert content_type == 'application/ld+json'