def test_prioritize_title_over_alternate_title(self):
    '''It should prefer a title match over an alternate title match.'''
    shared_title = faker.sentence()
    # The first license carries the text as its title; the second one only
    # lists the very same text among its alternate titles.
    titled = LicenseFactory(title=shared_title)
    LicenseFactory(alternate_titles=[shared_title])

    guessed = License.guess(shared_title)

    assert isinstance(guessed, License)
    assert titled.id == guessed.id
Example #2
0
    def test_oembed_region_territory_api_get(self, api):
        '''It should fetch a region territory in the oembed format.'''
        paca, _bdr, _arles = create_geozones_fixtures()
        open_license = LicenseFactory(id='fr-lo', title='Licence Ouverte')
        LicenseFactory(id='notspecified', title='Not Specified')
        expected_keys = ('html', 'width', 'maxwidth', 'height', 'maxheight')
        for dataset_class in TERRITORY_DATASETS['region'].values():
            organization = OrganizationFactory(
                id=dataset_class.organization_id)
            territory = dataset_class(paca)
            oembed_url = url_for(
                'api.oembeds',
                references='territory-{id}'.format(id=territory.slug))
            response = api.get(oembed_url)
            assert200(response)

            # The endpoint answers with a list; only its first item is checked
            data = json.loads(response.data)[0]
            for key in expected_keys:
                assert key in data
            assert data['type'] == 'rich'
            assert data['version'] == '1.0'

            html = data['html']
            assert territory.title in html
            assert escape(territory.url) in html
            assert 'alt="{name}"'.format(name=organization.name) in html
            assert md(territory.description, source_tooltip=True) in html
            assert 'Download from local.test' in html
            assert 'Add to your own website' in html

            # The zonages_reg dataset is exempt from the license and source
            # link assertions below.
            exempted = (TERRITORY_DATASETS['region']['zonages_reg'], )
            if dataset_class in exempted:
                continue
            assert 'License: {0}'.format(open_license.title) in html
            assert '© {0}'.format(open_license.id) in html
            assert (
                '<a data-tooltip="Source" href="http://local.test/datasets'
                in html)
Example #3
0
 def test_oembed_county_territory_api_get(self):
     '''It should fetch a county territory in the oembed format.'''
     midi_pyrenees = GeoZoneFactory(id='fr/region/73',
                                    level='fr/region',
                                    name='Midi-Pyrénées',
                                    code='73')
     aveyron = GeoZoneFactory(id='fr/county/12',
                              level='fr/county',
                              name='Aveyron',
                              code='12',
                              parents=[midi_pyrenees.id])
     licence_ouverte = LicenseFactory(id='fr-lo', title='Licence Ouverte')
     LicenseFactory(id='notspecified', title='Not Specified')
     for territory_dataset_class in TERRITORY_DATASETS['county'].values():
         organization = OrganizationFactory(
             id=territory_dataset_class.organization_id)
         territory = territory_dataset_class(aveyron)
         reference = 'territory-{id}'.format(id=territory.slug)
         response = self.get(url_for('api.oembeds', references=reference))
         self.assert200(response)
         # The endpoint answers with a list; only its first item is checked
         data = json.loads(response.data)[0]
         self.assertIn('html', data)
         self.assertIn('width', data)
         self.assertIn('maxwidth', data)
         self.assertIn('height', data)
         self.assertIn('maxheight', data)
         # assertEqual rather than assertTrue: the second positional
         # argument of assertTrue is the failure message, so the former
         # checks only verified that the values were truthy.
         self.assertEqual(data['type'], 'rich')
         self.assertEqual(data['version'], '1.0')

         html = data['html']
         self.assertIn(territory.title, html)
         # NOTE(review): cgi.escape was removed in Python 3.8 - confirm the
         # interpreter this suite targets.
         self.assertIn(cgi.escape(territory.url), html)
         self.assertIn('alt="{name}"'.format(name=organization.name), html)
         self.assertIn(md(territory.description, source_tooltip=True), html)
         self.assertIn('Download from localhost', html)
         self.assertIn('Add to your own website', html)
         # comptes_c and zonages_c are exempt from the license and source
         # link assertions.
         if territory_dataset_class not in (
                 TERRITORY_DATASETS['county']['comptes_c'],
                 TERRITORY_DATASETS['county']['zonages_c']):
             self.assertIn(
                 'License: {title}'.format(title=licence_ouverte.title),
                 html)
             self.assertIn(
                 '© {license_id}'.format(license_id=licence_ouverte.id),
                 html)
             self.assertIn(
                 '<a data-tooltip="Source" href="http://localhost/datasets',
                 html)
Example #4
0
    def test_dataset_licenses_list(self):
        '''It should fetch the dataset licenses list from the API'''
        created = LicenseFactory.create_batch(4)

        endpoint = url_for('api.licenses')
        response = self.get(endpoint)

        self.assert200(response)
        # The payload must hold as many entries as licenses were created
        self.assertEqual(len(response.json), len(created))
Example #5
0
 def test_match_by_alternate_title_with_multiple_candidates_from_one_licence(
         self):
     '''
     It should return the license when two of its own alternate titles
     are candidates for the guessed text.
     '''
     near_duplicates = ['Licence Ouverte v2', 'Licence Ouverte v2.0']
     expected = LicenseFactory(alternate_titles=near_duplicates)

     guessed = License.guess('Licence Ouverte v2.0')

     assert isinstance(guessed, License)
     assert expected.id == guessed.id
Example #6
0
    def test_all_resource_fields(self):
        '''It should expose the resource fields as RDF properties.'''
        dataset_license = LicenseFactory()
        csv_resource = ResourceFactory(format='csv')
        dataset = DatasetFactory(resources=[csv_resource],
                                 license=dataset_license)
        permalink = url_for(
            'datasets.resource', id=csv_resource.id, _external=True)

        rdf = resource_to_rdf(csv_resource, dataset)

        # Descriptive metadata
        assert rdf.value(DCT.title) == Literal(csv_resource.title)
        assert rdf.value(DCT.description) == Literal(csv_resource.description)
        assert rdf.value(DCT.issued) == Literal(csv_resource.published)
        assert rdf.value(DCT.modified) == Literal(csv_resource.modified)

        # License: the URL goes to dct:license, the title to dct:rights
        assert (rdf.value(DCT.license).identifier
                == URIRef(dataset_license.url))
        assert rdf.value(DCT.rights) == Literal(dataset_license.title)

        # Access and file metadata
        assert (rdf.value(DCAT.downloadURL).identifier
                == URIRef(csv_resource.url))
        assert rdf.value(DCAT.accessURL).identifier == URIRef(permalink)
        assert rdf.value(DCAT.bytesSize) == Literal(csv_resource.filesize)
        assert rdf.value(DCAT.mediaType) == Literal(csv_resource.mime)
        assert rdf.value(DCT.term('format')) == Literal(csv_resource.format)

        # The checksum is a nested SPDX node
        checksum = rdf.value(SPDX.checksum)
        checksum_id = checksum.identifier
        assert rdf.graph.value(checksum_id, RDF.type) == SPDX.Checksum
        assert (rdf.graph.value(checksum_id, SPDX.algorithm)
                == SPDX.checksumAlgorithm_sha1)
        assert (checksum.value(SPDX.checksumValue)
                == Literal(csv_resource.checksum.value))
Example #7
0
    def test_xml_catalog(self, rmock):
        '''It should harvest datasets from the mocked XML DCAT catalog.'''
        LicenseFactory(id='lov2', title='Licence Ouverte Version 2.0')

        catalog_url = mock_dcat(rmock, 'catalog.xml', path='catalog.xml')
        source = HarvestSourceFactory(backend='dcat',
                                      url=catalog_url,
                                      organization=OrganizationFactory())

        actions.run(source.slug)

        # test dct:license support
        # (the lookup key contains a colon, hence the dict unpacking)
        third = Dataset.objects.get(**{'extras__dct:identifier': '3'})
        assert third.license.id == 'lov2'
        assert third.extras['remote_url'] == 'http://data.test.org/datasets/3'
        assert third.created_at.date() == date(2016, 12, 14)
        assert third.last_modified.date() == date(2016, 12, 14)
        assert third.frequency == 'daily'
        assert third.description == 'Dataset 3 description'

        first = Dataset.objects.get(**{'extras__dct:identifier': '1'})
        # test abstract description support
        assert first.description == 'Dataset 1 description'
Example #8
0
 def test_oembed_town_territory_api_get(self):
     '''It should fetch a town territory in the oembed format.'''
     arles = GeoZoneFactory(id='fr/town/13004',
                            level='fr/town',
                            name='Arles',
                            code='13004',
                            population=52439)
     licence_ouverte = LicenseFactory(id='fr-lo', title='Licence Ouverte')
     odbl_license = LicenseFactory(id='odc-odbl', title='ODbL')
     LicenseFactory(id='notspecified', title='Not Specified')
     town_datasets = TERRITORY_DATASETS['town']
     for territory_dataset_class in town_datasets.values():
         organization = OrganizationFactory(
             id=territory_dataset_class.organization_id)
         territory = territory_dataset_class(arles)
         reference = 'territory-{id}'.format(id=territory.slug)
         response = self.get(url_for('api.oembeds', references=reference))
         self.assert200(response)
         # The endpoint answers with a list; only its first item is checked
         data = json.loads(response.data)[0]
         self.assertIn('html', data)
         self.assertIn('width', data)
         self.assertIn('maxwidth', data)
         self.assertIn('height', data)
         self.assertIn('maxheight', data)
         # assertEqual rather than assertTrue: the second positional
         # argument of assertTrue is the failure message, so the former
         # checks only verified that the values were truthy.
         self.assertEqual(data['type'], 'rich')
         self.assertEqual(data['version'], '1.0')

         html = data['html']
         self.assertIn(territory.title, html)
         # NOTE(review): cgi.escape was removed in Python 3.8 - confirm the
         # interpreter this suite targets.
         self.assertIn(cgi.escape(territory.url), html)
         self.assertIn('alt="{name}"'.format(name=organization.name), html)
         self.assertIn(md(territory.description, source_tooltip=True), html)
         self.assertIn('Download from localhost', html)
         self.assertIn('Add to your own website', html)
         # comptes_t is exempt from the license and source link assertions
         if territory_dataset_class not in (town_datasets['comptes_t'], ):
             # ban_odbl_t is expected under ODbL, every other dataset under
             # Licence Ouverte.
             if territory_dataset_class == town_datasets['ban_odbl_t']:
                 license = odbl_license
             else:
                 license = licence_ouverte
             self.assertIn('License: {title}'.format(title=license.title),
                           html)
             self.assertIn('© {license_id}'.format(license_id=license.id),
                           html)
             self.assertIn(
                 '<a data-tooltip="Source" href="http://localhost/datasets',
                 html)
Example #9
0
    def test_oembed_town_territory_api_get(self, api):
        '''It should fetch a town territory in the oembed format.'''
        _paca, _bdr, arles = create_geozones_fixtures()
        open_license = LicenseFactory(id='fr-lo', title='Licence Ouverte')
        odbl = LicenseFactory(id='odc-odbl', title='ODbL')
        LicenseFactory(id='notspecified', title='Not Specified')
        town_datasets = TERRITORY_DATASETS['commune']
        expected_keys = ('html', 'width', 'maxwidth', 'height', 'maxheight')
        for dataset_class in town_datasets.values():
            organization = OrganizationFactory(
                id=dataset_class.organization_id)
            territory = dataset_class(arles)
            oembed_url = url_for(
                'api.oembeds',
                references='territory-{id}'.format(id=territory.slug))
            response = api.get(oembed_url)
            assert200(response)

            # The endpoint answers with a list; only its first item is checked
            data = json.loads(response.data)[0]
            for key in expected_keys:
                assert key in data
            assert data['type'] == 'rich'
            assert data['version'] == '1.0'

            html = data['html']
            assert territory.title in html
            # NOTE(review): cgi.escape was removed in Python 3.8 - confirm
            # the interpreter this suite targets.
            assert cgi.escape(territory.url) in html
            assert 'alt="{name}"'.format(name=organization.name) in html
            assert md(territory.description, source_tooltip=True) in html
            assert 'Download from localhost' in html
            assert 'Add to your own website' in html

            # comptes_com is exempt from the license and source link checks
            if dataset_class in (town_datasets['comptes_com'], ):
                continue
            # ban_odbl_com is expected under ODbL, the others under
            # Licence Ouverte.
            expected = (
                odbl if dataset_class == town_datasets['ban_odbl_com']
                else open_license)
            assert 'License: {title}'.format(title=expected.title) in html
            assert '© {license_id}'.format(license_id=expected.id) in html
            assert (
                '<a data-tooltip="Source" href="http://localhost/datasets'
                in html)
Example #10
0
 def test_oembed_region_territory_api_get(self):
     '''It should fetch a region territory in the oembed format.'''
     # Only the first fixture zone (paca) is used by this test
     paca, bdr, arles = create_geozones_fixtures()
     licence_ouverte = LicenseFactory(id='fr-lo', title='Licence Ouverte')
     LicenseFactory(id='notspecified', title='Not Specified')
     for territory_dataset_class in TERRITORY_DATASETS['region'].values():
         organization = OrganizationFactory(
             id=territory_dataset_class.organization_id)
         territory = territory_dataset_class(paca)
         reference = 'territory-{id}'.format(id=territory.slug)
         response = self.get(url_for('api.oembeds', references=reference))
         self.assert200(response)
         # The endpoint answers with a list; only its first item is checked
         data = json.loads(response.data)[0]
         self.assertIn('html', data)
         self.assertIn('width', data)
         self.assertIn('maxwidth', data)
         self.assertIn('height', data)
         self.assertIn('maxheight', data)
         # assertEqual rather than assertTrue: the second positional
         # argument of assertTrue is the failure message, so the former
         # checks only verified that the values were truthy.
         self.assertEqual(data['type'], 'rich')
         self.assertEqual(data['version'], '1.0')

         html = data['html']
         self.assertIn(territory.title, html)
         # NOTE(review): cgi.escape was removed in Python 3.8 - confirm the
         # interpreter this suite targets.
         self.assertIn(cgi.escape(territory.url), html)
         self.assertIn('alt="{name}"'.format(name=organization.name), html)
         self.assertIn(md(territory.description, source_tooltip=True), html)
         self.assertIn('Download from localhost', html)
         self.assertIn('Add to your own website', html)
         # comptes_reg and zonages_reg are exempt from the license and
         # source link assertions.
         if territory_dataset_class not in (
                 TERRITORY_DATASETS['region']['comptes_reg'],
                 TERRITORY_DATASETS['region']['zonages_reg']):
             self.assertIn(
                 'License: {title}'.format(title=licence_ouverte.title),
                 html)
             self.assertIn(
                 '© {license_id}'.format(license_id=licence_ouverte.id),
                 html)
             self.assertIn(
                 '<a data-tooltip="Source" href="http://localhost/datasets',
                 html)
Example #11
0
    def test_xml_catalog(self, rmock):
        '''It should attach the matching license to a harvested dataset.'''
        LicenseFactory(id='lov2', title='Licence Ouverte Version 2.0')

        catalog_url = mock_dcat(rmock, 'catalog.xml', path='catalog.xml')
        source = HarvestSourceFactory(backend='dcat',
                                      url=catalog_url,
                                      organization=OrganizationFactory())

        actions.run(source.slug)

        # test dct:license support
        # (the lookup key contains a colon, hence the dict unpacking)
        harvested = Dataset.objects.get(**{'extras__dct:identifier': '3'})
        assert harvested.license.id == 'lov2'
Example #12
0
    def test_sigoreme_xml_catalog(self, rmock):
        '''It should harvest a dataset from the sig.oreme.rdf fixture.'''
        LicenseFactory(id='fr-lo', title='Licence ouverte / Open Licence')
        rdf_url = mock_dcat(rmock, 'sig.oreme.rdf')
        organization = OrganizationFactory()
        source = HarvestSourceFactory(
            backend='dcat', url=rdf_url, organization=organization)

        actions.run(source.slug)

        # Only the first dataset of the organization is inspected
        harvested = Dataset.objects.filter(organization=organization).first()
        assert harvested is not None
        assert harvested.frequency == 'irregular'
        assert 'gravi' in harvested.tags  # support dcat:keyword
        assert 'geodesy' in harvested.tags  # support dcat:theme
        assert harvested.license.id == 'fr-lo'
        assert len(harvested.resources) == 1
        assert harvested.description.startswith(
            'Data from the \'National network')
Example #13
0
    def test_match_license_from_rights_uri(self):
        '''It should match the license from a distribution dct:rights URI.'''
        expected = LicenseFactory()
        dataset_node = BNode()
        graph = Graph()

        # Minimal dataset description
        graph.set((dataset_node, RDF.type, DCAT.Dataset))
        graph.set((dataset_node, DCT.title, Literal(faker.sentence())))

        # A single distribution whose rights point to the license URL
        distribution = BNode()
        graph.set((distribution, RDF.type, DCAT.Distribution))
        graph.set((distribution, DCAT.downloadURL, URIRef(faker.uri())))
        graph.set((distribution, DCT.rights, URIRef(expected.url)))
        graph.add((dataset_node, DCAT.distribution, distribution))

        parsed = dataset_from_rdf(graph)

        assert isinstance(parsed.license, License)
        assert parsed.license == expected
Example #14
0
    def test_match_license_from_license_title(self):
        '''It should match the license from a distribution license title.'''
        expected = LicenseFactory()
        dataset_node = BNode()
        graph = Graph()

        # Minimal dataset description
        graph.set((dataset_node, RDF.type, DCAT.Dataset))
        graph.set((dataset_node, DCT.title, Literal(faker.sentence())))

        # A single distribution carrying the license title as a literal
        distribution = BNode()
        graph.set((distribution, RDF.type, DCAT.Distribution))
        graph.set((distribution, DCAT.downloadURL, URIRef(faker.uri())))
        graph.set((distribution, DCT.license, Literal(expected.title)))
        graph.add((dataset_node, DCAT.distribution, distribution))

        parsed = dataset_from_rdf(graph)
        parsed.validate()

        self.assertIsInstance(parsed.license, License)
        self.assertEqual(parsed.license, expected)
Example #15
0
    def test_json_ld(self):
        '''It should render a json-ld markup into the dataset page'''
        resource = ResourceFactory(format='png',
                                   description='* Title 1\n* Title 2',
                                   metrics={'views': 10})
        license = LicenseFactory(url='http://www.datagouv.fr/licence')
        dataset = DatasetFactory(license=license,
                                 tags=['foo', 'bar'],
                                 resources=[resource],
                                 description='a&éèëù$£',
                                 owner=UserFactory(),
                                 extras={'foo': 'bar'})
        community_resource = CommunityResourceFactory(
            dataset=dataset,
            format='csv',
            description='* Title 1\n* Title 2',
            metrics={'views': 42})

        url = url_for('datasets.show', dataset=dataset)
        response = self.get(url)
        self.assert200(response)
        json_ld = self.get_json_ld(response)

        # NOTE: assertEqual is used throughout instead of the deprecated
        # assertEquals alias.

        # Dataset level properties
        self.assertEqual(json_ld['@context'], 'http://schema.org')
        self.assertEqual(json_ld['@type'], 'Dataset')
        self.assertEqual(json_ld['@id'], str(dataset.id))
        self.assertEqual(json_ld['description'], 'a&éèëù$£')
        self.assertEqual(json_ld['alternateName'], dataset.slug)
        # Dates are compared on their first 16 characters only
        # (YYYY-MM-DDTHH:MM), ignoring seconds and below.
        self.assertEqual(json_ld['dateCreated'][:16],
                         dataset.created_at.isoformat()[:16])
        self.assertEqual(json_ld['dateModified'][:16],
                         dataset.last_modified.isoformat()[:16])
        self.assertEqual(json_ld['url'], 'http://localhost{}'.format(url))
        self.assertEqual(json_ld['name'], dataset.title)
        self.assertEqual(json_ld['keywords'], 'bar,foo')
        self.assertEqual(len(json_ld['distribution']), 1)

        # The dataset's own resource
        json_ld_resource = json_ld['distribution'][0]
        self.assertEqual(json_ld_resource['@type'], 'DataDownload')
        self.assertEqual(json_ld_resource['@id'], str(resource.id))
        self.assertEqual(json_ld_resource['url'], resource.latest)
        self.assertEqual(json_ld_resource['name'], resource.title)
        self.assertEqual(json_ld_resource['contentUrl'], resource.url)
        self.assertEqual(json_ld_resource['dateCreated'][:16],
                         resource.created_at.isoformat()[:16])
        self.assertEqual(json_ld_resource['dateModified'][:16],
                         resource.modified.isoformat()[:16])
        self.assertEqual(json_ld_resource['datePublished'][:16],
                         resource.published.isoformat()[:16])
        self.assertEqual(json_ld_resource['encodingFormat'], 'png')
        self.assertEqual(json_ld_resource['contentSize'],
                         resource.filesize)
        self.assertEqual(json_ld_resource['fileFormat'], resource.mime)
        # The markdown list in the description is expected as plain text
        self.assertEqual(json_ld_resource['description'],
                         'Title 1 Title 2')
        self.assertEqual(json_ld_resource['interactionStatistic'],
                         {
                             '@type': 'InteractionCounter',
                             'interactionType': {
                                 '@type': 'DownloadAction',
                             },
                             'userInteractionCount': 10,
                         })

        # The community resource
        self.assertEqual(len(json_ld['contributedDistribution']), 1)
        json_ld_resource = json_ld['contributedDistribution'][0]
        self.assertEqual(json_ld_resource['@type'], 'DataDownload')
        self.assertEqual(json_ld_resource['@id'], str(community_resource.id))
        self.assertEqual(json_ld_resource['url'], community_resource.latest)
        self.assertEqual(json_ld_resource['name'], community_resource.title)
        self.assertEqual(json_ld_resource['contentUrl'],
                         community_resource.url)
        self.assertEqual(json_ld_resource['dateCreated'][:16],
                         community_resource.created_at.isoformat()[:16])
        self.assertEqual(json_ld_resource['dateModified'][:16],
                         community_resource.modified.isoformat()[:16])
        self.assertEqual(json_ld_resource['datePublished'][:16],
                         community_resource.published.isoformat()[:16])
        self.assertEqual(json_ld_resource['encodingFormat'],
                         community_resource.format)
        self.assertEqual(json_ld_resource['contentSize'],
                         community_resource.filesize)
        self.assertEqual(json_ld_resource['fileFormat'],
                         community_resource.mime)
        self.assertEqual(json_ld_resource['description'], 'Title 1 Title 2')
        self.assertEqual(json_ld_resource['interactionStatistic'], {
            '@type': 'InteractionCounter',
            'interactionType': {
                '@type': 'DownloadAction',
            },
            'userInteractionCount': 42,
        })

        # Extras, license and author
        self.assertEqual(json_ld['extras'],
                         [{
                             '@type': 'http://schema.org/PropertyValue',
                             'name': 'foo',
                             'value': 'bar',
                         }])
        self.assertEqual(json_ld['license'], 'http://www.datagouv.fr/licence')
        self.assertEqual(json_ld['author']['@type'], 'Person')
Example #16
0
 def test_multiple_strings(self):
     '''It should match when only one of several given strings matches.'''
     expected = LicenseFactory()
     # The first candidate matches nothing; the second is the license id
     guessed = License.guess('should not match', expected.id)
     self.assertIsInstance(guessed, License)
     self.assertEqual(expected.id, guessed.id)
Example #17
0
 def test_match_by_title_with_extra_inner_space(self):
     '''It should match a title despite a doubled inner space.'''
     expected = LicenseFactory(title='License ODBl')
     # 2 spaces instead of 1 between the two words
     guessed = License.guess('License  ODBl')
     self.assertIsInstance(guessed, License)
     self.assertEqual(expected.id, guessed.id)
Example #18
0
 def test_match_by_title_with_mismatching_case(self):
     '''It should match a title whose letter case differs.'''
     expected = LicenseFactory(title='License ODBl')
     # Only the case of the last letter differs from the stored title
     guessed = License.guess('License ODBL')
     self.assertIsInstance(guessed, License)
     self.assertEqual(expected.id, guessed.id)
 def test_match_by_alternate_title_with_extra_inner_space(self):
     '''It should match an alternate title despite a doubled inner space.'''
     expected = LicenseFactory(alternate_titles=['License ODBl'])
     # 2 spaces instead of 1 between the two words
     guessed = License.guess('License  ODBl')
     assert isinstance(guessed, License)
     assert expected.id == guessed.id
Example #20
0
 def test_match_by_title_with_low_edit_distance(self):
     '''It should match a title that is one letter away from the input.'''
     expected = LicenseFactory(title='License')
     # 'Licence' and 'License' differ by a single letter
     guessed = License.guess('Licence')
     self.assertIsInstance(guessed, License)
     self.assertEqual(expected.id, guessed.id)
Example #21
0
 def test_not_found_with_default(self):
     '''It should return the given default when nothing matches.'''
     fallback = LicenseFactory()
     guessed = License.guess('should not be found', default=fallback)
     self.assertEqual(guessed.id, fallback.id)
Example #22
0
 def test_exact_match_by_title(self):
     '''It should match a license given its exact title.'''
     expected = LicenseFactory()
     guessed = License.guess(expected.title)
     self.assertIsInstance(guessed, License)
     self.assertEqual(expected.id, guessed.id)
Example #23
0
def generate_licenses(count):
    '''Call LicenseFactory() `count` times, discarding the results.

    Nothing is created when `count` is zero or negative.
    '''
    remaining = count
    while remaining > 0:
        LicenseFactory()
        remaining -= 1
Example #24
0
 def setUp(self):
     '''Seed the database with a few random licenses before each test.'''
     # Random extra records ensure that matching is a true match and not
     # just "the only license in the DB".
     noise_count = 3
     LicenseFactory.create_batch(noise_count)
 def test_multiple_strings(self):
     '''It should match when only one of several given strings matches.'''
     expected = LicenseFactory()
     # The first candidate matches nothing; the second is the license id
     guessed = License.guess('should not match', expected.id)
     assert isinstance(guessed, License)
     assert expected.id == guessed.id
 def test_exact_match_by_title_with_spaces(self):
     '''It should match a title wrapped in leading/trailing spaces.'''
     expected = LicenseFactory()
     padded_title = ' {0} '.format(expected.title)
     guessed = License.guess(padded_title)
     assert isinstance(guessed, License)
     assert expected.id == guessed.id
 def test_exact_match_by_alternate_title_with_spaces(self):
     '''It should match an alternate title wrapped in spaces.'''
     alias = faker.sentence()
     expected = LicenseFactory(alternate_titles=[alias])
     padded_alias = ' {0} '.format(alias)
     guessed = License.guess(padded_alias)
     assert isinstance(guessed, License)
     assert expected.id == guessed.id
Example #28
0
 def setUp(self):
     '''Seed the database with a few random licenses before each test.'''
     # Random extra records ensure that matching is a true match and not
     # just "the only license in the DB".
     noise_count = 3
     LicenseFactory.create_batch(noise_count)
Example #29
0
 def test_exact_match_by_title_with_spaces(self):
     '''It should match a title wrapped in leading/trailing spaces.'''
     expected = LicenseFactory()
     padded_title = ' {0} '.format(expected.title)
     guessed = License.guess(padded_title)
     self.assertIsInstance(guessed, License)
     self.assertEqual(expected.id, guessed.id)
 def test_match_by_alternate_title_with_mismatching_case(self):
     '''It should match an alternate title whose letter case differs.'''
     expected = LicenseFactory(alternate_titles=['License ODBl'])
     # Only the case of the last letter differs from the stored title
     guessed = License.guess('License ODBL')
     assert isinstance(guessed, License)
     assert expected.id == guessed.id
 def test_match_by_alternate_title_with_low_edit_distance(self):
     '''It should match an alternate title one letter away from the input.'''
     expected = LicenseFactory(alternate_titles=['License'])
     # 'Licence' and 'License' differ by a single letter
     guessed = License.guess('Licence')
     assert isinstance(guessed, License)
     assert expected.id == guessed.id