def test_blogpost_with_first_image_as_thumbnail_as_src_set(self, blogpost, tpl):
    '''The first image's src, srcset and sizes should become the post thumbnail.'''
    title = faker.sentence()
    post_url = faker.uri()
    image_url = faker.image_url()
    summary = faker.sentence()
    publish_date = faker.date_time(tzinfo=pytz.timezone(faker.timezone()))
    widths = ('1200', '1024', '300')
    srcset = ', '.join(
        '{0} {1}w'.format(faker.image_url(width=w), w) for w in widths
    )
    sizes = "(max-width: 1200px) 100vw, 1200px"
    content = tpl.format(image_url, srcset, sizes)
    feed = self.feed('Some blog', title, content, post_url,
                     published=publish_date, summary=summary)

    post = blogpost(feed)

    assert post['title'] == title
    assert post['link'] == post_url
    assert post['summary'] == summary
    assert_equal_dates(post['date'], publish_date)
    assert post['image_url'] == image_url
    assert post['srcset'] == srcset
    assert post['sizes'] == sizes
def test_blogpost_with_thumbnail_as_enclosure(self, blogpost, mime):
    '''An image enclosure should be used as thumbnail (no srcset/sizes).'''
    title = faker.sentence()
    post_url = faker.uri()
    image_url = faker.image_url()
    publish_date = faker.date_time(tzinfo=pytz.timezone(faker.timezone()))
    text = faker.sentence()
    enclosure = {'type': mime, 'url': image_url}
    feed = self.feed('Some blog', title, '<div>{0}</div>'.format(text),
                     post_url, published=publish_date, enclosure=enclosure)

    post = blogpost(feed)

    assert post['title'] == title
    assert post['link'] == post_url
    assert post['summary'] == text
    assert_equal_dates(post['date'], publish_date)
    assert post['image_url'] == image_url
    assert 'srcset' not in post
    assert 'sizes' not in post
def test_blogpost_with_first_image_as_thumbnail_as_src_set(self, blogpost, tpl):
    '''The first image's src, srcset and sizes should become the post thumbnail.'''
    title = faker.sentence()
    post_url = faker.uri()
    image_url = faker.image_url()
    summary = faker.sentence()
    publish_date = faker.date_time(tzinfo=pytz.timezone(faker.timezone()))
    srcset = ', '.join(
        '{0} {1}w'.format(faker.image_url(width=w), w)
        for w in ('1200', '1024', '300')
    )
    sizes = "(max-width: 1200px) 100vw, 1200px"
    content = tpl.format(image_url, srcset, sizes)
    feed = self.feed('Some blog', title, content, post_url,
                     published=publish_date, summary=summary)

    post = blogpost(feed)

    assert post['title'] == title
    assert post['link'] == post_url
    assert post['summary'] == summary
    assert_equal_dates(post['date'], publish_date)
    assert post['image_url'] == image_url
    assert post['srcset'] == srcset
    assert post['sizes'] == sizes
def test_blogpost_with_first_image_as_thumbnail_and_summary(self, blogpost):
    '''The first image should be the thumbnail; the explicit summary wins.'''
    title = faker.sentence()
    post_url = faker.uri()
    image_url = faker.image_url()
    summary = faker.sentence()
    publish_date = faker.date_time(tzinfo=pytz.timezone(faker.timezone()))
    content = '<p><img class="whatever" src="{0}" /> Whatever whatever</p>'.format(
        image_url)
    feed = self.feed('Some blog', title, content, post_url,
                     published=publish_date, summary=summary)

    post = blogpost(feed)

    assert post['title'] == title
    assert post['link'] == post_url
    assert post['summary'] == summary
    assert_equal_dates(post['date'], publish_date)
    assert post['image_url'] == image_url
    assert 'srcset' not in post
    assert 'sizes' not in post
def test_unicode(self):
    '''Non-ASCII titles/descriptions should survive the RDF round-trip.'''
    title = 'ééé'
    description = 'éééé'

    g = Graph()
    dataset_node = BNode()
    g.add((dataset_node, RDF.type, DCAT.Dataset))
    g.add((dataset_node, DCT.title, Literal(title)))
    g.add((dataset_node, DCT.description, Literal(description)))

    dist_node = BNode()
    g.add((dist_node, RDF.type, DCAT.Distribution))
    g.add((dist_node, DCT.title, Literal(title)))
    g.add((dist_node, DCT.description, Literal(description)))
    g.add((dist_node, DCAT.downloadURL, URIRef(faker.uri())))
    g.add((dataset_node, DCAT.distribution, dist_node))

    dataset = dataset_from_rdf(g)
    dataset.validate()

    assert dataset.title == title
    assert dataset.description == description
    resource = dataset.resources[0]
    assert resource.title == title
    assert resource.description == description
def test_json_error_check_one(self):
    '''A non-JSON response from the check service should yield a 503.

    The mocked endpoint returns HTML instead of JSON so the API's
    JSON decoding fails and the error is surfaced as 503.
    '''
    url = faker.uri()
    # Fixed typo: 'test/html' -> 'text/html' (the intent is clearly an
    # HTML content type; the test still exercises the non-JSON path).
    httpretty.register_uri(httpretty.POST, CHECK_ONE_URL,
                           body='<strong>not json</strong>',
                           content_type='text/html')
    response = self.get(url_for('api.checkurl'),
                        qs={'url': url, 'group': ''})
    self.assertStatus(response, 503)
def test_connection_error(self):
    '''A connection failure to the check service should yield a 503.'''
    url = faker.uri()
    error = requests.ConnectionError('Unable to connect')
    httpretty.register_uri(httpretty.POST, CHECK_ONE_URL,
                           body=exception_factory(error))

    response = self.get(url_for('api.checkurl'),
                        qs={'url': url, 'group': ''})

    self.assertStatus(response, 503)
def test_timeout(self):
    '''A timeout from the check service should yield a 503.'''
    url = faker.uri()
    error = requests.Timeout('Request timed out')
    httpretty.register_uri(httpretty.POST, CHECK_ONE_URL,
                           body=exception_factory(error))

    response = self.get(url_for('api.checkurl'),
                        qs={'url': url, 'group': ''})

    self.assertStatus(response, 503)
def test_invalid_url(self):
    '''A URL whose check reports status 503 should be relayed as 503.'''
    url = faker.uri()
    mock_url_check(url, {'status': 503})

    response = self.get(url_for('api.checkurl'),
                        qs={'url': url, 'group': ''})

    self.assertStatus(response, 503)
def test_delayed_url(self):
    '''Repeated failed retrievals should end in a 503 with an explanation.'''
    url = faker.uri()
    mock_url_check(url, status=404)

    response = self.get(url_for('api.checkurl'),
                        qs={'url': url, 'group': ''})

    self.assertStatus(response, 503)
    expected = 'We were unable to retrieve the URL after 2 attempts.'
    self.assertEqual(response.json['error'], expected)
def test_download_url_over_access_url(self):
    '''downloadURL should take precedence over accessURL when both exist.'''
    node = BNode()
    g = Graph()
    g.add((node, RDF.type, DCAT.Distribution))
    g.add((node, DCT.title, Literal(faker.sentence())))

    # Only accessURL present: it is used as the resource URL.
    access_url = faker.uri()
    g.add((node, DCAT.accessURL, Literal(access_url)))
    resource = resource_from_rdf(g)
    resource.validate()
    assert resource.url == access_url

    # downloadURL added: it should win over accessURL.
    download_url = faker.uri()
    g.add((node, DCAT.downloadURL, Literal(download_url)))
    resource = resource_from_rdf(g)
    resource.validate()
    assert resource.url == download_url
def test_no_preview_for(typ):
    '''Resources of the given ODS type should not expose a preview URL.'''
    domain = faker.domain_name()
    remote_id = faker.unique_string()
    resource = ResourceFactory(extras={'ods:type': typ})
    # Assignment prevents the garbage collector from dropping the dataset
    # before the end of the test.
    dataset = DatasetFactory(resources=[resource], extras={  # noqa
        'harvest:remote_id': remote_id,
        'harvest:domain': domain,
        'ods:url': faker.uri(),
    })

    assert resource.preview_url is None
def test_no_preview_for_community_resources():
    '''Community resources should never expose an ODS preview URL.'''
    domain = faker.domain_name()
    remote_id = faker.unique_string()
    extras = {
        'harvest:remote_id': remote_id,
        'harvest:domain': domain,
        'ods:url': faker.uri(),
    }
    dataset = DatasetFactory(extras=extras)
    resource = CommunityResourceFactory(dataset=dataset,
                                        extras={'ods:type': 'api'})

    assert resource.preview_url is None
def test_returned_metadata(self):
    '''The check endpoint should relay the checker's metadata.'''
    url = faker.uri()
    mock_url_check(url, {
        'content-type': 'text/html; charset=utf-8',
        'status': 200,
    })

    response = self.get(url_for('api.checkurl'),
                        qs={'url': url, 'group': ''})

    self.assert200(response)
    self.assertEqual(response.json['status'], 200)
    self.assertEqual(response.json['url'], url)
    self.assertEqual(response.json['content-type'],
                     'text/html; charset=utf-8')
def test_json_error_check_url(self):
    '''A non-JSON metadata response should yield a 503 with an error key.

    The check endpoint returns a valid JSON hash, but the follow-up
    metadata fetch returns HTML, which must be handled as an error.
    '''
    url = faker.uri()
    url_hash = faker.md5()
    httpretty.register_uri(httpretty.POST, CHECK_ONE_URL,
                           body=json.dumps({'url-hash': url_hash}),
                           content_type='application/json')
    check_url = '/'.join((METADATA_URL, url_hash))
    # Fixed typo: 'test/html' -> 'text/html' (the intent is clearly an
    # HTML content type; the test still exercises the non-JSON path).
    httpretty.register_uri(httpretty.GET, check_url,
                           body='<strong>not json</strong>',
                           content_type='text/html')
    response = self.get(url_for('api.checkurl'),
                        qs={'url': url, 'group': ''})
    self.assertStatus(response, 503)
    self.assertIn('error', response.json)
def test_render_home_with_blog_without_thumbnail(self, home):
    '''It should render the home page with the latest blog article'''
    post = {
        'title': faker.name(),
        'link': faker.uri(),
        'summary': faker.sentence(),
        'date': faker.date_time(),
    }

    response = home(post)
    assert200(response)

    page = response.data.decode('utf8')
    for key in ('title', 'link', 'summary'):
        assert post[key] in page
    assert 'blog-thumbnail' not in page
def test_render_home_with_blog_without_thumbnail(self, home):
    '''It should render the home page with the latest blog article'''
    post = {
        'title': faker.name(),
        'link': faker.uri(),
        'summary': faker.sentence(),
        'date': faker.date_time(),
    }

    response = home(post)
    assert200(response)

    page = response.data.decode('utf8')
    for key in ('title', 'link', 'summary'):
        assert post[key] in page
    assert 'blog-thumbnail' not in page
def test_resource_html_description(self):
    '''An HTML description should be converted to plain text.'''
    node = BNode()
    g = Graph()
    description = faker.paragraph()
    g.add((node, RDF.type, DCAT.Distribution))
    g.add((node, DCT.title, Literal(faker.sentence())))
    g.add((node, DCT.description,
           Literal('<div>{0}</div>'.format(description))))
    g.add((node, DCAT.downloadURL, Literal(faker.uri())))

    resource = resource_from_rdf(g)
    resource.validate()

    assert resource.description == description
def test_display_preview_for_api_resources():
    '''API resources of an ODS-harvested dataset should have a preview.'''
    domain = faker.domain_name()
    remote_id = faker.unique_string()
    resource = ResourceFactory(extras={'ods:type': 'api'})
    # Assignment prevents the garbage collector from dropping the dataset
    # before the end of the test.
    _ = DatasetFactory(resources=[resource], extras={  # noqa
        'harvest:remote_id': remote_id,
        'harvest:domain': domain,
        'ods:url': faker.uri(),
    })

    expected = url_for('ods.preview', domain=domain, id=remote_id,
                       _external=True, _scheme='')
    assert resource.preview_url == expected
def test_can_extract_from_rdf_resource(self):
    '''Extraction should also work from an rdflib Resource (not only a Graph).'''
    node = BNode()
    g = Graph()
    title = faker.sentence()
    url = faker.uri()
    g.add((node, RDF.type, DCAT.Distribution))
    g.add((node, DCT.title, Literal(title)))
    g.add((node, DCAT.downloadURL, Literal(url)))

    resource = resource_from_rdf(g.resource(node))
    resource.validate()

    assert isinstance(resource, Resource)
    assert resource.title == title
    assert resource.url == url
def test_minimal_resource_fields(self):
    '''A title and a downloadURL are enough to build a valid resource.'''
    node = BNode()
    g = Graph()
    title = faker.sentence()
    url = faker.uri()
    g.add((node, RDF.type, DCAT.Distribution))
    g.add((node, DCT.title, Literal(title)))
    g.add((node, DCAT.downloadURL, Literal(url)))

    resource = resource_from_rdf(g)
    resource.validate()

    self.assertIsInstance(resource, Resource)
    self.assertEqual(resource.title, title)
    self.assertEqual(resource.url, url)
def test_dataset_has_resources_from_literal_instead_of_uriref(self):
    '''A Literal-typed downloadURL should still yield a resource.'''
    node = BNode()
    g = Graph()
    g.add((node, RDF.type, DCAT.Dataset))
    g.add((node, DCT.title, Literal(faker.sentence())))
    rnode = BNode()
    g.set((rnode, RDF.type, DCAT.Distribution))
    # Resource URL is expressed as a Literal instead of a URIRef.
    g.set((rnode, DCAT.downloadURL, Literal(faker.uri())))
    g.add((node, DCAT.distribution, rnode))

    dataset = dataset_from_rdf(g)
    dataset.validate()

    assert isinstance(dataset, Dataset)
    assert len(dataset.resources) == 1
def test_dataset_has_resources(self):
    '''Every distribution should become a dataset resource.'''
    node = BNode()
    g = Graph()
    g.add((node, RDF.type, DCAT.Dataset))
    g.add((node, DCT.title, Literal(faker.sentence())))
    for _ in range(3):
        rnode = BNode()
        g.set((rnode, RDF.type, DCAT.Distribution))
        g.set((rnode, DCAT.downloadURL, URIRef(faker.uri())))
        g.add((node, DCAT.distribution, rnode))

    dataset = dataset_from_rdf(g)
    dataset.validate()

    assert isinstance(dataset, Dataset)
    assert len(dataset.resources) == 3
def test_dataset_has_resources_from_buggy_plural_distribution(self):
    '''Try to extract resources from the wrong distributions attribute'''
    node = BNode()
    g = Graph()
    g.add((node, RDF.type, DCAT.Dataset))
    g.add((node, DCT.title, Literal(faker.sentence())))
    rnode = BNode()
    g.set((rnode, RDF.type, DCAT.Distribution))
    g.set((rnode, DCAT.downloadURL, URIRef(faker.uri())))
    # Deliberately use the (incorrect) plural predicate name.
    g.add((node, DCAT.distributions, rnode))

    dataset = dataset_from_rdf(g)
    dataset.validate()

    assert isinstance(dataset, Dataset)
    assert len(dataset.resources) == 1
def test_match_license_from_rights_uri(self):
    '''A known license should be matched from the distribution's rights URI.'''
    license = LicenseFactory()
    node = BNode()
    g = Graph()
    g.set((node, RDF.type, DCAT.Dataset))
    g.set((node, DCT.title, Literal(faker.sentence())))
    rnode = BNode()
    g.set((rnode, RDF.type, DCAT.Distribution))
    g.set((rnode, DCAT.downloadURL, URIRef(faker.uri())))
    g.set((rnode, DCT.rights, URIRef(license.url)))
    g.add((node, DCAT.distribution, rnode))

    dataset = dataset_from_rdf(g)

    assert isinstance(dataset.license, License)
    assert dataset.license == license
def test_all_fields(self):
    '''A user with all fields should be fully serialized as FOAF.Person.'''
    user = UserFactory(website=faker.uri())
    user_url = url_for('users.show_redirect', user=user.id, _external=True)

    u = user_to_rdf(user)
    g = u.graph

    self.assertIsInstance(u, RdfResource)
    self.assertEqual(len(list(g.subjects(RDF.type, FOAF.Person))), 1)
    self.assertEqual(u.value(RDF.type).identifier, FOAF.Person)
    self.assertIsInstance(u.identifier, URIRef)
    self.assertEqual(u.identifier.toPython(), user_url)
    self.assertEqual(u.value(FOAF.name), Literal(user.fullname))
    self.assertEqual(u.value(RDFS.label), Literal(user.fullname))
    self.assertEqual(u.value(FOAF.homepage).identifier,
                     URIRef(user.website))
def test_all_fields(self):
    '''An org with all fields should be fully serialized as FOAF.Organization.'''
    org = OrganizationFactory(url=faker.uri())
    org_url = url_for('organizations.show_redirect', org=org.id,
                      _external=True)

    o = organization_to_rdf(org)
    g = o.graph

    self.assertIsInstance(o, RdfResource)
    self.assertEqual(len(list(g.subjects(RDF.type, FOAF.Organization))), 1)
    self.assertEqual(o.value(RDF.type).identifier, FOAF.Organization)
    self.assertIsInstance(o.identifier, URIRef)
    self.assertEqual(o.identifier.toPython(), org_url)
    self.assertEqual(o.value(FOAF.name), Literal(org.name))
    self.assertEqual(o.value(RDFS.label), Literal(org.name))
    self.assertEqual(o.value(FOAF.homepage).identifier, URIRef(org.url))
def test_render_home_with_blog(self, rmock, client):
    '''It should render the home page with the latest blog article'''
    post_url = faker.uri()
    feed = AtomFeed('Some blog', feed_url=WP_ATOM_URL)
    feed.add('Some post', '<div>Some content</div>',
             content_type='html',
             author=faker.name(),
             url=post_url,
             updated=faker.date_time(),
             published=faker.date_time())
    rmock.get(WP_ATOM_URL, text=feed.to_string(),
              headers={'Content-Type': 'application/atom+xml'})

    response = client.get(url_for('site.home'))
    assert200(response)

    page = response.data.decode('utf8')
    assert 'Some post' in page
    assert post_url in page
def test_render_home_with_blog(self):
    '''It should render the home page with the latest blog article'''
    post_url = faker.uri()
    feed = AtomFeed('Some blog', feed_url=WP_ATOM_URL)
    feed.add('Some post', '<div>Some content</div>',
             content_type='html',
             author=faker.name(),
             url=post_url,
             updated=faker.date_time(),
             published=faker.date_time())
    httpretty.register_uri(httpretty.GET, WP_ATOM_URL,
                           body=feed.to_string(),
                           content_type='application/atom+xml')

    response = self.get(url_for('site.home'))
    self.assert200(response)

    page = response.data.decode('utf8')
    self.assertIn('Some post', page)
    self.assertIn(post_url, page)
def test_match_license_from_license_title(self):
    '''A known license should be matched from the distribution's license title.'''
    license = LicenseFactory()
    node = BNode()
    g = Graph()
    g.set((node, RDF.type, DCAT.Dataset))
    g.set((node, DCT.title, Literal(faker.sentence())))
    rnode = BNode()
    g.set((rnode, RDF.type, DCAT.Distribution))
    g.set((rnode, DCAT.downloadURL, URIRef(faker.uri())))
    g.set((rnode, DCT.license, Literal(license.title)))
    g.add((node, DCAT.distribution, rnode))

    dataset = dataset_from_rdf(g)
    dataset.validate()

    self.assertIsInstance(dataset.license, License)
    self.assertEqual(dataset.license, license)
def test_all_fields(self):
    '''An org with all fields should be fully serialized as FOAF.Organization.'''
    org = OrganizationFactory(url=faker.uri())
    org_url = url_for('organizations.show_redirect', org=org.id,
                      _external=True)

    o = organization_to_rdf(org)
    g = o.graph

    self.assertIsInstance(o, RdfResource)
    self.assertEqual(len(list(g.subjects(RDF.type, FOAF.Organization))), 1)
    self.assertEqual(o.value(RDF.type).identifier, FOAF.Organization)
    self.assertIsInstance(o.identifier, URIRef)
    self.assertEqual(o.identifier.toPython(), org_url)
    self.assertEqual(o.value(FOAF.name), Literal(org.name))
    self.assertEqual(o.value(RDFS.label), Literal(org.name))
    self.assertEqual(o.value(FOAF.homepage).identifier, URIRef(org.url))
def test_all_resource_fields(self):
    '''All supported distribution fields should map onto the resource.'''
    title = faker.sentence()
    url = faker.uri()
    description = faker.paragraph()
    filesize = faker.pyint()
    issued = faker.date_time_between(start_date='-60d', end_date='-30d')
    modified = faker.past_datetime(start_date='-30d')
    mime = faker.mime_type()
    sha1 = faker.sha1()

    node = BNode()
    g = Graph()
    g.add((node, RDF.type, DCAT.Distribution))
    g.add((node, DCT.title, Literal(title)))
    g.add((node, DCT.description, Literal(description)))
    g.add((node, DCAT.downloadURL, Literal(url)))
    g.add((node, DCT.issued, Literal(issued)))
    g.add((node, DCT.modified, Literal(modified)))
    g.add((node, DCAT.bytesSize, Literal(filesize)))
    g.add((node, DCAT.mediaType, Literal(mime)))
    g.add((node, DCT.term('format'), Literal('CSV')))

    # SPDX checksum sub-node carrying a sha1 digest.
    checksum = BNode()
    g.add((node, SPDX.checksum, checksum))
    g.add((checksum, RDF.type, SPDX.Checksum))
    g.add((checksum, SPDX.algorithm, SPDX.checksumAlgorithm_sha1))
    g.add((checksum, SPDX.checksumValue, Literal(sha1)))

    resource = resource_from_rdf(g)
    resource.validate()

    assert isinstance(resource, Resource)
    assert resource.title == title
    assert resource.url == url
    assert resource.description == description
    assert resource.filesize == filesize
    assert resource.mime == mime
    assert isinstance(resource.checksum, Checksum)
    assert resource.checksum.type == 'sha1'
    assert resource.checksum.value == sha1
    assert resource.published == issued
    assert resource.modified == modified
    assert resource.format == 'csv'
def test_all_fields(self):
    '''A user with all fields should be fully serialized as FOAF.Person.'''
    user = UserFactory(website=faker.uri())
    user_url = url_for('users.show_redirect', user=user.id, _external=True)

    u = user_to_rdf(user)
    g = u.graph

    self.assertIsInstance(u, RdfResource)
    self.assertEqual(len(list(g.subjects(RDF.type, FOAF.Person))), 1)
    self.assertEqual(u.value(RDF.type).identifier, FOAF.Person)
    self.assertIsInstance(u.identifier, URIRef)
    self.assertEqual(u.identifier.toPython(), user_url)
    self.assertEqual(u.value(FOAF.name), Literal(user.fullname))
    self.assertEqual(u.value(RDFS.label), Literal(user.fullname))
    self.assertEqual(u.value(FOAF.homepage).identifier,
                     URIRef(user.website))
def test_basic_blogpost(self, blogpost):
    '''A plain post (no image) should have no thumbnail-related fields.'''
    title = faker.sentence()
    post_url = faker.uri()
    publish_date = faker.date_time(tzinfo=pytz.timezone(faker.timezone()))
    text = faker.sentence()
    feed = self.feed('Some blog', title, '<div>{0}</div>'.format(text),
                     post_url, published=publish_date)

    post = blogpost(feed)

    assert post['title'] == title
    assert post['link'] == post_url
    assert post['summary'] == text
    assert_equal_dates(post['date'], publish_date)
    assert 'image_url' not in post
    assert 'srcset' not in post
    assert 'sizes' not in post
def test_basic_blogpost(self, blogpost):
    '''A plain post (no image) should have no thumbnail-related fields.'''
    title = faker.sentence()
    post_url = faker.uri()
    publish_date = faker.date_time(tzinfo=pytz.timezone(faker.timezone()))
    text = faker.sentence()
    feed = self.feed('Some blog', title, '<div>{0}</div>'.format(text),
                     post_url, published=publish_date)

    post = blogpost(feed)

    assert post['title'] == title
    assert post['link'] == post_url
    assert post['summary'] == text
    assert_equal_dates(post['date'], publish_date)
    assert 'image_url' not in post
    assert 'srcset' not in post
    assert 'sizes' not in post
def test_render_home_with_blog(self):
    '''It should render the home page with the latest blog article'''
    post_url = faker.uri()
    feed = AtomFeed('Some blog', feed_url=WP_ATOM_URL)
    feed.add('Some post', '<div>Some content</div>',
             content_type='html',
             author=faker.name(),
             url=post_url,
             updated=faker.date_time(),
             published=faker.date_time())
    httpretty.register_uri(httpretty.GET, WP_ATOM_URL,
                           body=feed.to_string(),
                           content_type='application/atom+xml')

    response = self.get(url_for('site.home'))
    self.assert200(response)

    page = response.data.decode('utf8')
    self.assertIn('Some post', page)
    self.assertIn(post_url, page)
def test_blogpost_with_first_image_as_thumbnail(self, blogpost):
    '''The first image should become the thumbnail, the rest the summary.'''
    title = faker.sentence()
    post_url = faker.uri()
    image_url = faker.image_url()
    summary = faker.sentence()
    publish_date = faker.date_time(tzinfo=pytz.timezone(faker.timezone()))
    content = '<p><img class="whatever" src="{0}" /> {1}</p>'.format(
        image_url, summary)
    feed = self.feed('Some blog', title, content, post_url,
                     published=publish_date)

    post = blogpost(feed)

    assert post['title'] == title
    assert post['link'] == post_url
    assert post['summary'] == summary
    assert_equal_dates(post['date'], publish_date)
    assert post['image_url'] == image_url
    assert 'srcset' not in post
    assert 'sizes' not in post
def test_blogpost_with_thumbnail_as_enclosure(self, blogpost, mime):
    '''An image enclosure should be used as thumbnail (no srcset/sizes).'''
    title = faker.sentence()
    post_url = faker.uri()
    image_url = faker.image_url()
    publish_date = faker.date_time(tzinfo=pytz.timezone(faker.timezone()))
    text = faker.sentence()
    feed = self.feed('Some blog', title, '<div>{0}</div>'.format(text),
                     post_url, published=publish_date,
                     enclosure={'type': mime, 'url': image_url})

    post = blogpost(feed)

    assert post['title'] == title
    assert post['link'] == post_url
    assert post['summary'] == text
    assert_equal_dates(post['date'], publish_date)
    assert post['image_url'] == image_url
    assert 'srcset' not in post
    assert 'sizes' not in post
def test_exact_match_by_alternate_url(self):
    '''License.guess should match a license by one of its alternate URLs.'''
    alternate_url = faker.uri()
    license = LicenseFactory(alternate_urls=[alternate_url])

    found = License.guess(alternate_url)

    assert isinstance(found, License)
    assert license.id == found.id