def test_source_link_filename(db):
    """get_filename(): local links combine the dataset reference code (when
    present) with the slugified link name and the stored key's extension;
    the external link here yields a '.csv' filename despite its URL."""
    # Dataset with a reference code: filename gains the "DW00001" prefix
    # and keeps the archive extension from the S3 key.
    dataset_with_code = factories.DataSetFactory(
        reference_code=factories.DatasetReferenceCodeFactory(code="DW"))
    local_zip_link = factories.SourceLinkFactory(
        dataset=dataset_with_code,
        name="A test source",
        url="s3://csv-pipelines/my-data.csv.zip",
        link_type=SourceLink.TYPE_LOCAL,
    )
    assert local_zip_link.get_filename() == "DW00001-a-test-source.zip"

    # No reference code: just the slugified name plus the key's extension.
    plain_dataset = factories.DataSetFactory()
    local_csv_link = factories.SourceLinkFactory(
        dataset=plain_dataset,
        name="A test source",
        url="s3://csv-pipelines/my-data.csv",
        link_type=SourceLink.TYPE_LOCAL,
    )
    assert local_csv_link.get_filename() == "a-test-source.csv"

    # External link: filename ends in ".csv" even though the URL is ".html".
    external_dataset = factories.DataSetFactory()
    external_link = factories.SourceLinkFactory(
        dataset=external_dataset,
        name="A test source",
        url="http://www.google.com/index.html",
        link_type=SourceLink.TYPE_EXTERNAL,
    )
    assert external_link.get_filename() == "a-test-source.csv"
def test_source_link_data_last_updated(mock_client):
    """get_data_last_updated_date(): S3 LastModified for local links,
    None when head_object fails, and None for external links."""
    dataset = factories.DataSetFactory.create()
    local_link = factories.SourceLinkFactory(
        dataset=dataset,
        link_type=SourceLink.TYPE_LOCAL,
        url="s3://sourcelink/158776ec-5c40-4c58-ba7c-a3425905ec45/test.txt",
    )

    # When head_object succeeds, the S3 LastModified timestamp is returned.
    mock_client().head_object.return_value = {
        "ContentType": "text/plain",
        "LastModified": datetime(2020, 9, 2, 0, 1, 0),
    }
    assert local_link.get_data_last_updated_date() == datetime(2020, 9, 2, 0, 1, 0)

    # A ClientError (e.g. the object is missing) results in None.
    mock_client().head_object.side_effect = [
        botocore.exceptions.ClientError(
            error_response={"Error": {"Message": "it failed"}},
            operation_name="head_object",
        )
    ]
    assert local_link.get_data_last_updated_date() is None

    # External links have no backing S3 object, so never a last-updated date.
    external_link = factories.SourceLinkFactory(
        dataset=dataset,
        link_type=SourceLink.TYPE_EXTERNAL,
        url="http://www.example.com",
    )
    assert external_link.get_data_last_updated_date() is None
def test_dataset_detail_view_unpublished(self):
    """An unpublished dataset's detail page returns 404 even though it
    exists and has source links."""
    group = factories.DataGroupingFactory.create()
    factories.DataSetFactory.create()  # unrelated dataset; should not interfere
    unpublished = factories.DataSetFactory.create(grouping=group, published=False)
    factories.SourceLinkFactory(dataset=unpublished)
    factories.SourceLinkFactory(dataset=unpublished)

    response = self._authenticated_get(
        reverse(
            'catalogue:dataset_fullpath',
            kwargs={'group_slug': group.slug, 'set_slug': unpublished.slug},
        ))

    self.assertEqual(response.status_code, 404)
def test_download_external_file(self, request_client, published):
    """Downloading an external source link redirects (302) and records one
    event-log entry plus a download-count increment."""
    dataset = factories.DataSetFactory.create(
        published=published, user_access_type='REQUIRES_AUTHENTICATION'
    )
    link = factories.SourceLinkFactory(
        dataset=dataset,
        link_type=SourceLink.TYPE_EXTERNAL,
        url='http://example.com',
    )
    events_before = EventLog.objects.count()
    downloads_before = dataset.number_of_downloads

    response = request_client.get(
        reverse(
            'datasets:dataset_source_link_download',
            kwargs={'dataset_uuid': dataset.id, 'source_link_id': link.id},
        ),
        follow=False,
    )

    assert response.status_code == 302
    assert EventLog.objects.count() == events_before + 1
    assert (
        EventLog.objects.latest().event_type
        == EventLog.TYPE_DATASET_SOURCE_LINK_DOWNLOAD
    )
    assert (
        DataSet.objects.get(pk=dataset.id).number_of_downloads
        == downloads_before + 1
    )
def test_download_local_file(self, mock_client, request_client, published):
    """Downloading a local (S3-backed) source link streams the object body
    and records the event-log entry and download count."""
    dataset = factories.DataSetFactory.create(
        published=published, user_access_type='REQUIRES_AUTHENTICATION')
    link = factories.SourceLinkFactory(
        id='158776ec-5c40-4c58-ba7c-a3425905ec45',
        dataset=dataset,
        link_type=SourceLink.TYPE_LOCAL,
        url='s3://sourcelink/158776ec-5c40-4c58-ba7c-a3425905ec45/test.txt',
    )
    events_before = EventLog.objects.count()
    downloads_before = dataset.number_of_downloads

    file_content = b'This is a test file'
    mock_client().get_object.return_value = {
        'ContentType': 'text/plain',
        'Body': StreamingBody(io.BytesIO(file_content), len(file_content)),
    }

    response = request_client.get(
        reverse(
            'datasets:dataset_source_link_download',
            kwargs={'dataset_uuid': dataset.id, 'source_link_id': link.id},
        ))

    assert response.status_code == 200
    assert list(response.streaming_content)[0] == file_content
    # The view must fetch the link's key from the uploads bucket.
    mock_client().get_object.assert_called_with(
        Bucket=settings.AWS_UPLOADS_BUCKET, Key=link.url)
    assert EventLog.objects.count() == events_before + 1
    assert (
        EventLog.objects.latest().event_type
        == EventLog.TYPE_DATASET_SOURCE_LINK_DOWNLOAD
    )
    assert (
        DataSet.objects.get(pk=dataset.id).number_of_downloads
        == downloads_before + 1
    )
def test_download_external_file(self):
    """Catalogue download of an external source link redirects to the
    target URL and logs the download."""
    group = factories.DataGroupingFactory.create()
    dataset = factories.DataSetFactory.create(
        grouping=group,
        published=True,
        user_access_type='REQUIRES_AUTHENTICATION')
    link = factories.SourceLinkFactory(
        id='158776ec-5c40-4c58-ba7c-a3425905ec45',
        dataset=dataset,
        link_type=SourceLink.TYPE_EXTERNAL,
        url='http://example.com',
    )
    events_before = EventLog.objects.count()
    downloads_before = dataset.number_of_downloads

    response = self._authenticated_get(
        reverse(
            'catalogue:dataset_source_link_download',
            kwargs={
                'group_slug': group.slug,
                'set_slug': dataset.slug,
                'source_link_id': link.id,
            },
        ))

    # Don't follow the redirect: the target is an external site.
    self.assertRedirects(
        response, 'http://example.com', fetch_redirect_response=False)
    self.assertEqual(EventLog.objects.count(), events_before + 1)
    self.assertEqual(
        EventLog.objects.latest().event_type,
        EventLog.TYPE_DATASET_SOURCE_LINK_DOWNLOAD,
    )
    self.assertEqual(
        DataSet.objects.get(pk=dataset.id).number_of_downloads,
        downloads_before + 1)
def test_forbidden_dataset(self):
    """A REQUIRES_AUTHORIZATION dataset rejects the download with 403 and
    records neither an event nor a download."""
    group = factories.DataGroupingFactory.create()
    dataset = factories.DataSetFactory.create(
        grouping=group,
        published=True,
        user_access_type='REQUIRES_AUTHORIZATION')
    link = factories.SourceLinkFactory(
        id='158776ec-5c40-4c58-ba7c-a3425905ec45',
        dataset=dataset,
        link_type=SourceLink.TYPE_EXTERNAL,
        url='http://example.com',
    )
    events_before = EventLog.objects.count()
    downloads_before = dataset.number_of_downloads

    response = self._authenticated_get(
        reverse(
            'catalogue:dataset_source_link_download',
            kwargs={
                'group_slug': group.slug,
                'set_slug': dataset.slug,
                'source_link_id': link.id,
            },
        ))

    self.assertEqual(response.status_code, 403)
    # Forbidden requests must leave the audit counters untouched.
    self.assertEqual(EventLog.objects.count(), events_before)
    self.assertEqual(
        DataSet.objects.get(pk=dataset.id).number_of_downloads,
        downloads_before)
def test_link_data_cut_doesnt_have_fields(client):
    """A link-based data cut's detail page has no 'fields' in its context."""
    dataset = factories.DataSetFactory.create(published=True)
    factories.SourceLinkFactory(dataset=dataset)

    response = client.get(dataset.get_absolute_url())

    assert response.status_code == 200
    assert response.context["fields"] is None
def test_link_data_cut_doesnt_have_preview(access_type, client):
    """The detail page of a link data cut renders OK for each access type."""
    data_cut = factories.DataSetFactory(user_access_type=access_type, published=True)
    factories.SourceLinkFactory(dataset=data_cut)

    assert client.get(data_cut.get_absolute_url()).status_code == 200
def test_dataset_detail_view_published(self):
    """A published dataset's detail page shows the dataset name and each
    of its source links exactly once."""
    group = factories.DataGroupingFactory.create()
    factories.DataSetFactory.create()  # unrelated dataset; should not interfere
    dataset = factories.DataSetFactory.create(grouping=group, published=True)
    first_link = factories.SourceLinkFactory(dataset=dataset)
    second_link = factories.SourceLinkFactory(dataset=dataset)

    response = self._authenticated_get(
        reverse(
            'catalogue:dataset_fullpath',
            kwargs={'group_slug': group.slug, 'set_slug': dataset.slug},
        ))

    self.assertEqual(response.status_code, 200)
    self.assertContains(response, dataset.name)
    # Each link name appears exactly once in the rendered page.
    self.assertContains(response, first_link.name, 1)
    self.assertContains(response, second_link.name, 1)
def test_link_data_cut_doesnt_have_preview(client):
    """Link data cuts render a 'No preview available' message."""
    dataset = factories.DataSetFactory(
        user_access_type='REQUIRES_AUTHENTICATION', published=True)
    factories.SourceLinkFactory(dataset=dataset)

    response = client.get(dataset.get_absolute_url())

    assert response.status_code == 200
    assert 'No preview available' in response.rendered_content
def test_failed_reading_from_s3(self, mock_client):
    """If head_object raises, get_preview_data degrades to (None, [])."""
    link = factories.SourceLinkFactory(url="s3://a/path/to/a/file.csv")
    mock_client().head_object.side_effect = [
        botocore.exceptions.ClientError(
            error_response={"Error": {"Message": "it failed"}},
            operation_name="head_object",
        )
    ]

    assert link.get_preview_data() == (None, [])
def test_dataset_has_request_access_link(client, eligibility_criteria, view_name):
    """The dataset detail page links to the expected request-access view."""
    dataset = factories.DataSetFactory.create(
        eligibility_criteria=eligibility_criteria, published=True)
    factories.SourceLinkFactory(dataset=dataset)

    response = client.get(dataset.get_absolute_url())
    expected_access_url = reverse(view_name, args=[dataset.id])

    assert response.status_code == 200
    assert expected_access_url in str(response.content)
def test_link_data_cut_doesnt_have_fields(client):
    """The catalogue view for a link data cut has no 'fields' context."""
    dataset = factories.DataSetFactory.create(published=True)
    factories.SourceLinkFactory(dataset=dataset)

    response = client.get(
        reverse(
            'catalogue:dataset_fullpath',
            kwargs={'group_slug': dataset.grouping.slug, 'set_slug': dataset.slug},
        ))

    assert response.status_code == 200
    assert response.context["fields"] is None
def test_preview_csv(self, mock_client):
    """CSV previews return the header plus each complete row; the
    incomplete trailing line is dropped. Leading spaces in cells are kept."""
    link = factories.SourceLinkFactory(url="s3://a/path/to/a/file.csv")
    mock_client().head_object.return_value = {"ContentType": "text/csv"}

    # Final "trailing" has no newline and must not appear in the preview.
    csv_content = b"col1,col2\nrow1-col1, row1-col2\nrow2-col1, row2-col2\ntrailing"
    mock_client().get_object.return_value = {
        "ContentType": "text/plain",
        "ContentLength": len(csv_content),
        "Body": StreamingBody(io.BytesIO(csv_content), len(csv_content)),
    }

    expected_rows = [
        OrderedDict([("col1", "row1-col1"), ("col2", " row1-col2")]),
        OrderedDict([("col1", "row2-col1"), ("col2", " row2-col2")]),
    ]
    assert link.get_preview_data() == (["col1", "col2"], expected_rows)
def test_dataset_has_request_access_link(client, eligibility_criteria, view_name):
    """The catalogue detail page links to the expected request-access view."""
    group = factories.DataGroupingFactory.create()
    dataset = factories.DataSetFactory.create(
        grouping=group, eligibility_criteria=eligibility_criteria, published=True
    )
    factories.SourceLinkFactory(dataset=dataset)

    response = client.get(
        reverse(
            'catalogue:dataset_fullpath',
            kwargs={'group_slug': group.slug, 'set_slug': dataset.slug},
        )
    )
    expected_access_url = reverse(view_name, args=[group.slug, dataset.slug])

    assert response.status_code == 200
    assert expected_access_url in str(response.content)
def test_download_local_file(self, mock_client):
    """Catalogue download of a local source link streams the S3 object and
    records the event-log entry and download count."""
    group = factories.DataGroupingFactory.create()
    dataset = factories.DataSetFactory.create(
        grouping=group,
        published=True,
        user_access_type='REQUIRES_AUTHENTICATION')
    link = factories.SourceLinkFactory(
        id='158776ec-5c40-4c58-ba7c-a3425905ec45',
        dataset=dataset,
        link_type=SourceLink.TYPE_LOCAL,
        url='s3://sourcelink/158776ec-5c40-4c58-ba7c-a3425905ec45/test.txt',
    )
    events_before = EventLog.objects.count()
    downloads_before = dataset.number_of_downloads

    file_content = b'This is a test file'
    mock_client().get_object.return_value = {
        'ContentType': 'text/plain',
        'Body': StreamingBody(io.BytesIO(file_content), len(file_content)),
    }

    response = self._authenticated_get(
        reverse(
            'catalogue:dataset_source_link_download',
            kwargs={
                'group_slug': group.slug,
                'set_slug': dataset.slug,
                'source_link_id': link.id,
            },
        ))

    self.assertEqual(response.status_code, 200)
    self.assertEqual(list(response.streaming_content)[0], file_content)
    # The view must fetch the link's key from the uploads bucket.
    mock_client().get_object.assert_called_with(
        Bucket=settings.AWS_UPLOADS_BUCKET, Key=link.url)
    self.assertEqual(EventLog.objects.count(), events_before + 1)
    self.assertEqual(
        EventLog.objects.latest().event_type,
        EventLog.TYPE_DATASET_SOURCE_LINK_DOWNLOAD,
    )
    self.assertEqual(
        DataSet.objects.get(pk=dataset.id).number_of_downloads,
        downloads_before + 1)
def test_forbidden_dataset(self, client):
    """A REQUIRES_AUTHORIZATION dataset rejects downloads with 403, and
    neither an event nor a download is recorded."""
    dataset = factories.DataSetFactory.create(
        published=True, user_access_type='REQUIRES_AUTHORIZATION'
    )
    link = factories.SourceLinkFactory(
        id='158776ec-5c40-4c58-ba7c-a3425905ec45',
        dataset=dataset,
        link_type=SourceLink.TYPE_EXTERNAL,
        url='http://example.com',
    )
    events_before = EventLog.objects.count()
    downloads_before = dataset.number_of_downloads

    response = client.get(
        reverse(
            'datasets:dataset_source_link_download',
            kwargs={'dataset_uuid': dataset.id, 'source_link_id': link.id},
        )
    )

    assert response.status_code == 403
    # Forbidden requests must leave the audit counters untouched.
    assert EventLog.objects.count() == events_before
    assert DataSet.objects.get(pk=dataset.id).number_of_downloads == downloads_before
def test_clone_dataset_copies_related_objects(db):
    """clone() copies source links/views/tables/queries onto the clone but
    deliberately drops user permissions; the original keeps everything."""
    original = factories.DataSetFactory.create(published=True)
    factories.DataSetUserPermissionFactory(dataset=original)
    factories.SourceLinkFactory(dataset=original)
    factories.SourceViewFactory(dataset=original)
    factories.SourceTableFactory(dataset=original)
    factories.CustomDatasetQueryFactory(dataset=original)

    clone = original.clone()

    # Permissions are not carried over to the clone.
    assert not clone.datasetuserpermission_set.all()
    # Every copied relation must point at the clone, not the original.
    assert [obj.dataset for obj in clone.sourcelink_set.all()] == [clone]
    assert [obj.dataset for obj in clone.sourceview_set.all()] == [clone]
    assert [obj.dataset for obj in clone.sourcetable_set.all()] == [clone]
    assert [obj.dataset for obj in clone.customdatasetquery_set.all()] == [clone]

    # The original dataset's relations are left intact.
    assert original.datasetuserpermission_set.all()
    assert [obj.dataset for obj in original.sourcelink_set.all()] == [original]
    assert [obj.dataset for obj in original.sourceview_set.all()] == [original]
    assert [obj.dataset for obj in original.sourcetable_set.all()] == [original]
    assert [obj.dataset for obj in original.customdatasetquery_set.all()] == [original]
def test_download_local_file(self, mock_client, request_client, published, access_type):
    """Local S3 downloads stream the file with a content-length header and
    record the event-log entry and download count."""
    dataset = factories.DataSetFactory.create(
        published=published, user_access_type=access_type)
    link = factories.SourceLinkFactory(
        id="158776ec-5c40-4c58-ba7c-a3425905ec45",
        dataset=dataset,
        link_type=SourceLink.TYPE_LOCAL,
        url="s3://sourcelink/158776ec-5c40-4c58-ba7c-a3425905ec45/test.txt",
    )
    events_before = EventLog.objects.count()
    downloads_before = dataset.number_of_downloads

    file_content = b"This is a test file"
    mock_client().get_object.return_value = {
        "ContentType": "text/plain",
        "ContentLength": len(file_content),
        "Body": StreamingBody(io.BytesIO(file_content), len(file_content)),
    }

    response = request_client.get(
        reverse(
            "datasets:dataset_source_link_download",
            kwargs={"dataset_uuid": dataset.id, "source_link_id": link.id},
        ))

    assert response.status_code == 200
    assert list(response.streaming_content)[0] == file_content
    # Content length must be propagated from the S3 response.
    assert response["content-length"] == str(len(file_content))
    mock_client().get_object.assert_called_with(
        Bucket=settings.AWS_UPLOADS_BUCKET, Key=link.url)
    assert EventLog.objects.count() == events_before + 1
    assert (
        EventLog.objects.latest().event_type
        == EventLog.TYPE_DATASET_SOURCE_LINK_DOWNLOAD
    )
    assert (
        DataSet.objects.get(pk=dataset.id).number_of_downloads
        == downloads_before + 1
    )
def test_authorised_link(self, access_type, client, mocker):
    """An authorised user previewing a CSV source link sees an HTML table
    of the parsed rows, a record count and a CSV download link."""
    dataset = factories.DataSetFactory(user_access_type=access_type)
    link = factories.SourceLinkFactory(
        id="158776ec-5c40-4c58-ba7c-a3425905ec45",
        dataset=dataset,
        url="s3://sourcelink/158776ec-5c40-4c58-ba7c-a3425905ec45/test.csv",
    )
    mock_client = mocker.patch(
        "dataworkspace.apps.core.boto3_client.boto3.client")
    mock_client().head_object.return_value = {"ContentType": "text/csv"}
    csv_content = b"header1,header2\nrow1 col1, row1 col2\nrow2 col1, row2 col2\n"
    mock_client().get_object.return_value = {
        "ContentType": "text/plain",
        "ContentLength": len(csv_content),
        "Body": StreamingBody(io.BytesIO(csv_content), len(csv_content)),
    }

    response = client.get(
        reverse("datasets:data_cut_source_link_preview",
                args=(dataset.id, link.id)))
    assert response.status_code == 200

    content = response.content.decode("utf-8")
    # Flatten the markup (strip blank lines and indentation) so the
    # expected table can be matched as a single string.
    flattened = "".join(s.strip() for s in content.splitlines() if s.strip())
    expected_table = (
        "<thead>"
        '<tr class="govuk-table__row">'
        '<th class="govuk-table__header">header1</th>'
        '<th class="govuk-table__header">header2</th>'
        "</tr>"
        "</thead><tbody>"
        '<tr class="govuk-table__row">'
        '<td class="govuk-table__cell">row1 col1</td>'
        '<td class="govuk-table__cell">row1 col2</td>'
        "</tr>"
        '<tr class="govuk-table__row">'
        '<td class="govuk-table__cell">row2 col1</td>'
        '<td class="govuk-table__cell">row2 col2</td>'
        "</tr></tbody>"
    )
    assert expected_table in flattened
    assert "Showing <strong>2</strong> records." in content
    assert "Download as CSV" in content
def test_file_not_csv(self, mock_client):
    """No preview for a non-.csv key, even when S3 reports a CSV
    content type."""
    link = factories.SourceLinkFactory(url="s3://a/path/to/a/file.txt")
    mock_client().head_object.return_value = {"ContentType": "text/csv"}

    assert link.get_preview_data() == (None, [])
def test_not_s3_link(self):
    """Non-S3 URLs cannot be previewed: get_preview_data is (None, [])."""
    link = factories.SourceLinkFactory(url="http://example.com/a-file.csv")

    assert link.get_preview_data() == (None, [])
def test_unauthorised_link(self, client):
    """Previewing a source link without dataset access returns 403."""
    link = factories.SourceLinkFactory()

    response = client.get(
        reverse("datasets:data_cut_source_link_preview",
                args=(link.dataset.id, link.id)))

    assert response.status_code == 403