Esempio n. 1
0
def test_source_link_filename(db):
    """Check the filename that ``SourceLink.get_filename()`` produces.

    Three links share the name "A test source" but differ in the dataset's
    reference code, the URL and the link type. Each expected filename is
    asserted immediately after the link is created.
    """
    link_name = "A test source"

    # Local zip on a dataset given the "DW" reference code: the expected
    # filename carries a "DW00001" prefix and the ".zip" extension.
    dw_code = factories.DatasetReferenceCodeFactory(code="DW")
    coded_dataset = factories.DataSetFactory(reference_code=dw_code)
    zipped_link = factories.SourceLinkFactory(
        dataset=coded_dataset,
        name=link_name,
        url="s3://csv-pipelines/my-data.csv.zip",
        link_type=SourceLink.TYPE_LOCAL,
    )
    assert zipped_link.get_filename() == "DW00001-a-test-source.zip"

    # Local csv on a dataset created without an explicit reference code:
    # no prefix is expected and the ".csv" extension is kept.
    plain_dataset = factories.DataSetFactory()
    csv_link = factories.SourceLinkFactory(
        dataset=plain_dataset,
        name=link_name,
        url="s3://csv-pipelines/my-data.csv",
        link_type=SourceLink.TYPE_LOCAL,
    )
    assert csv_link.get_filename() == "a-test-source.csv"

    # External link pointing at an HTML page: the expected filename still
    # ends in ".csv".
    external_dataset = factories.DataSetFactory()
    html_link = factories.SourceLinkFactory(
        dataset=external_dataset,
        name=link_name,
        url="http://www.google.com/index.html",
        link_type=SourceLink.TYPE_EXTERNAL,
    )
    assert html_link.get_filename() == "a-test-source.csv"
Esempio n. 2
0
def test_source_link_data_last_updated(mock_client):
    """Exercise ``SourceLink.get_data_last_updated_date()`` in three cases.

    ``mock_client`` is a callable mock; the ``head_object`` attribute of its
    return value is stubbed here (presumably it stands in for the S3 client;
    confirm against the fixture definition).
    """
    dataset = factories.DataSetFactory.create()
    s3_link = factories.SourceLinkFactory(
        dataset=dataset,
        link_type=SourceLink.TYPE_LOCAL,
        url="s3://sourcelink/158776ec-5c40-4c58-ba7c-a3425905ec45/test.txt",
    )

    # Returns last modified date if the file exists
    last_modified = datetime(2020, 9, 2, 0, 1, 0)
    mock_client().head_object.return_value = {
        "ContentType": "text/plain",
        "LastModified": last_modified,
    }
    assert s3_link.get_data_last_updated_date() == last_modified

    # Returns None if file does not exist on s3
    head_failure = botocore.exceptions.ClientError(
        error_response={"Error": {"Message": "it failed"}},
        operation_name="head_object",
    )
    mock_client().head_object.side_effect = [head_failure]
    assert s3_link.get_data_last_updated_date() is None

    # External links never have a last updated date
    web_link = factories.SourceLinkFactory(
        dataset=dataset,
        link_type=SourceLink.TYPE_EXTERNAL,
        url="http://www.example.com",
    )
    assert web_link.get_data_last_updated_date() is None
Esempio n. 3
0
 def test_dataset_detail_view_unpublished(self):
     # The full-path detail page of an unpublished dataset is expected to
     # answer 404, even though the dataset has source links.
     grouping = factories.DataGroupingFactory.create()
     factories.DataSetFactory.create()  # extra dataset not used by the request
     unpublished = factories.DataSetFactory.create(
         grouping=grouping, published=False)
     for _ in range(2):
         factories.SourceLinkFactory(dataset=unpublished)

     detail_url = reverse(
         'catalogue:dataset_fullpath',
         kwargs={'group_slug': grouping.slug, 'set_slug': unpublished.slug},
     )
     response = self._authenticated_get(detail_url)

     self.assertEqual(response.status_code, 404)
Esempio n. 4
0
 def test_download_external_file(self, request_client, published):
     """Downloading an external source link redirects and is recorded.

     Expects a 302 response, one new ``EventLog`` row of type
     ``TYPE_DATASET_SOURCE_LINK_DOWNLOAD`` and the dataset's download
     counter incremented by one.
     """
     dataset = factories.DataSetFactory.create(
         published=published, user_access_type='REQUIRES_AUTHENTICATION'
     )
     external_link = factories.SourceLinkFactory(
         dataset=dataset,
         link_type=SourceLink.TYPE_EXTERNAL,
         url='http://example.com',
     )

     # Snapshot both counters before the request so the deltas can be checked.
     events_before = EventLog.objects.count()
     downloads_before = dataset.number_of_downloads

     download_url = reverse(
         'datasets:dataset_source_link_download',
         kwargs={
             'dataset_uuid': dataset.id,
             'source_link_id': external_link.id,
         },
     )
     response = request_client.get(download_url, follow=False)

     assert response.status_code == 302
     assert EventLog.objects.count() == events_before + 1
     newest_event = EventLog.objects.latest()
     assert newest_event.event_type == EventLog.TYPE_DATASET_SOURCE_LINK_DOWNLOAD
     # Re-read the dataset from the database to see the updated counter.
     reloaded = DataSet.objects.get(pk=dataset.id)
     assert reloaded.number_of_downloads == downloads_before + 1
 def test_download_local_file(self, mock_client, request_client, published):
     """Downloading a local (S3-backed) source link streams the file.

     The ``get_object`` call on ``mock_client()`` is stubbed to return a
     small text body. The test checks the streamed bytes, the arguments
     ``get_object`` was called with, and the event-log / download counters.
     """
     file_bytes = b'This is a test file'
     dataset = factories.DataSetFactory.create(
         published=published, user_access_type='REQUIRES_AUTHENTICATION')
     s3_link = factories.SourceLinkFactory(
         id='158776ec-5c40-4c58-ba7c-a3425905ec45',
         dataset=dataset,
         link_type=SourceLink.TYPE_LOCAL,
         url='s3://sourcelink/158776ec-5c40-4c58-ba7c-a3425905ec45/test.txt',
     )

     # Snapshot both counters before the request so the deltas can be checked.
     events_before = EventLog.objects.count()
     downloads_before = dataset.number_of_downloads

     mock_client().get_object.return_value = {
         'ContentType': 'text/plain',
         'Body': StreamingBody(io.BytesIO(file_bytes), len(file_bytes)),
     }

     download_url = reverse(
         'datasets:dataset_source_link_download',
         kwargs={'dataset_uuid': dataset.id, 'source_link_id': s3_link.id},
     )
     response = request_client.get(download_url)

     assert response.status_code == 200
     streamed_chunks = list(response.streaming_content)
     assert streamed_chunks[0] == file_bytes
     mock_client().get_object.assert_called_with(
         Bucket=settings.AWS_UPLOADS_BUCKET, Key=s3_link.url)
     assert EventLog.objects.count() == events_before + 1
     newest_event = EventLog.objects.latest()
     assert newest_event.event_type == EventLog.TYPE_DATASET_SOURCE_LINK_DOWNLOAD
     reloaded = DataSet.objects.get(pk=dataset.id)
     assert reloaded.number_of_downloads == downloads_before + 1
Esempio n. 6
0
 def test_download_external_file(self):
     # Downloading an external source link through the catalogue URL should
     # redirect to the link's URL, log one download event and bump the
     # dataset's download counter by one.
     grouping = factories.DataGroupingFactory.create()
     dataset = factories.DataSetFactory.create(
         grouping=grouping,
         published=True,
         user_access_type='REQUIRES_AUTHENTICATION')
     external_link = factories.SourceLinkFactory(
         id='158776ec-5c40-4c58-ba7c-a3425905ec45',
         dataset=dataset,
         link_type=SourceLink.TYPE_EXTERNAL,
         url='http://example.com',
     )

     # Snapshot both counters before the request so the deltas can be checked.
     events_before = EventLog.objects.count()
     downloads_before = dataset.number_of_downloads

     download_url = reverse(
         'catalogue:dataset_source_link_download',
         kwargs={
             'group_slug': grouping.slug,
             'set_slug': dataset.slug,
             'source_link_id': external_link.id,
         },
     )
     response = self._authenticated_get(download_url)

     # The redirect target itself is not fetched, only the Location checked.
     self.assertRedirects(
         response, 'http://example.com', fetch_redirect_response=False)
     self.assertEqual(EventLog.objects.count(), events_before + 1)
     newest_event = EventLog.objects.latest()
     self.assertEqual(
         newest_event.event_type,
         EventLog.TYPE_DATASET_SOURCE_LINK_DOWNLOAD)
     reloaded = DataSet.objects.get(pk=dataset.id)
     self.assertEqual(reloaded.number_of_downloads, downloads_before + 1)
Esempio n. 7
0
 def test_forbidden_dataset(self):
     # A dataset with user_access_type 'REQUIRES_AUTHORIZATION' is expected
     # to answer 403 on download, and neither the event log nor the
     # download counter may change.
     grouping = factories.DataGroupingFactory.create()
     dataset = factories.DataSetFactory.create(
         grouping=grouping,
         published=True,
         user_access_type='REQUIRES_AUTHORIZATION')
     external_link = factories.SourceLinkFactory(
         id='158776ec-5c40-4c58-ba7c-a3425905ec45',
         dataset=dataset,
         link_type=SourceLink.TYPE_EXTERNAL,
         url='http://example.com',
     )

     # Snapshot both counters before the request; they must stay unchanged.
     events_before = EventLog.objects.count()
     downloads_before = dataset.number_of_downloads

     download_url = reverse(
         'catalogue:dataset_source_link_download',
         kwargs={
             'group_slug': grouping.slug,
             'set_slug': dataset.slug,
             'source_link_id': external_link.id,
         },
     )
     response = self._authenticated_get(download_url)

     self.assertEqual(response.status_code, 403)
     self.assertEqual(EventLog.objects.count(), events_before)
     reloaded = DataSet.objects.get(pk=dataset.id)
     self.assertEqual(reloaded.number_of_downloads, downloads_before)
Esempio n. 8
0
def test_link_data_cut_doesnt_have_fields(client):
    """A published dataset with only a source link renders with no fields.

    The detail page must return 200 and expose ``fields`` as ``None`` in the
    template context.
    """
    data_cut = factories.DataSetFactory.create(published=True)
    factories.SourceLinkFactory(dataset=data_cut)

    detail_url = data_cut.get_absolute_url()
    response = client.get(detail_url)

    assert response.status_code == 200
    assert response.context["fields"] is None
def test_link_data_cut_doesnt_have_preview(access_type, client):
    """Request the detail page of a published dataset with one source link.

    ``access_type`` is passed straight through as the dataset's
    ``user_access_type``.
    """
    dataset = factories.DataSetFactory(
        user_access_type=access_type, published=True)
    factories.SourceLinkFactory(dataset=dataset)

    detail_url = dataset.get_absolute_url()
    response = client.get(detail_url)

    # NOTE(review): only the status code is checked here; nothing asserts
    # that a preview is absent, despite the test name. Confirm intent.
    assert response.status_code == 200
Esempio n. 10
0
 def test_dataset_detail_view_published(self):
     # The full-path detail page of a published dataset should return 200
     # and contain the dataset name plus each source link name exactly once.
     grouping = factories.DataGroupingFactory.create()
     factories.DataSetFactory.create()  # extra dataset not used by the request
     published = factories.DataSetFactory.create(
         grouping=grouping, published=True)
     first_link = factories.SourceLinkFactory(dataset=published)
     second_link = factories.SourceLinkFactory(dataset=published)

     detail_url = reverse(
         'catalogue:dataset_fullpath',
         kwargs={'group_slug': grouping.slug, 'set_slug': published.slug},
     )
     response = self._authenticated_get(detail_url)

     self.assertEqual(response.status_code, 200)
     self.assertContains(response, published.name)
     # The third positional argument is the expected number of occurrences.
     for source_link in (first_link, second_link):
         self.assertContains(response, source_link.name, 1)
def test_link_data_cut_doesnt_have_preview(client):
    """The detail page of a source-link dataset shows the no-preview text.

    Expects a 200 response whose rendered content contains
    'No preview available'.
    """
    data_cut = factories.DataSetFactory(
        user_access_type='REQUIRES_AUTHENTICATION', published=True)
    factories.SourceLinkFactory(dataset=data_cut)

    detail_url = data_cut.get_absolute_url()
    response = client.get(detail_url)

    assert response.status_code == 200
    assert 'No preview available' in response.rendered_content
Esempio n. 12
0
 def test_failed_reading_from_s3(self, mock_client):
     """A ``ClientError`` from ``head_object`` yields an empty preview.

     ``get_preview_data()`` is expected to return ``(None, [])`` rather than
     propagate the error.
     """
     csv_link = factories.SourceLinkFactory(url="s3://a/path/to/a/file.csv")

     head_failure = botocore.exceptions.ClientError(
         error_response={"Error": {"Message": "it failed"}},
         operation_name="head_object",
     )
     # A one-item side_effect list raises this error on the first call.
     mock_client().head_object.side_effect = [head_failure]

     no_preview = (None, [])
     assert csv_link.get_preview_data() == no_preview
Esempio n. 13
0
def test_dataset_has_request_access_link(client, eligibility_criteria,
                                         view_name):
    """The dataset detail page links to the matching request-access view.

    Args:
        client: test client used to fetch the dataset page.
        eligibility_criteria: value passed to the dataset factory.
        view_name: URL name reversed (with the dataset id) to build the
            link that must appear in the page.
    """
    ds = factories.DataSetFactory.create(
        eligibility_criteria=eligibility_criteria, published=True)

    factories.SourceLinkFactory(dataset=ds)

    response = client.get(ds.get_absolute_url())

    request_access_url = reverse(view_name, args=[ds.id])

    assert response.status_code == 200
    # Decode the body rather than calling str() on the bytes: str(bytes)
    # returns the "b'...'" repr (with escaped characters) and emits a
    # BytesWarning when Python runs with -b.
    assert request_access_url in response.content.decode("utf-8")
Esempio n. 14
0
def test_link_data_cut_doesnt_have_fields(client):
    """A published source-link dataset renders with no fields.

    The page is fetched through the 'catalogue:dataset_fullpath' URL; it
    must return 200 and expose ``fields`` as ``None`` in the context.
    """
    data_cut = factories.DataSetFactory.create(published=True)
    factories.SourceLinkFactory(dataset=data_cut)

    detail_url = reverse(
        'catalogue:dataset_fullpath',
        kwargs={
            'group_slug': data_cut.grouping.slug,
            'set_slug': data_cut.slug,
        },
    )
    response = client.get(detail_url)

    assert response.status_code == 200
    assert response.context["fields"] is None
Esempio n. 15
0
 def test_preview_csv(self, mock_client):
     """``get_preview_data()`` parses a CSV object into headers and rows.

     The stubbed body ends with an unterminated line ("trailing"); the
     expected preview contains only the two complete data rows. Values are
     expected verbatim, including the leading space in the second column.
     """
     csv_link = factories.SourceLinkFactory(url="s3://a/path/to/a/file.csv")
     csv_content = b"col1,col2\nrow1-col1, row1-col2\nrow2-col1, row2-col2\ntrailing"

     mock_client().head_object.return_value = {"ContentType": "text/csv"}
     mock_client().get_object.return_value = {
         "ContentType": "text/plain",
         "ContentLength": len(csv_content),
         "Body": StreamingBody(io.BytesIO(csv_content), len(csv_content)),
     }

     expected_headers = ["col1", "col2"]
     expected_rows = [
         OrderedDict(zip(expected_headers, row))
         for row in (
             ("row1-col1", " row1-col2"),
             ("row2-col1", " row2-col2"),
         )
     ]
     assert csv_link.get_preview_data() == (expected_headers, expected_rows)
Esempio n. 16
0
def test_dataset_has_request_access_link(client, eligibility_criteria, view_name):
    """The catalogue detail page links to the matching request-access view.

    Args:
        client: test client used to fetch the dataset page.
        eligibility_criteria: value passed to the dataset factory.
        view_name: URL name reversed (with the group and dataset slugs) to
            build the link that must appear in the page.
    """
    group = factories.DataGroupingFactory.create()
    ds = factories.DataSetFactory.create(
        grouping=group, eligibility_criteria=eligibility_criteria, published=True
    )

    factories.SourceLinkFactory(dataset=ds)

    response = client.get(
        reverse(
            'catalogue:dataset_fullpath',
            kwargs={'group_slug': group.slug, 'set_slug': ds.slug},
        )
    )

    request_access_url = reverse(view_name, args=[group.slug, ds.slug])

    assert response.status_code == 200
    # Decode the body rather than calling str() on the bytes: str(bytes)
    # returns the "b'...'" repr (with escaped characters) and emits a
    # BytesWarning when Python runs with -b.
    assert request_access_url in response.content.decode("utf-8")
Esempio n. 17
0
 def test_download_local_file(self, mock_client):
     # Downloading a local (S3-backed) source link through the catalogue URL
     # should stream the stubbed object body, call get_object with the
     # uploads bucket and the link URL as key, log one download event and
     # bump the dataset's download counter by one.
     file_bytes = b'This is a test file'
     grouping = factories.DataGroupingFactory.create()
     dataset = factories.DataSetFactory.create(
         grouping=grouping,
         published=True,
         user_access_type='REQUIRES_AUTHENTICATION')
     s3_link = factories.SourceLinkFactory(
         id='158776ec-5c40-4c58-ba7c-a3425905ec45',
         dataset=dataset,
         link_type=SourceLink.TYPE_LOCAL,
         url='s3://sourcelink/158776ec-5c40-4c58-ba7c-a3425905ec45/test.txt',
     )

     # Snapshot both counters before the request so the deltas can be checked.
     events_before = EventLog.objects.count()
     downloads_before = dataset.number_of_downloads

     mock_client().get_object.return_value = {
         'ContentType': 'text/plain',
         'Body': StreamingBody(io.BytesIO(file_bytes), len(file_bytes)),
     }

     download_url = reverse(
         'catalogue:dataset_source_link_download',
         kwargs={
             'group_slug': grouping.slug,
             'set_slug': dataset.slug,
             'source_link_id': s3_link.id,
         },
     )
     response = self._authenticated_get(download_url)

     self.assertEqual(response.status_code, 200)
     streamed_chunks = list(response.streaming_content)
     self.assertEqual(streamed_chunks[0], file_bytes)
     mock_client().get_object.assert_called_with(
         Bucket=settings.AWS_UPLOADS_BUCKET, Key=s3_link.url)
     self.assertEqual(EventLog.objects.count(), events_before + 1)
     newest_event = EventLog.objects.latest()
     self.assertEqual(
         newest_event.event_type,
         EventLog.TYPE_DATASET_SOURCE_LINK_DOWNLOAD)
     reloaded = DataSet.objects.get(pk=dataset.id)
     self.assertEqual(reloaded.number_of_downloads, downloads_before + 1)
Esempio n. 18
0
 def test_forbidden_dataset(self, client):
     """Downloading from a 'REQUIRES_AUTHORIZATION' dataset is refused.

     Expects a 403 response and no change to the event-log count or the
     dataset's download counter.
     """
     dataset = factories.DataSetFactory.create(
         published=True, user_access_type='REQUIRES_AUTHORIZATION'
     )
     external_link = factories.SourceLinkFactory(
         id='158776ec-5c40-4c58-ba7c-a3425905ec45',
         dataset=dataset,
         link_type=SourceLink.TYPE_EXTERNAL,
         url='http://example.com',
     )

     # Snapshot both counters before the request; they must stay unchanged.
     events_before = EventLog.objects.count()
     downloads_before = dataset.number_of_downloads

     download_url = reverse(
         'datasets:dataset_source_link_download',
         kwargs={
             'dataset_uuid': dataset.id,
             'source_link_id': external_link.id,
         },
     )
     response = client.get(download_url)

     assert response.status_code == 403
     assert EventLog.objects.count() == events_before
     reloaded = DataSet.objects.get(pk=dataset.id)
     assert reloaded.number_of_downloads == downloads_before
Esempio n. 19
0
def test_clone_dataset_copies_related_objects(db):
    """``DataSet.clone()`` copies source objects but not user permissions.

    After cloning, the clone must have no user permissions and exactly one
    source link, view, table and custom query, each pointing at the clone.
    The original must keep its permission and its own four related objects.
    """
    ds = factories.DataSetFactory.create(published=True)

    # Attach one related object of each kind to the original dataset.
    for related_factory in (
        factories.DataSetUserPermissionFactory,
        factories.SourceLinkFactory,
        factories.SourceViewFactory,
        factories.SourceTableFactory,
        factories.CustomDatasetQueryFactory,
    ):
        related_factory(dataset=ds)

    clone = ds.clone()

    # Reverse relations whose objects are expected to be copied to the clone.
    copied_relations = (
        "sourcelink_set",
        "sourceview_set",
        "sourcetable_set",
        "customdatasetquery_set",
    )

    def owners(dataset, relation):
        """Return the ``dataset`` of every object in the given relation."""
        return [obj.dataset for obj in getattr(dataset, relation).all()]

    assert not clone.datasetuserpermission_set.all()
    for relation in copied_relations:
        assert owners(clone, relation) == [clone]

    assert ds.datasetuserpermission_set.all()
    for relation in copied_relations:
        assert owners(ds, relation) == [ds]
 def test_download_local_file(self, mock_client, request_client, published,
                              access_type):
     """Downloading a local (S3-backed) source link streams the file.

     The ``get_object`` call on ``mock_client()`` is stubbed to return a
     small text body with a ``ContentLength``. The test checks the streamed
     bytes, the ``content-length`` response header, the arguments
     ``get_object`` was called with, and the event-log / download counters.
     """
     file_bytes = b"This is a test file"
     dataset = factories.DataSetFactory.create(
         published=published, user_access_type=access_type)
     s3_link = factories.SourceLinkFactory(
         id="158776ec-5c40-4c58-ba7c-a3425905ec45",
         dataset=dataset,
         link_type=SourceLink.TYPE_LOCAL,
         url="s3://sourcelink/158776ec-5c40-4c58-ba7c-a3425905ec45/test.txt",
     )

     # Snapshot both counters before the request so the deltas can be checked.
     events_before = EventLog.objects.count()
     downloads_before = dataset.number_of_downloads

     mock_client().get_object.return_value = {
         "ContentType": "text/plain",
         "ContentLength": len(file_bytes),
         "Body": StreamingBody(io.BytesIO(file_bytes), len(file_bytes)),
     }

     download_url = reverse(
         "datasets:dataset_source_link_download",
         kwargs={"dataset_uuid": dataset.id, "source_link_id": s3_link.id},
     )
     response = request_client.get(download_url)

     assert response.status_code == 200
     streamed_chunks = list(response.streaming_content)
     assert streamed_chunks[0] == file_bytes
     # The header value is compared as a string.
     assert response["content-length"] == str(len(file_bytes))
     mock_client().get_object.assert_called_with(
         Bucket=settings.AWS_UPLOADS_BUCKET, Key=s3_link.url)
     assert EventLog.objects.count() == events_before + 1
     newest_event = EventLog.objects.latest()
     assert newest_event.event_type == EventLog.TYPE_DATASET_SOURCE_LINK_DOWNLOAD
     reloaded = DataSet.objects.get(pk=dataset.id)
     assert reloaded.number_of_downloads == downloads_before + 1
Esempio n. 21
0
 def test_authorised_link(self, access_type, client, mocker):
     """The source-link preview page renders the CSV as an HTML table.

     ``boto3.client`` is patched in ``dataworkspace.apps.core.boto3_client``
     so that ``head_object`` / ``get_object`` return a two-row CSV. The
     response must contain the expected table markup (compared with
     whitespace-only lines dropped and each line stripped), the record
     count and the download label.
     """
     dataset = factories.DataSetFactory(user_access_type=access_type)
     csv_link = factories.SourceLinkFactory(
         id="158776ec-5c40-4c58-ba7c-a3425905ec45",
         dataset=dataset,
         url="s3://sourcelink/158776ec-5c40-4c58-ba7c-a3425905ec45/test.csv",
     )

     csv_content = b"header1,header2\nrow1 col1, row1 col2\nrow2 col1, row2 col2\n"
     mock_client = mocker.patch(
         "dataworkspace.apps.core.boto3_client.boto3.client")
     mock_client().head_object.return_value = {"ContentType": "text/csv"}
     mock_client().get_object.return_value = {
         "ContentType": "text/plain",
         "ContentLength": len(csv_content),
         "Body": StreamingBody(io.BytesIO(csv_content), len(csv_content)),
     }

     preview_url = reverse(
         "datasets:data_cut_source_link_preview",
         args=(dataset.id, csv_link.id),
     )
     response = client.get(preview_url)

     assert response.status_code == 200
     content = response.content.decode("utf-8")

     expected_table = (
         "<thead>"
         '<tr class="govuk-table__row">'
         '<th class="govuk-table__header">header1</th>'
         '<th class="govuk-table__header">header2</th>'
         "</tr>"
         "</thead><tbody>"
         '<tr class="govuk-table__row">'
         '<td class="govuk-table__cell">row1 col1</td>'
         '<td class="govuk-table__cell">row1 col2</td>'
         "</tr>"
         '<tr class="govuk-table__row">'
         '<td class="govuk-table__cell">row2 col1</td>'
         '<td class="govuk-table__cell">row2 col2</td>'
         "</tr></tbody>"
     )
     # Collapse the page to one line of stripped, non-empty lines so that
     # template indentation does not affect the comparison.
     collapsed_page = "".join(
         stripped
         for stripped in map(str.strip, content.splitlines())
         if stripped
     )
     assert expected_table in collapsed_page
     assert "Showing <strong>2</strong> records." in content
     assert "Download as CSV" in content
Esempio n. 22
0
 def test_file_not_csv(self, mock_client):
     """A link whose URL ends in ``.txt`` yields an empty preview.

     ``head_object`` reports a ``text/csv`` content type, yet
     ``get_preview_data()`` is still expected to return ``(None, [])``.
     """
     text_link = factories.SourceLinkFactory(url="s3://a/path/to/a/file.txt")
     mock_client().head_object.return_value = {"ContentType": "text/csv"}

     no_preview = (None, [])
     assert text_link.get_preview_data() == no_preview
Esempio n. 23
0
 def test_not_s3_link(self):
     """A link with an ``http://`` URL yields an empty preview ``(None, [])``."""
     http_link = factories.SourceLinkFactory(
         url="http://example.com/a-file.csv")

     no_preview = (None, [])
     assert http_link.get_preview_data() == no_preview
Esempio n. 24
0
 def test_unauthorised_link(self, client):
     """The preview page of a default factory source link answers 403."""
     source_link = factories.SourceLinkFactory()

     preview_url = reverse(
         "datasets:data_cut_source_link_preview",
         args=(source_link.dataset.id, source_link.id),
     )
     response = client.get(preview_url)

     assert response.status_code == 403