def test_source_link_filename(db):
    """Check ``SourceLink.get_filename()`` for three link configurations.

    Covers a local ``.csv.zip`` link on a dataset with a ``DW`` reference
    code, a local ``.csv`` link on a dataset without one, and an external
    HTML link on a dataset without one.
    """

    def make_link(dataset, url, link_type):
        # All links in this test share the same display name.
        return factories.SourceLinkFactory(
            dataset=dataset,
            name="A test source",
            url=url,
            link_type=link_type,
        )

    coded_dataset = factories.DataSetFactory(
        reference_code=factories.DatasetReferenceCodeFactory(code="DW")
    )
    zipped_link = make_link(
        coded_dataset,
        "s3://csv-pipelines/my-data.csv.zip",
        SourceLink.TYPE_LOCAL,
    )
    assert zipped_link.get_filename() == "DW00001-a-test-source.zip"

    csv_link = make_link(
        factories.DataSetFactory(),
        "s3://csv-pipelines/my-data.csv",
        SourceLink.TYPE_LOCAL,
    )
    assert csv_link.get_filename() == "a-test-source.csv"

    external_link = make_link(
        factories.DataSetFactory(),
        "http://www.google.com/index.html",
        SourceLink.TYPE_EXTERNAL,
    )
    assert external_link.get_filename() == "a-test-source.csv"
def test_dataset_source_filename(db, factory):
    """Check ``get_filename()`` for sources built by the supplied ``factory``.

    A dataset with a ``DW`` reference code yields a code-prefixed ``.csv``
    name; a dataset without one yields the bare slugified name.
    """
    source_name = "A test source"

    coded_dataset = factories.DataSetFactory(
        reference_code=factories.DatasetReferenceCodeFactory(code="DW")
    )
    coded_source = factory(dataset=coded_dataset, name=source_name)
    assert coded_source.get_filename() == "DW00001-a-test-source.csv"

    plain_source = factory(dataset=factories.DataSetFactory(), name=source_name)
    assert plain_source.get_filename() == "a-test-source.csv"
def test_materialized_view_download(self, request_client, published):
    """Download a source view that points at a materialized view.

    Builds a two-row table plus a materialized view over it in
    ``my_database``, requests the source view URL and verifies the streamed
    CSV body, the extra ``EventLog`` row and the incremented download count.
    """
    setup_sql = (
        " CREATE TABLE if not exists materialized_test_table (field2 int,field1 varchar(255));"
        " TRUNCATE TABLE materialized_test_table;"
        " INSERT INTO materialized_test_table VALUES(1, 'record1');"
        " INSERT INTO materialized_test_table VALUES(2, 'record2');"
        " DROP MATERIALIZED VIEW IF EXISTS materialized_test_view;"
        " CREATE MATERIALIZED VIEW materialized_test_view AS SELECT * FROM materialized_test_table;"
        " "
    )
    data_dsn = database_dsn(settings.DATABASES_DATA['my_database'])
    with psycopg2.connect(data_dsn) as connection, connection.cursor() as cur:
        cur.execute(setup_sql)

    dataset = factories.DataSetFactory(
        user_access_type='REQUIRES_AUTHENTICATION', published=published
    )
    view_database = factories.DatabaseFactory(memorable_name='my_database')
    source_view = factories.SourceViewFactory(
        dataset=dataset,
        database=view_database,
        schema='public',
        view='materialized_test_view',
    )

    # Baselines are captured before the request so deltas can be asserted.
    events_before = EventLog.objects.count()
    downloads_before = dataset.number_of_downloads

    response = request_client.get(source_view.get_absolute_url())
    assert response.status_code == 200

    expected_csv = (
        b'"field2","field1"\r\n'
        b'1,"record1"\r\n'
        b'2,"record2"\r\n'
        b'"Number of rows: 2"\r\n'
    )
    streamed_body = b''.join(response.streaming_content)
    assert streamed_body == expected_csv

    assert EventLog.objects.count() == events_before + 1
    latest_event = EventLog.objects.latest()
    assert latest_event.event_type == EventLog.TYPE_DATASET_SOURCE_VIEW_DOWNLOAD

    refreshed = DataSet.objects.get(pk=dataset.id)
    assert refreshed.number_of_downloads == downloads_before + 1
def test_view_download(self):
    """Download a source view backed by a plain SQL view.

    Builds a two-row table and a view over it in ``my_database``, fetches
    the source view URL via ``self._authenticated_get`` and verifies the
    streamed CSV, the extra ``EventLog`` row and the incremented download
    count.
    """
    setup_sql = (
        " CREATE TABLE if not exists download_test_table (field2 int,field1 varchar(255));"
        " TRUNCATE TABLE download_test_table;"
        " INSERT INTO download_test_table VALUES(1, 'record1');"
        " INSERT INTO download_test_table VALUES(2, 'record2');"
        " CREATE OR REPLACE VIEW download_test_view AS SELECT * FROM download_test_table;"
        " "
    )
    data_dsn = database_dsn(settings.DATABASES_DATA['my_database'])
    with connect(data_dsn) as connection, connection.cursor() as cur:
        cur.execute(setup_sql)

    dataset = factories.DataSetFactory(
        user_access_type='REQUIRES_AUTHENTICATION'
    )
    view_database = factories.DatabaseFactory(memorable_name='my_database')
    source_view = factories.SourceViewFactory(
        dataset=dataset,
        database=view_database,
        schema='public',
        view='download_test_view',
    )

    # Baselines are captured before the request so deltas can be asserted.
    events_before = EventLog.objects.count()
    downloads_before = dataset.number_of_downloads

    response = self._authenticated_get(source_view.get_absolute_url())
    self.assertEqual(response.status_code, 200)

    expected_csv = (
        b'"field2","field1"\r\n'
        b'1,"record1"\r\n'
        b'2,"record2"\r\n'
        b'"Number of rows: 2"\r\n'
    )
    self.assertEqual(b''.join(response.streaming_content), expected_csv)

    self.assertEqual(EventLog.objects.count(), events_before + 1)
    latest_event = EventLog.objects.latest()
    self.assertEqual(
        latest_event.event_type,
        EventLog.TYPE_DATASET_SOURCE_VIEW_DOWNLOAD,
    )

    refreshed = DataSet.objects.get(pk=dataset.id)
    self.assertEqual(refreshed.number_of_downloads, downloads_before + 1)
def test_authorised_query(self, access_type, client, test_db):
    """Preview a reviewed query on a master dataset.

    Expects a 200 response whose HTML contains a one-row table with
    columns ``a`` and ``b``, the single-record summary and the CSV
    download label.
    """
    master = factories.DataSetFactory(
        type=DataSetType.MASTER, user_access_type=access_type
    )
    reviewed_query = factories.CustomDatasetQueryFactory(
        dataset=master,
        database=test_db,
        query="SELECT 1 as a, 2 as b",
        reviewed=True,
    )

    preview_url = reverse(
        "datasets:data_cut_query_preview",
        args=(master.id, reviewed_query.id),
    )
    response = client.get(preview_url)
    assert response.status_code == 200

    content = response.content.decode("utf-8")
    # Collapse the markup onto one line, dropping blank lines and indentation,
    # so the table can be matched independently of template whitespace.
    flattened = "".join(filter(None, map(str.strip, content.splitlines())))

    expected_table = (
        "<thead>"
        '<tr class="govuk-table__row">'
        '<th class="govuk-table__header">a</th>'
        '<th class="govuk-table__header">b</th>'
        "</tr>"
        "</thead><tbody>"
        '<tr class="govuk-table__row">'
        '<td class="govuk-table__cell">1</td>'
        '<td class="govuk-table__cell">2</td>'
        "</tr></tbody>"
    )
    assert expected_table in flattened
    assert "Showing <strong>1</strong> record." in content
    assert "Download as CSV" in content
def test_preview_valid_datacut(self, access_type, client, test_db):
    """Preview data cut queries with few rows and with many rows.

    A two-column single-row query must render its fields, the full table
    and a download control. A query returning twice
    ``settings.DATASET_PREVIEW_NUM_OF_ROWS`` rows must report that only
    that many random rows are shown.
    """
    datacut = factories.DataSetFactory(
        type=DataSetType.DATACUT,
        user_access_type=access_type,
    )

    def fetch_preview(query):
        # Request the preview page for ``query`` on the data cut.
        url = reverse(
            "datasets:dataset_query_preview",
            kwargs={"dataset_uuid": datacut.id, "query_id": query.id},
        )
        return client.get(url)

    # Small result set: everything is shown.
    small_query = factories.CustomDatasetQueryFactory(
        dataset=datacut,
        database=test_db,
        query="SELECT 1 as a, 2 as b",
    )
    response = fetch_preview(small_query)
    page = response.content.decode(response.charset)
    html = "".join(filter(None, map(str.strip, page.splitlines())))

    assert response.status_code == 200
    # Field list.
    assert "<li>a</li><li>b</li>" in html
    # Sample data table.
    expected_table = (
        "<thead>"
        '<tr class="govuk-table__row">'
        '<th class="govuk-table__header ref-data-col-">a</th>'
        '<th class="govuk-table__header ref-data-col-">b</th>'
        "</tr>"
        "</thead>"
        "<tbody>"
        '<tr class="govuk-table__row">'
        '<td class="govuk-table__cell">1</td>'
        '<td class="govuk-table__cell">2</td>'
        "</tr>"
        "</tbody>"
    )
    assert expected_table in html
    assert "Showing all rows from data." in html
    # Download control is present.
    assert "Download" in html

    # Large result set: the preview is capped at the configured row count.
    row_limit = settings.DATASET_PREVIEW_NUM_OF_ROWS
    large_query = factories.CustomDatasetQueryFactory(
        dataset=datacut,
        database=test_db,
        query=f"SELECT * FROM generate_series(1, {row_limit * 2}) as a;",
    )
    response = fetch_preview(large_query)
    page = response.content.decode(response.charset)
    expected_summary = (
        f"Showing <strong>{row_limit}</strong> random rows from data."
    )
    assert expected_summary in page
def _create_query(self, sql, reviewed=True, published=True, data_grid_enabled=False):
    """Seed ``custom_query_test`` and return a custom query over a new dataset.

    The table is (re)created with three fixed rows, then a dataset with
    ``UserAccessType.REQUIRES_AUTHENTICATION`` is built and a
    ``CustomDatasetQueryFactory`` instance running ``sql`` is returned.

    Args:
        sql: Query text stored on the created custom query.
        reviewed: Value for the query's ``reviewed`` flag.
        published: Value for the dataset's ``published`` flag.
        data_grid_enabled: Value for the query's ``data_grid_enabled`` flag.
    """
    seed_sql = (
        " CREATE TABLE IF NOT EXISTS custom_query_test ( id INT, name VARCHAR(255), date DATE );"
        " TRUNCATE TABLE custom_query_test;"
        " INSERT INTO custom_query_test VALUES(1, 'the first record', NULL);"
        " INSERT INTO custom_query_test VALUES(2, 'the second record', '2019-01-01');"
        " INSERT INTO custom_query_test VALUES(3, 'the last record', NULL);"
        " "
    )
    with psycopg2.connect(self._get_dsn()) as connection, connection.cursor() as cur:
        cur.execute(seed_sql)

    owning_dataset = factories.DataSetFactory(
        user_access_type=UserAccessType.REQUIRES_AUTHENTICATION,
        published=published,
    )
    custom_query = factories.CustomDatasetQueryFactory(
        dataset=owning_dataset,
        database=self._get_database(),
        query=sql,
        reviewed=reviewed,
        data_grid_enabled=data_grid_enabled,
    )
    return custom_query
def test_view_download(self, access_type, request_client, published):
    """Download a source view backed by a plain SQL view.

    Builds a two-row table and a view over it in ``my_database``, requests
    the source view URL and verifies the streamed CSV, the extra
    ``EventLog`` row and the incremented download count.
    """
    setup_sql = (
        " CREATE TABLE if not exists download_test_table (field2 int,field1 varchar(255));"
        " TRUNCATE TABLE download_test_table;"
        " INSERT INTO download_test_table VALUES(1, 'record1');"
        " INSERT INTO download_test_table VALUES(2, 'record2');"
        " CREATE OR REPLACE VIEW download_test_view AS SELECT * FROM download_test_table;"
        " "
    )
    data_dsn = database_dsn(settings.DATABASES_DATA["my_database"])
    with psycopg2.connect(data_dsn) as connection, connection.cursor() as cur:
        cur.execute(setup_sql)

    dataset = factories.DataSetFactory(
        user_access_type=access_type, published=published
    )
    view_database = factories.DatabaseFactory(memorable_name="my_database")
    source_view = factories.SourceViewFactory(
        dataset=dataset,
        database=view_database,
        schema="public",
        view="download_test_view",
    )

    # Baselines are captured before the request so deltas can be asserted.
    events_before = EventLog.objects.count()
    downloads_before = dataset.number_of_downloads

    response = request_client.get(source_view.get_absolute_url())
    assert response.status_code == 200

    expected_csv = (
        b'"field2","field1"\r\n'
        b'1,"record1"\r\n'
        b'2,"record2"\r\n'
        b'"Number of rows: 2"\r\n'
    )
    streamed_body = b"".join(response.streaming_content)
    assert streamed_body == expected_csv

    assert EventLog.objects.count() == events_before + 1
    latest_event = EventLog.objects.latest()
    assert latest_event.event_type == EventLog.TYPE_DATASET_SOURCE_VIEW_DOWNLOAD

    refreshed = DataSet.objects.get(pk=dataset.id)
    assert refreshed.number_of_downloads == downloads_before + 1
def test_preview_valid_datacut(self, client, test_db):
    """Preview data cut queries with few rows and with many rows.

    A two-column single-row query must render its fields, the full table
    and a download control. A query returning twice
    ``settings.DATASET_PREVIEW_NUM_OF_ROWS`` rows must report that only
    that many random rows are shown.
    """
    datacut = factories.DataSetFactory(
        type=DataSet.TYPE_DATA_CUT,
        user_access_type='REQUIRES_AUTHENTICATION',
    )

    # --- Small result set: everything is shown -------------------------
    small_query = factories.CustomDatasetQueryFactory(
        dataset=datacut,
        database=test_db,
        query='SELECT 1 as a, 2 as b',
    )
    small_url = reverse(
        'datasets:dataset_query_preview',
        kwargs={'dataset_uuid': datacut.id, 'query_id': small_query.id},
    )
    response = client.get(small_url)
    page = response.content.decode(response.charset)
    stripped_lines = (line.strip() for line in page.splitlines())
    html = ''.join(line for line in stripped_lines if line)

    assert response.status_code == 200
    # Field list.
    assert '<li>a</li><li>b</li>' in html
    # Sample data table.
    expected_table = (
        '<thead>'
        '<tr class="govuk-table__row">'
        '<th class="govuk-table__header ref-data-col-">a</th>'
        '<th class="govuk-table__header ref-data-col-">b</th>'
        '</tr>'
        '</thead>'
        '<tbody>'
        '<tr class="govuk-table__row">'
        '<td class="govuk-table__cell">1</td>'
        '<td class="govuk-table__cell">2</td>'
        '</tr>'
        '</tbody>'
    )
    assert expected_table in html
    assert 'Showing all rows from data.' in html
    # Download control is present.
    assert 'Download' in html

    # --- Large result set: preview capped at the configured row count ---
    row_limit = settings.DATASET_PREVIEW_NUM_OF_ROWS
    large_query = factories.CustomDatasetQueryFactory(
        dataset=datacut,
        database=test_db,
        query=f'SELECT * FROM generate_series(1, {row_limit * 2}) as a;',
    )
    large_url = reverse(
        'datasets:dataset_query_preview',
        kwargs={'dataset_uuid': datacut.id, 'query_id': large_query.id},
    )
    response = client.get(large_url)
    page = response.content.decode(response.charset)
    expected_summary = (
        f'Showing <strong>{row_limit}</strong> random rows from data.'
    )
    assert expected_summary in page
def test_link_data_cut_doesnt_have_preview(access_type, client):
    """Request the page of a published dataset that has one source link.

    Only the 200 status of the dataset page is asserted.
    """
    dataset = factories.DataSetFactory(
        user_access_type=access_type,
        published=True,
    )
    factories.SourceLinkFactory(dataset=dataset)

    dataset_url = dataset.get_absolute_url()
    response = client.get(dataset_url)

    assert response.status_code == 200
def test_link_data_cut_doesnt_have_preview(client):
    """Request the page of a published dataset that has one source link.

    The page must load with status 200 and its rendered content must
    include the text ``No preview available``.
    """
    dataset = factories.DataSetFactory(
        user_access_type='REQUIRES_AUTHENTICATION',
        published=True,
    )
    factories.SourceLinkFactory(dataset=dataset)

    dataset_url = dataset.get_absolute_url()
    response = client.get(dataset_url)

    assert response.status_code == 200
    assert 'No preview available' in response.rendered_content
def test_forbidden_dataset(self, client):
    """Request a source view of a ``REQUIRES_AUTHORIZATION`` dataset.

    Expects a 403 response, no new ``EventLog`` rows and an unchanged
    download count.
    """
    restricted = factories.DataSetFactory(
        user_access_type='REQUIRES_AUTHORIZATION'
    )
    source_view = factories.SourceViewFactory(dataset=restricted)

    # Baselines taken before the request.
    events_before = EventLog.objects.count()
    downloads_before = restricted.number_of_downloads

    response = client.get(source_view.get_absolute_url())

    assert response.status_code == 403
    assert EventLog.objects.count() == events_before
    reloaded = DataSet.objects.get(pk=restricted.id)
    assert reloaded.number_of_downloads == downloads_before
def test_custom_query_data_last_updated(metadata_db):
    """Check ``get_data_last_updated_date()`` on custom dataset queries.

    Four cases are covered: a query over two registered tables, a query
    over one registered table, a query with no registered tables, and a
    second query with no registered tables (against ``table4``).
    """
    dataset = factories.DataSetFactory()

    def make_query(sql):
        # Every query in this test shares the dataset and metadata database.
        return factories.CustomDatasetQueryFactory(
            dataset=dataset,
            database=metadata_db,
            query=sql,
        )

    def register_table(query, table_name):
        # Attach a ``public`` schema table record to ``query``.
        factories.CustomDatasetQueryTableFactory(
            query=query, schema="public", table=table_name
        )

    # With several tables in the query, the earliest "last updated" date
    # is expected.
    joined_query = make_query("select * from table1 join table2 on 1=1")
    register_table(joined_query, "table1")
    register_table(joined_query, "table2")
    expected_earliest = datetime(2020, 9, 1, 0, 1, 0, tzinfo=UTC)
    assert joined_query.get_data_last_updated_date() == expected_earliest

    # With a single table, that table's last update date is expected.
    single_query = make_query("select * from table1")
    register_table(single_query, "table1")
    expected_single = datetime(2020, 9, 2, 0, 1, 0, tzinfo=UTC)
    assert single_query.get_data_last_updated_date() == expected_single

    # No metadata for the tables: None is expected.
    unknown_query = make_query("select * from table3")
    assert unknown_query.get_data_last_updated_date() is None

    # Null last updated date: None is expected.
    null_date_query = make_query("select * from table4")
    assert null_date_query.get_data_last_updated_date() is None
def test_missing_view(self, client):
    """Request a source view created without an explicit schema or view.

    Expects a 404 response and an unchanged download count.
    """
    dataset = factories.DataSetFactory(
        user_access_type='REQUIRES_AUTHENTICATION'
    )
    view_database = factories.DatabaseFactory(memorable_name='my_database')
    source_view = factories.SourceViewFactory(
        dataset=dataset,
        database=view_database,
    )
    downloads_before = dataset.number_of_downloads

    response = client.get(source_view.get_absolute_url())

    assert response.status_code == 404
    reloaded = DataSet.objects.get(pk=dataset.id)
    assert reloaded.number_of_downloads == downloads_before
def test_source_table_data_last_updated(metadata_db):
    """Check ``get_data_last_updated_date()`` on source tables.

    ``public.table1`` must report 2020-09-02 00:01:00; a table named
    ``doesntexist`` must report ``None``.
    """
    dataset = factories.DataSetFactory()

    known_table = factories.SourceTableFactory(
        dataset=dataset,
        database=metadata_db,
        schema='public',
        table='table1',
    )
    expected_date = datetime(2020, 9, 2, 0, 1, 0)
    assert known_table.get_data_last_updated_date() == expected_date

    unknown_table = factories.SourceTableFactory(
        dataset=dataset,
        database=metadata_db,
        schema='public',
        table='doesntexist',
    )
    assert unknown_table.get_data_last_updated_date() is None
def test_forbidden_dataset(self):
    """Request a source view of a ``REQUIRES_AUTHORIZATION`` dataset.

    Expects a 403 response, no new ``EventLog`` rows and an unchanged
    download count.
    """
    restricted = factories.DataSetFactory(
        user_access_type='REQUIRES_AUTHORIZATION'
    )
    source_view = factories.SourceViewFactory(dataset=restricted)

    # Baselines taken before the request.
    events_before = EventLog.objects.count()
    downloads_before = restricted.number_of_downloads

    response = self._authenticated_get(source_view.get_absolute_url())

    self.assertEqual(response.status_code, 403)
    self.assertEqual(EventLog.objects.count(), events_before)
    reloaded = DataSet.objects.get(pk=restricted.id)
    self.assertEqual(reloaded.number_of_downloads, downloads_before)
def test_homepage_group_list(self):
    """Check which data groupings are listed on the ``root`` page.

    A group with a published dataset must appear exactly once. Groups
    whose only dataset is unpublished, groups with no datasets and
    deleted groups must not appear.
    """
    published_group = factories.DataGroupingFactory.create()
    factories.DataSetFactory(grouping=published_group, published=True)

    unpublished_group = factories.DataGroupingFactory.create()
    factories.DataSetFactory(grouping=unpublished_group, published=False)

    empty_group = factories.DataGroupingFactory.create()

    deleted_group = factories.DataGroupingFactory.create()
    deleted_group.delete()

    response = self._authenticated_get(reverse('root'))

    self.assertEqual(response.status_code, 200)
    self.assertContains(response, published_group.name, 1)
    # None of these groups should be rendered; deleted groups included.
    for hidden_group in (unpublished_group, empty_group, deleted_group):
        self.assertNotContains(response, hidden_group.name)
def test_missing_view(self):
    """Request a source view created without an explicit schema or view.

    Expects a 404 response and an unchanged download count.
    """
    dataset = factories.DataSetFactory(
        user_access_type='REQUIRES_AUTHENTICATION'
    )
    view_database = factories.DatabaseFactory(memorable_name='my_database')
    source_view = factories.SourceViewFactory(
        dataset=dataset,
        database=view_database,
    )
    downloads_before = dataset.number_of_downloads

    response = self._authenticated_get(source_view.get_absolute_url())

    self.assertEqual(response.status_code, 404)
    reloaded = DataSet.objects.get(pk=dataset.id)
    self.assertEqual(reloaded.number_of_downloads, downloads_before)
def test_forbidden_dataset(self, client):
    """Request a custom query of a ``REQUIRES_AUTHORIZATION`` dataset.

    Expects a 403 response, no new ``EventLog`` rows and an unchanged
    download count.
    """
    restricted = factories.DataSetFactory(
        user_access_type='REQUIRES_AUTHORIZATION'
    )
    custom_query = factories.CustomDatasetQueryFactory(
        dataset=restricted,
        database=self._get_database(),
        query='SELECT * FROM a_table',
    )

    # Baselines taken before the request.
    events_before = EventLog.objects.count()
    downloads_before = restricted.number_of_downloads

    response = client.get(custom_query.get_absolute_url())

    assert response.status_code == 403
    assert EventLog.objects.count() == events_before
    reloaded = DataSet.objects.get(pk=restricted.id)
    assert reloaded.number_of_downloads == downloads_before
def test_forbidden_dataset(self):
    """Request a custom query of a ``REQUIRES_AUTHORIZATION`` dataset.

    Expects a 403 response, no new ``EventLog`` rows and an unchanged
    download count.
    """
    restricted = factories.DataSetFactory(
        user_access_type='REQUIRES_AUTHORIZATION'
    )
    custom_query = factories.CustomDatasetQueryFactory(
        dataset=restricted,
        database=self.database,
        query='SELECT * FROM a_table',
    )

    # Baselines taken before the request.
    events_before = EventLog.objects.count()
    downloads_before = restricted.number_of_downloads

    response = self._authenticated_get(custom_query.get_absolute_url())

    self.assertEqual(response.status_code, 403)
    self.assertEqual(EventLog.objects.count(), events_before)
    reloaded = DataSet.objects.get(pk=restricted.id)
    self.assertEqual(reloaded.number_of_downloads, downloads_before)
def test_unauthorised_query(self, client, test_db):
    """Preview an unreviewed query on a ``REQUIRES_AUTHORIZATION`` master dataset.

    Expects a 403 response.
    """
    master = factories.DataSetFactory(
        type=DataSetType.MASTER,
        user_access_type=UserAccessType.REQUIRES_AUTHORIZATION,
    )
    unreviewed_query = factories.CustomDatasetQueryFactory(
        dataset=master,
        database=test_db,
        query="SELECT 1 as a, 2 as b",
        reviewed=False,
    )

    preview_url = reverse(
        "datasets:data_cut_query_preview",
        args=(master.id, unreviewed_query.id),
    )
    response = client.get(preview_url)

    assert response.status_code == 403
def _preview_unreviewed_datacut(self, client, dataset_db):
    """Create an unreviewed query on a published dataset and fetch its preview.

    Returns:
        The response from ``client.get`` for the query preview URL.
    """
    dataset = factories.DataSetFactory(
        user_access_type='REQUIRES_AUTHENTICATION',
        published=True,
    )
    unreviewed_query = factories.CustomDatasetQueryFactory(
        dataset=dataset,
        database=dataset_db,
        query='SELECT 1 as a, 2 as b',
        reviewed=False,
    )
    preview_url = reverse(
        'datasets:dataset_query_preview',
        kwargs={'dataset_uuid': dataset.id, 'query_id': unreviewed_query.id},
    )
    return client.get(preview_url)
def test_preview_forbidden_datacut(self, client, dataset_db):
    """Preview a query on a ``REQUIRES_AUTHORIZATION`` dataset.

    Expects a 403 response.
    """
    restricted = factories.DataSetFactory(
        user_access_type='REQUIRES_AUTHORIZATION'
    )
    custom_query = factories.CustomDatasetQueryFactory(
        dataset=restricted,
        database=dataset_db,
        query='SELECT * FROM a_table',
    )

    preview_url = reverse(
        'datasets:dataset_query_preview',
        kwargs={'dataset_uuid': restricted.id, 'query_id': custom_query.id},
    )
    response = client.get(preview_url)

    assert response.status_code == 403
def test_source_table_data_last_updated(metadata_db):
    """Check ``get_data_last_updated_date()`` on source tables.

    ``public.table1`` must report 2020-09-02 00:01:00 with ``UTC`` tzinfo;
    a table named ``doesntexist`` must report ``None``.
    """
    dataset = factories.DataSetFactory()

    known_table = factories.SourceTableFactory(
        dataset=dataset,
        database=metadata_db,
        schema="public",
        table="table1",
    )
    expected_date = datetime(2020, 9, 2, 0, 1, 0, tzinfo=UTC)
    assert known_table.get_data_last_updated_date() == expected_date

    unknown_table = factories.SourceTableFactory(
        dataset=dataset,
        database=metadata_db,
        schema="public",
        table="doesntexist",
    )
    assert unknown_table.get_data_last_updated_date() is None
def _preview_unreviewed_datacut(self, client, test_db):
    """Create an unreviewed query on a published data cut and fetch its preview.

    Returns:
        The response from ``client.get`` for the query preview URL.
    """
    datacut = factories.DataSetFactory(
        type=DataSetType.DATACUT,
        user_access_type=UserAccessType.REQUIRES_AUTHENTICATION,
        published=True,
    )
    unreviewed_query = factories.CustomDatasetQueryFactory(
        dataset=datacut,
        database=test_db,
        query="SELECT 1 as a, 2 as b",
        reviewed=False,
    )
    preview_url = reverse(
        "datasets:dataset_query_preview",
        kwargs={"dataset_uuid": datacut.id, "query_id": unreviewed_query.id},
    )
    return client.get(preview_url)
def test_preview_forbidden_datacut(self, client, test_db):
    """Preview a query on a ``REQUIRES_AUTHORIZATION`` data cut.

    Expects a 403 response.
    """
    restricted = factories.DataSetFactory(
        type=DataSetType.DATACUT,
        user_access_type=UserAccessType.REQUIRES_AUTHORIZATION,
    )
    custom_query = factories.CustomDatasetQueryFactory(
        dataset=restricted,
        database=test_db,
        query="SELECT * FROM a_table",
    )

    preview_url = reverse(
        "datasets:dataset_query_preview",
        kwargs={"dataset_uuid": restricted.id, "query_id": custom_query.id},
    )
    response = client.get(preview_url)

    assert response.status_code == 403
def test_preview_invalid_datacut(self, client, dataset_db):
    """Preview a query that selects from ``invalid_table``.

    The page must not contain ``Data Fields`` or ``Download`` and must
    contain ``No data available``.
    """
    dataset = factories.DataSetFactory(
        user_access_type='REQUIRES_AUTHENTICATION'
    )
    broken_query = factories.CustomDatasetQueryFactory(
        dataset=dataset,
        database=dataset_db,
        query='SELECT * FROM invalid_table',
    )

    preview_url = reverse(
        'datasets:dataset_query_preview',
        kwargs={'dataset_uuid': dataset.id, 'query_id': broken_query.id},
    )
    response = client.get(preview_url)
    page = response.content.decode(response.charset)

    assert 'Data Fields' not in page
    assert 'No data available' in page
    assert 'Download' not in page
def test_preview_forbidden_master_dataset(self, client, test_db):
    """Preview a source table on a ``REQUIRES_AUTHORIZATION`` master dataset.

    Expects a 403 response.
    """
    restricted = factories.DataSetFactory(
        type=DataSet.TYPE_MASTER_DATASET,
        user_access_type='REQUIRES_AUTHORIZATION',
    )
    table = factories.SourceTableFactory(
        dataset=restricted,
        name='source_table1',
        database=test_db,
        schema='public',
        table='test_table',
    )

    preview_url = reverse(
        'datasets:dataset_table_preview',
        kwargs={'dataset_uuid': restricted.id, 'table_uuid': table.id},
    )
    response = client.get(preview_url)

    assert response.status_code == 403
def test_preview_table(self, client, test_db):
    """Preview ``public.test_table`` on a master dataset.

    Expects a 200 response that lists fields ``a`` and ``b``, renders the
    single data row, states that all rows are shown, and does not contain
    the text ``Download``.
    """
    master = factories.DataSetFactory(
        type=DataSetType.MASTER,
        user_access_type=UserAccessType.REQUIRES_AUTHENTICATION,
    )
    table = factories.SourceTableFactory(
        dataset=master,
        name="source_table1",
        database=test_db,
        schema="public",
        table="test_table",
    )

    preview_url = reverse(
        "datasets:dataset_table_preview",
        kwargs={"dataset_uuid": master.id, "table_uuid": table.id},
    )
    response = client.get(preview_url)
    page = response.content.decode(response.charset)
    # Collapse the markup onto one line, dropping blank lines and indentation.
    html = "".join(filter(None, map(str.strip, page.splitlines())))

    assert response.status_code == 200
    # Field list.
    assert "<li>a</li><li>b</li>" in html
    # Sample data table.
    expected_table = (
        "<thead>"
        '<tr class="govuk-table__row">'
        '<th class="govuk-table__header ref-data-col-">a</th>'
        '<th class="govuk-table__header ref-data-col-">b</th>'
        "</tr>"
        "</thead>"
        "<tbody>"
        '<tr class="govuk-table__row">'
        '<td class="govuk-table__cell">1</td>'
        '<td class="govuk-table__cell">2</td>'
        "</tr>"
        "</tbody>"
    )
    assert expected_table in html
    assert "Showing all rows from data." in html
    # The page must NOT offer a download control for a table preview.
    assert "Download" not in html
def test_preview_forbidden_master_dataset(self, client, test_db):
    """Preview a source table on a ``REQUIRES_AUTHORIZATION`` master dataset.

    Expects a 403 response.
    """
    restricted = factories.DataSetFactory(
        type=DataSetType.MASTER,
        user_access_type=UserAccessType.REQUIRES_AUTHORIZATION,
    )
    table = factories.SourceTableFactory(
        dataset=restricted,
        name="source_table1",
        database=test_db,
        schema="public",
        table="test_table",
    )

    preview_url = reverse(
        "datasets:dataset_table_preview",
        kwargs={"dataset_uuid": restricted.id, "table_uuid": table.id},
    )
    response = client.get(preview_url)

    assert response.status_code == 403