def test_DODataset_name(api_key_auth_client_usr):
    bq_user_dataset = DODataset(auth_client=api_key_auth_client_usr)
    name = 'fake-name'

    result = bq_user_dataset.name(name)

    assert bq_user_dataset == result
    assert bq_user_dataset._name == name

def test_can_download_to_dataframe(mocker, api_key_auth_client_usr):
    # mock
    fake_response = ResponseMock(StringIO(CSV_SAMPLE_REDUCED))
    mocker.patch.object(APIKeyAuthClient, 'send', return_value=fake_response)
    bq_user_dataset = DODataset(auth_client=api_key_auth_client_usr)

    # test
    result = bq_user_dataset.name(
        'census_tracts_american_samoa').download_stream()
    assert isinstance(result, ResponseStream)

def test_can_upload_from_file_object(mocker, api_key_auth_client_usr):
    # mock
    fake_response = ResponseMock()
    mocker.patch.object(APIKeyAuthClient, 'send', return_value=fake_response)
    bq_user_dataset = DODataset(auth_client=api_key_auth_client_usr)

    # test
    unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace('-', '_')
    file_object = StringIO(CSV_SAMPLE_REDUCED)
    result = bq_user_dataset.name(unique_table_name).upload_file_object(
        file_object)
    assert result == fake_response

def test_can_import_a_dataset(mocker, api_key_auth_client_usr):
    # mock
    fake_response = ResponseMock({'item_queue_id': '123'})
    mocker.patch.object(APIKeyAuthClient, 'send', return_value=fake_response)
    bq_user_dataset = DODataset(auth_client=api_key_auth_client_usr)

    # test
    unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace('-', '_')
    file_object = StringIO(CSV_SAMPLE_REDUCED)
    dataset = bq_user_dataset.name(unique_table_name) \
        .column(name='id', type='INT64') \
        .column('geom', 'GEOMETRY') \
        .ttl_seconds(30)
    dataset.create()
    dataset.upload_file_object(file_object)
    job = dataset.import_dataset()

    assert isinstance(job, DODatasetJob)

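# The unit tests above stub out the HTTP layer: APIKeyAuthClient.send is
# patched with a ResponseMock test double imported from the suite's shared
# helpers. As a rough sketch, the behavior those tests rely on is assumed to
# look something like this (names and fields here are illustrative
# assumptions, not the canonical helper):
#
#     class ResponseMock:
#         def __init__(self, body=None, status_code=200):
#             self.body = body              # StringIO payload or JSON dict
#             self.status_code = status_code
#
#         def json(self):
#             return self.body              # e.g. {'item_queue_id': '123'}
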
class TestDODataset(unittest.TestCase):
    """This test suite needs APIKEY, USERNAME and USERURL available either as
    environment variables or in "tests/e2e/secret.json", with USERURL
    pointing to a working DO API. The DO API must have the user/API-key
    mapping set to grant access to the user's DO Project in GCP.
    """
    def setUp(self):
        if os.environ.get('APIKEY') and os.environ.get(
                'USERNAME') and os.environ.get('USERURL'):
            self.apikey = os.environ['APIKEY']
            self.username = os.environ['USERNAME']
            self.base_url = os.environ['USERURL']
        else:
            with open('tests/e2e/secret.json') as secret_file:
                creds = json.loads(secret_file.read())
            self.apikey = creds['APIKEY']
            self.username = creds['USERNAME']
            self.base_url = creds['USERURL']
        credentials = Credentials(username=self.username,
                                  api_key=self.apikey,
                                  base_url=self.base_url)
        auth_client = credentials.get_api_key_auth_client()
        self.do_dataset = DODataset(auth_client=auth_client)

    def test_can_upload_from_dataframe(self):
        sample = StringIO(CSV_SAMPLE_REDUCED)
        df = pandas.read_csv(sample)
        unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace(
            '-', '_')
        self.do_dataset.name(unique_table_name).upload(df)

    def test_can_upload_from_file_object(self):
        unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace(
            '-', '_')
        file_object = StringIO(CSV_SAMPLE_REDUCED)
        self.do_dataset.name(unique_table_name).upload_file_object(
            file_object)

    def test_can_import_a_dataset(self):
        unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace(
            '-', '_')
        file_object = StringIO(CSV_SAMPLE_REDUCED)
        dataset = self.do_dataset.name(unique_table_name) \
            .column(name='id', type='INT64') \
            .column('geom', 'GEOMETRY') \
            .ttl_seconds(30)
        dataset.create()
        dataset.upload_file_object(file_object)
        job = dataset.import_dataset()
        self.assertIsInstance(job, DODatasetJob)

    def test_can_get_status_from_import(self):
        unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace(
            '-', '_')
        file_object = StringIO(CSV_SAMPLE_REDUCED)
        dataset = self.do_dataset.name(unique_table_name) \
            .column(name='id', type='INT64') \
            .column('geom', 'GEOMETRY') \
            .ttl_seconds(30)
        dataset.create()
        dataset.upload_file_object(file_object)
        job = dataset.import_dataset()
        status = job.status()
        self.assertIn(
            status, ['pending', 'running', 'cancelled', 'success', 'failure'])

    def test_can_wait_for_job_completion(self):
        unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace(
            '-', '_')
        file_object = StringIO(CSV_SAMPLE_REDUCED)
        dataset = self.do_dataset.name(unique_table_name) \
            .column(name='id', type='INT64') \
            .column('geom', 'GEOMETRY') \
            .ttl_seconds(30)
        dataset.create()
        dataset.upload_file_object(file_object, geom_column='geom')
        job = dataset.import_dataset()
        status = job.result()
        self.assertIn(status, ['success'])

    def test_can_upload_a_dataframe_and_wait_for_completion(self):
        unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace(
            '-', '_')
        sample = StringIO(CSV_SAMPLE_REDUCED)
        df = pandas.read_csv(sample)
        dataset = self.do_dataset.name(unique_table_name) \
            .column(name='id', type='INT64') \
            .column('geom', 'GEOMETRY') \
            .ttl_seconds(30)
        dataset.create()
        status = dataset.upload_dataframe(df, geom_column='geom')
        self.assertIn(status, ['success'])

    def test_can_download_to_dataframe(self):
        result = self.do_dataset.name(
            'census_tracts_american_samoa').download_stream()
        df = pandas.read_csv(result)
        self.assertEqual(df.shape, (18, 13))

        # do some checks on the contents
        sample = pandas.DataFrame(df.head(),
                                  columns=('state_fips_code',
                                           'county_fips_code',
                                           'geo_id',
                                           'tract_name',
                                           'internal_point_geo'))
        sample['internal_point_geo'] = df['internal_point_geo'].apply(
            wkt.loads)
        geosample = geopandas.GeoDataFrame(sample,
                                           geometry='internal_point_geo')
        self.assertEqual(geosample.to_csv(index=False), EXPECTED_CSV_SAMPLE)

    def test_creation_of_dataset(self):
        unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace(
            '-', '_')
        dataset = self.do_dataset.name(unique_table_name) \
            .column(name='cartodb_id', type='INT64') \
            .column('the_geom', 'GEOMETRY') \
            .ttl_seconds(30)
        dataset.create()

        # do a quick check on the resulting table
        result = dataset.download_stream()
        df = pandas.read_csv(result)
        self.assertEqual(df.shape, (0, 2))
        self.assertEqual(df.to_csv(index=False), 'cartodb_id,the_geom\n')

    def test_points_enrichment_dataset(self):
        variable_slug = 'poverty_a86da569'
        variable_column_name = 'poverty'
        unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace(
            '-', '_')
        gdf = read_file(
            file_path('../observatory/enrichment/files/points.geojson'))
        gdf[_ENRICHMENT_ID] = range(gdf.shape[0])
        gdf[_GEOM_COLUMN] = gdf.geometry
        gdf = gdf[[_ENRICHMENT_ID, _GEOM_COLUMN]]

        dataset = self.do_dataset.name(unique_table_name) \
            .column(_ENRICHMENT_ID, 'INT64') \
            .column(_GEOM_COLUMN, 'GEOMETRY') \
            .ttl_seconds(_TTL_IN_SECONDS)
        dataset.create()
        status = dataset.upload_dataframe(gdf, geom_column=_GEOM_COLUMN)
        self.assertIn(status, ['success'])

        geom_type = GEOM_TYPE_POINTS
        variables = [variable_slug]
        output_name = '{}_result'.format(unique_table_name)
        status = dataset.enrichment(geom_type=geom_type,
                                    variables=variables,
                                    output_name=output_name)
        self.assertIn(status, ['success'])

        result = self.do_dataset.name(output_name).download_stream()
        result_df = pandas.read_csv(result)
        self.assertIn(variable_column_name, result_df.columns)

    def test_polygons_enrichment_dataset(self):
        variable_slug = 'poverty_a86da569'
        variable_column_name = 'poverty'
        unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace(
            '-', '_')
        gdf = read_file(
            file_path('../observatory/enrichment/files/polygon.geojson'))
        gdf[_ENRICHMENT_ID] = range(gdf.shape[0])
        gdf[_GEOM_COLUMN] = gdf.geometry
        gdf = gdf[[_ENRICHMENT_ID, _GEOM_COLUMN]]

        dataset = self.do_dataset.name(unique_table_name) \
            .column(_ENRICHMENT_ID, 'INT64') \
            .column(_GEOM_COLUMN, 'GEOMETRY') \
            .ttl_seconds(_TTL_IN_SECONDS)
        dataset.create()
        status = dataset.upload_dataframe(gdf, geom_column=_GEOM_COLUMN)
        self.assertIn(status, ['success'])

        geom_type = GEOM_TYPE_POLYGONS
        variables = [variable_slug]
        output_name = '{}_result'.format(unique_table_name)
        status = dataset.enrichment(geom_type=geom_type,
                                    variables=variables,
                                    output_name=output_name)
        self.assertIn(status, ['success'])

        result = self.do_dataset.name(output_name).download_stream()
        df = pandas.read_csv(result)
        self.assertIn(variable_column_name, df.columns)
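
# When falling back to "tests/e2e/secret.json", setUp expects a file of this
# shape (the keys come from the code above; the values are placeholders):
#
#     {
#         "APIKEY": "your-api-key",
#         "USERNAME": "your-username",
#         "USERURL": "https://<do-api-url>"
#     }

# Allow running the e2e suite directly, outside a pytest invocation:
if __name__ == '__main__':
    unittest.main()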