def test_describe_type_number(self, mocker):
    """client.SQLClient.describe type: number"""
    # Stub the execution layer so no real request is made.
    execute_mock = mocker.patch.object(
        ContextManager, 'execute_query', return_value=SQL_DESCRIBE_NUMBER)
    sql_client = SQLClient(self.credentials)
    # Force the numeric branch of describe() regardless of the mocked data.
    sql_client._get_column_type = lambda table, column: 'number'
    sql_client.describe('table_name', 'column_name')
    expected_query = ''' SELECT COUNT(*),AVG(column_name),MIN(column_name),MAX(column_name) FROM table_name; '''.strip()
    execute_mock.assert_called_once_with(expected_query)
def setUp(self):
    """Resolve credentials and build per-run fixtures for the e2e tests.

    Credentials come from the APIKEY/USERNAME environment variables,
    falling back to tests/e2e/secret.json; when neither is available the
    tests are flagged for skipping via self.no_credentials.
    """
    if (os.environ.get('APIKEY') is None or
            os.environ.get('USERNAME') is None):
        try:
            creds = json.loads(open('tests/e2e/secret.json').read())
            self.apikey = creds['APIKEY']
            self.username = creds['USERNAME']
        except Exception:  # noqa: E722
            warnings.warn("Skipping Context tests. To test it, "
                          "create a `secret.json` file in test/ by "
                          "renaming `secret.json.sample` to `secret.json` "
                          "and updating the credentials to match your "
                          "environment.")
            self.apikey = None
            self.username = None
    else:
        self.apikey = os.environ['APIKEY']
        self.username = os.environ['USERNAME']

    # sets skip value
    self.no_credentials = self.apikey is None or self.username is None

    # table naming info
    has_mpl = 'mpl' if os.environ.get('MPLBACKEND') else 'nonmpl'
    pyver = sys.version[0:3].replace('.', '_')
    buildnum = os.environ.get('TRAVIS_BUILD_NUMBER') or 'none'

    # Skip tests checking quotas when running in TRAVIS
    # since usually multiple tests will be running concurrently
    # in that case
    self.no_credits = self.no_credentials or buildnum != 'none'

    # Unique-per-run suffix so concurrent builds don't collide on tables.
    self.test_slug = '{ver}_{num}_{mpl}'.format(
        ver=pyver, num=buildnum, mpl=has_mpl)

    # Tables created during a test are registered here for cleanup.
    self.test_tables = []

    self.base_url = self.user_url().format(username=self.username)
    self.credentials = Credentials(self.username, self.apikey,
                                   self.base_url)
    self.sql_client = SQLClient(self.credentials)

    # Fixture points as (name, hex-encoded geometry) pairs.
    self.points = [
        ['a', '0101000020E610000028B85851837F52C025404D2D5B5F4440'],
        ['b', '0101000020E610000036B05582C5A10DC0A032FE7DC6354440'],
        ['c', '0101000020E6100000912C6002B7EE17C0C45A7C0A80AD4240']
    ]
    # The same fixture points as (name, lng, lat) rows.
    self.point_lnglat = [['a', -73.99239, 40.74497],
                         ['b', -3.70399, 40.42012],
                         ['c', -5.98312, 37.35547]]

    # Drop any tables left over from a previous (failed) run.
    self.tearDown()
def test_query_verbose(self, mocker):
    """client.SQLClient.query verbose"""
    execute_mock = mocker.patch.object(
        ContextManager, 'execute_query', return_value=SQL_SELECT_RESPONSE)
    result = SQLClient(self.credentials).query('query', verbose=True)
    # Verbose mode hands back the raw API response untouched.
    assert result == SQL_SELECT_RESPONSE
    execute_mock.assert_called_once_with('query')
def test_execute(self, mocker):
    """client.SQLClient.execute"""
    batch_mock = mocker.patch.object(
        ContextManager, 'execute_long_running_query',
        return_value=SQL_BATCH_RESPONSE)
    result = SQLClient(self.credentials).execute('query')
    # execute() delegates to the long-running (batch) execution path.
    assert result == SQL_BATCH_RESPONSE
    batch_mock.assert_called_once_with('query')
def test_update_table(self, mocker):
    """client.SQLClient.update_table"""
    batch_mock = mocker.patch.object(
        ContextManager, 'execute_long_running_query')
    sql_client = SQLClient(self.credentials)
    sql_client.update_table('table_name', 'name', 'b', 'id = 0')
    expected_query = ''' UPDATE table_name SET name='b' WHERE id = 0; '''.strip()
    batch_mock.assert_called_once_with(expected_query)
def test_insert_table(self, mocker):
    """client.SQLClient.insert_table"""
    batch_mock = mocker.patch.object(
        ContextManager, 'execute_long_running_query')
    sql_client = SQLClient(self.credentials)
    sql_client.insert_table('table_name', ['id', 'name'], [0, 'a'])
    expected_query = ''' INSERT INTO table_name (id,name) VALUES(0,'a'); '''.strip()
    batch_mock.assert_called_once_with(expected_query)
def test_drop_table(self, mocker):
    """client.SQLClient.drop_table"""
    batch_mock = mocker.patch.object(
        ContextManager, 'execute_long_running_query')
    sql_client = SQLClient(self.credentials)
    sql_client.drop_table('table_name')
    expected_query = ''' DROP TABLE IF EXISTS table_name; '''.strip()
    batch_mock.assert_called_once_with(expected_query)
def test_rename_table(self, mocker):
    """client.SQLClient.rename_table"""
    batch_mock = mocker.patch.object(
        ContextManager, 'execute_long_running_query')
    sql_client = SQLClient(self.credentials)
    sql_client.rename_table('table_name', 'new_table_name')
    expected_query = ''' ALTER TABLE table_name RENAME TO new_table_name; '''.strip()
    batch_mock.assert_called_once_with(expected_query)
def test_count(self, mocker):
    """client.SQLClient.count"""
    execute_mock = mocker.patch.object(
        ContextManager, 'execute_query', return_value=SQL_COUNT_RESPONSE)
    result = SQLClient(self.credentials).count('table_name')
    # The scalar count is extracted from the mocked response rows.
    assert result == 12345
    expected_query = ''' SELECT COUNT(*) FROM table_name; '''.strip()
    execute_mock.assert_called_once_with(expected_query)
def test_distinct(self, mocker):
    """client.SQLClient.distinct"""
    execute_mock = mocker.patch.object(
        ContextManager, 'execute_query', return_value=SQL_DISTINCT_RESPONSE)
    result = SQLClient(self.credentials).distinct('table_name', 'column_name')
    # Distinct values arrive as (value, frequency) pairs, most frequent first.
    assert result == [('A', 1234), ('B', 5678)]
    expected_query = ''' SELECT column_name, COUNT(*) FROM table_name GROUP BY 1 ORDER BY 2 DESC '''.strip()
    execute_mock.assert_called_once_with(expected_query)
def test_query(self, mocker):
    """client.SQLClient.query"""
    execute_mock = mocker.patch.object(
        ContextManager, 'execute_query', return_value=SQL_SELECT_RESPONSE)
    result = SQLClient(self.credentials).query('query')
    expected_rows = [{
        'column_a': 'A',
        'column_b': 123,
        'column_c': '0123456789ABCDEF'
    }]
    # Non-verbose mode returns only the row dictionaries.
    assert result == expected_rows
    execute_mock.assert_called_once_with('query')
def test_schema(self, mocker):
    """client.SQLClient.schema"""
    execute_mock = mocker.patch.object(
        ContextManager, 'execute_query', return_value=SQL_SCHEMA_RESPONSE)
    result = SQLClient(self.credentials).schema('table_name', raw=True)
    # raw=True yields a plain column-name -> type mapping.
    assert result == {
        'column_a': 'string',
        'column_b': 'number',
        'column_c': 'geometry'
    }
    expected_query = ''' SELECT * FROM table_name LIMIT 0; '''.strip()
    execute_mock.assert_called_once_with(expected_query)
def test_create_table_cartodbfy_public_user(self, mocker):
    """client.SQLClient.create_table cartodbfy: public user"""
    mocker.patch.object(ContextManager, 'get_schema', return_value='public')
    batch_mock = mocker.patch.object(
        ContextManager, 'execute_long_running_query')
    columns = [('id', 'INT'), ('name', 'TEXT')]
    SQLClient(self.credentials).create_table('table_name', columns)
    # With the default cartodbfy, the DDL includes a CDB_CartoDBFyTable
    # call scoped to the resolved schema.
    expected_query = ''' BEGIN; DROP TABLE IF EXISTS table_name; CREATE TABLE table_name (id INT,name TEXT); SELECT CDB_CartoDBFyTable('public', 'table_name'); COMMIT; '''.strip()
    batch_mock.assert_called_once_with(expected_query)
def test_create_table_no_cartodbfy(self, mocker):
    """client.SQLClient.create_table"""
    mocker.patch.object(ContextManager, 'get_schema')
    batch_mock = mocker.patch.object(
        ContextManager, 'execute_long_running_query')
    columns = [('id', 'INT'), ('name', 'TEXT')]
    SQLClient(self.credentials).create_table(
        'table_name', columns, cartodbfy=False)
    # cartodbfy=False leaves an empty statement where the
    # CDB_CartoDBFyTable call would otherwise go.
    expected_query = ''' BEGIN; DROP TABLE IF EXISTS table_name; CREATE TABLE table_name (id INT,name TEXT); ; COMMIT; '''.strip()
    batch_mock.assert_called_once_with(expected_query)
def test_bounds(self, mocker):
    """client.SQLClient.bounds"""
    execute_mock = mocker.patch.object(
        ContextManager, 'execute_query', return_value=SQL_BOUNDS_RESPONSE)
    result = SQLClient(self.credentials).bounds('query')
    # Bounds come back as [[xmin, ymin], [xmax, ymax]].
    expected_bounds = [
        [-16.2500006525, 28.0999760122],
        [2.65424597028, 43.530016092]
    ]
    assert result == expected_bounds
    expected_query = ''' SELECT ARRAY[ ARRAY[st_xmin(geom_env), st_ymin(geom_env)], ARRAY[st_xmax(geom_env), st_ymax(geom_env)] ] bounds FROM ( SELECT ST_Extent(the_geom) geom_env FROM (query) q ) q; '''.strip()
    execute_mock.assert_called_once_with(expected_query)
class TestIsolines(unittest.TestCase, _UserUrlLoader, _ReportQuotas):
    """End-to-end tests for the Isolines data service
    (isochrones / isodistances), run against a live CARTO account.
    """

    def setUp(self):
        # Resolve credentials from the environment, falling back to a
        # local secret.json; tests are skipped when neither is available.
        if (os.environ.get('APIKEY') is None or
                os.environ.get('USERNAME') is None):
            try:
                creds = json.loads(open('tests/e2e/secret.json').read())
                self.apikey = creds['APIKEY']
                self.username = creds['USERNAME']
            except Exception:  # noqa: E722
                warnings.warn("Skipping Context tests. To test it, "
                              "create a `secret.json` file in test/ by "
                              "renaming `secret.json.sample` to `secret.json` "
                              "and updating the credentials to match your "
                              "environment.")
                self.apikey = None
                self.username = None
        else:
            self.apikey = os.environ['APIKEY']
            self.username = os.environ['USERNAME']

        # sets skip value
        self.no_credentials = self.apikey is None or self.username is None

        # table naming info
        has_mpl = 'mpl' if os.environ.get('MPLBACKEND') else 'nonmpl'
        pyver = sys.version[0:3].replace('.', '_')
        buildnum = os.environ.get('TRAVIS_BUILD_NUMBER') or 'none'

        # Skip tests checking quotas when running in TRAVIS
        # since usually multiple tests will be running concurrently
        # in that case
        self.no_credits = self.no_credentials or buildnum != 'none'

        # Unique-per-run suffix so concurrent builds don't collide.
        self.test_slug = '{ver}_{num}_{mpl}'.format(
            ver=pyver, num=buildnum, mpl=has_mpl)

        # Tables created during a test are registered here for cleanup.
        self.test_tables = []

        self.base_url = self.user_url().format(username=self.username)
        self.credentials = Credentials(self.username, self.apikey,
                                       self.base_url)
        self.sql_client = SQLClient(self.credentials)

        # Fixture points as (name, hex-encoded geometry) pairs.
        self.points = [
            ['a', '0101000020E610000028B85851837F52C025404D2D5B5F4440'],
            ['b', '0101000020E610000036B05582C5A10DC0A032FE7DC6354440'],
            ['c', '0101000020E6100000912C6002B7EE17C0C45A7C0A80AD4240']
        ]
        # The same fixture points as (name, lng, lat) rows.
        self.point_lnglat = [['a', -73.99239, 40.74497],
                             ['b', -3.70399, 40.42012],
                             ['c', -5.98312, 37.35547]]

        # Drop any tables left over from a previous (failed) run.
        self.tearDown()

    def skip(self, if_no_credits=False, if_no_credentials=False):
        """Raise SkipTest when the requested precondition is not met."""
        if self.no_credits and if_no_credits:
            raise unittest.SkipTest(
                "skipping this test to avoid consuming credits")
        if self.no_credentials and if_no_credentials:
            raise unittest.SkipTest(
                "no carto credentials, skipping this test")

    def get_test_table_name(self, name):
        """Build a unique normalized table name and register it for
        cleanup in tearDown."""
        n = len(self.test_tables) + 1
        table_name = normalize_name('cf_test_table_{name}_{n}_{slug}'.format(
            name=name, n=n, slug=self.test_slug))
        self.test_tables.append(table_name)
        return table_name

    def tearDown(self):
        """restore to original state"""
        sql_drop = 'DROP TABLE IF EXISTS {};'
        for table in self.test_tables:
            try:
                delete_table(table, credentials=self.credentials)
                self.sql_client.query(sql_drop.format(table))
            except CartoException:
                warnings.warn('Error deleting tables')

    def used_quota(self, iso):
        # Record the current isolines quota usage in the shared report
        # and return it, so tests can assert on quota deltas.
        return TestIsolines.update_quotas('isolines', iso.used_quota())

    def points_query(self):
        # SQL query selecting the fixture points as named geometries.
        point_query = "SELECT '{name}' AS name, '{geom}'::geometry AS the_geom"
        return ' UNION '.join([
            point_query.format(name=name, geom=geom)
            for name, geom in self.points
        ])

    def test_isochrones_from_dataframe_dataset(self):
        self.skip(if_no_credits=True, if_no_credentials=True)
        iso = Isolines(credentials=self.credentials)
        gdf = GeoDataFrame(self.points, columns=['name', 'the_geom'])
        quota = self.used_quota(iso)

        # Preview
        # dry_run only reports the quota that would be consumed.
        result = iso.isochrones(gdf, [100, 1000], mode='car',
                                dry_run=True, exclusive=True).metadata
        self.assertEqual(result.get('required_quota'), 6)
        self.assertEqual(self.used_quota(iso), quota)

        # Isochrones
        result, meta = iso.isochrones(gdf, [100, 1000], mode='car',
                                      exclusive=True)
        self.assertTrue(isinstance(result, GeoDataFrame))
        self.assertTrue(result.is_local())
        self.assertEqual(meta.get('required_quota'), 6)
        # 3 points x 2 ranges = 6 credits.
        quota += 6
        self.assertEqual(self.used_quota(iso), quota)
        result_columns = result.get_column_names()
        self.assertTrue('the_geom' in result_columns)
        self.assertTrue('data_range' in result_columns)
        self.assertEqual(result.get_num_rows(), 6)
        self.assertTrue('cartodb_id' in result_columns)
        self.assertTrue('cartodb_id' in result.dataframe)
        self.assertTrue('source_id' in result_columns)
        self.assertTrue('source_id' in result.dataframe)
        self.assertEqual(result.dataframe['source_id'].min(),
                         gdf.index.min())
        self.assertEqual(result.dataframe['source_id'].max(),
                         gdf.index.max())

    def test_isochrones_from_dataframe_dataset_as_new_table(self):
        self.skip(if_no_credits=True, if_no_credentials=True)
        iso = Isolines(credentials=self.credentials)
        gdf = GeoDataFrame(self.points, columns=['name', 'the_geom'])
        quota = self.used_quota(iso)
        table_name = self.get_test_table_name('isodf')

        # Preview
        result = iso.isochrones(gdf, [100, 1000], mode='car',
                                table_name=table_name,
                                dry_run=True, exclusive=True).metadata
        self.assertEqual(result.get('required_quota'), 6)
        self.assertEqual(self.used_quota(iso), quota)

        # Isochrones
        result = iso.isochrones(gdf, [100, 1000], mode='car',
                                table_name=table_name,
                                exclusive=True).data
        self.assertTrue(isinstance(result, GeoDataFrame))
        # With table_name the result is stored remotely.
        self.assertTrue(result.is_remote())
        quota += 6
        self.assertEqual(self.used_quota(iso), quota)
        result_columns = result.get_column_names()
        self.assertTrue('the_geom' in result_columns)
        self.assertTrue('data_range' in result_columns)
        self.assertEqual(result.get_num_rows(), 6)
        self.assertTrue('source_id' in result_columns)

    def test_isochrones_from_dataframe(self):
        self.skip(if_no_credits=True, if_no_credentials=True)
        iso = Isolines(credentials=self.credentials)
        df = DataFrame(self.points, columns=['name', 'the_geom'])
        quota = self.used_quota(iso)

        # Preview
        result = iso.isochrones(df, [100, 1000], mode='car',
                                dry_run=True, exclusive=True).metadata
        self.assertEqual(result.get('required_quota'), 6)
        self.assertEqual(self.used_quota(iso), quota)

        # Isochrones
        result = iso.isochrones(df, [100, 1000], mode='car',
                                exclusive=True).data
        self.assertTrue(isinstance(result, GeoDataFrame))
        quota += 6
        self.assertEqual(self.used_quota(iso), quota)
        self.assertTrue('the_geom' in result)
        self.assertTrue('data_range' in result)
        self.assertEqual(len(result.index), 6)
        result_columns = result.columns
        self.assertTrue('cartodb_id' in result_columns)
        self.assertTrue('source_id' in result_columns)
        self.assertEqual(result['source_id'].min(), df.index.min())
        self.assertEqual(result['source_id'].max(), df.index.max())

    def test_isochrones_from_dataframe_as_new_table(self):
        self.skip(if_no_credits=True, if_no_credentials=True)
        iso = Isolines(credentials=self.credentials)
        df = DataFrame(self.points, columns=['name', 'the_geom'])
        quota = self.used_quota(iso)
        table_name = self.get_test_table_name('isodfds')

        # Preview
        result = iso.isochrones(df, [100, 1000], mode='car',
                                table_name=table_name,
                                dry_run=True, exclusive=True).metadata
        self.assertEqual(result.get('required_quota'), 6)
        self.assertEqual(self.used_quota(iso), quota)

        # Isochrones
        result = iso.isochrones(df, [100, 1000], mode='car',
                                table_name=table_name,
                                exclusive=True).data
        self.assertTrue(isinstance(result, GeoDataFrame))
        quota += 6
        self.assertEqual(self.used_quota(iso), quota)
        self.assertTrue('the_geom' in result)
        self.assertTrue('data_range' in result)
        self.assertEqual(len(result.index), 6)

        # Verify the persisted table matches the returned result.
        gdf = read_carto(table_name, credentials=self.credentials)
        result_columns = gdf.columns
        self.assertTrue('the_geom' in result_columns)
        self.assertTrue('data_range' in result_columns)
        self.assertEqual(gdf.size, 6)
        self.assertTrue('source_id' in result_columns)

    def test_isochrones_from_table_dataset(self):
        self.skip(if_no_credits=True, if_no_credentials=True)
        iso = Isolines(credentials=self.credentials)
        gdf = GeoDataFrame(self.points, columns=['name', 'the_geom'])
        table_name = self.get_test_table_name('isotb')
        to_carto(gdf, table_name=table_name, credentials=self.credentials)
        quota = self.used_quota(iso)

        # Preview
        result = iso.isochrones(gdf, [100, 1000], mode='car',
                                dry_run=True, exclusive=True).metadata
        self.assertEqual(result.get('required_quota'), 6)
        self.assertEqual(self.used_quota(iso), quota)

        # Isochrones
        result = iso.isochrones(gdf, [100, 1000], mode='car',
                                exclusive=True).data
        self.assertTrue(isinstance(result, GeoDataFrame))
        self.assertTrue(result.is_local())
        quota += 6
        self.assertEqual(self.used_quota(iso), quota)
        result_columns = result.get_column_names()
        self.assertTrue('the_geom' in result_columns)
        self.assertTrue('data_range' in result_columns)
        self.assertEqual(result.get_num_rows(), 6)
        self.assertTrue('cartodb_id' in result_columns)
        self.assertTrue('cartodb_id' in result.dataframe)
        self.assertTrue('source_id' in result_columns)
        self.assertTrue('source_id' in result.dataframe)

    def test_isochrones_from_table_dataset_as_new_table(self):
        self.skip(if_no_credits=True, if_no_credentials=True)
        iso = Isolines(credentials=self.credentials)
        gdf = GeoDataFrame(self.points, columns=['name', 'the_geom'])
        table_name = self.get_test_table_name('isotb')
        # NOTE(review): other tests pass credentials=self.credentials to
        # to_carto here — confirm default credentials are set, otherwise
        # this upload may target the wrong account.
        to_carto(gdf, table_name=table_name)
        result_table_name = self.get_test_table_name('isotbr')
        quota = self.used_quota(iso)

        # Preview
        result = iso.isochrones(gdf, [100, 1000], mode='car',
                                table_name=result_table_name,
                                dry_run=True, exclusive=True).metadata
        self.assertEqual(result.get('required_quota'), 6)
        self.assertEqual(self.used_quota(iso), quota)

        # Isochrones
        result = iso.isochrones(gdf, [100, 1000], mode='car',
                                table_name=result_table_name,
                                exclusive=True).data
        self.assertTrue(isinstance(result, GeoDataFrame))
        self.assertTrue(result.is_remote())
        quota += 6
        self.assertEqual(self.used_quota(iso), quota)
        result_columns = result.get_column_names()
        self.assertTrue('the_geom' in result_columns)
        self.assertTrue('data_range' in result_columns)
        self.assertEqual(result.get_num_rows(), 6)
        self.assertTrue('cartodb_id' in result_columns)
        self.assertTrue('source_id' in result_columns)

    def test_isochrones_from_dataframe_with_lnglat(self):
        self.skip(if_no_credits=True, if_no_credentials=True)
        iso = Isolines(credentials=self.credentials)
        df = DataFrame(self.point_lnglat, columns=['name', 'lng', 'lat'])
        quota = self.used_quota(iso)

        # Preview
        result = iso.isochrones(df, [100, 1000], mode='car',
                                with_lnglat=('lng', 'lat'),
                                dry_run=True, exclusive=True).metadata
        self.assertEqual(result.get('required_quota'), 6)
        self.assertEqual(self.used_quota(iso), quota)

        # Isochrones
        result = iso.isochrones(df, [100, 1000], mode='car',
                                with_lnglat=('lng', 'lat'),
                                exclusive=True).data
        self.assertTrue(isinstance(result, GeoDataFrame))
        quota += 6
        self.assertEqual(self.used_quota(iso), quota)
        self.assertTrue('the_geom' in result)
        self.assertTrue('data_range' in result)
        self.assertEqual(len(result.index), 6)
        result_columns = result.columns
        self.assertTrue('cartodb_id' in result_columns)
        self.assertTrue('source_id' in result_columns)
        self.assertEqual(result['source_id'].min(), df.index.min())
        self.assertEqual(result['source_id'].max(), df.index.max())

    def test_isochrones_from_query_dataset(self):
        self.skip(if_no_credits=True, if_no_credentials=True)
        iso = Isolines(credentials=self.credentials)
        gdf = read_carto(self.points_query(), credentials=self.credentials)
        quota = self.used_quota(iso)

        # Preview
        result = iso.isochrones(gdf, [100, 1000], mode='car',
                                dry_run=True, exclusive=True).metadata
        self.assertEqual(result.get('required_quota'), 6)
        self.assertEqual(self.used_quota(iso), quota)

        # Isochrones
        result = iso.isochrones(gdf, [100, 1000], mode='car',
                                exclusive=True).data
        self.assertTrue(isinstance(result, GeoDataFrame))
        self.assertTrue(result.is_local())
        quota += 6
        self.assertEqual(self.used_quota(iso), quota)
        result_columns = result.get_column_names()
        self.assertTrue('the_geom' in result_columns)
        self.assertTrue('data_range' in result_columns)
        self.assertEqual(result.get_num_rows(), 6)
        self.assertTrue('cartodb_id' in result_columns)
        self.assertTrue('cartodb_id' in result.dataframe)
        # Query-sourced input has no stable ids, so no source_id column.
        self.assertFalse('source_id' in result_columns)
        self.assertFalse('source_id' in result.dataframe)

    def test_isochrones_from_table_query_as_new_table(self):
        self.skip(if_no_credits=True, if_no_credentials=True)
        iso = Isolines(credentials=self.credentials)
        # NOTE(review): the sibling query test passes
        # credentials=self.credentials to read_carto — confirm default
        # credentials are set before relying on this call.
        gdf = read_carto(self.points_query())
        result_table_name = self.get_test_table_name('isotbr')
        quota = self.used_quota(iso)

        # Preview
        result = iso.isochrones(gdf, [100, 1000], mode='car',
                                table_name=result_table_name,
                                dry_run=True, exclusive=True).metadata
        self.assertEqual(result.get('required_quota'), 6)
        self.assertEqual(self.used_quota(iso), quota)

        # Isochrones
        result = iso.isochrones(gdf, [100, 1000], mode='car',
                                table_name=result_table_name,
                                exclusive=True).data
        self.assertTrue(isinstance(result, GeoDataFrame))
        self.assertTrue(result.is_remote())
        quota += 6
        self.assertEqual(self.used_quota(iso), quota)
        result_columns = result.get_column_names()
        self.assertTrue('the_geom' in result_columns)
        self.assertTrue('data_range' in result_columns)
        self.assertEqual(result.get_num_rows(), 6)
        self.assertTrue('cartodb_id' in result_columns)
        self.assertFalse('source_id' in result_columns)

    def test_isodistances_from_dataframe(self):
        self.skip(if_no_credits=True, if_no_credentials=True)
        iso = Isolines(credentials=self.credentials)
        df = DataFrame(self.points, columns=['name', 'the_geom'])
        quota = self.used_quota(iso)

        # Preview
        result = iso.isodistances(df, [100, 1000], mode='car',
                                  dry_run=True, exclusive=True).metadata
        self.assertEqual(result.get('required_quota'), 6)
        self.assertEqual(self.used_quota(iso), quota)

        # Isodistances
        result = iso.isodistances(df, [100, 1000], mode='car',
                                  exclusive=True).data
        self.assertTrue(isinstance(result, GeoDataFrame))
        quota += 6
        self.assertEqual(self.used_quota(iso), quota)
        self.assertTrue('the_geom' in result)
        self.assertTrue('data_range' in result)
        self.assertEqual(len(result.index), 6)

    def test_isochrones_from_dataframe_dataset_with_isoline_options(self):
        self.skip(if_no_credits=True, if_no_credentials=True)
        iso = Isolines(credentials=self.credentials)
        gdf = GeoDataFrame(self.points, columns=['name', 'the_geom'])
        quota = self.used_quota(iso)

        # Preview
        # Extra isoline options (maxpoints) are forwarded to the service.
        result = iso.isochrones(gdf, [100, 1000], mode='car',
                                maxpoints=10,
                                dry_run=True, exclusive=True).metadata
        self.assertEqual(result.get('required_quota'), 6)
        self.assertEqual(self.used_quota(iso), quota)

        # Isochrones
        result = iso.isochrones(gdf, [100, 1000], mode='car',
                                maxpoints=10,
                                exclusive=True).data
        self.assertTrue(isinstance(result, GeoDataFrame))
        self.assertTrue(result.is_local())
        quota += 6
        self.assertEqual(self.used_quota(iso), quota)
        result_columns = result.get_column_names()
        self.assertTrue('the_geom' in result_columns)
        self.assertTrue('data_range' in result_columns)
        self.assertEqual(result.get_num_rows(), 6)
        self.assertTrue('cartodb_id' in result_columns)
        self.assertTrue('cartodb_id' in result.dataframe)
        self.assertTrue('source_id' in result_columns)
        self.assertTrue('source_id' in result.dataframe)
class TestDataObsClient(unittest.TestCase, _UserUrlLoader):
    """Tests for cartoframes.client.DataObsClient"""

    def setUp(self):
        # Resolve credentials from the environment, falling back to a
        # local secret.json; tests run degraded (None creds) otherwise.
        if (os.environ.get('APIKEY') is None or
                os.environ.get('USERNAME') is None):
            try:
                creds = json.loads(open('tests/e2e/secret.json').read())
                self.apikey = creds['APIKEY']
                self.username = creds['USERNAME']
            except Exception:
                warnings.warn("Skipping Context tests. To test it, "
                              "create a `secret.json` file in test/ by "
                              "renaming `secret.json.sample` to `secret.json` "
                              "and updating the credentials to match your "
                              "environment.")
                self.apikey = None
                self.username = None
        else:
            self.apikey = os.environ['APIKEY']
            self.username = os.environ['USERNAME']

        self.base_url = self.user_url().format(username=self.username)
        self.credentials = Credentials(self.username, self.apikey,
                                       self.base_url)
        self.sql_client = SQLClient(self.credentials)

        # table naming info
        has_mpl = 'mpl' if os.environ.get('MPLBACKEND') else 'nonmpl'
        pyver = sys.version[0:3].replace('.', '_')
        buildnum = os.environ.get('TRAVIS_BUILD_NUMBER')

        # Unique-per-run suffix so concurrent builds don't collide.
        test_slug = '{ver}_{num}_{mpl}'.format(
            ver=pyver, num=buildnum, mpl=has_mpl)

        # test tables
        self.test_read_table = 'cb_2013_us_csa_500k'
        self.valid_columns = set([
            'affgeoid', 'aland', 'awater', 'created_at', 'csafp', 'geoid',
            'lsad', 'name', 'the_geom', 'updated_at'
        ])
        # torque table
        self.test_point_table = 'tweets_obama'

        # for writing to carto
        self.test_write_table = normalize_name(
            'cf_test_table_{}'.format(test_slug))
        self.mixed_case_table = normalize_name(
            'AbCdEfG_{}'.format(test_slug))

        # for batch writing to carto
        self.test_write_batch_table = normalize_name(
            'cf_testbatch_table_{}'.format(test_slug))
        self.test_write_lnglat_table = normalize_name(
            'cf_testwrite_lnglat_table_{}'.format(test_slug))
        self.write_named_index = normalize_name(
            'cf_testwrite_non_default_index_{}'.format(test_slug))

        # for queries
        self.test_query_table = normalize_name(
            'cf_testquery_table_{}'.format(test_slug))
        self.test_delete_table = normalize_name(
            'cf_testdelete_table_{}'.format(test_slug))

        # for data observatory
        self.test_data_table = 'carto_usa_offices'

    def tearDown(self):
        """restore to original state"""
        tables = (
            self.test_write_table,
            self.test_write_batch_table,
            self.test_write_lnglat_table,
            self.test_query_table,
            self.mixed_case_table.lower(),
            self.write_named_index,
        )
        sql_drop = 'DROP TABLE IF EXISTS {};'
        for table in tables:
            try:
                delete_table(table, credentials=self.credentials)
                self.sql_client.query(sql_drop.format(table))
            except CartoException:
                warnings.warn('Error deleting tables')

    def test_boundaries(self):
        """DataObsClient.boundaries"""
        do = DataObsClient(self.credentials)

        # all boundary metadata
        boundary_meta = do.boundaries()
        self.assertTrue(boundary_meta.shape[0] > 0,
                        msg='has non-zero number of boundaries')
        meta_cols = set(('geom_id', 'geom_tags', 'geom_type', ))
        self.assertTrue(meta_cols & set(boundary_meta.columns))

        # boundary metadata with correct timespan
        meta_2015 = do.boundaries(timespan='2015')
        self.assertTrue(meta_2015[meta_2015.valid_timespan].shape[0] > 0)

        # test for no data with an incorrect or invalid timespan
        meta_9999 = do.boundaries(timespan='invalid_timespan')
        self.assertTrue(meta_9999[meta_9999.valid_timespan].shape[0] == 0)

        # boundary metadata in a region
        regions = (
            self.test_read_table,
            self.test_data_table,
            [5.9559111595, 45.8179931641, 10.4920501709, 47.808380127],
            'Australia',
        )
        for region in regions:
            boundary_meta = do.boundaries(region=region)
            self.assertTrue(meta_cols & set(boundary_meta.columns))
            self.assertTrue(boundary_meta.shape[0] > 0,
                            msg='has non-zero number of boundaries')

        # boundaries for world
        boundaries = do.boundaries(boundary='us.census.tiger.state')
        self.assertTrue(boundaries.shape[0] > 0)
        self.assertEqual(boundaries.shape[1], 2)
        self.assertSetEqual(set(('the_geom', 'geom_refs', )),
                            set(boundaries.columns))

        # boundaries for region
        boundaries = ('us.census.tiger.state', )
        for b in boundaries:
            geoms = do.boundaries(boundary=b, region=self.test_data_table)
            self.assertTrue(geoms.shape[0] > 0)
            self.assertEqual(geoms.shape[1], 2)
            self.assertSetEqual(set(('the_geom', 'geom_refs', )),
                                set(geoms.columns))

        # presence or lack of clipped boundaries
        nonclipped = (True, False, )
        for tf in nonclipped:
            meta = do.boundaries(include_nonclipped=tf)
            self.assertEqual(
                'us.census.tiger.state' in set(meta.geom_id), tf)

        # invalid region types raise ValueError
        with self.assertRaises(ValueError):
            do.boundaries(region=[1, 2, 3])

        with self.assertRaises(ValueError):
            do.boundaries(region=10)

    def test_discovery(self):
        """DataObsClient.discovery"""
        do = DataObsClient(self.credentials)

        meta = do.discovery(self.test_read_table,
                            keywords=('poverty', ),
                            time=('2010 - 2014', ))
        meta_columns = set(
            ('denom_aggregate', 'denom_colname', 'denom_description',
             'denom_geomref_colname', 'denom_id', 'denom_name',
             'denom_reltype', 'denom_t_description', 'denom_tablename',
             'denom_type', 'geom_colname', 'geom_description',
             'geom_geomref_colname', 'geom_id', 'geom_name',
             'geom_t_description', 'geom_tablename', 'geom_timespan',
             'geom_type', 'id', 'max_score_rank', 'max_timespan_rank',
             'normalization', 'num_geoms', 'numer_aggregate',
             'numer_colname', 'numer_description', 'numer_geomref_colname',
             'numer_id', 'numer_name', 'numer_t_description',
             'numer_tablename', 'numer_timespan', 'numer_type', 'score',
             'score_rank', 'score_rownum', 'suggested_name', 'target_area',
             'target_geoms', 'timespan_rank', 'timespan_rownum'))
        self.assertSetEqual(set(meta.columns), meta_columns,
                            msg='metadata columns are all there')
        self.assertTrue((meta['numer_timespan'] == '2010 - 2014').all())
        self.assertTrue(
            (meta['numer_description'].str.contains('poverty')).all())

        # test region = list of lng/lats
        with self.assertRaises(ValueError):
            do.discovery([1, 2, 3])

        switzerland = [5.9559111595, 45.8179931641,
                       10.4920501709, 47.808380127]
        dd = do.discovery(switzerland, keywords='freight', time='2010')
        self.assertEqual(dd['numer_id'][0], 'eu.eurostat.tgs00078')

        dd = do.discovery('Australia', regex='.*Torres Strait Islander.*')
        for nid in dd['numer_id'].values:
            self.assertRegexpMatches(
                nid,
                r'^au\.data\.B01_Indig_[A-Za-z_]+Torres_St[A-Za-z_]+[FMP]$')

        with self.assertRaises(CartoException):
            do.discovery('non_existent_table_abcdefg')

        dd = do.discovery('United States',
                          boundaries='us.epa.huc.hydro_unit',
                          time=('2006', '2010', ))
        self.assertTrue(dd.shape[0] >= 1)

        # include_quantiles=False filters out quantile measures...
        poverty = do.discovery('United States',
                               boundaries='us.census.tiger.census_tract',
                               keywords=['poverty status', ],
                               time='2011 - 2015',
                               include_quantiles=False)
        df_quantiles = poverty[poverty.numer_aggregate == 'quantile']
        self.assertEqual(df_quantiles.shape[0], 0)

        # ...and include_quantiles=True keeps them.
        poverty = do.discovery('United States',
                               boundaries='us.census.tiger.census_tract',
                               keywords=['poverty status', ],
                               time='2011 - 2015',
                               include_quantiles=True)
        df_quantiles = poverty[poverty.numer_aggregate == 'quantile']
        self.assertTrue(df_quantiles.shape[0] > 0)

    def test_augment(self):
        """DataObsClient.augment"""
        do = DataObsClient(self.credentials)

        meta = do.discovery(self.test_read_table,
                            keywords=('poverty', ),
                            time=('2010 - 2014', ))
        gdf = do.augment(self.test_data_table, meta)
        anscols = set(meta['suggested_name'])
        origcols = set(
            read_carto(self.test_data_table, credentials=self.credentials,
                       limit=1, decode_geom=True).columns)
        # Augmentation adds exactly the suggested measure columns.
        self.assertSetEqual(
            anscols,
            set(gdf.columns) - origcols - {'the_geom', 'cartodb_id'})

        # Augment with a hand-built metadata record.
        meta = [{'numer_id': 'us.census.acs.B19013001',
                 'geom_id': 'us.census.tiger.block_group',
                 'numer_timespan': '2011 - 2015'}, ]
        gdf = do.augment(self.test_data_table, meta)
        self.assertSetEqual(
            set(('median_income_2011_2015', )),
            set(gdf.columns) - origcols - {'the_geom', 'cartodb_id'})

        with self.assertRaises(ValueError, msg='no measures'):
            meta = do.discovery('United States', keywords='not a measure')
            do.augment(self.test_read_table, meta)

        with self.assertRaises(ValueError, msg='too many metadata measures'):
            # returns ~180 measures
            meta = do.discovery(region='united states', keywords='education')
            do.augment(self.test_read_table, meta)

    @pytest.mark.skip()
    def test_augment_with_persist_as(self):
        """DataObsClient.augment with persist_as"""
        do = DataObsClient(self.credentials)

        meta = do.discovery(self.test_read_table,
                            keywords=('poverty', ),
                            time=('2010 - 2014', ))
        gdf = do.augment(self.test_data_table, meta)
        anscols = set(meta['suggested_name'])
        origcols = set(
            read_carto(self.test_data_table, credentials=self.credentials,
                       limit=1, decode_geom=True).columns)
        self.assertSetEqual(
            anscols,
            set(gdf.columns) - origcols - {'the_geom', 'cartodb_id'})

        meta = [{'numer_id': 'us.census.acs.B19013001',
                 'geom_id': 'us.census.tiger.block_group',
                 'numer_timespan': '2011 - 2015'}, ]
        gdf = do.augment(self.test_data_table, meta,
                         persist_as=self.test_write_table)
        self.assertSetEqual(
            set(('median_income_2011_2015', )),
            set(gdf.columns) - origcols - {'the_geom', 'cartodb_id'})
        self.assertEqual(gdf.index.name, 'cartodb_id')
        self.assertEqual(gdf.index.dtype, 'int64')

        # Read the persisted table back and compare with the local result.
        df = read_carto(self.test_write_table,
                        credentials=self.credentials,
                        decode_geom=False)
        self.assertEqual(df.index.name, 'cartodb_id')
        self.assertEqual(df.index.dtype, 'int64')

        # same number of rows
        self.assertEqual(len(df), len(gdf),
                         msg='Expected number or rows')

        # same type of object
        self.assertIsInstance(df, pd.DataFrame,
                              'Should be a pandas DataFrame')
        # same column names
        self.assertSetEqual(set(gdf.columns.values),
                            set(df.columns.values),
                            msg='Should have the columns requested')

        # should have exected schema
        self.assertEqual(sorted(tuple(str(d) for d in df.dtypes)),
                         sorted(tuple(str(d) for d in gdf.dtypes)),
                         msg='Should have same schema/types')

    def test_augment_column_name_collision(self):
        """DataObsClient.augment column name collision"""
        dup_col = 'female_third_level_studies_2011_by_female_pop'
        # Create a table that already has the column augment would add.
        self.sql_client.query(
            """ create table {table} as ( select cdb_latlng(40.4165,-3.70256) the_geom, 1 {dup_col}) """.format(dup_col=dup_col, table=self.test_write_table))
        self.sql_client.query(
            "select cdb_cartodbfytable('public', '{table}')".format(
                table=self.test_write_table))

        do = DataObsClient(self.credentials)
        meta = do.discovery(region=self.test_write_table, keywords='female')
        meta = meta[meta.suggested_name == dup_col]

        gdf = do.augment(self.test_write_table,
                         meta[meta.suggested_name == dup_col])
        # The colliding column is disambiguated with a leading underscore.
        self.assertIn('_' + dup_col, gdf.keys())

    def test_get_countrytag(self):
        valid_regions = ('Australia', 'Brasil', 'EU', 'España', 'U.K.', )
        valid_answers = ['section/tags.{c}'.format(c=c)
                         for c in ('au', 'br', 'eu', 'spain', 'uk', )]
        invalid_regions = ('USofA', None, '', 'Jupiter', )

        for idx, r in enumerate(valid_regions):
            self.assertEqual(get_countrytag(r.lower()), valid_answers[idx])

        for r in invalid_regions:
            with self.assertRaises(ValueError):
                get_countrytag(r)
def setUp(self):
    """Resolve credentials and build per-run table names for the tests.

    Credentials come from the APIKEY/USERNAME environment variables,
    falling back to tests/e2e/secret.json.
    """
    if (os.environ.get('APIKEY') is None or
            os.environ.get('USERNAME') is None):
        try:
            creds = json.loads(open('tests/e2e/secret.json').read())
            self.apikey = creds['APIKEY']
            self.username = creds['USERNAME']
        except Exception:
            warnings.warn("Skipping Context tests. To test it, "
                          "create a `secret.json` file in test/ by "
                          "renaming `secret.json.sample` to `secret.json` "
                          "and updating the credentials to match your "
                          "environment.")
            self.apikey = None
            self.username = None
    else:
        self.apikey = os.environ['APIKEY']
        self.username = os.environ['USERNAME']

    self.base_url = self.user_url().format(username=self.username)
    self.credentials = Credentials(self.username, self.apikey,
                                   self.base_url)
    self.sql_client = SQLClient(self.credentials)

    # table naming info
    has_mpl = 'mpl' if os.environ.get('MPLBACKEND') else 'nonmpl'
    pyver = sys.version[0:3].replace('.', '_')
    buildnum = os.environ.get('TRAVIS_BUILD_NUMBER')

    # Unique-per-run suffix so concurrent builds don't collide on tables.
    test_slug = '{ver}_{num}_{mpl}'.format(
        ver=pyver, num=buildnum, mpl=has_mpl)

    # test tables
    self.test_read_table = 'cb_2013_us_csa_500k'
    self.valid_columns = set([
        'affgeoid', 'aland', 'awater', 'created_at', 'csafp', 'geoid',
        'lsad', 'name', 'the_geom', 'updated_at'
    ])
    # torque table
    self.test_point_table = 'tweets_obama'

    # for writing to carto
    self.test_write_table = normalize_name(
        'cf_test_table_{}'.format(test_slug))
    self.mixed_case_table = normalize_name(
        'AbCdEfG_{}'.format(test_slug))

    # for batch writing to carto
    self.test_write_batch_table = normalize_name(
        'cf_testbatch_table_{}'.format(test_slug))
    self.test_write_lnglat_table = normalize_name(
        'cf_testwrite_lnglat_table_{}'.format(test_slug))
    self.write_named_index = normalize_name(
        'cf_testwrite_non_default_index_{}'.format(test_slug))

    # for queries
    self.test_query_table = normalize_name(
        'cf_testquery_table_{}'.format(test_slug))
    self.test_delete_table = normalize_name(
        'cf_testdelete_table_{}'.format(test_slug))

    # for data observatory
    self.test_data_table = 'carto_usa_offices'