def test_augment_with_persist_as(self):
    """DataObsClient.augment with persist_as"""
    do = DataObsClient(self.credentials)
    meta = do.discovery(self.test_read_table,
                        keywords=('poverty', ),
                        time=('2010 - 2014', ))
    gdf = do.augment(self.test_data_table, meta)
    anscols = set(meta['suggested_name'])
    origcols = set(
        read_carto(self.test_data_table,
                   credentials=self.credentials,
                   limit=1,
                   decode_geom=True).columns)
    self.assertSetEqual(
        anscols,
        set(gdf.columns) - origcols - {'the_geom', 'cartodb_id'})

    meta = [
        {
            'numer_id': 'us.census.acs.B19013001',
            'geom_id': 'us.census.tiger.block_group',
            'numer_timespan': '2011 - 2015'
        },
    ]
    gdf = do.augment(self.test_data_table,
                     meta,
                     persist_as=self.test_write_table)
    self.assertSetEqual(
        set(('median_income_2011_2015', )),
        set(gdf.columns) - origcols - {'the_geom', 'cartodb_id'})
    self.assertEqual(gdf.index.name, 'cartodb_id')
    self.assertEqual(gdf.index.dtype, 'int64')

    df = read_carto(self.test_write_table,
                    credentials=self.credentials,
                    decode_geom=False)
    self.assertEqual(df.index.name, 'cartodb_id')
    self.assertEqual(df.index.dtype, 'int64')

    # same number of rows
    self.assertEqual(len(df), len(gdf),
                     msg='Expected number of rows')

    # same type of object
    self.assertIsInstance(df, pd.DataFrame,
                          'Should be a pandas DataFrame')

    # same column names
    self.assertSetEqual(set(gdf.columns.values),
                        set(df.columns.values),
                        msg='Should have the columns requested')

    # should have expected schema
    self.assertEqual(sorted(tuple(str(d) for d in df.dtypes)),
                     sorted(tuple(str(d) for d in gdf.dtypes)),
                     msg='Should have same schema/types')
def test_augment_deprecation(self):
    # instantiating the client also warns (see test_class_deprecation);
    # capture that separately so only augment's warning is checked below
    with warnings.catch_warnings(record=True) as w:
        do = DataObsClient(self.credentials)

    with warnings.catch_warnings(record=True) as w:
        try:
            do.augment()
        except Exception:
            pass
        assert issubclass(w[-1].category, DeprecationWarning)
        assert 'deprecated' in str(w[-1].message)
def test_augment_column_name_collision(self):
    """DataObsClient.augment column name collision"""
    dup_col = 'female_third_level_studies_2011_by_female_pop'
    self.sql_client.query("""
        create table {table} as (
            select cdb_latlng(40.4165, -3.70256) the_geom,
                   1 {dup_col})
        """.format(dup_col=dup_col, table=self.test_write_table))
    self.sql_client.query(
        "select cdb_cartodbfytable('public', '{table}')".format(
            table=self.test_write_table))

    do = DataObsClient(self.credentials)
    meta = do.discovery(region=self.test_write_table, keywords='female')
    meta = meta[meta.suggested_name == dup_col]
    gdf = do.augment(self.test_write_table,
                     meta[meta.suggested_name == dup_col])
    self.assertIn('_' + dup_col, gdf.keys())
def test_augment(self):
    """DataObsClient.augment"""
    do = DataObsClient(self.credentials)
    meta = do.discovery(self.test_read_table,
                        keywords=('poverty', ),
                        time=('2010 - 2014', ))
    gdf = do.augment(self.test_data_table, meta)
    anscols = set(meta['suggested_name'])
    origcols = set(
        read_carto(self.test_data_table,
                   credentials=self.credentials,
                   limit=1,
                   decode_geom=True).columns)
    self.assertSetEqual(
        anscols,
        set(gdf.columns) - origcols - {'the_geom', 'cartodb_id'})

    meta = [
        {
            'numer_id': 'us.census.acs.B19013001',
            'geom_id': 'us.census.tiger.block_group',
            'numer_timespan': '2011 - 2015'
        },
    ]
    gdf = do.augment(self.test_data_table, meta)
    self.assertSetEqual(
        set(('median_income_2011_2015', )),
        set(gdf.columns) - origcols - {'the_geom', 'cartodb_id'})

    with self.assertRaises(ValueError, msg='no measures'):
        meta = do.discovery('United States', keywords='not a measure')
        do.augment(self.test_read_table, meta)

    with self.assertRaises(ValueError, msg='too many metadata measures'):
        # returns ~180 measures
        meta = do.discovery(region='united states', keywords='education')
        do.augment(self.test_read_table, meta)
def test_discovery(self):
    """DataObsClient.discovery"""
    do = DataObsClient(self.credentials)
    meta = do.discovery(self.test_read_table,
                        keywords=('poverty', ),
                        time=('2010 - 2014', ))
    meta_columns = set((
        'denom_aggregate', 'denom_colname', 'denom_description',
        'denom_geomref_colname', 'denom_id', 'denom_name',
        'denom_reltype', 'denom_t_description', 'denom_tablename',
        'denom_type', 'geom_colname', 'geom_description',
        'geom_geomref_colname', 'geom_id', 'geom_name',
        'geom_t_description', 'geom_tablename', 'geom_timespan',
        'geom_type', 'id', 'max_score_rank', 'max_timespan_rank',
        'normalization', 'num_geoms', 'numer_aggregate',
        'numer_colname', 'numer_description', 'numer_geomref_colname',
        'numer_id', 'numer_name', 'numer_t_description',
        'numer_tablename', 'numer_timespan', 'numer_type', 'score',
        'score_rank', 'score_rownum', 'suggested_name', 'target_area',
        'target_geoms', 'timespan_rank', 'timespan_rownum'))
    self.assertSetEqual(set(meta.columns), meta_columns,
                        msg='metadata columns are all there')
    self.assertTrue((meta['numer_timespan'] == '2010 - 2014').all())
    self.assertTrue(
        (meta['numer_description'].str.contains('poverty')).all())

    # test region = list of lng/lats
    with self.assertRaises(ValueError):
        do.discovery([1, 2, 3])

    # discovery on a bounding box (Switzerland)
    switzerland = [
        5.9559111595, 45.8179931641, 10.4920501709, 47.808380127
    ]
    dd = do.discovery(switzerland, keywords='freight', time='2010')
    self.assertEqual(dd['numer_id'][0], 'eu.eurostat.tgs00078')

    # regex-based discovery
    dd = do.discovery('Australia', regex='.*Torres Strait Islander.*')
    for nid in dd['numer_id'].values:
        self.assertRegexpMatches(
            nid,
            r'^au\.data\.B01_Indig_[A-Za-z_]+Torres_St[A-Za-z_]+[FMP]$')

    with self.assertRaises(CartoException):
        do.discovery('non_existent_table_abcdefg')

    dd = do.discovery('United States',
                      boundaries='us.epa.huc.hydro_unit',
                      time=('2006', '2010', ))
    self.assertTrue(dd.shape[0] >= 1)

    # quantile measures are excluded unless explicitly requested
    poverty = do.discovery('United States',
                           boundaries='us.census.tiger.census_tract',
                           keywords=['poverty status', ],
                           time='2011 - 2015',
                           include_quantiles=False)
    df_quantiles = poverty[poverty.numer_aggregate == 'quantile']
    self.assertEqual(df_quantiles.shape[0], 0)

    poverty = do.discovery('United States',
                           boundaries='us.census.tiger.census_tract',
                           keywords=['poverty status', ],
                           time='2011 - 2015',
                           include_quantiles=True)
    df_quantiles = poverty[poverty.numer_aggregate == 'quantile']
    self.assertTrue(df_quantiles.shape[0] > 0)
def test_boundaries(self):
    """DataObsClient.boundaries"""
    do = DataObsClient(self.credentials)

    # all boundary metadata
    boundary_meta = do.boundaries()
    self.assertTrue(boundary_meta.shape[0] > 0,
                    msg='has non-zero number of boundaries')
    meta_cols = set(('geom_id', 'geom_tags', 'geom_type', ))
    self.assertTrue(meta_cols & set(boundary_meta.columns))

    # boundary metadata with correct timespan
    meta_2015 = do.boundaries(timespan='2015')
    self.assertTrue(meta_2015[meta_2015.valid_timespan].shape[0] > 0)

    # test for no data with an incorrect or invalid timespan
    meta_9999 = do.boundaries(timespan='invalid_timespan')
    self.assertTrue(meta_9999[meta_9999.valid_timespan].shape[0] == 0)

    # boundary metadata in a region
    regions = (
        self.test_read_table,
        self.test_data_table,
        [5.9559111595, 45.8179931641, 10.4920501709, 47.808380127],
        'Australia',
    )
    for region in regions:
        boundary_meta = do.boundaries(region=region)
        self.assertTrue(meta_cols & set(boundary_meta.columns))
        self.assertTrue(boundary_meta.shape[0] > 0,
                        msg='has non-zero number of boundaries')

    # boundaries for world
    boundaries = do.boundaries(boundary='us.census.tiger.state')
    self.assertTrue(boundaries.shape[0] > 0)
    self.assertEqual(boundaries.shape[1], 2)
    self.assertSetEqual(set(('the_geom', 'geom_refs', )),
                        set(boundaries.columns))

    # boundaries for region
    boundaries = ('us.census.tiger.state', )
    for b in boundaries:
        geoms = do.boundaries(boundary=b, region=self.test_data_table)
        self.assertTrue(geoms.shape[0] > 0)
        self.assertEqual(geoms.shape[1], 2)
        self.assertSetEqual(set(('the_geom', 'geom_refs', )),
                            set(geoms.columns))

    # presence or lack of clipped boundaries
    nonclipped = (True, False, )
    for tf in nonclipped:
        meta = do.boundaries(include_nonclipped=tf)
        self.assertEqual('us.census.tiger.state' in set(meta.geom_id), tf)

    with self.assertRaises(ValueError):
        do.boundaries(region=[1, 2, 3])

    with self.assertRaises(ValueError):
        do.boundaries(region=10)
def test_class_deprecation(self):
    with warnings.catch_warnings(record=True) as w:
        _ = DataObsClient(self.credentials)
        assert issubclass(w[-1].category, DeprecationWarning)
        assert 'deprecated' in str(w[-1].message)