def test_dataset_download_and_upload(self):
    """Upload a one-row query, download the resulting table and re-upload it with replace."""
    self.assertNotExistsTable(self.test_write_table)

    sql = 'SELECT 1 as fakec'
    from_query = Dataset.from_query(query=sql, context=self.cc)
    from_query.upload(table_name=self.test_write_table)

    # Re-open the freshly written table, pull it locally and push it back over itself.
    from_table = Dataset.from_table(table_name=self.test_write_table, context=self.cc)
    from_table.download()
    from_table.upload(table_name=self.test_write_table, if_exists=Dataset.REPLACE)
def test_dataset_download_bool_null(self):
    """Check dtypes and values of downloaded boolean columns, with and without a null."""
    self.assertNotExistsTable(self.test_write_table)

    sql = 'SELECT * FROM (values (true, true), (false, false), (false, null)) as x(fakec_bool, fakec_bool_null)'
    Dataset.from_query(query=sql, context=self.cc).upload(table_name=self.test_write_table)

    remote = Dataset.from_table(table_name=self.test_write_table, context=self.cc)
    frame = remote.download()

    # The fully populated column is expected as bool; the one holding a null as object.
    self.assertEqual(frame['fakec_bool'].dtype, 'bool')
    self.assertEqual(frame['fakec_bool_null'].dtype, 'object')

    self.assertEqual(list(frame['fakec_bool']), [True, False, False])
    self.assertEqual(list(frame['fakec_bool_null']), [True, False, None])
def test_cartocontext_execute(self):
    """context.CartoContext.execute

    Writes a small DataFrame to ``self.test_write_table``, checks that the
    table exists, drops it through ``CartoContext.execute`` and checks that
    it no longer exists.
    """
    cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey)

    df = pd.DataFrame({'vals': list('abcd'), 'ids': list('wxyz')})
    df = df.astype({'vals': str, 'ids': str})
    cc.write(df, self.test_write_table, overwrite=True)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(Dataset.from_table(context=cc, table_name=self.test_write_table).exists(), True)

    cc.execute(''' DROP TABLE {table_name} '''.format(table_name=self.test_write_table))

    self.assertEqual(Dataset.from_table(context=cc, table_name=self.test_write_table).exists(), False)
def test_dataset_constructor_validation_fails_with_table_name_and_geodataframe(self):
    """Expect ValueError when Dataset() receives a table name, a schema and a gdf together."""
    with self.assertRaises(ValueError):
        Dataset(table_name='fake_table', schema='fake_schema', gdf={})
def test_dataset_write_geopandas(self):
    """Upload a GeoDataFrame built from taxi drop-off coordinates.

    Reads 50 rows of the taxi example, replaces ``the_geom`` with point
    geometries built from the drop-off longitude/latitude columns, uploads
    the result and expects 50 rows with a non-null ``the_geom``.
    """
    self.assertNotExistsTable(self.test_write_table)

    from cartoframes.examples import read_taxi
    import geopandas as gpd
    # Import the class from its submodule explicitly: a bare `import shapely`
    # is not guaranteed to make `shapely.geometry` available on its own.
    from shapely.geometry import Point

    df = read_taxi(limit=50)
    df.drop(['the_geom'], axis=1, inplace=True)
    gdf = gpd.GeoDataFrame(
        df.drop(['dropoff_longitude', 'dropoff_latitude'], axis=1),
        crs={'init': 'epsg:4326'},
        geometry=[Point(xy) for xy in zip(df.dropoff_longitude, df.dropoff_latitude)])

    # TODO: use from_geodataframe
    dataset = Dataset.from_dataframe(gdf).upload(table_name=self.test_write_table, context=self.cc)
    # Keep the table name reported by the uploaded dataset for the checks below.
    self.test_write_table = dataset.table_name

    self.assertExistsTable(self.test_write_table)

    result = self.cc.sql_client.send(
        'SELECT * FROM {} WHERE the_geom IS NOT NULL'.format(self.test_write_table))
    self.assertEqual(result['total_rows'], 50)
def test_dataset_upload_validation_fails_with_query_and_append(self):
    """Expect CartoException when a query-backed Dataset is uploaded with if_exists=APPEND."""
    query_dataset = Dataset.from_query(query='SELECT 1', context=self.cc)

    # NOTE: `msg` is only the failure text shown if nothing is raised; the
    # exception's own message is not compared against it.
    expected = 'Error using append with a query Dataset. It is not possible to append data to a query'
    with self.assertRaises(CartoException, msg=expected):
        query_dataset.upload(table_name=self.test_write_table, if_exists=Dataset.APPEND)
def test_dataset_constructor_validation_fails_with_table_name_and_query(self):
    """Expect ValueError when Dataset() receives a table name, a schema and a query together."""
    with self.assertRaises(ValueError):
        Dataset(table_name='fake_table', schema='fake_schema', query='select * from fake_table')
def test_dataset_upload_validation_df_fails_without_context(self):
    """Expect ValueError when a DataFrame-backed Dataset is uploaded without a context."""
    local_dataset = Dataset.from_dataframe(df=load_geojson(self.test_geojson))

    # NOTE: `msg` is only the failure text shown if nothing is raised; the
    # exception's own message is not compared against it.
    expected = 'You should provide a table_name and context to upload data.'
    with self.assertRaises(ValueError, msg=expected):
        local_dataset.upload(table_name=self.test_write_table)
def test_dataset_upload_validation_query_fails_without_table_name(self):
    """Expect ValueError when a query-backed Dataset is uploaded without a table name."""
    query_dataset = Dataset.from_query(query='SELECT 1', context=self.cc)

    # NOTE: `msg` is only the failure text shown if nothing is raised; the
    # exception's own message is not compared against it.
    expected = 'You should provide a table_name and context to upload data.'
    with self.assertRaises(ValueError, msg=expected):
        query_dataset.upload()
def test_dataset_write_if_exists_replace(self):
    """Upload the Brooklyn poverty example twice, the second time with if_exists=REPLACE."""
    from cartoframes.examples import read_brooklyn_poverty

    poverty = read_brooklyn_poverty()

    first = Dataset.from_dataframe(poverty).upload(table_name=self.test_write_table, context=self.cc)
    self.test_write_table = first.table_name

    # Second upload targets the same table and must overwrite it.
    Dataset.from_dataframe(poverty).upload(
        if_exists=Dataset.REPLACE, table_name=self.test_write_table, context=self.cc)

    self.assertExistsTable(self.test_write_table)

    count_sql = 'SELECT * FROM {} WHERE the_geom IS NOT NULL'.format(self.test_write_table)
    response = self.cc.sql_client.send(count_sql)
    self.assertEqual(response['total_rows'], 2049)
def test_dataset_from_geojson(self):
    """Dataset.from_geojson should populate only `gdf` and report a local state."""
    built = Dataset.from_geojson(geojson=self.test_geojson)

    self.assertIsInstance(built, Dataset)
    self.assertIsNotNone(built.gdf)

    # Every other data source attribute must stay unset.
    for attr in ('table_name', 'query', 'df', 'cc'):
        self.assertIsNone(getattr(built, attr))

    self.assertEqual(built.state, Dataset.STATE_LOCAL)
def test_dataset_from_dataframe(self):
    """Dataset.from_dataframe should populate only `df` and report a local state."""
    built = Dataset.from_dataframe(df=load_geojson(self.test_geojson))

    self.assertIsInstance(built, Dataset)
    self.assertIsNotNone(built.df)

    # Every other data source attribute must stay unset.
    for attr in ('table_name', 'query', 'gdf', 'cc'):
        self.assertIsNone(getattr(built, attr))

    self.assertEqual(built.state, Dataset.STATE_LOCAL)
def test_dataset_from_query(self):
    """Dataset.from_query should keep the query and context and report a remote state."""
    sql = 'SELECT * FROM fake_table'
    built = Dataset.from_query(query=sql, context=self.cc)

    self.assertIsInstance(built, Dataset)
    self.assertEqual(built.query, sql)

    # No table name or local data is expected for a query-backed dataset.
    for attr in ('table_name', 'df', 'gdf'):
        self.assertIsNone(getattr(built, attr))

    self.assertEqual(built.cc, self.cc)
    self.assertEqual(built.state, Dataset.STATE_REMOTE)
def test_dataset_from_table(self):
    """Dataset.from_table should keep the table name, use the 'public' schema and be remote."""
    name = 'fake_table'
    built = Dataset.from_table(table_name=name, context=self.cc)

    self.assertIsInstance(built, Dataset)
    self.assertEqual(built.table_name, name)
    self.assertEqual(built.schema, 'public')

    # No query or local data is expected for a table-backed dataset.
    for attr in ('query', 'df', 'gdf'):
        self.assertIsNone(getattr(built, attr))

    self.assertEqual(built.cc, self.cc)
    self.assertEqual(built.state, Dataset.STATE_REMOTE)
def test_dataset_download_validations(self):
    """Exercise Dataset.download for local, query-backed and table-backed datasets.

    A DataFrame-backed dataset is expected to raise ValueError on download.
    A query-backed dataset and a table-backed dataset are both expected to
    return a frame containing the ``fakec`` column.
    """
    self.assertNotExistsTable(self.test_write_table)

    df = load_geojson(self.test_geojson)
    dataset = Dataset.from_dataframe(df=df)
    # NOTE: `msg` is only the failure text shown if nothing is raised; the
    # exception's own message is not compared against it.
    error_msg = 'You should provide a context and a table_name or query to download data.'
    with self.assertRaises(ValueError, msg=error_msg):
        dataset.download()

    query = 'SELECT 1 as fakec'
    dataset = Dataset.from_query(query=query, context=self.cc)
    dataset.upload(table_name=self.test_write_table)

    # Point table_name at a bogus table before downloading; the download is
    # still expected to return the query's column.
    dataset.table_name = 'non_used_table'
    df = dataset.download()
    # assertIn reports the missing member and the container on failure,
    # unlike comparing a boolean against True.
    self.assertIn('fakec', df.columns)

    dataset = Dataset.from_table(table_name=self.test_write_table, context=self.cc)
    df = dataset.download()
    self.assertIn('fakec', df.columns)
def test_dataset_write_lines_dataset(self):
    """Upload the graticules example and expect 35 rows with a non-null geometry."""
    self.assertNotExistsTable(self.test_write_table)

    from cartoframes.examples import read_ne_50m_graticules_15

    graticules = read_ne_50m_graticules_15()
    uploaded = Dataset.from_dataframe(graticules).upload(
        table_name=self.test_write_table, context=self.cc)
    self.test_write_table = uploaded.table_name

    count_sql = 'SELECT * FROM {} WHERE the_geom IS NOT NULL'.format(self.test_write_table)
    response = self.cc.sql_client.send(count_sql)
    self.assertEqual(response['total_rows'], 35)
def test_dataset_upload_into_existing_table_fails_without_replace_property(self):
    """Expect CartoException on a second upload to the same table unless REPLACE is given."""
    query_dataset = Dataset.from_query(query='SELECT 1', context=self.cc)
    query_dataset.upload(table_name=self.test_write_table)

    template = (
        'Table with name {t} and schema {s} already exists in CARTO. Please choose a different `table_name`'
        'or use if_exists="replace" to overwrite it')
    expected = template.format(t=self.test_write_table, s='public')

    # NOTE: `msg` is only the failure text shown if nothing is raised; the
    # exception's own message is not compared against it.
    with self.assertRaises(CartoException, msg=expected):
        query_dataset.upload(table_name=self.test_write_table)

    # The same upload must go through once replacement is requested.
    query_dataset.upload(table_name=self.test_write_table, if_exists=Dataset.REPLACE)
def test_dataset_write_points_dataset(self):
    """Upload 100 rows of the McDonald's NYC example and expect 100 rows with a geometry."""
    self.assertNotExistsTable(self.test_write_table)

    from cartoframes.examples import read_mcdonalds_nyc

    points = read_mcdonalds_nyc(limit=100)
    uploaded = Dataset.from_dataframe(points).upload(
        table_name=self.test_write_table, context=self.cc)
    self.test_write_table = uploaded.table_name

    count_sql = 'SELECT * FROM {} WHERE the_geom IS NOT NULL'.format(self.test_write_table)
    response = self.cc.sql_client.send(count_sql)
    self.assertEqual(response['total_rows'], 100)
def test_dataset_write_if_exists_fail_by_default(self):
    """Expect a second upload to an existing table to raise and leave the first upload intact."""
    self.assertNotExistsTable(self.test_write_table)

    from cartoframes.examples import read_brooklyn_poverty

    poverty = read_brooklyn_poverty()
    first = Dataset.from_dataframe(poverty).upload(table_name=self.test_write_table, context=self.cc)
    self.test_write_table = first.table_name

    template = (
        'Table with name {t} and schema {s} already exists in CARTO. Please choose a different `table_name`'
        'or use if_exists="replace" to overwrite it')
    expected = template.format(t=self.test_write_table, s='public')

    # NOTE: `msg` is only the failure text shown if nothing is raised; the
    # exception's own message is not compared against it.
    with self.assertRaises(CartoException, msg=expected):
        Dataset.from_dataframe(poverty).upload(table_name=self.test_write_table, context=self.cc)

    # The table from the first upload should still be there with all its rows.
    self.assertExistsTable(self.test_write_table)

    count_sql = 'SELECT * FROM {} WHERE the_geom IS NOT NULL'.format(self.test_write_table)
    response = self.cc.sql_client.send(count_sql)
    self.assertEqual(response['total_rows'], 2049)
def test_dataset_write_with_different_geometry_column(self):
    """Upload a frame whose geometry column is named 'geometry' instead of 'the_geom'."""
    self.assertNotExistsTable(self.test_write_table)

    from cartoframes.examples import read_brooklyn_poverty

    poverty = read_brooklyn_poverty()
    poverty.rename(columns={'the_geom': 'geometry'}, inplace=True)

    uploaded = Dataset.from_dataframe(poverty).upload(
        table_name=self.test_write_table, context=self.cc)
    self.test_write_table = uploaded.table_name

    self.assertExistsTable(self.test_write_table)

    # The uploaded table is still expected to expose the data under the_geom.
    count_sql = 'SELECT * FROM {} WHERE the_geom IS NOT NULL'.format(self.test_write_table)
    response = self.cc.sql_client.send(count_sql)
    self.assertEqual(response['total_rows'], 2049)
def test_dataset_write_lnglat_dataset(self):
    """Upload taxi rows with `with_lnglat` set to the drop-off columns and expect 100 geometries."""
    self.assertNotExistsTable(self.test_write_table)

    from cartoframes.examples import read_taxi

    taxi = read_taxi(limit=100)
    coordinate_columns = ('dropoff_longitude', 'dropoff_latitude')

    uploaded = Dataset.from_dataframe(taxi).upload(
        table_name=self.test_write_table,
        context=self.cc,
        with_lnglat=coordinate_columns)
    self.test_write_table = uploaded.table_name

    self.assertExistsTable(self.test_write_table)

    count_sql = 'SELECT * FROM {} WHERE the_geom IS NOT NULL'.format(self.test_write_table)
    response = self.cc.sql_client.send(count_sql)
    self.assertEqual(response['total_rows'], 100)
def test_dataset_schema_from_org_context(self):
    """With an organization context, the schema should be the context's default schema."""
    org_user = '******'

    class FakeCreds():
        # Minimal credentials stand-in exposing only `username`.
        def username(self):
            return org_user

    class FakeContext():
        # Minimal context stand-in flagged as belonging to an organization.
        def __init__(self):
            self.is_org = True
            self.creds = FakeCreds()

        def get_default_schema(self):
            return org_user

    built = Dataset.from_table(table_name='fake_table', context=FakeContext())

    self.assertEqual(built.schema, org_user)
def test_dataset_write_wkt(self):
    """Upload taxi rows whose `the_geom` holds 'POINT (x y)' text and expect 50 geometries."""
    self.assertNotExistsTable(self.test_write_table)

    from cartoframes.examples import read_taxi

    def as_point_text(row):
        # Build the point text from the drop-off longitude/latitude of one row.
        return 'POINT ({x} {y})'.format(x=row['dropoff_longitude'], y=row['dropoff_latitude'])

    taxi = read_taxi(limit=50)
    taxi['the_geom'] = taxi.apply(as_point_text, axis=1)

    uploaded = Dataset.from_dataframe(taxi).upload(
        table_name=self.test_write_table, context=self.cc)
    self.test_write_table = uploaded.table_name

    self.assertExistsTable(self.test_write_table)

    count_sql = 'SELECT * FROM {} WHERE the_geom IS NOT NULL'.format(self.test_write_table)
    response = self.cc.sql_client.send(count_sql)
    self.assertEqual(response['total_rows'], 50)
def test_dataset_upload_validation_fails_only_with_table_name(self):
    """Expect ValueError when uploading a Dataset that was built from a table name only."""
    table_dataset = Dataset.from_table(table_name='fake_table', context=self.cc)

    # NOTE: `msg` is only the failure text shown if nothing is raised; the
    # exception's own message is not compared against it.
    expected = 'Nothing to upload. We need data in a DataFrame or GeoDataFrame or a query to upload data to CARTO.'
    with self.assertRaises(ValueError, msg=expected):
        table_dataset.upload()
def test_rows_null(self):
    """Dataset._rows should emit an empty field for None and for a list of Nones."""
    frame = pd.DataFrame.from_dict({'test': [None, [None, None]]})
    local = Dataset.from_dataframe(frame)

    encoded = list(local._rows(local.df, ['test'], None, ''))

    self.assertEqual(encoded, [b'|\n', b'|\n'])
def test_rows(self):
    """Dataset._rows should emit the text form of a bool and of a list, one row each."""
    frame = pd.DataFrame.from_dict({'test': [True, [1, 2]]})
    local = Dataset.from_dataframe(frame)

    encoded = list(local._rows(local.df, ['test'], None, ''))

    self.assertEqual(encoded, [b'True|\n', b'[1, 2]|\n'])
def test_dataset_schema_from_non_org_context(self):
    """With the test context and no explicit schema, the schema should be 'public'."""
    built = Dataset.from_table(context=self.cc, table_name='fake_table')

    self.assertEqual(built.schema, 'public')
def test_dataset_schema_from_parameter(self):
    """An explicit `schema` argument should be kept as the dataset's schema."""
    requested = 'fake_schema'

    built = Dataset.from_table(table_name='fake_table', schema=requested, context=self.cc)

    self.assertEqual(built.schema, requested)
def test_dataset_constructor_validation_fails_with_dataframe_and_geodataframe(self):
    """Expect ValueError when Dataset() receives both a df and a gdf."""
    with self.assertRaises(ValueError):
        Dataset(df={}, gdf={})
def test_dataset_constructor_validation_fails_with_query_and_dataframe(self):
    """Expect ValueError when Dataset() receives both a query and a df."""
    with self.assertRaises(ValueError):
        Dataset(query='select * from fake_table', df={})