def test_dataset_write_geopandas(self):
    # Upload a GeoDataFrame built from taxi drop-off coordinates and check
    # that every uploaded row ends up with a non-null `the_geom`.
    self.assertNotExistsTable(self.test_write_table)

    from cartoframes.examples import read_taxi
    # Import Point from its submodule explicitly: a bare `import shapely`
    # does not guarantee that `shapely.geometry` is loaded as an attribute.
    from shapely.geometry import Point
    import geopandas as gpd

    row_count = 50
    df = read_taxi(limit=row_count)
    df.drop(['the_geom'], axis=1, inplace=True)

    # One point per row, built from the (longitude, latitude) column pair.
    dropoff_points = [
        Point(xy)
        for xy in zip(df.dropoff_longitude, df.dropoff_latitude)
    ]
    gdf = gpd.GeoDataFrame(
        df.drop(['dropoff_longitude', 'dropoff_latitude'], axis=1),
        crs={'init': 'epsg:4326'},
        geometry=dropoff_points)

    # TODO: use from_geodataframe
    dataset = Dataset.from_dataframe(gdf).upload(
        table_name=self.test_write_table, context=self.cc)
    # Keep the name reported by the upload for the checks below.
    self.test_write_table = dataset.table_name

    self.assertExistsTable(self.test_write_table)

    result = self.cc.sql_client.send(
        'SELECT * FROM {} WHERE the_geom IS NOT NULL'.format(
            self.test_write_table))
    self.assertEqual(result['total_rows'], row_count)
def test_dataset_upload_validation_df_fails_without_context(self):
    # A dataset created from a dataframe is uploaded with a table name but
    # no context; the upload is expected to raise ValueError.
    failure_msg = 'You should provide a table_name and context to upload data.'
    local_dataset = Dataset.from_dataframe(df=load_geojson(self.test_geojson))

    # NOTE: with unittest's assertRaises, `msg` is the text reported when no
    # exception is raised; it is not matched against the exception message.
    with self.assertRaises(ValueError, msg=failure_msg):
        local_dataset.upload(table_name=self.test_write_table)
def test_dataset_write_if_exists_replace(self):
    # Upload the same dataframe twice, the second time with
    # `if_exists=Dataset.REPLACE`, then check the table's row count.
    from cartoframes.examples import read_brooklyn_poverty

    poverty_df = read_brooklyn_poverty()

    # First upload: creates the table; keep the name the upload reports.
    first_upload = Dataset.from_dataframe(poverty_df).upload(
        table_name=self.test_write_table,
        context=self.cc)
    self.test_write_table = first_upload.table_name

    # Second upload: same table name, explicitly asking for replacement.
    Dataset.from_dataframe(poverty_df).upload(
        if_exists=Dataset.REPLACE,
        table_name=self.test_write_table,
        context=self.cc)

    self.assertExistsTable(self.test_write_table)

    count_query = 'SELECT * FROM {} WHERE the_geom IS NOT NULL'.format(
        self.test_write_table)
    response = self.cc.sql_client.send(count_query)
    self.assertEqual(response['total_rows'], 2049)
def test_dataset_from_dataframe(self):
    # A dataset built from a dataframe should carry the dataframe, leave
    # the remaining source attributes unset and report the local state.
    source_df = load_geojson(self.test_geojson)
    dataset = Dataset.from_dataframe(df=source_df)

    self.assertIsInstance(dataset, Dataset)
    self.assertIsNotNone(dataset.df)

    # Checked in the same order as before; `msg` names the offending
    # attribute if one of them is unexpectedly set.
    for unset_attr in ('table_name', 'query', 'gdf', 'cc'):
        self.assertIsNone(getattr(dataset, unset_attr), msg=unset_attr)

    self.assertEqual(dataset.state, Dataset.STATE_LOCAL)
def test_dataset_write_lines_dataset(self):
    # Upload the graticules example dataframe and check that 35 rows with
    # a non-null `the_geom` are present in the resulting table.
    self.assertNotExistsTable(self.test_write_table)

    from cartoframes.examples import read_ne_50m_graticules_15

    graticules_df = read_ne_50m_graticules_15()
    uploaded = Dataset.from_dataframe(graticules_df).upload(
        table_name=self.test_write_table,
        context=self.cc)
    # Keep the name reported by the upload for the query below.
    self.test_write_table = uploaded.table_name

    count_query = 'SELECT * FROM {} WHERE the_geom IS NOT NULL'.format(
        self.test_write_table)
    response = self.cc.sql_client.send(count_query)
    self.assertEqual(response['total_rows'], 35)
def test_dataset_write_points_dataset(self):
    # Upload 100 rows of the McDonald's NYC example dataframe and check
    # that all of them arrive with a non-null `the_geom`.
    self.assertNotExistsTable(self.test_write_table)

    from cartoframes.examples import read_mcdonalds_nyc

    points_df = read_mcdonalds_nyc(limit=100)
    uploaded = Dataset.from_dataframe(points_df).upload(
        table_name=self.test_write_table,
        context=self.cc)
    # Keep the name reported by the upload for the query below.
    self.test_write_table = uploaded.table_name

    count_query = 'SELECT * FROM {} WHERE the_geom IS NOT NULL'.format(
        self.test_write_table)
    response = self.cc.sql_client.send(count_query)
    self.assertEqual(response['total_rows'], 100)
def test_dataset_write_if_exists_fail_by_default(self):
    # Uploading to an already existing table without `if_exists` is
    # expected to raise CartoException and leave the first upload's rows
    # in place.
    self.assertNotExistsTable(self.test_write_table)

    from cartoframes.examples import read_brooklyn_poverty
    df = read_brooklyn_poverty()

    dataset = Dataset.from_dataframe(df).upload(
        table_name=self.test_write_table, context=self.cc)
    # Keep the name reported by the upload for the second attempt.
    self.test_write_table = dataset.table_name

    # Adjacent string literals are concatenated verbatim, so each fragment
    # carries its own trailing space (the original read "`table_name`or").
    # NOTE: with unittest's assertRaises, `msg` is only the text shown when
    # no exception is raised; it is not compared with the exception message.
    err_msg = (
        'Table with name {t} and schema {s} already exists in CARTO. '
        'Please choose a different `table_name` '
        'or use if_exists="replace" to overwrite it').format(
            t=self.test_write_table, s='public')

    with self.assertRaises(CartoException, msg=err_msg):
        # The return value is irrelevant here: the call is expected to raise.
        Dataset.from_dataframe(df).upload(
            table_name=self.test_write_table, context=self.cc)

    # The table created by the first upload must still be there, intact.
    self.assertExistsTable(self.test_write_table)

    result = self.cc.sql_client.send(
        'SELECT * FROM {} WHERE the_geom IS NOT NULL'.format(
            self.test_write_table))
    self.assertEqual(result['total_rows'], 2049)
def test_dataset_write_with_different_geometry_column(self):
    # Rename the dataframe's `the_geom` column to `geometry` before the
    # upload and check that the table still has a populated `the_geom`.
    self.assertNotExistsTable(self.test_write_table)

    from cartoframes.examples import read_brooklyn_poverty

    poverty_df = read_brooklyn_poverty()
    column_mapping = {'the_geom': 'geometry'}
    poverty_df.rename(columns=column_mapping, inplace=True)

    uploaded = Dataset.from_dataframe(poverty_df).upload(
        table_name=self.test_write_table,
        context=self.cc)
    # Keep the name reported by the upload for the checks below.
    self.test_write_table = uploaded.table_name

    self.assertExistsTable(self.test_write_table)

    count_query = 'SELECT * FROM {} WHERE the_geom IS NOT NULL'.format(
        self.test_write_table)
    response = self.cc.sql_client.send(count_query)
    self.assertEqual(response['total_rows'], 2049)
def test_dataset_write_wkt(self):
    # Fill `the_geom` with 'POINT (x y)' strings built from the drop-off
    # coordinates, upload, and check that all 50 rows have a geometry.
    self.assertNotExistsTable(self.test_write_table)

    from cartoframes.examples import read_taxi

    def _dropoff_as_wkt(row):
        # Render one row's drop-off coordinates as a WKT-style point string.
        return 'POINT ({x} {y})'.format(
            x=row['dropoff_longitude'],
            y=row['dropoff_latitude'])

    taxi_df = read_taxi(limit=50)
    taxi_df['the_geom'] = taxi_df.apply(_dropoff_as_wkt, axis=1)

    uploaded = Dataset.from_dataframe(taxi_df).upload(
        table_name=self.test_write_table,
        context=self.cc)
    # Keep the name reported by the upload for the checks below.
    self.test_write_table = uploaded.table_name

    self.assertExistsTable(self.test_write_table)

    count_query = 'SELECT * FROM {} WHERE the_geom IS NOT NULL'.format(
        self.test_write_table)
    response = self.cc.sql_client.send(count_query)
    self.assertEqual(response['total_rows'], 50)
def test_dataset_write_lnglat_dataset(self):
    # Upload taxi rows passing the drop-off column pair as `with_lnglat`
    # and check that all 100 rows end up with a non-null `the_geom`.
    self.assertNotExistsTable(self.test_write_table)

    from cartoframes.examples import read_taxi

    taxi_df = read_taxi(limit=100)
    lnglat_columns = ('dropoff_longitude', 'dropoff_latitude')

    uploaded = Dataset.from_dataframe(taxi_df).upload(
        with_lnglat=lnglat_columns,
        table_name=self.test_write_table,
        context=self.cc)
    # Keep the name reported by the upload for the checks below.
    self.test_write_table = uploaded.table_name

    self.assertExistsTable(self.test_write_table)

    count_query = 'SELECT * FROM {} WHERE the_geom IS NOT NULL'.format(
        self.test_write_table)
    response = self.cc.sql_client.send(count_query)
    self.assertEqual(response['total_rows'], 100)
def test_dataset_download_validations(self):
    # Exercise `download()` on three kinds of dataset: a local one (expected
    # to raise ValueError), one built from a query and one built from a table.
    self.assertNotExistsTable(self.test_write_table)

    # 1) Dataset created from a dataframe: download is expected to fail.
    df = load_geojson(self.test_geojson)
    dataset = Dataset.from_dataframe(df=df)
    # NOTE: with unittest's assertRaises, `msg` is only the text shown when
    # no exception is raised; it is not compared with the exception message.
    error_msg = 'You should provide a context and a table_name or query to download data.'
    with self.assertRaises(ValueError, msg=error_msg):
        dataset.download()

    # 2) Dataset created from a query; it is also uploaded so that the
    #    table used in step 3 exists.
    query = 'SELECT 1 as fakec'
    dataset = Dataset.from_query(query=query, context=self.cc)
    dataset.upload(table_name=self.test_write_table)

    # Point `table_name` at a name this test never creates before
    # downloading. NOTE(review): presumably this checks that the download
    # is served from the query rather than the table name - confirm against
    # Dataset.download.
    dataset.table_name = 'non_used_table'
    df = dataset.download()
    self.assertIn('fakec', df.columns)

    # 3) Dataset created from the table uploaded in step 2.
    dataset = Dataset.from_table(table_name=self.test_write_table,
                                 context=self.cc)
    df = dataset.download()
    self.assertIn('fakec', df.columns)
def test_rows_null(self):
    # `_rows` is expected to yield b'|\n' both for a None cell and for a
    # cell holding a list of Nones.
    cells = [None, [None, None]]
    frame = pd.DataFrame.from_dict({'test': cells})
    dataset = Dataset.from_dataframe(frame)

    # The last two positional arguments (None, '') are passed through
    # unchanged; their meaning is defined by Dataset._rows.
    produced = list(dataset._rows(dataset.df, ['test'], None, ''))

    expected = [b'|\n', b'|\n']
    self.assertEqual(produced, expected)
def test_rows(self):
    # `_rows` is expected to yield b'True|\n' for a boolean cell and
    # b'[1, 2]|\n' for a list cell.
    cells = [True, [1, 2]]
    frame = pd.DataFrame.from_dict({'test': cells})
    dataset = Dataset.from_dataframe(frame)

    # The last two positional arguments (None, '') are passed through
    # unchanged; their meaning is defined by Dataset._rows.
    produced = list(dataset._rows(dataset.df, ['test'], None, ''))

    expected = [b'True|\n', b'[1, 2]|\n']
    self.assertEqual(produced, expected)