def test_cartocontext(self):
    """context.CartoContext.__init__ normal usage

    Builds a context from the fixture base URL and API key, checks that the
    stored credentials echo those inputs, and verifies that constructing a
    context with an invalid key raises ``CartoException``.
    """
    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
    stored = context.creds
    self.assertEqual(stored.key(), self.apikey)
    # the stored base URL is compared without surrounding slashes
    self.assertEqual(stored.base_url(), self.baseurl.strip('/'))
    self.assertEqual(stored.username(), self.username)
    self.assertFalse(context.is_org)

    with self.assertRaises(CartoException):
        cartoframes.CartoContext(base_url=self.baseurl,
                                 api_key='notavalidkey')
def test_cartocontext_credentials(self):
    """context.CartoContext.__init__ Credentials argument

    Checks two construction paths: passing a ``Credentials`` object
    explicitly, and passing nothing after a ``Credentials`` object has been
    saved.
    """
    explicit_creds = cartoframes.Credentials(username=self.username,
                                             key=self.apikey)
    context = cartoframes.CartoContext(creds=explicit_creds)
    self.assertIsInstance(context, cartoframes.CartoContext)
    self.assertEqual(context.creds.username(), self.username)
    self.assertEqual(context.creds.key(), self.apikey)

    # CartoContext pulls from saved credentials when given no arguments
    persisted = cartoframes.Credentials(username=self.username,
                                        key=self.apikey)
    persisted.save()
    context_from_saved = cartoframes.CartoContext()
    self.assertEqual(context_from_saved.creds.key(), self.apikey)
def test_cartocontext_fetch_with_cte(self):
    """context.CartoContext.fetch with a CTE query

    Runs a ``WITH ...`` query that yields 110 rows and checks the type,
    column names, dtypes and length of the returned frame.
    """
    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
    query = '''
        WITH cte AS (
            SELECT CDB_LatLng(0.1, 0) as the_geom, i
            FROM generate_series(1, 110) as m(i)
        )
        SELECT ST_X(the_geom) as xval, ST_Y(the_geom) as yval
        FROM cte
    '''
    result = context.fetch(query)

    # same type of object
    self.assertIsInstance(result, pd.DataFrame,
                          'Should be a pandas DataFrame')

    # same column names
    self.assertSetEqual({'xval', 'yval'}, set(result.columns),
                        msg='Should have the columns requested')

    # should have expected schema
    actual_dtypes = tuple(map(str, result.dtypes))
    self.assertTupleEqual(actual_dtypes, ('float64', 'float64'),
                          msg='Should have same schema/types')

    # same number of rows
    self.assertEqual(len(result), 110, msg='Expected number or rows')
def test_data_obs_functions(self):
    """context.data_x

    Expects both ``data_boundaries()`` and ``data_discovery()`` to return
    ``None`` when called without arguments on this context.
    """
    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
    boundaries_result = context.data_boundaries()
    self.assertIsNone(boundaries_result)
    discovery_result = context.data_discovery()
    self.assertIsNone(discovery_result)
def test_cartocontext_fetch_with_decode_geom(self):
    """context.CartoContext.fetch with decode_geom=True

    Fetches 110 generated points with geometry decoding enabled and checks
    the frame's type, columns, dtypes, length, and the WKT of the first
    decoded geometry.
    """
    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
    query = '''
        SELECT CDB_LatLng(0.1, 0) as the_geom, i
        FROM generate_series(1, 110) as m(i)
    '''
    result = context.fetch(query, decode_geom=True)

    # same type of object
    self.assertIsInstance(result, pd.DataFrame,
                          'Should be a pandas DataFrame')

    # same column names
    self.assertSetEqual({'geometry', 'i'}, set(result.columns),
                        msg='Should have the columns requested')

    # should have expected schema
    actual_dtypes = tuple(map(str, result.dtypes))
    self.assertTupleEqual(actual_dtypes, ('object', 'float64'),
                          msg='Should have same schema/types')

    # same number of rows
    self.assertEqual(len(result), 110, msg='Expected number or rows')

    first_geometry = result.loc[0].geometry
    self.assertEqual(first_geometry.wkt, 'POINT (0 0.1)')
def test_data(self):
    """context.CartoContext.data

    Covers augmentation driven by discovered metadata, augmentation driven
    by a hand-written metadata list, and the ``ValueError`` raised for
    metadata the test labels 'no measures' and 'too many metadata measures'.
    """
    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)

    # metadata obtained through discovery
    meta = context.data_discovery(self.test_read_table,
                                  keywords=('poverty', ),
                                  time=('2010 - 2014', ))
    augmented = context.data(self.test_data_table, meta)
    expected_new = set(meta['suggested_name'])
    original_cols = set(
        context.read(self.test_data_table, limit=1).columns)
    # the new columns are exactly the suggested names from the metadata
    self.assertSetEqual(expected_new,
                        set(augmented.columns) - original_cols)

    # metadata written out by hand
    meta = [
        {
            'numer_id': 'us.census.acs.B19013001',
            'geom_id': 'us.census.tiger.block_group',
            'numer_timespan': '2011 - 2015',
        },
    ]
    augmented = context.data(self.test_data_table, meta)
    self.assertSetEqual({'median_income_2011_2015'},
                        set(augmented.columns) - original_cols)

    # NOTE: a check that `how='geom_ref'` raises NotImplementedError is
    # currently disabled:
    # with self.assertRaises(NotImplementedError):
    #     cc.data(self.test_data_table, meta, how='geom_ref')

    with self.assertRaises(ValueError, msg='no measures'):
        meta = context.data_discovery('United States',
                                      keywords='not a measure')
        context.data(self.test_read_table, meta)

    with self.assertRaises(ValueError, msg='too many metadata measures'):
        # returns ~180 measures
        meta = context.data_discovery(region='united states',
                                      keywords='education')
        context.data(self.test_read_table, meta)
def test_cartocontext_delete(self):
    """context.CartoContext.delete

    Writes a small table, deletes it, and checks that querying it afterwards
    raises ``CartoException``. Then deletes a table name that does not exist
    and checks that exactly one ``UserWarning`` mentioning
    "Failed to delete" is emitted.
    """
    cc = cartoframes.CartoContext(base_url=self.baseurl,
                                  api_key=self.apikey)
    data = {'col1': [1, 2, 3],
            'col2': ['a', 'b', 'c']}
    df = pd.DataFrame(data)

    cc.write(df, self.test_delete_table)
    cc.delete(self.test_delete_table)

    # check that querying recently deleted table raises an exception
    with self.assertRaises(CartoException):
        cc.sql_client.send('select * from {}'.format(
            self.test_delete_table))

    # try to delete a table that does not exist
    with warnings.catch_warnings(record=True) as w:
        # Cause all warnings to always be triggered.
        warnings.simplefilter("always")
        # Trigger a warning.
        cc.delete('non_existent_table')
        # Verify one warning, subclass is UserWarning, and expected message
        # is in warning. unittest assertions are used rather than bare
        # `assert` so the checks still run under `python -O` and report the
        # offending values on failure.
        self.assertEqual(len(w), 1)
        self.assertTrue(issubclass(w[-1].category, UserWarning))
        self.assertIn("Failed to delete", str(w[-1].message))
def test_cartocontext_isorguser(self):
    """context.CartoContext._is_org_user

    The fixture account is expected not to be an organization user.
    """
    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
    self.assertFalse(context._is_org_user())
def test_cartocontext_authenticated(self):
    """context.CartoContext._is_authenticated

    Constructing a context with the base URL switched from ``https`` to
    ``http`` must raise ``ValueError``.
    """
    insecure_url = self.baseurl.replace('https', 'http')
    with self.assertRaises(ValueError):
        cartoframes.CartoContext(base_url=insecure_url,
                                 api_key=self.apikey)
def test_cartocontext_read(self):
    """context.CartoContext.read

    Covers invalid ``limit`` values, a missing table, a full read, a read
    with a limit, and a read with ``limit=0``.
    """
    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)

    # fails if limit is smaller than zero
    with self.assertRaises(ValueError):
        context.read('sea_horses', limit=-10)
    # fails if not an int
    with self.assertRaises(ValueError):
        context.read('sea_horses', limit=3.14159)
    with self.assertRaises(ValueError):
        context.read('sea_horses', limit='acadia')

    # fails on non-existent table
    with self.assertRaises(CartoException):
        context.read('non_existent_table')

    # normal table
    full = context.read(self.test_read_table)
    self.assertSetEqual(set(full.columns), self.valid_columns)
    self.assertEqual(len(full), 169)

    # read with limit
    limited = context.read(self.test_read_table, limit=10)
    self.assertEqual(len(limited), 10)
    self.assertIsInstance(limited, pd.DataFrame)

    # read empty table/dataframe
    empty = context.read(self.test_read_table, limit=0)
    self.assertSetEqual(set(empty.columns), self.valid_columns)
    self.assertEqual(len(empty), 0)
    self.assertIsInstance(empty, pd.DataFrame)
def test_batchjobstatus_methods(self):
    """context.BatchJobStatus methods

    Creates a batch job, wraps the response in ``BatchJobStatus`` and
    exercises ``get_status``, ``_set_status`` and ``status``. Also checks
    construction from a plain job-id string.
    """
    from cartoframes.context import BatchJobStatus
    from carto.sql import BatchSQLClient

    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)

    client = BatchSQLClient(context.auth_client)
    response = client.create(['select 1', ])
    tracker = BatchJobStatus(context, response)

    allowed_states = ('pending', 'running', 'done', 'canceled', 'unknown', )
    self.assertIn(tracker.get_status(), allowed_states)

    # a manually set status is what get_status reports back
    tracker._set_status('foo')
    self.assertEqual(tracker.get_status(), 'foo')

    refreshed = tracker.status()
    self.assertSetEqual(set(refreshed.keys()),
                        {'status', 'updated_at', 'created_at'})

    # job_id as str
    from_id = BatchJobStatus(context, 'foo')
    self.assertIsNone(from_id.get_status())
    self.assertEqual(from_id.job_id, 'foo')
def test_cartocontext_table_exists(self):
    """context.CartoContext._table_exists

    ``_table_exists`` is expected to return a falsy value for
    'acadia_biodiversity' and to raise ``NameError`` for the fixture read
    table.
    """
    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
    exists = context._table_exists('acadia_biodiversity')
    self.assertFalse(exists)
    with self.assertRaises(NameError):
        context._table_exists(self.test_read_table)
def main():
    """Refresh CARTO_TABLE from the processed source data.

    Steps, in order: configure logging to stderr, authenticate using the
    CARTO_USER / CARTO_KEY environment variables, count the rows currently
    in the table, obtain the new dataframe from ``processData()``, overwrite
    the table with it as a public dataset, and record a last-update date
    when the new frame has more rows than the table previously held.
    """
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)

    # 1. Authenticate to Carto
    carto_user = os.environ.get('CARTO_USER')
    carto_key = os.environ.get('CARTO_KEY')
    account_url = 'https://{}.carto.com/'.format(carto_user)
    cc = cartoframes.CartoContext(base_url=account_url, api_key=carto_key)

    # check size of old table: the CSV response is split on CRLF, dropping
    # the first and last pieces
    response = cartosql.getFields('datetime', CARTO_TABLE, f='csv')
    existing_ids = response.text.split('\r\n')[1:-1]
    num_existing = len(existing_ids)

    # 2. Fetch data from FTP, dedupe, process
    df = processData()
    num_rows = df.shape[0]
    cc.write(df, CARTO_TABLE, overwrite=True, privacy='public')

    # Get most recent update date
    if num_rows > num_existing:
        most_recent_date = datetime.datetime.utcnow()
        lastUpdateDate(DATASET_ID, most_recent_date)

    # 3. Notify results
    logging.info('Existing rows: {}'.format(num_rows))
    logging.info("SUCCESS")
def test_column_name_collision_do_enrichement(self):
    """context.CartoContext.data column collision

    Creates a table that already has a column named like a measure's
    suggested name, augments it with that measure, and expects the result
    to contain the name with a leading underscore.
    """
    dup_col = 'female_third_level_studies_2011_by_female_pop'
    create_sql = """
        create table {table} as (
            select cdb_latlng(40.4165,-3.70256) the_geom,
                   1 {dup_col})
    """.format(dup_col=dup_col, table=self.test_write_table)
    self.sql_client.send(create_sql)

    cartodbfy_sql = "select cdb_cartodbfytable('public', '{table}')".format(
        table=self.test_write_table)
    self.sql_client.send(cartodbfy_sql)

    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
    meta = context.data_discovery(region=self.test_write_table,
                                  keywords='female')
    # keep only the measure whose suggested name collides
    meta = meta[meta.suggested_name == dup_col]
    colliding_meta = meta[meta.suggested_name == dup_col]
    augmented = context.data(self.test_write_table, colliding_meta)

    self.assertIn('_' + dup_col, augmented.keys())
def test_add_encoded_geom(self):
    """context._add_encoded_geom

    After ``_add_encoded_geom`` runs, ``the_geom`` must equal the encoded
    form of the ``geometry`` column, whether the geometry column name is
    given explicitly or passed as ``None``. Without a ``geometry`` column,
    passing ``None`` must raise ``KeyError``.
    """
    from cartoframes.context import _add_encoded_geom, _encode_geom

    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)

    def read_and_buffer(distance):
        # decode_geom=True adds a column called 'geometry'; alter it so it
        # no longer matches the stored the_geom
        frame = context.read(self.test_read_table, limit=5,
                             decode_geom=True)
        frame['geometry'] = frame['geometry'].apply(
            lambda geom: geom.buffer(distance))
        return frame

    # geometry column named explicitly
    frame = read_and_buffer(0.1)
    _add_encoded_geom(frame, 'geometry')
    # the_geom should reflect the encoded 'geometry' column
    expected = frame['geometry'].apply(_encode_geom)
    self.assertTrue(frame['the_geom'].equals(expected))

    # don't specify geometry column (should exist since decode_geom==True)
    frame = read_and_buffer(0.2)
    _add_encoded_geom(frame, None)
    expected = frame['geometry'].apply(_encode_geom)
    self.assertTrue(frame['the_geom'].equals(expected))

    # raise error if 'geometry' column does not exist
    frame = context.read(self.test_read_table, limit=5)
    with self.assertRaises(KeyError):
        _add_encoded_geom(frame, None)
def test_cartocontext_map_time(self):
    """context.CartoContext.map time options

    Checks that a time-enabled layer renders to an IPython ``HTML`` object,
    that a time + color (category) map contains the expected
    ``CDB_Math_Mode`` expression, and that the unsupported combinations
    named in each ``msg`` below raise ``ValueError``.
    """
    from cartoframes import Layer
    cc = cartoframes.CartoContext(base_url=self.baseurl,
                                  api_key=self.apikey)
    html_map = cc.map(layers=Layer(self.test_point_table,
                                   time='cartodb_id'))
    self.assertIsInstance(html_map, IPython.core.display.HTML)

    # category map
    cat_map = cc.map(layers=Layer(self.test_point_table,
                                  time='actor_postedtime',
                                  color='twitter_lang'))
    # Raw string: `\(` is a regex escape, not a Python string escape, and
    # a non-raw literal triggers an invalid-escape-sequence warning.
    self.assertRegexpMatches(
        cat_map.__html__(),
        r'.*CDB_Math_Mode\(cf_value_twitter_lang\).*')

    with self.assertRaises(
            ValueError,
            msg='cannot create static torque maps currently'):
        cc.map(layers=Layer(self.test_point_table, time='cartodb_id'),
               interactive=False)

    with self.assertRaises(
            ValueError,
            msg='cannot have more than one torque layer'):
        cc.map(layers=[
            Layer(self.test_point_table, time='cartodb_id'),
            Layer(self.test_point_table, color='cartodb_id')
        ])

    with self.assertRaises(
            ValueError,
            msg='cannot do a torque map off a polygon dataset'):
        cc.map(layers=Layer(self.test_read_table, time='cartodb_id'))
def udf():
    """User-defined function

    Needs more work on functional form. It currently only takes a table and
    column name for processing.

    Query-string parameters (read from ``request.args``):
        udf: Python source to execute. Required.
        user: Carto username, used to build the account base URL. Required.
        key: Carto API key. Required.
        table: name of the table to read. Required.
        col: column name; not used here directly but exposed to the snippet.

    The table is read into ``dataframe``, the snippet is executed with
    ``dataframe``, ``col`` and the other request values in scope, and
    whatever ``dataframe`` refers to afterwards is written (overwriting) to
    ``<table>_analysis_service_output_udf``.

    Returns:
        A JSON string: ``{"result": {"success": ...}}`` once the output is
        written, or ``{"result": {"error": ...}}`` when a required parameter
        is missing.
    """
    func = request.args.get('udf')
    user = request.args.get('user')
    key = request.args.get('key')
    table = request.args.get('table')
    # col is used within the user-defined function
    col = request.args.get('col')

    if not func:
        return json.dumps(
            {'result': {
                'error': 'User-defined function not specified'
            }})

    # Without these the code below would fail later with a TypeError or a
    # malformed account URL; report the problem to the caller instead.
    required = (('user', user), ('key', key), ('table', table))
    missing = [name for name, value in required if not value]
    if missing:
        return json.dumps(
            {'result': {
                'error': 'Missing required parameter(s): {}'.format(
                    ', '.join(missing))
            }})

    ccontext = cartoframes.CartoContext(
        base_url='https://{}.carto.com'.format(user), api_key=key)
    dataframe = ccontext.read(table)

    # SECURITY WARNING: this executes arbitrary code taken straight from the
    # request. It must only be reachable by trusted callers (or be replaced
    # by a sandboxed/whitelisted mechanism).
    #
    # The snippet runs in an explicit namespace because, on Python 3, a bare
    # exec() inside a function cannot rebind that function's locals: a
    # snippet doing `dataframe = ...` would otherwise be silently ignored.
    scope = {
        'func': func,
        'user': user,
        'key': key,
        'table': table,
        'col': col,
        'ccontext': ccontext,
        'dataframe': dataframe,
    }
    exec(func, globals(), scope)
    # pick up a rebound dataframe; fall back to the original object if the
    # snippet removed the name
    dataframe = scope.get('dataframe', dataframe)

    outtable = table + '_analysis_service_output_udf'
    ccontext.write(dataframe, outtable, overwrite=True)
    out_url = ccontext.creds.base_url() + '/dataset/' + outtable
    return json.dumps(
        {'result': {
            'success': 'Results written to {}'.format(out_url)
        }})
def test_vector_interactivity(self):
    """contrib.vector

    Builds layers whose ``interactivity`` is given as a string, a list and
    a dict, expects ``vmap`` to return ``HTML`` for them, and expects a
    ``ValueError`` when ``interactivity`` is an integer.
    """
    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
    polys_query = 'SELECT * FROM {}'.format(self.polys)
    popup_cols = ['name', 'state_name', ]

    string_layer = vector.Layer(self.points, interactivity='body')
    list_layer = vector.QueryLayer(polys_query, interactivity=popup_cols)
    dict_layer = vector.QueryLayer(
        'SELECT * FROM {}'.format(self.polys),
        interactivity={
            'cols': ['name', 'state_name', ],
            'header': '<h1 class="h1">NAT</h1>',
            'event': 'click'
        })

    rendered = vector.vmap([string_layer, list_layer, dict_layer], context)
    self.assertIsInstance(rendered, HTML)

    # invalid entry for interactivity
    with self.assertRaises(ValueError):
        vector.vmap([vector.Layer(self.points, interactivity=10), ])
def test_get_bounds(self):
    """context.CartoContext._get_bounds

    Two query layers, each a single rectangle, are passed to
    ``_get_bounds``; the expected result is the box that covers both.
    """
    from cartoframes.layer import QueryLayer

    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
    first_box = {'minx': 0, 'maxx': 1, 'miny': 0, 'maxy': 2}
    second_box = {'minx': 0, 'maxx': 1.5, 'miny': -0.5, 'maxy': 1.5}
    expected = {'west': 0, 'east': 1.5, 'south': -0.5, 'north': 2}

    # (MINX, MINY), (MINX, MAXY), (MAXX, MAXY), (MAXX, MINY), (MINX, MINY)
    # https://postgis.net/docs/ST_Envelope.html
    template = '''
        WITH cte AS (
            SELECT
              'SRID=4326;POLYGON(({minx} {miny}, {minx} {maxy}, {maxx} {maxy}, {maxx} {miny}, {minx} {miny}))'::geometry AS the_geom
          )
        SELECT
          1 AS cartodb_id,
          the_geom,
          ST_Transform(the_geom, 3857) AS the_geom_webmercator
        FROM cte
    '''
    layers = [QueryLayer(template.format(**box))
              for box in (first_box, second_box)]

    self.assertDictEqual(context._get_bounds(layers), expected)
def test_cartocontext_mixed_case(self):
    """context.CartoContext.write table name mixed case

    Writes a small frame (with a mixed-case column name) to the mixed-case
    table name; the test passes if the write does not raise.
    """
    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
    frame = pd.DataFrame({'a': [1, 2, 3], 'B': ['a', 'b', 'c']})
    payload = pd.DataFrame(frame)
    context.write(payload, self.mixed_case_table)
def test_cartocontext(self):
    """CartoContext.__init__

    Builds a context from the fixture base URL and API key and checks that
    the stored credentials echo those inputs and that the account is not
    flagged as an organization.
    """
    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
    stored = context.creds
    self.assertEqual(stored.key(), self.apikey)
    # the stored base URL is compared without surrounding slashes
    self.assertEqual(stored.base_url(), self.baseurl.strip('/'))
    self.assertEqual(stored.username(), self.username)
    self.assertFalse(context.is_org)
def processData():
    '''
    Function to download data and upload it to Carto
    Will first try to get the data for MAX_TRIES then quits

    Calls feeds() until it succeeds or has failed MAX_TRIES times. On
    success, CARTO_TABLE is created if it does not exist (otherwise its
    existing rows are deleted) and the dataframe is written to it as a
    public dataset.

    Returns the dataframe produced by feeds(), or None when every attempt
    failed (in which case nothing is written to Carto).
    '''
    success = False
    tries = 0
    df = None
    while tries < MAX_TRIES and not success:
        logging.info('Try running feeds, try number = {}'.format(tries))
        try:
            df = feeds()
            success = True
        except Exception as inst:
            # best-effort retry: log the failure and go around again
            logging.info(inst)
            logging.info("Error fetching data trying again")
            tries = tries + 1
            if tries == MAX_TRIES:
                logging.error(
                    "Error fetching data, and max tries reached. See source for last data update."
                )
                success = False

    if success:
        if not cartosql.tableExists(CARTO_TABLE,
                                    user=os.getenv('CARTO_USER'),
                                    key=os.getenv('CARTO_KEY')):
            logging.info('Table {} does not exist'.format(CARTO_TABLE))
            cartosql.createTable(CARTO_TABLE, CARTO_SCHEMA)
        else:
            # clear out the previous contents before writing the new data
            cartosql.deleteRows(CARTO_TABLE,
                                'cartodb_id IS NOT NULL',
                                user=os.getenv('CARTO_USER'),
                                key=os.getenv('CARTO_KEY'))

        # Send dataframe to Carto (same for a new or an existing table)
        logging.info('Writing to Carto')
        cc = cartoframes.CartoContext(
            base_url="https://{user}.carto.com/".format(user=CARTO_USER),
            api_key=CARTO_KEY)
        cc.write(df, CARTO_TABLE, overwrite=True, privacy='public')

    # Callers use the result (e.g. `df = processData()`), so hand it back
    # rather than implicitly returning None.
    return df
def test_cartocontext_execute_wrong_query(self):
    """context.CartoContext.execute

    A statement with a misspelled keyword (``DROPP``) must raise
    ``CartoException``.
    """
    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
    bad_statement = '''
        DROPP TABLE {table_name}
    '''.format(table_name=self.test_write_table)

    with self.assertRaises(CartoException):
        context.execute(bad_statement)
def test_tables(self):
    """context.CartoContext.tables normal usage

    ``tables()`` must return a list whose first element is a
    ``cartoframes.analysis.Table`` with a non-None string ``name``.
    """
    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
    listing = context.tables()
    self.assertIsInstance(listing, list)

    first = listing[0]
    self.assertIsInstance(first, cartoframes.analysis.Table)
    self.assertIsNotNone(first.name)
    self.assertIsInstance(first.name, str)
def tearDown(self):
    """restore to original state

    Deletes the tables this test case writes, provided both an API key and
    a base URL are configured.
    """
    tables = (self.test_write_lnglat_table, )
    if not (self.apikey and self.baseurl):
        # no credentials configured: nothing to clean up
        return

    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
    for table_name in tables:
        context.delete(table_name)
def test_cartocontext_delete_non_existent_table(self):
    """context.CartoContext.delete

    Deleting a table name that does not exist must raise
    ``CartoException``.
    """
    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
    table_name = 'non_existent_table'
    failure_msg = '''The table `{}` doesn't exist'''.format(table_name)

    with self.assertRaises(CartoException, msg=failure_msg):
        context.delete(table_name)
def test_data_boundaries(self):
    """context.CartoContext.data_boundaries

    Covers boundary metadata (overall and per region), boundary geometries
    (overall and per region), the ``include_nonclipped`` switch, and
    rejection of malformed ``region`` arguments.
    """
    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
    state_id = 'us.census.tiger.state'
    meta_cols = {'geom_id', 'geom_tags', 'geom_type'}
    geom_cols = {'the_geom', 'geom_refs'}

    # all boundary metadata
    boundary_meta = context.data_boundaries()
    self.assertTrue(boundary_meta.shape[0] > 0,
                    msg='has non-zero number of boundaries')
    # at least one of the metadata columns must be present
    self.assertTrue(meta_cols & set(boundary_meta.columns))

    # boundary metadata in a region
    regions = (
        self.test_read_table,
        self.test_data_table,
        [5.9559111595, 45.8179931641, 10.4920501709, 47.808380127],
        'Australia',
    )
    for region in regions:
        boundary_meta = context.data_boundaries(region=region)
        self.assertTrue(meta_cols & set(boundary_meta.columns))
        self.assertTrue(boundary_meta.shape[0] > 0,
                        msg='has non-zero number of boundaries')

    # boundaries for world
    world_geoms = context.data_boundaries(boundary='us.census.tiger.state')
    self.assertTrue(world_geoms.shape[0] > 0)
    self.assertEqual(world_geoms.shape[1], 2)
    self.assertSetEqual(geom_cols, set(world_geoms.columns))

    # boundaries for region
    for boundary_id in ('us.census.tiger.state', ):
        region_geoms = context.data_boundaries(
            boundary=boundary_id,
            region=self.test_data_table)
        self.assertTrue(region_geoms.shape[0] > 0)
        self.assertEqual(region_geoms.shape[1], 2)
        self.assertSetEqual(geom_cols, set(region_geoms.columns))

    # presence or lack of clipped boundaries
    for include in (True, False, ):
        meta = context.data_boundaries(include_nonclipped=include)
        has_state = state_id in set(meta.geom_id)
        self.assertEqual(has_state, include)

    # malformed regions
    with self.assertRaises(ValueError):
        context.data_boundaries(region=[1, 2, 3])

    with self.assertRaises(ValueError):
        context.data_boundaries(region=10)
def test_vector_multilayer(self):
    """contrib.vector

    A styled table layer plus a time-animated query layer must render to
    ``HTML`` through ``vmap``.
    """
    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
    point_layer = vector.Layer(self.points,
                               color='red',
                               size=10,
                               strokeColor='blue')
    poly_layer = vector.QueryLayer('SELECT * FROM {}'.format(self.polys),
                                   time='torque($cartodb_id, 10)',
                                   strokeWidth=2)

    rendered = vector.vmap([point_layer, poly_layer], context)
    self.assertIsInstance(rendered, HTML)
def test_batchjobstatus_repr(self):
    """context.BatchJobStatus.__repr__

    The repr of a status built from a dict must list the job id, last
    status and creation time in the exact format asserted below.
    """
    from cartoframes.context import BatchJobStatus

    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
    job_info = {'job_id': 'foo', 'status': 'unknown', 'created_at': None}
    bjs = BatchJobStatus(context, job_info)

    expected = ("BatchJobStatus(job_id='foo', "
                "last_status='unknown', "
                "created_at='None')")
    self.assertMultiLineEqual(bjs.__repr__(), expected)
def test_debug_print(self):
    """context._debug_print

    Calls ``_debug_print`` with a requests response, with plain strings
    (short and long), and on a non-verbose context; the string and
    non-verbose calls are expected to return ``None``.
    """
    verbose_context = cartoframes.CartoContext(base_url=self.baseurl,
                                               api_key=self.apikey,
                                               verbose=True)
    # request-response usage
    resp = requests.get('http://httpbin.org/get')
    verbose_context._debug_print(resp=resp)
    verbose_context._debug_print(resp=resp.text)

    # non-requests-response usage
    test_str = 'this is a test'
    long_test_str = ', '.join([test_str] * 100)
    self.assertIsNone(verbose_context._debug_print(test_str=test_str))
    self.assertIsNone(verbose_context._debug_print(long_str=long_test_str))

    # verbose = False test
    quiet_context = cartoframes.CartoContext(base_url=self.baseurl,
                                             api_key=self.apikey,
                                             verbose=False)
    self.assertIsNone(quiet_context._debug_print(resp=test_str))