def test_write_privacy(self):
    """context.CartoContext.write Updates the privacy of a dataset"""
    from carto.datasets import DatasetManager
    cc = cartoframes.CartoContext(base_url=self.baseurl,
                                  api_key=self.apikey)
    ds_manager = DatasetManager(self.auth_client)

    # A fresh write defaults the dataset to private.
    first_batch = pd.DataFrame({'ids': list('abcd'), 'vals': range(4)})
    cc.write(first_batch, self.test_write_table)
    self.assertEqual(
        ds_manager.get(self.test_write_table).privacy.lower(), 'private')

    # Overwriting with an explicit privacy switches it to public.
    second_batch = pd.DataFrame({'ids': list('efgh'), 'vals': range(4, 8)})
    cc.write(second_batch, self.test_write_table,
             overwrite=True, privacy='public')
    self.assertEqual(
        ds_manager.get(self.test_write_table).privacy.lower(), 'public')

    # Privacy lookup on a nonexistent table yields None.
    self.assertIsNone(cc._get_privacy('i_am_not_a_table_in_this_account'))
def _get_metadata(self, auth_client, table_name, retries=4, retry_wait_time=1):
    """Return the Carto dataset metadata for ``table_name``.

    Retries with exponential backoff when the table is not found yet
    (e.g. the dataset is still being created server-side).

    Parameters
    ----------
    auth_client : authenticated Carto API client handed to DatasetManager
    table_name : str
        Name of the table to look up.
    retries : int
        Remaining attempts after a ``NotFoundException``.
    retry_wait_time : int or float
        Seconds to sleep before the next attempt; doubled on each retry.

    Raises
    ------
    Exception
        If the table cannot be found after all retries, or on any other
        lookup failure.
    """
    ds_manager = DatasetManager(auth_client)
    try:
        return ds_manager.get(table_name)
    except Exception as e:
        # Match by class name: NotFoundException comes from the client
        # library, which is not imported in this module's namespace.
        if type(e).__name__ == 'NotFoundException' and retries > 0:
            time.sleep(retry_wait_time)
            # BUG FIX: the recursive retry's result was previously dropped
            # (no `return`), so every retried lookup yielded None.
            return self._get_metadata(auth_client=auth_client,
                                      table_name=table_name,
                                      retries=retries - 1,
                                      retry_wait_time=retry_wait_time * 2)
        else:
            # Chain the original error so the root cause is preserved.
            raise Exception('We could not get the table metadata. '
                            'Please, try again in a few seconds or contact support for help') from e
def shapefile_to_carto(table_name, schema, gdf, privacy='LINK'):
    '''
    Upload a geodataframe (e.g. read from a shapefile) to Carto row by row.

    Note: zipped shapefiles can also be pushed through upload_to_carto;
    use this function when several shapefiles are processed in one script
    and must land in separate Carto tables, or when the table is too large
    to be exported as a shapefile.

    INPUT   table_name: name of the table to create on Carto (string)
            schema: column names mapped to data types for the upload (dictionary)
            gdf: the data to upload (geodataframe)
            privacy: privacy setting applied to the uploaded dataset (string)
    '''
    # Fan the per-row INSERT statements out over 10 worker threads.
    with ThreadPoolExecutor(max_workers=10) as executor:
        pending = [
            executor.submit(insert_carto_send,
                            insert_carto_query(row, schema, table_name))
            for _, row in gdf.iterrows()
        ]
        # Collect each row's result as its upload finishes.
        uploaded = [task.result() for task in as_completed(pending)]
        logging.info('Upload of {} rows complete!'.format(len(uploaded)))

        # Authenticate against Carto and apply the requested privacy.
        auth_client = APIKeyAuthClient(
            api_key=CARTO_KEY,
            base_url="https://{user}.carto.com/".format(user=CARTO_USER))
        dataset_manager = DatasetManager(auth_client)
        dataset = dataset_manager.get(table_name)
        dataset.privacy = privacy
        dataset.save()
continue elif i != base_table and index <= len(table_name): query = "insert into " + base_table + \ "(" + dict_col['string_agg'] + ") select " + \ dict_col['string_agg'] + " from " + table_name[index] + ";" sql.send(query) time.sleep(2) else: break index = index + 1 # change name of base table myTable = dataset_manager.get(base_table) myTable.name = base_table + "_merged" myTable.save() time.sleep(2) # remove not merged datasets for i in table_name: try: myTable = dataset_manager.get(i) myTable.delete() time.sleep(2) except: continue logger.info('Tables merged') print('\nURL of dataset is: \
print('Inserting new rows for shapefile: {}'.format(shape_file)) cartosql.insertRows(dataset_name + '_edit', CARTO_SCHEMA.keys(), CARTO_SCHEMA.values(), new_rows) #save processed dataset to shapefile out_gdf.to_file(processed_data_file, driver='ESRI Shapefile') # Change privacy of table on Carto #set up carto authentication using local variables for username (CARTO_WRI_RW_USER) and API key (CARTO_WRI_RW_KEY) auth_client = APIKeyAuthClient(api_key=os.getenv('CARTO_WRI_RW_KEY'), base_url="https://{user}.carto.com/".format( user=os.getenv('CARTO_WRI_RW_USER'))) #set up dataset manager with authentication dataset_manager = DatasetManager(auth_client) #set dataset privacy to 'Public with link' dataset = dataset_manager.get(dataset_name + '_edit') dataset.privacy = 'LINK' dataset.save() print('Privacy set to public with link.') ''' Upload original data and processed data to Amazon S3 storage ''' def upload_to_aws(local_file, bucket, s3_file): s3 = boto3.client('s3', aws_access_key_id=os.getenv('aws_access_key_id'), aws_secret_access_key=os.getenv('aws_secret_access_key')) try: s3.upload_file(local_file, bucket, s3_file) print("Upload Successful")
' https://username.carto.com/ ' + '(defaults to env variable CARTO_API_URL)') parser.add_argument('--api_key', dest='CARTO_API_KEY', default=os.environ['CARTO_API_KEY'] if 'CARTO_API_KEY' in os.environ else '', help='Api key of the account' + ' (defaults to env variable CARTO_API_KEY)') args = parser.parse_args() # Set authentification to CARTO if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization: auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization) dataset_manager = DatasetManager(auth_client) dataset = dataset_manager.get(args.dataset_name) else: logger.error( 'You need to provide valid credentials, run with -h parameter for details' ) import sys sys.exit(1) # PRIVATE, PUBLIC, LINK dataset.privacy = args.privacy dataset.save() logger.info("Done!")
class client(object):
    """Thin wrapper around the Carto SQL / dataset APIs for one account."""

    def __init__(self):
        self.carto_api_key = os.environ['API_KEY']
        self.carto_account = os.environ['ACCOUNT']
        USR_BASE_URL = "https://{user}.carto.com/".format(
            user=self.carto_account)
        # This mirrors the carto docs; keep the construction order as-is.
        self.auth_client = APIKeyAuthClient(api_key=self.carto_api_key,
                                            base_url=USR_BASE_URL)
        self.sql = SQLClient(self.auth_client)
        self.dataset_manager = DatasetManager(self.auth_client)

    def checkconnection(self):
        """Return True when a trivial query reaches a PostgreSQL backend."""
        try:
            data = self.sql.send("SELECT * from version()", do_post=False)
        except CartoException as e:
            # Occasionally seen: Max retries exceeded / proxy tunnel
            # failures (407 Proxy Authentication Required).
            print("some error occurred", e)
            return False
        # Example row: {'version': 'PostgreSQL 11.5 (Ubuntu ...) ...'}
        return any(row['version'].startswith('PostgreSQL')
                   for row in data['rows'])

    def getkount(self, relation):
        """Return the record count of any relation; re-raise on failure."""
        try:
            data = self.sql.send(
                "SELECT count(*) as kount from {0}".format(relation),
                do_post=False)
        except CartoException as e:
            print("some error occurred", e)
            raise
        # Exactly one row with one key/value is expected, e.g. {'kount': 1}.
        for row in data['rows']:
            return row['kount']

    def batchsql(self, list_of_sqls, checkevery=1, maxtime=5):
        """Run a list of SQL statements as one batch job; poll until done.

        Probably don't want to read a file with a million records and
        upload — open streets, for example, is 1k rows / 500KB.
        Returns False if the job is still not 'done' after `maxtime` polls.
        """
        batch_client = BatchSQLClient(self.auth_client)
        job = batch_client.create(list_of_sqls)
        # job_id is a UUID; the job dict nests per-statement statuses
        # because more statement sets can be added to a running job.
        status = batch_client.read(job['job_id'])
        polls = 0
        while status['status'] != 'done':
            time.sleep(checkevery)
            status = batch_client.read(job['job_id'])
            polls += 1
            if polls > maxtime:
                return False
        return True

    def uploadshapefolder(self, path_tothe_zip):
        """Upload an archive (tar.gz tested) and make the dataset public.

        Returns the name Carto assigned (often tablename_XX on repeats).
        Carto warns this is part of a non-public API that may change.
        """
        cartodatasetid = self.dataset_manager.create(path_tothe_zip)
        # Always made public; this call also warns about the non-public API.
        dataset = self.dataset_manager.get(cartodatasetid)
        dataset.privacy = PUBLIC
        dataset.save()
        return cartodatasetid

    def delete(self, tablename):
        """Delete a table — careful, buddy. Docs call names "dataset id"."""
        dataset = self.dataset_manager.get(tablename)
        dataset.delete()
'property.year_ending_67011' ], inplace=True) years = data_View4['year_ending'].drop_duplicates(keep="first").tolist() for item in years: data_View5 = data_View4.loc[data_View4['year_ending'] == item] data_View5['year_ending'] = data_View5['year_ending'].replace( str(item), '12/31/' + str(item)) data_View5.set_index('pid') print(str(item).replace('.0', '')) data_View5.to_csv('CSV output\\t' + str(item).replace('.0', '') + '.csv') #Saves data CartoUser = CartoUserName USR_BASE_URL = "https://{user}.carto.com/".format(user=CartoUser) auth_client = APIKeyAuthClient(api_key=CartoPassword, base_url=USR_BASE_URL) dataset_manager = DatasetManager(auth_client) for item in years: print(item) try: datasets = dataset_manager.get('t' + str(item).replace('.0', '')) datasets.delete() dataset = dataset_manager.create('CSV output\\t' + str(item).replace('.0', '') + '.csv') print('works') except: pass
if key not in fieldnames: del obj[key] obj['channels'] = ' '.join(map(str, obj['channels'])).strip() fieldnames = ['date', 'opacity', 'team'] for obj in write_csv(MATCHES_POLYGONS_FILE_NAME, fieldnames, matches_polygons): keys = list(obj.keys()).copy() for key in keys: if key not in fieldnames: del obj[key] dataset_manager = DatasetManager(auth_client) # pass any parameter to generate all files if len(sys.argv) > 1: teams = dataset_manager.get('teams') if teams: teams.delete() tvchannels = dataset_manager.get('tvchannels') if tvchannels: tvchannels.delete() stadiums = dataset_manager.get('stadiums') if stadiums: stadiums.delete() dataset_manager.create(global_path(STADIUMS_FILE_NAME)) dataset_manager.create(global_path(TVCHANNELS_FILE_NAME)) dataset_manager.create(global_path(TEAMS_FILE_NAME)) sql = SQLClient(auth_client) sql.send('UPDATE teams SET the_geom = cdb_geocode_admin0_polygon(name)')
'weather_norm_site_eui_2018', 'weather_norm_site_eui_2017', 'weather_norm_site_eui_2016', 'weather_norm_site_eui_2015', 'weather_norm_site_eui_2014', 'weather_norm_site_eui_2013', 'weather_norm_site_eui_2012', 'site_eui_change_current_last', 'weather_norm_source_eui_2020', 'weather_norm_source_eui_2019', 'weather_norm_source_eui_2018', 'weather_norm_source_eui_2017', 'weather_norm_source_eui_2016', 'weather_norm_source_eui_2015', 'weather_norm_source_eui_2014', 'weather_norm_source_eui_2013', 'weather_norm_source_eui_2012', 'source_eui_change_current_last', 'total_ghg_emissions_intensity_2020', 'total_ghg_emissions_intensity_2019', 'total_ghg_emissions_intensity_2018', 'total_ghg_emissions_intensity_2017', 'total_ghg_emissions_intensity_2016', 'total_ghg_emissions_intensity_2015', 'total_ghg_emissions_intensity_2014', 'total_ghg_emissions_intensity_2013', 'total_ghg_emissions_intensity_2012', 'tot_ghg_emissions_intensity_change_current_last' ]] benchmarking_disclosure.set_index('pid', inplace=True) #benchmarking_disclosure.to_excel('Output\\tbl_Consolidated_2012_infinite.xlsx') benchmarking_disclosure.to_csv('Output\\tbl_Consolidated_2012_infinite.csv') #Saves data CartoUser = CartoUserName USR_BASE_URL = "https://{user}.carto.com/".format(user=CartoUser) auth_client = APIKeyAuthClient(api_key=CartoPassword, base_url=USR_BASE_URL) dataset_manager = DatasetManager(auth_client) datasets = dataset_manager.get('tbl_consolidated_2012_infinite') datasets.delete() dataset = dataset_manager.create('Output\\tbl_Consolidated_2012_infinite.csv')
# if table does not exist, create it table_name = dataset_name + '_edit' if not table_name in carto_table_names: logging.info(f'Table {table_name} does not exist, creating') # Change privacy of table on Carto # set up carto authentication using local variables for username (CARTO_WRI_RW_USER) and API key (CARTO_WRI_RW_KEY) auth_client = APIKeyAuthClient( api_key=os.getenv('CARTO_WRI_RW_KEY'), base_url="https://{user}.carto.com/".format( user=os.getenv('CARTO_WRI_RW_USER'))) # set up dataset manager with authentication dataset_manager = DatasetManager(auth_client) # upload dataset to carto dataset = dataset_manager.create(processed_data_file) # set dataset privacy to 'Public with link' dataset = dataset_manager.get(table_name) dataset.privacy = 'LINK' dataset.save() logging.info('Privacy set to public with link.') # if table does exist, clear all the rows so we can upload the latest version else: logging.info(f'Table {table_name} already exists, clearing rows') # column names and types for data table # column names should be lowercase # column types should be one of the following: geometry, text, numeric, timestamp CARTO_SCHEMA = OrderedDict([('country_code', 'text'), ('country_name', 'text'), ('datetime', 'timestamp'), ('year', 'numeric')]) # Go through each type of "value" in this table # Add data column, unit, and indicator code to CARTO_SCHEMA, column_order, and dataset