def test_table(api_key_auth_client_usr):
    """Run the setup queries as a Batch SQL job and poll until it finishes."""
    batch_client = BatchSQLClient(api_key_auth_client_usr)
    job = batch_client.create(SETUP_QUERIES)
    # Poll once a second until the job reaches a terminal state.
    # (was: `while not job['status'] in ...` — use the idiomatic `not in`)
    while job['status'] not in BATCH_TERMINAL_STATES:
        time.sleep(1)
        job = batch_client.read(job['job_id'])
    assert job['status'] == 'done'
def test_batch_create_and_wait_for_completion(api_key_auth_client_usr):
    """The blocking create call must leave the job in a terminal state."""
    batch_client = BatchSQLClient(api_key_auth_client_usr)
    # create_and_wait_for_completion blocks until the job stops running.
    job = batch_client.create_and_wait_for_completion(BATCH_SQL_SINGLE_QUERY)
    terminal_states = ['done', 'failed', 'canceled', 'unknown']
    assert job['status'] in terminal_states
def test_batchjobstatus_methods(self):
    """Exercise the BatchJobStatus accessors against a real one-query job."""
    from cartoframes.context import BatchJobStatus
    from carto.sql import BatchSQLClient

    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
    batch_client = BatchSQLClient(context.auth_client)
    response = batch_client.create(['select 1', ])
    status_obj = BatchJobStatus(context, response)

    # A freshly-created job must be in one of the known states.
    known_states = ('pending', 'running', 'done', 'canceled', 'unknown', )
    self.assertTrue(status_obj.get_status() in known_states)

    # The private setter feeds the public getter.
    status_obj._set_status('foo')
    self.assertEqual(status_obj.get_status(), 'foo')

    # status() returns exactly these three keys.
    polled = status_obj.status()
    self.assertSetEqual(set(polled.keys()),
                        {'status', 'updated_at', 'created_at'})

    # Constructing from a bare job-id string: only job_id is known.
    from_id = BatchJobStatus(context, 'foo')
    self.assertIsNone(from_id.get_status())
    self.assertEqual(from_id.job_id, 'foo')
def __init__(self, credentials):
    """Wire up the CARTO API clients from the given (or default) credentials."""
    # Fall back to the locally stored credentials when none are passed.
    self.credentials = credentials or get_default_credentials()
    check_credentials(self.credentials)
    # One auth client is shared by the three SQL API clients below.
    self.auth_client = _create_auth_client(self.credentials)
    self.sql_client = SQLClient(self.auth_client)
    self.copy_client = CopySQLClient(self.auth_client)
    self.batch_sql_client = BatchSQLClient(self.auth_client)
def test_batch_multi_sql(api_key_auth_client_usr):
    """Create a multi-statement batch job and make sure it gets cancelled."""
    batch_client = BatchSQLClient(api_key_auth_client_usr)
    # Kick off the job, then extract its identifier from the response.
    response = batch_client.create(BATCH_SQL_MULTI_QUERY)
    created_job_id = response['job_id']
    # Clean up: cancel the job unless it already reached a terminal state.
    cancel_job_if_not_finished(batch_client, created_job_id)
def __init__(self, carto_context, job):
    """Record the job metadata and keep a Batch SQL client for polling."""
    if isinstance(job, dict):
        # Full job response: pull out the fields we track.
        self.job_id, self.last_status, self.created_at = (
            job.get('job_id'), job.get('status'), job.get('created_at'))
    elif isinstance(job, str):
        # Bare job id: status and creation time are unknown until polled.
        self.job_id, self.last_status, self.created_at = job, None, None
    self._batch_client = BatchSQLClient(carto_context.auth_client)
def initialize(self):
    """Build the auth client plus the SQL and Batch SQL clients."""
    if not self.api_url:
        # Derive the API endpoint from the user name when possible.
        if self.user_name:
            self.api_url = "https://{}.carto.com/api/".format(self.user_name)
        else:
            raise Exception(
                'Not enough data provided to initialize the client')
    # Organization accounts need the extra org argument.
    auth_args = (self.api_url, self.api_key)
    if self.org_name:
        auth_args = auth_args + (self.org_name,)
    self.client = APIKeyAuthClient(*auth_args)
    self.sql_client = SQLClient(self.client)
    self.batch_client = BatchSQLClient(self.client)
def __init__(self, user, api_key, options=None):
    """Set up SQL (and optionally Batch SQL) clients for a CARTO account.

    Args:
        user: CARTO account user name.
        api_key: API key for the account.
        options: optional dict of settings (``do_post``, ``parse_json``,
            ``format``, ``base_url``, ``api_version``, ``batch``).
    """
    # BUGFIX: the default used to be a mutable `{}` shared across calls;
    # normalize None to a fresh dict instead.
    if options is None:
        options = {}
    super().__init__(options)
    self.do_post = options.get('do_post', False)
    self.parse_json = options.get('parse_json', True)
    self.format = options.get('format', 'json')
    self.base_url_option = options.get('base_url', '')
    self.api_version = options.get('api_version', self.DEFAULT_API_VERSION)
    self.batch = options.get('batch', False)
    self.user = user
    self.api_key = api_key
    self.base_url = self._generate_base_url(user, self.base_url_option)
    # Carto Context for DataFrame handling
    self._carto_context = None
    # Carto client for COPYs
    self._copy_client = None
    self._auth_client = APIKeyAuthClient(api_key=api_key,
                                         base_url=self.base_url)
    self._sql_client = SQLClient(self._auth_client,
                                 api_version=self.api_version)
    # The Batch SQL client is only built when explicitly requested.
    self._batch_client = None
    if self.batch:
        self._batch_client = BatchSQLClient(self._auth_client)
def test_batch_multi_sql(api_key_auth_client_usr):
    """Cancelling a fresh multi-statement job must confirm 'cancelled'."""
    batch_client = BatchSQLClient(api_key_auth_client_usr)
    # Kick off the multi-statement job.
    response = batch_client.create(BATCH_SQL_MULTI_QUERY)
    created_job_id = response['job_id']
    # If the job already finished, cancel() raises and there is nothing
    # to assert; otherwise the API must acknowledge the cancellation.
    try:
        confirmation = batch_client.cancel(created_job_id)
    except CartoException:
        pass
    else:
        assert confirmation == 'cancelled'
def batchsql(self, list_of_sqls, checkevery=1, maxtime=5):
    """Run a list of SQL statements as one Batch SQL API job.

    Don't feed huge payloads through here — e.g. open streets is ~1k rows,
    500KB, which is fine; a million-row file is not.

    Args:
        list_of_sqls: SQL statements to execute in order.
        checkevery: seconds to sleep between status polls.
        maxtime: maximum number of polls before giving up.

    Returns:
        True when the job finishes with status 'done'; False on failure,
        cancellation, or timeout.
    """
    batch_sql_client = BatchSQLClient(self.auth_client)
    job = batch_sql_client.create(list_of_sqls)
    # https://github.com/CartoDB/carto-python
    # A job response looks like:
    # {
    #     'user': '******',
    #     'status': 'done',
    #     'query': [{'query': 'drop table if exists foo', 'status': 'done'},
    #               {'query': 'create table foo (bar text)', 'status': 'done'},
    #               {...} {...}],
    #     'created_at': '2020-07-02T16:31:31.873Z',
    #     'updated_at': '2020-07-02T16:31:31.996Z',
    #     'job_id': '5171b8c4-8c03-4610-8797-5dd98ff3e61b'
    # }
    # Queries are nested because more sets can be added to a running job.
    checks = 0
    while True:
        status = batch_sql_client.read(job['job_id'])['status']
        if status == 'done':
            return True
        if status in ('failed', 'canceled', 'unknown'):
            # BUGFIX: the old loop only watched for 'done', so a failed or
            # cancelled job kept polling until the timeout. Bail out early.
            return False
        checks += 1
        if checks > maxtime:
            return False
        time.sleep(checkevery)
def __init__(self, csv_file_path, **kwargs):
    """Configure the upload job from keyword overrides and build clients."""
    self.__set_max_csv_length()
    self.__set_defaults()

    def _coerce(raw):
        # Overrides arrive as strings: prefer int, then boolean, else as-is.
        try:
            return int(raw)
        except (ValueError, TypeError):
            if raw in ("true", "True"):
                return True
            if raw in ("false", "False"):
                return False
            return raw

    for name, raw in kwargs.items():
        setattr(self, name, _coerce(raw))
    self.__trim_columns()
    self.csv_file_path = csv_file_path
    # Only build API clients when credentials were actually provided.
    if self.api_key:
        self.api_auth = APIKeyAuthClient(self.base_url, self.api_key)
        self.sql = SQLClient(self.api_auth)
        self.bsql = BatchSQLClient(self.api_auth)
class UploadJob(object):
    """Chunked CSV upload job driven through the CARTO SQL and Batch SQL APIs."""

    def __init__(self, csv_file_path, **kwargs):
        # Configure csv-module limits and option defaults first, then let
        # keyword arguments override them (coercing ints and booleans).
        self.__set_max_csv_length()
        self.__set_defaults()
        for key, value in kwargs.items():
            try:
                setattr(self, key, int(value))
            except (ValueError, TypeError):
                if value in ("true", "True"):
                    setattr(self, key, True)
                elif value in ("false", "False"):
                    setattr(self, key, False)
                else:
                    setattr(self, key, value)
        self.__trim_columns()
        self.csv_file_path = csv_file_path
        # API clients are only created when an API key is configured.
        if self.api_key:
            self.api_auth = APIKeyAuthClient(self.base_url, self.api_key)
            self.sql = SQLClient(self.api_auth)
            self.bsql = BatchSQLClient(self.api_auth)

    def __set_defaults(self):
        # Baseline configuration; any of these may be overridden via kwargs.
        self.delimiter = DEFAULT_DELIMITER
        self.x_column = DEFAULT_X_COLUMN
        self.y_column = DEFAULT_Y_COLUMN
        self.srid = DEFAULT_SRID
        self.chunk_size = DEFAULT_CHUNK_SIZE
        self.max_attempts = DEFAULT_MAX_ATTEMPTS
        self.file_encoding = DEFAULT_FILE_ENCOFING
        self.force_no_geometry = DEFAULT_FORCE_NO_GEOMETRY
        self.force_the_geom = DEFAULT_FORCE_THE_GEOM
        self.date_format = DEFAULT_DATE_FORMAT
        self.datetime_format = DEFAULT_DATETIME_FORMAT
        self.float_comma_separator = DEFAULT_FLOAT_COMMA_SEPARATOR
        self.float_thousand_separator = DEFAULT_FLOAT_THOUSAND_SEPARATOR
        self.date_columns = DEFAULT_DATE_COLUMNS
        self.observer = None

    def __set_max_csv_length(self):
        # Raise the csv field-size limit as high as the platform allows.
        maxInt = sys.maxsize
        decrement = True
        while decrement:
            # decrease the maxInt value by factor 10
            # as long as the OverflowError occurs.
            decrement = False
            try:
                csv.field_size_limit(maxInt)
            except OverflowError:
                maxInt = int(maxInt / 10)
                decrement = True

    def __trim_columns(self):
        # Strip whitespace from the comma-separated column lists.
        if self.columns is not None:
            self.columns = self.columns.replace(' ', '')
        if self.date_columns is not None:
            self.date_columns = self.date_columns.replace(' ', '')

    def run(self, start_chunk=1, end_chunk=None):
        # csv_file_path may already be a file-like object; otherwise open it
        # (encoding handling differs between Python 2 and 3).
        if not isinstance(self.csv_file_path, str):
            self.do_run(self.csv_file_path, start_chunk, end_chunk)
        else:
            if sys.version_info <= (3, 0):
                with open(self.csv_file_path) as f:
                    self.do_run(f, start_chunk, end_chunk)
            else:
                with open(self.csv_file_path,
                          encoding=self.file_encoding) as f:
                    self.do_run(f, start_chunk, end_chunk)

    def notify(self, message_type, message):
        # Forward a progress/error event to the observer callback, if set.
        # Returns True when an observer was actually called.
        observer = getattr(self, "observer", None)
        if callable(observer):
            observer({"type": message_type, "msg": str(message)})
            return True
        return False

    def regenerate_overviews(self):
        # Overview regeneration can be slow, so run it as a Batch SQL job
        # and hand the job id back to the caller for polling.
        query = 'select CDB_CreateOverviews(\'{table}\'::regclass)'.\
            format(table=self.table_name)
        job_result = self.bsql.create(query)
        return job_result['job_id']

    def check_job(self, job_id):
        # Poll the Batch SQL API for the given job.
        return self.bsql.read(job_id)

    def create_geom_query(self, record):
        # Build the geometry value (followed by a comma) for one INSERT row.
        null_result = NULL_VALUE + ","
        if self.force_the_geom:
            return self.parse_column_value(record, self.force_the_geom,
                                           parse_float=False)
        if self.force_no_geometry:
            return null_result
        longitude = self.get_longitude(record)
        latitude = self.get_latitude(record)
        if longitude is None or latitude is None \
           or longitude is DEFAULT_COORD or latitude is DEFAULT_COORD:
            return null_result
        return "st_transform(st_setsrid(st_makepoint(" + \
            "{longitude}, {latitude}), {srid}), 4326),".\
            format(longitude=longitude, latitude=latitude, srid=self.srid)

    def parse_column_value(self, record, column, parse_float=True):
        # Render one CSV field as a SQL literal followed by a comma:
        # date columns first, then floats, then quoted strings / NULL.
        null_result = NULL_VALUE + ","
        try:
            value = self.escape_value(record[column])
        except Exception:
            return null_result
        try:
            if self.is_date_column(column):
                try:
                    result = "'{value}',".format(
                        value=self.parse_date_column(record, column))
                except ValueError:
                    result = null_result
            elif parse_float:
                result = "{value},".format(
                    value=self.parse_float_value(value))
            else:
                # Force the string fallback below.
                raise TypeError
        except (ValueError, TypeError):
            # Not numeric/date: quoted string, or NULL when empty.
            if value is None or not value.strip():
                result = null_result
            else:
                result = "'{value}',".format(value=value)
        return result

    def is_date_column(self, column):
        # A column is a date column when it appears in the configured
        # comma-separated date_columns list.
        return column is not None and self.date_columns is not None \
            and column in self.date_columns.split(',')

    def parse_date_column(self, record, column):
        # Try the datetime format first, then the date-only format;
        # anything else becomes a ValueError for the caller.
        if not self.date_format or not self.datetime_format:
            raise ValueError
        try:
            return datetime.strptime(
                record[column],
                self.datetime_format).strftime(CARTO_DATE_FORMAT)
        except Exception:
            try:
                return datetime.strptime(
                    record[column],
                    self.date_format).strftime(CARTO_DATE_FORMAT)
            except Exception:
                raise ValueError

    def escape_value(self, value):
        # Double single quotes for SQL string literals.
        return value.replace("'", "''")

    def get_longitude(self, record):
        # Out-of-range longitudes -> None; unparsable -> DEFAULT_COORD.
        try:
            longitude = self.get_coord(record, self.x_column)
            if abs(longitude) > MAX_LON:
                return None
        except TypeError:
            return DEFAULT_COORD
        else:
            return longitude

    def get_latitude(self, record):
        # Out-of-range latitudes -> None; unparsable -> DEFAULT_COORD.
        try:
            latitude = self.get_coord(record, self.y_column)
            if abs(latitude) > MAX_LAT:
                return None
        except TypeError:
            return DEFAULT_COORD
        else:
            return latitude

    def get_coord(self, record, type):
        # NOTE(review): on ValueError/KeyError this assigns coord but falls
        # off the end (no return in the except path), so it yields None —
        # get_longitude/get_latitude then hit TypeError on abs(None).
        try:
            coord = self.parse_float_value(record[type]) or DEFAULT_COORD
        except (ValueError, KeyError):
            coord = DEFAULT_COORD
        else:
            return coord

    def parse_float_value(self, value):
        # Reject sentinel strings (NaN/Inf etc.), strip the thousand
        # separator, then normalize the decimal separator to '.'.
        if value.upper() in (val.upper() for val in FORBIDDEN_FLOAT_VALUES):
            raise ValueError
        if self.float_thousand_separator:
            value = value.replace(self.float_thousand_separator, "")
        if self.float_comma_separator:
            value = value.replace(self.float_comma_separator, ".")
        return float(value)

    def send(self, query, file_encoding, chunk_num):
        # Send one INSERT chunk, retrying up to max_attempts times.
        if sys.version_info <= (3, 0):
            query = query.decode(file_encoding).encode(UTF8)
        logger.debug("Chunk #{chunk_num}: {query}".format(
            chunk_num=(chunk_num + 1), query=query))
        for retry in range(self.max_attempts):
            try:
                self.sql.send(query)
            except Exception as e:
                logger.warning(
                    "Chunk #{chunk_num}: Retrying ({error_msg})".format(
                        chunk_num=(chunk_num + 1), error_msg=e))
                self.notify('error', e)
            else:
                logger.info("Chunk #{chunk_num}: Success!".format(
                    chunk_num=(chunk_num + 1)))
                self.notify('progress', chunk_num + 1)
                break
        else:
            # for-else: every attempt failed.
            logger.error("Chunk #{chunk_num}: Failed!)".format(
                chunk_num=(chunk_num + 1)))
            self.notify('error', "Failed " + str(chunk_num + 1))
class BatchJobStatus(object):
    """Tracks the state of a Batch SQL API write or query job.

    Read more at `Batch SQL API docs
    <https://carto.com/docs/carto-engine/sql-api/batch-queries/>`__ about
    responses and how to interpret them.

    Instances usually come out of a
    :py:meth:`CartoContext.write <cartoframes.context.CartoContext.write>`
    call, but can also be built directly from a job id string:

    .. code:: python

        >>> from cartoframes import CartoContext, BatchJobStatus
        >>> cc = CartoContext(username='******', api_key='...')
        >>> batch_job = BatchJobStatus(cc, 'job-id-string')

    Poll until the job reaches a terminal state:

    .. code:: python

        import time
        while True:
            curr_status = batch_job.status()['status']
            if curr_status in ('done', 'failed', 'canceled', 'unknown', ):
                print(curr_status)
                break
            time.sleep(5)

    Attributes:
        job_id (str): Job ID of the Batch SQL API job
        last_status (str): Status of ``job_id`` job when last polled
        created_at (str): Time and date when job was created

    Args:
        carto_context (:py:class:`CartoContext
            <cartoframes.context.CartoContext>`): CartoContext instance
        job (dict or str): Either the dict returned after sending a Batch
            SQL API request, or a Batch SQL API job id.
    """

    def __init__(self, carto_context, job):
        if isinstance(job, dict):
            # Full API response: copy out the fields we care about.
            self.job_id = job.get('job_id')
            self.last_status = job.get('status')
            self.created_at = job.get('created_at')
        elif isinstance(job, str):
            # Bare job id: nothing is known until the first poll.
            self.job_id = job
            self.last_status = None
            self.created_at = None
        self._batch_client = BatchSQLClient(carto_context.auth_client)

    def __repr__(self):
        fields = (self.job_id, self.last_status, self.created_at)
        return ('BatchJobStatus(job_id=\'{}\', '
                'last_status=\'{}\', '
                'created_at=\'{}\')'.format(*fields))

    def _set_status(self, curr_status):
        self.last_status = curr_status

    def get_status(self):
        """Return the most recently observed status of the job."""
        return self.last_status

    def status(self):
        """Poll the Batch SQL API for the current status of ``job_id``.

        Returns:
            dict: Status and the times it was updated/created.

        Warns:
            UserWarning: If the job failed, a warning is raised with
            information about the failure.
        """
        resp = self._batch_client.read(self.job_id)
        if 'failed_reason' in resp:
            warn('Job failed: {}'.format(resp.get('failed_reason')))
        curr = resp.get('status')
        self._set_status(curr)
        return dict(status=curr,
                    updated_at=resp.get('updated_at'),
                    created_at=resp.get('created_at'))
class ContextManager:
    """Facade over the CARTO SQL, Batch SQL and COPY APIs for one account."""

    def __init__(self, credentials):
        # Fall back to locally stored credentials when none are passed.
        self.credentials = credentials or get_default_credentials()
        check_credentials(self.credentials)
        # One auth client shared by the three SQL API clients below.
        self.auth_client = _create_auth_client(self.credentials)
        self.sql_client = SQLClient(self.auth_client)
        self.copy_client = CopySQLClient(self.auth_client)
        self.batch_sql_client = BatchSQLClient(self.auth_client)

    @not_found
    def execute_query(self, query, parse_json=True, do_post=True,
                      format=None, **request_args):
        # Synchronous SQL API call.
        return self.sql_client.send(query.strip(), parse_json, do_post,
                                    format, **request_args)

    @not_found
    def execute_long_running_query(self, query):
        # Batch SQL API call; blocks until the job reaches a terminal state.
        return self.batch_sql_client.create_and_wait_for_completion(
            query.strip())

    def copy_to(self, source, schema=None, limit=None,
                retry_times=DEFAULT_RETRY_TIMES):
        """Download a table or query result into a DataFrame via COPY TO."""
        query = self.compute_query(source, schema)
        columns = self._get_query_columns_info(query)
        copy_query = self._get_copy_query(query, columns, limit)
        return self._copy_to(copy_query, columns, retry_times)

    def copy_from(self, gdf, table_name, if_exists='fail', cartodbfy=True,
                  retry_times=DEFAULT_RETRY_TIMES):
        """Upload a dataframe into a (possibly pre-existing) table.

        ``if_exists`` is one of 'fail', 'replace' or 'append'.
        """
        schema = self.get_schema()
        table_name = self.normalize_table_name(table_name)
        df_columns = get_dataframe_columns_info(gdf)
        if self.has_table(table_name, schema):
            if if_exists == 'replace':
                table_query = self._compute_query_from_table(
                    table_name, schema)
                table_columns = self._get_query_columns_info(table_query)
                if self._compare_columns(df_columns, table_columns):
                    # Equal columns: truncate table
                    self._truncate_table(table_name, schema, cartodbfy)
                else:
                    # Diff columns: truncate table and drop + add columns
                    self._truncate_and_drop_add_columns(
                        table_name, schema, df_columns, table_columns,
                        cartodbfy)
            elif if_exists == 'fail':
                raise Exception(
                    'Table "{schema}.{table_name}" already exists in your CARTO account. '
                    'Please choose a different `table_name` or use '
                    'if_exists="replace" to overwrite it.'.format(
                        table_name=table_name, schema=schema))
            else:  # 'append'
                pass
        else:
            self._create_table_from_columns(table_name, schema, df_columns,
                                            cartodbfy)
        self._copy_from(gdf, table_name, df_columns, retry_times)
        return table_name

    def create_table_from_query(self, query, table_name, if_exists,
                                cartodbfy=True):
        """Materialize a query into a table, honoring ``if_exists``."""
        schema = self.get_schema()
        table_name = self.normalize_table_name(table_name)
        if self.has_table(table_name, schema):
            if if_exists == 'replace':
                # TODO: review logic copy_from
                self._drop_create_table_from_query(table_name, schema,
                                                   query, cartodbfy)
            elif if_exists == 'fail':
                raise Exception(
                    'Table "{schema}.{table_name}" already exists in your CARTO account. '
                    'Please choose a different `table_name` or use '
                    'if_exists="replace" to overwrite it.'.format(
                        table_name=table_name, schema=schema))
            else:  # 'append'
                pass
        else:
            self._drop_create_table_from_query(table_name, schema, query,
                                               cartodbfy)
        return table_name

    def list_tables(self, schema=None):
        """Return a one-column DataFrame with the account's table names."""
        # All the show_* flags are off to keep the listing request cheap.
        datasets = DatasetManager(self.auth_client).filter(
            show_table_size_and_row_count='false',
            show_table='false',
            show_stats='false',
            show_likes='false',
            show_liked='false',
            show_permission='false',
            show_uses_builder_features='false',
            show_synchronization='false',
            load_totals='false')
        datasets.sort(key=lambda x: x.updated_at, reverse=True)
        return pd.DataFrame([dataset.name for dataset in datasets],
                            columns=['tables'])

    def has_table(self, table_name, schema=None):
        query = self.compute_query(table_name, schema)
        return self._check_exists(query)

    def delete_table(self, table_name):
        # DROP IF EXISTS: a 'does not exist' notice means nothing was dropped.
        query = _drop_table_query(table_name)
        output = self.execute_query(query)
        return not ('notices' in output and 'does not exist' in
                    output['notices'][0])

    def rename_table(self, table_name, new_table_name, if_exists='fail'):
        """Rename a table, optionally replacing an existing target."""
        new_table_name = self.normalize_table_name(new_table_name)
        if table_name == new_table_name:
            raise ValueError(
                'Table names are equal. Please choose a different table name.')
        if not self.has_table(table_name):
            raise Exception(
                'Table "{table_name}" does not exist in your CARTO account.'.
                format(table_name=table_name))
        if self.has_table(new_table_name):
            if if_exists == 'replace':
                log.debug('Removing table "{}"'.format(new_table_name))
                self.delete_table(new_table_name)
            elif if_exists == 'fail':
                raise Exception(
                    'Table "{new_table_name}" already exists in your CARTO account. '
                    'Please choose a different `new_table_name` or use '
                    'if_exists="replace" to overwrite it.'.format(
                        new_table_name=new_table_name))
        self._rename_table(table_name, new_table_name)
        return new_table_name

    def update_privacy_table(self, table_name, privacy=None):
        DatasetInfo(self.auth_client, table_name).update_privacy(privacy)

    def get_privacy(self, table_name):
        return DatasetInfo(self.auth_client, table_name).privacy

    def get_schema(self):
        """Get user schema from current credentials"""
        query = 'SELECT current_schema()'
        result = self.execute_query(query, do_post=False)
        schema = result['rows'][0]['current_schema']
        log.debug('schema: {}'.format(schema))
        return schema

    def get_geom_type(self, query):
        """Fetch geom type of a remote table or query"""
        distict_query = '''
            SELECT distinct ST_GeometryType(the_geom) AS geom_type
            FROM ({}) q
            LIMIT 5
        '''.format(query)
        response = self.execute_query(distict_query, do_post=False)
        if response and response.get('rows') and len(response.get('rows')) > 0:
            st_geom_type = response.get('rows')[0].get('geom_type')
            if st_geom_type:
                # Strip the 'ST_' prefix before mapping to our geometry type.
                return map_geom_type(st_geom_type[3:])
        return None

    def get_num_rows(self, query):
        """Get the number of rows in the query"""
        result = self.execute_query(
            'SELECT COUNT(*) FROM ({query}) _query'.format(query=query))
        return result.get('rows')[0].get('count')

    def get_bounds(self, query):
        # Bounding box of the_geom as [[xmin, ymin], [xmax, ymax]].
        extent_query = '''
            SELECT ARRAY[
                ARRAY[st_xmin(geom_env), st_ymin(geom_env)],
                ARRAY[st_xmax(geom_env), st_ymax(geom_env)]
            ] bounds FROM (
                SELECT ST_Extent(the_geom) geom_env
                FROM ({}) q
            ) q;
        '''.format(query)
        response = self.execute_query(extent_query, do_post=False)
        if response and response.get('rows') and len(response.get('rows')) > 0:
            return response.get('rows')[0].get('bounds')
        return None

    def get_column_names(self, source, schema=None, exclude=None):
        # Column names of a table or query, minus any excluded ones.
        query = self.compute_query(source, schema)
        columns = [c.name for c in self._get_query_columns_info(query)]
        if exclude and isinstance(exclude, list):
            columns = list(set(columns) - set(exclude))
        return columns

    def is_public(self, query):
        # Used to detect public tables in queries in the publication,
        # because privacy only works for tables.
        public_auth_client = _create_auth_client(self.credentials,
                                                 public=True)
        public_sql_client = SQLClient(public_auth_client)
        exists_query = 'EXPLAIN {}'.format(query)
        try:
            public_sql_client.send(exists_query, do_post=False)
            return True
        except CartoException:
            return False

    def get_table_names(self, query):
        # Used to detect tables in queries in the publication.
        query = 'SELECT CDB_QueryTablesText($q${}$q$) as tables'.format(query)
        result = self.execute_query(query)
        tables = []
        if result['total_rows'] > 0 and result['rows'][0]['tables']:
            # Dataset_info only works with tables without schema
            tables = [
                table.split('.')[1] if '.' in table else table
                for table in result['rows'][0]['tables']
            ]
        return tables

    def _compare_columns(self, a, b):
        # Compare ignoring reserved columns and ordering.
        a_copy = [i for i in a if _not_reserved(i.name)]
        b_copy = [i for i in b if _not_reserved(i.name)]
        a_copy.sort()
        b_copy.sort()
        return a_copy == b_copy

    def _drop_create_table_from_query(self, table_name, schema, query,
                                      cartodbfy):
        # Atomic drop + create (+ optional cartodbfy) as one batch job.
        log.debug('DROP + CREATE table "{}"'.format(table_name))
        query = 'BEGIN; {drop}; {create}; {cartodbfy}; COMMIT;'.format(
            drop=_drop_table_query(table_name),
            create=_create_table_from_query_query(table_name, query),
            cartodbfy=_cartodbfy_query(table_name, schema)
            if cartodbfy else '')
        self.execute_long_running_query(query)

    def _create_table_from_columns(self, table_name, schema, columns,
                                   cartodbfy):
        log.debug('CREATE table "{}"'.format(table_name))
        query = 'BEGIN; {create}; {cartodbfy}; COMMIT;'.format(
            create=_create_table_from_columns_query(table_name, columns),
            cartodbfy=_cartodbfy_query(table_name, schema)
            if cartodbfy else '')
        self.execute_long_running_query(query)

    def _truncate_table(self, table_name, schema, cartodbfy):
        log.debug('TRUNCATE table "{}"'.format(table_name))
        query = 'BEGIN; {truncate}; {cartodbfy}; COMMIT;'.format(
            truncate=_truncate_table_query(table_name),
            cartodbfy=_cartodbfy_query(table_name, schema)
            if cartodbfy else '')
        self.execute_long_running_query(query)

    def _truncate_and_drop_add_columns(self, table_name, schema, df_columns,
                                       table_columns, cartodbfy):
        log.debug(
            'TRUNCATE AND DROP + ADD columns table "{}"'.format(table_name))
        # Regeneration (when available) runs outside the transaction.
        query = '{regenerate}; BEGIN; {truncate}; {drop_columns}; {add_columns}; {cartodbfy}; COMMIT;'.format(
            regenerate=_regenerate_table_query(table_name, schema)
            if self._check_regenerate_table_exists() else '',
            truncate=_truncate_table_query(table_name),
            drop_columns=_drop_columns_query(table_name, table_columns),
            add_columns=_add_columns_query(table_name, df_columns),
            cartodbfy=_cartodbfy_query(table_name, schema)
            if cartodbfy else '')
        self.execute_long_running_query(query)

    def compute_query(self, source, schema=None):
        # Accept either a raw SQL query or a table name.
        if is_sql_query(source):
            return source
        schema = schema or self.get_schema()
        return self._compute_query_from_table(source, schema)

    def _compute_query_from_table(self, table_name, schema):
        return 'SELECT * FROM "{schema}"."{table_name}"'.format(
            schema=schema or 'public', table_name=table_name)

    def _check_exists(self, query):
        # EXPLAIN is a cheap way to test that the query's relations exist.
        exists_query = 'EXPLAIN {}'.format(query)
        try:
            self.execute_query(exists_query, do_post=False)
            return True
        except CartoException:
            return False

    def _check_regenerate_table_exists(self):
        # True when the cartodb.cdb_regeneratetable function is installed.
        query = '''
            SELECT 1
            FROM pg_catalog.pg_proc p
            LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
            WHERE p.proname = 'cdb_regeneratetable' AND n.nspname = 'cartodb';
        '''
        result = self.execute_query(query)
        return len(result['rows']) > 0

    def _get_query_columns_info(self, query):
        # LIMIT 0 fetches only the field metadata, no rows.
        query = 'SELECT * FROM ({}) _q LIMIT 0'.format(query)
        table_info = self.execute_query(query)
        return get_query_columns_info(table_info['fields'])

    def _get_copy_query(self, query, columns, limit):
        # the_geom_webmercator is derived server-side; never copy it out.
        query_columns = [
            double_quote(column.name) for column in columns
            if (column.name != 'the_geom_webmercator')
        ]
        query = 'SELECT {columns} FROM ({query}) _q'.format(
            query=query, columns=','.join(query_columns))
        if limit is not None:
            if isinstance(limit, int) and (limit >= 0):
                query += ' LIMIT {limit}'.format(limit=limit)
            else:
                raise ValueError("`limit` parameter must an integer >= 0")
        return query

    @retry_copy
    def _copy_to(self, query, columns, retry_times=DEFAULT_RETRY_TIMES):
        # Stream COPY TO output straight into pandas.
        log.debug('COPY TO')
        copy_query = "COPY ({0}) TO stdout WITH (FORMAT csv, HEADER true, NULL '{1}')".format(
            query, PG_NULL)
        raw_result = self.copy_client.copyto_stream(copy_query)
        converters = obtain_converters(columns)
        parse_dates = date_columns_names(columns)
        df = pd.read_csv(raw_result, converters=converters,
                         parse_dates=parse_dates)
        return df

    @retry_copy
    def _copy_from(self, dataframe, table_name, columns,
                   retry_times=DEFAULT_RETRY_TIMES):
        # Stream the dataframe into the table via COPY FROM stdin.
        log.debug('COPY FROM')
        query = """
            COPY {table_name}({columns}) FROM stdin WITH (FORMAT csv, DELIMITER '|', NULL '{null}');
        """.format(table_name=table_name, null=PG_NULL, columns=','.join(
            double_quote(column.dbname) for column in columns)).strip()
        data = _compute_copy_data(dataframe, columns)
        self.copy_client.copyfrom(query, data)

    def _rename_table(self, table_name, new_table_name):
        query = _rename_table_query(table_name, new_table_name)
        self.execute_query(query)

    def normalize_table_name(self, table_name):
        norm_table_name = normalize_name(table_name)
        if norm_table_name != table_name:
            log.debug('Table name normalized: "{}"'.format(norm_table_name))
        return norm_table_name
help='Set the base URL. For example:' +
                    ' https://username.carto.com/ ' +
                    '(defaults to env variable CARTO_API_URL)')
parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY']
                    if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                         ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentification to CARTO: all three of base URL, API key and
# organization must be present, otherwise bail out with a usage hint.
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
    batchSQLClient = BatchSQLClient(auth_client)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# Batch SQL API operations: dispatch on the requested operation and log
# every key/value pair of the API response.
if args.operation == 'create':
    # create a batch api job
    createJob = batchSQLClient.create(args.query)
    for a, b in createJob.items():
        logger.info('{key}: {value}'.format(key=a, value=b))
elif args.operation == 'read':
    readJob = batchSQLClient.read(args.job_id)
    for a, b in readJob.items():
        logger.info('{key}: {value}'.format(key=a, value=b))
class CARTOUser(object):
    """Lazy wrapper around the CARTO auth, SQL, Batch SQL and COPY clients.

    Clients are created on first use via initialize().
    """

    def __init__(self, user_name=None, org_name=None, api_url=None,
                 api_key=None, check_ssl=True):
        self.user_name = user_name
        self.org_name = org_name
        self.api_url = api_url
        self.api_key = api_key
        if not check_ssl:
            # Globally disable TLS verification for every requests session
            # and silence the resulting warnings.
            old_request = requests.Session.request
            requests.Session.request = partialmethod(old_request,
                                                     verify=False)
            warnings.filterwarnings('ignore', 'Unverified HTTPS request')

    def initialize(self):
        # Derive the API endpoint from the user name when no URL was given.
        if not self.api_url and self.user_name:
            self.api_url = "https://{}.carto.com/api/".format(self.user_name)
        elif not self.api_url and not self.user_name:
            raise Exception(
                'Not enough data provided to initialize the client')
        if self.org_name:
            self.client = APIKeyAuthClient(self.api_url, self.api_key,
                                           self.org_name)
        else:
            self.client = APIKeyAuthClient(self.api_url, self.api_key)
        self.sql_client = SQLClient(self.client)
        self.batch_client = BatchSQLClient(self.client)
        self.copy_client = CopySQLClient(self.client)

    def execute_sql(self, query, parse_json=True, format=None, do_post=False):
        # Run a query through the SQL API, initializing clients on demand.
        try:
            try:
                self.client
            except AttributeError:
                self.initialize()
            return self.sql_client.send(query,
                                        parse_json=parse_json,
                                        format=format,
                                        do_post=do_post)
        except CartoException as e:
            # Surface the innermost CARTO error message.
            raise Exception(e.args[0].args[0][0])

    def batch_check(self, job_id):
        # Read the status of a Batch SQL job.
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.read(job_id)

    def batch_create(self, query):
        # Create a new Batch SQL job.
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.create(query)

    def batch_cancel(self, job_id):
        # Cancel a running Batch SQL job.
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.cancel(job_id)

    def get_dataset_manager(self):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        return DatasetManager(self.client)

    def get_sync_manager(self):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        return SyncTableJobManager(self.client)

    def upload(self, uri, sync_time=None):
        # Import a file/URL as a dataset, optionally as a synced table.
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        dataset_manager = DatasetManager(self.client)
        if sync_time:
            return dataset_manager.create(uri, sync_time)
        else:
            return dataset_manager.create(uri)

    def copy_from(self, path, query, tablename=None, delimiter=','):
        # COPY a local CSV into a table; build the COPY statement from the
        # file's header line when no query is supplied.
        try:
            self.copy_client
        except AttributeError:
            self.initialize()
        if tablename is None:
            tablename = Path(path).stem
        if query is None:
            with open(path, 'rb') as myfile:
                headers = next(myfile).strip().decode('utf8')
                query = f"""COPY {tablename} ({headers}) FROM stdin (FORMAT CSV, DELIMITER '{delimiter}', HEADER false, QUOTE '"')"""
                return self.copy_client.copyfrom_file_object(query, myfile)
        return self.copy_client.copyfrom_file_path(query, path)

    def copy_to(self, query, output, delimiter=','):
        # COPY a query result out to a local CSV file.
        try:
            self.copy_client
        except AttributeError:
            self.initialize()
        copy_query = f"""COPY ({query}) TO stdout WITH (FORMAT CSV, DELIMITER '{delimiter}', HEADER true, QUOTE '"')"""
        return self.copy_client.copyto_file_path(copy_query, output)
class CARTOUser(object):
    """Lazy wrapper around the CARTO auth, SQL and Batch SQL clients.

    Clients are created on first use via initialize().
    """

    def __init__(self, user_name=None, org_name=None, api_url=None,
                 api_key=None, check_ssl=True):
        self.user_name = user_name
        self.org_name = org_name
        self.api_url = api_url
        self.api_key = api_key
        if not check_ssl:
            # Globally disable TLS verification for every requests session
            # and silence the resulting warnings.
            old_request = requests.Session.request
            requests.Session.request = partialmethod(old_request,
                                                     verify=False)
            warnings.filterwarnings('ignore', 'Unverified HTTPS request')

    def initialize(self):
        # Derive the API endpoint from the user name when no URL was given.
        if not self.api_url and self.user_name:
            self.api_url = "https://{}.carto.com/api/".format(self.user_name)
        elif not self.api_url and not self.user_name:
            raise Exception(
                'Not enough data provided to initialize the client')
        if self.org_name:
            self.client = APIKeyAuthClient(self.api_url, self.api_key,
                                           self.org_name)
        else:
            self.client = APIKeyAuthClient(self.api_url, self.api_key)
        self.sql_client = SQLClient(self.client)
        self.batch_client = BatchSQLClient(self.client)

    def execute_sql(self, query, parse_json=True, format=None, do_post=False):
        # Run a query through the SQL API, initializing clients on demand.
        try:
            try:
                self.client
            except AttributeError:
                self.initialize()
            return self.sql_client.send(query,
                                        parse_json=parse_json,
                                        format=format,
                                        do_post=do_post)
        except CartoException as e:
            # Surface the innermost CARTO error message.
            raise Exception(e.args[0].args[0][0])

    def batch_check(self, job_id):
        # Read the status of a Batch SQL job.
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.read(job_id)

    def batch_create(self, query):
        # Create a new Batch SQL job.
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.create(query)

    def batch_cancel(self, job_id):
        # Cancel a running Batch SQL job.
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.cancel(job_id)

    def get_dataset_manager(self):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        return DatasetManager(self.client)

    def get_sync_manager(self):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        return SyncTableJobManager(self.client)

    def upload(self, uri, sync_time=None):
        # Import a file/URL as a dataset, optionally as a synced table.
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        dataset_manager = DatasetManager(self.client)
        if sync_time:
            return dataset_manager.create(uri, sync_time)
        else:
            return dataset_manager.create(uri)
# Every option defaults to the matching CARTO_* environment variable.
parser.add_argument('--organization', type=str, dest='organization',
                    default=os.environ['CARTO_ORG']
                    if 'CARTO_ORG' in os.environ else '',
                    help='Set the name of the organization' +
                         ' account (defaults to env variable CARTO_ORG)')
parser.add_argument('--base_url', type=str, dest='CARTO_BASE_URL',
                    default=os.environ['CARTO_API_URL']
                    if 'CARTO_API_URL' in os.environ else '',
                    help='Set the base URL. For example:' +
                         ' https://username.carto.com/ ' +
                         '(defaults to env variable CARTO_API_URL)')
parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY']
                    if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                         ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentification to CARTO: all three of base URL, API key and
# organization must be present, otherwise bail out with a usage hint.
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
    batchSQLClient = BatchSQLClient(auth_client)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# Run the job in one blocking call and report the terminal status.
job = batchSQLClient.create_and_wait_for_completion(args.query)
logger.info('Job finished with status {status}'.format(status=job['status']))
' https://username.carto.com/ ' + '(defaults to env variable CARTO_API_URL)') parser.add_argument('--api_key', dest='CARTO_API_KEY', default=os.environ['CARTO_API_KEY'], help='Api key of the account' + ' (defaults to env variable CARTO_API_KEY)') args = parser.parse_args() # Set authentification to CARTO auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization) batchSQLClient = BatchSQLClient(auth_client) # Batch SQL API operations if args.operation == 'create': # create a batch api job createJob = batchSQLClient.create(args.query) for a, b in createJob.items(): logger.info('{key}: {value}'.format(key=a, value=b)) elif args.operation == 'read': readJob = batchSQLClient.read(args.job_id) for a, b in readJob.items(): logger.info('{key}: {value}'.format(key=a, value=b)) elif args.operation == 'update': updateJob = batchSQLClient.update(args.job_id, args.query) for a, b in updateJob.items(): logger.info('{key}: {value}'.format(key=a, value=b))
dest='CARTO_BASE_URL',
                    default=os.environ['CARTO_API_URL']
                    if 'CARTO_API_URL' in os.environ else '',
                    help='Set the base URL. For example:' +
                         ' https://username.carto.com/ ' +
                         '(defaults to env variable CARTO_API_URL)')
parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY']
                    if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                         ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentification to CARTO: all three of base URL, API key and
# organization must be present, otherwise bail out with a usage hint.
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY,
                                   args.organization)
    batchSQLClient = BatchSQLClient(auth_client)
else:
    logger.error(
        'You need to provide valid credentials, run with -h parameter for details'
    )
    import sys
    sys.exit(1)

# Run the job in one blocking call and report the terminal status.
job = batchSQLClient.create_and_wait_for_completion(args.query)
logger.info('Job finished with status {status}'.format(status=job['status']))
class ContextManager:
    """Thin facade over the CARTO SQL APIs for a single set of credentials.

    Wraps the regular SQL API (`SQLClient`), the COPY API (`CopySQLClient`)
    and the Batch SQL API (`BatchSQLClient`) and exposes table-level
    operations (create/copy/rename/delete, metadata queries) on top of them.
    """

    def __init__(self, credentials):
        # Fall back to the default stored credentials when none are given.
        self.credentials = credentials or get_default_credentials()
        check_credentials(self.credentials)

        self.auth_client = _create_auth_client(self.credentials)
        self.sql_client = SQLClient(self.auth_client)
        self.copy_client = CopySQLClient(self.auth_client)
        self.batch_sql_client = BatchSQLClient(self.auth_client)

    def execute_query(self, query, parse_json=True, do_post=True, format=None,
                      **request_args):
        """Run `query` synchronously through the SQL API and return its response."""
        return self.sql_client.send(query.strip(), parse_json, do_post, format,
                                    **request_args)

    def execute_long_running_query(self, query):
        """Run `query` through the Batch SQL API and block until it finishes."""
        return self.batch_sql_client.create_and_wait_for_completion(
            query.strip())

    def copy_to(self, source, schema, limit=None, retry_times=DEFAULT_RETRY_TIMES):
        """Download a table or query `source` as a DataFrame via the COPY API.

        `limit` optionally caps the number of rows; `retry_times` bounds the
        number of retries on rate-limit errors.
        """
        query = self.compute_query(source, schema)
        columns = self._get_query_columns_info(query)
        copy_query = self._get_copy_query(query, columns, limit)
        return self._copy_to(copy_query, columns, retry_times)

    def copy_from(self, gdf, table_name, if_exists='fail', cartodbfy=True):
        """Upload DataFrame `gdf` into `table_name` via the COPY API.

        `if_exists` is one of 'fail' (raise if the table exists),
        'replace' (recreate it) or 'append' (insert into the existing table).
        Returns the (normalized) table name.
        """
        schema = self.get_schema()
        table_name = self.normalize_table_name(table_name)
        columns = get_dataframe_columns_info(gdf)

        if if_exists == 'replace' or not self.has_table(table_name, schema):
            log.debug('Creating table "{}"'.format(table_name))
            self._create_table_from_columns(table_name, columns, schema,
                                            cartodbfy)
        elif if_exists == 'fail':
            raise Exception(
                'Table "{schema}.{table_name}" already exists in your CARTO account. '
                'Please choose a different `table_name` or use '
                'if_exists="replace" to overwrite it.'.format(
                    table_name=table_name, schema=schema))
        else:  # 'append'
            pass

        self._copy_from(gdf, table_name, columns)
        return table_name

    def create_table_from_query(self, query, table_name, if_exists, cartodbfy=True):
        """Materialize `query` into `table_name` (semantics of `if_exists` as in copy_from)."""
        schema = self.get_schema()
        table_name = self.normalize_table_name(table_name)

        if if_exists == 'replace' or not self.has_table(table_name, schema):
            log.debug('Creating table "{}"'.format(table_name))
            self._create_table_from_query(query, table_name, schema, cartodbfy)
        elif if_exists == 'fail':
            raise Exception(
                'Table "{schema}.{table_name}" already exists in your CARTO account. '
                'Please choose a different `table_name` or use '
                'if_exists="replace" to overwrite it.'.format(
                    table_name=table_name, schema=schema))
        else:  # 'append'
            pass

        return table_name

    def has_table(self, table_name, schema=None):
        """Return True when `table_name` exists (probed with an EXPLAIN query)."""
        query = self.compute_query(table_name, schema)
        return self._check_exists(query)

    def delete_table(self, table_name):
        """DROP the table; return False when the server notices it did not exist."""
        query = _drop_table_query(table_name)
        output = self.execute_query(query)
        return not ('notices' in output
                    and 'does not exist' in output['notices'][0])

    def rename_table(self, table_name, new_table_name, if_exists='fail'):
        """Rename `table_name` to `new_table_name`.

        Raises ValueError when both names are equal, and Exception when the
        source table is missing or the target exists with if_exists='fail'.
        With if_exists='replace' the existing target is dropped first.
        Returns the (normalized) new table name.
        """
        new_table_name = self.normalize_table_name(new_table_name)

        if table_name == new_table_name:
            raise ValueError(
                'Table names are equal. Please choose a different table name.')

        if not self.has_table(table_name):
            raise Exception(
                'Table "{table_name}" does not exist in your CARTO account.'.
                format(table_name=table_name))

        if self.has_table(new_table_name):
            if if_exists == 'replace':
                log.debug('Removing table "{}"'.format(new_table_name))
                self.delete_table(new_table_name)
            elif if_exists == 'fail':
                raise Exception(
                    'Table "{new_table_name}" already exists in your CARTO account. '
                    'Please choose a different `new_table_name` or use '
                    'if_exists="replace" to overwrite it.'.format(
                        new_table_name=new_table_name))

        self._rename_table(table_name, new_table_name)
        return new_table_name

    def update_privacy_table(self, table_name, privacy=None):
        """Update the privacy setting of `table_name` through the Datasets API."""
        DatasetInfo(self.auth_client, table_name).update_privacy(privacy)

    def get_privacy(self, table_name):
        """Return the privacy setting of `table_name`."""
        return DatasetInfo(self.auth_client, table_name).privacy

    def get_schema(self):
        """Get user schema from current credentials"""
        query = 'SELECT current_schema()'
        result = self.execute_query(query, do_post=False)
        return result['rows'][0]['current_schema']

    def get_geom_type(self, query):
        """Fetch geom type of a remote table or query"""
        # FIX: local variable renamed from the misspelled `distict_query`.
        distinct_query = '''
            SELECT distinct ST_GeometryType(the_geom) AS geom_type
            FROM ({}) q
            LIMIT 5
        '''.format(query)
        response = self.execute_query(distinct_query, do_post=False)
        if response and response.get('rows') and len(response.get('rows')) > 0:
            st_geom_type = response.get('rows')[0].get('geom_type')
            if st_geom_type:
                # Strip the 'ST_' prefix before mapping to the public geom type.
                return map_geom_type(st_geom_type[3:])
        return None

    def get_num_rows(self, query):
        """Get the number of rows in the query"""
        result = self.execute_query(
            "SELECT COUNT(*) FROM ({query}) _query".format(query=query))
        return result.get('rows')[0].get('count')

    def get_bounds(self, query):
        """Return [[xmin, ymin], [xmax, ymax]] of the query's geometries, or None."""
        extent_query = '''
            SELECT ARRAY[
                ARRAY[st_xmin(geom_env), st_ymin(geom_env)],
                ARRAY[st_xmax(geom_env), st_ymax(geom_env)]
            ] bounds FROM (
                SELECT ST_Extent(the_geom) geom_env
                FROM ({}) q
            ) q;
        '''.format(query)
        response = self.execute_query(extent_query, do_post=False)
        if response and response.get('rows') and len(response.get('rows')) > 0:
            return response.get('rows')[0].get('bounds')
        return None

    def get_column_names(self, source, schema=None, exclude=None):
        """Return the column names of a table/query, minus any in `exclude`."""
        query = self.compute_query(source, schema)
        columns = [c.name for c in self._get_query_columns_info(query)]

        if exclude and isinstance(exclude, list):
            columns = list(set(columns) - set(exclude))

        return columns

    def is_public(self, query):
        # Used to detect public tables in queries in the publication,
        # because privacy only works for tables.
        public_auth_client = _create_auth_client(self.credentials, public=True)
        public_sql_client = SQLClient(public_auth_client)
        exists_query = 'EXPLAIN {}'.format(query)
        try:
            public_sql_client.send(exists_query, do_post=False)
            return True
        except CartoException:
            return False

    def get_table_names(self, query):
        # Used to detect tables in queries in the publication.
        query = 'SELECT CDB_QueryTablesText(\'{}\') as tables'.format(query)
        result = self.execute_query(query)
        tables = []
        if result['total_rows'] > 0 and result['rows'][0]['tables']:
            # Dataset_info only works with tables without schema
            tables = [
                table.split('.')[1] if '.' in table else table
                for table in result['rows'][0]['tables']
            ]
        return tables

    def _create_table_from_query(self, query, table_name, schema, cartodbfy=True):
        # Drop + recreate + (optionally) cartodbfy in a single transaction,
        # run through the Batch SQL API since it may be long-running.
        query = 'BEGIN; {drop}; {create}; {cartodbfy}; COMMIT;'.format(
            drop=_drop_table_query(table_name),
            create=_create_table_from_query_query(table_name, query),
            cartodbfy=_cartodbfy_query(table_name, schema) if cartodbfy else '')
        self.execute_long_running_query(query)

    def _create_table_from_columns(self, table_name, columns, schema, cartodbfy=True):
        # Same transactional pattern as _create_table_from_query, but the
        # table is created empty from a column specification.
        query = 'BEGIN; {drop}; {create}; {cartodbfy}; COMMIT;'.format(
            drop=_drop_table_query(table_name),
            create=_create_table_from_columns_query(table_name, columns),
            cartodbfy=_cartodbfy_query(table_name, schema) if cartodbfy else '')
        self.execute_long_running_query(query)

    def compute_query(self, source, schema=None):
        """Return `source` unchanged if it is SQL, else a SELECT over the table."""
        if is_sql_query(source):
            return source
        schema = schema or self.get_schema()
        return self._compute_query_from_table(source, schema)

    def _compute_query_from_table(self, table_name, schema):
        # Quote both identifiers; default to the 'public' schema.
        return 'SELECT * FROM "{schema}"."{table_name}"'.format(
            schema=schema or 'public', table_name=table_name)

    def _check_exists(self, query):
        # EXPLAIN is a cheap way to test that the query's relations exist.
        exists_query = 'EXPLAIN {}'.format(query)
        try:
            self.execute_query(exists_query, do_post=False)
            return True
        except CartoException:
            return False

    def _get_query_columns_info(self, query):
        # LIMIT 0 fetches only the field metadata, no rows.
        query = 'SELECT * FROM ({}) _q LIMIT 0'.format(query)
        table_info = self.execute_query(query)
        return Column.from_sql_api_fields(table_info['fields'])

    def _get_copy_query(self, query, columns, limit):
        # Project every column except the server-side 'the_geom_webmercator'.
        query_columns = [
            column.name for column in columns
            if (column.name != 'the_geom_webmercator')
        ]

        query = 'SELECT {columns} FROM ({query}) _q'.format(
            query=query, columns=','.join(query_columns))

        if limit is not None:
            if isinstance(limit, int) and (limit >= 0):
                query += ' LIMIT {limit}'.format(limit=limit)
            else:
                # FIX: message previously read "must an integer".
                raise ValueError("`limit` parameter must be an integer >= 0")

        return query

    def _copy_to(self, query, columns, retry_times):
        # Stream the query result as CSV; retry (recursively, decrementing
        # retry_times) when the server answers with a rate-limit error.
        copy_query = 'COPY ({0}) TO stdout WITH (FORMAT csv, HEADER true, NULL \'{1}\')'.format(
            query, PG_NULL)

        try:
            raw_result = self.copy_client.copyto_stream(copy_query)
        except CartoRateLimitException as err:
            if retry_times > 0:
                retry_times -= 1
                warn('Read call rate limited. Waiting {s} seconds'.format(
                    s=err.retry_after))
                time.sleep(err.retry_after)
                warn('Retrying...')
                return self._copy_to(query, columns, retry_times)
            else:
                warn(('Read call was rate-limited. '
                      'This usually happens when there are multiple queries being read at the same time.'))
                raise err

        converters = obtain_converters(columns)
        parse_dates = date_columns_names(columns)
        df = read_csv(raw_result, converters=converters,
                      parse_dates=parse_dates)
        return df

    def _copy_from(self, dataframe, table_name, columns):
        # Pipe-delimited CSV upload; column DB names come from the metadata.
        query = """
            COPY {table_name}({columns}) FROM stdin WITH (FORMAT csv, DELIMITER '|', NULL '{null}');
        """.format(table_name=table_name, null=PG_NULL,
                   columns=','.join(column.dbname for column in columns)).strip()
        data = _compute_copy_data(dataframe, columns)
        self.copy_client.copyfrom(query, data)

    def _rename_table(self, table_name, new_table_name):
        query = _rename_table_query(table_name, new_table_name)
        self.execute_query(query)

    def normalize_table_name(self, table_name):
        """Return the CARTO-normalized table name, logging when it changed."""
        norm_table_name = normalize_name(table_name)
        if norm_table_name != table_name:
            log.debug('Table name normalized: "{}"'.format(norm_table_name))
        return norm_table_name