def test_table(api_key_auth_client_usr): batch_client = BatchSQLClient(api_key_auth_client_usr) job = batch_client.create(SETUP_QUERIES) while not job['status'] in BATCH_TERMINAL_STATES: time.sleep(1) job = batch_client.read(job['job_id']) assert job['status'] == 'done'
def batchsql(self, list_of_sqls, checkevery=1, maxtime=5): # pass in a list of sqls to execute # probably dont want to read a file with a million records and upload # open streets for ex is 1k rows, 500KB batchSQLClient = BatchSQLClient(self.auth_client) createJob = batchSQLClient.create(list_of_sqls) # https://github.com/CartoDB/carto-python # job_id looks like # 5171b8c4-8c03-4610-8797-5dd98ff3e61b # job looks like # { # 'user': '******', # 'status': 'done', # 'query': [{'query': 'drop table if exists foo', 'status': 'done'}, # {'query': 'create table foo (bar text)', 'status': 'done'}, # {...} {...}], # 'created_at': '2020-07-02T16:31:31.873Z', # 'updated_at': '2020-07-02T16:31:31.996Z', # 'job_id': '5171b8c4-8c03-4610-8797-5dd98ff3e61b' # } # queries are nested because you can add more sets to a running job readJob = batchSQLClient.read(createJob['job_id']) cheks = 0 while (readJob['status'] != 'done'): time.sleep(checkevery) readJob = batchSQLClient.read(createJob['job_id']) cheks += 1 if cheks > maxtime: return False return True
class BatchJobStatus(object): """Status of a write or query operation. Read more at `Batch SQL API docs <https://carto.com/docs/carto-engine/sql-api/batch-queries/>`__ about responses and how to interpret them. Example: Poll for a job's status if you've caught the :py:class:`BatchJobStatus` instance. .. code:: python import time job = cc.write(df, 'new_table', lnglat=('lng_col', 'lat_col')) while True: curr_status = job.status()['status'] if curr_status in ('done', 'failed', 'canceled', 'unknown', ): print(curr_status) break time.sleep(5) Create a :py:class:`BatchJobStatus` instance if you have a `job_id` output from a :py:meth:`CartoContext.write <cartoframes.context.CartoContext.write>` operation. .. code:: python >>> from cartoframes import CartoContext, BatchJobStatus >>> cc = CartoContext(username='******', api_key='...') >>> cc.write(df, 'new_table', lnglat=('lng', 'lat')) 'BatchJobStatus(job_id='job-id-string', ...)' >>> batch_job = BatchJobStatus(cc, 'job-id-string') Attributes: job_id (str): Job ID of the Batch SQL API job last_status (str): Status of ``job_id`` job when last polled created_at (str): Time and date when job was created Args: carto_context (:py:class:`CartoContext <cartoframes.context.CartoContext>`): :py:class:`CartoContext <cartoframes.context.CartoContext>` instance job (dict or str): If a dict, job status dict returned after sending a Batch SQL API request. If str, a Batch SQL API job id. """ def __init__(self, carto_context, job): if isinstance(job, dict): self.job_id = job.get('job_id') self.last_status = job.get('status') self.created_at = job.get('created_at') elif isinstance(job, str): self.job_id = job self.last_status = None self.created_at = None self._batch_client = BatchSQLClient(carto_context.auth_client) def __repr__(self): return ('BatchJobStatus(job_id=\'{job_id}\', ' 'last_status=\'{status}\', ' 'created_at=\'{created_at}\')'.format( job_id=self.job_id, status=self.last_status, created_at=self.created_at)) def _set_status(self, curr_status): self.last_status = curr_status def get_status(self): """return current status of job""" return self.last_status def status(self): """Checks the current status of job ``job_id`` Returns: dict: Status and time it was updated Warns: UserWarning: If the job failed, a warning is raised with information about the failure """ resp = self._batch_client.read(self.job_id) if 'failed_reason' in resp: warn('Job failed: {}'.format(resp.get('failed_reason'))) self._set_status(resp.get('status')) return dict(status=resp.get('status'), updated_at=resp.get('updated_at'), created_at=resp.get('created_at'))
class CARTOUser(object): def __init__(self, user_name=None, org_name=None, api_url=None, api_key=None, check_ssl=True): self.user_name = user_name self.org_name = org_name self.api_url = api_url self.api_key = api_key if not check_ssl: old_request = requests.Session.request requests.Session.request = partialmethod(old_request, verify=False) warnings.filterwarnings('ignore', 'Unverified HTTPS request') def initialize(self): if not self.api_url and self.user_name: self.api_url = "https://{}.carto.com/api/".format(self.user_name) elif not self.api_url and not self.user_name: raise Exception( 'Not enough data provided to initialize the client') if self.org_name: self.client = APIKeyAuthClient(self.api_url, self.api_key, self.org_name) else: self.client = APIKeyAuthClient(self.api_url, self.api_key) self.sql_client = SQLClient(self.client) self.batch_client = BatchSQLClient(self.client) def execute_sql(self, query, parse_json=True, format=None, do_post=False): try: try: self.client except AttributeError: self.initialize() return self.sql_client.send(query, parse_json=parse_json, format=format, do_post=do_post) except CartoException as e: raise Exception(e.args[0].args[0][0]) def batch_check(self, job_id): try: self.batch_client except AttributeError: self.initialize() return self.batch_client.read(job_id) def batch_create(self, query): try: self.batch_client except AttributeError: self.initialize() return self.batch_client.create(query) def batch_cancel(self, job_id): try: self.batch_client except AttributeError: self.initialize() return self.batch_client.cancel(job_id) def get_dataset_manager(self): try: self.sql_client except AttributeError: self.initialize() return DatasetManager(self.client) def get_sync_manager(self): try: self.sql_client except AttributeError: self.initialize() return SyncTableJobManager(self.client) def upload(self, uri, sync_time=None): try: self.sql_client except AttributeError: self.initialize() dataset_manager = DatasetManager(self.client) if sync_time: return dataset_manager.create(uri, sync_time) else: return dataset_manager.create(uri)
' (defaults to env variable CARTO_API_KEY)') args = parser.parse_args() # Set authentification to CARTO auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization) batchSQLClient = BatchSQLClient(auth_client) # Batch SQL API operations if args.operation == 'create': # create a batch api job createJob = batchSQLClient.create(args.query) for a, b in createJob.items(): logger.info('{key}: {value}'.format(key=a, value=b)) elif args.operation == 'read': readJob = batchSQLClient.read(args.job_id) for a, b in readJob.items(): logger.info('{key}: {value}'.format(key=a, value=b)) elif args.operation == 'update': updateJob = batchSQLClient.update(args.job_id, args.query) for a, b in updateJob.items(): logger.info('{key}: {value}'.format(key=a, value=b)) elif args.operation == 'cancel': cancelJob = batchSQLClient.cancel(args.job_id) for a, b in cancelJob.items(): logger.info('{key}: {value}'.format(key=a, value=b)) else: logger.info("You have not written a correct operation option")
class CARTOUser(object): def __init__(self, user_name=None, org_name=None, api_url=None, api_key=None, check_ssl=True): self.user_name = user_name self.org_name = org_name self.api_url = api_url self.api_key = api_key if not check_ssl: old_request = requests.Session.request requests.Session.request = partialmethod(old_request, verify=False) warnings.filterwarnings('ignore', 'Unverified HTTPS request') def initialize(self): if not self.api_url and self.user_name: self.api_url = "https://{}.carto.com/api/".format(self.user_name) elif not self.api_url and not self.user_name: raise Exception( 'Not enough data provided to initialize the client') if self.org_name: self.client = APIKeyAuthClient(self.api_url, self.api_key, self.org_name) else: self.client = APIKeyAuthClient(self.api_url, self.api_key) self.sql_client = SQLClient(self.client) self.batch_client = BatchSQLClient(self.client) self.copy_client = CopySQLClient(self.client) def execute_sql(self, query, parse_json=True, format=None, do_post=False): try: try: self.client except AttributeError: self.initialize() return self.sql_client.send(query, parse_json=parse_json, format=format, do_post=do_post) except CartoException as e: raise Exception(e.args[0].args[0][0]) def batch_check(self, job_id): try: self.batch_client except AttributeError: self.initialize() return self.batch_client.read(job_id) def batch_create(self, query): try: self.batch_client except AttributeError: self.initialize() return self.batch_client.create(query) def batch_cancel(self, job_id): try: self.batch_client except AttributeError: self.initialize() return self.batch_client.cancel(job_id) def get_dataset_manager(self): try: self.sql_client except AttributeError: self.initialize() return DatasetManager(self.client) def get_sync_manager(self): try: self.sql_client except AttributeError: self.initialize() return SyncTableJobManager(self.client) def upload(self, uri, sync_time=None): try: self.sql_client except AttributeError: self.initialize() dataset_manager = DatasetManager(self.client) if sync_time: return dataset_manager.create(uri, sync_time) else: return dataset_manager.create(uri) def copy_from(self, path, query, tablename=None, delimiter=','): try: self.copy_client except AttributeError: self.initialize() if tablename is None: tablename = Path(path).stem if query is None: with open(path, 'rb') as myfile: headers = next(myfile).strip().decode('utf8') query = f"""COPY {tablename} ({headers}) FROM stdin (FORMAT CSV, DELIMITER '{delimiter}', HEADER false, QUOTE '"')""" return self.copy_client.copyfrom_file_object(query, myfile) return self.copy_client.copyfrom_file_path(query, path) def copy_to(self, query, output, delimiter=','): try: self.copy_client except AttributeError: self.initialize() copy_query = f"""COPY ({query}) TO stdout WITH (FORMAT CSV, DELIMITER '{delimiter}', HEADER true, QUOTE '"')""" return self.copy_client.copyto_file_path(copy_query, output)
# Set authentification to CARTO if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization: auth_client = APIKeyAuthClient( args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization) batchSQLClient = BatchSQLClient(auth_client) else: logger.error('You need to provide valid credentials, run with -h parameter for details') import sys sys.exit(1) # Batch SQL API operations if args.operation == 'create': # create a batch api job createJob = batchSQLClient.create(args.query) for a, b in createJob.items(): logger.info('{key}: {value}'.format(key=a, value=b)) elif args.operation == 'read': readJob = batchSQLClient.read(args.job_id) for a, b in readJob.items(): logger.info('{key}: {value}'.format(key=a, value=b)) elif args.operation == 'update': updateJob = batchSQLClient.update(args.job_id, args.query) for a, b in updateJob.items(): logger.info('{key}: {value}'.format(key=a, value=b)) elif args.operation == 'cancel': cancelJob = batchSQLClient.cancel(args.job_id) for a, b in cancelJob.items(): logger.info('{key}: {value}'.format(key=a, value=b)) else: logger.info("You have not written a correct operation option")
class UploadJob(object): def __init__(self, csv_file_path, **kwargs): self.__set_max_csv_length() self.__set_defaults() for key, value in kwargs.items(): try: setattr(self, key, int(value)) except (ValueError, TypeError): if value in ("true", "True"): setattr(self, key, True) elif value in ("false", "False"): setattr(self, key, False) else: setattr(self, key, value) self.__trim_columns() self.csv_file_path = csv_file_path if self.api_key: self.api_auth = APIKeyAuthClient(self.base_url, self.api_key) self.sql = SQLClient(self.api_auth) self.bsql = BatchSQLClient(self.api_auth) def __set_defaults(self): self.delimiter = DEFAULT_DELIMITER self.x_column = DEFAULT_X_COLUMN self.y_column = DEFAULT_Y_COLUMN self.srid = DEFAULT_SRID self.chunk_size = DEFAULT_CHUNK_SIZE self.max_attempts = DEFAULT_MAX_ATTEMPTS self.file_encoding = DEFAULT_FILE_ENCOFING self.force_no_geometry = DEFAULT_FORCE_NO_GEOMETRY self.force_the_geom = DEFAULT_FORCE_THE_GEOM self.date_format = DEFAULT_DATE_FORMAT self.datetime_format = DEFAULT_DATETIME_FORMAT self.float_comma_separator = DEFAULT_FLOAT_COMMA_SEPARATOR self.float_thousand_separator = DEFAULT_FLOAT_THOUSAND_SEPARATOR self.date_columns = DEFAULT_DATE_COLUMNS self.observer = None def __set_max_csv_length(self): maxInt = sys.maxsize decrement = True while decrement: # decrease the maxInt value by factor 10 # as long as the OverflowError occurs. decrement = False try: csv.field_size_limit(maxInt) except OverflowError: maxInt = int(maxInt / 10) decrement = True def __trim_columns(self): if self.columns is not None: self.columns = self.columns.replace(' ', '') if self.date_columns is not None: self.date_columns = self.date_columns.replace(' ', '') def run(self, start_chunk=1, end_chunk=None): if not isinstance(self.csv_file_path, str): self.do_run(self.csv_file_path, start_chunk, end_chunk) else: if sys.version_info <= (3, 0): with open(self.csv_file_path) as f: self.do_run(f, start_chunk, end_chunk) else: with open(self.csv_file_path, encoding=self.file_encoding) as f: self.do_run(f, start_chunk, end_chunk) def notify(self, message_type, message): observer = getattr(self, "observer", None) if callable(observer): observer({"type": message_type, "msg": str(message)}) return True return False def regenerate_overviews(self): query = 'select CDB_CreateOverviews(\'{table}\'::regclass)'.\ format(table=self.table_name) job_result = self.bsql.create(query) return job_result['job_id'] def check_job(self, job_id): return self.bsql.read(job_id) def create_geom_query(self, record): null_result = NULL_VALUE + "," if self.force_the_geom: return self.parse_column_value(record, self.force_the_geom, parse_float=False) if self.force_no_geometry: return null_result longitude = self.get_longitude(record) latitude = self.get_latitude(record) if longitude is None or latitude is None \ or longitude is DEFAULT_COORD or latitude is DEFAULT_COORD: return null_result return "st_transform(st_setsrid(st_makepoint(" + \ "{longitude}, {latitude}), {srid}), 4326),".\ format(longitude=longitude, latitude=latitude, srid=self.srid) def parse_column_value(self, record, column, parse_float=True): null_result = NULL_VALUE + "," try: value = self.escape_value(record[column]) except Exception: return null_result try: if self.is_date_column(column): try: result = "'{value}',".format( value=self.parse_date_column(record, column)) except ValueError: result = null_result elif parse_float: result = "{value},".format(value=self.parse_float_value(value)) else: raise TypeError except (ValueError, TypeError): if value is None or not value.strip(): result = null_result else: result = "'{value}',".format(value=value) return result def is_date_column(self, column): return column is not None and self.date_columns is not None and column in self.date_columns.split( ',') def parse_date_column(self, record, column): if not self.date_format or not self.datetime_format: raise ValueError try: return datetime.strptime( record[column], self.datetime_format).strftime(CARTO_DATE_FORMAT) except Exception: try: return datetime.strptime( record[column], self.date_format).strftime(CARTO_DATE_FORMAT) except Exception: raise ValueError def escape_value(self, value): return value.replace("'", "''") def get_longitude(self, record): try: longitude = self.get_coord(record, self.x_column) if abs(longitude) > MAX_LON: return None except TypeError: return DEFAULT_COORD else: return longitude def get_latitude(self, record): try: latitude = self.get_coord(record, self.y_column) if abs(latitude) > MAX_LAT: return None except TypeError: return DEFAULT_COORD else: return latitude def get_coord(self, record, type): try: coord = self.parse_float_value(record[type]) or DEFAULT_COORD except (ValueError, KeyError): coord = DEFAULT_COORD else: return coord def parse_float_value(self, value): if value.upper() in (val.upper() for val in FORBIDDEN_FLOAT_VALUES): raise ValueError if self.float_thousand_separator: value = value.replace(self.float_thousand_separator, "") if self.float_comma_separator: value = value.replace(self.float_comma_separator, ".") return float(value) def send(self, query, file_encoding, chunk_num): if sys.version_info <= (3, 0): query = query.decode(file_encoding).encode(UTF8) logger.debug("Chunk #{chunk_num}: {query}".format( chunk_num=(chunk_num + 1), query=query)) for retry in range(self.max_attempts): try: self.sql.send(query) except Exception as e: logger.warning( "Chunk #{chunk_num}: Retrying ({error_msg})".format( chunk_num=(chunk_num + 1), error_msg=e)) self.notify('error', e) else: logger.info("Chunk #{chunk_num}: Success!".format( chunk_num=(chunk_num + 1))) self.notify('progress', chunk_num + 1) break else: logger.error("Chunk #{chunk_num}: Failed!)".format( chunk_num=(chunk_num + 1))) self.notify('error', "Failed " + str(chunk_num + 1))