def test_batchjobstatus_methods(self):
    """context.BatchJobStatus methods"""
    from cartoframes.context import BatchJobStatus
    from carto.sql import BatchSQLClient

    cc = cartoframes.CartoContext(base_url=self.baseurl,
                                  api_key=self.apikey)
    batch_client = BatchSQLClient(cc.auth_client)
    job_response = batch_client.create(['select 1', ])
    job_status = BatchJobStatus(cc, job_response)

    possible_status = ('pending', 'running', 'done',
                       'canceled', 'unknown', )
    self.assertIn(job_status.get_status(), possible_status)
    job_status._set_status('foo')
    self.assertEqual(job_status.get_status(), 'foo')

    new_status = job_status.status()
    self.assertSetEqual(set(new_status.keys()),
                        {'status', 'updated_at', 'created_at'})

    # job_id as str
    str_bjs = BatchJobStatus(cc, 'foo')
    self.assertIsNone(str_bjs.get_status())
    self.assertEqual(str_bjs.job_id, 'foo')
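# A minimal usage sketch of BatchJobStatus outside the test harness, assuming
# a configured CartoContext (the credentials below are placeholders). Only
# calls exercised by the test above are used; polling goes through
# BatchSQLClient.read() to avoid assumptions about how status() refreshes.
import time

import cartoframes
from carto.sql import BatchSQLClient
from cartoframes.context import BatchJobStatus

cc = cartoframes.CartoContext(base_url='https://your-user.carto.com',
                              api_key='your-api-key')
batch_client = BatchSQLClient(cc.auth_client)
job = BatchJobStatus(cc, batch_client.create(['select 1']))

# Poll until the job leaves the transient states.
while batch_client.read(job.job_id)['status'] in ('pending', 'running'):
    time.sleep(1)
print(job.status())  # e.g. {'status': 'done', 'updated_at': ..., 'created_at': ...}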
def test_table(api_key_auth_client_usr):
    batch_client = BatchSQLClient(api_key_auth_client_usr)
    job = batch_client.create(SETUP_QUERIES)

    while job['status'] not in BATCH_TERMINAL_STATES:
        time.sleep(1)
        job = batch_client.read(job['job_id'])

    assert job['status'] == 'done'
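# The test above depends on module-level fixtures that are not part of this
# snippet. A plausible sketch, with terminal states taken from the Batch SQL
# API job lifecycle and illustrative setup queries (the real suite's values
# may differ):
BATCH_TERMINAL_STATES = ('done', 'failed', 'canceled', 'unknown')
SETUP_QUERIES = [
    'DROP TABLE IF EXISTS batch_test',
    'CREATE TABLE batch_test (id int)',
]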
def test_batch_multi_sql(api_key_auth_client_usr):
    sql = BatchSQLClient(api_key_auth_client_usr)

    # Create query
    data = sql.create(BATCH_SQL_MULTI_QUERY)

    # Get job ID
    job_id = data['job_id']

    # Cancel if not finished
    cancel_job_if_not_finished(sql, job_id)
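# cancel_job_if_not_finished() is a helper that is not defined in this
# snippet. A minimal sketch consistent with the inline variant in the next
# test: cancel() raises CartoException when the job has already finished,
# and returns the string 'cancelled' otherwise.
from carto.exceptions import CartoException


def cancel_job_if_not_finished(sql, job_id):
    try:
        confirmation = sql.cancel(job_id)
    except CartoException:
        return  # already in a terminal state; nothing to cancel
    assert confirmation == 'cancelled'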
def test_batch_multi_sql(api_key_auth_client_usr):
    sql = BatchSQLClient(api_key_auth_client_usr)

    # Create query
    data = sql.create(BATCH_SQL_MULTI_QUERY)

    # Get job ID
    job_id = data['job_id']

    # Cancel if not finished
    try:
        confirmation = sql.cancel(job_id)
    except CartoException:
        pass
    else:
        assert confirmation == 'cancelled'
def batchsql(self, list_of_sqls, checkevery=1, maxtime=5):
    # Pass in a list of SQL statements to execute. You probably don't want
    # to read a file with a million records and upload it this way; Open
    # Streets, for example, is about 1k rows / 500KB.
    batch_sql_client = BatchSQLClient(self.auth_client)
    create_job = batch_sql_client.create(list_of_sqls)

    # See https://github.com/CartoDB/carto-python
    # job_id looks like: 5171b8c4-8c03-4610-8797-5dd98ff3e61b
    # The job response looks like:
    # {
    #     'user': '******',
    #     'status': 'done',
    #     'query': [{'query': 'drop table if exists foo', 'status': 'done'},
    #               {'query': 'create table foo (bar text)', 'status': 'done'},
    #               {...}, {...}],
    #     'created_at': '2020-07-02T16:31:31.873Z',
    #     'updated_at': '2020-07-02T16:31:31.996Z',
    #     'job_id': '5171b8c4-8c03-4610-8797-5dd98ff3e61b'
    # }
    # Queries are nested because you can add more sets to a running job.
    read_job = batch_sql_client.read(create_job['job_id'])
    checks = 0
    # Note: a 'failed' or 'cancelled' job never reaches 'done', so it falls
    # through to the timeout below and returns False.
    while read_job['status'] != 'done':
        time.sleep(checkevery)
        read_job = batch_sql_client.read(create_job['job_id'])
        checks += 1
        if checks > maxtime:
            return False
    return True
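# Example call, assuming `loader` is an instance of the (unshown) class that
# defines batchsql() above with a configured auth_client:
ok = loader.batchsql(
    ['drop table if exists foo', 'create table foo (bar text)'],
    checkevery=2,  # seconds between polls
    maxtime=10,    # give up after this many polls
)
if not ok:
    print('batch job did not finish in time')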
class CARTOUser(object):

    def __init__(self, user_name=None, org_name=None, api_url=None,
                 api_key=None, check_ssl=True):
        self.user_name = user_name
        self.org_name = org_name
        self.api_url = api_url
        self.api_key = api_key

        if not check_ssl:
            old_request = requests.Session.request
            requests.Session.request = partialmethod(old_request,
                                                     verify=False)
            warnings.filterwarnings('ignore', 'Unverified HTTPS request')

    def initialize(self):
        if not self.api_url and self.user_name:
            self.api_url = "https://{}.carto.com/api/".format(self.user_name)
        elif not self.api_url and not self.user_name:
            raise Exception(
                'Not enough data provided to initialize the client')

        if self.org_name:
            self.client = APIKeyAuthClient(self.api_url, self.api_key,
                                           self.org_name)
        else:
            self.client = APIKeyAuthClient(self.api_url, self.api_key)

        self.sql_client = SQLClient(self.client)
        self.batch_client = BatchSQLClient(self.client)

    def execute_sql(self, query, parse_json=True, format=None, do_post=False):
        try:
            try:
                self.client
            except AttributeError:
                self.initialize()
            return self.sql_client.send(query, parse_json=parse_json,
                                        format=format, do_post=do_post)
        except CartoException as e:
            raise Exception(e.args[0].args[0][0])

    def batch_check(self, job_id):
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.read(job_id)

    def batch_create(self, query):
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.create(query)

    def batch_cancel(self, job_id):
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.cancel(job_id)

    def get_dataset_manager(self):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        return DatasetManager(self.client)

    def get_sync_manager(self):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        return SyncTableJobManager(self.client)

    def upload(self, uri, sync_time=None):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        dataset_manager = DatasetManager(self.client)
        if sync_time:
            return dataset_manager.create(uri, sync_time)
        else:
            return dataset_manager.create(uri)
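# Example usage of the lazily initialized client above (credentials are
# placeholders); batch_create() accepts the same str-or-list of SQL that
# BatchSQLClient.create() does:
user = CARTOUser(user_name='your-user', api_key='your-api-key')
job = user.batch_create(['UPDATE mytable SET the_geom = NULL'])
print(user.batch_check(job['job_id'])['status'])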
    # (continuation of a parser.add_argument call)
    default=os.environ['CARTO_API_KEY'],
    help='Api key of the account' +
         ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentication to CARTO
auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY,
                               args.organization)
batchSQLClient = BatchSQLClient(auth_client)

# Batch SQL API operations
if args.operation == 'create':
    # Create a batch API job
    createJob = batchSQLClient.create(args.query)
    for key, value in createJob.items():
        logger.info('{key}: {value}'.format(key=key, value=value))
elif args.operation == 'read':
    readJob = batchSQLClient.read(args.job_id)
    for key, value in readJob.items():
        logger.info('{key}: {value}'.format(key=key, value=value))
elif args.operation == 'update':
    updateJob = batchSQLClient.update(args.job_id, args.query)
    for key, value in updateJob.items():
        logger.info('{key}: {value}'.format(key=key, value=value))
elif args.operation == 'cancel':
    cancelJob = batchSQLClient.cancel(args.job_id)
    for key, value in cancelJob.items():
        logger.info('{key}: {value}'.format(key=key, value=value))
else:
    pass  # remaining branch elided in the source snippet
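# Both CLI fragments in this section start after their argument parser is
# built. A minimal sketch of the parser they appear to assume, with argument
# names inferred from the args.* attributes accessed above (the real example
# scripts may define more options):
import argparse
import os

parser = argparse.ArgumentParser(
    description='Run Batch SQL API operations against a CARTO account')
parser.add_argument('operation',
                    choices=['create', 'read', 'update', 'cancel'],
                    help='Batch SQL API operation to perform')
parser.add_argument('--query', help='SQL to run (create/update)')
parser.add_argument('--job_id', help='Batch job id (read/update/cancel)')
parser.add_argument('--base_url', dest='CARTO_BASE_URL',
                    default=os.environ.get('CARTO_BASE_URL', ''),
                    help='Base URL of the account')
parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ.get('CARTO_API_KEY', ''),
                    help='Api key of the account')
parser.add_argument('--organization', default=os.environ.get('CARTO_ORG', ''),
                    help='Organization name (multi-user accounts)')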
class CARTOUser(object):

    def __init__(self, user_name=None, org_name=None, api_url=None,
                 api_key=None, check_ssl=True):
        self.user_name = user_name
        self.org_name = org_name
        self.api_url = api_url
        self.api_key = api_key

        if not check_ssl:
            old_request = requests.Session.request
            requests.Session.request = partialmethod(old_request,
                                                     verify=False)
            warnings.filterwarnings('ignore', 'Unverified HTTPS request')

    def initialize(self):
        if not self.api_url and self.user_name:
            self.api_url = "https://{}.carto.com/api/".format(self.user_name)
        elif not self.api_url and not self.user_name:
            raise Exception(
                'Not enough data provided to initialize the client')

        if self.org_name:
            self.client = APIKeyAuthClient(self.api_url, self.api_key,
                                           self.org_name)
        else:
            self.client = APIKeyAuthClient(self.api_url, self.api_key)

        self.sql_client = SQLClient(self.client)
        self.batch_client = BatchSQLClient(self.client)
        self.copy_client = CopySQLClient(self.client)

    def execute_sql(self, query, parse_json=True, format=None, do_post=False):
        try:
            try:
                self.client
            except AttributeError:
                self.initialize()
            return self.sql_client.send(query, parse_json=parse_json,
                                        format=format, do_post=do_post)
        except CartoException as e:
            raise Exception(e.args[0].args[0][0])

    def batch_check(self, job_id):
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.read(job_id)

    def batch_create(self, query):
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.create(query)

    def batch_cancel(self, job_id):
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.cancel(job_id)

    def get_dataset_manager(self):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        return DatasetManager(self.client)

    def get_sync_manager(self):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        return SyncTableJobManager(self.client)

    def upload(self, uri, sync_time=None):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        dataset_manager = DatasetManager(self.client)
        if sync_time:
            return dataset_manager.create(uri, sync_time)
        else:
            return dataset_manager.create(uri)

    def copy_from(self, path, query, tablename=None, delimiter=','):
        try:
            self.copy_client
        except AttributeError:
            self.initialize()

        if tablename is None:
            tablename = Path(path).stem

        if query is None:
            with open(path, 'rb') as myfile:
                headers = next(myfile).strip().decode('utf8')
                query = f"""COPY {tablename} ({headers}) FROM stdin
                            (FORMAT CSV, DELIMITER '{delimiter}',
                             HEADER false, QUOTE '"')"""
                return self.copy_client.copyfrom_file_object(query, myfile)

        return self.copy_client.copyfrom_file_path(query, path)

    def copy_to(self, query, output, delimiter=','):
        try:
            self.copy_client
        except AttributeError:
            self.initialize()

        copy_query = f"""COPY ({query}) TO stdout WITH (FORMAT CSV,
                         DELIMITER '{delimiter}', HEADER true, QUOTE '"')"""
        return self.copy_client.copyto_file_path(copy_query, output)
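# Example usage of the COPY helpers above (file and table names are
# placeholders). With query=None, copy_from() infers the table name from the
# file stem and builds the COPY statement from the CSV header row:
user = CARTOUser(user_name='your-user', api_key='your-api-key')
user.copy_from('points.csv', query=None)  # loads into a table named "points"
user.copy_to('SELECT * FROM points', 'export.csv')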
args = parser.parse_args()

# Set authentication to CARTO
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
    batchSQLClient = BatchSQLClient(auth_client)
else:
    logger.error('You need to provide valid credentials, '
                 'run with -h parameter for details')
    import sys
    sys.exit(1)

# Batch SQL API operations
if args.operation == 'create':
    # Create a batch API job
    createJob = batchSQLClient.create(args.query)
    for key, value in createJob.items():
        logger.info('{key}: {value}'.format(key=key, value=value))
elif args.operation == 'read':
    readJob = batchSQLClient.read(args.job_id)
    for key, value in readJob.items():
        logger.info('{key}: {value}'.format(key=key, value=value))
elif args.operation == 'update':
    updateJob = batchSQLClient.update(args.job_id, args.query)
    for key, value in updateJob.items():
        logger.info('{key}: {value}'.format(key=key, value=value))
elif args.operation == 'cancel':
    cancelJob = batchSQLClient.cancel(args.job_id)
    for key, value in cancelJob.items():
        logger.info('{key}: {value}'.format(key=key, value=value))
else:
    pass  # remaining branch elided in the source snippet
class UploadJob(object):

    def __init__(self, csv_file_path, **kwargs):
        self.__set_max_csv_length()
        self.__set_defaults()

        for key, value in kwargs.items():
            try:
                setattr(self, key, int(value))
            except (ValueError, TypeError):
                if value in ("true", "True"):
                    setattr(self, key, True)
                elif value in ("false", "False"):
                    setattr(self, key, False)
                else:
                    setattr(self, key, value)

        self.__trim_columns()
        self.csv_file_path = csv_file_path

        if self.api_key:
            self.api_auth = APIKeyAuthClient(self.base_url, self.api_key)
            self.sql = SQLClient(self.api_auth)
            self.bsql = BatchSQLClient(self.api_auth)

    def __set_defaults(self):
        self.delimiter = DEFAULT_DELIMITER
        self.x_column = DEFAULT_X_COLUMN
        self.y_column = DEFAULT_Y_COLUMN
        self.srid = DEFAULT_SRID
        self.chunk_size = DEFAULT_CHUNK_SIZE
        self.max_attempts = DEFAULT_MAX_ATTEMPTS
        self.file_encoding = DEFAULT_FILE_ENCODING
        self.force_no_geometry = DEFAULT_FORCE_NO_GEOMETRY
        self.force_the_geom = DEFAULT_FORCE_THE_GEOM
        self.date_format = DEFAULT_DATE_FORMAT
        self.datetime_format = DEFAULT_DATETIME_FORMAT
        self.float_comma_separator = DEFAULT_FLOAT_COMMA_SEPARATOR
        self.float_thousand_separator = DEFAULT_FLOAT_THOUSAND_SEPARATOR
        self.date_columns = DEFAULT_DATE_COLUMNS
        self.observer = None

    def __set_max_csv_length(self):
        max_int = sys.maxsize
        decrement = True
        while decrement:
            # Decrease the max_int value by a factor of 10
            # as long as the OverflowError occurs.
            decrement = False
            try:
                csv.field_size_limit(max_int)
            except OverflowError:
                max_int = int(max_int / 10)
                decrement = True

    def __trim_columns(self):
        if self.columns is not None:
            self.columns = self.columns.replace(' ', '')
        if self.date_columns is not None:
            self.date_columns = self.date_columns.replace(' ', '')

    def run(self, start_chunk=1, end_chunk=None):
        if not isinstance(self.csv_file_path, str):
            self.do_run(self.csv_file_path, start_chunk, end_chunk)
        else:
            if sys.version_info <= (3, 0):
                with open(self.csv_file_path) as f:
                    self.do_run(f, start_chunk, end_chunk)
            else:
                with open(self.csv_file_path,
                          encoding=self.file_encoding) as f:
                    self.do_run(f, start_chunk, end_chunk)

    def notify(self, message_type, message):
        observer = getattr(self, "observer", None)
        if callable(observer):
            observer({"type": message_type, "msg": str(message)})
            return True
        return False

    def regenerate_overviews(self):
        query = "select CDB_CreateOverviews('{table}'::regclass)".\
            format(table=self.table_name)
        job_result = self.bsql.create(query)
        return job_result['job_id']

    def check_job(self, job_id):
        return self.bsql.read(job_id)

    def create_geom_query(self, record):
        null_result = NULL_VALUE + ","
        if self.force_the_geom:
            return self.parse_column_value(record, self.force_the_geom,
                                           parse_float=False)
        if self.force_no_geometry:
            return null_result

        longitude = self.get_longitude(record)
        latitude = self.get_latitude(record)
        if longitude is None or latitude is None \
                or longitude is DEFAULT_COORD or latitude is DEFAULT_COORD:
            return null_result

        return "st_transform(st_setsrid(st_makepoint(" + \
            "{longitude}, {latitude}), {srid}), 4326),".\
            format(longitude=longitude, latitude=latitude, srid=self.srid)

    def parse_column_value(self, record, column, parse_float=True):
        null_result = NULL_VALUE + ","
        try:
            value = self.escape_value(record[column])
        except Exception:
            return null_result

        try:
            if self.is_date_column(column):
                try:
                    result = "'{value}',".format(
                        value=self.parse_date_column(record, column))
                except ValueError:
                    result = null_result
            elif parse_float:
                result = "{value},".format(
                    value=self.parse_float_value(value))
            else:
                raise TypeError
        except (ValueError, TypeError):
            if value is None or not value.strip():
                result = null_result
            else:
                result = "'{value}',".format(value=value)
        return result

    def is_date_column(self, column):
        return (column is not None and self.date_columns is not None
                and column in self.date_columns.split(','))

    def parse_date_column(self, record, column):
        if not self.date_format or not self.datetime_format:
            raise ValueError
        try:
            return datetime.strptime(
                record[column],
                self.datetime_format).strftime(CARTO_DATE_FORMAT)
        except Exception:
            try:
                return datetime.strptime(
                    record[column],
                    self.date_format).strftime(CARTO_DATE_FORMAT)
            except Exception:
                raise ValueError

    def escape_value(self, value):
        return value.replace("'", "''")

    def get_longitude(self, record):
        try:
            longitude = self.get_coord(record, self.x_column)
            if abs(longitude) > MAX_LON:
                return None
        except TypeError:
            return DEFAULT_COORD
        else:
            return longitude

    def get_latitude(self, record):
        try:
            latitude = self.get_coord(record, self.y_column)
            if abs(latitude) > MAX_LAT:
                return None
        except TypeError:
            return DEFAULT_COORD
        else:
            return latitude

    def get_coord(self, record, coord_column):
        try:
            coord = self.parse_float_value(record[coord_column]) \
                or DEFAULT_COORD
        except (ValueError, KeyError):
            # Parsing failed: fall through and return None implicitly;
            # get_longitude/get_latitude catch the resulting TypeError.
            coord = DEFAULT_COORD
        else:
            return coord

    def parse_float_value(self, value):
        if value.upper() in (val.upper() for val in FORBIDDEN_FLOAT_VALUES):
            raise ValueError
        if self.float_thousand_separator:
            value = value.replace(self.float_thousand_separator, "")
        if self.float_comma_separator:
            value = value.replace(self.float_comma_separator, ".")
        return float(value)

    def send(self, query, file_encoding, chunk_num):
        if sys.version_info <= (3, 0):
            query = query.decode(file_encoding).encode(UTF8)
        logger.debug("Chunk #{chunk_num}: {query}".format(
            chunk_num=(chunk_num + 1), query=query))

        for retry in range(self.max_attempts):
            try:
                self.sql.send(query)
            except Exception as e:
                logger.warning(
                    "Chunk #{chunk_num}: Retrying ({error_msg})".format(
                        chunk_num=(chunk_num + 1), error_msg=e))
                self.notify('error', e)
            else:
                logger.info("Chunk #{chunk_num}: Success!".format(
                    chunk_num=(chunk_num + 1)))
                self.notify('progress', chunk_num + 1)
                break
        else:
            # All attempts failed (for/else: no break occurred).
            logger.error("Chunk #{chunk_num}: Failed!".format(
                chunk_num=(chunk_num + 1)))
            self.notify('error', "Failed " + str(chunk_num + 1))
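# Example instantiation of UploadJob (the DEFAULT_* constants and the
# do_run() chunked-insert driver live elsewhere in the tool; credentials,
# file and column names below are placeholders). Keyword arguments map
# straight onto attributes via the __init__ kwargs loop:
job = UploadJob(
    'points.csv',
    base_url='https://your-user.carto.com',
    api_key='your-api-key',
    table_name='points',
    columns='id,name,lon,lat',
    x_column='lon',
    y_column='lat',
    srid='4326',
)
job.run()
overview_job_id = job.regenerate_overviews()
print(job.check_job(overview_job_id)['status'])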