예제 #1
0
def test_table(api_key_auth_client_usr):
    """Submit SETUP_QUERIES as one batch job and require it to end as 'done'.

    The job is polled once per second until its status is one of
    BATCH_TERMINAL_STATES.
    """
    client = BatchSQLClient(api_key_auth_client_usr)
    job = client.create(SETUP_QUERIES)
    while True:
        if job['status'] in BATCH_TERMINAL_STATES:
            break
        time.sleep(1)
        # Refresh the job description; the id is taken from the latest reply.
        job = client.read(job['job_id'])
    assert job['status'] == 'done'
예제 #2
0
def test_table(api_key_auth_client_usr):
    """Run the setup queries through the Batch SQL API and check they succeed.

    Creates a single batch job from SETUP_QUERIES, waits (one-second polls)
    for it to reach a status listed in BATCH_TERMINAL_STATES, then asserts
    that the final status is 'done'.
    """
    batch = BatchSQLClient(api_key_auth_client_usr)
    job = batch.create(SETUP_QUERIES)
    while job['status'] not in BATCH_TERMINAL_STATES:
        time.sleep(1)
        job = batch.read(job['job_id'])
    final_status = job['status']
    assert final_status == 'done'
예제 #3
0
    def batchsql(self, list_of_sqls, checkevery=1, maxtime=5):
        """Run a list of SQL statements as one Batch SQL job and wait for it.

        Pass in a list of SQL statements to execute. This is meant for modest
        payloads: you probably do not want to read a file with a million
        records and upload it this way (open streets, for example, is about
        1k rows / 500KB).

        See https://github.com/CartoDB/carto-python for the client used here.

        Args:
            list_of_sqls: SQL statements handed to ``BatchSQLClient.create``.
            checkevery: Seconds to sleep between two status polls.
            maxtime: Poll budget. Despite the name this is a number of polls,
                not a duration: after the initial read the job is re-read at
                most ``maxtime + 1`` times.

        Returns:
            bool: True as soon as the job status is 'done'. False when the
            job ends in a non-successful final state, or when it is still
            unfinished after the poll budget is used up.
        """
        # Final states other than 'done'; both spellings of "cancelled" are
        # accepted because the service documentation uses both.
        unsuccessful_states = ('failed', 'canceled', 'cancelled', 'unknown')

        batch_client = BatchSQLClient(self.auth_client)
        created_job = batch_client.create(list_of_sqls)

        # job_id looks like
        # 5171b8c4-8c03-4610-8797-5dd98ff3e61b

        # job looks like
        # {
        #  'user': '******',
        #  'status': 'done',
        #  'query': [{'query': 'drop table if exists foo', 'status': 'done'},
        #            {'query': 'create table foo (bar text)', 'status': 'done'},
        #            {...} {...}],
        # 'created_at': '2020-07-02T16:31:31.873Z',
        # 'updated_at': '2020-07-02T16:31:31.996Z',
        # 'job_id': '5171b8c4-8c03-4610-8797-5dd98ff3e61b'
        # }

        # queries are nested because you can add more sets to a running job

        job_id = created_job['job_id']
        job = batch_client.read(job_id)
        checks = 0

        # The status is inspected BEFORE the budget, so a job that turns
        # 'done' on the very last permitted poll is still reported as success.
        while job['status'] != 'done':
            if job['status'] in unsuccessful_states:
                # The job will never reach 'done'; no point in waiting.
                return False
            if checks > maxtime:
                return False

            time.sleep(checkevery)
            job = batch_client.read(job_id)
            checks += 1

        return True
예제 #4
0
class BatchJobStatus(object):
    """Status of a write or query operation. Read more at `Batch SQL API docs
    <https://carto.com/docs/carto-engine/sql-api/batch-queries/>`__ about
    responses and how to interpret them.

    Example:

        Poll for a job's status if you've caught the :py:class:`BatchJobStatus`
        instance.

        .. code:: python

            import time
            job = cc.write(df, 'new_table',
                           lnglat=('lng_col', 'lat_col'))
            while True:
                curr_status = job.status()['status']
                if curr_status in ('done', 'failed', 'canceled', 'unknown', ):
                    print(curr_status)
                    break
                time.sleep(5)

        Create a :py:class:`BatchJobStatus` instance if you have a `job_id`
        output from a :py:meth:`CartoContext.write
        <cartoframes.context.CartoContext.write>` operation.

        .. code:: python

            >>> from cartoframes import CartoContext, BatchJobStatus
            >>> cc = CartoContext(username='******', api_key='...')
            >>> cc.write(df, 'new_table', lnglat=('lng', 'lat'))
            'BatchJobStatus(job_id='job-id-string', ...)'
            >>> batch_job = BatchJobStatus(cc, 'job-id-string')

    Attributes:
        job_id (str): Job ID of the Batch SQL API job
        last_status (str): Status of ``job_id`` job when last polled
        created_at (str): Time and date when job was created

    Args:
        carto_context (:py:class:`CartoContext <cartoframes.context.CartoContext>`):
          :py:class:`CartoContext <cartoframes.context.CartoContext>` instance
        job (dict or str): If a dict, job status dict returned after sending
            a Batch SQL API request. If str, a Batch SQL API job id.

    Raises:
        TypeError: If ``job`` is neither a dict nor a str.
    """
    def __init__(self, carto_context, job):
        if isinstance(job, dict):
            self.job_id = job.get('job_id')
            self.last_status = job.get('status')
            self.created_at = job.get('created_at')
        elif isinstance(job, str):
            # Only the id is known; status and creation time stay unset
            # until the job is polled.
            self.job_id = job
            self.last_status = None
            self.created_at = None
        else:
            # Fail here instead of leaving a half-initialized instance that
            # would only break later with an AttributeError.
            raise TypeError(
                'job must be a dict or a str job id, got {}'.format(
                    type(job).__name__))

        self._batch_client = BatchSQLClient(carto_context.auth_client)

    def __repr__(self):
        """Return a constructor-like summary of the id, status and creation time."""
        return ('BatchJobStatus(job_id=\'{job_id}\', '
                'last_status=\'{status}\', '
                'created_at=\'{created_at}\')'.format(
                    job_id=self.job_id,
                    status=self.last_status,
                    created_at=self.created_at))

    def _set_status(self, curr_status):
        """Remember ``curr_status`` as the most recently observed status."""
        self.last_status = curr_status

    def get_status(self):
        """Return the status seen at the last poll, without contacting the API."""
        return self.last_status

    def status(self):
        """Checks the current status of job ``job_id``

        The freshly read status is also stored in ``last_status``.

        Returns:
            dict: Status and time it was updated (keys ``status``,
            ``updated_at`` and ``created_at``)

        Warns:
            UserWarning: If the job failed, a warning is raised with
                information about the failure
        """
        resp = self._batch_client.read(self.job_id)
        if 'failed_reason' in resp:
            warn('Job failed: {}'.format(resp.get('failed_reason')))
        self._set_status(resp.get('status'))
        return dict(status=resp.get('status'),
                    updated_at=resp.get('updated_at'),
                    created_at=resp.get('created_at'))
예제 #5
0
class CARTOUser(object):
    """Lazy wrapper around the CARTO SQL, Batch SQL and dataset clients.

    No API client is built in ``__init__``; each public method calls
    :meth:`initialize` on first use when the client it needs is missing.
    """

    def __init__(self,
                 user_name=None,
                 org_name=None,
                 api_url=None,
                 api_key=None,
                 check_ssl=True):
        """Store the connection settings.

        Args:
            user_name: CARTO user name; used to derive ``api_url`` when no
                URL is given.
            org_name: Optional organization name passed to the auth client.
            api_url: Base URL of the CARTO API.
            api_key: API key passed to the auth client.
            check_ssl: When false, certificate verification is switched off
                by replacing ``requests.Session.request`` on the class, so it
                affects every ``requests`` session in the process, and
                "Unverified HTTPS request" warnings are silenced.
        """
        self.user_name = user_name
        self.org_name = org_name
        self.api_url = api_url
        self.api_key = api_key

        if not check_ssl:
            old_request = requests.Session.request
            requests.Session.request = partialmethod(old_request, verify=False)
            warnings.filterwarnings('ignore', 'Unverified HTTPS request')

    def _ensure_initialized(self, attribute):
        """Call :meth:`initialize` if ``attribute`` has not been created yet."""
        if not hasattr(self, attribute):
            self.initialize()

    @staticmethod
    def _error_message(error):
        """Extract a readable message from a ``CartoException``.

        The expected shape is an exception wrapping another exception whose
        first argument is a sequence of messages; in that case the first
        message is returned. Plain string messages are returned whole instead
        of being cut down to their first character, and unexpected shapes no
        longer raise while the error is being reported.
        """
        detail = error.args[0] if error.args else error
        inner_args = getattr(detail, 'args', None)
        if inner_args:
            detail = inner_args[0]
        if isinstance(detail, (list, tuple)):
            detail = detail[0] if detail else str(error)
        return detail

    def initialize(self):
        """Build the auth, SQL and Batch SQL clients.

        When no ``api_url`` was given it is derived from ``user_name``.

        Raises:
            Exception: If neither ``api_url`` nor ``user_name`` is set.
        """
        if not self.api_url and self.user_name:
            self.api_url = "https://{}.carto.com/api/".format(self.user_name)
        elif not self.api_url and not self.user_name:
            raise Exception(
                'Not enough data provided to initialize the client')

        if self.org_name:
            self.client = APIKeyAuthClient(self.api_url, self.api_key,
                                           self.org_name)
        else:
            self.client = APIKeyAuthClient(self.api_url, self.api_key)

        self.sql_client = SQLClient(self.client)
        self.batch_client = BatchSQLClient(self.client)

    def execute_sql(self, query, parse_json=True, format=None, do_post=False):
        """Send ``query`` through the SQL client and return its response.

        ``parse_json``, ``format`` and ``do_post`` are forwarded unchanged to
        ``SQLClient.send``.

        Raises:
            Exception: Carrying the message extracted from a
                ``CartoException`` raised by the client.
        """
        try:
            self._ensure_initialized('client')
            return self.sql_client.send(query,
                                        parse_json=parse_json,
                                        format=format,
                                        do_post=do_post)
        except CartoException as e:
            raise Exception(self._error_message(e)) from e

    def batch_check(self, job_id):
        """Return the Batch SQL API description of job ``job_id``."""
        self._ensure_initialized('batch_client')
        return self.batch_client.read(job_id)

    def batch_create(self, query):
        """Create a Batch SQL API job for ``query`` and return the response."""
        self._ensure_initialized('batch_client')
        return self.batch_client.create(query)

    def batch_cancel(self, job_id):
        """Cancel Batch SQL API job ``job_id`` and return the response."""
        self._ensure_initialized('batch_client')
        return self.batch_client.cancel(job_id)

    def get_dataset_manager(self):
        """Return a new ``DatasetManager`` bound to the auth client."""
        self._ensure_initialized('sql_client')
        return DatasetManager(self.client)

    def get_sync_manager(self):
        """Return a new ``SyncTableJobManager`` bound to the auth client."""
        self._ensure_initialized('sql_client')
        return SyncTableJobManager(self.client)

    def upload(self, uri, sync_time=None):
        """Create a dataset from ``uri`` through a ``DatasetManager``.

        ``sync_time`` is only forwarded when it is truthy.
        """
        self._ensure_initialized('sql_client')

        dataset_manager = DatasetManager(self.client)

        if sync_time:
            return dataset_manager.create(uri, sync_time)
        return dataset_manager.create(uri)
예제 #6
0
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Authenticate against CARTO with the credentials from the command line
auth_client = APIKeyAuthClient(
    args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)

batchSQLClient = BatchSQLClient(auth_client)


def _log_job(job):
    """Log every key/value pair of a Batch SQL API job response."""
    for field, content in job.items():
        logger.info('{key}: {value}'.format(key=field, value=content))


# Batch SQL API operations: run the requested one and log its response
if args.operation == 'create':
    # create a batch api job
    createJob = batchSQLClient.create(args.query)
    _log_job(createJob)
elif args.operation == 'read':
    readJob = batchSQLClient.read(args.job_id)
    _log_job(readJob)
elif args.operation == 'update':
    updateJob = batchSQLClient.update(args.job_id, args.query)
    _log_job(updateJob)
elif args.operation == 'cancel':
    cancelJob = batchSQLClient.cancel(args.job_id)
    _log_job(cancelJob)
else:
    logger.info("You have not written a correct operation option")
예제 #7
0
class CARTOUser(object):
    """Lazy wrapper around the CARTO SQL, Batch SQL, COPY and dataset clients.

    No API client is built in ``__init__``; each public method calls
    :meth:`initialize` on first use when the client it needs is missing.
    """

    def __init__(self,
                 user_name=None,
                 org_name=None,
                 api_url=None,
                 api_key=None,
                 check_ssl=True):
        """Store the connection settings.

        Args:
            user_name: CARTO user name; used to derive ``api_url`` when no
                URL is given.
            org_name: Optional organization name passed to the auth client.
            api_url: Base URL of the CARTO API.
            api_key: API key passed to the auth client.
            check_ssl: When false, certificate verification is switched off
                by replacing ``requests.Session.request`` on the class, so it
                affects every ``requests`` session in the process, and
                "Unverified HTTPS request" warnings are silenced.
        """
        self.user_name = user_name
        self.org_name = org_name
        self.api_url = api_url
        self.api_key = api_key

        if not check_ssl:
            old_request = requests.Session.request
            requests.Session.request = partialmethod(old_request, verify=False)
            warnings.filterwarnings('ignore', 'Unverified HTTPS request')

    def _ensure_initialized(self, attribute):
        """Call :meth:`initialize` if ``attribute`` has not been created yet."""
        if not hasattr(self, attribute):
            self.initialize()

    @staticmethod
    def _error_message(error):
        """Extract a readable message from a ``CartoException``.

        The expected shape is an exception wrapping another exception whose
        first argument is a sequence of messages; in that case the first
        message is returned. Plain string messages are returned whole instead
        of being cut down to their first character, and unexpected shapes no
        longer raise while the error is being reported.
        """
        detail = error.args[0] if error.args else error
        inner_args = getattr(detail, 'args', None)
        if inner_args:
            detail = inner_args[0]
        if isinstance(detail, (list, tuple)):
            detail = detail[0] if detail else str(error)
        return detail

    def initialize(self):
        """Build the auth, SQL, Batch SQL and COPY clients.

        When no ``api_url`` was given it is derived from ``user_name``.

        Raises:
            Exception: If neither ``api_url`` nor ``user_name`` is set.
        """
        if not self.api_url and self.user_name:
            self.api_url = "https://{}.carto.com/api/".format(self.user_name)
        elif not self.api_url and not self.user_name:
            raise Exception(
                'Not enough data provided to initialize the client')

        if self.org_name:
            self.client = APIKeyAuthClient(self.api_url, self.api_key,
                                           self.org_name)
        else:
            self.client = APIKeyAuthClient(self.api_url, self.api_key)

        self.sql_client = SQLClient(self.client)
        self.batch_client = BatchSQLClient(self.client)
        self.copy_client = CopySQLClient(self.client)

    def execute_sql(self, query, parse_json=True, format=None, do_post=False):
        """Send ``query`` through the SQL client and return its response.

        ``parse_json``, ``format`` and ``do_post`` are forwarded unchanged to
        ``SQLClient.send``.

        Raises:
            Exception: Carrying the message extracted from a
                ``CartoException`` raised by the client.
        """
        try:
            self._ensure_initialized('client')
            return self.sql_client.send(query,
                                        parse_json=parse_json,
                                        format=format,
                                        do_post=do_post)
        except CartoException as e:
            raise Exception(self._error_message(e)) from e

    def batch_check(self, job_id):
        """Return the Batch SQL API description of job ``job_id``."""
        self._ensure_initialized('batch_client')
        return self.batch_client.read(job_id)

    def batch_create(self, query):
        """Create a Batch SQL API job for ``query`` and return the response."""
        self._ensure_initialized('batch_client')
        return self.batch_client.create(query)

    def batch_cancel(self, job_id):
        """Cancel Batch SQL API job ``job_id`` and return the response."""
        self._ensure_initialized('batch_client')
        return self.batch_client.cancel(job_id)

    def get_dataset_manager(self):
        """Return a new ``DatasetManager`` bound to the auth client."""
        self._ensure_initialized('sql_client')
        return DatasetManager(self.client)

    def get_sync_manager(self):
        """Return a new ``SyncTableJobManager`` bound to the auth client."""
        self._ensure_initialized('sql_client')
        return SyncTableJobManager(self.client)

    def upload(self, uri, sync_time=None):
        """Create a dataset from ``uri`` through a ``DatasetManager``.

        ``sync_time`` is only forwarded when it is truthy.
        """
        self._ensure_initialized('sql_client')

        dataset_manager = DatasetManager(self.client)

        if sync_time:
            return dataset_manager.create(uri, sync_time)
        return dataset_manager.create(uri)

    def copy_from(self, path, query, tablename=None, delimiter=','):
        """Load the file at ``path`` into a table with a COPY ... FROM stdin.

        Args:
            path: Path of the file to send.
            query: Complete COPY statement to run. When ``None`` one is built
                from the file's first line, which is then consumed as the
                header and not sent as data.
            tablename: Target table for the generated statement; defaults to
                the file name without its extension.
            delimiter: Field delimiter of the file, used for the generated
                statement.

        Returns:
            Whatever the underlying ``CopySQLClient`` call returns.
        """
        self._ensure_initialized('copy_client')

        if tablename is None:
            tablename = Path(path).stem

        if query is None:
            with open(path, 'rb') as myfile:
                headers = next(myfile).strip().decode('utf8')
                # The SQL column list is always comma separated, whatever the
                # file's delimiter is. This is a plain split: header names
                # that themselves contain the delimiter are not supported.
                if delimiter and delimiter != ',':
                    headers = ','.join(headers.split(delimiter))
                query = f"""COPY {tablename} ({headers}) FROM stdin
                (FORMAT CSV, DELIMITER '{delimiter}', HEADER false, QUOTE '"')"""
                return self.copy_client.copyfrom_file_object(query, myfile)
        return self.copy_client.copyfrom_file_path(query, path)

    def copy_to(self, query, output, delimiter=','):
        """Export the result of ``query`` as CSV (with header) to ``output``.

        ``query`` is wrapped in a ``COPY (...) TO stdout`` statement using
        ``delimiter`` and handed to ``CopySQLClient.copyto_file_path``.
        """
        self._ensure_initialized('copy_client')

        copy_query = f"""COPY ({query}) TO stdout WITH
        (FORMAT CSV, DELIMITER '{delimiter}', HEADER true, QUOTE '"')"""

        return self.copy_client.copyto_file_path(copy_query, output)
예제 #8
0
# Authenticate against CARTO; base URL, API key and organization are all required
if not (args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization):
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

auth_client = APIKeyAuthClient(
    args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
batchSQLClient = BatchSQLClient(auth_client)

# Batch SQL API operations: run the requested one, then log its response
operation = args.operation
if operation == 'create':
    # create a batch api job
    createJob = batchSQLClient.create(args.query)
    job_fields = createJob.items()
elif operation == 'read':
    readJob = batchSQLClient.read(args.job_id)
    job_fields = readJob.items()
elif operation == 'update':
    updateJob = batchSQLClient.update(args.job_id, args.query)
    job_fields = updateJob.items()
elif operation == 'cancel':
    cancelJob = batchSQLClient.cancel(args.job_id)
    job_fields = cancelJob.items()
else:
    # Unknown operation: nothing to report besides the hint below
    job_fields = ()
    logger.info("You have not written a correct operation option")

for field, content in job_fields:
    logger.info('{key}: {value}'.format(key=field, value=content))
예제 #9
0
class UploadJob(object):
    def __init__(self, csv_file_path, **kwargs):
        """Configure the job from the defaults, overridden by keyword options.

        Args:
            csv_file_path: Stored as-is; ``run`` opens it when it is a ``str``
                and otherwise hands it on unchanged.
            **kwargs: Option overrides, each set as an attribute of the same
                name. Values are coerced: anything ``int()`` accepts becomes
                an int, the strings "true"/"True" and "false"/"False" become
                booleans, and everything else is stored unchanged.

        The SQL and Batch SQL clients are only created when a truthy
        ``api_key`` option is present.
        """
        self.__set_max_csv_length()
        self.__set_defaults()
        for key, value in kwargs.items():
            try:
                setattr(self, key, int(value))
            except (ValueError, TypeError):
                if value in ("true", "True"):
                    setattr(self, key, True)
                elif value in ("false", "False"):
                    setattr(self, key, False)
                else:
                    setattr(self, key, value)

        self.__trim_columns()

        self.csv_file_path = csv_file_path

        # ``api_key`` has no default, so leaving it out must behave like
        # passing a falsy key instead of raising AttributeError.
        if getattr(self, "api_key", None):
            self.api_auth = APIKeyAuthClient(self.base_url, self.api_key)
            self.sql = SQLClient(self.api_auth)
            self.bsql = BatchSQLClient(self.api_auth)

    def __set_defaults(self):
        """Set every tunable option to its module-level default value."""
        defaults = (
            ("delimiter", DEFAULT_DELIMITER),
            ("x_column", DEFAULT_X_COLUMN),
            ("y_column", DEFAULT_Y_COLUMN),
            ("srid", DEFAULT_SRID),
            ("chunk_size", DEFAULT_CHUNK_SIZE),
            ("max_attempts", DEFAULT_MAX_ATTEMPTS),
            ("file_encoding", DEFAULT_FILE_ENCOFING),
            ("force_no_geometry", DEFAULT_FORCE_NO_GEOMETRY),
            ("force_the_geom", DEFAULT_FORCE_THE_GEOM),
            ("date_format", DEFAULT_DATE_FORMAT),
            ("datetime_format", DEFAULT_DATETIME_FORMAT),
            ("float_comma_separator", DEFAULT_FLOAT_COMMA_SEPARATOR),
            ("float_thousand_separator", DEFAULT_FLOAT_THOUSAND_SEPARATOR),
            ("date_columns", DEFAULT_DATE_COLUMNS),
        )
        for option, default in defaults:
            setattr(self, option, default)
        # No observer callback until the caller installs one.
        self.observer = None

    def __set_max_csv_length(self):
        maxInt = sys.maxsize
        decrement = True

        while decrement:
            # decrease the maxInt value by factor 10
            # as long as the OverflowError occurs.
            decrement = False
            try:
                csv.field_size_limit(maxInt)
            except OverflowError:
                maxInt = int(maxInt / 10)
                decrement = True

    def __trim_columns(self):
        if self.columns is not None:
            self.columns = self.columns.replace(' ', '')

        if self.date_columns is not None:
            self.date_columns = self.date_columns.replace(' ', '')

    def run(self, start_chunk=1, end_chunk=None):
        if not isinstance(self.csv_file_path, str):
            self.do_run(self.csv_file_path, start_chunk, end_chunk)
        else:
            if sys.version_info <= (3, 0):
                with open(self.csv_file_path) as f:
                    self.do_run(f, start_chunk, end_chunk)
            else:
                with open(self.csv_file_path,
                          encoding=self.file_encoding) as f:
                    self.do_run(f, start_chunk, end_chunk)

    def notify(self, message_type, message):
        observer = getattr(self, "observer", None)
        if callable(observer):
            observer({"type": message_type, "msg": str(message)})
            return True
        return False

    def regenerate_overviews(self):
        query = 'select CDB_CreateOverviews(\'{table}\'::regclass)'.\
            format(table=self.table_name)
        job_result = self.bsql.create(query)
        return job_result['job_id']

    def check_job(self, job_id):
        return self.bsql.read(job_id)

    def create_geom_query(self, record):
        """Build the SQL fragment (with trailing comma) for a record's geometry.

        Order of precedence:
        1. ``force_the_geom`` set: the value of that column, unparsed.
        2. ``force_no_geometry`` set: the NULL fragment.
        3. Otherwise a point built from the record's longitude and latitude,
           transformed from ``srid`` to 4326; the NULL fragment when either
           coordinate is ``None`` or ``DEFAULT_COORD``.
        """
        null_result = NULL_VALUE + ","

        geom_column = self.force_the_geom
        if geom_column:
            return self.parse_column_value(record,
                                           self.force_the_geom,
                                           parse_float=False)

        if self.force_no_geometry:
            return null_result

        longitude = self.get_longitude(record)
        latitude = self.get_latitude(record)

        missing = any(
            coord is None or coord is DEFAULT_COORD
            for coord in (longitude, latitude))
        if missing:
            return null_result

        point_template = ("st_transform(st_setsrid(st_makepoint("
                          "{longitude}, {latitude}), {srid}), 4326),")
        return point_template.format(
            longitude=longitude, latitude=latitude, srid=self.srid)

    def parse_column_value(self, record, column, parse_float=True):
        """Render one column of ``record`` as a SQL value followed by a comma.

        Args:
            record: Mapping from column name to raw text value.
            column: Name of the column to render.
            parse_float: When true, non-date values are first tried as
                numbers and emitted unquoted if that succeeds.

        Returns:
            str: The NULL fragment when the value cannot be read or escaped,
            is a date that fails to parse, or is blank; a quoted date for
            date columns; an unquoted number when float parsing succeeds;
            otherwise the escaped text in single quotes.
        """
        null_result = NULL_VALUE + ","

        try:
            value = self.escape_value(record[column])
        except Exception:
            return null_result

        try:
            if self.is_date_column(column):
                try:
                    parsed_date = self.parse_date_column(record, column)
                except ValueError:
                    return null_result
                return "'{value}',".format(value=parsed_date)
            if not parse_float:
                # Not a number by request: fall through to the text branch.
                raise TypeError
            return "{value},".format(value=self.parse_float_value(value))
        except (ValueError, TypeError):
            pass

        # Text branch: blank values become NULL, the rest is quoted.
        if value is None or not value.strip():
            return null_result
        return "'{value}',".format(value=value)

    def is_date_column(self, column):
        return column is not None and self.date_columns is not None and column in self.date_columns.split(
            ',')

    def parse_date_column(self, record, column):
        if not self.date_format or not self.datetime_format:
            raise ValueError
        try:
            return datetime.strptime(
                record[column],
                self.datetime_format).strftime(CARTO_DATE_FORMAT)
        except Exception:
            try:
                return datetime.strptime(
                    record[column],
                    self.date_format).strftime(CARTO_DATE_FORMAT)
            except Exception:
                raise ValueError

    def escape_value(self, value):
        return value.replace("'", "''")

    def get_longitude(self, record):
        """Read the record's longitude from the ``x_column`` option.

        Returns:
            The coordinate from ``get_coord``; ``None`` when its absolute
            value exceeds ``MAX_LON``; ``DEFAULT_COORD`` when the coordinate
            is not something ``abs`` or the comparison can handle.
        """
        try:
            value = self.get_coord(record, self.x_column)
            out_of_range = abs(value) > MAX_LON
        except TypeError:
            return DEFAULT_COORD
        return None if out_of_range else value

    def get_latitude(self, record):
        """Read the record's latitude from the ``y_column`` option.

        Returns:
            The coordinate from ``get_coord``; ``None`` when its absolute
            value exceeds ``MAX_LAT``; ``DEFAULT_COORD`` when the coordinate
            is not something ``abs`` or the comparison can handle.
        """
        try:
            value = self.get_coord(record, self.y_column)
            out_of_range = abs(value) > MAX_LAT
        except TypeError:
            return DEFAULT_COORD
        if out_of_range:
            return None
        return value

    def get_coord(self, record, type):
        """Parse one coordinate of ``record`` as a float.

        Args:
            record: Mapping from column name to raw text value.
            type: Name of the column holding the coordinate.

        Returns:
            The parsed float, or ``DEFAULT_COORD`` when the column is
            missing, the value is not a valid number, or the parsed value is
            falsy.
        """
        try:
            # NOTE(review): ``or`` also turns a parsed 0.0 into DEFAULT_COORD;
            # kept as it was - confirm that a zero coordinate is meant to
            # count as "no coordinate".
            return self.parse_float_value(record[type]) or DEFAULT_COORD
        except (ValueError, KeyError):
            # Return the default explicitly; falling off the end here used to
            # yield None instead of DEFAULT_COORD.
            return DEFAULT_COORD

    def parse_float_value(self, value):
        """Convert numeric text to ``float`` using the configured separators.

        The thousand separator (if set) is removed first, then the decimal
        separator (if set) is replaced by a dot.

        Raises:
            ValueError: If ``value`` matches an entry of
                ``FORBIDDEN_FLOAT_VALUES`` ignoring case, or ``float`` rejects
                the normalized text.
        """
        candidate = value.upper()
        forbidden = [item.upper() for item in FORBIDDEN_FLOAT_VALUES]
        if candidate in forbidden:
            raise ValueError

        normalized = value
        thousands = self.float_thousand_separator
        if thousands:
            normalized = normalized.replace(thousands, "")
        decimal_mark = self.float_comma_separator
        if decimal_mark:
            normalized = normalized.replace(decimal_mark, ".")
        return float(normalized)

    def send(self, query, file_encoding, chunk_num):
        """Send one chunk's query, retrying up to ``max_attempts`` times.

        Every failed attempt is logged as a warning and reported to the
        observer as an 'error'; the first success is logged and reported as
        'progress' with the 1-based chunk number. When all attempts fail, an
        error is logged and a final 'error' notification is sent.

        Args:
            query: SQL text to send through the SQL client.
            file_encoding: Source encoding of ``query``; only used on
                Python 2, where the query is re-encoded.
            chunk_num: Zero-based chunk index, shown 1-based in messages.
        """
        if sys.version_info <= (3, 0):
            query = query.decode(file_encoding).encode(UTF8)

        chunk_label = chunk_num + 1
        logger.debug("Chunk #{chunk_num}: {query}".format(
            chunk_num=chunk_label, query=query))

        for _ in range(self.max_attempts):
            try:
                self.sql.send(query)
            except Exception as error:
                logger.warning(
                    "Chunk #{chunk_num}: Retrying ({error_msg})".format(
                        chunk_num=chunk_label, error_msg=error))
                self.notify('error', error)
                continue

            logger.info("Chunk #{chunk_num}: Success!".format(
                chunk_num=chunk_label))
            self.notify('progress', chunk_label)
            return

        # Only reached when no attempt succeeded.
        logger.error("Chunk #{chunk_num}: Failed!)".format(
            chunk_num=chunk_label))
        self.notify('error', "Failed " + str(chunk_label))