Ejemplo n.º 1
0
    def test_batchjobstatus_methods(self):
        """context.BatchJobStatus methods"""
        from cartoframes.context import BatchJobStatus
        from carto.sql import BatchSQLClient

        cc = cartoframes.CartoContext(base_url=self.baseurl,
                                      api_key=self.apikey)

        batch_client = BatchSQLClient(cc.auth_client)
        job_response = batch_client.create([
            'select 1',
        ])
        job_status = BatchJobStatus(cc, job_response)

        possible_status = (
            'pending',
            'running',
            'done',
            'canceled',
            'unknown',
        )
        self.assertTrue(job_status.get_status() in possible_status)
        job_status._set_status('foo')

        self.assertEqual(job_status.get_status(), 'foo')

        new_status = job_status.status()
        self.assertSetEqual(set(new_status.keys()),
                            {'status', 'updated_at', 'created_at'})

        # job_id as str
        str_bjs = BatchJobStatus(cc, 'foo')
        self.assertIsNone(str_bjs.get_status())
        self.assertEqual(str_bjs.job_id, 'foo')
Ejemplo n.º 2
0
def test_table(api_key_auth_client_usr):
    batch_client = BatchSQLClient(api_key_auth_client_usr)
    job = batch_client.create(SETUP_QUERIES)
    while not job['status'] in BATCH_TERMINAL_STATES:
        time.sleep(1)
        job = batch_client.read(job['job_id'])
    assert job['status'] == 'done'
Ejemplo n.º 3
0
def test_table(api_key_auth_client_usr):
    batch_client = BatchSQLClient(api_key_auth_client_usr)
    job = batch_client.create(SETUP_QUERIES)
    while not job['status'] in BATCH_TERMINAL_STATES:
        time.sleep(1)
        job = batch_client.read(job['job_id'])
    assert job['status'] == 'done'
Ejemplo n.º 4
0
def test_batch_multi_sql(api_key_auth_client_usr):
    sql = BatchSQLClient(api_key_auth_client_usr)

    # Create query
    data = sql.create(BATCH_SQL_MULTI_QUERY)

    # Get job ID
    job_id = data['job_id']

    # Cancel if not finished
    cancel_job_if_not_finished(sql, job_id)
Ejemplo n.º 5
0
def test_batch_multi_sql(api_key_auth_client_usr):
    sql = BatchSQLClient(api_key_auth_client_usr)

    # Create query
    data = sql.create(BATCH_SQL_MULTI_QUERY)

    # Get job ID
    job_id = data['job_id']

    # Cancel if not finished
    cancel_job_if_not_finished(sql, job_id)
Ejemplo n.º 6
0
def test_batch_multi_sql(api_key_auth_client_usr):
    sql = BatchSQLClient(api_key_auth_client_usr)

    # Create query
    data = sql.create(BATCH_SQL_MULTI_QUERY)

    # Update status
    job_id = data['job_id']

    # Cancel if not finished
    try:
        confirmation = sql.cancel(job_id)
    except CartoException:
        pass
    else:
        assert confirmation == 'cancelled'
Ejemplo n.º 7
0
    def batchsql(self, list_of_sqls, checkevery=1, maxtime=5):

        # pass in a list of sqls to execute
        # probably dont want to read a file with a million records and upload
        # open streets for ex is 1k rows, 500KB

        batchSQLClient = BatchSQLClient(self.auth_client)
        createJob = batchSQLClient.create(list_of_sqls)

        # https://github.com/CartoDB/carto-python

        # job_id looks like
        # 5171b8c4-8c03-4610-8797-5dd98ff3e61b

        # job looks like
        # {
        #  'user': '******',
        #  'status': 'done',
        #  'query': [{'query': 'drop table if exists foo', 'status': 'done'},
        #            {'query': 'create table foo (bar text)', 'status': 'done'},
        #            {...} {...}],
        # 'created_at': '2020-07-02T16:31:31.873Z',
        # 'updated_at': '2020-07-02T16:31:31.996Z',
        # 'job_id': '5171b8c4-8c03-4610-8797-5dd98ff3e61b'
        # }

        # queries are nested because you can add more sets to a running job

        readJob = batchSQLClient.read(createJob['job_id'])
        cheks = 0

        while (readJob['status'] != 'done'):
            time.sleep(checkevery)
            readJob = batchSQLClient.read(createJob['job_id'])
            cheks += 1

            if cheks > maxtime:
                return False

        return True
Ejemplo n.º 8
0
class CARTOUser(object):
    def __init__(self,
                 user_name=None,
                 org_name=None,
                 api_url=None,
                 api_key=None,
                 check_ssl=True):
        self.user_name = user_name
        self.org_name = org_name
        self.api_url = api_url
        self.api_key = api_key

        if not check_ssl:
            old_request = requests.Session.request
            requests.Session.request = partialmethod(old_request, verify=False)
            warnings.filterwarnings('ignore', 'Unverified HTTPS request')

    def initialize(self):
        if not self.api_url and self.user_name:
            self.api_url = "https://{}.carto.com/api/".format(self.user_name)
        elif not self.api_url and not self.user_name:
            raise Exception(
                'Not enough data provided to initialize the client')

        if self.org_name:
            self.client = APIKeyAuthClient(self.api_url, self.api_key,
                                           self.org_name)
        else:
            self.client = APIKeyAuthClient(self.api_url, self.api_key)

        self.sql_client = SQLClient(self.client)
        self.batch_client = BatchSQLClient(self.client)

    def execute_sql(self, query, parse_json=True, format=None, do_post=False):
        try:
            try:
                self.client
            except AttributeError:
                self.initialize()
            return self.sql_client.send(query,
                                        parse_json=parse_json,
                                        format=format,
                                        do_post=do_post)
        except CartoException as e:
            raise Exception(e.args[0].args[0][0])

    def batch_check(self, job_id):
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.read(job_id)

    def batch_create(self, query):
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.create(query)

    def batch_cancel(self, job_id):
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.cancel(job_id)

    def get_dataset_manager(self):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        return DatasetManager(self.client)

    def get_sync_manager(self):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        return SyncTableJobManager(self.client)

    def upload(self, uri, sync_time=None):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()

        dataset_manager = DatasetManager(self.client)

        if sync_time:
            return dataset_manager.create(uri, sync_time)
        else:
            return dataset_manager.create(uri)
Ejemplo n.º 9
0
                    default=os.environ['CARTO_API_KEY'],
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentification to CARTO
auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY,
                               args.organization)

batchSQLClient = BatchSQLClient(auth_client)

# Batch SQL API operations
if args.operation == 'create':
    # create a batch api job
    createJob = batchSQLClient.create(args.query)
    for a, b in createJob.items():
        logger.info('{key}: {value}'.format(key=a, value=b))
elif args.operation == 'read':
    readJob = batchSQLClient.read(args.job_id)
    for a, b in readJob.items():
        logger.info('{key}: {value}'.format(key=a, value=b))
elif args.operation == 'update':
    updateJob = batchSQLClient.update(args.job_id, args.query)
    for a, b in updateJob.items():
        logger.info('{key}: {value}'.format(key=a, value=b))
elif args.operation == 'cancel':
    cancelJob = batchSQLClient.cancel(args.job_id)
    for a, b in cancelJob.items():
        logger.info('{key}: {value}'.format(key=a, value=b))
else:
Ejemplo n.º 10
0
class CARTOUser(object):
    def __init__(self,
                 user_name=None,
                 org_name=None,
                 api_url=None,
                 api_key=None,
                 check_ssl=True):
        self.user_name = user_name
        self.org_name = org_name
        self.api_url = api_url
        self.api_key = api_key

        if not check_ssl:
            old_request = requests.Session.request
            requests.Session.request = partialmethod(old_request, verify=False)
            warnings.filterwarnings('ignore', 'Unverified HTTPS request')

    def initialize(self):
        if not self.api_url and self.user_name:
            self.api_url = "https://{}.carto.com/api/".format(self.user_name)
        elif not self.api_url and not self.user_name:
            raise Exception(
                'Not enough data provided to initialize the client')

        if self.org_name:
            self.client = APIKeyAuthClient(self.api_url, self.api_key,
                                           self.org_name)
        else:
            self.client = APIKeyAuthClient(self.api_url, self.api_key)

        self.sql_client = SQLClient(self.client)
        self.batch_client = BatchSQLClient(self.client)
        self.copy_client = CopySQLClient(self.client)

    def execute_sql(self, query, parse_json=True, format=None, do_post=False):
        try:
            try:
                self.client
            except AttributeError:
                self.initialize()
            return self.sql_client.send(query,
                                        parse_json=parse_json,
                                        format=format,
                                        do_post=do_post)
        except CartoException as e:
            raise Exception(e.args[0].args[0][0])

    def batch_check(self, job_id):
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.read(job_id)

    def batch_create(self, query):
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.create(query)

    def batch_cancel(self, job_id):
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.cancel(job_id)

    def get_dataset_manager(self):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        return DatasetManager(self.client)

    def get_sync_manager(self):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        return SyncTableJobManager(self.client)

    def upload(self, uri, sync_time=None):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()

        dataset_manager = DatasetManager(self.client)

        if sync_time:
            return dataset_manager.create(uri, sync_time)
        else:
            return dataset_manager.create(uri)

    def copy_from(self, path, query, tablename=None, delimiter=','):
        try:
            self.copy_client
        except AttributeError:
            self.initialize()

        if tablename is None:
            tablename = Path(path).stem

        if query is None:
            with open(path, 'rb') as myfile:
                headers = next(myfile).strip().decode('utf8')
                query = f"""COPY {tablename} ({headers}) FROM stdin
                (FORMAT CSV, DELIMITER '{delimiter}', HEADER false, QUOTE '"')"""
                return self.copy_client.copyfrom_file_object(query, myfile)
        return self.copy_client.copyfrom_file_path(query, path)

    def copy_to(self, query, output, delimiter=','):
        try:
            self.copy_client
        except AttributeError:
            self.initialize()

        copy_query = f"""COPY ({query}) TO stdout WITH
        (FORMAT CSV, DELIMITER '{delimiter}', HEADER true, QUOTE '"')"""

        return self.copy_client.copyto_file_path(copy_query, output)
Ejemplo n.º 11
0
args = parser.parse_args()

# Set authentification to CARTO
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
    batchSQLClient = BatchSQLClient(auth_client)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# Batch SQL API operations
if args.operation == 'create':
    # create a batch api job
    createJob = batchSQLClient.create(args.query)
    for a, b in createJob.items():
        logger.info('{key}: {value}'.format(key=a, value=b))
elif args.operation == 'read':
    readJob = batchSQLClient.read(args.job_id)
    for a, b in readJob.items():
        logger.info('{key}: {value}'.format(key=a, value=b))
elif args.operation == 'update':
    updateJob = batchSQLClient.update(args.job_id, args.query)
    for a, b in updateJob.items():
        logger.info('{key}: {value}'.format(key=a, value=b))
elif args.operation == 'cancel':
    cancelJob = batchSQLClient.cancel(args.job_id)
    for a, b in cancelJob.items():
        logger.info('{key}: {value}'.format(key=a, value=b))
else:
Ejemplo n.º 12
0
class UploadJob(object):
    def __init__(self, csv_file_path, **kwargs):
        self.__set_max_csv_length()
        self.__set_defaults()
        for key, value in kwargs.items():
            try:
                setattr(self, key, int(value))
            except (ValueError, TypeError):
                if value in ("true", "True"):
                    setattr(self, key, True)
                elif value in ("false", "False"):
                    setattr(self, key, False)
                else:
                    setattr(self, key, value)

        self.__trim_columns()

        self.csv_file_path = csv_file_path

        if self.api_key:
            self.api_auth = APIKeyAuthClient(self.base_url, self.api_key)
            self.sql = SQLClient(self.api_auth)
            self.bsql = BatchSQLClient(self.api_auth)

    def __set_defaults(self):
        self.delimiter = DEFAULT_DELIMITER
        self.x_column = DEFAULT_X_COLUMN
        self.y_column = DEFAULT_Y_COLUMN
        self.srid = DEFAULT_SRID
        self.chunk_size = DEFAULT_CHUNK_SIZE
        self.max_attempts = DEFAULT_MAX_ATTEMPTS
        self.file_encoding = DEFAULT_FILE_ENCOFING
        self.force_no_geometry = DEFAULT_FORCE_NO_GEOMETRY
        self.force_the_geom = DEFAULT_FORCE_THE_GEOM
        self.date_format = DEFAULT_DATE_FORMAT
        self.datetime_format = DEFAULT_DATETIME_FORMAT
        self.float_comma_separator = DEFAULT_FLOAT_COMMA_SEPARATOR
        self.float_thousand_separator = DEFAULT_FLOAT_THOUSAND_SEPARATOR
        self.date_columns = DEFAULT_DATE_COLUMNS
        self.observer = None

    def __set_max_csv_length(self):
        maxInt = sys.maxsize
        decrement = True

        while decrement:
            # decrease the maxInt value by factor 10
            # as long as the OverflowError occurs.
            decrement = False
            try:
                csv.field_size_limit(maxInt)
            except OverflowError:
                maxInt = int(maxInt / 10)
                decrement = True

    def __trim_columns(self):
        if self.columns is not None:
            self.columns = self.columns.replace(' ', '')

        if self.date_columns is not None:
            self.date_columns = self.date_columns.replace(' ', '')

    def run(self, start_chunk=1, end_chunk=None):
        if not isinstance(self.csv_file_path, str):
            self.do_run(self.csv_file_path, start_chunk, end_chunk)
        else:
            if sys.version_info <= (3, 0):
                with open(self.csv_file_path) as f:
                    self.do_run(f, start_chunk, end_chunk)
            else:
                with open(self.csv_file_path,
                          encoding=self.file_encoding) as f:
                    self.do_run(f, start_chunk, end_chunk)

    def notify(self, message_type, message):
        observer = getattr(self, "observer", None)
        if callable(observer):
            observer({"type": message_type, "msg": str(message)})
            return True
        return False

    def regenerate_overviews(self):
        query = 'select CDB_CreateOverviews(\'{table}\'::regclass)'.\
            format(table=self.table_name)
        job_result = self.bsql.create(query)
        return job_result['job_id']

    def check_job(self, job_id):
        return self.bsql.read(job_id)

    def create_geom_query(self, record):
        null_result = NULL_VALUE + ","
        if self.force_the_geom:
            return self.parse_column_value(record,
                                           self.force_the_geom,
                                           parse_float=False)

        if self.force_no_geometry:
            return null_result

        longitude = self.get_longitude(record)
        latitude = self.get_latitude(record)

        if longitude is None or latitude is None \
            or longitude is DEFAULT_COORD or latitude is DEFAULT_COORD:
            return null_result

        return "st_transform(st_setsrid(st_makepoint(" + \
            "{longitude}, {latitude}), {srid}), 4326),".\
            format(longitude=longitude, latitude=latitude, srid=self.srid)

    def parse_column_value(self, record, column, parse_float=True):
        null_result = NULL_VALUE + ","

        try:
            value = self.escape_value(record[column])
        except Exception:
            return null_result

        try:
            if self.is_date_column(column):
                try:
                    result = "'{value}',".format(
                        value=self.parse_date_column(record, column))
                except ValueError:
                    result = null_result
            elif parse_float:
                result = "{value},".format(value=self.parse_float_value(value))
            else:
                raise TypeError
        except (ValueError, TypeError):
            if value is None or not value.strip():
                result = null_result
            else:
                result = "'{value}',".format(value=value)
        return result

    def is_date_column(self, column):
        return column is not None and self.date_columns is not None and column in self.date_columns.split(
            ',')

    def parse_date_column(self, record, column):
        if not self.date_format or not self.datetime_format:
            raise ValueError
        try:
            return datetime.strptime(
                record[column],
                self.datetime_format).strftime(CARTO_DATE_FORMAT)
        except Exception:
            try:
                return datetime.strptime(
                    record[column],
                    self.date_format).strftime(CARTO_DATE_FORMAT)
            except Exception:
                raise ValueError

    def escape_value(self, value):
        return value.replace("'", "''")

    def get_longitude(self, record):
        try:
            longitude = self.get_coord(record, self.x_column)
            if abs(longitude) > MAX_LON:
                return None
        except TypeError:
            return DEFAULT_COORD
        else:
            return longitude

    def get_latitude(self, record):
        try:
            latitude = self.get_coord(record, self.y_column)
            if abs(latitude) > MAX_LAT:
                return None
        except TypeError:
            return DEFAULT_COORD
        else:
            return latitude

    def get_coord(self, record, type):
        try:
            coord = self.parse_float_value(record[type]) or DEFAULT_COORD
        except (ValueError, KeyError):
            coord = DEFAULT_COORD
        else:
            return coord

    def parse_float_value(self, value):
        if value.upper() in (val.upper() for val in FORBIDDEN_FLOAT_VALUES):
            raise ValueError
        if self.float_thousand_separator:
            value = value.replace(self.float_thousand_separator, "")
        if self.float_comma_separator:
            value = value.replace(self.float_comma_separator, ".")
        return float(value)

    def send(self, query, file_encoding, chunk_num):
        if sys.version_info <= (3, 0):
            query = query.decode(file_encoding).encode(UTF8)
        logger.debug("Chunk #{chunk_num}: {query}".format(
            chunk_num=(chunk_num + 1), query=query))
        for retry in range(self.max_attempts):
            try:
                self.sql.send(query)
            except Exception as e:
                logger.warning(
                    "Chunk #{chunk_num}: Retrying ({error_msg})".format(
                        chunk_num=(chunk_num + 1), error_msg=e))
                self.notify('error', e)
            else:
                logger.info("Chunk #{chunk_num}: Success!".format(
                    chunk_num=(chunk_num + 1)))
                self.notify('progress', chunk_num + 1)
                break
        else:
            logger.error("Chunk #{chunk_num}: Failed!)".format(
                chunk_num=(chunk_num + 1)))
            self.notify('error', "Failed " + str(chunk_num + 1))