Example #1
0
def test_table(api_key_auth_client_usr):
    batch_client = BatchSQLClient(api_key_auth_client_usr)
    job = batch_client.create(SETUP_QUERIES)
    while not job['status'] in BATCH_TERMINAL_STATES:
        time.sleep(1)
        job = batch_client.read(job['job_id'])
    assert job['status'] == 'done'
Example #2
0
def test_batch_create_and_wait_for_completion(api_key_auth_client_usr):
    sql = BatchSQLClient(api_key_auth_client_usr)

    # Create query
    data = sql.create_and_wait_for_completion(BATCH_SQL_SINGLE_QUERY)

    assert data['status'] in ['done', 'failed', 'canceled', 'unknown']
Example #3
0
def test_table(api_key_auth_client_usr):
    batch_client = BatchSQLClient(api_key_auth_client_usr)
    job = batch_client.create(SETUP_QUERIES)
    while not job['status'] in BATCH_TERMINAL_STATES:
        time.sleep(1)
        job = batch_client.read(job['job_id'])
    assert job['status'] == 'done'
Example #4
0
def test_batch_create_and_wait_for_completion(api_key_auth_client_usr):
    sql = BatchSQLClient(api_key_auth_client_usr)

    # Create query
    data = sql.create_and_wait_for_completion(BATCH_SQL_SINGLE_QUERY)

    assert data['status'] in ['done', 'failed', 'canceled', 'unknown']
Example #5
0
    def test_batchjobstatus_methods(self):
        """context.BatchJobStatus methods"""
        from cartoframes.context import BatchJobStatus
        from carto.sql import BatchSQLClient

        cc = cartoframes.CartoContext(base_url=self.baseurl,
                                      api_key=self.apikey)

        batch_client = BatchSQLClient(cc.auth_client)
        job_response = batch_client.create([
            'select 1',
        ])
        job_status = BatchJobStatus(cc, job_response)

        possible_status = (
            'pending',
            'running',
            'done',
            'canceled',
            'unknown',
        )
        self.assertTrue(job_status.get_status() in possible_status)
        job_status._set_status('foo')

        self.assertEqual(job_status.get_status(), 'foo')

        new_status = job_status.status()
        self.assertSetEqual(set(new_status.keys()),
                            {'status', 'updated_at', 'created_at'})

        # job_id as str
        str_bjs = BatchJobStatus(cc, 'foo')
        self.assertIsNone(str_bjs.get_status())
        self.assertEqual(str_bjs.job_id, 'foo')
    def __init__(self, credentials):
        self.credentials = credentials or get_default_credentials()
        check_credentials(self.credentials)

        self.auth_client = _create_auth_client(self.credentials)
        self.sql_client = SQLClient(self.auth_client)
        self.copy_client = CopySQLClient(self.auth_client)
        self.batch_sql_client = BatchSQLClient(self.auth_client)
Example #7
0
def test_batch_multi_sql(api_key_auth_client_usr):
    sql = BatchSQLClient(api_key_auth_client_usr)

    # Create query
    data = sql.create(BATCH_SQL_MULTI_QUERY)

    # Get job ID
    job_id = data['job_id']

    # Cancel if not finished
    cancel_job_if_not_finished(sql, job_id)
Example #8
0
def test_batch_multi_sql(api_key_auth_client_usr):
    sql = BatchSQLClient(api_key_auth_client_usr)

    # Create query
    data = sql.create(BATCH_SQL_MULTI_QUERY)

    # Get job ID
    job_id = data['job_id']

    # Cancel if not finished
    cancel_job_if_not_finished(sql, job_id)
Example #9
0
    def __init__(self, carto_context, job):
        if isinstance(job, dict):
            self.job_id = job.get('job_id')
            self.last_status = job.get('status')
            self.created_at = job.get('created_at')
        elif isinstance(job, str):
            self.job_id = job
            self.last_status = None
            self.created_at = None

        self._batch_client = BatchSQLClient(carto_context.auth_client)
Example #10
0
    def initialize(self):
        if not self.api_url and self.user_name:
            self.api_url = "https://{}.carto.com/api/".format(self.user_name)
        elif not self.api_url and not self.user_name:
            raise Exception(
                'Not enough data provided to initialize the client')

        if self.org_name:
            self.client = APIKeyAuthClient(self.api_url, self.api_key,
                                           self.org_name)
        else:
            self.client = APIKeyAuthClient(self.api_url, self.api_key)

        self.sql_client = SQLClient(self.client)
        self.batch_client = BatchSQLClient(self.client)
Example #11
0
    def __init__(self, user, api_key, options={}):
        super().__init__(options)

        self.do_post = options.get('do_post', False)
        self.parse_json = options.get('parse_json', True)
        self.format = options.get('format', 'json')
        self.base_url_option = options.get('base_url', '')
        self.api_version = options.get('api_version', self.DEFAULT_API_VERSION)
        self.batch = options.get('batch', False)

        self.user = user
        self.api_key = api_key
        self.base_url = self._generate_base_url(user, self.base_url_option)

        # Carto Context for DataFrame handling
        self._carto_context = None

        # Carto client for COPYs
        self._copy_client = None

        self._auth_client = APIKeyAuthClient(api_key=api_key,
                                             base_url=self.base_url)
        self._sql_client = SQLClient(self._auth_client,
                                     api_version=self.api_version)

        self._batch_client = None
        if self.batch:
            self._batch_client = BatchSQLClient(self._auth_client)
Example #12
0
def test_batch_multi_sql(api_key_auth_client_usr):
    sql = BatchSQLClient(api_key_auth_client_usr)

    # Create query
    data = sql.create(BATCH_SQL_MULTI_QUERY)

    # Update status
    job_id = data['job_id']

    # Cancel if not finished
    try:
        confirmation = sql.cancel(job_id)
    except CartoException:
        pass
    else:
        assert confirmation == 'cancelled'
Example #13
0
    def batchsql(self, list_of_sqls, checkevery=1, maxtime=5):

        # pass in a list of sqls to execute
        # probably dont want to read a file with a million records and upload
        # open streets for ex is 1k rows, 500KB

        batchSQLClient = BatchSQLClient(self.auth_client)
        createJob = batchSQLClient.create(list_of_sqls)

        # https://github.com/CartoDB/carto-python

        # job_id looks like
        # 5171b8c4-8c03-4610-8797-5dd98ff3e61b

        # job looks like
        # {
        #  'user': '******',
        #  'status': 'done',
        #  'query': [{'query': 'drop table if exists foo', 'status': 'done'},
        #            {'query': 'create table foo (bar text)', 'status': 'done'},
        #            {...} {...}],
        # 'created_at': '2020-07-02T16:31:31.873Z',
        # 'updated_at': '2020-07-02T16:31:31.996Z',
        # 'job_id': '5171b8c4-8c03-4610-8797-5dd98ff3e61b'
        # }

        # queries are nested because you can add more sets to a running job

        readJob = batchSQLClient.read(createJob['job_id'])
        cheks = 0

        while (readJob['status'] != 'done'):
            time.sleep(checkevery)
            readJob = batchSQLClient.read(createJob['job_id'])
            cheks += 1

            if cheks > maxtime:
                return False

        return True
Example #14
0
    def __init__(self, csv_file_path, **kwargs):
        self.__set_max_csv_length()
        self.__set_defaults()
        for key, value in kwargs.items():
            try:
                setattr(self, key, int(value))
            except (ValueError, TypeError):
                if value in ("true", "True"):
                    setattr(self, key, True)
                elif value in ("false", "False"):
                    setattr(self, key, False)
                else:
                    setattr(self, key, value)

        self.__trim_columns()

        self.csv_file_path = csv_file_path

        if self.api_key:
            self.api_auth = APIKeyAuthClient(self.base_url, self.api_key)
            self.sql = SQLClient(self.api_auth)
            self.bsql = BatchSQLClient(self.api_auth)
Example #15
0
class UploadJob(object):
    def __init__(self, csv_file_path, **kwargs):
        self.__set_max_csv_length()
        self.__set_defaults()
        for key, value in kwargs.items():
            try:
                setattr(self, key, int(value))
            except (ValueError, TypeError):
                if value in ("true", "True"):
                    setattr(self, key, True)
                elif value in ("false", "False"):
                    setattr(self, key, False)
                else:
                    setattr(self, key, value)

        self.__trim_columns()

        self.csv_file_path = csv_file_path

        if self.api_key:
            self.api_auth = APIKeyAuthClient(self.base_url, self.api_key)
            self.sql = SQLClient(self.api_auth)
            self.bsql = BatchSQLClient(self.api_auth)

    def __set_defaults(self):
        self.delimiter = DEFAULT_DELIMITER
        self.x_column = DEFAULT_X_COLUMN
        self.y_column = DEFAULT_Y_COLUMN
        self.srid = DEFAULT_SRID
        self.chunk_size = DEFAULT_CHUNK_SIZE
        self.max_attempts = DEFAULT_MAX_ATTEMPTS
        self.file_encoding = DEFAULT_FILE_ENCOFING
        self.force_no_geometry = DEFAULT_FORCE_NO_GEOMETRY
        self.force_the_geom = DEFAULT_FORCE_THE_GEOM
        self.date_format = DEFAULT_DATE_FORMAT
        self.datetime_format = DEFAULT_DATETIME_FORMAT
        self.float_comma_separator = DEFAULT_FLOAT_COMMA_SEPARATOR
        self.float_thousand_separator = DEFAULT_FLOAT_THOUSAND_SEPARATOR
        self.date_columns = DEFAULT_DATE_COLUMNS
        self.observer = None

    def __set_max_csv_length(self):
        maxInt = sys.maxsize
        decrement = True

        while decrement:
            # decrease the maxInt value by factor 10
            # as long as the OverflowError occurs.
            decrement = False
            try:
                csv.field_size_limit(maxInt)
            except OverflowError:
                maxInt = int(maxInt / 10)
                decrement = True

    def __trim_columns(self):
        if self.columns is not None:
            self.columns = self.columns.replace(' ', '')

        if self.date_columns is not None:
            self.date_columns = self.date_columns.replace(' ', '')

    def run(self, start_chunk=1, end_chunk=None):
        if not isinstance(self.csv_file_path, str):
            self.do_run(self.csv_file_path, start_chunk, end_chunk)
        else:
            if sys.version_info <= (3, 0):
                with open(self.csv_file_path) as f:
                    self.do_run(f, start_chunk, end_chunk)
            else:
                with open(self.csv_file_path,
                          encoding=self.file_encoding) as f:
                    self.do_run(f, start_chunk, end_chunk)

    def notify(self, message_type, message):
        observer = getattr(self, "observer", None)
        if callable(observer):
            observer({"type": message_type, "msg": str(message)})
            return True
        return False

    def regenerate_overviews(self):
        query = 'select CDB_CreateOverviews(\'{table}\'::regclass)'.\
            format(table=self.table_name)
        job_result = self.bsql.create(query)
        return job_result['job_id']

    def check_job(self, job_id):
        return self.bsql.read(job_id)

    def create_geom_query(self, record):
        null_result = NULL_VALUE + ","
        if self.force_the_geom:
            return self.parse_column_value(record,
                                           self.force_the_geom,
                                           parse_float=False)

        if self.force_no_geometry:
            return null_result

        longitude = self.get_longitude(record)
        latitude = self.get_latitude(record)

        if longitude is None or latitude is None \
            or longitude is DEFAULT_COORD or latitude is DEFAULT_COORD:
            return null_result

        return "st_transform(st_setsrid(st_makepoint(" + \
            "{longitude}, {latitude}), {srid}), 4326),".\
            format(longitude=longitude, latitude=latitude, srid=self.srid)

    def parse_column_value(self, record, column, parse_float=True):
        null_result = NULL_VALUE + ","

        try:
            value = self.escape_value(record[column])
        except Exception:
            return null_result

        try:
            if self.is_date_column(column):
                try:
                    result = "'{value}',".format(
                        value=self.parse_date_column(record, column))
                except ValueError:
                    result = null_result
            elif parse_float:
                result = "{value},".format(value=self.parse_float_value(value))
            else:
                raise TypeError
        except (ValueError, TypeError):
            if value is None or not value.strip():
                result = null_result
            else:
                result = "'{value}',".format(value=value)
        return result

    def is_date_column(self, column):
        return column is not None and self.date_columns is not None and column in self.date_columns.split(
            ',')

    def parse_date_column(self, record, column):
        if not self.date_format or not self.datetime_format:
            raise ValueError
        try:
            return datetime.strptime(
                record[column],
                self.datetime_format).strftime(CARTO_DATE_FORMAT)
        except Exception:
            try:
                return datetime.strptime(
                    record[column],
                    self.date_format).strftime(CARTO_DATE_FORMAT)
            except Exception:
                raise ValueError

    def escape_value(self, value):
        return value.replace("'", "''")

    def get_longitude(self, record):
        try:
            longitude = self.get_coord(record, self.x_column)
            if abs(longitude) > MAX_LON:
                return None
        except TypeError:
            return DEFAULT_COORD
        else:
            return longitude

    def get_latitude(self, record):
        try:
            latitude = self.get_coord(record, self.y_column)
            if abs(latitude) > MAX_LAT:
                return None
        except TypeError:
            return DEFAULT_COORD
        else:
            return latitude

    def get_coord(self, record, type):
        try:
            coord = self.parse_float_value(record[type]) or DEFAULT_COORD
        except (ValueError, KeyError):
            coord = DEFAULT_COORD
        else:
            return coord

    def parse_float_value(self, value):
        if value.upper() in (val.upper() for val in FORBIDDEN_FLOAT_VALUES):
            raise ValueError
        if self.float_thousand_separator:
            value = value.replace(self.float_thousand_separator, "")
        if self.float_comma_separator:
            value = value.replace(self.float_comma_separator, ".")
        return float(value)

    def send(self, query, file_encoding, chunk_num):
        if sys.version_info <= (3, 0):
            query = query.decode(file_encoding).encode(UTF8)
        logger.debug("Chunk #{chunk_num}: {query}".format(
            chunk_num=(chunk_num + 1), query=query))
        for retry in range(self.max_attempts):
            try:
                self.sql.send(query)
            except Exception as e:
                logger.warning(
                    "Chunk #{chunk_num}: Retrying ({error_msg})".format(
                        chunk_num=(chunk_num + 1), error_msg=e))
                self.notify('error', e)
            else:
                logger.info("Chunk #{chunk_num}: Success!".format(
                    chunk_num=(chunk_num + 1)))
                self.notify('progress', chunk_num + 1)
                break
        else:
            logger.error("Chunk #{chunk_num}: Failed!)".format(
                chunk_num=(chunk_num + 1)))
            self.notify('error', "Failed " + str(chunk_num + 1))
Example #16
0
class BatchJobStatus(object):
    """Status of a write or query operation. Read more at `Batch SQL API docs
    <https://carto.com/docs/carto-engine/sql-api/batch-queries/>`__ about
    responses and how to interpret them.

    Example:

        Poll for a job's status if you've caught the :py:class:`BatchJobStatus`
        instance.

        .. code:: python

            import time
            job = cc.write(df, 'new_table',
                           lnglat=('lng_col', 'lat_col'))
            while True:
                curr_status = job.status()['status']
                if curr_status in ('done', 'failed', 'canceled', 'unknown', ):
                    print(curr_status)
                    break
                time.sleep(5)

        Create a :py:class:`BatchJobStatus` instance if you have a `job_id`
        output from a :py:meth:`CartoContext.write
        <cartoframes.context.CartoContext.write>` operation.

        .. code:: python

            >>> from cartoframes import CartoContext, BatchJobStatus
            >>> cc = CartoContext(username='******', api_key='...')
            >>> cc.write(df, 'new_table', lnglat=('lng', 'lat'))
            'BatchJobStatus(job_id='job-id-string', ...)'
            >>> batch_job = BatchJobStatus(cc, 'job-id-string')

    Attributes:
        job_id (str): Job ID of the Batch SQL API job
        last_status (str): Status of ``job_id`` job when last polled
        created_at (str): Time and date when job was created

    Args:
        carto_context (:py:class:`CartoContext <cartoframes.context.CartoContext>`):
          :py:class:`CartoContext <cartoframes.context.CartoContext>` instance
        job (dict or str): If a dict, job status dict returned after sending
            a Batch SQL API request. If str, a Batch SQL API job id.
    """
    def __init__(self, carto_context, job):
        if isinstance(job, dict):
            self.job_id = job.get('job_id')
            self.last_status = job.get('status')
            self.created_at = job.get('created_at')
        elif isinstance(job, str):
            self.job_id = job
            self.last_status = None
            self.created_at = None

        self._batch_client = BatchSQLClient(carto_context.auth_client)

    def __repr__(self):
        return ('BatchJobStatus(job_id=\'{job_id}\', '
                'last_status=\'{status}\', '
                'created_at=\'{created_at}\')'.format(
                    job_id=self.job_id,
                    status=self.last_status,
                    created_at=self.created_at))

    def _set_status(self, curr_status):
        self.last_status = curr_status

    def get_status(self):
        """return current status of job"""
        return self.last_status

    def status(self):
        """Checks the current status of job ``job_id``

        Returns:
            dict: Status and time it was updated

        Warns:
            UserWarning: If the job failed, a warning is raised with
                information about the failure
        """
        resp = self._batch_client.read(self.job_id)
        if 'failed_reason' in resp:
            warn('Job failed: {}'.format(resp.get('failed_reason')))
        self._set_status(resp.get('status'))
        return dict(status=resp.get('status'),
                    updated_at=resp.get('updated_at'),
                    created_at=resp.get('created_at'))
Example #17
0
class ContextManager:
    def __init__(self, credentials):
        self.credentials = credentials or get_default_credentials()
        check_credentials(self.credentials)

        self.auth_client = _create_auth_client(self.credentials)
        self.sql_client = SQLClient(self.auth_client)
        self.copy_client = CopySQLClient(self.auth_client)
        self.batch_sql_client = BatchSQLClient(self.auth_client)

    @not_found
    def execute_query(self,
                      query,
                      parse_json=True,
                      do_post=True,
                      format=None,
                      **request_args):
        return self.sql_client.send(query.strip(), parse_json, do_post, format,
                                    **request_args)

    @not_found
    def execute_long_running_query(self, query):
        return self.batch_sql_client.create_and_wait_for_completion(
            query.strip())

    def copy_to(self,
                source,
                schema=None,
                limit=None,
                retry_times=DEFAULT_RETRY_TIMES):
        query = self.compute_query(source, schema)
        columns = self._get_query_columns_info(query)
        copy_query = self._get_copy_query(query, columns, limit)
        return self._copy_to(copy_query, columns, retry_times)

    def copy_from(self,
                  gdf,
                  table_name,
                  if_exists='fail',
                  cartodbfy=True,
                  retry_times=DEFAULT_RETRY_TIMES):
        schema = self.get_schema()
        table_name = self.normalize_table_name(table_name)
        df_columns = get_dataframe_columns_info(gdf)

        if self.has_table(table_name, schema):
            if if_exists == 'replace':
                table_query = self._compute_query_from_table(
                    table_name, schema)
                table_columns = self._get_query_columns_info(table_query)

                if self._compare_columns(df_columns, table_columns):
                    # Equal columns: truncate table
                    self._truncate_table(table_name, schema, cartodbfy)
                else:
                    # Diff columns: truncate table and drop + add columns
                    self._truncate_and_drop_add_columns(
                        table_name, schema, df_columns, table_columns,
                        cartodbfy)

            elif if_exists == 'fail':
                raise Exception(
                    'Table "{schema}.{table_name}" already exists in your CARTO account. '
                    'Please choose a different `table_name` or use '
                    'if_exists="replace" to overwrite it.'.format(
                        table_name=table_name, schema=schema))
            else:  # 'append'
                pass
        else:
            self._create_table_from_columns(table_name, schema, df_columns,
                                            cartodbfy)

        self._copy_from(gdf, table_name, df_columns, retry_times)
        return table_name

    def create_table_from_query(self,
                                query,
                                table_name,
                                if_exists,
                                cartodbfy=True):
        schema = self.get_schema()
        table_name = self.normalize_table_name(table_name)

        if self.has_table(table_name, schema):
            if if_exists == 'replace':
                # TODO: review logic copy_from
                self._drop_create_table_from_query(table_name, schema, query,
                                                   cartodbfy)
            elif if_exists == 'fail':
                raise Exception(
                    'Table "{schema}.{table_name}" already exists in your CARTO account. '
                    'Please choose a different `table_name` or use '
                    'if_exists="replace" to overwrite it.'.format(
                        table_name=table_name, schema=schema))
            else:  # 'append'
                pass
        else:
            self._drop_create_table_from_query(table_name, schema, query,
                                               cartodbfy)

        return table_name

    def list_tables(self, schema=None):
        datasets = DatasetManager(self.auth_client).filter(
            show_table_size_and_row_count='false',
            show_table='false',
            show_stats='false',
            show_likes='false',
            show_liked='false',
            show_permission='false',
            show_uses_builder_features='false',
            show_synchronization='false',
            load_totals='false')
        datasets.sort(key=lambda x: x.updated_at, reverse=True)
        return pd.DataFrame([dataset.name for dataset in datasets],
                            columns=['tables'])

    def has_table(self, table_name, schema=None):
        query = self.compute_query(table_name, schema)
        return self._check_exists(query)

    def delete_table(self, table_name):
        query = _drop_table_query(table_name)
        output = self.execute_query(query)
        return not ('notices' in output
                    and 'does not exist' in output['notices'][0])

    def rename_table(self, table_name, new_table_name, if_exists='fail'):
        new_table_name = self.normalize_table_name(new_table_name)

        if table_name == new_table_name:
            raise ValueError(
                'Table names are equal. Please choose a different table name.')

        if not self.has_table(table_name):
            raise Exception(
                'Table "{table_name}" does not exist in your CARTO account.'.
                format(table_name=table_name))

        if self.has_table(new_table_name):
            if if_exists == 'replace':
                log.debug('Removing table "{}"'.format(new_table_name))
                self.delete_table(new_table_name)
            elif if_exists == 'fail':
                raise Exception(
                    'Table "{new_table_name}" already exists in your CARTO account. '
                    'Please choose a different `new_table_name` or use '
                    'if_exists="replace" to overwrite it.'.format(
                        new_table_name=new_table_name))

        self._rename_table(table_name, new_table_name)
        return new_table_name

    def update_privacy_table(self, table_name, privacy=None):
        DatasetInfo(self.auth_client, table_name).update_privacy(privacy)

    def get_privacy(self, table_name):
        return DatasetInfo(self.auth_client, table_name).privacy

    def get_schema(self):
        """Get user schema from current credentials"""
        query = 'SELECT current_schema()'
        result = self.execute_query(query, do_post=False)
        schema = result['rows'][0]['current_schema']
        log.debug('schema: {}'.format(schema))
        return schema

    def get_geom_type(self, query):
        """Fetch geom type of a remote table or query"""
        distict_query = '''
            SELECT distinct ST_GeometryType(the_geom) AS geom_type
            FROM ({}) q
            LIMIT 5
        '''.format(query)
        response = self.execute_query(distict_query, do_post=False)
        if response and response.get('rows') and len(response.get('rows')) > 0:
            st_geom_type = response.get('rows')[0].get('geom_type')
            if st_geom_type:
                return map_geom_type(st_geom_type[3:])
        return None

    def get_num_rows(self, query):
        """Get the number of rows in the query"""
        result = self.execute_query(
            'SELECT COUNT(*) FROM ({query}) _query'.format(query=query))
        return result.get('rows')[0].get('count')

    def get_bounds(self, query):
        extent_query = '''
            SELECT ARRAY[
                ARRAY[st_xmin(geom_env), st_ymin(geom_env)],
                ARRAY[st_xmax(geom_env), st_ymax(geom_env)]
            ] bounds FROM (
                SELECT ST_Extent(the_geom) geom_env
                FROM ({}) q
            ) q;
        '''.format(query)
        response = self.execute_query(extent_query, do_post=False)
        if response and response.get('rows') and len(response.get('rows')) > 0:
            return response.get('rows')[0].get('bounds')
        return None

    def get_column_names(self, source, schema=None, exclude=None):
        query = self.compute_query(source, schema)
        columns = [c.name for c in self._get_query_columns_info(query)]

        if exclude and isinstance(exclude, list):
            columns = list(set(columns) - set(exclude))

        return columns

    def is_public(self, query):
        # Used to detect public tables in queries in the publication,
        # because privacy only works for tables.
        public_auth_client = _create_auth_client(self.credentials, public=True)
        public_sql_client = SQLClient(public_auth_client)
        exists_query = 'EXPLAIN {}'.format(query)
        try:
            public_sql_client.send(exists_query, do_post=False)
            return True
        except CartoException:
            return False

    def get_table_names(self, query):
        # Used to detect tables in queries in the publication.
        query = 'SELECT CDB_QueryTablesText($q${}$q$) as tables'.format(query)
        result = self.execute_query(query)
        tables = []
        if result['total_rows'] > 0 and result['rows'][0]['tables']:
            # Dataset_info only works with tables without schema
            tables = [
                table.split('.')[1] if '.' in table else table
                for table in result['rows'][0]['tables']
            ]
        return tables

    def _compare_columns(self, a, b):
        a_copy = [i for i in a if _not_reserved(i.name)]
        b_copy = [i for i in b if _not_reserved(i.name)]

        a_copy.sort()
        b_copy.sort()

        return a_copy == b_copy

    def _drop_create_table_from_query(self, table_name, schema, query,
                                      cartodbfy):
        log.debug('DROP + CREATE table "{}"'.format(table_name))
        query = 'BEGIN; {drop}; {create}; {cartodbfy}; COMMIT;'.format(
            drop=_drop_table_query(table_name),
            create=_create_table_from_query_query(table_name, query),
            cartodbfy=_cartodbfy_query(table_name, schema)
            if cartodbfy else '')
        self.execute_long_running_query(query)

    def _create_table_from_columns(self, table_name, schema, columns,
                                   cartodbfy):
        log.debug('CREATE table "{}"'.format(table_name))
        query = 'BEGIN; {create}; {cartodbfy}; COMMIT;'.format(
            create=_create_table_from_columns_query(table_name, columns),
            cartodbfy=_cartodbfy_query(table_name, schema)
            if cartodbfy else '')
        self.execute_long_running_query(query)

    def _truncate_table(self, table_name, schema, cartodbfy):
        log.debug('TRUNCATE table "{}"'.format(table_name))
        query = 'BEGIN; {truncate}; {cartodbfy}; COMMIT;'.format(
            truncate=_truncate_table_query(table_name),
            cartodbfy=_cartodbfy_query(table_name, schema)
            if cartodbfy else '')
        self.execute_long_running_query(query)

    def _truncate_and_drop_add_columns(self, table_name, schema, df_columns,
                                       table_columns, cartodbfy):
        log.debug(
            'TRUNCATE AND DROP + ADD columns table "{}"'.format(table_name))
        query = '{regenerate}; BEGIN; {truncate}; {drop_columns}; {add_columns}; {cartodbfy}; COMMIT;'.format(
            regenerate=_regenerate_table_query(table_name, schema)
            if self._check_regenerate_table_exists() else '',
            truncate=_truncate_table_query(table_name),
            drop_columns=_drop_columns_query(table_name, table_columns),
            add_columns=_add_columns_query(table_name, df_columns),
            cartodbfy=_cartodbfy_query(table_name, schema)
            if cartodbfy else '')
        self.execute_long_running_query(query)

    def compute_query(self, source, schema=None):
        if is_sql_query(source):
            return source
        schema = schema or self.get_schema()
        return self._compute_query_from_table(source, schema)

    def _compute_query_from_table(self, table_name, schema):
        return 'SELECT * FROM "{schema}"."{table_name}"'.format(
            schema=schema or 'public', table_name=table_name)

    def _check_exists(self, query):
        exists_query = 'EXPLAIN {}'.format(query)
        try:
            self.execute_query(exists_query, do_post=False)
            return True
        except CartoException:
            return False

    def _check_regenerate_table_exists(self):
        query = '''
            SELECT 1
            FROM pg_catalog.pg_proc p
            LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
            WHERE p.proname = 'cdb_regeneratetable' AND n.nspname = 'cartodb';
        '''
        result = self.execute_query(query)
        return len(result['rows']) > 0

    def _get_query_columns_info(self, query):
        query = 'SELECT * FROM ({}) _q LIMIT 0'.format(query)
        table_info = self.execute_query(query)
        return get_query_columns_info(table_info['fields'])

    def _get_copy_query(self, query, columns, limit):
        query_columns = [
            double_quote(column.name) for column in columns
            if (column.name != 'the_geom_webmercator')
        ]

        query = 'SELECT {columns} FROM ({query}) _q'.format(
            query=query, columns=','.join(query_columns))

        if limit is not None:
            if isinstance(limit, int) and (limit >= 0):
                query += ' LIMIT {limit}'.format(limit=limit)
            else:
                raise ValueError("`limit` parameter must an integer >= 0")

        return query

    @retry_copy
    def _copy_to(self, query, columns, retry_times=DEFAULT_RETRY_TIMES):
        log.debug('COPY TO')
        copy_query = "COPY ({0}) TO stdout WITH (FORMAT csv, HEADER true, NULL '{1}')".format(
            query, PG_NULL)

        raw_result = self.copy_client.copyto_stream(copy_query)

        converters = obtain_converters(columns)
        parse_dates = date_columns_names(columns)

        df = pd.read_csv(raw_result,
                         converters=converters,
                         parse_dates=parse_dates)

        return df

    @retry_copy
    def _copy_from(self,
                   dataframe,
                   table_name,
                   columns,
                   retry_times=DEFAULT_RETRY_TIMES):
        log.debug('COPY FROM')
        query = """
            COPY {table_name}({columns}) FROM stdin WITH (FORMAT csv, DELIMITER '|', NULL '{null}');
        """.format(table_name=table_name,
                   null=PG_NULL,
                   columns=','.join(
                       double_quote(column.dbname)
                       for column in columns)).strip()
        data = _compute_copy_data(dataframe, columns)

        self.copy_client.copyfrom(query, data)

    def _rename_table(self, table_name, new_table_name):
        query = _rename_table_query(table_name, new_table_name)
        self.execute_query(query)

    def normalize_table_name(self, table_name):
        norm_table_name = normalize_name(table_name)
        if norm_table_name != table_name:
            log.debug('Table name normalized: "{}"'.format(norm_table_name))
        return norm_table_name
                    help='Set the base URL. For example:' +
                    ' https://username.carto.com/ ' +
                    '(defaults to env variable CARTO_API_URL)')

parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY'] if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentification to CARTO
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
    batchSQLClient = BatchSQLClient(auth_client)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# Batch SQL API operations
if args.operation == 'create':
    # create a batch api job
    createJob = batchSQLClient.create(args.query)
    for a, b in createJob.items():
        logger.info('{key}: {value}'.format(key=a, value=b))
elif args.operation == 'read':
    readJob = batchSQLClient.read(args.job_id)
    for a, b in readJob.items():
        logger.info('{key}: {value}'.format(key=a, value=b))
Example #19
0
class CARTOUser(object):
    def __init__(self,
                 user_name=None,
                 org_name=None,
                 api_url=None,
                 api_key=None,
                 check_ssl=True):
        self.user_name = user_name
        self.org_name = org_name
        self.api_url = api_url
        self.api_key = api_key

        if not check_ssl:
            old_request = requests.Session.request
            requests.Session.request = partialmethod(old_request, verify=False)
            warnings.filterwarnings('ignore', 'Unverified HTTPS request')

    def initialize(self):
        if not self.api_url and self.user_name:
            self.api_url = "https://{}.carto.com/api/".format(self.user_name)
        elif not self.api_url and not self.user_name:
            raise Exception(
                'Not enough data provided to initialize the client')

        if self.org_name:
            self.client = APIKeyAuthClient(self.api_url, self.api_key,
                                           self.org_name)
        else:
            self.client = APIKeyAuthClient(self.api_url, self.api_key)

        self.sql_client = SQLClient(self.client)
        self.batch_client = BatchSQLClient(self.client)
        self.copy_client = CopySQLClient(self.client)

    def execute_sql(self, query, parse_json=True, format=None, do_post=False):
        try:
            try:
                self.client
            except AttributeError:
                self.initialize()
            return self.sql_client.send(query,
                                        parse_json=parse_json,
                                        format=format,
                                        do_post=do_post)
        except CartoException as e:
            raise Exception(e.args[0].args[0][0])

    def batch_check(self, job_id):
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.read(job_id)

    def batch_create(self, query):
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.create(query)

    def batch_cancel(self, job_id):
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.cancel(job_id)

    def get_dataset_manager(self):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        return DatasetManager(self.client)

    def get_sync_manager(self):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        return SyncTableJobManager(self.client)

    def upload(self, uri, sync_time=None):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()

        dataset_manager = DatasetManager(self.client)

        if sync_time:
            return dataset_manager.create(uri, sync_time)
        else:
            return dataset_manager.create(uri)

    def copy_from(self, path, query, tablename=None, delimiter=','):
        try:
            self.copy_client
        except AttributeError:
            self.initialize()

        if tablename is None:
            tablename = Path(path).stem

        if query is None:
            with open(path, 'rb') as myfile:
                headers = next(myfile).strip().decode('utf8')
                query = f"""COPY {tablename} ({headers}) FROM stdin
                (FORMAT CSV, DELIMITER '{delimiter}', HEADER false, QUOTE '"')"""
                return self.copy_client.copyfrom_file_object(query, myfile)
        return self.copy_client.copyfrom_file_path(query, path)

    def copy_to(self, query, output, delimiter=','):
        try:
            self.copy_client
        except AttributeError:
            self.initialize()

        copy_query = f"""COPY ({query}) TO stdout WITH
        (FORMAT CSV, DELIMITER '{delimiter}', HEADER true, QUOTE '"')"""

        return self.copy_client.copyto_file_path(copy_query, output)
Example #20
0
class CARTOUser(object):
    def __init__(self,
                 user_name=None,
                 org_name=None,
                 api_url=None,
                 api_key=None,
                 check_ssl=True):
        self.user_name = user_name
        self.org_name = org_name
        self.api_url = api_url
        self.api_key = api_key

        if not check_ssl:
            old_request = requests.Session.request
            requests.Session.request = partialmethod(old_request, verify=False)
            warnings.filterwarnings('ignore', 'Unverified HTTPS request')

    def initialize(self):
        if not self.api_url and self.user_name:
            self.api_url = "https://{}.carto.com/api/".format(self.user_name)
        elif not self.api_url and not self.user_name:
            raise Exception(
                'Not enough data provided to initialize the client')

        if self.org_name:
            self.client = APIKeyAuthClient(self.api_url, self.api_key,
                                           self.org_name)
        else:
            self.client = APIKeyAuthClient(self.api_url, self.api_key)

        self.sql_client = SQLClient(self.client)
        self.batch_client = BatchSQLClient(self.client)

    def execute_sql(self, query, parse_json=True, format=None, do_post=False):
        try:
            try:
                self.client
            except AttributeError:
                self.initialize()
            return self.sql_client.send(query,
                                        parse_json=parse_json,
                                        format=format,
                                        do_post=do_post)
        except CartoException as e:
            raise Exception(e.args[0].args[0][0])

    def batch_check(self, job_id):
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.read(job_id)

    def batch_create(self, query):
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.create(query)

    def batch_cancel(self, job_id):
        try:
            self.batch_client
        except AttributeError:
            self.initialize()
        return self.batch_client.cancel(job_id)

    def get_dataset_manager(self):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        return DatasetManager(self.client)

    def get_sync_manager(self):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        return SyncTableJobManager(self.client)

    def upload(self, uri, sync_time=None):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()

        dataset_manager = DatasetManager(self.client)

        if sync_time:
            return dataset_manager.create(uri, sync_time)
        else:
            return dataset_manager.create(uri)
parser.add_argument('--organization', type=str, dest='organization',
                    default=os.environ['CARTO_ORG'] if 'CARTO_ORG' in os.environ else '',
                    help='Set the name of the organization' +
                    ' account (defaults to env variable CARTO_ORG)')

parser.add_argument('--base_url', type=str, dest='CARTO_BASE_URL',
                    default=os.environ['CARTO_API_URL'] if 'CARTO_API_URL' in os.environ else '',
                    help='Set the base URL. For example:' +
                    ' https://username.carto.com/ ' +
                    '(defaults to env variable CARTO_API_URL)')

parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY'] if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentification to CARTO
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
    batchSQLClient = BatchSQLClient(auth_client)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

job = batchSQLClient.create_and_wait_for_completion(args.query)
logger.info('Job finished with status {status}'.format(status=job['status']))
Example #22
0
                    ' https://username.carto.com/ ' +
                    '(defaults to env variable CARTO_API_URL)')

parser.add_argument('--api_key',
                    dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY'],
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentification to CARTO
auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY,
                               args.organization)

batchSQLClient = BatchSQLClient(auth_client)

# Batch SQL API operations
if args.operation == 'create':
    # create a batch api job
    createJob = batchSQLClient.create(args.query)
    for a, b in createJob.items():
        logger.info('{key}: {value}'.format(key=a, value=b))
elif args.operation == 'read':
    readJob = batchSQLClient.read(args.job_id)
    for a, b in readJob.items():
        logger.info('{key}: {value}'.format(key=a, value=b))
elif args.operation == 'update':
    updateJob = batchSQLClient.update(args.job_id, args.query)
    for a, b in updateJob.items():
        logger.info('{key}: {value}'.format(key=a, value=b))
Example #23
0
                    dest='CARTO_BASE_URL',
                    default=os.environ['CARTO_API_URL']
                    if 'CARTO_API_URL' in os.environ else '',
                    help='Set the base URL. For example:' +
                    ' https://username.carto.com/ ' +
                    '(defaults to env variable CARTO_API_URL)')

parser.add_argument('--api_key',
                    dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY']
                    if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentification to CARTO
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY,
                                   args.organization)
    batchSQLClient = BatchSQLClient(auth_client)
else:
    logger.error(
        'You need to provide valid credentials, run with -h parameter for details'
    )
    import sys
    sys.exit(1)

job = batchSQLClient.create_and_wait_for_completion(args.query)
logger.info('Job finished with status {status}'.format(status=job['status']))
class ContextManager:
    def __init__(self, credentials):
        self.credentials = credentials or get_default_credentials()
        check_credentials(self.credentials)

        self.auth_client = _create_auth_client(self.credentials)
        self.sql_client = SQLClient(self.auth_client)
        self.copy_client = CopySQLClient(self.auth_client)
        self.batch_sql_client = BatchSQLClient(self.auth_client)

    def execute_query(self,
                      query,
                      parse_json=True,
                      do_post=True,
                      format=None,
                      **request_args):
        return self.sql_client.send(query.strip(), parse_json, do_post, format,
                                    **request_args)

    def execute_long_running_query(self, query):
        return self.batch_sql_client.create_and_wait_for_completion(
            query.strip())

    def copy_to(self,
                source,
                schema,
                limit=None,
                retry_times=DEFAULT_RETRY_TIMES):
        query = self.compute_query(source, schema)
        columns = self._get_query_columns_info(query)
        copy_query = self._get_copy_query(query, columns, limit)
        return self._copy_to(copy_query, columns, retry_times)

    def copy_from(self, gdf, table_name, if_exists='fail', cartodbfy=True):
        schema = self.get_schema()
        table_name = self.normalize_table_name(table_name)
        columns = get_dataframe_columns_info(gdf)

        if if_exists == 'replace' or not self.has_table(table_name, schema):
            log.debug('Creating table "{}"'.format(table_name))
            self._create_table_from_columns(table_name, columns, schema,
                                            cartodbfy)
        elif if_exists == 'fail':
            raise Exception(
                'Table "{schema}.{table_name}" already exists in your CARTO account. '
                'Please choose a different `table_name` or use '
                'if_exists="replace" to overwrite it.'.format(
                    table_name=table_name, schema=schema))
        else:  # 'append'
            pass

        self._copy_from(gdf, table_name, columns)
        return table_name

    def create_table_from_query(self,
                                query,
                                table_name,
                                if_exists,
                                cartodbfy=True):
        schema = self.get_schema()
        table_name = self.normalize_table_name(table_name)

        if if_exists == 'replace' or not self.has_table(table_name, schema):
            log.debug('Creating table "{}"'.format(table_name))
            self._create_table_from_query(query, table_name, schema, cartodbfy)
        elif if_exists == 'fail':
            raise Exception(
                'Table "{schema}.{table_name}" already exists in your CARTO account. '
                'Please choose a different `table_name` or use '
                'if_exists="replace" to overwrite it.'.format(
                    table_name=table_name, schema=schema))
        else:  # 'append'
            pass

        return table_name

    def has_table(self, table_name, schema=None):
        query = self.compute_query(table_name, schema)
        return self._check_exists(query)

    def delete_table(self, table_name):
        query = _drop_table_query(table_name)
        output = self.execute_query(query)
        return not ('notices' in output
                    and 'does not exist' in output['notices'][0])

    def rename_table(self, table_name, new_table_name, if_exists='fail'):
        new_table_name = self.normalize_table_name(new_table_name)

        if table_name == new_table_name:
            raise ValueError(
                'Table names are equal. Please choose a different table name.')

        if not self.has_table(table_name):
            raise Exception(
                'Table "{table_name}" does not exist in your CARTO account.'.
                format(table_name=table_name))

        if self.has_table(new_table_name):
            if if_exists == 'replace':
                log.debug('Removing table "{}"'.format(new_table_name))
                self.delete_table(new_table_name)
            elif if_exists == 'fail':
                raise Exception(
                    'Table "{new_table_name}" already exists in your CARTO account. '
                    'Please choose a different `new_table_name` or use '
                    'if_exists="replace" to overwrite it.'.format(
                        new_table_name=new_table_name))

        self._rename_table(table_name, new_table_name)
        return new_table_name

    def update_privacy_table(self, table_name, privacy=None):
        DatasetInfo(self.auth_client, table_name).update_privacy(privacy)

    def get_privacy(self, table_name):
        return DatasetInfo(self.auth_client, table_name).privacy

    def get_schema(self):
        """Get user schema from current credentials"""
        query = 'SELECT current_schema()'
        result = self.execute_query(query, do_post=False)
        return result['rows'][0]['current_schema']

    def get_geom_type(self, query):
        """Fetch geom type of a remote table or query"""
        distict_query = '''
            SELECT distinct ST_GeometryType(the_geom) AS geom_type
            FROM ({}) q
            LIMIT 5
        '''.format(query)
        response = self.execute_query(distict_query, do_post=False)
        if response and response.get('rows') and len(response.get('rows')) > 0:
            st_geom_type = response.get('rows')[0].get('geom_type')
            if st_geom_type:
                return map_geom_type(st_geom_type[3:])
        return None

    def get_num_rows(self, query):
        """Get the number of rows in the query"""
        result = self.execute_query(
            "SELECT COUNT(*) FROM ({query}) _query".format(query=query))
        return result.get('rows')[0].get('count')

    def get_bounds(self, query):
        extent_query = '''
            SELECT ARRAY[
                ARRAY[st_xmin(geom_env), st_ymin(geom_env)],
                ARRAY[st_xmax(geom_env), st_ymax(geom_env)]
            ] bounds FROM (
                SELECT ST_Extent(the_geom) geom_env
                FROM ({}) q
            ) q;
        '''.format(query)
        response = self.execute_query(extent_query, do_post=False)
        if response and response.get('rows') and len(response.get('rows')) > 0:
            return response.get('rows')[0].get('bounds')
        return None

    def get_column_names(self, source, schema=None, exclude=None):
        query = self.compute_query(source, schema)
        columns = [c.name for c in self._get_query_columns_info(query)]

        if exclude and isinstance(exclude, list):
            columns = list(set(columns) - set(exclude))

        return columns

    def is_public(self, query):
        # Used to detect public tables in queries in the publication,
        # because privacy only works for tables.
        public_auth_client = _create_auth_client(self.credentials, public=True)
        public_sql_client = SQLClient(public_auth_client)
        exists_query = 'EXPLAIN {}'.format(query)
        try:
            public_sql_client.send(exists_query, do_post=False)
            return True
        except CartoException:
            return False

    def get_table_names(self, query):
        # Used to detect tables in queries in the publication.
        query = 'SELECT CDB_QueryTablesText(\'{}\') as tables'.format(query)
        result = self.execute_query(query)
        tables = []
        if result['total_rows'] > 0 and result['rows'][0]['tables']:
            # Dataset_info only works with tables without schema
            tables = [
                table.split('.')[1] if '.' in table else table
                for table in result['rows'][0]['tables']
            ]
        return tables

    def _create_table_from_query(self,
                                 query,
                                 table_name,
                                 schema,
                                 cartodbfy=True):
        query = 'BEGIN; {drop}; {create}; {cartodbfy}; COMMIT;'.format(
            drop=_drop_table_query(table_name),
            create=_create_table_from_query_query(table_name, query),
            cartodbfy=_cartodbfy_query(table_name, schema)
            if cartodbfy else '')
        self.execute_long_running_query(query)

    def _create_table_from_columns(self,
                                   table_name,
                                   columns,
                                   schema,
                                   cartodbfy=True):
        query = 'BEGIN; {drop}; {create}; {cartodbfy}; COMMIT;'.format(
            drop=_drop_table_query(table_name),
            create=_create_table_from_columns_query(table_name, columns),
            cartodbfy=_cartodbfy_query(table_name, schema)
            if cartodbfy else '')
        self.execute_long_running_query(query)

    def compute_query(self, source, schema=None):
        if is_sql_query(source):
            return source
        schema = schema or self.get_schema()
        return self._compute_query_from_table(source, schema)

    def _compute_query_from_table(self, table_name, schema):
        return 'SELECT * FROM "{schema}"."{table_name}"'.format(
            schema=schema or 'public', table_name=table_name)

    def _check_exists(self, query):
        exists_query = 'EXPLAIN {}'.format(query)
        try:
            self.execute_query(exists_query, do_post=False)
            return True
        except CartoException:
            return False

    def _get_query_columns_info(self, query):
        query = 'SELECT * FROM ({}) _q LIMIT 0'.format(query)
        table_info = self.execute_query(query)
        return Column.from_sql_api_fields(table_info['fields'])

    def _get_copy_query(self, query, columns, limit):
        query_columns = [
            column.name for column in columns
            if (column.name != 'the_geom_webmercator')
        ]

        query = 'SELECT {columns} FROM ({query}) _q'.format(
            query=query, columns=','.join(query_columns))

        if limit is not None:
            if isinstance(limit, int) and (limit >= 0):
                query += ' LIMIT {limit}'.format(limit=limit)
            else:
                raise ValueError("`limit` parameter must an integer >= 0")

        return query

    def _copy_to(self, query, columns, retry_times):
        copy_query = 'COPY ({0}) TO stdout WITH (FORMAT csv, HEADER true, NULL \'{1}\')'.format(
            query, PG_NULL)

        try:
            raw_result = self.copy_client.copyto_stream(copy_query)
        except CartoRateLimitException as err:
            if retry_times > 0:
                retry_times -= 1
                warn('Read call rate limited. Waiting {s} seconds'.format(
                    s=err.retry_after))
                time.sleep(err.retry_after)
                warn('Retrying...')
                return self._copy_to(query, columns, retry_times)
            else:
                warn((
                    'Read call was rate-limited. '
                    'This usually happens when there are multiple queries being read at the same time.'
                ))
                raise err

        converters = obtain_converters(columns)
        parse_dates = date_columns_names(columns)

        df = read_csv(raw_result,
                      converters=converters,
                      parse_dates=parse_dates)

        return df

    def _copy_from(self, dataframe, table_name, columns):
        query = """
            COPY {table_name}({columns}) FROM stdin WITH (FORMAT csv, DELIMITER '|', NULL '{null}');
        """.format(table_name=table_name,
                   null=PG_NULL,
                   columns=','.join(column.dbname
                                    for column in columns)).strip()
        data = _compute_copy_data(dataframe, columns)
        self.copy_client.copyfrom(query, data)

    def _rename_table(self, table_name, new_table_name):
        query = _rename_table_query(table_name, new_table_name)
        self.execute_query(query)

    def normalize_table_name(self, table_name):
        norm_table_name = normalize_name(table_name)
        if norm_table_name != table_name:
            log.debug('Table name normalized: "{}"'.format(norm_table_name))
        return norm_table_name