コード例 #1
0
def fix_carto_geofields(asset_id=None):
    """Recompute the_geom and the_geom_webmercator for rows in the Carto table.

    Pushing data through raw SQL calls does not trigger Carto's own processes
    that populate the_geom/the_geom_webmercator, so we set them ourselves, as
    documented at https://gis.stackexchange.com/a/201908

    :param asset_id: when given, restrict the update to that single row,
        which significantly speeds up saving a single model instance.
    """
    auth_client = APIKeyAuthClient(api_key=CARTO_API_KEY,
                                   base_url=USR_BASE_URL)
    sql = SQLClient(auth_client)

    # 'longitude' and 'latitude' are the names of the corresponding fields
    # in the CSV file.
    where = ""
    if asset_id is not None:
        # int() guards against SQL injection through asset_id.
        where = f" WHERE id = {int(asset_id)}"

    q = (f"UPDATE {TABLE_NAME} SET the_geom = "
         f"ST_SetSRID(st_makepoint(longitude, latitude),4326){where}")
    results1 = sql.send(q)  # Takes about 12 seconds for 100,000 rows.
    # NOTE: a CSV export taken immediately afterwards can lag the view on the
    # Carto web site by a minute or two.

    # BUG FIX: this used to be a plain SELECT, which computed the
    # web-mercator geometry but never stored it; it must be an UPDATE.
    # (CDB_LatLng(lat, lon) is an equivalent shorthand for
    # ST_SetSRID(st_makepoint(lon, lat), 4326).)
    q = (f"UPDATE {TABLE_NAME} SET the_geom_webmercator = "
         f"ST_Transform(ST_SetSRID(st_makepoint(longitude, latitude),4326),3857){where}")
    results2 = sql.send(q)
    print(
        f"Tried to add values for the the_geom and the_geom_webmercator fields in {TABLE_NAME}. The requests completed in {results1['time']} s and {results2['time']} s."
    )
コード例 #2
0
    def clean(self):
        """Validate the configured Carto tables.

        Checks that the main table (and the optional remap table) can be
        queried and contain the expected columns.

        Raises:
            ValidationError: if a table is unreachable, empty, or missing a
                required column.
        """
        domain = self.cleaned_data['domain']
        table_name = self.cleaned_data['table_name']
        name_col = self.cleaned_data['name_col']
        pcode_col = self.cleaned_data['pcode_col']
        parent_code_col = self.cleaned_data['parent_code_col']
        remap_table_name = self.cleaned_data['remap_table_name']
        auth_client = LocationsCartoNoAuthClient(
            base_url="https://{}.carto.com/".format(str(domain)))

        sql_client = SQLClient(auth_client)
        try:
            sites = sql_client.send(
                'select * from {} limit 1'.format(table_name))
        except CartoException:
            logger.exception("CartoDB exception occured")
            raise ValidationError(
                "Couldn't connect to CartoDB table: {}".format(table_name))
        else:
            # BUG FIX: an empty table used to raise an unhandled IndexError
            # on rows[0]; report it as a validation problem instead.
            if not sites['rows']:
                raise ValidationError(
                    'Table is empty: {}'.format(table_name))
            row = sites['rows'][0]
            if name_col not in row:
                raise ValidationError(
                    'The Name column ({}) is not in table: {}'.format(
                        name_col, table_name))
            if pcode_col not in row:
                raise ValidationError(
                    'The PCode column ({}) is not in table: {}'.format(
                        pcode_col, table_name))
            if parent_code_col and parent_code_col not in row:
                raise ValidationError(
                    'The Parent Code column ({}) is not in table: {}'.format(
                        parent_code_col, table_name))

        if remap_table_name:
            try:
                remap_table = sql_client.send(
                    'select * from {} limit 1'.format(remap_table_name))
            except CartoException:  # pragma: no-cover
                logger.exception("CartoDB exception occured")
                raise ValidationError(
                    "Couldn't connect to the CartoDB remap table: {}".format(
                        remap_table_name))
            else:
                # Same empty-table guard as above.
                if not remap_table['rows']:
                    raise ValidationError(
                        'Remap table is empty: {}'.format(remap_table_name))
                row = remap_table['rows'][0]

                # 'in row' is equivalent to 'in row.keys()' and idiomatic.
                if 'old_pcode' not in row:
                    raise ValidationError(
                        'The Old PCode column ({}) is not in table: {}'.format(
                            'old_pcode', remap_table_name))

                if 'new_pcode' not in row:
                    raise ValidationError(
                        'The New PCode column ({}) is not in table: {}'.format(
                            'new_pcode', remap_table_name))

        return self.cleaned_data
コード例 #3
0
def validate_locations_in_use(carto_table_pk):
    """Check that no in-use location would be orphaned by a Carto import.

    Collects the pcodes currently in the database for the table's location
    type, the pcodes present in the Carto dataset, and the old pcodes of the
    remap table (if any).  Database pcodes found in neither set are
    "orphaned"; if any of them are still in use, the import must not proceed.

    Returns True when validation passes.

    Raises:
        CartoDBTable.DoesNotExist: if *carto_table_pk* is unknown.
        CartoException: if the Carto queries fail.
        NoRemapInUseException: if in-use locations without a remap are found.
    """
    try:
        carto_table = CartoDBTable.objects.get(pk=carto_table_pk)
    except CartoDBTable.DoesNotExist as e:
        logger.exception('Cannot retrieve CartoDBTable with pk: %s',
                         carto_table_pk)
        raise e

    database_pcodes = []
    for row in Location.objects.all_locations().filter(
            gateway=carto_table.location_type).values('p_code'):
        database_pcodes.append(row['p_code'])

    auth_client = LocationsCartoNoAuthClient(
        base_url="https://{}.carto.com/".format(carto_table.domain))
    sql_client = SQLClient(auth_client)

    try:
        qry = sql_client.send(
            'select array_agg({}) AS aggregated_pcodes from {}'.format(
                carto_table.pcode_col,
                carto_table.table_name,
            ))
        new_carto_pcodes = qry['rows'][0]['aggregated_pcodes'] \
            if len(qry['rows']) > 0 and "aggregated_pcodes" in qry['rows'][0] else []

        remapped_pcode_pairs = []
        if carto_table.remap_table_name:
            remap_qry = 'select old_pcode::text, new_pcode::text from {}'.format(
                carto_table.remap_table_name)
            remapped_pcode_pairs = sql_client.send(remap_qry)['rows']

    except CartoException as e:
        logger.exception(
            "CartoDB exception occured during the data validation.")
        raise e

    remap_old_pcodes = [
        remap_row['old_pcode'] for remap_row in remapped_pcode_pairs
    ]
    # Database pcodes missing from both the new Carto data and the remap
    # table would be left dangling by the import.
    orphaned_pcodes = set(database_pcodes) - (set(new_carto_pcodes)
                                              | set(remap_old_pcodes))
    orphaned_locations = Location.objects.all_locations().filter(
        p_code__in=list(orphaned_pcodes))

    # if location ids with no remap in use are found, do not continue the import
    location_ids_bnriu = get_location_ids_in_use(orphaned_locations)
    if location_ids_bnriu:
        msg = "Location ids in use without remap found: {}".format(','.join(
            [str(iu) for iu in location_ids_bnriu]))
        # BUG FIX: logger.exception() is only meaningful inside an `except`
        # block (it attaches exc_info); use logger.error() here.
        logger.error(msg)
        raise NoRemapInUseException(msg)

    return True
コード例 #4
0
def get_carto_asset_ids(id_to_check=None):
    """Return the list of asset ids present in the Carto table.

    :param id_to_check: when given, restrict the query to that single id
        (the result is then [] or [id_to_check]).
    :return: list of ids from the 'id' column.
    """
    auth_client = APIKeyAuthClient(api_key=CARTO_API_KEY,
                                   base_url=USR_BASE_URL)
    sql = SQLClient(auth_client)
    q = f"SELECT id FROM {TABLE_NAME}"
    if id_to_check is not None:
        # int() guards against SQL injection through id_to_check, which was
        # previously interpolated into the query unescaped.
        q += f" WHERE id = {int(id_to_check)}"
    results = sql.send(q)
    return [r['id'] for r in results['rows']]
コード例 #5
0
 def is_public(self, query):
     """Tell whether *query* only touches public tables.

     Privacy applies only to tables, so we probe by running EXPLAIN on the
     query through an unauthenticated (public) SQL client: success means
     every table it touches is publicly readable.
     """
     anon_client = SQLClient(_create_auth_client(self.credentials, public=True))
     try:
         anon_client.send('EXPLAIN {}'.format(query), do_post=False)
     except CartoException:
         return False
     return True
コード例 #6
0
    def create_table(self, table_name=None, cartodbfy=False):
        """Create the Carto table (if missing) from self.carto_fields.

        :param table_name: destination table; defaults to
            self.carto_table_name.
        :param cartodbfy: when truthy, also register the table with Carto
            (CDB_CartodbfyTable) so it appears in the dashboard.
        """
        table_name = table_name or self.carto_table_name
        client = SQLClient(self.carto_auth_client)

        # NOTE: the loop variable used to shadow the builtin `type`.
        columns = ",".join(name + " " + col_type
                           for (name, col_type) in self.carto_fields)
        client.send(
            "CREATE TABLE IF NOT EXISTS {table_name} ({columns})".format(
                table_name=table_name, columns=columns))
        if cartodbfy:
            client.send(
                "SELECT CDB_CartodbfyTable('{schema}', '{table_name}')".format(
                    schema=self.carto_auth_client.username,
                    table_name=table_name))
コード例 #7
0
class DotCartoFile(object):
    """Wraps a .carto visualization file and rewrites dataset references."""

    def __init__(self, dotcarto_file, endpoint_base_url, api_key):
        self.dotcarto_file = dotcarto_file
        self.sql = SQLClient(APIKeyAuthClient(endpoint_base_url, api_key))
        # BUG FIX: these used to be class attributes; the mutable
        # `replacements` list was shared across every instance.
        self.replacements = []
        self.visualization_id = None
        self.json = None

    def replace_dataset(self, original_dataset_name, new_dataset_name):
        """Queue a dataset rename to be applied later."""
        self.replacements.append({
            "original_dataset_name": original_dataset_name,
            "new_dataset_name": new_dataset_name
        })

    def replace_datasets_in_dotcarto_file(self, zip_buffer):
        """Apply the queued renames to the visualization JSON and write each
        new dataset into *zip_buffer* as a GeoPackage (.gpkg) member."""
        for replacement in self.replacements:
            original_dataset_name = replacement["original_dataset_name"]
            new_dataset_name = replacement["new_dataset_name"]

            self.json = self.json.replace(original_dataset_name,
                                          new_dataset_name)

            new_dataset = self.sql.send("select * from {dataset}".format(
                dataset=new_dataset_name),
                                        format="gpkg")
            zip_buffer.writestr(
                join(self.visualization_id,
                     new_dataset_name + ".gpkg"), new_dataset)
コード例 #8
0
    def clean(self):
        """Validate that the configured Carto table is reachable and exposes
        the name, pcode and (optional) parent code columns."""
        cleaned = self.cleaned_data
        table_name = cleaned['table_name']
        auth_client = EtoolsCartoNoAuthClient(
            base_url="https://{}.carto.com/".format(str(cleaned['domain'])))
        sql_client = SQLClient(auth_client)

        try:
            sites = sql_client.send(
                'select * from {} limit 1'.format(table_name))
        except CartoException:
            logger.exception("CartoDB exception occured")
            raise ValidationError(
                "Couldn't connect to CartoDB table: {}".format(table_name))

        # Inspect the first row to confirm the expected columns exist.
        row = sites['rows'][0]
        if cleaned['name_col'] not in row:
            raise ValidationError(
                'The Name column ({}) is not in table: {}'.format(
                    cleaned['name_col'], table_name))
        if cleaned['pcode_col'] not in row:
            raise ValidationError(
                'The PCode column ({}) is not in table: {}'.format(
                    cleaned['pcode_col'], table_name))
        if cleaned['parent_code_col'] and cleaned['parent_code_col'] not in row:
            raise ValidationError(
                'The Parent Code column ({}) is not in table: {}'.format(
                    cleaned['parent_code_col'], table_name))

        return self.cleaned_data
コード例 #9
0
    def clean(self):
        """Validate the configured Carto table: it must be reachable and
        contain 'name', 'pcode' and (when a parent is set) 'parent_pcode'.

        Raises:
            ValidationError: if the table is unreachable, empty, or missing
                a required column.
        """
        domain = self.cleaned_data['domain']
        api_key = self.cleaned_data['api_key']
        table_name = self.cleaned_data['table_name']

        client = get_carto_client(api_key, domain)
        sql = SQLClient(client)

        try:
            sites = sql.send(
                'select * from {} limit 1'.format(table_name)
            )
        except CartoException:
            # logging.exception already records exc_info; the explicit
            # exc_info=True was redundant.
            logging.exception("CartoDB exception occured")
            raise ValidationError(
                "Couldn't connect to CartoDB table: " + table_name)
        else:
            # BUG FIX: an empty table used to raise an unhandled IndexError
            # on rows[0]; report it as a validation problem instead.
            if not sites['rows']:
                raise ValidationError('Table is empty: ' + table_name)
            row = sites['rows'][0]
            if 'name' not in row:
                raise ValidationError(
                    'The Name column ({}) is not in table: {}'.format(
                        'name', table_name))

            if 'pcode' not in row:
                raise ValidationError(
                    'The PCode column ({}) is not in table: {}'.format(
                        'pcode', table_name))

            if self.cleaned_data['parent'] and 'parent_pcode' not in row:
                raise ValidationError(
                    'The Parent Code column ({}) is not in table: {}'.format(
                        'parent_pcode', table_name))

        return self.cleaned_data
コード例 #10
0
def update_asset_on_carto(asset_dict, fields):
    """Update a single asset's row in the Carto table via a SQL UPDATE.

    Builds a statement of the form
        UPDATE t SET a=1, b=2 WHERE id = <asset id>;
    The the_geom/the_geom_webmercator fields are NOT touched here; run
    fix_carto_geofields afterwards to recompute them.

    :param asset_dict: dict holding the 'asset' model instance (used for the
        WHERE clause) plus the field values; consumed by
        set_string_from_model.
    :param fields: list of field names; must contain 'id'.
    :raises ValueError: if the generated query exceeds Carto's length limit.
    """
    auth_client = APIKeyAuthClient(api_key=CARTO_API_KEY,
                                   base_url=USR_BASE_URL)
    sql = SQLClient(auth_client)

    # 'id' belongs in the WHERE clause, not the SET list.  A shallow copy of
    # the list of strings suffices (the original used copy.deepcopy) and
    # keeps the caller's list unmodified.
    other_fields = list(fields)
    other_fields.remove('id')
    q = f"UPDATE {TABLE_NAME} SET {set_string_from_model(asset_dict, other_fields)} WHERE id = {asset_dict['asset'].id};"
    # Fail loudly even under `python -O` (where an assert would be stripped).
    if len(q) >= 16384:
        raise ValueError(f"Query too long ({len(q)} characters).")
    print(q)
    results = sql.send(q)
コード例 #11
0
ファイル: test_sql.py プロジェクト: CartoDB/carto-python
def test_sql_unverified_fails_with_auth_client(wrong_onprem_auth_client):
    """A misconfigured on-prem auth client must fail with CartoException."""
    if wrong_onprem_auth_client is None:
        # Fixture unavailable in this environment; nothing to test.
        # (The original `assert True is True` was a no-op placeholder.)
        return

    sql = SQLClient(wrong_onprem_auth_client)
    with pytest.raises(CartoException):
        sql.send('select version()')
コード例 #12
0
def test_sql_additional_params(api_key_auth_client_usr):
    """The skipfields request argument must drop that column from the rows."""
    client = SQLClient(api_key_auth_client_usr)
    query = 'select * from ' + EXISTING_POINT_DATASET

    data = client.send(query, do_post=True, skipfields="the_geom_webmercator")

    assert data is not None
    for key in ('rows', 'total_rows', 'time'):
        assert key in data
    assert len(data['rows']) > 0
    assert "the_geom_webmercator" not in data['rows'][0]

    # Without skipfields the column must come back.
    data = client.send(query, do_post=True)

    assert "the_geom_webmercator" in data['rows'][0]
コード例 #13
0
def test_sql_unverified_fails_with_auth_client(wrong_onprem_auth_client):
    """A misconfigured on-prem auth client must fail with CartoException."""
    if wrong_onprem_auth_client is None:
        # Fixture unavailable in this environment; nothing to test.
        # (The original `assert True is True` was a no-op placeholder.)
        return

    sql = SQLClient(wrong_onprem_auth_client)
    with pytest.raises(CartoException):
        sql.send('select version()')
コード例 #14
0
ファイル: test_sql.py プロジェクト: CartoDB/carto-python
def test_sql_additional_params(api_key_auth_client_usr):
    """The skipfields request argument must drop that column from the rows."""
    client = SQLClient(api_key_auth_client_usr)
    query = 'select * from ' + EXISTING_POINT_DATASET

    data = client.send(query, do_post=True, skipfields="the_geom_webmercator")

    assert data is not None
    for key in ('rows', 'total_rows', 'time'):
        assert key in data
    assert len(data['rows']) > 0
    assert "the_geom_webmercator" not in data['rows'][0]

    # Without skipfields the column must come back.
    data = client.send(query, do_post=True)

    assert "the_geom_webmercator" in data['rows'][0]
コード例 #15
0
def dumpToCarto(eleList, table_name):
    """Push Strava segment rows into a CARTO table via one UPDATE per row.

    :param eleList: iterable of segment tuples; indices used are
        0=cartodb_id, 1=name, 2=value, 7=longitude, 8=latitude.
    :param table_name: suffix of the target table
        ('strava_segments_' + table_name).
    """
    import os

    # I am using my CARTO account
    USERNAME = "******"
    USR_BASE_URL = "https://{user}.carto.com/".format(user=USERNAME)
    # SECURITY FIX: the API key used to be hard-coded in the source; read it
    # from the environment instead (the leaked key should be rotated).
    auth_client = APIKeyAuthClient(
        api_key=os.environ['CARTO_API_KEY'],
        base_url=USR_BASE_URL)

    sql = SQLClient(auth_client)

    table_name = 'strava_segments_' + table_name
    for segment in eleList:
        try:
            # Escape single quotes in the name so it cannot break the SQL.
            # Also dropped the redundant `SET cartodb_id={id}` (it assigned
            # the same value the WHERE clause already matches on).
            safe_name = str(segment[1]).replace("'", "''")
            query = ("UPDATE {table} SET "
                     "the_geom=ST_SetSRID(ST_MakePoint({long}, {lat}),4326), "
                     "name='{name}', value={value}, date=now() "
                     "WHERE cartodb_id={id}").format(
                         table=table_name, id=segment[0], long=segment[7],
                         lat=segment[8], name=safe_name, value=segment[2])
            logger.info(query)
            sql.send(query)
        except CartoException as e:
            logger.error(e)
コード例 #16
0
ファイル: test_sql.py プロジェクト: CartoDB/carto-python
def test_sql(api_key_auth_client_usr, mock_requests, do_post=True):
    """Smoke-test a basic SELECT: the response carries rows plus metadata."""
    with mock_requests.mocker:
        client = SQLClient(api_key_auth_client_usr)
        data = client.send('select * from ' + EXISTING_POINT_DATASET,
                           do_post=do_post)

    assert data is not None
    for key in ('rows', 'total_rows', 'time'):
        assert key in data
    assert len(data['rows']) > 0
コード例 #17
0
def test_sql(api_key_auth_client_usr, mock_requests, do_post=True):
    """Smoke-test a basic SELECT: the response carries rows plus metadata."""
    with mock_requests.mocker:
        client = SQLClient(api_key_auth_client_usr)
        data = client.send('select * from ' + EXISTING_POINT_DATASET,
                           do_post=do_post)

    assert data is not None
    for key in ('rows', 'total_rows', 'time'):
        assert key in data
    assert len(data['rows']) > 0
コード例 #18
0
def test_sql_unverified(non_verified_auth_client):
    """A non-SSL-verified client should still run simple queries."""
    if non_verified_auth_client is None:
        # Fixture unavailable in this environment; nothing to test.
        # (The original `assert True is True` was a no-op placeholder.)
        return

    sql = SQLClient(non_verified_auth_client)
    data = sql.send('select version()')

    assert data is not None
    assert 'rows' in data
    assert 'total_rows' in data
    assert 'time' in data
    assert len(data['rows']) > 0
コード例 #19
0
ファイル: test_sql.py プロジェクト: CartoDB/carto-python
def test_sql_unverified(non_verified_auth_client):
    """A non-SSL-verified client should still run simple queries."""
    if non_verified_auth_client is None:
        # Fixture unavailable in this environment; nothing to test.
        # (The original `assert True is True` was a no-op placeholder.)
        return

    sql = SQLClient(non_verified_auth_client)
    data = sql.send('select version()')

    assert data is not None
    assert 'rows' in data
    assert 'total_rows' in data
    assert 'time' in data
    assert len(data['rows']) > 0
コード例 #20
0
class DotCartoFile(object):
    """Wraps a .carto visualization file and rewrites dataset references.

    Typical use: register renames with replace_dataset(), then call
    get_new() to produce a new .carto archive with the datasets swapped.
    """

    def __init__(self, dotcarto_file, endpoint_base_url, api_key):
        self.dotcarto_file = dotcarto_file
        self.sql = SQLClient(APIKeyAuthClient(endpoint_base_url, api_key))
        # BUG FIX: these used to be class attributes; the mutable
        # `replacements` list was shared across every instance.
        self.replacements = []
        self.visualization_id = None
        self.json = None

    def replace_dataset(self, original_dataset_name, new_dataset_name):
        """Queue a dataset rename to be applied by get_new()."""
        self.replacements.append({
            "original_dataset_name": original_dataset_name,
            "new_dataset_name": new_dataset_name
        })

    def replace_datasets_in_dotcarto_file(self, zip_buffer):
        """Apply the queued renames to the visualization JSON and write each
        new dataset into *zip_buffer* as a GeoPackage (.gpkg) member."""
        for replacement in self.replacements:
            original_dataset_name = replacement["original_dataset_name"]
            new_dataset_name = replacement["new_dataset_name"]

            self.json = self.json.replace(original_dataset_name,
                                          new_dataset_name)

            new_dataset = self.sql.send("select * from {dataset}".format(
                dataset=new_dataset_name),
                                        format="gpkg")
            zip_buffer.writestr(
                join(self.visualization_id,
                     new_dataset_name + ".gpkg"), new_dataset)

    def get_new(self, destination_path=None):
        """Build the new .carto archive.

        Writes it to *destination_path* when given; otherwise returns an
        in-memory binary buffer positioned at offset 0.
        """
        from io import BytesIO

        with ZipFile(self.dotcarto_file) as original_dotcarto_file:
            self.visualization_id = original_dotcarto_file.namelist()[0][:-1]
            json_file_relative_path = join(
                self.visualization_id, self.visualization_id + ".carto.json")
            # BUG FIX: ZipFile.read() returns bytes; decode so the str-based
            # .replace()/.encode() calls work under Python 3.
            self.json = original_dotcarto_file.read(
                json_file_relative_path).decode('utf-8')

        if destination_path is not None:
            with ZipFile(destination_path, mode='w') as new_dotcarto_file:
                self.replace_datasets_in_dotcarto_file(new_dotcarto_file)
                new_dotcarto_file.writestr(json_file_relative_path,
                                           self.json.encode('utf-8'))
        else:
            # BUG FIX: ZipFile needs a binary buffer; StringIO fails on
            # Python 3, so use BytesIO.
            new_dotcarto_buffer = BytesIO()
            with ZipFile(new_dotcarto_buffer, mode='w') as new_dotcarto_file:
                self.replace_datasets_in_dotcarto_file(new_dotcarto_file)
                new_dotcarto_file.writestr(json_file_relative_path,
                                           self.json.encode('utf-8'))
            new_dotcarto_buffer.seek(0)
            return new_dotcarto_buffer
コード例 #21
0
    def query(self, sql_query, parse_json=True, do_post=True, format=None, write_qry=False):
        """Run *sql_query* against Carto and return the result rows.

        :param sql_query: the SQL to execute.
        :param parse_json: forwarded to SQLClient.send.
        :param do_post: forwarded to SQLClient.send.
        :param format: response format, forwarded to SQLClient.send.
        :param write_qry: unless True, write queries are rejected.
        :raises CartoModelException: on a rejected write or any send error.
        """
        try:
            if not write_qry and self.__is_write_query(sql_query):
                raise CartoModelException("Aborted query. No write queries allowed.")

            auth_client = self.__get_auth_client(self.__carto_api_key, self.__carto_user)
            sql = SQLClient(auth_client, api_version='v2')

            res = sql.send(sql_query, parse_json, do_post, format)
            return res['rows']

        except Exception as err:
            self.__logger.error("Error sending query to Carto: {0}\n{1}".format(err, sql_query))
            # BUG FIX: chain the original exception so its traceback is not
            # lost when re-raising as CartoModelException.
            raise CartoModelException(err) from err
コード例 #22
0
ファイル: builder.py プロジェクト: CartoDB/smartagg-builder
    def get_carto_username(self):
        """
        Returns the user name for the client passed

        Returns:
            String with CARTO account name

        Raises:
            CartoException: an error thrown by the CARTO request
            Exception: some error in the client happened
        """
        self.logger.debug("Getting the CARTO user name...")
        response = SQLClient(self.auth_client).send("SELECT CDB_Username()")
        self.logger.debug(response)
        rows = response["rows"] if "rows" in response else []
        if len(rows) != 1:
            raise Exception("Your client is not valid")
        return rows[0]["cdb_username"]
コード例 #23
0
def query(sql_query, parse_json=True, do_post=True, format=None, retries=5):
    """Run *sql_query* against Carto, retrying up to *retries* times.

    Retries (with a 5 s pause) on CartoException; the last failure is
    re-raised.

    :param sql_query: the SQL to execute.
    :param parse_json: forwarded to SQLClient.send.
    :param do_post: forwarded to SQLClient.send.
    :param format: response format; when None, only the 'rows' list is
        returned, otherwise the raw response.
    :param retries: maximum number of attempts.
    :raises CartoException: when every attempt fails or returns no response.
    """
    log.debug(f"Query: {sql_query}")
    sql = SQLClient(_get_auth_client(), api_version="v2")
    res = None

    for retry_number in range(retries):
        try:
            res = sql.send(sql_query, parse_json, do_post, format)

            if res:
                break

        except CartoException as carto_exception:
            if retry_number == retries - 1:
                raise carto_exception
            else:
                time.sleep(5)
                continue

    if res is None:
        # BUG FIX: previously this fell through to res["rows"] and crashed
        # with a TypeError when every attempt returned a falsy response.
        raise CartoException(
            f"No response from Carto after {retries} attempts: {sql_query}")

    if format is None:
        return res["rows"]

    return res
コード例 #24
0
def insert_new_assets_into_carto(asset_dicts, fields):
    """Batch-insert new asset records into the Carto table.

    Builds a single multi-row statement of the form
        INSERT INTO t (a, b) VALUES (1, 2), (3, 4), ...;
    The the_geom/the_geom_webmercator columns are inserted as NULL; run
    fix_carto_geofields afterwards to populate them.

    :param asset_dicts: list of dicts with the values for each record;
        the caller's dicts are NOT modified.
    :param fields: list of column names shared by all records.
    :raises ValueError: if the generated query exceeds Carto's length limit.
    """
    auth_client = APIKeyAuthClient(api_key=CARTO_API_KEY,
                                   base_url=USR_BASE_URL)
    sql = SQLClient(auth_client)

    extra_fields = ['the_geom', 'the_geom_webmercator']
    fields_extended = fields + extra_fields
    # Work on copies so the caller's dicts are not mutated (the original
    # wrote the None geo placeholders straight into asset_dicts).
    augmented_dicts = [dict(a_dict, **{f: None for f in extra_fields})
                       for a_dict in asset_dicts]

    # Map each record into a "(v1, v2, ...)" value-tuple string.
    values_tuple_strings = [
        batch_values_string_from_model(a_dict, fields_extended)
        for a_dict in augmented_dicts
    ]

    q = f"INSERT INTO {TABLE_NAME} ({', '.join(fields_extended)}) " \
        f"VALUES {', '.join(values_tuple_strings)};"

    # Fail loudly even under `python -O` (where an assert would be stripped).
    if len(q) >= 16384:
        raise ValueError(f"Query too long ({len(q)} characters).")
    print(q)
    results = sql.send(q)
コード例 #25
0
def cleanup_obsolete_locations(carto_table_pk):
    """Delete locations that are no longer present in the Carto dataset.

    A database location is a deletion candidate when its pcode appears
    neither in the current Carto table nor in the remap table (as old OR new
    pcode).  Each candidate is then re-checked (leaf node, no children, no
    remap history) before the batch is deleted inside one transaction and
    the location tree is rebuilt.

    Raises:
        CartoDBTable.DoesNotExist: if *carto_table_pk* is unknown.
        CartoException: if the Carto queries fail.
    """
    try:
        carto_table = CartoDBTable.objects.get(pk=carto_table_pk)
    except CartoDBTable.DoesNotExist as e:
        logger.exception('Cannot retrieve CartoDBTable with pk: %s',
                         carto_table_pk)
        raise e

    # All pcodes currently stored in the database for this location type.
    database_pcodes = []
    for row in Location.objects.all_locations().filter(
            gateway=carto_table.location_type).values('p_code'):
        database_pcodes.append(row['p_code'])

    auth_client = LocationsCartoNoAuthClient(
        base_url="https://{}.carto.com/".format(carto_table.domain))
    sql_client = SQLClient(auth_client)

    try:
        # All pcodes present in the Carto dataset, aggregated into one row.
        qry = sql_client.send(
            'select array_agg({}) AS aggregated_pcodes from {}'.format(
                carto_table.pcode_col,
                carto_table.table_name,
            ))
        new_carto_pcodes = qry['rows'][0]['aggregated_pcodes'] \
            if len(qry['rows']) > 0 and "aggregated_pcodes" in qry['rows'][0] else []

        # Old/new pcode pairs from the remap table, when one is configured.
        remapped_pcode_pairs = []
        if carto_table.remap_table_name:
            remap_qry = 'select old_pcode::text, new_pcode::text from {}'.format(
                carto_table.remap_table_name)
            remapped_pcode_pairs = sql_client.send(remap_qry)['rows']

    except CartoException as e:
        logger.exception(
            "CartoDB exception occured during the data validation.")
        raise e

    # A pcode that appears in the remap table in either role is still known.
    remapped_pcodes = [
        remap_row['old_pcode'] for remap_row in remapped_pcode_pairs
    ]
    remapped_pcodes += [
        remap_row['new_pcode'] for remap_row in remapped_pcode_pairs
    ]
    # select for deletion those pcodes which are not present in the Carto datasets in any form
    deleteable_pcodes = set(database_pcodes) - (set(new_carto_pcodes)
                                                | set(remapped_pcodes))

    # Do a few safety checks before we actually delete a location, like:
    # - ensure that the deleted locations doesn't have any children in the location tree
    # - check if the deleted location was remapped before, do not delete if yes.
    # if the checks pass, add the deleteable location ID to the `revalidated_deleteable_pcodes` array so they can be
    # deleted in one go later
    revalidated_deleteable_pcodes = []

    with transaction.atomic():
        # prevent writing into locations until the cleanup is done
        Location.objects.all_locations().select_for_update().only('id')

        for deleteable_pcode in deleteable_pcodes:
            try:
                deleteable_location = Location.objects.all_locations().get(
                    p_code=deleteable_pcode)
            except Location.DoesNotExist:
                logger.warning(
                    "Cannot find orphaned pcode {}.".format(deleteable_pcode))
            else:
                # Only leaf nodes with no secondary children and no remap
                # history are safe to delete.
                if deleteable_location.is_leaf_node():
                    secondary_parent_check = Location.objects.all_locations(
                    ).filter(parent=deleteable_location.id).exists()
                    remap_history_check = LocationRemapHistory.objects.filter(
                        Q(old_location=deleteable_location)
                        | Q(new_location=deleteable_location)).exists()
                    if not secondary_parent_check and not remap_history_check:
                        logger.info(
                            "Selecting orphaned pcode {} for deletion".format(
                                deleteable_location.p_code))
                        revalidated_deleteable_pcodes.append(
                            deleteable_location.id)

        # delete the selected locations all at once, it seems it's faster like this compared to deleting them one by one.
        if revalidated_deleteable_pcodes:
            logger.info("Deleting selected orphaned pcodes")
            Location.objects.all_locations().filter(
                id__in=revalidated_deleteable_pcodes).delete()

        # rebuild location tree after the unneeded locations are cleaned up, because it seems deleting locations
        # sometimes leaves the location tree in a `bugged` state
        Location.objects.rebuild()
コード例 #26
0
ファイル: test_sql.py プロジェクト: CartoDB/carto-python
def test_sql_error(api_key_auth_client_usr):
    """Querying a table that does not exist must raise CartoException."""
    client = SQLClient(api_key_auth_client_usr)

    with pytest.raises(CartoException):
        client.send('select * from non_existing_dataset')
コード例 #27
0
ファイル: kill_query.py プロジェクト: CartoDB/carto-python
                    help='Set the base URL. For example:' +
                    ' https://username.carto.com/ ' +
                    '(defaults to env variable CARTO_API_URL)')

parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ.get('CARTO_API_KEY', ''),
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# SQL wrapper
sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY))

# Cancel the backend process running the given pid (only the current user's
# own queries can be cancelled).
queries = "SELECT pg_cancel_backend('" + args.pid + \
    "') from pg_stat_activity where usename=current_user;"

try:
    sql.send(queries)
    logger.info('Query killed')
except Exception:
    # BUG FIX: the bare `except:` also swallowed SystemExit and
    # KeyboardInterrupt; logger.warn is a deprecated alias of warning.
    logger.warning('Something went wrong')
コード例 #28
0
parser.add_argument('--api_key',
                    dest='CARTO_API_KEY',
                    default=os.environ.get('CARTO_API_KEY', ''),
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY,
                                   args.organization)
else:
    logger.error(
        'You need to provide valid credentials, run with -h parameter for details'
    )
    import sys
    sys.exit(1)

# SQL wrapper
sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY))

# Cancel the backend process running the given pid (only the current user's
# own queries can be cancelled).
queries = "SELECT pg_cancel_backend('" + args.pid + \
    "') from pg_stat_activity where usename=current_user;"

try:
    sql.send(queries)
    logger.info('Query killed')
except Exception:
    # BUG FIX: the bare `except:` also swallowed SystemExit and
    # KeyboardInterrupt; logger.warn is a deprecated alias of warning.
    logger.warning('Something went wrong')
コード例 #29
0
ファイル: user_info.py プロジェクト: drw/carto-python
    table_user.align['Attribute'] = 'l'
    table_user.align['Value'] = 'l'
    for row in userInfo:
        table_user.add_row(row)
    print(table_user)
    # print('{name}: {value}').format(name=i,value=str(user.__dict__[i]))
except Exception as e:
    logger.warn('User has no admin of its organization')

# SQL wrapper
sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY))

# show quota of user
results = []
print('\nThe quotas of the user are:\n')
quota = sql.send(
    "SELECT * FROM cdb_dataservices_client.cdb_service_quota_info()")
# Iterate the response rows directly instead of scanning every response key.
for itr in quota.get('rows', []):
    # BUG FIX: the values are now appended in the same order as the
    # PrettyTable header below; the original appended used_quota before
    # provider, so the columns were mislabelled.
    results.append([
        itr['service'], itr['provider'], itr['soft_limit'],
        itr['used_quota'], itr['monthly_quota']
    ])

table = PrettyTable(
    ['Service', 'Provider', 'Soft limit', 'Used quota', 'Monthly quota'])
table.align['Used quota'] = 'l'
table.align['Provider'] = 'r'
table.align['Soft limit'] = 'r'
table.align['Service'] = 'r'
table.align['Monthly quota'] = 'r'
コード例 #30
0
ファイル: table_info.py プロジェクト: CartoDB/carto-python
            i.table.name,
            i.table.row_count,
            str(round(i.table.size/1048576.00, 2)),
            str(i.table.privacy),
            str(i.table.geometry_types)
        ])

        print(table_general)

        # Query information_schema for this table's columns and data types.
        # NOTE(review): the owner username and table name come from the Carto
        # metadata API and are interpolated unescaped — confirm they cannot
        # contain quotes before reusing this pattern elsewhere.
        columns_table = "select column_name, data_type FROM information_schema.columns \
        WHERE table_schema = '" + i.permission.owner.username + "'\
        AND table_name ='" + i.table.name + "';"

        # print columns_table
        print('\nThe columns and their data types are: \n')
        columnAndTypes = sql.send(columns_table)
        # Only the 'rows' key of the SQL API response holds column records.
        for key, value in columnAndTypes.items():
            if key == 'rows':
                for itr in value:
                    results_col.append([
                        itr['column_name'],
                        itr['data_type']
                    ])
        # Render the collected column metadata as a two-column table.
        table_col = PrettyTable(
            ['Column name', 'Data type'])
        table_col.align['Column name'] = 'l'
        table_col.align['Data type'] = 'r'
        for row in results_col:
            table_col.add_row(row)

        print(table_col)
コード例 #31
0
class TestCartoContext(unittest.TestCase):
    """Tests for cartoframes.CartoContext"""
    def setUp(self):
        """Load CARTO credentials and compute per-run test table names.

        Credentials come from the APIKEY/USERNAME environment variables or,
        failing that, from ``test/secret.json``.  When neither source is
        available the credential attributes are set to ``None`` and the
        skip decorators on the tests take effect.
        """
        if (os.environ.get('APIKEY') is None
                or os.environ.get('USERNAME') is None):
            try:
                # Context manager so the file handle is not leaked.
                with open('test/secret.json') as f:
                    creds = json.loads(f.read())
                self.apikey = creds['APIKEY']
                self.username = creds['USERNAME']
            except (IOError, OSError, ValueError, KeyError):
                # Missing/unreadable file, invalid JSON, or absent keys —
                # narrowed from a bare `except:` which also hid real bugs.
                warnings.warn("Skipping CartoContext tests. To test it, "
                              "create a `secret.json` file in test/ by "
                              "renaming `secret.json.sample` to `secret.json` "
                              "and updating the credentials to match your "
                              "environment.")
                self.apikey = None
                self.username = None
        else:
            self.apikey = os.environ['APIKEY']
            self.username = os.environ['USERNAME']

        # Always define these attributes so tearDown can inspect them
        # without risking an AttributeError when credentials are absent.
        self.baseurl = None
        self.auth_client = None
        self.sql_client = None
        if self.username and self.apikey:
            self.baseurl = 'https://{username}.carto.com/'.format(
                username=self.username)
            self.auth_client = APIKeyAuthClient(base_url=self.baseurl,
                                                api_key=self.apikey)
            self.sql_client = SQLClient(self.auth_client)

        # sets client to be ci
        if not cartoframes.context.DEFAULT_SQL_ARGS['client']\
                .endswith('_dev_ci'):
            cartoframes.context.DEFAULT_SQL_ARGS['client'] += '_dev_ci'
        # sets skip value
        # NOTE(review): this local is unused here; the @unittest.skipIf
        # decorators evaluate a module-level WILL_SKIP at class-definition
        # time, so this assignment cannot influence them — confirm intent.
        WILL_SKIP = self.apikey is None or self.username is None  # noqa: F841

        # table naming info
        has_mpl = 'mpl' if os.environ.get('MPLBACKEND') else 'nonmpl'
        # Use version_info so Python 3.10+ is not truncated to '3_1'
        # (sys.version[0:3] yields '3.1' for '3.10.x').
        pyver = '{0}_{1}'.format(*sys.version_info[:2])

        # test tables
        self.test_read_table = 'cb_2013_us_csa_500k'
        self.valid_columns = set([
            'affgeoid', 'aland', 'awater', 'created_at', 'csafp', 'geoid',
            'lsad', 'name', 'the_geom', 'updated_at'
        ])
        table_args = dict(ver=pyver, mpl=has_mpl)
        # torque table
        self.test_point_table = 'tweets_obama'

        # for writing to carto
        self.test_write_table = 'cartoframes_test_table_{ver}_{mpl}'.format(
            **table_args)
        self.mixed_case_table = 'AbCdEfG_{0}_{1}'.format(pyver, has_mpl)

        # for batch writing to carto
        self.test_write_batch_table = (
            'cartoframes_test_batch_table_{ver}_{mpl}'.format(**table_args))

        self.test_write_lnglat_table = (
            'cartoframes_test_write_lnglat_table_{ver}_{mpl}'.format(
                **table_args))

        self.write_named_index = (
            'cartoframes_test_write_non_default_index_{ver}_{mpl}'.format(
                **table_args))
        # for queries
        self.test_query_table = ('cartoframes_test_query_'
                                 'table_{ver}_{mpl}'.format(**table_args))
        self.test_delete_table = ('cartoframes_test_delete_'
                                  'table_{ver}_{mpl}').format(**table_args)
        # for data observatory
        self.test_data_table = 'carto_usa_offices'

    def tearDown(self):
        """Restore the account by dropping every table the tests create."""
        tables = (
            self.test_write_table,
            self.test_write_batch_table,
            self.test_write_lnglat_table,
            self.test_query_table,
            self.mixed_case_table.lower(),
            self.write_named_index,
        )
        sql_drop = 'DROP TABLE IF EXISTS {};'

        # getattr guards against setUp returning before `baseurl` was
        # assigned (it is only set when both username and apikey exist),
        # which previously raised AttributeError here.
        if self.apikey and getattr(self, 'baseurl', None):
            cc = cartoframes.CartoContext(base_url=self.baseurl,
                                          api_key=self.apikey)
            for table in tables:
                # delete() removes the dataset; the explicit DROP cleans up
                # any bare table left behind by a failed import.
                cc.delete(table)
                cc.sql_client.send(sql_drop.format(table))
        # TODO: remove the named map templates

    def add_map_template(self):
        """Placeholder: register generated named map templates (no-op)."""
        pass

    @unittest.skipIf(WILL_SKIP, 'Skipping test, no carto credentials found')
    def test_cartocontext(self):
        """context.CartoContext.__init__ normal usage"""
        ctx = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
        # The credentials stored on the context must echo what was passed.
        self.assertEqual(self.apikey, ctx.creds.key())
        self.assertEqual(self.baseurl.strip('/'), ctx.creds.base_url())
        self.assertEqual(self.username, ctx.creds.username())
        self.assertFalse(ctx.is_org)
        # A bogus API key must be rejected at construction time.
        with self.assertRaises(CartoException):
            cartoframes.CartoContext(base_url=self.baseurl,
                                     api_key='notavalidkey')

    @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping this test')
    def test_cartocontext_credentials(self):
        """context.CartoContext.__init__ Credentials argument"""
        explicit = cartoframes.Credentials(username=self.username,
                                           key=self.apikey)
        ctx = cartoframes.CartoContext(creds=explicit)
        self.assertIsInstance(ctx, cartoframes.CartoContext)
        self.assertEqual(self.username, ctx.creds.username())
        self.assertEqual(self.apikey, ctx.creds.key())

        # A context built with no arguments should pick up the credentials
        # previously persisted via Credentials.save().
        cartoframes.Credentials(username=self.username,
                                key=self.apikey).save()
        restored = cartoframes.CartoContext()
        self.assertEqual(self.apikey, restored.creds.key())

    @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping this test')
    def test_cartocontext_isorguser(self):
        """context.CartoContext._is_org_user"""
        cc = cartoframes.CartoContext(base_url=self.baseurl,
                                      api_key=self.apikey)
        # This is a personal (non-organization) account; assertFalse gives a
        # clearer failure message than assertTrue(not ...).
        self.assertFalse(cc._is_org_user())

    @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping this test')
    def test_cartocontext_read(self):
        """context.CartoContext.read

        Checks limit validation, missing-table errors, and normal/limited/
        empty reads of a known table.
        """
        cc = cartoframes.CartoContext(base_url=self.baseurl,
                                      api_key=self.apikey)
        # fails if limit is smaller than zero
        # (dead `df =` stores inside the assertRaises blocks removed)
        with self.assertRaises(ValueError):
            cc.read('sea_horses', limit=-10)
        # fails if not an int
        with self.assertRaises(ValueError):
            cc.read('sea_horses', limit=3.14159)
        with self.assertRaises(ValueError):
            cc.read('sea_horses', limit='acadia')

        # fails on non-existent table
        with self.assertRaises(CartoException):
            cc.read('non_existent_table')

        # normal table
        df = cc.read(self.test_read_table)
        self.assertSetEqual(set(df.columns), self.valid_columns)
        # assertEqual reports both values on failure, unlike assertTrue(==)
        self.assertEqual(len(df), 169)

        # read with limit
        df = cc.read(self.test_read_table, limit=10)
        self.assertEqual(len(df), 10)
        self.assertIsInstance(df, pd.DataFrame)

        # read empty table/dataframe
        df = cc.read(self.test_read_table, limit=0)
        self.assertSetEqual(set(df.columns), self.valid_columns)
        self.assertEqual(len(df), 0)
        self.assertIsInstance(df, pd.DataFrame)

    @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping this test')
    def test_cartocontext_write(self):
        """context.CartoContext.write normal usage

        Covers: a basic write, row-count verification, overwrite
        protection, lnglat geometry creation, batch (> MAX_ROWS_LNGLAT)
        writes, the schema of the created table, non-ascii encoding, and
        type guessing.
        """
        from cartoframes.context import MAX_ROWS_LNGLAT
        cc = cartoframes.CartoContext(base_url=self.baseurl,
                                      api_key=self.apikey)
        # 100-row frame with mixed dtypes, including lat/long for geocoding.
        data = {
            'nums': list(range(100, 0, -1)),
            'category':
            [random.choice('abcdefghijklmnop') for _ in range(100)],
            'lat': [0.01 * i for i in range(100)],
            'long': [-0.01 * i for i in range(100)]
        }
        schema = {
            'nums': int,
            'category': 'object',
            'lat': float,
            'long': float
        }
        df = pd.DataFrame(data).astype(schema)
        cc.write(df, self.test_write_table)

        # check if table exists
        resp = self.sql_client.send('''
            SELECT *
            FROM {table}
            LIMIT 0
            '''.format(table=self.test_write_table))
        self.assertIsNotNone(resp)

        # check that table has same number of rows
        resp = self.sql_client.send('''
            SELECT count(*)
            FROM {table}'''.format(table=self.test_write_table))
        self.assertEqual(resp['rows'][0]['count'], len(df))

        # should error for existing table
        with self.assertRaises(NameError):
            cc.write(df, self.test_read_table, overwrite=False)

        # overwrite table and create the_geom column
        cc.write(df,
                 self.test_write_table,
                 overwrite=True,
                 lnglat=('long', 'lat'))

        resp = self.sql_client.send('''
            SELECT count(*) AS num_rows, count(the_geom) AS num_geoms
            FROM {table}
            '''.format(table=self.test_write_table))
        # number of geoms should equal number of rows
        self.assertEqual(resp['rows'][0]['num_rows'],
                         resp['rows'][0]['num_geoms'])

        # test batch lnglat behavior
        # one row over the threshold forces the batch (async) code path
        n_rows = MAX_ROWS_LNGLAT + 1
        df = pd.DataFrame({
            'latvals': [random.random() for r in range(n_rows)],
            'lngvals': [random.random() for r in range(n_rows)]
        })
        job = cc.write(df,
                       self.test_write_lnglat_table,
                       lnglat=('lngvals', 'latvals'))
        self.assertIsInstance(job, cartoframes.context.BatchJobStatus)

        # test batch writes
        n_rows = 550000
        df = pd.DataFrame({'vals': [random.random() for r in range(n_rows)]})

        cc.write(df, self.test_write_batch_table)

        resp = self.sql_client.send('''
            SELECT count(*) AS num_rows FROM {table}
            '''.format(table=self.test_write_batch_table))
        # number of rows same in dataframe and carto table
        self.assertEqual(n_rows, resp['rows'][0]['num_rows'])

        cols = self.sql_client.send('''
            SELECT * FROM {table} LIMIT 1
        '''.format(table=self.test_write_batch_table))
        # expected CARTO utility columns plus the written 'vals' column
        expected_schema = {
            'vals': {
                'type': 'number'
            },
            'the_geom': {
                'type': 'geometry'
            },
            'the_geom_webmercator': {
                'type': 'geometry'
            },
            'cartodb_id': {
                'type': 'number'
            }
        }
        # table should be properly created
        # util columns + new column of type number
        self.assertDictEqual(cols['fields'], expected_schema)

        # test properly encoding
        # 'ô' exercises the non-ascii path of the uploader
        df = pd.DataFrame({'vals': [1, 2], 'strings': ['a', 'ô']})
        cc.write(df, self.test_write_table, overwrite=True)

        # check if table exists
        resp = self.sql_client.send('''
            SELECT *
            FROM {table}
            LIMIT 0
            '''.format(table=self.test_write_table))
        self.assertIsNotNone(resp)

        cc.delete(self.test_write_table)
        df = pd.DataFrame({'vals': list('abcd'), 'ids': list('wxyz')})
        df = df.astype({'vals': str, 'ids': str})
        cc.write(df, self.test_write_table)
        # both columns are text, so every reported field type is 'string'
        schema = cc.sql_client.send('select ids, vals from {}'.format(
            self.test_write_table))['fields']
        self.assertSetEqual(set([schema[c]['type'] for c in schema]),
                            set(('string', )))

        df = pd.DataFrame({
            'vals': list('abcd'),
            'ids': list('wxyz'),
            'nums': [1.2 * i for i in range(4)],
            'boolvals': [
                True,
                False,
                None,
                True,
            ],
        })
        # type_guessing lets CARTO infer number/boolean column types
        cc.write(df,
                 self.test_write_table,
                 overwrite=True,
                 type_guessing='true')
        resp = cc.sql_client.send('SELECT * FROM {}'.format(
            self.test_write_table))['fields']
        schema = {k: v['type'] for k, v in dict_items(resp)}
        ans = dict(vals='string',
                   ids='string',
                   nums='number',
                   boolvals='boolean',
                   the_geom='geometry',
                   the_geom_webmercator='geometry',
                   cartodb_id='number')
        self.assertDictEqual(schema, ans)

    @unittest.skipIf(WILL_SKIP, 'updates privacy of existing dataset')
    def test_write_privacy(self):
        """context.CartoContext.write Updates the privacy of a dataset"""
        from carto.datasets import DatasetManager
        ctx = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
        manager = DatasetManager(self.auth_client)

        # A fresh write defaults to a private dataset.
        ctx.write(pd.DataFrame({'ids': list('abcd'), 'vals': range(4)}),
                  self.test_write_table)
        created = manager.get(self.test_write_table)
        self.assertEqual(created.privacy.lower(), 'private')

        # Overwriting with privacy='public' flips the dataset's privacy.
        ctx.write(pd.DataFrame({'ids': list('efgh'), 'vals': range(4, 8)}),
                  self.test_write_table,
                  overwrite=True,
                  privacy='public')
        overwritten = manager.get(self.test_write_table)
        self.assertEqual(overwritten.privacy.lower(), 'public')

        # Unknown tables have no privacy to report.
        self.assertIsNone(
            ctx._get_privacy('i_am_not_a_table_in_this_account'))

    @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping')
    def test_cartocontext_write_index(self):
        """context.CartoContext.write with non-default index"""
        ctx = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
        frame = pd.DataFrame({'vals': range(3), 'ids': list('abc')},
                             index=list('xyz'))
        frame.index.name = 'named_index'
        ctx.write(frame, self.write_named_index)

        # The named index must round-trip back as a regular column.
        round_trip = ctx.read(self.write_named_index)
        self.assertSetEqual({'the_geom', 'vals', 'ids', 'named_index'},
                            set(round_trip.columns))

    @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping')
    def test_cartocontext_mixed_case(self):
        """context.CartoContext.write table name mixed case"""
        cc = cartoframes.CartoContext(base_url=self.baseurl,
                                      api_key=self.apikey)
        data = pd.DataFrame({'a': [1, 2, 3], 'B': list('abc')})
        # `data` is already a DataFrame; the old pd.DataFrame(data) wrapper
        # made a needless copy before writing.
        cc.write(data, self.mixed_case_table)

    @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping')
    def test_cartocontext_table_exists(self):
        """context.CartoContext._table_exists"""
        ctx = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
        # Unknown table is reported as absent.
        self.assertFalse(ctx._table_exists('acadia_biodiversity'))
        # An existing table makes _table_exists raise rather than return.
        with self.assertRaises(NameError):
            ctx._table_exists(self.test_read_table)

    def test_cartocontext_delete(self):
        """context.CartoContext.delete

        Deleting an existing table makes it unqueryable; deleting a
        non-existent one only emits a UserWarning.
        """
        cc = cartoframes.CartoContext(base_url=self.baseurl,
                                      api_key=self.apikey)
        data = {'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']}
        df = pd.DataFrame(data)

        cc.write(df, self.test_delete_table)
        cc.delete(self.test_delete_table)

        # check that querying recently deleted table raises an exception
        with self.assertRaises(CartoException):
            cc.sql_client.send('select * from {}'.format(
                self.test_delete_table))

        # try to delete a table that does not exists
        with warnings.catch_warnings(record=True) as w:
            # Cause all warnings to always be triggered.
            warnings.simplefilter("always")
            # Trigger a warning.
            cc.delete('non_existent_table')
            # Verify one warning, subclass is UserWarning, and expected
            # message is in warning.  unittest assertions replace the old
            # bare `assert` statements, which are stripped under `python -O`
            # and give no diagnostic on failure.
            self.assertEqual(len(w), 1)
            self.assertTrue(issubclass(w[-1].category, UserWarning))
            self.assertIn("Failed to delete", str(w[-1].message))

    def test_cartocontext_send_dataframe(self):
        """context.CartoContext._send_dataframe"""
        # Placeholder: this path is exercised indirectly by the write tests.
        pass

    def test_cartocontext_handle_import(self):
        """context.CartoContext._handle_import"""

        ctx = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)

        # Every failed import state must surface as a CartoException,
        # regardless of the error code.
        for code in (8001, 6668, 1234):
            failed_job = dict(error_code=code, state='failure')
            with self.assertRaises(CartoException):
                ctx._handle_import(failed_job, 'foo')

        # A completed import that landed in an unexpected table also raises.
        mismatched_job = dict(state='complete', table_name='bar')
        with self.assertRaises(Exception):
            ctx._handle_import(mismatched_job, 'foo')

    @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping')
    def test_cartoframes_sync(self):
        """context.CartoContext.sync"""
        ctx = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
        # sync is currently a stub that returns None.
        self.assertIsNone(ctx.sync(pd.DataFrame(), 'acadia'))

    @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping')
    def test_cartoframes_query(self):
        """context.CartoContext.query

        Covers: plain query results and their dtypes, empty result sets,
        creating a table from a query (and the existing-table error), and
        a query that fails part-way through its result rows.
        """
        cc = cartoframes.CartoContext(base_url=self.baseurl,
                                      api_key=self.apikey)
        cols = (
            'link',
            'body',
            'displayname',
            'friendscount',
            'postedtime',
        )
        # invalid_df_date is a date pandas cannot represent; it should come
        # back as a plain object column rather than datetime64.
        df = cc.query('''
            SELECT {cols}, '02-06-1429'::date as invalid_df_date
            FROM tweets_obama
            LIMIT 100
            '''.format(cols=','.join(cols)))

        # ensure columns are in expected order
        df = df[list(cols) + ['invalid_df_date']]

        # same number of rows
        self.assertEqual(len(df), 100, msg='Expected number or rows')

        # same type of object
        self.assertIsInstance(df, pd.DataFrame, 'Should be a pandas DataFrame')
        # same column names
        requested_cols = {
            'link',
            'body',
            'displayname',
            'friendscount',
            'postedtime',
            'invalid_df_date',
        }
        self.assertSetEqual(requested_cols,
                            set(df.columns),
                            msg='Should have the columns requested')

        # should have expected schema
        expected_dtypes = (
            'object',
            'object',
            'object',
            'float64',
            'datetime64[ns]',
            'object',
        )
        self.assertTupleEqual(expected_dtypes,
                              tuple(str(d) for d in df.dtypes),
                              msg='Should have expected schema')

        # empty response
        df_empty = cc.query('''
            SELECT 1
            LIMIT 0
            ''')

        # no rows, one column
        self.assertTupleEqual(df_empty.shape, (0, 1))

        # is a DataFrame
        self.assertIsInstance(df_empty, pd.DataFrame)

        # table already exists, should throw CartoException
        with self.assertRaises(CartoException):
            cc.query('''
                SELECT link, body, displayname, friendscount
                FROM tweets_obama
                LIMIT 100
                ''',
                     table_name='tweets_obama')

        # create a table from a query
        cc.query('''
            SELECT link, body, displayname, friendscount
            FROM tweets_obama
            LIMIT 100
            ''',
                 table_name=self.test_query_table)

        # read newly created table into a dataframe
        df = cc.read(self.test_query_table)
        # should be specified length
        self.assertEqual(len(df), 100)
        # should have requested columns + utility columns from CARTO
        self.assertSetEqual(
            {
                'link',
                'body',
                'displayname',
                'friendscount',
                'the_geom',
            },
            set(df.columns),
            msg='Should have the columns requested')

        # see what happens if a query fails after 100 successful rows
        # (the second UNION branch switches the_geom from points to
        # polygons, which breaks ST_X/ST_Y mid-result)
        with self.assertRaises(CartoException):
            cc.query('''
                WITH cte AS (
                    SELECT CDB_LatLng(0, 0) as the_geom, i
                    FROM generate_series(1, 110) as m(i)
                    UNION ALL
                    SELECT ST_Buffer(CDB_LatLng(0, 0), 0.1) as the_geom, i
                    FROM generate_series(111, 120) as i
                )
                SELECT ST_X(the_geom) as xval, ST_Y(the_geom) as yval
                FROM cte
            ''')

    @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping this test')
    def test_cartocontext_map(self):
        """context.CartoContext.map normal usage

        Exercises basemap-only maps (static and interactive), layer
        combinations, and the argument-validation errors.
        """
        from cartoframes import Layer, QueryLayer, BaseMap
        try:
            import matplotlib
            matplotlib.use('agg')
            import matplotlib.pyplot as plt
        except ImportError:
            plt = None
        cc = cartoframes.CartoContext(base_url=self.baseurl,
                                      api_key=self.apikey)

        # test with no layers - should produce basemap
        if plt:
            basemap_only_static_mpl = cc.map(interactive=False)
            cartoframes.context.HAS_MATPLOTLIB = False
        basemap_only_static = cc.map(interactive=False)
        basemap_only_interactive = cc.map(interactive=True)

        # are of the correct type instances
        if plt:
            self.assertIsInstance(basemap_only_static_mpl, plt.Axes)
        self.assertIsInstance(basemap_only_static, IPython.core.display.Image)
        self.assertIsInstance(basemap_only_interactive,
                              IPython.core.display.HTML)

        # have the HTML innards that are to be expected
        # BUG FIX: sys.version[0] is the *string* '3', so comparing it to
        # the integers 3/2 was always False and both assertion branches
        # were silently dead; use sys.version_info[0] instead.
        if sys.version_info[0] == 3:
            self.assertRegex(basemap_only_static.data,
                             ('^<img src="https://.*api/v1/map/static/named/'
                              'cartoframes_ver.*" />$'))
            self.assertRegex(basemap_only_interactive.data,
                             '^<iframe srcdoc="<!DOCTYPE html>.*')
        elif sys.version_info[0] == 2:
            # BUG FIX: Python 2's unittest spells this assertRegexpMatches;
            # assertRegexMatches does not exist.
            self.assertRegexpMatches(
                basemap_only_static.data,
                ('^<img src="https://.*api/v1/map/static/named/'
                 'cartoframes_ver.*" />$'))
            self.assertRegexpMatches(basemap_only_interactive.data,
                                     '^<iframe srcdoc="<!DOCTYPE html>.*')

        # test with labels on front
        labels_front = cc.map(layers=BaseMap('light', labels='front'))
        self.assertIsInstance(labels_front, IPython.core.display.HTML)

        # test with one Layer
        one_layer = cc.map(layers=Layer('tweets_obama'))
        self.assertIsInstance(one_layer, IPython.core.display.HTML)

        # test with two Layers
        two_layers = cc.map(
            layers=[Layer('tweets_obama'),
                    Layer(self.test_read_table)])

        self.assertIsInstance(two_layers, IPython.core.display.HTML)

        # test with one Layer, one QueryLayer
        onelayer_onequery = cc.map(layers=[
            QueryLayer('''
                                                SELECT *
                                                FROM tweets_obama
                                                LIMIT 100'''),
            Layer(self.test_read_table)
        ])

        self.assertIsInstance(onelayer_onequery, IPython.core.display.HTML)

        # test with BaseMap, Layer, QueryLayer
        cc.map(layers=[
            BaseMap('light'),
            QueryLayer('''
                               SELECT *
                               FROM tweets_obama
                               LIMIT 100''',
                       color='favoritescount'),
            Layer(self.test_read_table)
        ])

        # Errors
        # too many layers
        with self.assertRaises(ValueError):
            layers = [Layer('tweets_obama')] * 9
            cc.map(layers=layers)

        # zoom needs to be specified with lng/lat
        with self.assertRaises(ValueError):
            cc.map(lng=44.3386, lat=68.2733)

        # only one basemap layer can be added
        with self.assertRaises(ValueError):
            cc.map(layers=[BaseMap('dark'), BaseMap('light')])

        # only one time layer can be added
        with self.assertRaises(ValueError):
            cc.map(layers=[
                Layer(self.test_read_table, time='cartodb_id'),
                Layer(self.test_read_table, time='cartodb_id')
            ])

        # no geometry
        with self.assertRaises(ValueError):
            cc.map(layers=QueryLayer('''
                SELECT
                    null::geometry as the_geom,
                    null::geometry as the_geom_webmercator,
                    row_number() OVER () as cartodb_id
                FROM generate_series(1, 10) as m(i)
                '''))

    @unittest.skipIf(WILL_SKIP, 'no cartocredentials, skipping')
    def test_cartocontext_map_time(self):
        """context.CartoContext.map time options"""
        from cartoframes import Layer
        cc = cartoframes.CartoContext(base_url=self.baseurl,
                                      api_key=self.apikey)
        html_map = cc.map(
            layers=Layer(self.test_point_table, time='cartodb_id'))
        self.assertIsInstance(html_map, IPython.core.display.HTML)

        # category map
        cat_map = cc.map(layers=Layer(self.test_point_table,
                                      time='actor_postedtime',
                                      color='twitter_lang'))
        # BUG FIX: the pattern is now a raw string — '\(' in a plain
        # literal is an invalid escape sequence (DeprecationWarning today,
        # SyntaxError in future Python versions).
        self.assertRegexpMatches(
            cat_map.__html__(),
            r'.*CDB_Math_Mode\(cf_value_twitter_lang\).*')

        with self.assertRaises(
                ValueError, msg='cannot create static torque maps currently'):
            cc.map(layers=Layer(self.test_point_table, time='cartodb_id'),
                   interactive=False)

        with self.assertRaises(ValueError,
                               msg='cannot have more than one torque layer'):
            cc.map(layers=[
                Layer(self.test_point_table, time='cartodb_id'),
                Layer(self.test_point_table, color='cartodb_id')
            ])

        with self.assertRaises(
                ValueError,
                msg='cannot do a torque map off a polygon dataset'):
            cc.map(layers=Layer(self.test_read_table, time='cartodb_id'))

    @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping this test')
    def test_cartocontext_map_geom_type(self):
        """context.CartoContext.map basemap geometry type defaults"""
        from cartoframes import Layer, QueryLayer
        ctx = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)

        # Polygon-only layer: named map name ends in labels1 (labels on top).
        poly_map = ctx.map(layers=Layer(self.test_read_table))
        self.assertRegexpMatches(poly_map.__html__(),
                                 '.*baseid2_labels1.*',
                                 msg='labels should be on top since only a '
                                 'polygon layer is present')

        # A point layer pushes labels underneath (labels0).
        point_map = ctx.map(layers=Layer(self.test_point_table))
        self.assertRegexpMatches(point_map.__html__(),
                                 '.*baseid2_labels0.*',
                                 msg='labels should be on bottom because a '
                                 'point layer is present')

        # Mixed point + polygon layers: point behavior wins.
        mixed_map = ctx.map(
            layers=[Layer(self.test_point_table),
                    Layer(self.test_read_table)])
        self.assertRegexpMatches(mixed_map.__html__(),
                                 '.*baseid2_labels0.*',
                                 msg='labels should be on bottom because a '
                                 'point layer is present')
        # A single query layer mixing geometries, with more polygons (10)
        # than points (5), should fall back to the polygon default
        # (labels on top).
        majority_poly_layer = QueryLayer('''
            (SELECT
                the_geom, the_geom_webmercator,
                row_number() OVER () AS cartodb_id
              FROM "{polys}" WHERE the_geom IS NOT null LIMIT 10)
            UNION ALL
            (SELECT
                the_geom, the_geom_webmercator,
                (row_number() OVER ()) + 10 AS cartodb_id
              FROM "{points}" WHERE the_geom IS NOT null LIMIT 5)
        '''.format(polys=self.test_read_table, points=self.test_point_table))
        majority_poly_map = ctx.map(layers=majority_poly_layer)
        self.assertRegexpMatches(majority_poly_map.__html__(),
                                 '.*baseid2_labels1.*',
                                 msg='layer has more polys than points, so it '
                                 'should default to polys labels (on top)')

    @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping')
    def test_get_bounds(self):
        """context.CartoContext._get_bounds"""
        from cartoframes.layer import QueryLayer
        ctx = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
        # Two overlapping boxes; the combined bounds are their union.
        box1 = {'minx': 0, 'maxx': 1, 'miny': 0, 'maxy': 2}
        box2 = {'minx': 0, 'maxx': 1.5, 'miny': -0.5, 'maxy': 1.5}
        expected = {'west': 0, 'east': 1.5, 'south': -0.5, 'north': 2}
        # Ring order matches ST_Envelope:
        # (MINX, MINY), (MINX, MAXY), (MAXX, MAXY), (MAXX, MINY), (MINX, MINY)
        # https://postgis.net/docs/ST_Envelope.html
        query = '''
            WITH cte AS (
                SELECT
                  'SRID=4326;POLYGON(({minx} {miny},
                                      {minx} {maxy},
                                      {maxx} {maxy},
                                      {maxx} {miny},
                                      {minx} {miny}))'::geometry AS the_geom
              )
            SELECT
              1 AS cartodb_id,
              the_geom,
              ST_Transform(the_geom, 3857) AS the_geom_webmercator
            FROM cte
        '''
        layers = [QueryLayer(query.format(**box)) for box in (box1, box2)]

        self.assertDictEqual(ctx._get_bounds(layers), expected)

    @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping this test')
    def test_cartocontext_check_query(self):
        """context.CartoContext._check_query"""
        ctx = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)
        missing_cols = ['merckx', 'moser', 'gimondi']
        # this table does not exist in this account
        bad_query = '''
            SELECT *
              FROM cyclists
              '''
        with self.assertRaises(ValueError):
            ctx._check_query(bad_query, style_cols=missing_cols)

        # table exists
        good_query = '''
            SELECT *
              FROM {}
              '''.format(self.test_read_table)
        self.assertIsNone(ctx._check_query(good_query))

        # table exists but the requested style columns do not
        with self.assertRaises(ValueError):
            ctx._check_query(good_query, style_cols=missing_cols)

    def test_df2pg_schema(self):
        """context._df2pg_schema"""
        from cartoframes.context import _df2pg_schema
        records = [
            {'id': 'a', 'val': 1.1, 'truth': True, 'idnum': 1},
            {'id': 'b', 'val': 2.2, 'truth': True, 'idnum': 2},
            {'id': 'c', 'val': 3.3, 'truth': False, 'idnum': 3},
        ]
        df = pd.DataFrame(records).astype({'id': 'object',
                                           'val': float,
                                           'truth': bool,
                                           'idnum': int})
        # Pin the column order so the generated schema string is stable.
        df = df[['id', 'val', 'truth', 'idnum']]
        pgcols = ['id', 'val', 'truth', 'idnum']
        expected = ('NULLIF("id", \'\')::text AS id, '
                    'NULLIF("val", \'\')::numeric AS val, '
                    'NULLIF("truth", \'\')::boolean AS truth, '
                    'NULLIF("idnum", \'\')::numeric AS idnum')

        self.assertEqual(expected, _df2pg_schema(df, pgcols))

        # the_geom passes through quoted, with no NULLIF cast.
        df['the_geom'] = 'Point(0 0)'
        pgcols.append('the_geom')
        expected = '"the_geom", ' + expected
        self.assertEqual(expected, _df2pg_schema(df, pgcols))

    @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping this test')
    def test_add_encoded_geom(self):
        """context._add_encoded_geom"""
        from cartoframes.context import _add_encoded_geom, _encode_geom
        cc = cartoframes.CartoContext(base_url=self.baseurl,
                                      api_key=self.apikey)

        # decode_geom=True adds a column called 'geometry'
        df = cc.read(self.test_read_table, limit=5, decode_geom=True)

        # alter the geometry so `geometry` and `the_geom` initially disagree
        df['geometry'] = df['geometry'].apply(lambda x: x.buffer(0.1))

        # the_geom should reflect encoded 'geometry' column
        _add_encoded_geom(df, 'geometry')

        # geometry column should equal the_geom after function call
        self.assertTrue(df['the_geom'].equals(
            df['geometry'].apply(_encode_geom)))

        # don't specify geometry column (should exist since decode_geom==True)
        df = cc.read(self.test_read_table, limit=5, decode_geom=True)
        df['geometry'] = df['geometry'].apply(lambda x: x.buffer(0.2))

        # the_geom should reflect encoded 'geometry' column
        _add_encoded_geom(df, None)

        # geometry column should equal the_geom after function call
        self.assertTrue(df['the_geom'].equals(
            df['geometry'].apply(_encode_geom)))

        # without decode_geom there is no 'geometry' column to encode
        df = cc.read(self.test_read_table, limit=5)

        # raise error if 'geometry' column does not exist
        with self.assertRaises(KeyError):
            _add_encoded_geom(df, None)

    def test_decode_geom(self):
        """context._decode_geom"""
        from cartoframes.context import _decode_geom
        # hex-encoded WKB for POINT(0 0), no SRID header
        point_hexwkb = '010100000000000000000000000000000000000000'
        self.assertEqual(_decode_geom(point_hexwkb).wkt, 'POINT (0 0)')
        # a missing geometry decodes to None
        self.assertIsNone(_decode_geom(None))

    def test_encode_geom(self):
        """context._encode_geom"""
        from cartoframes.context import _encode_geom
        from shapely import wkb
        import binascii as ba
        # hex-encoded WKB for POINT(0 0), no SRID header
        point_hexwkb = '010100000000000000000000000000000000000000'
        shapely_geom = wkb.loads(ba.unhexlify(point_hexwkb))
        # encoding should round-trip back to the original hex WKB
        self.assertEqual(_encode_geom(shapely_geom), point_hexwkb)
        # a missing geometry encodes to None
        self.assertIsNone(_encode_geom(None))

    def test_dtypes2pg(self):
        """context._dtypes2pg"""
        from cartoframes.context import _dtypes2pg
        # pandas dtype name -> expected PostgreSQL type (unknowns fall
        # back to text)
        expected = {
            'float64': 'numeric',
            'int64': 'numeric',
            'float32': 'numeric',
            'int32': 'numeric',
            'object': 'text',
            'bool': 'boolean',
            'datetime64[ns]': 'timestamp',
            'unknown_dtype': 'text'
        }
        for dtype, pgtype in expected.items():
            self.assertEqual(_dtypes2pg(dtype), pgtype)

    def test_pg2dtypes(self):
        """context._pg2dtypes"""
        from cartoframes.context import _pg2dtypes
        # PostgreSQL type -> expected pandas dtype name (unknowns fall
        # back to object)
        expected = {
            'date': 'datetime64[ns]',
            'number': 'float64',
            'string': 'object',
            'boolean': 'bool',
            'geometry': 'object',
            'unknown_pgdata': 'object'
        }
        for pgtype, dtype in expected.items():
            self.assertEqual(_pg2dtypes(pgtype), dtype)

    def test_debug_print(self):
        """context._debug_print"""
        cc = cartoframes.CartoContext(base_url=self.baseurl,
                                      api_key=self.apikey,
                                      verbose=True)
        # verbose=True with a requests Response object and with plain text
        resp = requests.get('http://httpbin.org/get')
        cc._debug_print(resp=resp)
        cc._debug_print(resp=resp.text)

        # non-response strings: one short, one long enough to be truncated
        short_str = 'this is a test'
        self.assertIsNone(cc._debug_print(test_str=short_str))
        self.assertIsNone(cc._debug_print(long_str=', '.join([short_str] * 100)))

        # verbose=False turns _debug_print into a no-op
        cc = cartoframes.CartoContext(base_url=self.baseurl,
                                      api_key=self.apikey,
                                      verbose=False)
        self.assertIsNone(cc._debug_print(resp=short_str))

    def test_data_boundaries(self):
        """context.CartoContext.data_boundaries"""
        cc = cartoframes.CartoContext(base_url=self.baseurl,
                                      api_key=self.apikey)

        # all boundary metadata
        boundary_meta = cc.data_boundaries()
        self.assertTrue(boundary_meta.shape[0] > 0,
                        msg='has non-zero number of boundaries')
        meta_cols = set((
            'geom_id',
            'geom_tags',
            'geom_type',
        ))
        # truthy (non-empty) intersection: metadata columns are present
        self.assertTrue(meta_cols & set(boundary_meta.columns))

        # boundary metadata in a region: table names, a bbox, a country name
        regions = (
            self.test_read_table,
            self.test_data_table,
            [5.9559111595, 45.8179931641, 10.4920501709, 47.808380127],
            'Australia',
        )
        for region in regions:
            boundary_meta = cc.data_boundaries(region=region)
            self.assertTrue(meta_cols & set(boundary_meta.columns))
            self.assertTrue(boundary_meta.shape[0] > 0,
                            msg='has non-zero number of boundaries')

        #  boundaries for world
        boundaries = cc.data_boundaries(boundary='us.census.tiger.state')
        self.assertTrue(boundaries.shape[0] > 0)
        self.assertEqual(boundaries.shape[1], 2)
        self.assertSetEqual(set((
            'the_geom',
            'geom_refs',
        )), set(boundaries.columns))

        # boundaries for region
        boundaries = ('us.census.tiger.state', )
        for b in boundaries:
            geoms = cc.data_boundaries(boundary=b, region=self.test_data_table)
            self.assertTrue(geoms.shape[0] > 0)
            self.assertEqual(geoms.shape[1], 2)
            self.assertSetEqual(set((
                'the_geom',
                'geom_refs',
            )), set(geoms.columns))

        # presence or lack of clipped boundaries
        nonclipped = (
            True,
            False,
        )
        for tf in nonclipped:
            meta = cc.data_boundaries(include_nonclipped=tf)
            # non-clipped US state boundaries appear only when requested
            self.assertEqual('us.census.tiger.state' in set(meta.geom_id), tf)

        # invalid regions: wrong-length bbox list and a non-region type
        with self.assertRaises(ValueError):
            cc.data_boundaries(region=[1, 2, 3])

        with self.assertRaises(ValueError):
            cc.data_boundaries(region=10)

    def test_data_discovery(self):
        """context.CartoContext.data_discovery"""
        cc = cartoframes.CartoContext(base_url=self.baseurl,
                                      api_key=self.apikey)

        meta = cc.data_discovery(self.test_read_table,
                                 keywords=('poverty', ),
                                 time=('2010 - 2014', ))
        meta_columns = set(
            ('denom_aggregate', 'denom_colname', 'denom_description',
             'denom_geomref_colname', 'denom_id', 'denom_name',
             'denom_reltype', 'denom_t_description', 'denom_tablename',
             'denom_type', 'geom_colname', 'geom_description',
             'geom_geomref_colname', 'geom_id', 'geom_name',
             'geom_t_description', 'geom_tablename', 'geom_timespan',
             'geom_type', 'id', 'max_score_rank', 'max_timespan_rank',
             'normalization', 'num_geoms', 'numer_aggregate', 'numer_colname',
             'numer_description', 'numer_geomref_colname', 'numer_id',
             'numer_name', 'numer_t_description', 'numer_tablename',
             'numer_timespan', 'numer_type', 'score', 'score_rank',
             'score_rownum', 'suggested_name', 'target_area', 'target_geoms',
             'timespan_rank', 'timespan_rownum'))
        self.assertSetEqual(set(meta.columns),
                            meta_columns,
                            msg='metadata columns are all there')
        # keyword/time filters are reflected in the returned measures
        self.assertTrue((meta['numer_timespan'] == '2010 - 2014').all())
        self.assertTrue(
            (meta['numer_description'].str.contains('poverty')).all())

        # test region = list of lng/lats
        with self.assertRaises(ValueError):
            cc.data_discovery([1, 2, 3])

        switzerland = [
            5.9559111595, 45.8179931641, 10.4920501709, 47.808380127
        ]
        dd = cc.data_discovery(switzerland, keywords='freight', time='2010')
        self.assertEqual(dd['numer_id'][0], 'eu.eurostat.tgs00078')

        dd = cc.data_discovery('Australia', regex='.*Torres Strait Islander.*')
        for nid in dd['numer_id'].values:
            # assertRegex replaces the deprecated assertRegexpMatches
            # (removed in Python 3.12); raw string avoids invalid escape
            # sequences for `\.`
            self.assertRegex(
                nid,
                r'^au\.data\.B01_Indig_[A-Za-z_]+Torres_St[A-Za-z_]+[FMP]$')

        with self.assertRaises(CartoException):
            cc.data_discovery('non_existent_table_abcdefg')

        dd = cc.data_discovery('United States',
                               boundaries='us.epa.huc.hydro_unit',
                               time=(
                                   '2006',
                                   '2010',
                               ))
        self.assertTrue(dd.shape[0] >= 1)

        # include_quantiles=False filters out quantile measures...
        poverty = cc.data_discovery('United States',
                                    boundaries='us.census.tiger.census_tract',
                                    keywords=[
                                        'poverty status',
                                    ],
                                    time='2011 - 2015',
                                    include_quantiles=False)
        df_quantiles = poverty[poverty.numer_aggregate == 'quantile']
        self.assertEqual(df_quantiles.shape[0], 0)

        # ...and include_quantiles=True keeps them
        poverty = cc.data_discovery('United States',
                                    boundaries='us.census.tiger.census_tract',
                                    keywords=[
                                        'poverty status',
                                    ],
                                    time='2011 - 2015',
                                    include_quantiles=True)
        df_quantiles = poverty[poverty.numer_aggregate == 'quantile']
        self.assertTrue(df_quantiles.shape[0] > 0)

    def test_data(self):
        """context.CartoContext.data"""
        cc = cartoframes.CartoContext(base_url=self.baseurl,
                                      api_key=self.apikey)

        # discovery metadata drives which measures `data` augments
        meta = cc.data_discovery(self.test_read_table,
                                 keywords=('poverty', ),
                                 time=('2010 - 2014', ))
        data = cc.data(self.test_data_table, meta)
        anscols = set(meta['suggested_name'])
        origcols = set(cc.read(self.test_data_table, limit=1).columns)
        # augmented frame contains exactly the suggested measure columns
        self.assertSetEqual(anscols, set(data.columns) - origcols)

        # metadata can also be a hand-built list of measure dicts
        meta = [
            {
                'numer_id': 'us.census.acs.B19013001',
                'geom_id': 'us.census.tiger.block_group',
                'numer_timespan': '2011 - 2015'
            },
        ]
        data = cc.data(self.test_data_table, meta)
        self.assertSetEqual(set(('median_income_2011_2015', )),
                            set(data.columns) - origcols)

        # with self.assertRaises(NotImplementedError):
        #     cc.data(self.test_data_table, meta, how='geom_ref')

        with self.assertRaises(ValueError, msg='no measures'):
            meta = cc.data_discovery('United States', keywords='not a measure')
            cc.data(self.test_read_table, meta)

        with self.assertRaises(ValueError, msg='too many metadata measures'):
            # returns ~180 measures
            meta = cc.data_discovery(region='united states',
                                     keywords='education')
            cc.data(self.test_read_table, meta)

        with self.assertRaises(NameError, msg='column name already exists'):
            meta = cc.data_discovery(region='united states',
                                     time='2006 - 2010',
                                     regex='.*walked to work.*',
                                     boundaries='us.census.tiger.census_tract')
            cc.data(self.test_data_table, meta)
コード例 #32
0
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# SQL wrapper
sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY))

# Dataset manager
# NOTE(review): relies on `auth_client` created earlier (outside this
# excerpt) -- confirm it matches the credentials used for `sql` above
dataset_manager = DatasetManager(auth_client)

# Get all datasets from account
datasets = dataset_manager.all()

# loop over all datasets from account, exporting each to a local file
for tablename in datasets:
    query = 'SELECT * FROM {table_name}'.format(table_name=tablename.name) 
    try:
        # the SQL API returns the whole table in the requested export format
        result = sql.send(query, format=args.EXPORT_FORMAT)
    except Exception as e:
        # abort the whole export on the first failed table
        logger.error(str(e))
        break
    data_folder = Path(args.SAVE_FOLDER) / "{table_name}.{format}".format(table_name=tablename.name,format=args.EXPORT_FORMAT)
    # write file to files folder
    try:
        data_folder.write_bytes(result)
    except Exception as e:
        logger.error(str(e))
        break
コード例 #33
0
ファイル: copy_example.py プロジェクト: zedauna/carto-python
args = parser.parse_args()

# Set authentication to CARTO
if args.CARTO_BASE_URL and args.CARTO_API_KEY:
    auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY)
else:
    logger.error('You need to provide valid credentials, run with '
                 '-h parameter for details')
    sys.exit(1)

# Create and cartodbfy a table
sqlClient = SQLClient(auth_client)
sqlClient.send("""
    CREATE TABLE IF NOT EXISTS copy_example (
      the_geom geometry(Geometry,4326),
      name text,
      age integer
    )
    """)
# cartodbfy registers the table with CARTO (adds cartodb_id,
# the_geom_webmercator, triggers) so it appears as a dataset
sqlClient.send("SELECT CDB_CartodbfyTable(current_schema, 'copy_example')")

copyClient = CopySQLClient(auth_client)

# COPY FROM example: stream a local CSV into the table via the SQL API
logger.info("COPY'ing FROM file...")
query = ('COPY copy_example (the_geom, name, age) '
         'FROM stdin WITH (FORMAT csv, HEADER true)')
result = copyClient.copyfrom_file_path(query, 'files/copy_from.csv')
logger.info('result = %s' % result)

# COPY TO example
コード例 #34
0
    sys.exit(1)

# SQL wrapper
sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY))

# get username from base_url (cloud accounts look like https://user.carto.com)
substring = re.search('https://(.+?).carto.com', args.CARTO_BASE_URL)
if substring:
    username = substring.group(1)

# check all table name of account
all_tables = []

# list regular tables (relkind = 'r') owned by the current database user
tables = sql.send(
    "select pg_class.relname from pg_class, pg_roles, pg_namespace" +
    " where pg_roles.oid = pg_class.relowner and " +
    "pg_roles.rolname = current_user " +
    "and pg_namespace.oid = pg_class.relnamespace and pg_class.relkind = 'r'")

q = "select \
  'CREATE TABLE ' || relname || E'\n(\n' || \
  array_to_string( \
    array_agg( \
      '    ' || column_name || ' ' ||  type || ' '|| not_null \
    ) \
    , E',\n' \
  ) || E'\n);\n' as create_table \
from \
( \
  select  \
    distinct on (column_name) c.relname, a.attname AS column_name, \
コード例 #35
0
def test_no_auth_sql_error_get(no_auth_client):
    """Querying without credentials over GET must raise CartoException.

    ``do_post=False`` forces the request through GET. The original call
    passed ``{'do_post': False}`` positionally, which bound the dict to
    ``SQLClient.send``'s second parameter (``parse_json``) instead of
    ``do_post`` -- it must be a keyword argument.
    """
    sql = SQLClient(no_auth_client)

    with pytest.raises(CartoException):
        sql.send('select * from non_existing_dataset', do_post=False)
コード例 #36
0
def delete_from_carto_by_id(asset_id):
    """Delete the row whose ``id`` equals *asset_id* from TABLE_NAME.

    Args:
        asset_id: Value of the table's ``id`` column identifying the row.

    Returns:
        dict: The CARTO SQL API response.
    """
    auth_client = APIKeyAuthClient(api_key=CARTO_API_KEY,
                                   base_url=USR_BASE_URL)
    sql = SQLClient(auth_client)
    # The SQL API has no parameter binding, so escape single quotes by
    # doubling them; otherwise a quoted asset_id breaks out of the string
    # literal (SQL injection / malformed query).
    safe_id = str(asset_id).replace("'", "''")
    results = sql.send(f"DELETE from {TABLE_NAME} WHERE id = '{safe_id}'")
    return results
コード例 #37
0
class CartoContext(object):
    """Manages connections with CARTO for data and map operations. Modeled
    after `SparkContext
    <https://jaceklaskowski.gitbooks.io/mastering-apache-spark/content/spark-sparkcontext.html>`__.

    Example:
        Create a CartoContext object
        .. code:: python

            import cartoframes
            cc = cartoframes.CartoContext(BASEURL, APIKEY)

    Args:
        base_url (str): Base URL of CARTO user account. Cloud-based accounts
            are of the form ``https://{username}.carto.com`` (e.g.,
            https://eschbacher.carto.com for user ``eschbacher``). On-premises
            installation users should ask their admin.
        api_key (str): CARTO API key.
        session (requests.Session, optional): requests session. See `requests
            documentation <http://docs.python-requests.org/en/master/user/advanced/>`__
            for more information.
        verbose (bool, optional): Output underlying process states (True), or
            suppress (False, default)

    Returns:
        :obj:`CartoContext`: A CartoContext object that is authenticated against
        the user's CARTO account.
    """
    def __init__(self, base_url=None, api_key=None, session=None, verbose=0):
        """Resolve credentials and set up API clients and caches.

        See the class docstring for parameter details. NOTE: calls
        ``_is_org_user``, which issues a SQL request, so constructing a
        CartoContext requires connectivity to the CARTO account.
        """
        # resolves explicit arguments and/or stored default credentials
        self.api_key, self.base_url = _process_credentials(api_key,
                                                           base_url)
        self.auth_client = APIKeyAuthClient(base_url=self.base_url,
                                            api_key=self.api_key,
                                            session=session)
        self.sql_client = SQLClient(self.auth_client)
        self.username = self.auth_client.username
        self.is_org = self._is_org_user()  # network call (SQL request)

        self._map_templates = {}  # cache of named map templates
        self._srcdoc = None  # cached map HTML source document
        self._verbose = verbose  # truthy enables _debug_print output

    def _is_org_user(self):
        """Report whether user is in a multiuser CARTO organization or not"""
        res = self.sql_client.send('SHOW search_path')

        paths = [p.strip() for p in res['rows'][0]['search_path'].split(',')]
        # is an org user if first item is not `public`
        return paths[0] != 'public'

    def read(self, table_name, limit=None, index='cartodb_id',
             decode_geom=False):
        """Read tables from CARTO into pandas DataFrames.

        Example:
            .. code:: python

                import cartoframes
                cc = cartoframes.CartoContext(BASEURL, APIKEY)
                df = cc.read('acadia_biodiversity')

        Args:
            table_name (str): Name of table in user's CARTO account.
            limit (int, optional): Read only ``limit`` lines from
                ``table_name``. Defaults to `None`, which reads the full table.
            index (str, optional): Not currently in use.

        Returns:
            pandas.DataFrame: DataFrame representation of `table_name` from
            CARTO.
        """
        query = 'SELECT * FROM "{table_name}"'.format(table_name=table_name)
        if limit:
            if (limit >= 0) and isinstance(limit, int):
                query += ' LIMIT {limit}'.format(limit=limit)
            else:
                raise ValueError("`limit` parameter must an integer >= 0")

        return self.query(query, decode_geom=decode_geom)


    def write(self, df, table_name, temp_dir='/tmp', overwrite=False,
              lnglat=None, encode_geom=False, geom_col=None):
        """Write a DataFrame to a CARTO table.

        Example:
            .. code:: python

                cc.write(df, 'brooklyn_poverty', overwrite=True)

        Args:
            df (pandas.DataFrame): DataFrame to write to ``table_name`` in user
                CARTO account
            table_name (str): Table to write ``df`` to in CARTO.
            temp_dir (str, optional): Directory for temporary storage of data
                that is sent to CARTO. Defaults to ``/tmp`` (Unix-like systems).
            overwrite (bool, optional): Behavior for overwriting ``table_name``
                if it exits on CARTO. Defaults to ``False``.
            lnglat (tuple, optional): lng/lat pair that can be used for creating
                a geometry on CARTO. Defaults to ``None``. In some cases,
                geometry will be created without specifying this. See CARTO's
                `Import API <https://carto.com/docs/carto-engine/import-api/standard-tables>`__
                for more information.
            encode_geom (bool, optional): Whether to write `geom_col` to CARTO
                as `the_geom`.
            geom_col (str, optional): The name of the column where geometry
                information is stored. Used in conjunction with `encode_geom`.

        Returns:
            None

        Raises:
            NameError: If ``overwrite`` is ``False`` and ``table_name``
                already exists on the account (raised by ``_table_exists``).
        """
        if encode_geom:
            # serializes df[geom_col] into a `the_geom` column in-place
            _add_encoded_geom(df, geom_col)

        if not overwrite:
            # error if table exists and user does not want to overwrite
            self._table_exists(table_name)

        # large frames go through the batched path (chunked temp tables)
        if df.shape[0] > MAX_IMPORT_ROWS:
            final_table_name = self._send_batches(df, table_name, temp_dir,
                                                  geom_col)
        else:
            final_table_name = self._send_dataframe(df, table_name, temp_dir,
                                                    geom_col)
            self._set_schema(df, final_table_name)

        # create geometry column from lat/longs if requested
        if lnglat:
            # TODO: make this a batch job if it is a large dataframe or move
            #       inside of _send_dataframe and/or batch
            tqdm.write('Creating geometry out of columns '
                       '`{lng}`/`{lat}`'.format(lng=lnglat[0],
                                                lat=lnglat[1]))
            self.sql_client.send('''
                UPDATE "{table_name}"
                SET the_geom = CDB_LatLng("{lat}"::numeric,
                                          "{lng}"::numeric)
            '''.format(table_name=final_table_name,
                       lng=lnglat[0],
                       lat=lnglat[1]))

        # warn about any columns PostgreSQL renamed during import
        self._column_normalization(df, final_table_name, geom_col)
        tqdm.write('Table written to CARTO: '
                   '{base_url}dataset/{table_name}'.format(
                       base_url=self.base_url,
                       table_name=final_table_name))

    def _table_exists(self, table_name):
        """Checks to see if table exists"""
        try:
            self.sql_client.send('''
                EXPLAIN SELECT * FROM "{table_name}"
                '''.format(table_name=table_name))
            raise NameError(
                'Table `{table_name}` already exists. '
                'Run with `overwrite=True` if you wish to replace the '
                'table.'.format(table_name=table_name))
        except CartoException as err:
            # If table doesn't exist, we get an error from the SQL API
            self._debug_print(err=err)
            return False

        return False

    def _send_batches(self, df, table_name, temp_dir, geom_col):
        """Batch sending a dataframe

        Args:
            df (pandas.DataFrame): DataFrame that will be batched up for
                sending to CARTO
            table_name (str): Name of table to send DataFrame to
            temp_dir (str): Local directory for temporary storage of DataFrame
                written to file that will be sent to CARTO
            geom_col (str): Name of encoded geometry column (if any) that will
                be dropped or converted to `the_geom` column

        Returns:
            final_table_name (str): Final table name on CARTO that the
            DataFrame is stored in

        Exceptions:
            * TODO: add more (Out of storage)
        """
        subtables = []
        # send dataframe chunks to carto; grouping rows by i // MAX_IMPORT_ROWS
        # partitions the frame into MAX_IMPORT_ROWS-sized chunks
        for chunk_num, chunk in tqdm(df.groupby([i // MAX_IMPORT_ROWS
                                                 for i in range(df.shape[0])]),
                                     desc='Uploading in batches: '):
            # truncate to 40 chars so name + suffix stays within
            # PostgreSQL's identifier length limit
            temp_table = '{orig}_cartoframes_temp_{chunk}'.format(
                orig=table_name[:40],
                chunk=chunk_num)
            try:
                # send dataframe chunk, get new name if collision
                temp_table = self._send_dataframe(chunk, temp_table,
                                                  temp_dir, geom_col)
            except CartoException as err:
                # drop any chunks already uploaded before re-raising
                self._drop_tables(subtables)
                raise CartoException(err)

            if temp_table:
                subtables.append(temp_table)
            self._debug_print(chunk_num=chunk_num,
                              chunk_shape=str(chunk.shape),
                              temp_table=temp_table)

        # combine chunks into final table
        try:
            # NOTE(review): _df2pg_schema is called with one argument here
            # but with (df, pgcols) elsewhere in the codebase -- confirm the
            # second parameter is optional
            select_base = ('SELECT %(schema)s '
                           'FROM "{table}"') % dict(schema=_df2pg_schema(df))
            unioned_tables = '\nUNION ALL\n'.join([select_base.format(table=t)
                                                   for t in subtables])
            self._debug_print(unioned=unioned_tables)
            query = '''
                DROP TABLE IF EXISTS "{table_name}";
                CREATE TABLE "{table_name}" As {unioned_tables};
                ALTER TABLE {table_name} DROP COLUMN IF EXISTS cartodb_id;
                {drop_tables}
                SELECT CDB_CartoDBFYTable('{org}', '{table_name}');
                '''.format(table_name=table_name,
                           unioned_tables=unioned_tables,
                           org=self.username if self.is_org else 'public',
                           drop_tables=_drop_tables_query(subtables))
            self._debug_print(query=query)
            _ = self.sql_client.send(query)
        except CartoException as err:
            try:
                # best-effort cleanup of the temporary chunk tables
                self._drop_tables(subtables)
            except CartoException as err:  # NOTE(review): shadows outer `err`
                warn('Failed to drop the following subtables from CARTO '
                     'account: {}'.format(', '.join(subtables)))
            finally:
                raise Exception('Failed to upload dataframe: {}'.format(err))

        return table_name

    def _drop_tables(self, tables):
        """Issue a single query dropping every table in `tables`.

        Args:
            tables (list of str): list of table names

        Returns:
            None
        """
        self.sql_client.send(_drop_tables_query(tables))
        return None

    def _send_dataframe(self, df, table_name, temp_dir, geom_col):
        """Send a DataFrame to CARTO to be imported as a SQL table

        Args:
            df (pandas.DataFrame): DataFrame that is will be sent to CARTO
            table_name (str): Name on CARTO for the table that will have the
                data from ``df``
            temp_dir (str): Name of directory used for temporarily storing the
                DataFrame file to sent to CARTO
            geom_col (str): Name of geometry column

        Returns:
            final_table_name (str): Name of final table. This method will
            overwrite the table `table_name` if it already exists.

        Raises:
            CartoException: If the Import API rejects the upload.
        """
        tempfile = '{temp_dir}/{table_name}.csv'.format(temp_dir=temp_dir,
                                                        table_name=table_name)
        self._debug_print(tempfile=tempfile)
        # the encoded geometry column (if any) is regenerated server-side,
        # so it is not uploaded
        df.drop(geom_col, axis=1, errors='ignore').to_csv(tempfile)

        # try/finally guarantees the temporary CSV is removed even when
        # _auth_send raises (the original leaked the file in that case)
        try:
            with open(tempfile, 'rb') as f:
                res = self._auth_send('api/v1/imports', 'POST',
                                      files={'file': f},
                                      params={'type_guessing': 'false'},
                                      stream=True)
                self._debug_print(res=res)

                if not res['success']:
                    raise CartoException('Failed to send DataFrame')
                import_id = res['item_queue_id']
        finally:
            os.remove(tempfile)

        # poll the Import API until the job reaches a terminal state
        final_table_name = table_name
        while True:
            import_job = self._check_import(import_id)
            self._debug_print(import_job=import_job)
            final_table_name = self._handle_import(import_job, table_name)
            if import_job['state'] == 'complete':
                break
            # Wait a second before doing another request
            time.sleep(1.0)

        return final_table_name

    def _set_schema(self, dataframe, table_name):
        """Update a table associated with a dataframe to have the equivalent
        schema"""
        utility_cols = ('the_geom', 'the_geom_webmercator', 'cartodb_id')
        alter_temp = ('ALTER COLUMN "{col}" TYPE {ctype} USING '
                      'NULLIF("{col}", \'\')::{ctype}')
        # one ALTER COLUMN clause per non-utility dataframe column
        clauses = [
            alter_temp.format(col=col, ctype=_dtypes2pg(dtype))
            for col, dtype in zip(dataframe.columns, dataframe.dtypes)
            if col not in utility_cols
        ]
        alter_query = 'ALTER TABLE "{table}" {alter_cols};'.format(
            table=table_name,
            alter_cols=', '.join(clauses))
        self._debug_print(alter_query=alter_query)
        try:
            self.sql_client.send(alter_query)
        except CartoException as err:
            # the table still exists with text columns; warn instead of fail
            warn('DataFrame written to CARTO but table schema failed to '
                 'update to match DataFrame. All columns have data type '
                 '`text`. CARTO error: `{err}`. Query: {query}'.format(
                     err=err,
                     query=alter_query))

    def _check_import(self, import_id):
        """Check the status of an Import API job"""

        res = self._auth_send('api/v1/imports/{}'.format(import_id),
                              'GET')
        return res

    def _handle_import(self, import_job, table_name):
        """Handle state of import job

        Args:
            import_job (dict): Import API job status payload.
            table_name (str): Desired final table name.

        Returns:
            str: ``table_name`` (on name collision the imported table is
            renamed back to it).

        Raises:
            CartoException: If the import job is in the ``failure`` state.
        """
        if import_job['state'] == 'failure':
            # map documented Import API error codes to actionable messages
            if import_job['error_code'] == 8001:
                raise CartoException('Over CARTO account storage limit for '
                                     'user `{}`. Try subsetting your '
                                     'DataFrame or dropping columns to reduce '
                                     'the data size.'.format(self.username))
            elif import_job['error_code'] == 6668:
                raise CartoException('Too many rows in DataFrame. Try '
                                     'subsetting DataFrame before writing to '
                                     'CARTO.')
            else:
                raise CartoException('Error code: `{}`. See CARTO Import '
                                     'API error documentation for more '
                                     'information: https://carto.com/docs/'
                                     'carto-engine/import-api/import-errors'
                                     ''.format(import_job['error_code']))
        elif import_job['state'] == 'complete':
            self._debug_print(final_table=import_job['table_name'])
            # the Import API appends a suffix on name collision; rename the
            # imported table back to the requested name
            if import_job['table_name'] != table_name:
                try:
                    res = self.sql_client.send('''
                        DROP TABLE IF EXISTS {orig_table};
                        ALTER TABLE {dupe_table}
                        RENAME TO {orig_table};
                        '''.format(
                            orig_table=table_name,
                            dupe_table=import_job['table_name']))

                    self._debug_print(res=res)
                except Exception as err:
                    self._debug_print(err=err)
                    raise Exception('Cannot overwrite table `{table_name}` '
                                    '({err}). DataFrame was written to '
                                    '`{new_table}` instead.'.format(
                                        table_name=table_name,
                                        err=err,
                                        new_table=import_job['table_name']))
        # any other state (e.g. 'pending') falls through; the caller polls
        return table_name

    def _column_normalization(self, dataframe, table_name, geom_col):
        """Print a warning if there is a difference between the normalized
        PostgreSQL column names and the ones in the DataFrame"""

        pgcolumns = self.sql_client.send('''
            SELECT *
            FROM "{table_name}"
            LIMIT 0'''.format(table_name=table_name))['fields'].keys()
        diff_cols = (set(dataframe.columns) ^ set(pgcolumns)) - {'cartodb_id',
                                                                 geom_col}
        if diff_cols:
            cols = ', '.join('`{}`'.format(c) for c in diff_cols)
            tqdm.write('The following columns were renamed because of '
                       'PostgreSQL column normalization requirements: '
                       '{cols}'.format(cols=cols))

    def sync(self, dataframe, table_name):
        """Granularly synchronize a DataFrame with a CARTO table.

        Intended behavior: for large DataFrames/tables, update only changed
        cells instead of doing a bulk upload; for small ones, fall back to
        the Import API.

        Note:
            Not yet implemented.
        """
        return None


    def query(self, query, table_name=None, decode_geom=False):
        """Run an arbitrary SQL query against a CARTO account and return the
        result as a pandas DataFrame. Can also be used to perform database
        operations (creating/dropping tables, adding columns, updates, etc.).

        Args:
            query (str): Query to run against CARTO user database.
            table_name (str, optional): If set, this will create a new
                table in the user's CARTO account that is the result of the
                query. Defaults to None (no table created).
            decode_geom (bool, optional): If True, adds a `geometry` column
                decoded from `the_geom`. Defaults to False.

        Returns:
            pandas.DataFrame: DataFrame representation of the query result.
            Pandas data types are inferred from PostgreSQL data types; for
            PostgreSQL date types the dtype 'object' is used. Returns None
            when the result set has no fields.
        """
        self._debug_print(query=query)
        if table_name:
            create_table_query = '''
                CREATE TABLE {table_name} As
                SELECT *
                  FROM ({query}) As _wrap;
                SELECT CDB_CartodbfyTable('{org}', '{table_name}');
            '''.format(table_name=table_name,
                       query=query,
                       org=(self.username if self.is_org else 'public'))
            self._debug_print(create_table_query=create_table_query)

            create_table_res = self.sql_client.send(create_table_query)
            self._debug_print(create_table_res=create_table_res)

            new_table_name = create_table_res['rows'][0]['cdb_cartodbfytable']
            self._debug_print(new_table_name=new_table_name)

            resp = self.sql_client.send(
                'SELECT * FROM {table_name}'.format(table_name=new_table_name))
        else:
            resp = self.sql_client.send(query)

        self._debug_print(select_res=resp)

        # Map PostgreSQL types reported by the SQL API onto pandas dtypes.
        pg2dtypes = {
            'date': 'object',
            'number': 'float64',
            'string': 'object',
            'boolean': 'bool',
            'geometry': 'object',
        }

        fields = resp['fields']
        schema = {}
        for field, meta in fields.items():
            if field == 'cartodb_id':
                schema[field] = 'int64'
            else:
                schema[field] = pg2dtypes.get(meta['type'], 'object')
        if not schema:
            return None
        self._debug_print(fields=fields, schema=schema)

        df = pd.DataFrame(data=resp['rows'],
                          columns=list(fields)).astype(schema)

        if 'cartodb_id' in fields:
            df.set_index('cartodb_id', inplace=True)

        if decode_geom:
            df['geometry'] = df.the_geom.apply(_decode_geom)
        return df


    def map(self, layers=None, interactive=True,
            zoom=None, lat=None, lng=None, size=(800, 400),
            ax=None):
        """Produce a CARTO map visualizing data layers.

        Example:
            Create a map with two data layers, and one BaseMap layer.
            ::

                import cartoframes
                from cartoframes import Layer, BaseMap, styling
                cc = cartoframes.CartoContext(BASEURL, APIKEY)
                cc.map(layers=[BaseMap(),
                               Layer('acadia_biodiversity',
                                     color={'column': 'simpson_index',
                                            'scheme': styling.tealRose(7)}),
                               Layer('peregrine_falcon_nest_sites',
                                     size='num_eggs',
                                     color={'column': 'bird_id',
                                            'scheme': styling.vivid(10)})],
                       interactive=True)
        Args:
            layers (list, optional): List of one or more of the following:

                - Layer: cartoframes Layer object for visualizing data from a
                  CARTO table. See `layer.Layer <#layer.Layer>`__ for all
                  styling options.
                - BaseMap: Basemap for contextualizng data layers. See
                  `layer.BaseMap <#layer.BaseMap>`__ for all styling options.
                - QueryLayer: Layer from an arbitrary query. See
                  `layer.QueryLayer <#layer.QueryLayer>`__ for all styling
                  options.

            interactive (bool, optional): Defaults to ``True`` to show an
                interactive slippy map. Setting to ``False`` creates a static
                map.
            zoom (int, optional): Zoom level of map. Acceptable values are
                usually in the range 0 to 19. 0 has the entire earth on a
                single tile (256px square). Zoom 19 is the size of a city
                block. Must be used in conjunction with ``lng`` and ``lat``.
                Defaults to a view to have all data layers in view.
            lat (float, optional): Latitude value for the center of the map.
                Must be used in conjunction with ``zoom`` and ``lng``. Defaults
                to a view to have all data layers in view.
            lng (float, optional): Longitude value for the center of the map.
                Must be used in conjunction with ``zoom`` and ``lat``. Defaults
                to a view to have all data layers in view.
            size (tuple, optional): List of pixel dimensions for the map. Format
                is ``(width, height)``. Defaults to ``(800, 400)``.
            ax (optional): Matplotlib axes to draw a static map into. Only
                used when ``interactive`` is ``False``.

        Returns:
            IPython.display.HTML: Interactive maps are rendered in an ``iframe``,
            while static maps are rendered in ``img`` tags.

        Raises:
            NotImplementedError: When not running inside IPython, or when a
                time layer is supplied (animated maps not yet supported).
            ValueError: On more than 8 layers, more than one basemap/time
                layer, or a partial zoom/lat/lng specification.
        """
        # TODO: add layers preprocessing method like
        #       layers = process_layers(layers)
        #       that uses up to layer limit value error
        if not hasattr(IPython, 'display'):
            raise NotImplementedError('Nope, cannot display maps at the '
                                      'command line.')

        # Normalize `layers` into a mutable list (accept None, a single
        # layer, or any iterable of layers).
        if layers is None:
            layers = []
        # NOTE(review): collections.Iterable moved to collections.abc in
        # Python 3.3 and was removed from collections in 3.10 — confirm the
        # supported Python versions for this module.
        elif not isinstance(layers, collections.Iterable):
            layers = [layers]
        else:
            layers = list(layers)

        if len(layers) > 8:
            raise ValueError('map can have at most 8 layers')

        # `any != all` is true exactly when some but not all of
        # zoom/lat/lng were provided.
        if any([zoom, lat, lng]) != all([zoom, lat, lng]):
            raise ValueError('zoom, lat, and lng must all or none be provided')

        # When no layers are passed, set default zoom
        if ((len(layers) == 0 and zoom is None) or
                (len(layers) == 1 and layers[0].is_basemap)):
            [zoom, lat, lng] = [3, 38, -99]
        has_zoom = zoom is not None

        # Check basemaps, add one if none exist
        base_layers = [idx for idx, layer in enumerate(layers)
                       if layer.is_basemap]
        if len(base_layers) > 1:
            raise ValueError('map can at most take 1 BaseMap layer')
        if len(base_layers) > 0:
            # Ensure the (single) basemap is the first layer.
            layers.insert(0, layers.pop(base_layers[0]))
        else:
            layers.insert(0, BaseMap())

        # Check for a time layer, if it exists move it to the front
        time_layers = [idx for idx, layer in enumerate(layers)
                       if not layer.is_basemap and layer.time]
        time_layer = layers[time_layers[0]] if len(time_layers) > 0 else None
        if len(time_layers) > 1:
            raise ValueError('Map can at most take 1 Layer with time '
                             'column/field')
        if time_layer:
            raise NotImplementedError('Animated maps are not yet supported')
            # NOTE(review): the code below is unreachable until the raise
            # above is removed (animated-map support).
            if not interactive:
                raise ValueError('map cannot display a static image with a '
                                 'time_column')
            layers.append(layers.pop(time_layers[0]))

        # If basemap labels are on front, add labels layer
        basemap = layers[0]
        if basemap.is_basic() and basemap.labels == 'front':
            layers.append(BaseMap(basemap.source,
                                  labels=basemap.labels,
                                  only_labels=True))

        # Setup layers
        for idx, layer in enumerate(layers):
            layer._setup(layers, idx)

        nb_layers = non_basemap_layers(layers)
        options = {'basemap_url': basemap.url}

        # Validate each data layer's query and collect its CartoCSS/SQL.
        for idx, layer in enumerate(nb_layers):
            self._check_query(layer.query,
                              style_cols=layer.style_cols)
            options['cartocss_' + str(idx)] = layer.cartocss
            options['sql_' + str(idx)] = layer.query


        params = {
            'config': json.dumps(options),
            # cache-buster so repeated identical requests are not cached
            'anti_cache': random.random(),
        }

        if has_zoom:
            params.update({'zoom': zoom, 'lat': lat, 'lon': lng})
            options.update({'zoom': zoom, 'lat': lat, 'lng': lng})
        else:
            options.update(self._get_bounds(nb_layers))

        map_name = self._send_map_template(layers, has_zoom=has_zoom)
        api_url = '{base_url}api/v1/map'.format(base_url=self.base_url)

        static_url = ('{api_url}/static/named/{map_name}'
                      '/{width}/{height}.png?{params}').format(
                          api_url=api_url,
                          map_name=map_name,
                          width=size[0],
                          height=size[1],
                          params=urlencode(params))

        html = '<img src="{url}" />'.format(url=static_url)

        # TODO: write this as a private method
        if interactive:
            netloc = urlparse(self.base_url).netloc
            domain = 'carto.com' if netloc.endswith('.carto.com') else netloc

            def safe_quotes(text, escape_single_quotes=False):
                """htmlify string"""
                if isinstance(text, str):
                    safe_text = text.replace('"', "&quot;")
                    if escape_single_quotes:
                        safe_text = safe_text.replace("'", "&#92;'")
                    return safe_text.replace('True', 'true')
                return text

            config = {
                'user_name': self.username,
                'maps_api_template': self.base_url[:-1],
                'sql_api_template': self.base_url[:-1],
                'tiler_protocol': 'https',
                'tiler_domain': domain,
                # NOTE(review): port '80' with an https tiler protocol looks
                # inconsistent — confirm intended port.
                'tiler_port': '80',
                'type': 'torque' if time_layer else 'namedmap',
                'named_map': {
                    'name': map_name,
                    'params': {
                        k: safe_quotes(v, escape_single_quotes=True)
                        for k, v in dict_items(options)
                    },
                },
            }

            map_options = {
                'filter': ['http', 'mapnik', 'torque'],
                'https': True,
            }

            if time_layer:
                config.update({
                    'order': 1,
                    'options': {
                        'query': time_layer.query,
                        'user_name': self.username,
                        'tile_style': time_layer.torque_cartocss,
                    }
                })
                config['named_map'].update({
                    'layers': [{
                        'layer_name': 't',
                    }],
                })
                map_options.update({
                    'time_slider': True,
                    'loop': True,
                })
            bounds = [] if has_zoom else [[options['north'], options['east']],
                                          [options['south'], options['west']]]

            content = self._get_iframe_srcdoc(config=config,
                                              bounds=bounds,
                                              options=options,
                                              map_options=map_options)

            img_html = html
            html = (
                '<iframe srcdoc="{content}" width={width} height={height}>'
                '  Preview image: {img_html}'
                '</iframe>'
            ).format(content=safe_quotes(content),
                     width=size[0],
                     height=size[1],
                     img_html=img_html)
            return IPython.display.HTML(html)
        else:
            try:
                import matplotlib.image as mpi
                import matplotlib.pyplot as plt
            except ImportError:
                warn('Matplotlib not detected. Saving image directly to disk')
                raise NotImplementedError
            raw_data = mpi.imread(static_url)
            f = plt.gcf()
            if ax is None:
                ax = plt.gca()
            ax.imshow(raw_data)
            ax.axis('off')
            return ax



    def data_boundaries(self, df=None, table_name=None):
        """Placeholder for Data Observatory boundary support.

        Not currently implemented.
        """
        return None


    def data_discovery(self, keywords=None, regex=None, time=None,
                       boundary=None):
        """Placeholder for Data Observatory measure discovery.

        Not currently implemented.
        """
        return None


    def data_augment(self, table_name, metadata):
        """Augment an existing CARTO table with `Data Observatory
        <https://carto.com/data-observatory>`__ measures. See the full `Data
        Observatory catalog
        <https://cartodb.github.io/bigmetadata/index.html>`__ for all available
        measures. The result of this operation is:

        1. It updates `table_name` by adding columns from the Data Observatory
        2. It returns a pandas DataFrame representation of that newly augmented
           table.

        Note:
            This method alters `table_name` in the user's CARTO database by
            adding additional columns. To avoid this, create a copy of the
            table first and use the new copy instead.

        Example:
            Add new measures to a CARTO table and pass it to a pandas DataFrame.
            Using the "Median Household Income in the past 12 months" measure
            from the `Data Observatory Catalog
            <https://cartodb.github.io/bigmetadata/united_states/income.html#median-household-income-in-the-past-12-months>`__.
            ::

                import cartoframes
                cc = cartoframes.CartoContext(BASEURL, APIKEY)
                median_income = [{'numer_id': 'us.census.acs.B19013001',
                                  'geom_id': 'us.census.tiger.block_group',
                                  'numer_timespan': '2011 - 2015'}]
                df = cc.data_augment('transaction_events',
                                     median_income)

        Args:
            table_name (str): Name of table on CARTO account that Data
                Observatory measures are to be added to.
            metadata (list of dicts): List of all measures to add to
                `table_name`. Each `dict` has the following keys:

                - `numer_id` (str): The identifier for the desired measurement
                - `geom_id` (str, optional): Identifier for a desired
                  geographic boundary level to use when calculating measures.
                  Will be automatically assigned if undefined
                - `normalization` (str, optional): The desired normalization. One
                  of 'area', 'prenormalized', or 'denominated'. 'Area' will
                  normalize the measure per square kilometer, 'prenormalized'
                  will return the original value, and 'denominated' will
                  normalize by a denominator.
                - `denom_id` (str, optional): Measure ID from DO catalog
                - `numer_timespan` (str, optional): The desired timespan for the
                  measurement. Defaults to most recent timespan available if
                  left unspecified.
                - `geom_timespan` (str, optional): The desired timespan for the
                  geometry. Defaults to timespan matching `numer_timespan` if
                  left unspecified.
                - `target_area` (str, optional): Instead of aiming to have
                  `target_geoms` in the area of the geometry passed as extent,
                  fill this area. Unit is square degrees WGS84. Set this to
                  `0` if you want to use the smallest source geometry for this
                  element of metadata, for example if you're passing in points.
                - `target_geoms` (str, optional): Override global `target_geoms`
                  for this element of metadata
                - `max_timespan_rank` (str, optional): Override global
                  `max_timespan_rank` for this element of metadata
                - `max_score_rank` (str, optional): Override global
                  `max_score_rank` for this element of metadata

        Returns:
            pandas.DataFrame: A DataFrame representation of `table_name` which
            has new columns for each measure in `metadata`.

        Raises:
            CartoException: If the `obs_augment_table` helper functions
                cannot be installed in the user's account.
        """

        try:
            # Install the obs_augment_table() helper (shipped as a SQL asset
            # alongside this module) into the user's account before use.
            with open(os.path.join(os.path.dirname(__file__),
                                   'assets/data_obs_augment.sql'), 'r') as f:
                augment_functions = f.read()
            self.sql_client.send(augment_functions)
        except Exception as err:
            raise CartoException("Could not install `obs_augment_table` onto "
                                 "user account ({})".format(err))

        # augment with data observatory metadata
        # The measure specs are passed to the SQL function as a JSON string.
        augment_query = '''
            select obs_augment_table('{username}.{tablename}',
                                     '{cols_meta}');
        '''.format(username=self.username,
                   tablename=table_name,
                   cols_meta=json.dumps(metadata))
        resp = self.sql_client.send(augment_query)

        # read full augmented table
        return self.read(table_name)


    def _auth_send(self, relative_path, http_method, **kwargs):
        self._debug_print(relative_path=relative_path,
                          http_method=http_method,
                          kwargs=kwargs)
        res = self.auth_client.send(relative_path, http_method, **kwargs)
        if isinstance(res.content, str):
            return json.loads(res.content)
        return json.loads(res.content.decode('utf-8'))

    def _check_query(self, query, style_cols=None):
        """Checks if query from Layer or QueryLayer is valid"""
        try:
            self.sql_client.send('''
                EXPLAIN
                SELECT
                  {style_cols}{comma}
                  the_geom, the_geom_webmercator
                FROM ({query}) _wrap;
                '''.format(query=query,
                           comma=',' if style_cols else '',
                           style_cols=','.join(style_cols)))
        except Exception as err:
            raise ValueError(('Layer query `{query}` and/or style column(s) '
                              '{cols} are not valid: {err}.'
                              '').format(query=query,
                                         cols=', '.join(['`{}`'.format(c)
                                                         for c in style_cols]),
                                         err=err))

    def _send_map_template(self, layers, has_zoom):
        """Create (and cache) the named map template for `layers`.

        Returns:
            str: The named map template name. If a template with this name
            was already sent from this context, it is returned without
            another API call.
        """
        map_name = get_map_name(layers, has_zoom=has_zoom)
        if map_name not in self._map_templates:
            try:
                self._auth_send('api/v1/map/named', 'POST',
                                headers={'Content-Type': 'application/json'},
                                data=get_map_template(layers, has_zoom=has_zoom))
            except ValueError:
                # Bug fix: the original `except ValueError('map already
                # exists'):` named an exception *instance*, which is illegal
                # in an except clause and raised TypeError whenever the POST
                # failed. A ValueError here presumably means the template
                # already exists on the server, so it is safe to ignore —
                # TODO confirm against the Maps API error response.
                pass

            self._map_templates[map_name] = True
        return map_name


    def _get_iframe_srcdoc(self, config, bounds, options, map_options):
        if not hasattr(self, '_srcdoc') or self._srcdoc is None:
            with open(os.path.join(os.path.dirname(__file__),
                                   'assets/cartoframes.html'), 'r') as f:
                self._srcdoc = f.read()

        return (self._srcdoc
                .replace('@@CONFIG@@', json.dumps(config))
                .replace('@@BOUNDS@@', json.dumps(bounds))
                .replace('@@OPTIONS@@', json.dumps(map_options))
                .replace('@@ZOOM@@', str(options.get('zoom', 3)))
                .replace('@@LAT@@', str(options.get('lat', 0)))
                .replace('@@LNG@@', str(options.get('lng', 0))))


    def _get_bounds(self, layers):
        """Return the bounds of all data layers involved in a cartoframes map.

        Args:
            layers (list): List of cartoframes layers. See `cartoframes.layers`
                for all types.

        Returns:
            dict: Dictionary of northern, southern, eastern, and western bounds
                of the superset of data layers. Keys are `north`, `south`,
                `east`, and `west`. Units are in WGS84.
        """
        extent_query = ('SELECT ST_EXTENT(the_geom) AS the_geom '
                        'FROM ({query}) as t{idx}\n')
        union_query = 'UNION ALL\n'.join(
            [extent_query.format(query=layer.query, idx=idx)
             for idx, layer in enumerate(layers)
             if not layer.is_basemap])

        extent = self.sql_client.send('''
                       SELECT
                         ST_XMIN(ext) AS west,
                         ST_YMIN(ext) AS south,
                         ST_XMAX(ext) AS east,
                         ST_YMAX(ext) AS north
                       FROM (
                           SELECT st_extent(the_geom) AS ext
                           FROM ({union_query}) AS _wrap1
                       ) AS _wrap2
                            '''.format(union_query=union_query))

        return extent['rows'][0]

    def _debug_print(self, **kwargs):
        if self._verbose <= 0:
            return

        for key, value in dict_items(kwargs):
            if isinstance(value, requests.Response):
                str_value = "status_code: {status_code}, content: {content}".format(
                    status_code=value.status_code, content=value.content)
            else:
                str_value = str(value)
            if self._verbose < 2 and len(str_value) > 300:
                str_value = '{}\n\n...\n\n{}'.format(str_value[:250], str_value[-50:])
            print('{key}: {value}'.format(key=key,
                                          value=str_value))
コード例 #38
0
def test_sql_error(api_key_auth_client_usr):
    """Querying a non-existent dataset must raise CartoException."""
    sql_client = SQLClient(api_key_auth_client_usr)

    with pytest.raises(CartoException):
        sql_client.send('select * from non_existing_dataset')
コード例 #39
0
    SRC_API_KEY
)
# Auth client for the destination CARTO account.
auth_dst_client = APIKeyAuthClient(
    DST_URL,
    DST_API_KEY
)

# Get SQL API clients
sql_src_client = SQLClient(auth_src_client)
sql_dst_client = SQLClient(auth_dst_client)

# Create a SQL utility function to extract source table structure.
# The function body lives in an external .sql file so it can be reused.
with open('generate_create_table_statement.sql', 'r') as f:
    query_generate_create_table_statement = f.read()
logger.info('Creating function generate_create_table_statement...')
res = sql_src_client.send(query_generate_create_table_statement)
logger.info('Response: {}'.format(res))

# Get the table structure
logger.info('Getting the CREATE TABLE statement for %s' % TABLE_NAME)
query = (
    "SELECT generate_create_table_statement('%s')"
    " AS create_table" % TABLE_NAME
)
res = sql_src_client.send(query)
create_table = res['rows'][0]['create_table']

# This is a bit of a trick: we omit the sequences to avoid
# dependencies on other objects Normally this just affects the
# cartodb_id and can optionally be fixed by cartodbfy'ing
create_table_no_seqs = re.sub(r'DEFAULT nextval\([^\)]+\)', ' ', create_table)
コード例 #40
0
    table = dataset_manager.create(i)
    logger.info(
        'Table imported: {table}'.format(table=table.name))
    table_name.append(table.name)

# define base table to insert all rows from other files
base_table = table_name[0]

# select all rows from table except cartodb_id to avoid possible errors
# Builds a comma-separated column list by querying information_schema.
columns_table = "select string_agg(column_name,',')" + \
    " FROM information_schema.columns" + \
    " where table_schema = '" + username + "' and table_name = '" + \
    str(table_name[0]) + "' AND column_name <> 'cartodb_id'"


result_query = sql.send(columns_table)

# Extract the single aggregated row from the SQL API response; after the
# loop, dict_col holds the last (only) row of the result.
for k, v in result_query.items():
    if k == 'rows':
        for itr in v:
            dict_col = itr

logging.debug(dict_col['string_agg'])

# apply operation INSERT INTO SELECT with columns from previous query
index = 1
for i in table_name:

    if i == base_table:

        continue
コード例 #41
0
# Import every file in the folder as a CARTO dataset, collecting the
# resulting table names.
for i in file_folder:
    table = dataset_manager.create(i)
    logger.info('Table imported: {table}'.format(table=table.name))
    table_name.append(table.name)

# define base table to insert all rows from other files
base_table = table_name[0]

# select all rows from table except cartodb_id to avoid possible errors
columns_table = "select string_agg(column_name,',')" + \
    " FROM information_schema.columns" + \
    " where table_schema = '" + username + "' and table_name = '" + \
    str(table_name[0]) + "' AND column_name <> 'cartodb_id'"

result_query = sql.send(columns_table)

# Extract the single aggregated row from the SQL API response; after the
# loop, dict_col holds the last (only) row of the result.
for k, v in result_query.items():
    if k == 'rows':
        for itr in v:
            dict_col = itr

logging.debug(dict_col['string_agg'])

# apply operation INSERT INTO SELECT with columns from previous query
index = 1
for i in table_name:

    if i == base_table:

        continue
コード例 #42
0
ファイル: test_sql.py プロジェクト: CartoDB/carto-python
def test_no_auth_sql_error_get(no_auth_client):
    """A GET query against a non-existing dataset must raise CartoException.

    Bug fix: ``SQLClient.send``'s second positional parameter is
    ``parse_json``, not an options dict. The original call passed
    ``{'do_post': False}`` positionally, so it bound to ``parse_json``
    (truthy, hence a no-op) and the request was still issued as a POST —
    contradicting the "get" in the test name. Passing ``do_post=False`` as
    a keyword argument makes the client really use GET.
    """
    sql = SQLClient(no_auth_client)

    with pytest.raises(CartoException):
        sql.send('select * from non_existing_dataset', do_post=False)
Code example #43
0
File: nexrad_copy.py  Project: CartoDB/carto-python
                    help=('Api key of the account'
                          ' (defaults to env variable CARTO_API_KEY)'))

args = parser.parse_args()

# Bail out with usage info when either required credential is missing.
if not args.CARTO_BASE_URL or not args.CARTO_API_KEY:
    sys.exit(parser.print_usage())

# One auth client is shared by both the SQL and COPY API wrappers.
auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY)
sql_client = SQLClient(auth_client)
copy_client = CopySQLClient(auth_client)

# Create a table suitable to receive the data
logger.info('Creating table nexrad_copy_example...')
sql_client.send("""CREATE TABLE IF NOT EXISTS nexrad_copy_example (
  the_geom geometry(Geometry,4326),
  reflectivity numeric
)""")
# Register the new table with CARTO (adds cartodb_id, triggers, etc.).
sql_client.send(
    "SELECT CDB_CartodbfyTable(current_schema, 'nexrad_copy_example')")
logger.info('Done')

logger.info('Trying to connect to the THREDDS radar query service')
rs = RadarServer(
    'http://thredds.ucar.edu/thredds/radarServer/nexrad/level2/IDD/')

# Build a query for the KLVX station at the current UTC time and make sure
# the server accepts it before fetching the catalog.
logger.info('Quering data from the station')
query = rs.query()
query.stations('KLVX').time(datetime.utcnow())
assert rs.validate_query(query)

catalog = rs.get_catalog(query)
Code example #44
0
# Set authentication to CARTO
if args.CARTO_BASE_URL and args.CARTO_API_KEY:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY)
else:
    logger.error('You need to provide valid credentials, run with '
                 '-h parameter for details')
    sys.exit(1)

# Create and cartodbfy a table
sqlClient = SQLClient(auth_client)
sqlClient.send("""
    CREATE TABLE IF NOT EXISTS copy_example (
      the_geom geometry(Geometry,4326),
      name text,
      age integer
    )
    """)
# Cartodbfy registers the table with CARTO (adds cartodb_id, triggers, etc.).
sqlClient.send("SELECT CDB_CartodbfyTable(current_schema, 'copy_example')")

copyClient = CopySQLClient(auth_client)

# COPY FROM example
logger.info("COPY'ing FROM file...")
# Stream the local CSV into the table through the SQL COPY API.
query = ('COPY copy_example (the_geom, name, age) '
         'FROM stdin WITH (FORMAT csv, HEADER true)')
result = copyClient.copyfrom_file_path(query, 'files/copy_from.csv')
logger.info('result = %s' % result)

# COPY TO example with pandas DataFrame
Code example #45
0
File: check_query.py  Project: CartoDB/carto-python
args = parser.parse_args()

# Authenticate to CARTO account
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
    dataset_manager = DatasetManager(auth_client)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# SQL wrapper
sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY))

# Run the user's query under EXPLAIN ANALYZE to obtain its execution plan.
query = sql.send('EXPLAIN ANALYZE ' + args.queryUser)

# Log every plan row plus the reported execution time from the response.
for key, value in query.items():
    if key == 'rows':
        for itr in value:
            logger.info(itr)
    if key == 'time':
        logger.info(str(key) + ': ' + str(value))

# Tokenize the uppercased query text to scan for performance anti-patterns.
query_arr = args.queryUser.upper().split()


# Warn about SELECT * (the warning message is truncated in this fragment).
for i in query_arr:
    if i == '*':
        logger.warn('Do you need all columns? ' +
                    'You can improve the performance ' +
Code example #46
0
File: user_info.py  Project: CartoDB/carto-python
    # NOTE(review): this fragment starts inside a try block whose header
    # is not visible here; table_user/userInfo are built further up.
    table_user.align['Attribute'] = 'l'
    table_user.align['Value'] = 'l'
    for row in userInfo:
        table_user.add_row(row)
    print(table_user)
    # print('{name}: {value}').format(name=i,value=str(user.__dict__[i]))
except Exception as e:
    logger.warn('User has no admin of its organization')

# SQL wrapper
sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY))

# show quota of user
results = []
print('\nThe quotas of the user are:\n')
quota = sql.send(
    "SELECT * FROM cdb_dataservices_client.cdb_service_quota_info()")
# Collect one row per data service from the SQL API response's 'rows' key.
for k, v in quota.items():
    if k == 'rows':
        for itr in v:
            results.append([
                itr['service'],
                itr['used_quota'],
                itr['provider'],
                itr['soft_limit'],
                itr['monthly_quota']
            ])

# NOTE(review): the header order below does not match the order in which
# values were appended to `results` above (Provider/Used quota and
# Soft limit/Monthly quota appear swapped) — verify before trusting the
# printed columns.
table = PrettyTable(
    ['Service', 'Provider', 'Soft limit', 'Used quota', 'Monthly quota'])
table.align['Used quota'] = 'l'
table.align['Provider'] = 'r'
Code example #47
0
                    '(defaults to env variable CARTO_API_URL)')

parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY'] if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()


# Authenticate to CARTO account
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
    dataset_manager = DatasetManager(auth_client)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# SQL wrapper
sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY))

# List this user's currently running queries from pg_stat_activity and log
# each returned row.
queries = "select pid, query from pg_stat_activity \
 WHERE usename = current_user"
result = sql.send(queries)
for key, value in result.items():
    if key == 'rows':
        for itr in value:
            logger.info(itr)
Code example #48
0
File: list_tables.py  Project: CartoDB/carto-python
    sys.exit(1)

# SQL wrapper
sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY))

# get username from base_url
substring = re.search('https://(.+?).carto.com', args.CARTO_BASE_URL)
if substring:
    username = substring.group(1)

# check all table name of account
all_tables = []

# Ask the catalog for every ordinary table (relkind = 'r') owned by the
# current database user.
tables = sql.send(
    "select pg_class.relname from pg_class, pg_roles, pg_namespace" +
    " where pg_roles.oid = pg_class.relowner and " +
    "pg_roles.rolname = current_user " +
    "and pg_namespace.oid = pg_class.relnamespace and pg_class.relkind = 'r'")

for k, v in tables.items():
    if k == 'rows':
        for itr in v:
            all_tables.append(itr['relname'])


# define array to store all the table sizes
arr_size = []


# create array with values of the table sizes
# NOTE(review): the loop body is truncated in this fragment.
for i in all_tables: