def test_write_privacy(self):
    """context.CartoContext.write Updates the privacy of a dataset"""
    from carto.datasets import DatasetManager
    cc = cartoframes.CartoContext(base_url=self.baseurl,
                                  api_key=self.apikey)
    ds_manager = DatasetManager(self.auth_client)

    # A fresh write defaults the dataset to private.
    first_batch = pd.DataFrame({'ids': list('abcd'), 'vals': range(4)})
    cc.write(first_batch, self.test_write_table)
    self.assertEqual(
        ds_manager.get(self.test_write_table).privacy.lower(), 'private')

    # Overwriting with an explicit privacy switches it to public.
    second_batch = pd.DataFrame({'ids': list('efgh'), 'vals': range(4, 8)})
    cc.write(second_batch, self.test_write_table,
             overwrite=True, privacy='public')
    self.assertEqual(
        ds_manager.get(self.test_write_table).privacy.lower(), 'public')

    # Privacy lookup on a nonexistent table yields None.
    self.assertIsNone(cc._get_privacy('i_am_not_a_table_in_this_account'))
def _get_metadata(self, auth_client, table_name, retries=4, retry_wait_time=1):
    """Return the Carto dataset metadata for ``table_name``.

    Retries with exponential backoff when the table is not found yet
    (e.g. the dataset is still being created server-side).

    Parameters
    ----------
    auth_client : authenticated Carto API client handed to DatasetManager
    table_name : str
        Name of the table to look up.
    retries : int
        Remaining attempts after a ``NotFoundException``.
    retry_wait_time : int or float
        Seconds to sleep before the next attempt; doubled on each retry.

    Raises
    ------
    Exception
        If the table cannot be found after all retries, or on any other
        lookup failure.
    """
    ds_manager = DatasetManager(auth_client)
    try:
        return ds_manager.get(table_name)
    except Exception as e:
        # Match by class name: NotFoundException comes from the client
        # library, which is not imported in this module's namespace.
        if type(e).__name__ == 'NotFoundException' and retries > 0:
            time.sleep(retry_wait_time)
            # BUG FIX: the recursive retry's result was previously dropped
            # (no `return`), so every retried lookup yielded None.
            return self._get_metadata(auth_client=auth_client,
                                      table_name=table_name,
                                      retries=retries - 1,
                                      retry_wait_time=retry_wait_time * 2)
        else:
            # Chain the original error so the root cause is preserved.
            raise Exception('We could not get the table metadata. '
                            'Please, try again in a few seconds or contact support for help') from e
def shapefile_to_carto(table_name, schema, gdf, privacy='LINK'):
    '''
    Upload a geodataframe (e.g. read from a shapefile) to Carto row by row.

    Note: zipped shapefiles can also be pushed through upload_to_carto;
    use this function when several shapefiles are processed in one script
    and must land in separate Carto tables, or when the table is too large
    to be exported as a shapefile.

    INPUT   table_name: name of the table to create on Carto (string)
            schema: column names mapped to data types for the upload (dictionary)
            gdf: the data to upload (geodataframe)
            privacy: privacy setting applied to the uploaded dataset (string)
    '''
    # Fan the per-row INSERT statements out over 10 worker threads.
    with ThreadPoolExecutor(max_workers=10) as executor:
        pending = [
            executor.submit(insert_carto_send,
                            insert_carto_query(row, schema, table_name))
            for _, row in gdf.iterrows()
        ]
        # Collect each row's result as its upload finishes.
        uploaded = [task.result() for task in as_completed(pending)]
        logging.info('Upload of {} rows complete!'.format(len(uploaded)))

        # Authenticate against Carto and apply the requested privacy.
        auth_client = APIKeyAuthClient(
            api_key=CARTO_KEY,
            base_url="https://{user}.carto.com/".format(user=CARTO_USER))
        dataset_manager = DatasetManager(auth_client)
        dataset = dataset_manager.get(table_name)
        dataset.privacy = privacy
        dataset.save()
continue elif i != base_table and index <= len(table_name): query = "insert into " + base_table + \ "(" + dict_col['string_agg'] + ") select " + \ dict_col['string_agg'] + " from " + table_name[index] + ";" sql.send(query) time.sleep(2) else: break index = index + 1 # change name of base table myTable = dataset_manager.get(base_table) myTable.name = base_table + "_merged" myTable.save() time.sleep(2) # remove not merged datasets for i in table_name: try: myTable = dataset_manager.get(i) myTable.delete() time.sleep(2) except: continue logger.info('Tables merged') print('\nURL of dataset is: \
print('Inserting new rows for shapefile: {}'.format(shape_file)) cartosql.insertRows(dataset_name + '_edit', CARTO_SCHEMA.keys(), CARTO_SCHEMA.values(), new_rows) #save processed dataset to shapefile out_gdf.to_file(processed_data_file, driver='ESRI Shapefile') # Change privacy of table on Carto #set up carto authentication using local variables for username (CARTO_WRI_RW_USER) and API key (CARTO_WRI_RW_KEY) auth_client = APIKeyAuthClient(api_key=os.getenv('CARTO_WRI_RW_KEY'), base_url="https://{user}.carto.com/".format( user=os.getenv('CARTO_WRI_RW_USER'))) #set up dataset manager with authentication dataset_manager = DatasetManager(auth_client) #set dataset privacy to 'Public with link' dataset = dataset_manager.get(dataset_name + '_edit') dataset.privacy = 'LINK' dataset.save() print('Privacy set to public with link.') ''' Upload original data and processed data to Amazon S3 storage ''' def upload_to_aws(local_file, bucket, s3_file): s3 = boto3.client('s3', aws_access_key_id=os.getenv('aws_access_key_id'), aws_secret_access_key=os.getenv('aws_secret_access_key')) try: s3.upload_file(local_file, bucket, s3_file) print("Upload Successful")
' https://username.carto.com/ ' + '(defaults to env variable CARTO_API_URL)') parser.add_argument('--api_key', dest='CARTO_API_KEY', default=os.environ['CARTO_API_KEY'] if 'CARTO_API_KEY' in os.environ else '', help='Api key of the account' + ' (defaults to env variable CARTO_API_KEY)') args = parser.parse_args() # Set authentification to CARTO if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization: auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization) dataset_manager = DatasetManager(auth_client) dataset = dataset_manager.get(args.dataset_name) else: logger.error( 'You need to provide valid credentials, run with -h parameter for details' ) import sys sys.exit(1) # PRIVATE, PUBLIC, LINK dataset.privacy = args.privacy dataset.save() logger.info("Done!")
class client(object):
    """Thin wrapper around the Carto SQL / dataset APIs for one account."""

    def __init__(self):
        self.carto_api_key = os.environ['API_KEY']
        self.carto_account = os.environ['ACCOUNT']
        USR_BASE_URL = "https://{user}.carto.com/".format(
            user=self.carto_account)
        # This mirrors the carto docs; keep the construction order as-is.
        self.auth_client = APIKeyAuthClient(api_key=self.carto_api_key,
                                            base_url=USR_BASE_URL)
        self.sql = SQLClient(self.auth_client)
        self.dataset_manager = DatasetManager(self.auth_client)

    def checkconnection(self):
        """Return True when a trivial query reaches a PostgreSQL backend."""
        try:
            data = self.sql.send("SELECT * from version()", do_post=False)
        except CartoException as e:
            # Occasionally seen: Max retries exceeded / proxy tunnel
            # failures (407 Proxy Authentication Required).
            print("some error occurred", e)
            return False
        # Example row: {'version': 'PostgreSQL 11.5 (Ubuntu ...) ...'}
        return any(row['version'].startswith('PostgreSQL')
                   for row in data['rows'])

    def getkount(self, relation):
        """Return the record count of any relation; re-raise on failure."""
        try:
            data = self.sql.send(
                "SELECT count(*) as kount from {0}".format(relation),
                do_post=False)
        except CartoException as e:
            print("some error occurred", e)
            raise
        # Exactly one row with one key/value is expected, e.g. {'kount': 1}.
        for row in data['rows']:
            return row['kount']

    def batchsql(self, list_of_sqls, checkevery=1, maxtime=5):
        """Run a list of SQL statements as one batch job; poll until done.

        Probably don't want to read a file with a million records and
        upload — open streets, for example, is 1k rows / 500KB.
        Returns False if the job is still not 'done' after `maxtime` polls.
        """
        batch_client = BatchSQLClient(self.auth_client)
        job = batch_client.create(list_of_sqls)
        # job_id is a UUID; the job dict nests per-statement statuses
        # because more statement sets can be added to a running job.
        status = batch_client.read(job['job_id'])
        polls = 0
        while status['status'] != 'done':
            time.sleep(checkevery)
            status = batch_client.read(job['job_id'])
            polls += 1
            if polls > maxtime:
                return False
        return True

    def uploadshapefolder(self, path_tothe_zip):
        """Upload an archive (tar.gz tested) and make the dataset public.

        Returns the name Carto assigned (often tablename_XX on repeats).
        Carto warns this is part of a non-public API that may change.
        """
        cartodatasetid = self.dataset_manager.create(path_tothe_zip)
        # Always made public; this call also warns about the non-public API.
        dataset = self.dataset_manager.get(cartodatasetid)
        dataset.privacy = PUBLIC
        dataset.save()
        return cartodatasetid

    def delete(self, tablename):
        """Delete a table — careful, buddy. Docs call names "dataset id"."""
        dataset = self.dataset_manager.get(tablename)
        dataset.delete()
'property.year_ending_67011' ], inplace=True) years = data_View4['year_ending'].drop_duplicates(keep="first").tolist() for item in years: data_View5 = data_View4.loc[data_View4['year_ending'] == item] data_View5['year_ending'] = data_View5['year_ending'].replace( str(item), '12/31/' + str(item)) data_View5.set_index('pid') print(str(item).replace('.0', '')) data_View5.to_csv('CSV output\\t' + str(item).replace('.0', '') + '.csv') #Saves data CartoUser = CartoUserName USR_BASE_URL = "https://{user}.carto.com/".format(user=CartoUser) auth_client = APIKeyAuthClient(api_key=CartoPassword, base_url=USR_BASE_URL) dataset_manager = DatasetManager(auth_client) for item in years: print(item) try: datasets = dataset_manager.get('t' + str(item).replace('.0', '')) datasets.delete() dataset = dataset_manager.create('CSV output\\t' + str(item).replace('.0', '') + '.csv') print('works') except: pass
if key not in fieldnames: del obj[key] obj['channels'] = ' '.join(map(str, obj['channels'])).strip() fieldnames = ['date', 'opacity', 'team'] for obj in write_csv(MATCHES_POLYGONS_FILE_NAME, fieldnames, matches_polygons): keys = list(obj.keys()).copy() for key in keys: if key not in fieldnames: del obj[key] dataset_manager = DatasetManager(auth_client) # pass any parameter to generate all files if len(sys.argv) > 1: teams = dataset_manager.get('teams') if teams: teams.delete() tvchannels = dataset_manager.get('tvchannels') if tvchannels: tvchannels.delete() stadiums = dataset_manager.get('stadiums') if stadiums: stadiums.delete() dataset_manager.create(global_path(STADIUMS_FILE_NAME)) dataset_manager.create(global_path(TVCHANNELS_FILE_NAME)) dataset_manager.create(global_path(TEAMS_FILE_NAME)) sql = SQLClient(auth_client) sql.send('UPDATE teams SET the_geom = cdb_geocode_admin0_polygon(name)')
'weather_norm_site_eui_2018', 'weather_norm_site_eui_2017', 'weather_norm_site_eui_2016', 'weather_norm_site_eui_2015', 'weather_norm_site_eui_2014', 'weather_norm_site_eui_2013', 'weather_norm_site_eui_2012', 'site_eui_change_current_last', 'weather_norm_source_eui_2020', 'weather_norm_source_eui_2019', 'weather_norm_source_eui_2018', 'weather_norm_source_eui_2017', 'weather_norm_source_eui_2016', 'weather_norm_source_eui_2015', 'weather_norm_source_eui_2014', 'weather_norm_source_eui_2013', 'weather_norm_source_eui_2012', 'source_eui_change_current_last', 'total_ghg_emissions_intensity_2020', 'total_ghg_emissions_intensity_2019', 'total_ghg_emissions_intensity_2018', 'total_ghg_emissions_intensity_2017', 'total_ghg_emissions_intensity_2016', 'total_ghg_emissions_intensity_2015', 'total_ghg_emissions_intensity_2014', 'total_ghg_emissions_intensity_2013', 'total_ghg_emissions_intensity_2012', 'tot_ghg_emissions_intensity_change_current_last' ]] benchmarking_disclosure.set_index('pid', inplace=True) #benchmarking_disclosure.to_excel('Output\\tbl_Consolidated_2012_infinite.xlsx') benchmarking_disclosure.to_csv('Output\\tbl_Consolidated_2012_infinite.csv') #Saves data CartoUser = CartoUserName USR_BASE_URL = "https://{user}.carto.com/".format(user=CartoUser) auth_client = APIKeyAuthClient(api_key=CartoPassword, base_url=USR_BASE_URL) dataset_manager = DatasetManager(auth_client) datasets = dataset_manager.get('tbl_consolidated_2012_infinite') datasets.delete() dataset = dataset_manager.create('Output\\tbl_Consolidated_2012_infinite.csv')
# if table does not exist, create it table_name = dataset_name + '_edit' if not table_name in carto_table_names: logging.info(f'Table {table_name} does not exist, creating') # Change privacy of table on Carto # set up carto authentication using local variables for username (CARTO_WRI_RW_USER) and API key (CARTO_WRI_RW_KEY) auth_client = APIKeyAuthClient( api_key=os.getenv('CARTO_WRI_RW_KEY'), base_url="https://{user}.carto.com/".format( user=os.getenv('CARTO_WRI_RW_USER'))) # set up dataset manager with authentication dataset_manager = DatasetManager(auth_client) # upload dataset to carto dataset = dataset_manager.create(processed_data_file) # set dataset privacy to 'Public with link' dataset = dataset_manager.get(table_name) dataset.privacy = 'LINK' dataset.save() logging.info('Privacy set to public with link.') # if table does exist, clear all the rows so we can upload the latest version else: logging.info(f'Table {table_name} already exists, clearing rows') # column names and types for data table # column names should be lowercase # column types should be one of the following: geometry, text, numeric, timestamp CARTO_SCHEMA = OrderedDict([('country_code', 'text'), ('country_name', 'text'), ('datetime', 'timestamp'), ('year', 'numeric')]) # Go through each type of "value" in this table # Add data column, unit, and indicator code to CARTO_SCHEMA, column_order, and dataset