Example #1
parser.add_argument('--base_url',
                    type=str,
                    dest='CARTO_BASE_URL',
                    default=os.environ['CARTO_API_URL']
                    if 'CARTO_API_URL' in os.environ else '',
                    help='Set the base URL. For example:' +
                    ' https://username.carto.com/ ' +
                    '(defaults to env variable CARTO_API_URL)')

parser.add_argument('--api_key',
                    dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY']
                    if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Authenticate to CARTO account
auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY,
                               args.organization)
dataset_manager = DatasetManager(auth_client)

# SQL wrapper
sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY))

# get username from base_url
substring = re.search(r'https://(.+?)\.carto\.com', args.CARTO_BASE_URL)
if substring:
    username = substring.group(1)

# list all table names of the account
all_tables = []

tables = sql.send(
    "select pg_class.relname from pg_class, pg_roles, pg_namespace" +
    " where pg_roles.oid = pg_class.relowner and " +
    "pg_roles.rolname = current_user " +
    "and pg_namespace.oid = pg_class.relnamespace and pg_class.relkind = 'r'")

# Authenticate to CARTO account
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# SQL wrapper
sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY))

# Dataset manager
dataset_manager = DatasetManager(auth_client)

# Get all datasets from account
datasets = dataset_manager.all()

# loop over all datasets from account
for tablename in datasets:
    query = 'SELECT * FROM {table_name}'.format(table_name=tablename.name) 
    try:
        result = sql.send(query, format=args.EXPORT_FORMAT)
    except Exception as e:
        logger.error(str(e))
        break
    data_folder = Path(args.SAVE_FOLDER) / "{table_name}.{format}".format(
        table_name=tablename.name, format=args.EXPORT_FORMAT)
    # write file to files folder
    try:
        # plausible completion (the original snippet is cut off here):
        # write the exported result into the save folder
        with open(str(data_folder), 'w') as f:
            f.write(str(result))
    except Exception as e:
        logger.error(str(e))

df_long.electricity_consumption_billionkwh = df_long.electricity_consumption_billionkwh.astype(
    'float64')

#save processed dataset to csv
processed_data_file = os.path.join(data_dir, dataset_name + '_edit.csv')
df_long.to_csv(processed_data_file, index=False)
'''
Upload processed data to Carto
'''
print('Uploading processed data to Carto.')
#set up carto authentication using local variables for username (CARTO_WRI_RW_USER) and API key (CARTO_WRI_RW_KEY)
auth_client = APIKeyAuthClient(api_key=os.getenv('CARTO_WRI_RW_KEY'),
                               base_url="https://{user}.carto.com/".format(
                                   user=os.getenv('CARTO_WRI_RW_USER')))
#set up dataset manager with authentication
dataset_manager = DatasetManager(auth_client)
#upload dataset to carto
dataset = dataset_manager.create(processed_data_file)
print('Carto table created: {}'.format(
    os.path.basename(processed_data_file).split('.')[0]))
#set dataset privacy to 'Public with link'
dataset.privacy = 'LINK'
dataset.save()
print('Privacy set to public with link.')
'''
Upload original data and processed data to Amazon S3 storage
'''


def upload_to_aws(local_file, bucket, s3_file):
    s3 = boto3.client('s3', aws_access_key_id=os.getenv('aws_access_key_id'),
                      aws_secret_access_key=os.getenv('aws_secret_access_key'))
    try:
        s3.upload_file(local_file, bucket, s3_file)
        print("Upload Successful")
        return True
    except FileNotFoundError:
        # plausible completion; the original snippet is cut off here
        print("The file was not found")
        return False

parser.add_argument('--base_url',
                    type=str,
                    dest='CARTO_BASE_URL',
                    default=os.environ['CARTO_API_URL']
                    if 'CARTO_API_URL' in os.environ else '',
                    help='Set the base URL. For example:' +
                    ' https://username.carto.com/ ' +
                    '(defaults to env variable CARTO_API_URL)')

parser.add_argument('--api_key',
                    dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY']
                    if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentication to CARTO
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY,
                                   args.organization)
    dataset_manager = DatasetManager(auth_client)
    table = dataset_manager.create(args.url, args.sync_time)
else:
    logger.error(
        'You need to provide valid credentials, run with -h parameter for details'
    )
    import sys
    sys.exit(1)

# get username from base_url
substring = re.search(r'https://(.+?)\.carto\.com', args.CARTO_BASE_URL)
if substring:
    username = substring.group(1)

# log the name of the sync table
logger.info('Name of table: ' + str(table.name))

parser.add_argument('--organization',
                    type=str,
                    dest='organization',
                    default=os.environ['CARTO_ORG']
                    if 'CARTO_ORG' in os.environ else '',
                    help='Set the name of the organization' +
                    ' account (defaults to env variable CARTO_ORG)')

parser.add_argument('--base_url',
                    type=str,
                    dest='CARTO_BASE_URL',
                    default=os.environ['CARTO_API_URL']
                    if 'CARTO_API_URL' in os.environ else '',
                    help='Set the base URL. For example:' +
                    ' https://username.carto.com/ ' +
                    '(defaults to env variable CARTO_API_URL)')

parser.add_argument('--api_key',
                    dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY']
                    if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentication to CARTO
auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY,
                               args.organization)

dataset_manager = DatasetManager(auth_client)
dataset = dataset_manager.get(args.dataset_name)
# PRIVATE, PUBLIC, LINK
dataset.privacy = args.privacy
dataset.save()

logger.info("Done!")
Example #6
# create empty table for dataset on Carto
CARTO_SCHEMA = carto_schema(gdf)
checkCreateTable(os.path.basename(processed_data_file).split('.')[0], CARTO_SCHEMA)

# convert the geometry of the file from shapely to geojson
gdf['geometry'] = convert_geometry(gdf['geometry'])

# upload the shapefile to the empty carto table
cartosql.insertRows(os.path.basename(processed_data_file).split('.')[0], CARTO_SCHEMA.keys(), CARTO_SCHEMA.values(), gdf.values.tolist(), user=CARTO_USER, key=CARTO_KEY)

# Change privacy of table on Carto
#set up carto authentication using local variables for username (CARTO_WRI_RW_USER) and API key (CARTO_WRI_RW_KEY)
auth_client = APIKeyAuthClient(api_key=os.getenv('CARTO_WRI_RW_KEY'), base_url="https://{user}.carto.com/".format(user=os.getenv('CARTO_WRI_RW_USER')))
#set up dataset manager with authentication
dataset_manager = DatasetManager(auth_client)
#set dataset privacy to 'Public with link'
dataset = dataset_manager.get(dataset_name+'_edit')
dataset.privacy = 'LINK'
dataset.save()
print('Privacy set to public with link.')

'''
Upload original data and processed data to Amazon S3 storage
'''
def upload_to_aws(local_file, bucket, s3_file):
    s3 = boto3.client('s3', aws_access_key_id=os.getenv('aws_access_key_id'),
                      aws_secret_access_key=os.getenv('aws_secret_access_key'))
    try:
        s3.upload_file(local_file, bucket, s3_file)
        print("Upload Successful")
        return True
    except FileNotFoundError:
        # plausible completion; the original snippet is cut off here
        print("The file was not found")
        return False
Example #7
parser.add_argument('--base_url',
                    type=str,
                    dest='CARTO_BASE_URL',
                    default=os.environ['CARTO_API_URL']
                    if 'CARTO_API_URL' in os.environ else '',
                    help='Set the base URL. For example:' +
                    ' https://username.carto.com/ ' +
                    '(defaults to env variable CARTO_API_URL)')

parser.add_argument('--api_key',
                    dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY']
                    if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentication to CARTO
auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY,
                               "organization")

# get username from base_url
substring = re.search(r'https://(.+?)\.carto\.com', args.CARTO_BASE_URL)
if substring:
    username = substring.group(1)

# Dataset manager
dataset_manager = DatasetManager(auth_client)

connection = json.loads(args.connection.replace("\\", ""))
logger.info(connection)

table = dataset_manager.create(None, None, connection=connection)
logger.info('Table imported: {table}'.format(table=table.name))

parser.add_argument('--base_url',
                    type=str,
                    dest='CARTO_BASE_URL',
                    default=os.environ['CARTO_API_URL']
                    if 'CARTO_API_URL' in os.environ else '',
                    help='Set the base URL. For example:' +
                    ' https://username.carto.com/ ' +
                    '(defaults to env variable CARTO_API_URL)')

parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY'] if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentication to CARTO
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
    dataset_manager = DatasetManager(auth_client)
    table = dataset_manager.create(args.url, args.sync_time)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# get username from base_url
substring = re.search(r'https://(.+?)\.carto\.com', args.CARTO_BASE_URL)
if substring:
    username = substring.group(1)

# log the name of the sync table
logger.info('Name of table: ' + str(table.name))
print('\nURL of dataset is: \
      https://{org}.carto.com/u/{username}/dataset/{data}'.format(
          org=args.organization, username=username, data=str(table.name)))

Example #9
    'weather_norm_site_eui_2018', 'weather_norm_site_eui_2017',
    'weather_norm_site_eui_2016', 'weather_norm_site_eui_2015',
    'weather_norm_site_eui_2014', 'weather_norm_site_eui_2013',
    'weather_norm_site_eui_2012', 'site_eui_change_current_last',
    'weather_norm_source_eui_2020', 'weather_norm_source_eui_2019',
    'weather_norm_source_eui_2018', 'weather_norm_source_eui_2017',
    'weather_norm_source_eui_2016', 'weather_norm_source_eui_2015',
    'weather_norm_source_eui_2014', 'weather_norm_source_eui_2013',
    'weather_norm_source_eui_2012', 'source_eui_change_current_last',
    'total_ghg_emissions_intensity_2020', 'total_ghg_emissions_intensity_2019',
    'total_ghg_emissions_intensity_2018', 'total_ghg_emissions_intensity_2017',
    'total_ghg_emissions_intensity_2016', 'total_ghg_emissions_intensity_2015',
    'total_ghg_emissions_intensity_2014', 'total_ghg_emissions_intensity_2013',
    'total_ghg_emissions_intensity_2012',
    'tot_ghg_emissions_intensity_change_current_last'
]]
benchmarking_disclosure.set_index('pid', inplace=True)
#benchmarking_disclosure.to_excel('Output\\tbl_Consolidated_2012_infinite.xlsx')
benchmarking_disclosure.to_csv('Output\\tbl_Consolidated_2012_infinite.csv')

#Saves data

CartoUser = CartoUserName
USR_BASE_URL = "https://{user}.carto.com/".format(user=CartoUser)
auth_client = APIKeyAuthClient(api_key=CartoPassword, base_url=USR_BASE_URL)

dataset_manager = DatasetManager(auth_client)
datasets = dataset_manager.get('tbl_consolidated_2012_infinite')
datasets.delete()
dataset = dataset_manager.create('Output\\tbl_Consolidated_2012_infinite.csv')
Example #10
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# get username from base_url
substring = re.search(r'https://(.+?)\.carto\.com', args.CARTO_BASE_URL)
if substring:
    username = substring.group(1)

# SQL wrapper
sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY))

# Dataset manager
dataset_manager = DatasetManager(auth_client)

# define path of the files
path = os.getcwd()

file_folder = glob.glob(path + '/' + args.folder_name)

# import files from the path to CARTO
table_name = []

for i in file_folder:
    table = dataset_manager.create(i)
    logger.info(
        'Table imported: {table}'.format(table=table.name))
    table_name.append(table.name)

parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY'] if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentication to CARTO
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# get username from base_url
substring = re.search(r'https://(.+?)\.carto\.com', args.CARTO_BASE_URL)
if substring:
    username = substring.group(1)

# Dataset manager
dataset_manager = DatasetManager(auth_client)

connection = json.loads(args.connection.replace("\\", ""))
logger.info(connection)

table = dataset_manager.create(None, None, connection=connection)
logger.info(
    'Table imported: {table}'.format(table=table.name))
Example #12
    def get_dataset_manager(self):
        try:
            self.sql_client
        except AttributeError:
            self.initialize()
        return DatasetManager(self.client)

from carto.auth import APIKeyAuthClient
from carto.datasets import DatasetManager
from _settings import USERNAME, APIKEY

USR_BASE_URL = "https://{user}.carto.com/".format(user=USERNAME)
auth_client = APIKeyAuthClient(api_key=APIKEY, base_url=USR_BASE_URL)

USR_BASE_URL = 'https://carto.com/user/dms2203'

#BASE_URL = "https://{organization}.carto.com/user/{user}/". \
#format(user=USERNAME) # organization=ORGANIZATION
USR_BASE_URL = "https://{user}.carto.com/".format(user=USERNAME)
auth_client = APIKeyAuthClient(api_key=APIKEY,
                               base_url=USR_BASE_URL,
                               organization='dms2203')

# write here the path to a local file or remote URL
LOCAL_FILE_OR_URL = "data/processing/new-york_new-york_points.csv"

dataset_manager = DatasetManager(auth_client)
dataset = dataset_manager.create(LOCAL_FILE_OR_URL)
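
The Dataset object returned by create() can be inspected right away, mirroring the table.name usage in the other examples:

print(dataset.name)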
Example #14
    'property.year_ending_67011'
],
                inplace=True)
years = data_View4['year_ending'].drop_duplicates(keep="first").tolist()

for item in years:
    data_View5 = data_View4.loc[data_View4['year_ending'] == item]
    data_View5['year_ending'] = data_View5['year_ending'].replace(
        str(item), '12/31/' + str(item))
    data_View5.set_index('pid', inplace=True)
    print(str(item).replace('.0', ''))
    data_View5.to_csv('CSV output\\t' + str(item).replace('.0', '') + '.csv')

#Saves data

CartoUser = CartoUserName
USR_BASE_URL = "https://{user}.carto.com/".format(user=CartoUser)
auth_client = APIKeyAuthClient(api_key=CartoPassword, base_url=USR_BASE_URL)

dataset_manager = DatasetManager(auth_client)
for item in years:
    print(item)
    try:
        datasets = dataset_manager.get('t' + str(item).replace('.0', ''))
        datasets.delete()
        dataset = dataset_manager.create('CSV output\\t' +
                                         str(item).replace('.0', '') + '.csv')
        print('works')
    except Exception:
        pass
Example #15
]
for obj in write_csv(MATCHES_FILE_NAME, fieldnames, matches):
    keys = list(obj.keys()).copy()
    for key in keys:
        if key not in fieldnames:
            del obj[key]
    obj['channels'] = ' '.join(map(str, obj['channels'])).strip()

fieldnames = ['date', 'opacity', 'team']
for obj in write_csv(MATCHES_POLYGONS_FILE_NAME, fieldnames, matches_polygons):
    keys = list(obj.keys()).copy()
    for key in keys:
        if key not in fieldnames:
            del obj[key]

dataset_manager = DatasetManager(auth_client)

# pass any parameter to generate all files
if len(sys.argv) > 1:
    teams = dataset_manager.get('teams')
    if teams:
        teams.delete()
    tvchannels = dataset_manager.get('tvchannels')
    if tvchannels:
        tvchannels.delete()
    stadiums = dataset_manager.get('stadiums')
    if stadiums:
        stadiums.delete()
    dataset_manager.create(global_path(STADIUMS_FILE_NAME))
    dataset_manager.create(global_path(TVCHANNELS_FILE_NAME))
    dataset_manager.create(global_path(TEAMS_FILE_NAME))

parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY'] if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentication to CARTO
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# get username from base_url
substring = re.search(r'https://(.+?)\.carto\.com', args.CARTO_BASE_URL)
if substring:
    username = substring.group(1)

# imports the file to CARTO
dataset_manager = DatasetManager(auth_client)
table = dataset_manager.create(args.url)
logger.info('Name of table: ' + str(table.name))
print('URL of dataset: \
      https://{org}.carto.com/u/{username}/dataset/{data}'.format(
          org=args.organization,
          username=username,
          data=str(table.name)))
Example #17
def upload_dataset(auth_client, LOCAL_FILE_OR_URL):
    dataset_manager = DatasetManager(auth_client)
    dataset = dataset_manager.create(LOCAL_FILE_OR_URL)
    print(
        '-------------------------------file uploaded-------------------------------------'
    )
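
A hypothetical call, reusing the auth pattern from the examples above (the base URL and file path are placeholders):

auth_client = APIKeyAuthClient(api_key=os.getenv('CARTO_API_KEY'),
                               base_url='https://username.carto.com/')
upload_dataset(auth_client, 'data/points.csv')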
Example #18
parser.add_argument('--base_url',
                    type=str,
                    dest='CARTO_BASE_URL',
                    default=os.environ['CARTO_API_URL']
                    if 'CARTO_API_URL' in os.environ else '',
                    help='Set the base URL. For example:' +
                    ' https://username.carto.com/ ' +
                    '(defaults to env variable CARTO_API_URL)')

parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY'] if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()


# Authenticate to CARTO account

if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
    dataset_manager = DatasetManager(auth_client)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# SQL wrapper

sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY))


# display and count all datasets of account
all_datasets = dataset_manager.all()

# set the arrays to store the values that will be used to display tables
results_col = []

# imports this class relies on (a minimal, inferred set; fig_to_html is
# assumed to come from mpld3)
import datetime as dt
import logging
import time

import matplotlib.pyplot as plt
import pandas as pd
from jinja2 import BaseLoader, Environment
from mpld3 import fig_to_html
from pandas import json_normalize

from carto.auth import APIKeyAuthClient
from carto.datasets import DatasetManager
from carto.sql import SQLClient
from carto.visualizations import VisualizationManager


class Reporter(object):
    def __init__(self, CARTO_USER, CARTO_API_URL, CARTO_ORG, CARTO_API_KEY,
                 USER_QUOTA):
        self.CARTO_USER = CARTO_USER
        self.CARTO_ORG = CARTO_ORG
        self.USER_QUOTA = USER_QUOTA

        ### CARTO clients
        auth_client = APIKeyAuthClient(CARTO_API_URL, CARTO_API_KEY, CARTO_ORG)
        self.sql = SQLClient(auth_client)
        self.vm = VisualizationManager(auth_client)
        self.dm = DatasetManager(auth_client)

        ### logger, variables and CARTO clients
        self.logger = logging.getLogger('carto_report')
        self.logger.addHandler(logging.NullHandler())

    def report(self):
        '''
        Main method to get the full report
        '''
        start = time.time()

        vizs = self.vm.all()
        dsets = self.dm.all()
        user = self.CARTO_USER
        org = self.CARTO_ORG
        quota = self.USER_QUOTA

        #maps
        maps_df = self.getMaps(vizs)
        top_5_maps_date = self.getTop5(maps_df, 'created', 'name')

        #datasets
        dsets_df = self.getDatasets(dsets)
        top_5_dsets_date = self.getTop5(dsets_df, 'created', 'name')
        sync = self.getSync(dsets_df)
        (private, link, public) = self.getPrivacy(dsets_df)
        (points, lines, polys, none_tbls, geo) = self.getGeometry(dsets_df)
        all_tables_df = self.getSizes(dsets_df)
        tables_sizes = all_tables_df.loc[all_tables_df['cartodbfied'] == 'Yes']
        top_5_dsets_size = self.getTop5(all_tables_df, 'size', 'name')

        #lds
        (lds_df) = self.getQuota(user, quota)

        #analysis
        (analysis_df,
         analysis_types_df) = self.getCachedAnalysisNames(all_tables_df)

        #plots
        fig_analysis = self.plotAnalysis(analysis_types_df)
        fig_lds = self.plotQuota(lds_df)

        #date
        today = self.getDate()

        #report
        report = self.generateReport(
            user, org, today, lds_df, maps_df, top_5_maps_date,
            analysis_types_df, analysis_df, dsets_df, tables_sizes,
            top_5_dsets_date, top_5_dsets_size, sync, private, link, public,
            geo, none_tbls, points, lines, polys, fig_analysis, fig_lds)

        end = time.time()
        duration = end - start

        self.logger.info('Time: start at {}, end at {}, duration: {}'.format(
            start, end, duration))

        return report

    ### helper - get date
    def getDate(self):
        '''
        Method to get the exact date of the report.
        '''
        now = dt.datetime.now()
        today = now.strftime("%Y-%m-%d %H:%M")
        return today

    ### get maps data

    def getMaps(self, vizs):
        '''
        Method to get a df with the list of maps with names, urls and date of creation.
        '''

        self.logger.info('Getting all maps data...')

        # helper - get key
        def getKey(obj):
            return obj.updated_at

        maps = [{
            'name': viz.name,
            'created': viz.created_at,
            'url': viz.url
        } for viz in sorted(vizs, key=getKey, reverse=True)]

        maps_df = json_normalize(maps)

        self.logger.info('Retrieved {} maps'.format(len(maps_df)))

        return maps_df

    ### get dsets data

    def getDatasets(self, dsets):
        '''
        Method to get a df with the list of dsets with names, privacy, sync, geometry and date of creation.
        '''

        self.logger.info('Getting all datasets data...')

        tables = [{
            'name': table.name,
            'privacy': table.privacy,
            'created': table.created_at,
            'synchronization': table.synchronization.updated_at,
            'geometry': table.table.geometry_types
        } for table in dsets]

        tables_df = json_normalize(tables)

        self.logger.info('Retrieved {} datasets'.format(len(tables_df)))

        return tables_df

    def getSync(self, tables_df):
        '''
        Method to get the number of sync tables.
        '''

        self.logger.info('Getting privacy and sync information...')

        try:
            tables_df.synchronization = tables_df.synchronization.fillna(
                'None Sync')
            sync = len(tables_df) - len(
                tables_df.loc[tables_df['synchronization'] == 'None Sync'])
            self.logger.info('{} sync tables'.format(sync))
        except Exception:
            self.logger.info('Sync tables unable to be retrieved.')
            sync = 0
            self.logger.info('{} tables will be returned.'.format(sync))

        return sync

    ### get datasets privacy settings

    def getPrivacy(self, tables_df):
        '''
        Method to get the number of tables based on their privacy settings (private, link and public).
        '''

        self.logger.info('Getting privacy information...')

        private = len(tables_df.loc[tables_df['privacy'] == 'PRIVATE'])
        link = len(tables_df.loc[tables_df['privacy'] == 'LINK'])
        public = len(tables_df.loc[tables_df['privacy'] == 'PUBLIC'])

        self.logger.info(
            '{} private tables, {} tables shared with link and {} public tables'
            .format(private, link, public))

        return (private, link, public)

    ### get datasets geometry

    def getGeometry(self, tables_df):
        '''
        Method to get the number of tables with and without geometry. It also returns the geometry type (lines, points and polygons).
        '''

        self.logger.info('Getting geometry information...')

        tables_df['geom_type'] = tables_df.geometry.str[0]

        tables_df['geocoded'] = tables_df['geom_type'].isin(
            ['ST_Point', 'ST_MultiPolygon', 'ST_Polygon',
             'ST_MultiLineString', 'ST_LineString'])

        none_tbls = len(tables_df.loc[tables_df['geocoded'] == False])
        geo = len(tables_df.loc[tables_df['geocoded'] == True])
        polys = len(tables_df.loc[tables_df['geom_type'].isin(
            ['ST_MultiPolygon', 'ST_Polygon'])])
        lines = len(tables_df.loc[tables_df['geom_type'].isin(
            ['ST_MultiLineString', 'ST_LineString'])])
        points = len(tables_df.loc[tables_df['geom_type'].isin(['ST_Point'])])

        self.logger.info(
            '{} non-geocoded datasets retrieved'.format(none_tbls))
        self.logger.info('{} geocoded datasets'.format(geo))
        self.logger.info('{} point datasets'.format(points))
        self.logger.info('{} polygon datasets'.format(polys))
        self.logger.info('{} lines datasets'.format(lines))

        return (points, lines, polys, none_tbls, geo)

    ### helper - get percentage

    def getPercentage(self, part, df):
        percentage = round(part * 100 / len(df), 2)
        return percentage

    ### helper - get top list

    def getTop5(self, df, col_order, col_index):
        top5 = df.sort_values([col_order], ascending=False).head()
        top5 = top5.set_index(col_index)
        return top5

    ### get quota information

    def getQuota(self, user, quota):
        '''
        Method to get storage quota and LDS (geocoding, routing, isolines) information as df.
        '''

        self.logger.info(
            'Getting storage quota and geocoding, routing and isolines quota information...'
        )

        dsets_size = pd.DataFrame(
            self.sql.send(
                "SELECT SUM(pg_total_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)))/1000000 as total FROM pg_tables WHERE schemaname = '"
                + user + "'")['rows'])['total'][0]
        self.logger.info('Retrieved {} MB as storage quota'.format(dsets_size))

        lds = pd.DataFrame(
            self.sql.send('SELECT * FROM cdb_service_quota_info()')['rows'])
        self.logger.info('Retrieved {} Location Data Services'.format(
            len(lds)))

        lds = lds[0:3]  #leave DO out
        lds['pc_used'] = round(
            lds['used_quota'] * 100.00 / lds['monthly_quota'], 2)
        lds = lds.rename(
            columns={
                "monthly_quota": "Monthly Quota",
                "provider": "Provider",
                "service": "Service",
                "used_quota": "Used",
                "pc_used": "% Used"
            })

        real_storage = quota * 2
        used_storage = round(dsets_size, 2)
        pc_used = round(used_storage * 100.00 / real_storage, 2)
        storage = [
            real_storage, 'carto', 'storage', 'false', used_storage, pc_used
        ]
        lds.loc[len(lds)] = storage

        lds = lds.set_index('Service')
        lds['Left'] = round(lds['Monthly Quota'] - lds['Used'], 1)
        lds['% Left'] = 100.00 - lds['% Used']
        lds_df = lds[[
            'Monthly Quota', 'Provider', 'Used', '% Used', 'Left', '% Left'
        ]]

        return lds_df

    ### get analysis and tables data

    def getSizes(self, dsets_df):
        '''
        Method to get all table sizes and distinguish cartodbfied from non-cartodbfied tables (analysis caches).
        '''

        self.logger.info('Getting list of tables...')

        all_tables = self.sql.send(
            "select pg_class.relname as name from pg_class, pg_roles, pg_namespace"
            + " where pg_roles.oid = pg_class.relowner and " +
            "pg_roles.rolname = current_user " +
            "and pg_namespace.oid = pg_class.relnamespace and pg_class.relkind = 'r'"
        )['rows']

        all_tables_df = json_normalize(all_tables)

        self.logger.info('Retrieved {} tables.'.format(len(all_tables_df)))

        dsets_df['cartodbfied'] = 'Yes'
        all_tables_df = all_tables_df.merge(dsets_df, on='name', how='left')
        all_tables_df['cartodbfied'] = all_tables_df['cartodbfied'].fillna(
            'No')
        all_tables_df['size'] = 0

        self.logger.info('Getting table sizes...')

        for index, row in all_tables_df.iterrows():
            size = 0
            try:
                size = self.sql.send("select pg_total_relation_size('" +
                                     row['name'] +
                                     "') as size")['rows'][0].get('size')
            except Exception:
                self.logger.warning('Error at: ' + str(row['name']))

            all_tables_df.at[index, 'size'] = size

        self.logger.info('Table sizes retrieved with a sum of {} MB'.format(
            all_tables_df['size'].sum()))

        return all_tables_df

    ### get analysis names table

    def getCachedAnalysisNames(self, all_tables_df):
        '''
        Method to transform cached analysis ids to analysis names.
        '''

        self.logger.info('Getting analysis from tables information...')

        analysis_df = all_tables_df.loc[all_tables_df['cartodbfied'] == 'No']

        if len(analysis_df) > 0:

            #get analysis id
            self.logger.info('Replacing analysis id with proper names...')
            analysis_df['id'] = analysis_df['name'].str.split("_",
                                                              n=3,
                                                              expand=True)[1]

            #convert equivalences object to a df
            equivalences = [{
                "type": "aggregate-intersection",
                "id": "b194a8f896"
            }, {
                "type": "bounding-box",
                "id": "5f80bdff9d"
            }, {
                "type": "bounding-circle",
                "id": "b7636131b5"
            }, {
                "type": "buffer",
                "id": "2f13a3dbd7"
            }, {
                "type": "centroid",
                "id": "ae64186757"
            }, {
                "type": "closest",
                "id": "4bd65e58e4"
            }, {
                "type": "concave-hull",
                "id": "259cf96ece"
            }, {
                "type": "contour",
                "id": "779051ec8e"
            }, {
                "type": "convex-hull",
                "id": "05234e7c2a"
            }, {
                "type": "data-observatory-measure",
                "id": "a08f3b6124"
            }, {
                "type": "data-observatory-multiple-measures",
                "id": "cd60938c7b"
            }, {
                "type": "deprecated-sql-function",
                "id": "e85ed857c2"
            }, {
                "type": "filter-by-node-column",
                "id": "83d60eb9fa"
            }, {
                "type": "filter-category",
                "id": "440d2c1487"
            }, {
                "type": "filter-grouped-rank",
                "id": "f15fa0b618"
            }, {
                "type": "filter-range",
                "id": "942b6fec82"
            }, {
                "type": "filter-rank",
                "id": "43155891da"
            }, {
                "type": "georeference-admin-region",
                "id": "a5bdb274e8"
            }, {
                "type": "georeference-city",
                "id": "d5b2dd1672"
            }, {
                "type": "georeference-country",
                "id": "792d8938e3"
            }, {
                "type": "georeference-ip-address",
                "id": "d5b2274cdf"
            }, {
                "type": "georeference-long-lat",
                "id": "0623244fc4"
            }, {
                "type": "georeference-postal-code",
                "id": "1f7c6f9f43"
            }, {
                "type": "georeference-street-address",
                "id": "1ea6dec9f3"
            }, {
                "type": "gravity",
                "id": "93ab69856c"
            }, {
                "type": "intersection",
                "id": "971639c870"
            }, {
                "type": "kmeans",
                "id": "3c835a874c"
            }, {
                "type": "line-sequential",
                "id": "9fd29bd5c0"
            }, {
                "type": "line-source-to-target",
                "id": "9e88a1147e"
            }, {
                "type": "line-to-column",
                "id": "be2ff62ce9"
            }, {
                "type": "line-to-single-point",
                "id": "eca516b80b"
            }, {
                "type": "link-by-line",
                "id": "49ca809a90"
            }, {
                "type": "merge",
                "id": "c38cb847a0"
            }, {
                "type": "moran",
                "id": "91837cbb3c"
            }, {
                "type": "point-in-polygon",
                "id": "2e94d3858c"
            }, {
                "type": "population-in-area",
                "id": "d52251dc01"
            }, {
                "type": "routing-sequential",
                "id": "a627e132c2"
            }, {
                "type": "routing-to-layer-all-to-all",
                "id": "b70cf71482"
            }, {
                "type": "routing-to-single-point",
                "id": "2923729eb9"
            }, {
                "type": "sampling",
                "id": "7530d60ffc"
            }, {
                "type": "source",
                "id": "fd83c76763"
            }, {
                "type": "spatial-markov-trend",
                "id": "9c3b798f46"
            }, {
                "type": "trade-area",
                "id": "112d4fc091"
            }, {
                "type": "weighted-centroid",
                "id": "1d85314d7a"
            }]
            equivalences_df = json_normalize(equivalences)

            #join equivalences to analysis table
            analysis_df = pd.merge(analysis_df,
                                   equivalences_df,
                                   on='id',
                                   how='left')

            #get analysis summary
            analysis_types = analysis_df['type'].value_counts()
            analysis_types_df = analysis_types.to_frame()
            analysis_types_df = analysis_types_df.rename(
                columns={'type': 'Analysis Count'})

            self.logger.info(
                '{} analysis retrieved, {} different types. '.format(
                    len(analysis_df), analysis_types_df.nunique()))
        else:
            self.logger.warning('No analysis found.')
            # avoid returning an undefined variable when there are no analyses
            analysis_types_df = pd.DataFrame(columns=['Analysis Count'])

        return (analysis_df, analysis_types_df)

    ### plot LDS figure

    def plotQuota(self, lds_df):
        '''
        Method to plot a lds and storage bar chart.
        '''

        self.logger.info('Plotting LDS figure...')

        # plot properties
        r = list(range(len(lds_df)))
        barWidth = 0.85
        names = lds_df.index.tolist()

        # create a plot
        fig_lds, ax_lds = plt.subplots()

        # create quota left / teal bars
        ax_lds.bar(r,
                   lds_df['% Left'],
                   bottom=lds_df['% Used'],
                   color='#009392',
                   edgecolor='white',
                   width=barWidth,
                   label='% Left')
        # create used quota / red bars
        ax_lds.bar(r,
                   lds_df['% Used'],
                   color='#cf597e',
                   edgecolor='white',
                   width=barWidth,
                   label='% Used')

        # customize ticks and labels
        ax_lds.set_xticks(r)
        ax_lds.set_xticklabels(names)
        ax_lds.set_xlabel("Location Data Service")
        ax_lds.set_ylabel("%")

        # Add a legend
        handles, labels = ax_lds.get_legend_handles_labels()
        ax_lds.legend(handles,
                      labels,
                      loc='upper left',
                      bbox_to_anchor=(0, 1, 1, 0))

        # tight plot
        plt.tight_layout()

        return fig_lds

    ### plot analysis figure

    def plotAnalysis(self, analysis_types_df):
        '''
        Method to plot an analysis count bar chart.
        '''

        self.logger.info('Plotting analysis figure...')

        # plot properties
        analysis_names = analysis_types_df.index.tolist()
        analysis_portions = analysis_types_df['Analysis Count']
        cartocolors = [
            '#7F3C8D', '#11A579', '#3969AC', '#F2B701', '#E73F74', '#80BA5A',
            '#E68310', '#008695', '#CF1C90', '#f97b72', '#4b4b8f', '#A5AA99'
        ]
        names_positions = [i for i, _ in enumerate(analysis_names)]

        # create plot
        fig_analysis, ax_analysis = plt.subplots()

        # plot bars
        ax_analysis.barh(names_positions, analysis_portions, color=cartocolors)

        # customize ticks and labels
        ax_analysis.set_ylabel("Analysis Type")
        ax_analysis.set_xlabel("Analysis Count")
        ax_analysis.set_yticks(names_positions)
        ax_analysis.set_yticklabels(analysis_names)

        # tight plot
        plt.tight_layout()

        return fig_analysis

    ### generate report with an HTML template

    def generateReport(self, user, org, today, lds_df, maps_df,
                       top_5_maps_date, analysis_types_df, analysis_df,
                       dsets_df, tables_sizes, top_5_dsets_date,
                       top_5_dsets_size, sync, private, link, public, geo,
                       none_tbls, points, lines, polys, fig_analysis, fig_lds):
        '''
        Method to generate a HTML report.
        '''

        self.logger.info('Generating HTML template...')

        template = """
            <!DOCTYPE html>
            <html lang="en">
            <head>
            <meta charset="UTF-8">
            <meta name="viewport" content="width=device-width, initial-scale=1.0">
            <meta http-equiv="X-UA-Compatible" content="ie=edge">
            <title>CARTO Database Metrics Report Template</title>
            <link rel="stylesheet" href="https://libs.cartocdn.com/airship-style/v1.0.3/airship.css">
            <script src="https://libs.cartocdn.com/airship-components/v1.0.3/airship.js"></script>
            <style>
            .as-sidebar{
                width: 33.33%;
            }
            .as-box{
                border-bottom: 1px solid #F5F5F5;
            }
            </style>
            </head>                   
            <body class="as-app-body as-app">
            <header class="as-toolbar">
                <div class="as-toolbar__item as-title">
                    CARTO Metrics Report 
                </div>
                <div class="as-toolbar__item as-display--block as-p--12 as-subheader as-bg--complementary">
                    {{ user }} from {{org}} at {{today}}
                </div>
            </header>
            <div class="as-content">
                <aside class="as-sidebar as-sidebar--left">
                <div class="as-container">
                    <h1 class="as-box as-title as-font--medium">
                    Maps and Analysis
                    </h1>
                    <div class="as-box">
                        <h2 class="as-title">
                            Maps
                        </h2>
                        <p class="as-body as-font--medium">Number of maps: {{total_maps}}</p>
                        <div class="as-box" id="maps-table">
                            <h3 class="as-subheader">Top 5 Maps by Date</h3>
                            {{top_5_maps_date.to_html()}}
                        </div>
                    </div>

                    <div class="as-box">
                    <h2 class="as-title">
                        Builder Cached Analysis
                    </h2>
                    <ul class="as-list">
                        <li class="as-list__item">Number of cached analyses: {{total_analysis}}</li>
                        <li class="as-list__item">Cached Analyses Size: {{total_size_analysis}} MB</li>
                    </ul>
                    <div class="as-box" id="analysis-table">
                        {{analysis_types_df.to_html()}}
                    </div>
                    <div class="as-box" id="analysis-fig">
                        {{html_fig_analysis}}
                    </div>
                    </div>
                </div>
                </aside>
                <main class="as-main">
                    <h1 class="as-box as-title as-font--medium">
                        Storage Quota & LDS
                    </h1>
                    <div class="as-box">
                        <h2 class="as-title">
                            Storage Quota
                        </h2>
                        <ul class="as-list">
                            <li class="as-list__item as-font--medium">Account Storage: {{real_storage}} MB</li>
                            <li class="as-list__item as-color--support-01">Used Quota: {{used_storage}} MB, {{pc_used}} %</li>
                            <li class="as-list__item as-color--complementary">Quota Left: {{left_storage}} MB, {{pc_left}} %</li>
                        </ul>
                    </div>
                    <div class="as-box">
                        <h2 class="as-title">
                            Location Data Services
                        </h2>
                        <div class="as-box" id="lds-table">
                            {{lds.to_html()}}
                        </div>
                        <div class="as-box" id="lds-fig">
                            {{html_fig_lds}}
                        </div>
                    </div>
                </main>
                <aside class="as-sidebar as-sidebar--right">
                <div class="as-container">
                    <div class="as-box as-title as-font--medium">
                    Datasets
                    </div>
                    <div class="as-box">
                        <h2 class="as-title">
                            Datasets Summary
                        </h2>
                        <ul class="as-list">
                            <li class="as-list__item as-font--medium">Number of tables: {{total_dsets}}</li>
                            <li class="as-list__item">Sync tables: {{sync}}</li>
                            <li class="as-list__item">Tables Size: {{total_size_tbls}} MB</li>
                        </ul>
                    </div>
                    <div class="as-box">
                    <h2 class="as-title">
                        Privacy
                    </h2>
                    <ul class="as-list">
                        <li class="as-list__item as-color--support-01">🔒 Private: {{private}} tables</li>
                        <li class="as-list__item as-color--support-02">🔗 Shared with link: {{link}} tables</li>
                        <li class="as-list__item as-color--support-03">🔓 Public: {{public}} tables</li>
                    </ul>
                    </div>
                    <div class="as-box">
                    <h2 class="as-title">
                        Geometry
                    </h2>
                    <p class="as-body">
                        Number of geocoded tables: {{geo}}
                    </p>
                    <ul class="as-list">
                        <li class="as-list__item">📌 Points: {{points}} tables</li>
                        <li class="as-list__item">〰️ Lines: {{lines}} tables</span></li>
                        <li class="as-list__item">⬛ Polygons: {{polys}} tables</li>
                    </ul>
                    <p class="as-body">
                        Number of non-geocoded tables: {{none_tbls}}
                    </p>
                    </div>
                    <div class="as-box" id="tables-size">
                        <h3 class="as-subheader">Top 5 Datasets by Size</h3>
                        {{top_5_dsets_size.to_html()}}
                    </div>
                    <div class="as-box" id="tables-date">
                        <h3 class="as-subheader">Top 5 Datasets by Date</h3>
                        {{top_5_dsets_date.to_html()}}
                    </div>
                </div>
                </aside>
            </div>
            <script>
                // add airship class to tables 
                const tableElements = document.querySelectorAll('table');
                tableElements.forEach(element => element.classList.add("as-table"));
            </script>
            </body>
            </html>
        """
        rtemplate = Environment(loader=BaseLoader()).from_string(template)

        self.logger.info('Rendering HTML report...')

        report = rtemplate.render({

            # user and date info
            'user':
            user,
            'org':
            org,
            'today':
            today,

            # lds and storage info
            'lds':
            lds_df,
            'real_storage':
            lds_df.iloc[3]['Monthly Quota'],
            'used_storage':
            lds_df.iloc[3]['Used'],
            'pc_used':
            lds_df.iloc[3]['% Used'],
            'left_storage':
            lds_df.iloc[3]['Left'],
            'pc_left':
            round(lds_df.iloc[3]['% Left'], 2),

            # maps info
            'total_maps':
            len(maps_df),
            'total_analysis':
            len(analysis_df),
            'total_size_analysis':
            analysis_df['size'].sum(),
            'analysis_types_df':
            analysis_types_df,
            'top_5_maps_date':
            top_5_maps_date,

            # datasets info
            'sync':
            sync,
            'total_dsets':
            len(dsets_df),
            'total_size_tbls':
            tables_sizes['size'].sum(),
            'top_5_dsets_size':
            top_5_dsets_size[['size']],
            'top_5_dsets_date':
            top_5_dsets_date[['created']],

            # privacy info
            'private':
            private,
            'link':
            link,
            'public':
            public,

            # geometry info
            'geo':
            geo,
            'points':
            points,
            'lines':
            lines,
            'polys':
            polys,
            'none_tbls':
            none_tbls,

            # figures
            'html_fig_analysis':
            fig_to_html(fig_analysis),
            'html_fig_lds':
            fig_to_html(fig_lds)
        })

        return report
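
A minimal usage sketch for the class above; the credentials and quota figure are placeholders:

reporter = Reporter(
    CARTO_USER='myuser',                        # placeholder account name
    CARTO_API_URL='https://myuser.carto.com/',  # placeholder base URL
    CARTO_ORG='myorg',                          # placeholder organization
    CARTO_API_KEY='your-api-key',               # placeholder API key
    USER_QUOTA=250,                             # storage quota in MB (placeholder)
)
html = reporter.report()
with open('carto_report.html', 'w') as fh:
    fh.write(html)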