    help='Set the base URL. For example:' +
         ' https://username.carto.com/ ' +
         '(defaults to env variable CARTO_API_URL)')

parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY'],
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Authenticate to CARTO account
auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY,
                               args.organization)
dataset_manager = DatasetManager(auth_client)

# SQL wrapper
sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY))

# get username from base_url
substring = re.search('https://(.+?).carto.com', args.CARTO_BASE_URL)
if substring:
    username = substring.group(1)

# check all table names of the account
all_tables = []

tables = sql.send(
    "select pg_class.relname from pg_class, pg_roles, pg_namespace" +
    " where pg_roles.oid = pg_class.relowner and " +
# Authenticate to CARTO account
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# SQL wrapper
sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY))

# Dataset manager
dataset_manager = DatasetManager(auth_client)

# Get all datasets from account
datasets = dataset_manager.all()

# loop over all datasets from account
for tablename in datasets:
    query = 'SELECT * FROM {table_name}'.format(table_name=tablename.name)
    try:
        result = sql.send(query, format=args.EXPORT_FORMAT)
    except Exception as e:
        logger.error(str(e))
        break

    data_folder = Path(args.SAVE_FOLDER) / "{table_name}.{format}".format(
        table_name=tablename.name, format=args.EXPORT_FORMAT)

    # write file to files folder
    try:
df_long.electricity_consumption_billionkwh = df_long.electricity_consumption_billionkwh.astype(
    'float64')

# save processed dataset to csv
processed_data_file = os.path.join(data_dir, dataset_name + '_edit.csv')
df_long.to_csv(processed_data_file, index=False)

'''
Upload processed data to Carto
'''
print('Uploading processed data to Carto.')
# set up carto authentication using local variables for username (CARTO_WRI_RW_USER) and API key (CARTO_WRI_RW_KEY)
auth_client = APIKeyAuthClient(api_key=os.getenv('CARTO_WRI_RW_KEY'),
                               base_url="https://{user}.carto.com/".format(
                                   user=os.getenv('CARTO_WRI_RW_USER')))
# set up dataset manager with authentication
dataset_manager = DatasetManager(auth_client)
# upload dataset to carto
dataset = dataset_manager.create(processed_data_file)
print('Carto table created: {}'.format(
    os.path.basename(processed_data_file).split('.')[0]))
# set dataset privacy to 'Public with link'
dataset.privacy = 'LINK'
dataset.save()
print('Privacy set to public with link.')

'''
Upload original data and processed data to Amazon S3 storage
'''
def upload_to_aws(local_file, bucket, s3_file):
    s3 = boto3.client('s3',
    '(defaults to env variable CARTO_API_URL)')

parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY'] if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentication to CARTO
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY,
                                   args.organization)
    dataset_manager = DatasetManager(auth_client)
    table = dataset_manager.create(args.url, args.sync_time)
else:
    logger.error(
        'You need to provide valid credentials, run with -h parameter for details'
    )
    import sys
    sys.exit(1)

# get username from base_url
substring = re.search('https://(.+?).carto.com', args.CARTO_BASE_URL)
if substring:
    username = substring.group(1)

# return the id of the sync
logger.info('Name of table: ' + str(table.name))
    help='Set the name of the organization' +
         ' account (defaults to env variable CARTO_ORG)')

parser.add_argument('--base_url', type=str, dest='CARTO_BASE_URL',
                    default=os.environ['CARTO_API_URL'],
                    help='Set the base URL. For example:' +
                    ' https://username.carto.com/ ' +
                    '(defaults to env variable CARTO_API_URL)')

parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY'],
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentication to CARTO
auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY,
                               args.organization)

dataset_manager = DatasetManager(auth_client)
dataset = dataset_manager.get(args.dataset_name)

# PRIVATE, PUBLIC, LINK
dataset.privacy = args.privacy
dataset.save()

logger.info("Done!")
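
# A minimal standalone sketch (not part of the example above) that wraps the
# same privacy change in a reusable helper; only DatasetManager.get(),
# the .privacy attribute and .save() from the snippet above are assumed,
# and the helper name is our own.
def set_privacy(auth_client, table_name, privacy):
    """Set a table's privacy to PRIVATE, PUBLIC or LINK and save it."""
    manager = DatasetManager(auth_client)
    dataset = manager.get(table_name)
    dataset.privacy = privacy
    dataset.save()
    return dataset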
# create empty table for dataset on Carto
CARTO_SCHEMA = carto_schema(gdf)
checkCreateTable(os.path.basename(processed_data_file).split('.')[0], CARTO_SCHEMA)
# convert the geometry of the file from shapely to geojson
gdf['geometry'] = convert_geometry(gdf['geometry'])
# upload the shapefile to the empty carto table
cartosql.insertRows(os.path.basename(processed_data_file).split('.')[0],
                    CARTO_SCHEMA.keys(), CARTO_SCHEMA.values(),
                    gdf.values.tolist(), user=CARTO_USER, key=CARTO_KEY)

# Change privacy of table on Carto
# set up carto authentication using local variables for username (CARTO_WRI_RW_USER) and API key (CARTO_WRI_RW_KEY)
auth_client = APIKeyAuthClient(api_key=os.getenv('CARTO_WRI_RW_KEY'),
                               base_url="https://{user}.carto.com/".format(
                                   user=os.getenv('CARTO_WRI_RW_USER')))
# set up dataset manager with authentication
dataset_manager = DatasetManager(auth_client)
# set dataset privacy to 'Public with link'
dataset = dataset_manager.get(dataset_name + '_edit')
dataset.privacy = 'LINK'
dataset.save()
print('Privacy set to public with link.')

'''
Upload original data and processed data to Amazon S3 storage
'''
def upload_to_aws(local_file, bucket, s3_file):
    s3 = boto3.client('s3',
                      aws_access_key_id=os.getenv('aws_access_key_id'),
                      aws_secret_access_key=os.getenv('aws_secret_access_key'))
    try:
        s3.upload_file(local_file, bucket, s3_file)
        print("Upload Successful")
    default=os.environ['CARTO_API_URL'],
    help='Set the base URL. For example:' +
         ' https://username.carto.com/ ' +
         '(defaults to env variable CARTO_API_URL)')

parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY'],
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentication to CARTO
auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY,
                               "organization")

# get username from base_url
substring = re.search('https://(.+?).carto.com', args.CARTO_BASE_URL)
if substring:
    username = substring.group(1)

# Dataset manager
dataset_manager = DatasetManager(auth_client)

connection = json.loads(args.connection.replace("\\", ""))
logger.info(connection)

table = dataset_manager.create(None, None, connection=connection)

logger.info('Table imported: {table}'.format(table=table.name))
    help='Set the base URL. For example:' +
         ' https://username.carto.com/ ' +
         '(defaults to env variable CARTO_API_URL)')

parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY'] if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentication to CARTO
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
    dataset_manager = DatasetManager(auth_client)
    table = dataset_manager.create(args.url, args.sync_time)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# get username from base_url
substring = re.search('https://(.+?).carto.com', args.CARTO_BASE_URL)
if substring:
    username = substring.group(1)

# return the id of the sync
logger.info('Name of table: ' + str(table.name))
print('\nURL of dataset is: \
https://{org}.carto.com/u/{username}/dataset/{data}'). \
    'weather_norm_site_eui_2018', 'weather_norm_site_eui_2017',
    'weather_norm_site_eui_2016', 'weather_norm_site_eui_2015',
    'weather_norm_site_eui_2014', 'weather_norm_site_eui_2013',
    'weather_norm_site_eui_2012', 'site_eui_change_current_last',
    'weather_norm_source_eui_2020', 'weather_norm_source_eui_2019',
    'weather_norm_source_eui_2018', 'weather_norm_source_eui_2017',
    'weather_norm_source_eui_2016', 'weather_norm_source_eui_2015',
    'weather_norm_source_eui_2014', 'weather_norm_source_eui_2013',
    'weather_norm_source_eui_2012', 'source_eui_change_current_last',
    'total_ghg_emissions_intensity_2020', 'total_ghg_emissions_intensity_2019',
    'total_ghg_emissions_intensity_2018', 'total_ghg_emissions_intensity_2017',
    'total_ghg_emissions_intensity_2016', 'total_ghg_emissions_intensity_2015',
    'total_ghg_emissions_intensity_2014', 'total_ghg_emissions_intensity_2013',
    'total_ghg_emissions_intensity_2012',
    'tot_ghg_emissions_intensity_change_current_last'
]]

benchmarking_disclosure.set_index('pid', inplace=True)
# benchmarking_disclosure.to_excel('Output\\tbl_Consolidated_2012_infinite.xlsx')
benchmarking_disclosure.to_csv('Output\\tbl_Consolidated_2012_infinite.csv')  # Saves data

CartoUser = CartoUserName
USR_BASE_URL = "https://{user}.carto.com/".format(user=CartoUser)
auth_client = APIKeyAuthClient(api_key=CartoPassword, base_url=USR_BASE_URL)
dataset_manager = DatasetManager(auth_client)

datasets = dataset_manager.get('tbl_consolidated_2012_infinite')
datasets.delete()
dataset = dataset_manager.create('Output\\tbl_Consolidated_2012_infinite.csv')
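
# A hedged sketch (ours, not from the script above) that generalizes the
# delete-then-recreate pattern used above into one helper; it only relies on
# DatasetManager.get(), .delete() and .create() as already shown, and tolerates
# the case where the table does not exist yet.
def replace_dataset(dataset_manager, table_name, file_path):
    try:
        existing = dataset_manager.get(table_name)
        if existing:
            existing.delete()
    except Exception:
        pass  # table missing or already deleted; just create it below
    return dataset_manager.create(file_path)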
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# get username from base_url
substring = re.search('https://(.+?).carto.com', args.CARTO_BASE_URL)
if substring:
    username = substring.group(1)

# SQL wrapper
sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY))

# Dataset manager
dataset_manager = DatasetManager(auth_client)

# define path of the files
path = os.getcwd()
file_folder = glob.glob(path + '/' + args.folder_name)

# import files from the path to CARTO
table_name = []
for i in file_folder:
    table = dataset_manager.create(i)
    logger.info('Table imported: {table}'.format(table=table.name))
    table_name.append(table.name)
parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY'] if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentication to CARTO
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# get username from base_url
substring = re.search('https://(.+?).carto.com', args.CARTO_BASE_URL)
if substring:
    username = substring.group(1)

# Dataset manager
dataset_manager = DatasetManager(auth_client)

connection = json.loads(args.connection.replace("\\", ""))
logger.info(connection)

table = dataset_manager.create(None, None, connection=connection)

logger.info('Table imported: {table}'.format(table=table.name))
def get_dataset_manager(self):
    try:
        self.sql_client
    except AttributeError:
        self.initialize()
    return DatasetManager(self.client)
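
# Hedged usage sketch for the helper above; `wrapper` stands for whatever
# object defines get_dataset_manager() (its class is not shown in the snippet),
# and the function name below is our own. Only DatasetManager.all() and the
# Dataset .name attribute, both used elsewhere in these examples, are assumed.
def list_dataset_names(wrapper):
    manager = wrapper.get_dataset_manager()
    return [dataset.name for dataset in manager.all()]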
from carto.auth import APIKeyAuthClient
from carto.datasets import DatasetManager
from _settings import USERNAME, APIKEY

USR_BASE_URL = "https://{user}.carto.com/".format(user=USERNAME)
auth_client = APIKeyAuthClient(api_key=APIKEY, base_url=USR_BASE_URL)

USR_BASE_URL = 'https://carto.com/user/dms2203'
# BASE_URL = "https://{organization}.carto.com/user/{user}/". \
#     format(user=USERNAME)  # organization=ORGANIZATION
USR_BASE_URL = "https://{user}.carto.com/".format(user=USERNAME)
auth_client = APIKeyAuthClient(api_key=APIKEY, base_url=USR_BASE_URL,
                               organization='dms2203')

# write here the path to a local file or remote URL
LOCAL_FILE_OR_URL = "data/processing/new-york_new-york_points.csv"

dataset_manager = DatasetManager(auth_client)
dataset = dataset_manager.create(LOCAL_FILE_OR_URL)
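
# Hedged follow-up (not in the original snippet): confirm the upload by reading
# attributes back from the returned Dataset resource; .name and .privacy are
# the same attributes used in the other snippets in this listing.
print('Created dataset "{}" with privacy {}'.format(dataset.name, dataset.privacy))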
    'property.year_ending_67011'
], inplace=True)

years = data_View4['year_ending'].drop_duplicates(keep="first").tolist()

for item in years:
    data_View5 = data_View4.loc[data_View4['year_ending'] == item]
    data_View5['year_ending'] = data_View5['year_ending'].replace(
        str(item), '12/31/' + str(item))
    data_View5.set_index('pid')
    print(str(item).replace('.0', ''))
    data_View5.to_csv('CSV output\\t' + str(item).replace('.0', '') + '.csv')  # Saves data

CartoUser = CartoUserName
USR_BASE_URL = "https://{user}.carto.com/".format(user=CartoUser)
auth_client = APIKeyAuthClient(api_key=CartoPassword, base_url=USR_BASE_URL)
dataset_manager = DatasetManager(auth_client)

for item in years:
    print(item)
    try:
        datasets = dataset_manager.get('t' + str(item).replace('.0', ''))
        datasets.delete()
        dataset = dataset_manager.create('CSV output\\t' + str(item).replace('.0', '') + '.csv')
        print('works')
    except:
        pass
]

for obj in write_csv(MATCHES_FILE_NAME, fieldnames, matches):
    keys = list(obj.keys()).copy()
    for key in keys:
        if key not in fieldnames:
            del obj[key]
    obj['channels'] = ' '.join(map(str, obj['channels'])).strip()

fieldnames = ['date', 'opacity', 'team']
for obj in write_csv(MATCHES_POLYGONS_FILE_NAME, fieldnames, matches_polygons):
    keys = list(obj.keys()).copy()
    for key in keys:
        if key not in fieldnames:
            del obj[key]

dataset_manager = DatasetManager(auth_client)

# pass any parameter to generate all files
if len(sys.argv) > 1:
    teams = dataset_manager.get('teams')
    if teams:
        teams.delete()
    tvchannels = dataset_manager.get('tvchannels')
    if tvchannels:
        tvchannels.delete()
    stadiums = dataset_manager.get('stadiums')
    if stadiums:
        stadiums.delete()
    dataset_manager.create(global_path(STADIUMS_FILE_NAME))
    dataset_manager.create(global_path(TVCHANNELS_FILE_NAME))
    dataset_manager.create(global_path(TEAMS_FILE_NAME))
parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY'] if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Set authentication to CARTO
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# get username from base_url
substring = re.search('https://(.+?).carto.com', args.CARTO_BASE_URL)
if substring:
    username = substring.group(1)

# imports the file to CARTO
dataset_manager = DatasetManager(auth_client)
table = dataset_manager.create(args.url)
logger.info('Name of table: ' + str(table.name))
print('URL of dataset: \
https://{org}.carto.com/u/{username}/dataset/{data}'.format(
    org=args.organization, username=username, data=str(table.name)))
def upload_dataset(auth_client, LOCAL_FILE_OR_URL):
    dataset_manager = DatasetManager(auth_client)
    dataset = dataset_manager.create(LOCAL_FILE_OR_URL)
    print('-------------------------------file uploaded-------------------------------------')
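
# Hedged usage sketch (ours) for upload_dataset() above. The CARTO_USER /
# CARTO_API_KEY environment variables and the CSV path are placeholders;
# APIKeyAuthClient is built with the same keyword arguments used elsewhere
# in these snippets.
import os
from carto.auth import APIKeyAuthClient

auth_client = APIKeyAuthClient(api_key=os.getenv('CARTO_API_KEY'),
                               base_url='https://{user}.carto.com/'.format(
                                   user=os.getenv('CARTO_USER')))
upload_dataset(auth_client, 'data/my_points.csv')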
    '(defaults to env variable CARTO_API_URL)')

parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY'] if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Authenticate to CARTO account
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
    dataset_manager = DatasetManager(auth_client)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# SQL wrapper
sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY))

# display and count all datasets of account
all_datasets = dataset_manager.all()

# set the arrays to store the values that will be used to display tables
results_col = []
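
# Hedged continuation sketch (ours, not the original script): one way the loop
# over all_datasets might collect display values. Only the .name and .privacy
# attributes already used in these snippets are read; what the original script
# actually stores in results_col is not shown in the excerpt.
for dataset in all_datasets:
    results_col.append((dataset.name, dataset.privacy))
logger.info('Account has {} datasets'.format(len(all_datasets)))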
class Reporter(object):
    def __init__(self, CARTO_USER, CARTO_API_URL, CARTO_ORG, CARTO_API_KEY,
                 USER_QUOTA):
        self.CARTO_USER = CARTO_USER
        self.CARTO_ORG = CARTO_ORG
        self.USER_QUOTA = USER_QUOTA

        ### CARTO clients
        auth_client = APIKeyAuthClient(CARTO_API_URL, CARTO_API_KEY, CARTO_ORG)
        self.sql = SQLClient(auth_client)
        self.vm = VisualizationManager(auth_client)
        self.dm = DatasetManager(auth_client)

        ### logger, variables and CARTO clients
        self.logger = logging.getLogger('carto_report')
        self.logger.addHandler(logging.NullHandler())

    def report(self):
        '''
        Main method to get the full report
        '''
        start = time.time()

        vizs = self.vm.all()
        dsets = self.dm.all()
        user = self.CARTO_USER
        org = self.CARTO_ORG
        quota = self.USER_QUOTA

        # maps
        maps_df = self.getMaps(vizs)
        top_5_maps_date = self.getTop5(maps_df, 'created', 'name')

        # datasets
        dsets_df = self.getDatasets(dsets)
        top_5_dsets_date = self.getTop5(dsets_df, 'created', 'name')
        sync = self.getSync(dsets_df)
        (private, link, public) = self.getPrivacy(dsets_df)
        (points, lines, polys, none_tbls, geo) = self.getGeometry(dsets_df)
        all_tables_df = self.getSizes(dsets_df)
        tables_sizes = all_tables_df.loc[all_tables_df['cartodbfied'] == 'Yes']
        top_5_dsets_size = self.getTop5(all_tables_df, 'size', 'name')

        # lds
        (lds_df) = self.getQuota(user, quota)

        # analysis
        (analysis_df, analysis_types_df) = self.getCachedAnalysisNames(all_tables_df)

        # plots
        fig_analysis = self.plotAnalysis(analysis_types_df)
        fig_lds = self.plotQuota(lds_df)

        # date
        today = self.getDate()

        # report
        report = self.generateReport(
            user, org, today, lds_df, maps_df, top_5_maps_date,
            analysis_types_df, analysis_df, dsets_df, tables_sizes,
            top_5_dsets_date, top_5_dsets_size, sync, private, link, public,
            geo, none_tbls, points, lines, polys, fig_analysis, fig_lds)

        end = time.time()
        duration = end - start
        self.logger.info('Time: start at {}, end at {}, duration: {}'.format(
            start, end, duration))

        return report

    ### helper - get date
    def getDate(self):
        '''
        Method to get the exact date of the report.
        '''
        now = dt.datetime.now()
        today = now.strftime("%Y-%m-%d %H:%M")
        return today

    ### get maps data
    def getMaps(self, vizs):
        '''
        Method to get a df with the list of maps with names, urls and date of creation.
        '''
        self.logger.info('Getting all maps data...')

        # helper - get key
        def getKey(obj):
            return obj.updated_at

        maps = [{
            'name': viz.name,
            'created': viz.created_at,
            'url': viz.url
        } for viz in sorted(vizs, key=getKey, reverse=True)]
        maps_df = json_normalize(maps)
        self.logger.info('Retrieved {} maps'.format(len(maps_df)))
        return maps_df

    ### get dsets data
    def getDatasets(self, dsets):
        '''
        Method to get a df with the list of dsets with names, privacy, sync,
        geometry and date of creation.
        '''
        self.logger.info('Getting all datasets data...')
        tables = [{
            'name': table.name,
            'privacy': table.privacy,
            'created': table.created_at,
            'synchronization': table.synchronization.updated_at,
            'geometry': table.table.geometry_types
        } for table in dsets]
        tables_df = json_normalize(tables)
        self.logger.info('Retrieved {} datasets'.format(len(tables_df)))
        return tables_df

    def getSync(self, tables_df):
        '''
        Method to get the number of sync tables.
        '''
        self.logger.info('Getting privacy and sync information...')
        try:
            tables_df.synchronization = tables_df.synchronization.fillna(
                'None Sync')
            sync = len(tables_df) - len(
                tables_df.loc[tables_df['synchronization'] == 'None Sync'])
            self.logger.info('{} sync tables'.format(sync))
        except:
            self.logger.info('Sync tables unable to be retrieved.')
            sync = 0
            self.logger.info('{} tables will be returned.'.format(sync))
        return sync

    ### get datasets privacy settings
    def getPrivacy(self, tables_df):
        '''
        Method to get the number of tables based on their privacy settings
        (private, link and public).
        '''
        self.logger.info('Getting privacy information...')
        private = len(tables_df.loc[tables_df['privacy'] == 'PRIVATE'])
        link = len(tables_df.loc[tables_df['privacy'] == 'LINK'])
        public = len(tables_df.loc[tables_df['privacy'] == 'PUBLIC'])
        self.logger.info(
            '{} private tables, {} tables shared with link and {} public tables'
            .format(private, link, public))
        return (private, link, public)

    ### get datasets geometry
    def getGeometry(self, tables_df):
        '''
        Method to get the number of tables with and without geometry.
        It also returns the geometry type (lines, points and polygons).
        '''
        self.logger.info('Getting geometry information...')
        tables_df['geom_type'] = tables_df.geometry.str[0]
        tables_df['geocoded'] = False
        for i in range(len(tables_df)):
            if tables_df.geom_type[i] in ('ST_Point', 'ST_MultiPolygon',
                                          'ST_Polygon', 'ST_MultiLineString',
                                          'ST_LineString'):
                tables_df['geocoded'][i] = True
            else:
                tables_df['geocoded'][i] = False
        none_tbls = len(tables_df.loc[tables_df['geocoded'] == False])
        geo = len(tables_df.loc[tables_df['geocoded'] == True])
        polys = len(tables_df.loc[tables_df['geom_type'].isin(
            ['ST_MultiPolygon', 'Polygon'])])
        lines = len(tables_df.loc[tables_df['geom_type'].isin(
            ['ST_LineString', 'MultiLineString'])])
        points = len(tables_df.loc[tables_df['geom_type'].isin(['ST_Point'])])
        self.logger.info('{} non-geocoded datasets retrieved'.format(none_tbls))
        self.logger.info('{} geocoded datasets'.format(geo))
        self.logger.info('{} point datasets'.format(points))
        self.logger.info('{} polygon datasets'.format(polys))
        self.logger.info('{} lines datasets'.format(lines))
        return (points, lines, polys, none_tbls, geo)

    ### helper - get percentage
    def getPercentage(self, part, df):
        percentage = round(part * 100 / len(df), 2)
        return percentage

    ### helper - get top list
    def getTop5(self, df, col_order, col_index):
        top5 = df.sort_values([col_order], ascending=False).head()
        top5 = top5.set_index(col_index)
        return top5

    ### get quota information
    def getQuota(self, user, quota):
        '''
        Method to get storage quota and LDS (geocoding, routing, isolines)
        information as df.
        '''
        self.logger.info(
            'Getting storage quota and geocoding, routing and isolines quota information...'
        )
        dsets_size = pd.DataFrame(
            self.sql.send(
                "SELECT SUM(pg_total_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)))/1000000 as total FROM pg_tables WHERE schemaname = '"
                + user + "'")['rows'])['total'][0]
        self.logger.info('Retrieved {} MB as storage quota'.format(dsets_size))

        lds = pd.DataFrame(
            self.sql.send('SELECT * FROM cdb_service_quota_info()')['rows'])
        self.logger.info('Retrieved {} Location Data Services'.format(len(lds)))
        lds = lds[0:3]  # leave DO out
        lds['pc_used'] = round(
            lds['used_quota'] * 100.00 / lds['monthly_quota'], 2)
        lds = lds.rename(
            columns={
                "monthly_quota": "Monthly Quota",
                "provider": "Provider",
                "service": "Service",
                "used_quota": "Used",
                "pc_used": "% Used"
            })

        real_storage = quota * 2
        used_storage = round(dsets_size, 2)
        pc_used = round(used_storage * 100.00 / real_storage, 2)
        storage = [
            real_storage, 'carto', 'storage', 'false', used_storage, pc_used
        ]
        lds.loc[len(lds)] = storage
        lds = lds.set_index('Service')
        lds['Left'] = round(lds['Monthly Quota'] - lds['Used'], 1)
        lds['% Left'] = 100.00 - lds['% Used']
        lds_df = lds[[
            'Monthly Quota', 'Provider', 'Used', '% Used', 'Left', '% Left'
        ]]
        return lds_df

    ### get analysis and tables data
    def getSizes(self, dsets_df):
        '''
        Method to get all table sizes and to tell cartodbfied from
        non-cartodbfied tables (analysis).
        '''
        self.logger.info('Getting list of tables...')
        all_tables = self.sql.send(
            "select pg_class.relname as name from pg_class, pg_roles, pg_namespace" +
            " where pg_roles.oid = pg_class.relowner and " +
            "pg_roles.rolname = current_user " +
            "and pg_namespace.oid = pg_class.relnamespace and pg_class.relkind = 'r'"
        )['rows']
        all_tables_df = json_normalize(all_tables)
        self.logger.info('Retrieved {} tables.'.format(len(all_tables_df)))

        dsets_df['cartodbfied'] = 'Yes'
        all_tables_df = all_tables_df.merge(dsets_df, on='name', how='left')
        all_tables_df['cartodbfied'] = all_tables_df['cartodbfied'].fillna('No')
        all_tables_df['size'] = 0

        self.logger.info('Getting table sizes...')
        for index, row in all_tables_df.iterrows():
            try:
                size = self.sql.send("select pg_total_relation_size('" +
                                     row['name'] +
                                     "') as size")['rows'][0].get('size')
            except:
                self.logger.warning('Error at: ' + str(row['name']))
            all_tables_df.set_value(index, 'size', size)
        self.logger.info('Table sizes retrieved with a sum of {} MB'.format(
            all_tables_df['size'].sum()))
        return all_tables_df

    ### get analysis names table
    def getCachedAnalysisNames(self, all_tables_df):
        '''
        Method to transform cached analysis ids to analysis names.
        '''
        self.logger.info('Getting analysis from tables information...')
        analysis_df = all_tables_df.loc[all_tables_df['cartodbfied'] == 'No']
        if len(analysis_df) > 0:
            # get analysis id
            self.logger.info('Replacing analysis id with proper names...')
            analysis_df['id'] = analysis_df['name'].str.split("_", n=3,
                                                              expand=True)[1]

            # convert equivalences object to a df
            equivalences = [
                {"type": "aggregate-intersection", "id": "b194a8f896"},
                {"type": "bounding-box", "id": "5f80bdff9d"},
                {"type": "bounding-circle", "id": "b7636131b5"},
                {"type": "buffer", "id": "2f13a3dbd7"},
                {"type": "centroid", "id": "ae64186757"},
                {"type": "closest", "id": "4bd65e58e4"},
                {"type": "concave-hull", "id": "259cf96ece"},
                {"type": "contour", "id": "779051ec8e"},
                {"type": "convex-hull", "id": "05234e7c2a"},
                {"type": "data-observatory-measure", "id": "a08f3b6124"},
                {"type": "data-observatory-multiple-measures", "id": "cd60938c7b"},
                {"type": "deprecated-sql-function", "id": "e85ed857c2"},
                {"type": "filter-by-node-column", "id": "83d60eb9fa"},
                {"type": "filter-category", "id": "440d2c1487"},
                {"type": "filter-grouped-rank", "id": "f15fa0b618"},
                {"type": "filter-range", "id": "942b6fec82"},
                {"type": "filter-rank", "id": "43155891da"},
                {"type": "georeference-admin-region", "id": "a5bdb274e8"},
                {"type": "georeference-city", "id": "d5b2dd1672"},
                {"type": "georeference-country", "id": "792d8938e3"},
                {"type": "georeference-ip-address", "id": "d5b2274cdf"},
                {"type": "georeference-long-lat", "id": "0623244fc4"},
                {"type": "georeference-postal-code", "id": "1f7c6f9f43"},
                {"type": "georeference-street-address", "id": "1ea6dec9f3"},
                {"type": "gravity", "id": "93ab69856c"},
                {"type": "intersection", "id": "971639c870"},
                {"type": "kmeans", "id": "3c835a874c"},
                {"type": "line-sequential", "id": "9fd29bd5c0"},
                {"type": "line-source-to-target", "id": "9e88a1147e"},
                {"type": "line-to-column", "id": "be2ff62ce9"},
                {"type": "line-to-single-point", "id": "eca516b80b"},
                {"type": "link-by-line", "id": "49ca809a90"},
                {"type": "merge", "id": "c38cb847a0"},
                {"type": "moran", "id": "91837cbb3c"},
                {"type": "point-in-polygon", "id": "2e94d3858c"},
                {"type": "population-in-area", "id": "d52251dc01"},
                {"type": "routing-sequential", "id": "a627e132c2"},
                {"type": "routing-to-layer-all-to-all", "id": "b70cf71482"},
                {"type": "routing-to-single-point", "id": "2923729eb9"},
                {"type": "sampling", "id": "7530d60ffc"},
                {"type": "source", "id": "fd83c76763"},
                {"type": "spatial-markov-trend", "id": "9c3b798f46"},
                {"type": "trade-area", "id": "112d4fc091"},
                {"type": "weighted-centroid", "id": "1d85314d7a"}
            ]
            equivalences_df = json_normalize(equivalences)

            # join equivalences to analysis table
            analysis_df = pd.merge(analysis_df, equivalences_df, on='id',
                                   how='left')

            # get analysis summary
            analysis_types = analysis_df['type'].value_counts()
            analysis_types_df = analysis_types.to_frame()
            analysis_types_df = analysis_types_df.rename(
                columns={'type': 'Analysis Count'})
            self.logger.info(
                '{} analysis retrieved, {} different types. '.format(
                    len(analysis_df), analysis_types_df.nunique()))
        else:
            self.logger.warning('No analysis found.')
        return (analysis_df, analysis_types_df)

    ### plot LDS figure
    def plotQuota(self, lds_df):
        '''
        Method to plot an LDS and storage bar chart.
        '''
        self.logger.info('Plotting LDS figure...')

        # plot properties
        r = list(range(len(lds_df)))
        barWidth = 0.85
        names = lds_df.index.tolist()

        # create a plot
        fig_lds, ax_lds = plt.subplots()

        # quota left bars (green)
        ax_lds.bar(r, lds_df['% Left'], bottom=lds_df['% Used'],
                   color='#009392', edgecolor='white', width=barWidth,
                   label='% Left')

        # used quota bars (red)
        ax_lds.bar(r, lds_df['% Used'], color='#cf597e', edgecolor='white',
                   width=barWidth, label='% Used')

        # customize ticks and labels
        ax_lds.set_xticks(r)
        ax_lds.set_xticklabels(names)
        ax_lds.set_xlabel("Location Data Service")
        ax_lds.set_ylabel("%")

        # Add a legend
        handles, labels = ax_lds.get_legend_handles_labels()
        ax_lds.legend(handles, labels, loc='upper left',
                      bbox_to_anchor=(0, 1, 1, 0))

        # tight plot
        plt.tight_layout()
        return fig_lds

    ### plot analysis figure
    def plotAnalysis(self, analysis_types_df):
        '''
        Method to plot an analysis count bar chart.
        '''
        self.logger.info('Plotting analysis figure...')

        # plot properties
        analysis_names = analysis_types_df.index.tolist()
        analysis_portions = analysis_types_df['Analysis Count']
        cartocolors = [
            '#7F3C8D', '#11A579', '#3969AC', '#F2B701', '#E73F74', '#80BA5A',
            '#E68310', '#008695', '#CF1C90', '#f97b72', '#4b4b8f', '#A5AA99'
        ]
        names_positions = [i for i, _ in enumerate(analysis_names)]

        # create plot
        fig_analysis, ax_analysis = plt.subplots()

        # plot bars
        ax_analysis.barh(names_positions, analysis_portions, color=cartocolors)

        # customize ticks and labels
        ax_analysis.set_ylabel("Analysis Type")
        ax_analysis.set_xlabel("Analysis Count")
        ax_analysis.set_yticks(names_positions)
        ax_analysis.set_yticklabels(analysis_names)

        # tight plot
        plt.tight_layout()
        return fig_analysis

    ### generate report with an HTML template
    def generateReport(self, user, org, today, lds_df, maps_df,
                       top_5_maps_date, analysis_types_df, analysis_df,
                       dsets_df, tables_sizes, top_5_dsets_date,
                       top_5_dsets_size, sync, private, link, public, geo,
                       none_tbls, points, lines, polys, fig_analysis, fig_lds):
        '''
        Method to generate a HTML report.
        '''
        self.logger.info('Generating HTML template...')
        template = """
        <!DOCTYPE html>
        <html lang="en">
        <head>
          <meta charset="UTF-8">
          <meta name="viewport" content="width=device-width, initial-scale=1.0">
          <meta http-equiv="X-UA-Compatible" content="ie=edge">
          <title>CARTO Database Metrics Report Template</title>
          <link rel="stylesheet" href="https://libs.cartocdn.com/airship-style/v1.0.3/airship.css">
          <script src="https://libs.cartocdn.com/airship-components/v1.0.3/airship.js"></script>
          <style>
            .as-sidebar{
              width: 33.33%;
            }
            .as-box{
              border-bottom: 1px solid #F5F5F5;
            }
          </style>
        </head>
        <body class="as-app-body as-app">
          <header class="as-toolbar">
            <div class="as-toolbar__item as-title">
              CARTO Metrics Report
            </div>
            <div class="as-toolbar__item as-display--block as-p--12 as-subheader as-bg--complementary">
              {{ user }} from {{org}} at {{today}}
            </div>
          </header>
          <div class="as-content">
            <aside class="as-sidebar as-sidebar--left">
              <div class="as-container">
                <h1 class="as-box as-title as-font--medium">Maps and Analysis</h1>
                <div class="as-box">
                  <h2 class="as-title">Maps</h2>
                  <p class="as-body as-font--medium">Number of maps: {{total_maps}}</p>
                  <div class="as-box" id="maps-table">
                    <h3 class="as-subheader">Top 5 Maps by Date</h3>
                    {{top_5_maps_date.to_html()}}
                  </div>
                </div>
                <div class="as-box">
                  <h2 class="as-title">Builder Cached Analysis</h2>
                  <ul class="as-list">
                    <li class="as-list__item">Number of cached analyses: {{total_analysis}}</li>
                    <li class="as-list__item">Cached Analyses Size: {{total_size_analysis}} MB</li>
                  </ul>
                  <div class="as-box" id="analysis-table">
                    {{analysis_types_df.to_html()}}
                  </div>
                  <div class="as-box" id="analysis-fig">
                    {{html_fig_analysis}}
                  </div>
                </div>
              </div>
            </aside>
            <main class="as-main">
              <h1 class="as-box as-title as-font--medium">Storage Quota & LDS</h1>
              <div class="as-box">
                <h2 class="as-title">Storage Quota</h2>
                <ul class="as-list">
                  <li class="as-list__item as-font--medium">Account Storage: {{real_storage}} MB</li>
                  <li class="as-list__item as-color--support-01">Used Quota: {{used_storage}} MB, {{pc_used}} %</li>
                  <li class="as-list__item as-color--complementary">Quota Left: {{left_storage}} MB, {{pc_left}} %</li>
                </ul>
              </div>
              <div class="as-box">
                <h2 class="as-title">Location Data Services</h2>
                <div class="as-box" id="lds-table">
                  {{lds.to_html()}}
                </div>
                <div class="as-box" id="lds-fig">
                  {{html_fig_lds}}
                </div>
              </div>
            </main>
            <aside class="as-sidebar as-sidebar--right">
              <div class="as-container">
                <div class="as-box as-title as-font--medium">Datasets</div>
                <div class="as-box">
                  <h2 class="as-title">Datasets Summary</h2>
                  <ul class="as-list">
                    <li class="as-list__item as-font--medium">Number of tables: {{total_dsets}}</li>
                    <li class="as-list__item">Sync tables: {{sync}}</li>
                    <li class="as-list__item">Tables Size: {{total_size_tbls}} MB</li>
                  </ul>
                </div>
                <div class="as-box">
                  <h2 class="as-title">Privacy</h2>
                  <ul class="as-list">
                    <li class="as-list__item as-color--support-01">🔒 Private: {{private}} tables</li>
                    <li class="as-list__item as-color--support-02">🔗 Shared with link: {{link}} tables</li>
                    <li class="as-list__item as-color--support-03">🔓 Public: {{public}} tables</li>
                  </ul>
                </div>
                <div class="as-box">
                  <h2 class="as-title">Geometry</h2>
                  <p class="as-body">Number of geocoded tables: {{geo}}</p>
                  <ul class="as-list">
                    <li class="as-list__item">📌 Points: {{points}} tables</li>
                    <li class="as-list__item">〰️ Lines: {{lines}} tables</li>
                    <li class="as-list__item">⬛ Polygons: {{polys}} tables</li>
                  </ul>
                  <p class="as-body">Number of non-geocoded tables: {{none_tbls}}</p>
                </div>
                <div class="as-box" id="tables-size">
                  <h3 class="as-subheader">Top 5 Datasets by Size</h3>
                  {{top_5_dsets_size.to_html()}}
                </div>
                <div class="as-box" id="tables-date">
                  <h3 class="as-subheader">Top 5 Datasets by Date</h3>
                  {{top_5_dsets_date.to_html()}}
                </div>
              </div>
            </aside>
          </div>
          <script>
            // add airship class to tables
            const tableElements = document.querySelectorAll('table');
            tableElements.forEach(element => element.classList.add("as-table"));
          </script>
        </body>
        </html>
        """
        rtemplate = Environment(loader=BaseLoader()).from_string(template)

        self.logger.info('Rendering HTML report...')
        report = rtemplate.render({
            # user and date info
            'user': user,
            'org': org,
            'today': today,
            # lds and storage info
            'lds': lds_df,
            'real_storage': lds_df.iloc[3]['Monthly Quota'],
            'used_storage': lds_df.iloc[3]['Used'],
            'pc_used': lds_df.iloc[3]['% Used'],
            'left_storage': lds_df.iloc[3]['Left'],
            'pc_left': round(lds_df.iloc[3]['% Left'], 2),
            # maps info
            'total_maps': len(maps_df),
            'total_analysis': len(analysis_df),
            'total_size_analysis': analysis_df['size'].sum(),
            'analysis_types_df': analysis_types_df,
            'top_5_maps_date': top_5_maps_date,
            # datasets info
            'sync': sync,
            'total_dsets': len(dsets_df),
            'total_size_tbls': tables_sizes['size'].sum(),
            'top_5_dsets_size': top_5_dsets_size[['size']],
            'top_5_dsets_date': top_5_dsets_date[['created']],
            # privacy info
            'private': private,
            'link': link,
            'public': public,
            # geometry info
            'geo': geo,
            'points': points,
            'lines': lines,
            'polys': polys,
            'none_tbls': none_tbls,
            # figures
            'html_fig_analysis': fig_to_html(fig_analysis),
            'html_fig_lds': fig_to_html(fig_lds)
        })
        return report
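
# Hedged usage sketch for the Reporter class above (ours, not part of the
# package code): the positional arguments follow the __init__ signature shown
# earlier; the username, organization, API key and 1024 MB quota are
# placeholders, and writing the returned HTML string to disk is our addition.
reporter = Reporter('johndoe', 'https://johndoe.carto.com/', 'my_org',
                    'your_api_key_here', 1024)
html = reporter.report()
with open('carto_report.html', 'w') as f:
    f.write(html)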