def remove(self, carto_url=None, api_key=None):
    '''
    Delete this table's CARTO dataset(s), drop the table, and check it is gone.

    The table is looked up repeatedly through the CARTO tables API and the
    visualization id it reports is deleted, until the lookup no longer
    yields an id.  The table is then dropped with SQL.

    :param carto_url: Base URL of the CARTO account.  Defaults to the
                      ``CARTODB_URL`` environment variable.
    :param api_key:   CARTO API key.  Defaults to the ``CARTODB_API_KEY``
                      environment variable.
    :raises KeyError: if a needed environment variable is not set.
    :raises AssertionError: if ``self.exists()`` is still true afterwards.
    '''
    api_key = api_key or os.environ['CARTODB_API_KEY']
    # Previously a missing ``carto_url`` was formatted into the request as
    # the text "None".  requests rejects such a URL with MissingSchema, which
    # is a ValueError, so the handler below swallowed it and the dataset was
    # never deleted through the API.
    carto_url = carto_url or os.environ['CARTODB_URL']

    lookup_url = '{url}/api/v1/tables/{tablename}?api_key={api_key}'.format(
        url=carto_url, tablename=self.tablename, api_key=api_key)
    try:
        while True:
            resp = requests.get(lookup_url)
            viz_id = resp.json()['id']
            # delete dataset by id DELETE
            # https://observatory.cartodb.com/api/v1/viz/ed483a0b-7842-4610-9f6c-8591273b8e5c
            try:
                requests.delete(
                    '{url}/api/v1/viz/{viz_id}?api_key={api_key}'.format(
                        url=carto_url, viz_id=viz_id, api_key=api_key),
                    timeout=1)
            except requests.Timeout:
                # Best effort: the loop looks the table up again right away.
                pass
    except (ValueError, KeyError):
        # The lookup body was not JSON (ValueError) or carried no ``id``
        # (KeyError); either way there is no visualization id to delete.
        pass

    query_cartodb('DROP TABLE IF EXISTS {tablename}'.format(
        tablename=self.tablename))
    assert not self.exists()
def run(self):
    '''
    Null out building and lot areas above 50000 in the ACRIS groupby table.

    For each of ``bldgarea`` and ``lotarea`` the current values are copied
    into a new ``*_orig`` column, then the working column is rewritten with
    values over 50000 replaced by NULL.  A response mentioning
    "already exists" is skipped; any other response must have status 200.

    :raises AssertionError: if a query fails for any other reason.
    '''
    # NOTE(review): the task is flagged complete before any query has run.
    self._complete = True

    statements = (
        (' ALTER TABLE acris_real_property_groupby_{year} '
         'ADD COLUMN bldgarea_orig Numeric '),
        (' UPDATE acris_real_property_groupby_{year} '
         'SET bldgarea_orig = bldgarea '),
        (' UPDATE acris_real_property_groupby_{year} '
         'SET bldgarea = CASE WHEN bldgarea_orig > 50000 THEN NULL '
         'ELSE bldgarea_orig END '),
        (' ALTER TABLE acris_real_property_groupby_{year} '
         'ADD COLUMN lotarea_orig Numeric '),
        (' UPDATE acris_real_property_groupby_{year} '
         'SET lotarea_orig = lotarea '),
        (' UPDATE acris_real_property_groupby_{year} '
         'SET lotarea = CASE WHEN lotarea_orig > 50000 THEN NULL '
         'ELSE lotarea_orig END '),
    )

    for statement in statements:
        sql = statement.format(year=self.year)
        account_url = 'https://' + self.username + '.carto.com'
        resp = query_cartodb(sql, carto_url=account_url,
                             api_key=self.api_key)
        # An "already exists" reply is tolerated and not checked further.
        if 'already exists' not in resp.text:
            assert resp.status_code == 200
def run(self):
    '''
    Rebuild ``obs_meta`` and every ``obs_meta_<dimension>`` table on CARTO.

    Runs ``FIRST_AGGREGATE``, then for the main table and for each entry of
    ``DIMENSIONS`` removes the existing target and re-imports it as a public
    table from its query, with newlines replaced by spaces.  The task is
    flagged complete only after every import call has returned.
    '''
    query_cartodb(self.FIRST_AGGREGATE)

    meta_table = 'obs_meta'
    CartoDBTarget(tablename=meta_table).remove()
    import_api({
        'privacy': 'public',
        'sql': self.QUERY.replace('\n', ' '),
        'table_name': meta_table,
    })

    for dimension, dimension_sql in self.DIMENSIONS.iteritems():
        dimension_table = 'obs_meta_{}'.format(dimension)
        CartoDBTarget(tablename=dimension_table).remove()
        import_api({
            'privacy': 'public',
            'sql': dimension_sql.replace('\n', ' '),
            'table_name': dimension_table,
        })

    self._complete = True
def exists(self):
    '''
    Report whether the table can be queried and holds at least one row.

    :return: False when the probe query does not answer with status 200;
             otherwise whether its ``total_rows`` is greater than zero (so
             a table that answers with zero rows also reports False).
    '''
    probe = 'SELECT row_number() over () FROM "{tablename}" LIMIT 1'.format(
        tablename=self.tablename)
    resp = query_cartodb(probe, api_key=self.api_key,
                         carto_url=self.carto_url)
    return resp.status_code == 200 and resp.json()['total_rows'] > 0
def requires(self):
    '''
    Yield an upload task for every local table accepted by ``should_upload``.

    A table whose local version compares greater than the version recorded
    in the remote ``obs_table`` is uploaded with ``force=True``; every other
    table uses ``self.force``.
    '''
    session = current_session()

    remote_rows = query_cartodb('SELECT * FROM obs_table').json()['rows']
    remote_versions = {row['tablename']: row['version']
                       for row in remote_rows}

    local_versions = {table.tablename: table.version
                      for table in session.query(OBSTable)
                      if should_upload(table)}

    for tablename, version in local_versions.iteritems():
        # NOTE(review): for a table unknown remotely ``.get`` returns None;
        # the comparison then relies on Python 2 ordering None below other
        # values.
        outdated = version > remote_versions.get(tablename)
        yield TableToCartoViaImportAPI(
            table=tablename, force=True if outdated else self.force)
def run(self):
    '''
    Remove remote ``obs_<40 hex digits>`` datasets missing from ``obs_table``.

    Dataset names are scraped from pages ``self.start`` through ``self.end``
    (inclusive) of the account's ``/datasets`` listing.  Every scraped name
    that is not a ``tablename`` in ``obs_table`` is removed through
    ``CartoDBTarget``.  A failed removal is logged and does not stop the
    run.
    '''
    resp = query_cartodb('SELECT tablename FROM obs_table')
    tablenames = set(r['tablename'] for r in resp.json()['rows'])

    remote_tables = []
    for page in range(self.start, self.end + 1):
        # The bracket expression used to be ``[0-f]``, a range that also
        # covers punctuation and upper-case letters; it now matches
        # lower-case hex digits only.
        listing = shell(
            "curl -s '{cartodb_url}/datasets?page={page}' "
            "| grep -Eo 'obs_[0-9a-f]{{40}}' | uniq".format(
                cartodb_url=os.environ['CARTODB_URL'], page=page))
        # Splitting an empty listing yields [''], which used to be treated
        # as a table name; blank entries are dropped here.
        remote_tables.extend(
            name for name in listing.strip().split('\n') if name)

    for table in remote_tables:
        if table in tablenames:
            # Only report "keeping" for tables that really are kept.
            LOGGER.info('keeping %s', table)
            continue
        LOGGER.info('removing %s', table)
        try:
            CartoDBTarget(table).remove()
        except Exception as err:
            # Deliberately broad: one failed removal must not abort the
            # clean-up of the remaining tables.
            LOGGER.warn(err)
def requires(self):
    '''
    Yield an upload task for every local table having a weighted column.

    Local tables come from a join of ``observatory.obs_table`` with its
    columns, keeping columns whose weight is above zero and skipping one
    hard-coded table.  A table whose local version compares greater than
    the version recorded in the remote ``obs_table`` is uploaded with
    ``force=True``; every other table uses ``self.force``.
    '''
    remote_rows = query_cartodb('SELECT * FROM obs_table').json()['rows']
    remote_versions = {row['tablename']: row['version']
                       for row in remote_rows}

    weighted_tables = current_session().execute(
        ' SELECT tablename, t.version '
        'FROM observatory.obs_table t, '
        'observatory.obs_column_table ct, '
        'observatory.obs_column c '
        'WHERE t.id = ct.table_id '
        'AND c.id = ct.column_id '
        "AND t.tablename NOT IN "
        "('obs_ffebc3eb689edab4faa757f75ca02c65d7db7327') "
        'AND c.weight > 0 ')
    # The join returns one row per matching column; the dict keeps a single
    # entry per table name.
    local_versions = {name: version for name, version in weighted_tables}

    for tablename, version in local_versions.iteritems():
        # NOTE(review): for a table unknown remotely ``.get`` returns None;
        # the comparison then relies on Python 2 ordering None below other
        # values.
        outdated = version > remote_versions.get(tablename)
        yield TableToCartoViaImportAPI(
            table=tablename, force=True if outdated else self.force)
def _generate_config(self, zoom, lon, lat, boundary=None):
    '''
    Build the map configuration for a choropleth of ``self.measure``.

    The numerator, denominator and geometry tables/columns are read from
    ``observatory.obs_meta``.  When ``boundary`` is given, the row for that
    ``geom_id`` is used; when it is omitted or has no row, the first row
    ordered by geom_weight, numer_timespan and geom_colname (all
    descending) is used instead.

    The mapped value is:

    * numerator / denominator when a denominator column exists;
    * numerator / ST_Area(geom.the_geom) * 1000000.0 when the numerator
      aggregate is ``'sum'``;
    * the plain numerator otherwise, bucketed by head/tails breaks when its
      type is ``'numeric'`` and by its 10 most frequent values for any
      other type.

    :param zoom: Returned unchanged under ``'zoom'``.
    :param lon: First element of the returned ``'center'``.
    :param lat: Second element of the returned ``'center'``.
    :param boundary: Optional ``geom_id`` to prefer in the metadata lookup.
    :return: dict with ``'layers'`` (basemap plus one mapnik layer),
             ``'center'`` and ``'zoom'``.
    :raises Exception: if the statistics query does not return status 200.
    :raises TypeError: if ``obs_meta`` has no row for the measure (the
                       ``None`` result cannot be unpacked).
    '''
    layers = [self.BASEMAP]
    session = current_session()
    measure = session.query(OBSColumn).get(self.measure)

    mainquery = (
        ' SELECT numer_aggregate, numer_type, numer_colname, '
        'numer_geomref_colname, numer_tablename, '
        'geom_geomref_colname, geom_colname, geom_tablename, '
        'denom_colname, denom_tablename, denom_geomref_colname '
        'FROM observatory.obs_meta '
        "WHERE numer_id = '{measure}' {boundary_clause} "
        'ORDER BY geom_weight DESC, numer_timespan DESC, geom_colname DESC; '
    )
    boundary_clause = (
        "AND geom_id = '{}'".format(boundary) if boundary else '')
    results = session.execute(mainquery.format(
        measure=self.measure, boundary_clause=boundary_clause)).fetchone()
    # how should we determine fallback resolution?
    if results is None:
        results = session.execute(mainquery.format(
            measure=self.measure, boundary_clause="")).fetchone()

    (numer_aggregate, numer_type, numer_colname, numer_geomref_colname,
     numer_tablename, geom_geomref_colname, geom_colname, geom_tablename,
     denom_colname, denom_tablename, denom_geomref_colname) = results

    # Name of the column the statistics query returns.  It is recorded next
    # to the query that produces it: deciding later from ``numer_type``
    # alone looked up 'categories' in a 'headtails' result whenever a
    # non-numeric measure had a denominator or a 'sum' aggregate.
    stats_key = 'headtails'

    if denom_colname:
        cartosql = (
            "SELECT geom.cartodb_id, geom.{geom_colname} as the_geom, "
            "geom.the_geom_webmercator, "
            "numer.{numer_colname} / NULLIF(denom.{denom_colname}, 0) measure "
            "FROM {geom_tablename} as geom, {numer_tablename} as numer, "
            " {denom_tablename} as denom "
            "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
            " AND numer.{numer_geomref_colname} = denom.{denom_geomref_colname} "
        )
        statssql = (
            "SELECT "
            'CDB_HeadsTailsBins(array_agg(distinct( '
            ' (numer.{numer_colname} / '
            ' NULLIF(denom.{denom_colname}, 0))::NUMERIC)), 4) as "headtails" '
            "FROM {geom_tablename} as geom, "
            " {numer_tablename} as numer, "
            " {denom_tablename} as denom "
            "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
            " AND numer.{numer_geomref_colname} = denom.{denom_geomref_colname} "
        )
    elif numer_aggregate == 'sum':
        cartosql = (
            "SELECT geom.cartodb_id, geom.{geom_colname} as the_geom, "
            "geom.the_geom_webmercator, "
            "numer.{numer_colname} / "
            " ST_Area(geom.the_geom) * 1000000.0 measure "
            "FROM {geom_tablename} as geom, {numer_tablename} as numer "
            "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
        )
        statssql = (
            "SELECT CDB_HeadsTailsBins(array_agg(distinct( "
            ' (numer.{numer_colname} / ST_Area(geom.the_geom) '
            ' * 1000000.0)::NUMERIC)), 4) as "headtails" '
            "FROM {geom_tablename} as geom, "
            " {numer_tablename} as numer "
            "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
        )
    else:
        cartosql = (
            "SELECT geom.cartodb_id, geom.{geom_colname} as the_geom, "
            " geom.the_geom_webmercator, "
            " numer.{numer_colname} measure "
            "FROM {geom_tablename} as geom, {numer_tablename} as numer "
            " WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
        )
        if numer_type.lower() == 'numeric':
            statssql = (
                "SELECT "
                'CDB_HeadsTailsBins(array_agg( '
                ' distinct(numer.{numer_colname}::NUMERIC)), 4) as "headtails" '
                "FROM {geom_tablename} as geom, "
                " {numer_tablename} as numer "
                "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
            )
        else:
            stats_key = 'categories'
            statssql = (
                ' SELECT array_agg(category) categories '
                'FROM ( SELECT row_number() over () catname, '
                '{numer_colname} as category, COUNT(*) cnt '
                'FROM {numer_tablename} '
                'GROUP BY {numer_colname} '
                'ORDER BY COUNT(*) DESC LIMIT 10 ) foo'
            )

    # Both templates are filled from the same metadata row.
    names = dict(geom_colname=geom_colname,
                 numer_colname=numer_colname,
                 geom_tablename=geom_tablename,
                 numer_tablename=numer_tablename,
                 geom_geomref_colname=geom_geomref_colname,
                 numer_geomref_colname=numer_geomref_colname,
                 denom_colname=denom_colname,
                 denom_tablename=denom_tablename,
                 denom_geomref_colname=denom_geomref_colname)
    cartosql = cartosql.format(**names)
    statssql = statssql.format(**names)

    resp = query_cartodb(statssql)
    if resp.status_code != 200:
        raise Exception("Unable to obtain statssql: {}".format(resp.text))

    # Palette for the measure's unit, falling back to the ratio palette.
    default_ramp = self.PALETTES['tags.ratio']
    unit = measure.unit()
    ramp = self.PALETTES.get(unit.id, default_ramp) if unit else default_ramp

    buckets = resp.json()['rows'][0][stats_key]
    if stats_key == 'headtails':
        bucket_template = u''' [measure <= {bucket}] {{ polygon-fill: @{i}; }} '''
    else:
        bucket_template = u''' [measure = "{bucket}"] {{ polygon-fill: @{i}; }} '''
    # Rules are emitted last bucket first, colours numbered from @1.
    bucket_css = u''.join(
        bucket_template.format(bucket=bucket, i=i + 1)
        for i, bucket in reversed(list(enumerate(buckets))))

    cartocss = (
        '/** choropleth visualization */ {ramp} '
        '#data {{ polygon-opacity: 0.9; polygon-gamma: 0.5; '
        'line-color: #000000; line-width: 0.25; line-opacity: 0.2; '
        'line-comp-op: hard-light; polygon-fill: @{bucketlen}; '
        '[measure=null]{{ polygon-fill: #cacdce; }} {bucket_css} }}'
    ).format(ramp=ramp, bucketlen=len(buckets) + 1, bucket_css=bucket_css)

    layers.append({
        'type': 'mapnik',
        'options': {
            'layer_name': geom_tablename,
            'cartocss': cartocss,
            'cartocss_version': "2.1.1",
            'sql': cartosql,
            "table_name": "\"\"."
        }
    })
    #layers.append(self.LABELS)
    return {
        'layers': layers,
        'center': [lon, lat],
        #'bounds': self.bounds,
        'zoom': zoom
    }
def _generate_config(self, zoom, lon, lat, boundary=None):
    '''
    Build the map configuration for a choropleth of ``self.measure``.

    The numerator, denominator and geometry tables/columns are read from
    ``observatory.obs_meta``.  When ``boundary`` is given, the row for that
    ``geom_id`` is used; when it is omitted or has no row, the first row
    ordered by geom_weight, numer_timespan and geom_colname (all
    descending) is used instead.

    The mapped value is:

    * numerator / denominator when a denominator column exists;
    * numerator / ST_Area(geom.the_geom_webmercator) * 1000000.0 when the
      numerator aggregate is ``'sum'``;
    * the plain numerator otherwise.

    In every case the colour buckets come from ``CDB_HeadsTailsBins`` over
    the distinct mapped values.

    :param zoom: Returned unchanged under ``'zoom'``.
    :param lon: First element of the returned ``'center'``.
    :param lat: Second element of the returned ``'center'``.
    :param boundary: Optional ``geom_id`` to prefer in the metadata lookup.
    :return: dict with ``'layers'`` (basemap plus one mapnik layer),
             ``'center'`` and ``'zoom'``.
    :raises Exception: if the statistics query does not return status 200.
    :raises TypeError: if ``obs_meta`` has no row for the measure (the
                       ``None`` result cannot be unpacked).
    '''
    layers = [self.BASEMAP]
    session = current_session()
    measure = session.query(OBSColumn).get(self.measure)

    mainquery = (
        ' SELECT numer_aggregate, numer_colname, '
        'numer_geomref_colname, numer_tablename, '
        'geom_geomref_colname, geom_colname, geom_tablename, '
        'denom_colname, denom_tablename, denom_geomref_colname '
        'FROM observatory.obs_meta '
        "WHERE numer_id = '{measure}' {boundary_clause} "
        'ORDER BY geom_weight DESC, numer_timespan DESC, geom_colname DESC; '
    )
    boundary_clause = (
        "AND geom_id = '{}'".format(boundary) if boundary else '')
    results = session.execute(mainquery.format(
        measure=self.measure, boundary_clause=boundary_clause)).fetchone()
    # how should we determine fallback resolution?
    if results is None:
        results = session.execute(mainquery.format(
            measure=self.measure, boundary_clause="")).fetchone()

    (numer_aggregate, numer_colname, numer_geomref_colname, numer_tablename,
     geom_geomref_colname, geom_colname, geom_tablename, denom_colname,
     denom_tablename, denom_geomref_colname) = results

    if denom_colname:
        cartosql = (
            "SELECT geom.cartodb_id, geom.{geom_colname} as the_geom, "
            "geom.the_geom_webmercator, "
            "numer.{numer_colname} / NULLIF(denom.{denom_colname}, 0) measure "
            "FROM {geom_tablename} as geom, {numer_tablename} as numer, "
            " {denom_tablename} as denom "
            "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
            " AND numer.{numer_geomref_colname} = denom.{denom_geomref_colname} "
        )
        statssql = (
            "SELECT "
            'CDB_HeadsTailsBins(array_agg(distinct( '
            ' (numer.{numer_colname} / '
            ' NULLIF(denom.{denom_colname}, 0))::NUMERIC)), 4) as "headtails" '
            "FROM {geom_tablename} as geom, "
            " {numer_tablename} as numer, "
            " {denom_tablename} as denom "
            "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
            " AND numer.{numer_geomref_colname} = denom.{denom_geomref_colname} "
        )
    elif numer_aggregate == 'sum':
        cartosql = (
            "SELECT geom.cartodb_id, geom.{geom_colname} as the_geom, "
            "geom.the_geom_webmercator, "
            "numer.{numer_colname} / "
            " ST_Area(geom.the_geom_webmercator) * 1000000.0 measure "
            "FROM {geom_tablename} as geom, {numer_tablename} as numer "
            "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
        )
        statssql = (
            "SELECT CDB_HeadsTailsBins(array_agg(distinct( "
            ' (numer.{numer_colname} / ST_Area(geom.the_geom_webmercator) '
            ' * 1000000.0)::NUMERIC)), 4) as "headtails" '
            "FROM {geom_tablename} as geom, "
            " {numer_tablename} as numer "
            "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
        )
    else:
        cartosql = (
            "SELECT geom.cartodb_id, geom.{geom_colname} as the_geom, "
            " geom.the_geom_webmercator, "
            " numer.{numer_colname} measure "
            "FROM {geom_tablename} as geom, {numer_tablename} as numer "
            " WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
        )
        statssql = (
            "SELECT "
            'CDB_HeadsTailsBins(array_agg( '
            ' distinct(numer.{numer_colname}::NUMERIC)), 4) as "headtails" '
            "FROM {geom_tablename} as geom, "
            " {numer_tablename} as numer "
            "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
        )

    # Both templates are filled from the same metadata row.
    names = dict(geom_colname=geom_colname,
                 numer_colname=numer_colname,
                 geom_tablename=geom_tablename,
                 numer_tablename=numer_tablename,
                 geom_geomref_colname=geom_geomref_colname,
                 numer_geomref_colname=numer_geomref_colname,
                 denom_colname=denom_colname,
                 denom_tablename=denom_tablename,
                 denom_geomref_colname=denom_geomref_colname)
    cartosql = cartosql.format(**names)
    statssql = statssql.format(**names)

    resp = query_cartodb(statssql)
    # An explicit raise replaces the former ``assert``: asserts are removed
    # when Python runs with -O, and this also reports what the API said.
    if resp.status_code != 200:
        raise Exception("Unable to obtain statssql: {}".format(resp.text))
    headtails = resp.json()['rows'][0]['headtails']

    # Palette for the measure's unit, falling back to the ratio palette.
    default_ramp = self.PALETTES['tags.ratio']
    unit = measure.unit()
    ramp = self.PALETTES.get(unit.id, default_ramp) if unit else default_ramp

    bucket_template = u''' [measure <= {bucket}] {{ polygon-fill: @{i}; }} '''
    # Rules are emitted last bucket first, colours numbered from @1.
    bucket_css = u''.join(
        bucket_template.format(bucket=bucket, i=i+1)
        for i, bucket in reversed(list(enumerate(headtails))))

    cartocss = (
        '/** choropleth visualization */ {ramp} '
        '#data {{ polygon-opacity: 0.9; polygon-gamma: 0.5; '
        'line-color: #000000; line-width: 0.25; line-opacity: 0.2; '
        'line-comp-op: hard-light; polygon-fill: @{bucketlen}; '
        '[measure=null]{{ polygon-fill: #cacdce; }} {bucket_css} }}'
    ).format(ramp=ramp, bucketlen=len(headtails) + 1, bucket_css=bucket_css)

    layers.append({
        'type': 'mapnik',
        'options': {
            'layer_name': numer_tablename,
            'cartocss': cartocss,
            'cartocss_version': "2.1.1",
            'sql': cartosql,
            "table_name": "\"\"."
        }
    })
    #layers.append(self.LABELS)
    return {
        'layers': layers,
        'center': [lon, lat],
        #'bounds': self.bounds,
        'zoom': zoom
    }