class PDFCatalogToS3(Task):
    '''
    Copy the built PDF catalog (``catalog/build/observatory.pdf``) to S3.

    The file is uploaded twice: once to a fixed key and once to a key that
    embeds :attr:`timestamp`.

    :param timestamp: Date embedded in the second S3 key, defaults to today.
    :param force: When set, the fixed-key object is removed from S3 at
                  construction time.
    '''

    # NOTE: the default is evaluated once, when the class body is executed.
    timestamp = DateParameter(default=date.today())
    force = BooleanParameter(significant=False)

    def __init__(self, **kwargs):
        if kwargs.get('force'):
            try:
                shell('aws s3 rm s3://data-observatory/observatory.pdf')
            except Exception:
                # Best effort: a failed removal must not prevent the task from
                # being built.  ``Exception`` (rather than a bare ``except``)
                # lets KeyboardInterrupt / SystemExit propagate.
                pass
        # Forward the parameters: calling the parent constructor without them
        # would silently replace an explicit ``timestamp`` with its default.
        super(PDFCatalogToS3, self).__init__(**kwargs)

    def run(self):
        '''
        Upload the PDF to every target in :meth:`output` with a public-read ACL.
        '''
        for target in self.output():
            shell('aws s3 cp catalog/build/observatory.pdf {output} '
                  '--acl public-read'.format(
                      output=target.path
                  ))

    def output(self):
        '''
        Return the two S3 targets: the fixed key and the timestamped key.
        '''
        return [
            S3Target('s3://data-observatory/observatory.pdf'),
            S3Target('s3://data-observatory/observatory-{timestamp}.pdf'.format(
                timestamp=self.timestamp
            )),
        ]
class SyncAllData(WrapperTask):
    '''
    Sync all data to the linked CARTO account.

    Every table selected by the local query is yielded as a
    :class:`TableToCartoViaImportAPI` requirement.  A table whose local
    version compares greater than the version listed in the remote
    ``obs_table`` is always forced; otherwise the task's own ``force``
    parameter is used.
    '''

    force = BooleanParameter(default=False, significant=False)

    def requires(self):
        # Versions as currently listed in the remote obs_table.
        remote_rows = query_cartodb('SELECT * FROM obs_table').json()['rows']
        existing_table_versions = {}
        for remote_row in remote_rows:
            existing_table_versions[remote_row['tablename']] = remote_row['version']

        # Local tables (name -> version) that have at least one weighted column.
        local_rows = current_session().execute(''' SELECT tablename, t.version FROM observatory.obs_table t, observatory.obs_column_table ct, observatory.obs_column c WHERE t.id = ct.table_id AND c.id = ct.column_id AND t.tablename NOT IN ('obs_ffebc3eb689edab4faa757f75ca02c65d7db7327') AND c.weight > 0 ''')
        tables = {}
        for name, local_version in local_rows:
            tables[name] = local_version

        for tablename, version in tables.iteritems():
            # ``.get`` yields None for a table missing remotely.
            # NOTE(review): the comparison with None relies on Python 2
            # ordering semantics -- confirm before porting.
            remote_version = existing_table_versions.get(tablename)
            force = True if version > remote_version else self.force
            yield TableToCartoViaImportAPI(table=tablename, force=force)
class DumpS3(Task):
    '''
    Copy the dump file produced by :class:`Dump` to S3 using the ``aws`` CLI.

    :param timestamp: Optional date parameter, defaults to today.
    :param force: When set, the existing S3 object is removed the first time
                  :meth:`output` is evaluated.
    '''

    timestamp = DateParameter(default=date.today())
    force = BooleanParameter(default=False, significant=False)

    def requires(self):
        return Dump(timestamp=self.timestamp)

    def run(self):
        # Resolve the source before the destination; ``output()`` logs and may
        # remove the remote object, so it is evaluated exactly once here.
        source_path = self.input().path
        destination_path = self.output().path
        command = 'aws s3 cp {input} {output}'.format(input=source_path,
                                                      output=destination_path)
        shell(command)

    def output(self):
        '''
        Derive the S3 target from the local dump path.

        Side effects: logs the S3 path and, while ``force`` is set, removes
        the remote object and then clears ``force`` on this instance.
        '''
        local_path = self.input().path
        key = local_path.replace('tmp/carto/Dump_', 'do-release-')
        key = key.replace('.dump', '/obs.dump')
        path = 's3://cartodb-observatory-data/{path}'.format(path=key)
        LOGGER.info(path)
        target = S3Target(path)
        if not self.force:
            return target
        shell('aws s3 rm {output}'.format(output=path))
        self.force = False
        return target
class Catalog(Task):
    '''
    Build the catalog by running ``make <format>`` inside ``catalog/``.

    All parameters are passed through unchanged to :class:`GenerateRST`.
    '''

    force = BooleanParameter(default=False)
    format = Parameter(default='html')
    preview = BooleanParameter(default=False)
    images = BooleanParameter(default=False)

    def requires(self):
        rst_options = dict(
            force=self.force,
            format=self.format,
            preview=self.preview,
            images=self.images,
        )
        return GenerateRST(**rst_options)

    def complete(self):
        # Always reports incomplete, so the task is never considered done.
        return False

    def run(self):
        make_command = 'cd catalog && make {}'.format(self.format)
        shell(make_command)
class SyncMetadata(WrapperTask):
    '''
    Require an upload of every metadata table via
    :class:`TableToCartoViaImportAPI`.

    Uploads are forced unless ``no_force`` is set.  ``obs_meta`` is uploaded
    with an explicit column list; every other table is uploaded without one.
    '''

    no_force = BooleanParameter(default=False, significant=False)

    def requires(self):
        metadata_tables = (
            'obs_table',
            'obs_column',
            'obs_column_table',
            'obs_tag',
            'obs_column_tag',
            'obs_dump_version',
            'obs_column_to_column',
            'obs_meta',
            'obs_meta_numer',
            'obs_meta_denom',
            'obs_meta_geom',
            'obs_meta_timespan',
            'obs_column_table_tile',
        )
        # Explicit column list used only for obs_meta.
        obs_meta_columns = [
            'numer_id', 'denom_id', 'geom_id',
            'numer_name', 'denom_name', 'geom_name',
            'numer_description', 'denom_description', 'geom_description',
            'numer_aggregate', 'denom_aggregate', 'geom_aggregate',
            'numer_type', 'denom_type', 'geom_type',
            'numer_colname', 'denom_colname', 'geom_colname',
            'numer_geomref_colname', 'denom_geomref_colname',
            'geom_geomref_colname',
            'numer_tablename', 'denom_tablename', 'geom_tablename',
            'numer_timespan', 'denom_timespan',
            'numer_weight', 'denom_weight', 'geom_weight',
            'geom_timespan',
            'numer_tags', 'denom_tags', 'geom_tags', 'timespan_tags',
            'section_tags', 'subsection_tags', 'unit_tags',
            'numer_extra', 'numer_ct_extra',
            'denom_extra', 'denom_ct_extra',
            'geom_extra', 'geom_ct_extra'
        ]
        for table in metadata_tables:
            if table != 'obs_meta':
                yield TableToCartoViaImportAPI(table=table,
                                               force=not self.no_force)
                continue
            yield TableToCartoViaImportAPI(columns=obs_meta_columns,
                                           table=table,
                                           force=not self.no_force)
class SyncMetadata(WrapperTask):
    '''
    Require an upload of every metadata table via
    :class:`TableToCartoViaImportAPI`.

    :param force: Passed to each upload task.  Defaults to ``True``, so the
                  default behaviour is a forced upload of every table.
    '''

    force = BooleanParameter(default=True, significant=False)

    def requires(self):
        metadata_tables = (
            'obs_table',
            'obs_column',
            'obs_column_table',
            'obs_tag',
            'obs_column_tag',
            'obs_dump_version',
            'obs_column_to_column',
            'obs_meta',
            'obs_meta_numer',
            'obs_meta_denom',
            'obs_meta_geom',
            'obs_meta_timespan',
            'obs_column_table_tile',
        )
        for table in metadata_tables:
            # Honor the declared parameter instead of a hard-coded ``True``;
            # with the default value the behaviour is unchanged.
            yield TableToCartoViaImportAPI(table=table, force=self.force)
class SyncData(WrapperTask):
    '''
    Upload a single OBS table to cartodb.

    The table is located by the first of these parameters that is set:

    :param exact_id: Primary key of the table.
    :param tablename: Exact ``tablename`` of the table.
    :param id: Fuzzy ID, matched case-insensitively as a substring of the
               table ID.  Must match exactly one table.
    :param force: Passed to :class:`TableToCarto`.

    :raises Exception: If none of the lookup parameters is set, or if
                       ``exact_id`` does not match any table.
    '''

    force = BooleanParameter(default=True, significant=False)
    id = Parameter(default=None)
    exact_id = Parameter(default=None)
    tablename = Parameter(default=None)

    def requires(self):
        session = current_session()
        if self.exact_id:
            table = session.query(OBSTable).get(self.exact_id)
            if table is None:
                # ``get`` returns None on a miss; fail with a readable message
                # instead of an AttributeError further down.
                raise Exception('No table found with exact_id {}'.format(
                    self.exact_id))
        elif self.tablename:
            table = session.query(OBSTable).filter(
                OBSTable.tablename == self.tablename).one()
        elif self.id:
            table = session.query(OBSTable).filter(
                OBSTable.id.ilike('%' + self.id + '%')).one()
        else:
            raise Exception('Need id, exact_id or tablename for SyncData')
        return TableToCarto(table=table.tablename, force=self.force)
class OBSMeta(Task):
    '''
    Holds the SQL used to derive the denormalized ``obs_meta`` data.

    Only the parameter and the SQL constants are defined in this class body;
    how they are executed is not visible here.
    '''

    force = BooleanParameter(default=True)

    # Defines ``public.first_agg`` (returns its first argument) and (re)creates
    # the ``public.FIRST`` aggregate on top of it.  QUERY and DIMENSIONS below
    # both call FIRST(...).
    FIRST_AGGREGATE = ''' CREATE OR REPLACE FUNCTION public.first_agg ( anyelement, anyelement ) RETURNS anyelement LANGUAGE SQL IMMUTABLE STRICT AS $$ SELECT $1; $$; DROP AGGREGATE IF EXISTS public.FIRST (anyelement); CREATE AGGREGATE public.FIRST ( sfunc = public.first_agg, basetype = anyelement, stype = anyelement ); '''

    # Selects one row per (numer, denom, geom, numer table, denom table,
    # geom table) combination from the observatory.obs_* tables.  The ``denoms``
    # CTE gathers denominator details per numerator; ``leftjoined_denoms``
    # left-joins it so that numerators without a denominator are kept.
    QUERY = ''' WITH denoms as ( SELECT numer_c.id numer_id, denom_c.id denom_id, denom_t.id denom_tid, geomref_c.id geomref_id, FIRST(denom_c.name) denom_name, FIRST(denom_c.description) denom_description, FIRST(denom_c.aggregate) denom_aggregate, FIRST(denom_c.type) denom_type, FIRST(denom_data_ct.colname) denom_colname, FIRST(denom_geomref_ct.colname) denom_geomref_colname, FIRST(denom_t.tablename) denom_tablename, FIRST(denom_t.timespan) denom_timespan, FIRST(denom_c.weight) denom_weight, JSONB_OBJECT_AGG( denom_tag.type || '/' || denom_tag.id, denom_tag.name ) FILTER (WHERE denom_tag.type IS NOT NULL) denom_tags, FIRST(denom_c.extra)::JSONB denom_extra, FIRST(denom_data_ct.extra)::JSONB denom_ct_extra FROM observatory.obs_column numer_c , observatory.obs_column_to_column denom_c2c , observatory.obs_column denom_c , observatory.obs_column_table denom_data_ct , observatory.obs_table denom_t , observatory.obs_column_tag denom_ctag , observatory.obs_tag denom_tag , observatory.obs_column_table denom_geomref_ct , observatory.obs_column geomref_c , observatory.obs_column_to_column geomref_c2c WHERE denom_c.weight > 0 AND denom_c2c.source_id = numer_c.id AND denom_c2c.target_id = denom_c.id AND denom_data_ct.column_id = denom_c.id AND denom_data_ct.table_id = denom_t.id AND denom_c.id = denom_ctag.column_id AND denom_ctag.tag_id = denom_tag.id AND denom_c2c.reltype = 'denominator' AND denom_geomref_ct.table_id = denom_t.id AND denom_geomref_ct.column_id = geomref_c.id AND geomref_c2c.reltype = 'geom_ref' AND geomref_c2c.source_id = geomref_c.id GROUP BY numer_c.id, denom_c.id, denom_t.id, 
geomref_c.id ), leftjoined_denoms AS ( SELECT numer_c.id all_numer_id, denoms.* FROM observatory.obs_column numer_c LEFT JOIN denoms ON numer_c.id = denoms.numer_id ) SELECT numer_c.id numer_id, denom_id, geom_c.id geom_id, FIRST(numer_c.name) numer_name, FIRST(denom_name) denom_name, FIRST(geom_c.name) geom_name, FIRST(numer_c.description) numer_description, FIRST(denom_description) denom_description, FIRST(geom_c.description) geom_description, FIRST(numer_c.aggregate) numer_aggregate, FIRST(denom_aggregate) denom_aggregate, FIRST(geom_c.aggregate) geom_aggregate, FIRST(numer_c.type) numer_type, FIRST(denom_type) denom_type, FIRST(geom_c.type) geom_type, FIRST(numer_data_ct.colname) numer_colname, FIRST(denom_colname) denom_colname, FIRST(geom_geom_ct.colname) geom_colname, FIRST(numer_geomref_ct.colname) numer_geomref_colname, FIRST(denom_geomref_colname) denom_geomref_colname, FIRST(geom_geomref_ct.colname) geom_geomref_colname, FIRST(numer_t.tablename) numer_tablename, FIRST(denom_tablename) denom_tablename, FIRST(geom_t.tablename) geom_tablename, FIRST(numer_t.timespan) numer_timespan, FIRST(denom_timespan) denom_timespan, FIRST(numer_c.weight) numer_weight, FIRST(denom_weight) denom_weight, FIRST(geom_c.weight) geom_weight, FIRST(geom_t.timespan) geom_timespan , FIRST(geom_t.the_geom)::geometry AS the_geom , JSONB_OBJECT_AGG( numer_tag.type || '/' || numer_tag.id, numer_tag.name ) numer_tags, FIRST(denom_tags) denom_tags, JSONB_OBJECT_AGG( geom_tag.type || '/' || geom_tag.id, geom_tag.name ) FILTER (WHERE geom_tag.type IS NOT NULL) geom_tags, NULL::JSONB timespan_tags, ARRAY_AGG(DISTINCT numer_tag.id) FILTER (WHERE numer_tag.type = 'section') section_tags, ARRAY_AGG(DISTINCT numer_tag.id) FILTER (WHERE numer_tag.type = 'subsection') subsection_tags, ARRAY_AGG(DISTINCT numer_tag.id) FILTER (WHERE numer_tag.type = 'unit') unit_tags, FIRST(numer_c.extra)::JSONB numer_extra, FIRST(numer_data_ct.extra)::JSONB numer_ct_extra, FIRST(denom_extra) denom_extra, 
FIRST(denom_ct_extra) denom_ct_extra, FIRST(geom_c.extra)::JSONB geom_extra, FIRST(geom_geom_ct.extra)::JSONB geom_ct_extra FROM observatory.obs_column_table numer_data_ct, observatory.obs_table numer_t, observatory.obs_column_table numer_geomref_ct, observatory.obs_column geomref_c, observatory.obs_column_to_column geomref_c2c, observatory.obs_column_table geom_geom_ct, observatory.obs_column_table geom_geomref_ct, observatory.obs_table geom_t, observatory.obs_column_tag numer_ctag, observatory.obs_tag numer_tag, observatory.obs_column numer_c, leftjoined_denoms, observatory.obs_column geom_c LEFT JOIN ( observatory.obs_column_tag geom_ctag JOIN observatory.obs_tag geom_tag ON geom_tag.id = geom_ctag.tag_id ) ON geom_c.id = geom_ctag.column_id WHERE numer_c.weight > 0 AND numer_c.id = numer_data_ct.column_id AND numer_data_ct.table_id = numer_t.id AND numer_t.id = numer_geomref_ct.table_id AND numer_geomref_ct.column_id = geomref_c.id AND geomref_c2c.reltype = 'geom_ref' AND geomref_c.id = geomref_c2c.source_id AND geom_c.id = geomref_c2c.target_id AND geom_geomref_ct.column_id = geomref_c.id AND geom_geomref_ct.table_id = geom_t.id AND geom_geom_ct.column_id = geom_c.id AND geom_geom_ct.table_id = geom_t.id AND geom_c.type ILIKE 'geometry' AND numer_c.type NOT ILIKE 'geometry' AND numer_c.id != geomref_c.id AND numer_ctag.column_id = numer_c.id AND numer_ctag.tag_id = numer_tag.id AND numer_c.id = leftjoined_denoms.all_numer_id AND (leftjoined_denoms.numer_id IS NULL OR ( numer_t.timespan = leftjoined_denoms.denom_timespan AND geomref_c.id = leftjoined_denoms.geomref_id )) GROUP BY numer_c.id, denom_id, geom_c.id, numer_t.id, denom_tid, geom_t.id '''

    # One SELECT per dimension, each aggregating observatory.obs_meta by that
    # dimension's identifier (numer_id, denom_id, geom_id, numer_timespan).
    # NOTE(review): presumably these populate the obs_meta_numer / _denom /
    # _geom / _timespan tables -- confirm against the code that runs them.
    DIMENSIONS = {
        'numer': ''' SELECT numer_id::TEXT, FIRST(numer_name)::TEXT numer_name, FIRST(numer_description)::TEXT numer_description, FIRST(numer_tags)::JSONB numer_tags, FIRST(numer_weight)::NUMERIC numer_weight, FIRST(numer_extra)::JSONB numer_extra, FIRST(numer_type)::TEXT numer_type, 
FIRST(numer_aggregate)::TEXT numer_aggregate, ARRAY_AGG(DISTINCT denom_id)::TEXT[] denoms, ARRAY_AGG(DISTINCT geom_id)::TEXT[] geoms, ARRAY_AGG(DISTINCT numer_timespan)::TEXT[] timespans, ST_Union(DISTINCT ST_SetSRID(the_geom, 4326)) the_geom FROM observatory.obs_meta GROUP BY numer_id ''',
        'denom': ''' SELECT denom_id::TEXT, FIRST(denom_name)::TEXT denom_name, FIRST(denom_description)::TEXT denom_description, FIRST(denom_tags)::JSONB denom_tags, FIRST(denom_weight)::NUMERIC denom_weight, 'denominator'::TEXT reltype, FIRST(denom_extra)::JSONB denom_extra, FIRST(denom_type)::TEXT denom_type, FIRST(denom_aggregate)::TEXT denom_aggregate, ARRAY_AGG(DISTINCT numer_id)::TEXT[] numers, ARRAY_AGG(DISTINCT geom_id)::TEXT[] geoms, ARRAY_AGG(DISTINCT denom_timespan)::TEXT[] timespans, ST_Union(DISTINCT ST_SetSRID(the_geom, 4326)) the_geom FROM observatory.obs_meta GROUP BY denom_id ''',
        'geom': ''' SELECT geom_id::TEXT, FIRST(geom_name)::TEXT geom_name, FIRST(geom_description)::TEXT geom_description, FIRST(geom_tags)::JSONB geom_tags, FIRST(geom_weight)::NUMERIC geom_weight, FIRST(geom_extra)::JSONB geom_extra, FIRST(geom_type)::TEXT geom_type, FIRST(geom_aggregate)::TEXT geom_aggregate, ST_SetSRID(FIRST(the_geom), 4326)::GEOMETRY(GEOMETRY, 4326) the_geom, ARRAY_AGG(DISTINCT numer_id)::TEXT[] numers, ARRAY_AGG(DISTINCT denom_id)::TEXT[] denoms, ARRAY_AGG(DISTINCT geom_timespan)::TEXT[] timespans FROM observatory.obs_meta GROUP BY geom_id ''',
        # The timespan dimension has no description, weight, extra, type or
        # aggregate of its own; those columns are selected as typed NULLs.
        'timespan': ''' SELECT numer_timespan::TEXT timespan_id, numer_timespan::TEXT timespan_name, NULL::TEXT timespan_description, FIRST(timespan_tags)::JSONB timespan_tags, NULL::NUMERIC timespan_weight, NULL::JSONB timespan_extra, NULL::TEXT timespan_type, NULL::TEXT timespan_aggregate, ARRAY_AGG(DISTINCT numer_id)::TEXT[] numers, ARRAY_AGG(DISTINCT denom_id)::TEXT[] denoms, ARRAY_AGG(DISTINCT geom_id)::TEXT[] geoms, ST_Union(DISTINCT ST_SetSRID(the_geom, 4326)) the_geom FROM observatory.obs_meta GROUP BY numer_timespan '''
    }
class ImagesForMeasure(Task):
    '''
    Generate a set of static images for a measure

    One static map image is requested per entry configured for the measure's
    country in :attr:`CENTER_ZOOM_BOUNDS`; the images are pasted side by side
    into a single PNG under ``catalog/img``.

    :param measure: ID of the measure; the text before the first ``.`` is
                    used as the country key.
    :param force: When set, an existing output image is deleted at
                  construction time.
    '''

    MAP_URL = '{cartodb_url}/api/v1/map'.format(
        cartodb_url=os.environ['CARTODB_URL'])

    BASEMAP = {
        "type": "http",
        "options": {
            "urlTemplate": "http://{s}.basemaps.cartocdn.com/light_nolabels/{z}/{x}/{y}.png",
            "subdomains": "abcd",
        }
    }

    # Currently not added to the generated layers.
    LABELS = {
        "type": "http",
        "options": {
            "urlTemplate": "http://{s}.basemaps.cartocdn.com/light_only_labels/{z}/{x}/{y}.png",
            "subdomains": "abcd",
        }
    }

    # country -> list of (center, zoom, boundary id or None)
    CENTER_ZOOM_BOUNDS = {
        'es': [
            ((40.4139017, -3.7350414), 6, None, ),
            ((40.4139017, -3.7350414), 8, None, ),
            ((40.4139017, -3.7350414), 11, None, ),
            ((40.4139017, -3.7050414), 13, None, ),
        ],
        'mx': [
            ((22.979, -101.777), 4, 'mx.inegi.entidad', ),
            ((19.316, -99.152), 7, 'mx.inegi.municipio', ),
            ((19.441989391028706, -99.14474487304688), 11, 'mx.inegi.ageb', ),
            ((19.441989391028706, -99.14474487304688), 13, 'mx.inegi.manzana', ),
        ],
        'uk': [
            ((52.51622086393074, -1.197509765625), 5, None, ),  # All England
            ((51.50190410761811, -0.120849609375), 9, None, ),  # London
            ((52.47274306920925, -3.982543945312), 7, None, ),  # Wales
            ((53.491313790532956, -2.9706787109375), 9, None, ),  # Manchester
        ],
        'us': [
            ((37.996162679728116, -97.6904296875), 3,
             'us.census.tiger.state_clipped', ),
            ((38.16911413556086, -114.884033203125), 5,
             'us.census.tiger.county_clipped', ),
            ((37.75225820732333, -122.11584777832031), 9,
             'us.census.tiger.census_tract_clipped', ),
            ((37.75225820732333, -122.44584777832031), 12,
             'us.census.tiger.block_group_clipped', ),
        ],
    }

    # CartoCSS colour variables, keyed by the ID of the measure's unit tag.
    PALETTES = {
        'tags.people': ''' @5:#6c2167; @4:#a24186; @3:#ca699d; @2:#e498b4; @1:#f3cbd3;''',
        'tags.money': ''' @5:#1d4f60; @4:#2d7974; @3:#4da284; @2:#80c799; @1:#c4e6c3;''',
        'tags.households': ''' @5:#63589f; @4:#9178c4; @3:#b998dd; @2:#dbbaed; @1:#f3e0f7;''',
        'tags.housing': ''' @5:#2a5674; @4:#45829b; @3:#68abb8; @2:#96d0d1; @1:#d1eeea;''',
        'tags.ratio': ''' @5:#eb4a40; @4:#f17854; @3:#f59e72; @2:#f9c098; @1:#fde0c5;''',
        'tags.segmentation': ''' @1:#7F3C8D; @2:#11A579; @3:#3969AC; @4:#F2B701; @5:#E73F74; @6:#80BA5A; @7:#E68310; @8:#008695; @9:#CF1C90; @10:#f97b72; @11:#A5AA99;''',
    }

    measure = Parameter()
    force = BooleanParameter(default=False)

    def __init__(self, *args, **kwargs):
        if kwargs.get('force'):
            # ``measure`` is the first declared parameter, so it may also be
            # supplied positionally.  If it is missing altogether, skip the
            # cleanup and let the parent constructor report the problem.
            measure = kwargs.get('measure', args[0] if args else None)
            if measure is not None:
                target_path = self.output(measure=measure).path
                try:
                    os.unlink(target_path)
                except OSError:
                    # Nothing to delete.
                    pass
        super(ImagesForMeasure, self).__init__(*args, **kwargs)

    def _generate_config(self, zoom, lon, lat, boundary=None):
        '''
        Build the map configuration (layers, center, zoom) for one image.

        :param zoom: Zoom level, returned unchanged in the config.
        :param lon: First center coordinate, returned unchanged.
        :param lat: Second center coordinate, returned unchanged.
        :param boundary: Optional geometry ID to restrict the lookup to; when
                         no row matches, the lookup is retried without it.
        :raises Exception: If ``obs_meta`` has no row for the measure, or the
                           statistics query does not return HTTP 200.
        '''
        layers = [self.BASEMAP]
        session = current_session()
        measure = session.query(OBSColumn).get(self.measure)
        mainquery = ''' SELECT numer_aggregate, numer_type, numer_colname, numer_geomref_colname, numer_tablename, geom_geomref_colname, geom_colname, geom_tablename, denom_colname, denom_tablename, denom_geomref_colname FROM observatory.obs_meta WHERE numer_id = '{measure}' {boundary_clause} ORDER BY geom_weight DESC, numer_timespan DESC, geom_colname DESC; '''
        query = mainquery.format(
            measure=self.measure,
            boundary_clause="AND geom_id = '{}'".format(boundary) if boundary else '')
        results = session.execute(query).fetchone()
        # how should we determine fallback resolution?
        if results is None:
            query = mainquery.format(measure=self.measure, boundary_clause="")
            results = session.execute(query).fetchone()
        if results is None:
            # Fail with a readable message instead of an unpacking TypeError.
            raise Exception('No obs_meta rows found for measure {}'.format(
                self.measure))

        numer_aggregate, numer_type, numer_colname, numer_geomref_colname, \
            numer_tablename, geom_geomref_colname, geom_colname, \
            geom_tablename, denom_colname, \
            denom_tablename, denom_geomref_colname = results

        if denom_colname:
            # Ratio of numerator to denominator.
            cartosql = (
                "SELECT geom.cartodb_id, geom.{geom_colname} as the_geom, "
                "geom.the_geom_webmercator, "
                "numer.{numer_colname} / NULLIF(denom.{denom_colname}, 0) measure "
                "FROM {geom_tablename} as geom, {numer_tablename} as numer, "
                " {denom_tablename} as denom "
                "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
                " AND numer.{numer_geomref_colname} = denom.{denom_geomref_colname} ")
            statssql = (
                "SELECT "
                'CDB_HeadsTailsBins(array_agg(distinct( '
                ' (numer.{numer_colname} / '
                ' NULLIF(denom.{denom_colname}, 0))::NUMERIC)), 4) as "headtails" '
                "FROM {geom_tablename} as geom, "
                " {numer_tablename} as numer, "
                " {denom_tablename} as denom "
                "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
                " AND numer.{numer_geomref_colname} = denom.{denom_geomref_colname} ")
        elif numer_aggregate == 'sum':
            # No denominator: normalize the summed value by geometry area.
            cartosql = (
                "SELECT geom.cartodb_id, geom.{geom_colname} as the_geom, "
                "geom.the_geom_webmercator, "
                "numer.{numer_colname} / "
                " ST_Area(geom.the_geom) * 1000000.0 measure "
                "FROM {geom_tablename} as geom, {numer_tablename} as numer "
                "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} ")
            statssql = (
                "SELECT CDB_HeadsTailsBins(array_agg(distinct( "
                ' (numer.{numer_colname} / ST_Area(geom.the_geom) '
                ' * 1000000.0)::NUMERIC)), 4) as "headtails" '
                "FROM {geom_tablename} as geom, "
                " {numer_tablename} as numer "
                "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} ")
        else:
            # Raw value of the measure.
            cartosql = (
                "SELECT geom.cartodb_id, geom.{geom_colname} as the_geom, "
                " geom.the_geom_webmercator, "
                " numer.{numer_colname} measure "
                "FROM {geom_tablename} as geom, {numer_tablename} as numer "
                " WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} ")
            if numer_type.lower() == 'numeric':
                statssql = (
                    "SELECT "
                    'CDB_HeadsTailsBins(array_agg( '
                    ' distinct(numer.{numer_colname}::NUMERIC)), 4) as "headtails" '
                    "FROM {geom_tablename} as geom, "
                    " {numer_tablename} as numer "
                    "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} ")
            else:
                # Non-numeric measure: the ten most frequent categories.
                statssql = ''' SELECT array_agg(category) categories FROM ( SELECT row_number() over () catname, {numer_colname} as category, COUNT(*) cnt FROM {numer_tablename} GROUP BY {numer_colname} ORDER BY COUNT(*) DESC LIMIT 10 ) foo'''

        # Both statements are filled in from the same set of names.
        names = dict(geom_colname=geom_colname,
                     numer_colname=numer_colname,
                     geom_tablename=geom_tablename,
                     numer_tablename=numer_tablename,
                     geom_geomref_colname=geom_geomref_colname,
                     numer_geomref_colname=numer_geomref_colname,
                     denom_colname=denom_colname,
                     denom_tablename=denom_tablename,
                     denom_geomref_colname=denom_geomref_colname)
        cartosql = cartosql.format(**names)
        statssql = statssql.format(**names)

        resp = query_cartodb(statssql)
        if resp.status_code != 200:
            raise Exception("Unable to obtain statssql: {}".format(resp.text))

        # Pick the palette for the measure's unit, defaulting to the ratio one.
        unit = measure.unit()
        if unit:
            ramp = self.PALETTES.get(unit.id, self.PALETTES['tags.ratio'])
        else:
            ramp = self.PALETTES['tags.ratio']

        # Each new rule is prepended, so the rules end up in reverse bucket
        # order in the generated CartoCSS.
        bucket_css = u''
        if numer_type.lower() == 'numeric':
            buckets = resp.json()['rows'][0]['headtails']
            for i, bucket in enumerate(buckets):
                bucket_css = u''' [measure <= {bucket}] {{ polygon-fill: @{i}; }} '''.format(bucket=bucket, i=i + 1) + bucket_css
        else:
            buckets = resp.json()['rows'][0]['categories']
            for i, bucket in enumerate(buckets):
                bucket_css = u''' [measure = "{bucket}"] {{ polygon-fill: @{i}; }} '''.format(bucket=bucket, i=i + 1) + bucket_css

        layers.append({
            'type': 'mapnik',
            'options': {
                'layer_name': geom_tablename,
                'cartocss': '''/** choropleth visualization */ {ramp} #data {{ polygon-opacity: 0.9; polygon-gamma: 0.5; line-color: #000000; line-width: 0.25; line-opacity: 0.2; line-comp-op: hard-light; polygon-fill: @{bucketlen}; [measure=null]{{ polygon-fill: #cacdce; }} {bucket_css} }}'''.format(
                    ramp=ramp,
                    bucketlen=len(buckets) + 1,
                    bucket_css=bucket_css),
                'cartocss_version': "2.1.1",
                'sql': cartosql,
                "table_name": "\"\"."
            }
        })
        return {
            'layers': layers,
            'center': [lon, lat],
            'zoom': zoom
        }

    def get_named_map(self, map_config):
        '''
        Request a map for the given layers from :attr:`MAP_URL`.

        :param map_config: List of layer definitions.
        :returns: The decoded JSON response, which contains ``layergroupid``.
        :raises Exception: If the response has no ``layergroupid``.
        '''
        # ``pprint`` prints and returns None; ``pformat`` returns the text so
        # that the response actually appears in the error message.
        from pprint import pformat

        config = {"version": "1.3.0", "layers": map_config}
        resp = requests.get(self.MAP_URL,
                            headers={'content-type': 'application/json'},
                            params={'config': json.dumps(config)}).json()
        if 'layergroupid' not in resp:
            raise Exception('Named map returned no layergroupid: {}'.format(
                pformat(resp)))
        return resp

    def _fetch_bordered_image(self, url):
        '''Log and download ``url``, returning it with a 10px white border.'''
        LOGGER.info(url)
        raw = StringIO(requests.get(url, stream=True).content)
        return ImageOps.expand(Image.open(raw), border=10, fill='white')

    def run(self):
        '''
        Render one image per configured view and join them left to right.
        '''
        self.output().makedirs()
        image_urls = []
        country = self.measure.split('.')[0]
        # The image size depends only on the country.
        image_size = (300, 700, ) if country == 'uk' else (500, 500, )

        for center, zoom, boundary in self.CENTER_ZOOM_BOUNDS[country]:
            # NOTE(review): the configured centers look like
            # (latitude, longitude), so these two names appear swapped.  The
            # values are passed on in tuple order -- confirm before renaming.
            lon, lat = center
            config = self._generate_config(zoom, lon, lat, boundary)
            named_map = self.get_named_map(config['layers'])
            image_urls.append(
                '{cartodb_url}/api/v1/map/static/center/'
                '{layergroupid}/{zoom}/{center_lon}/{center_lat}/{x}/{y}.png'.format(
                    cartodb_url=os.environ['CARTODB_URL'],
                    layergroupid=named_map['layergroupid'],
                    zoom=zoom,
                    center_lon=lon,
                    center_lat=lat,
                    x=image_size[0],
                    y=image_size[1],
                ))

        image1 = self._fetch_bordered_image(image_urls.pop(0))
        for url2 in image_urls:
            image2 = self._fetch_bordered_image(url2)

            (width1, height1) = image1.size
            (width2, height2) = image2.size

            # Paste the new image to the right of what has been joined so far.
            result = Image.new('RGB', (width1 + width2, max(height1, height2)))
            result.paste(im=image1, box=(0, 0))
            result.paste(im=image2, box=(width1, 0))
            image1 = result
        image1.save(self.output().path)

    def complete(self):
        '''
        Report completion normally for countries listed in
        :attr:`CENTER_ZOOM_BOUNDS`; for any other country log a warning and
        report the task as complete, so that it never runs.
        '''
        country = self.measure.split('.')[0]
        if country in self.CENTER_ZOOM_BOUNDS:
            return super(ImagesForMeasure, self).complete()
        LOGGER.warning('No info to create images for %s', self.measure)
        return True

    def output(self, measure=None):
        '''
        Return the local PNG target for ``measure`` (defaults to this task's
        own measure).
        '''
        if measure is None:
            measure = self.measure
        return LocalTarget(os.path.join('catalog/img', measure + '.png'))
class GenerateRST(Task):
    '''
    Render one RST file per (section, subsection) tag pair, plus one RST file
    per section, under ``catalog/source``.

    :param force: When set, ``catalog/source/*/*`` is removed at construction
                  time.
    :param format: Output format; ``'pdf'`` selects thumbnails instead of
                   full-size images.
    :param preview: When set, only the first target is generated.
    :param images: When set, image tasks are required for cartofigures and
                   for weighted numeric columns.
    '''

    force = BooleanParameter(default=False)
    format = Parameter()
    preview = BooleanParameter(default=False)
    images = BooleanParameter(default=False)

    # Directive embedded in a subsection description; group 1 is the viz ID.
    _CARTOFIGURE_RE = re.compile(r'\.\. cartofigure:: (\S+)')

    def __init__(self, *args, **kwargs):
        super(GenerateRST, self).__init__(*args, **kwargs)
        if self.force:
            shell('rm -rf catalog/source/*/*')
        shell('cp -R catalog/img catalog/source/')
        shell('mkdir -p catalog/img_thumb')
        shell('cp -R catalog/img_thumb catalog/source/')

    def requires(self):
        '''
        Return the image tasks needed by the targets, keyed by viz or column
        ID.  Empty when ``images`` is not set.
        '''
        requirements = {}
        if not self.images:
            # Nothing is ever required without images; skip the lookups.
            return requirements

        session = current_session()
        for _, subsection_id in self.output():
            subsection = session.query(OBSTag).get(subsection_id)

            # A single regex search replaces the substring test followed by a
            # second search, which failed on a directive without an ID.
            figure = self._CARTOFIGURE_RE.search(subsection.description)
            if figure:
                viz_id = figure.group(1)
                if self.format == 'pdf':
                    img = GenerateThumb(viz=viz_id)
                else:
                    img = GenerateStaticImage(viz=viz_id)
                requirements[viz_id] = img

            for column in subsection.columns:
                if column.type.lower() == 'numeric' and column.weight > 0 \
                        and not column.id.startswith('uk'):
                    if self.format == 'pdf':
                        img = GenerateThumb(measure=column.id, force=False)
                    else:
                        img = ImagesForMeasure(measure=column.id, force=False)
                    requirements[column.id] = img
        return requirements

    def output(self):
        '''
        Return a dict mapping ``(section id, subsection id)`` to the RST
        target of that pair.
        '''
        targets = {}
        session = current_session()
        i = 0
        for section in session.query(OBSTag).filter(OBSTag.type == 'section'):
            for subsection in session.query(OBSTag).filter(
                    OBSTag.type == 'subsection'):
                i += 1
                # ``i`` is never reset, so in preview mode only the very first
                # pair is kept.
                if i > 1 and self.preview:
                    break
                targets[(section.id, subsection.id)] = LocalTarget(
                    'catalog/source/{section}/{subsection}.rst'.format(
                        section=strip_tag_id(section.id),
                        subsection=strip_tag_id(subsection.id)))
        return targets

    def template_globals(self):
        '''Return the variables shared by every template.'''
        image_path = '../img_thumb' if self.format == 'pdf' else '../img'
        return {
            'IMAGE_PATH': image_path
        }

    def run(self):
        session = current_session()
        for section_subsection, target in self.output().iteritems():
            section_id, subsection_id = section_subsection
            section = session.query(OBSTag).get(section_id)
            subsection = session.query(OBSTag).get(subsection_id)
            target.makedirs()
            fhandle = target.open('w')

            figure = self._CARTOFIGURE_RE.search(subsection.description)
            if figure:
                viz_id = figure.group(1)
                inputs = self.input()
                # The viz is only required when ``images`` is set; without it
                # there is no input to point at, so leave the directive as is
                # instead of failing with a KeyError.
                if viz_id in inputs:
                    viz_path = os.path.join(
                        '../', *inputs[viz_id].path.split(os.path.sep)[2:])
                    # The template reads the description from the tag object,
                    # hence the in-place update.
                    subsection.description = self._CARTOFIGURE_RE.sub(
                        '.. figure:: {}'.format(viz_path),
                        subsection.description)

            columns = []
            for col in subsection.columns:
                if section not in col.tags:
                    continue
                if col.weight < 1:
                    continue
                if not col.tables:
                    continue
                # tags with denominators will appear beneath that denominator
                # unless the denominator is not in this subsection
                if not col.has_denominators() or not any(
                        subsection in denominator.tags
                        for denominator in col.denominators()):
                    columns.append(col)

            columns.sort(key=lambda col: col.name)

            with open('catalog/source/{}.rst'.format(strip_tag_id(section.id)),
                      'w') as section_fhandle:
                # Encoded like the subsection output below, so non-ASCII
                # section text does not fail on write.
                section_fhandle.write(SECTION_TEMPLATE.render(
                    section=section, **self.template_globals()).encode('utf8'))
            if columns:
                fhandle.write(SUBSECTION_TEMPLATE.render(
                    subsection=subsection, columns=columns, format=self.format,
                    **self.template_globals()
                ).encode('utf8'))
            else:
                fhandle.write('')
            fhandle.close()
class GenerateRST(Task):
    '''
    Render the catalog RST sources under ``catalog/source``: one file per
    (section, subsection) tag pair, a ``boundary.rst`` and a section file per
    section, plus ``licenses.rst`` and ``sources.rst``.

    :param force: When set, ``catalog/source/*/*`` is removed at construction
                  time.
    :param format: Output format; ``'pdf'`` selects thumbnails instead of
                   full-size images.
    :param preview: When set, at most the first ten subsection targets are
                    generated.
    :param images: When set, image tasks are required for selected columns.
    '''

    force = BooleanParameter(default=False)
    format = Parameter()
    preview = BooleanParameter(default=False)
    images = BooleanParameter(default=False)

    # Numerator IDs tagged with both the given section and subsection.
    _NUMER_IDS_SQL = ''' SELECT DISTINCT numer_id FROM observatory.obs_meta WHERE numer_tags ? 'section/{section_id}' AND numer_tags ? 'subsection/{subsection_id}' ORDER BY numer_id '''

    # Column IDs carrying both tags, read from the tag tables directly.
    _BOUNDARY_IDS_SQL = ''' SELECT DISTINCT c.id FROM observatory.obs_tag section_t, observatory.obs_column_tag section_ct, observatory.obs_tag subsection_t, observatory.obs_column_tag subsection_ct, observatory.obs_column c WHERE section_t.id = section_ct.tag_id AND subsection_t.id = subsection_ct.tag_id AND c.id = section_ct.column_id AND c.id = subsection_ct.column_id AND subsection_t.id = '{subsection_id}' AND section_t.id = '{section_id}' AND subsection_t.type = 'subsection' AND section_t.type = 'section' GROUP BY c.id ORDER BY c.id '''

    def __init__(self, *args, **kwargs):
        super(GenerateRST, self).__init__(*args, **kwargs)
        if self.force:
            shell('rm -rf catalog/source/*/*')
        shell('cp -R catalog/img catalog/source/')
        shell('mkdir -p catalog/img_thumb')
        shell('cp -R catalog/img_thumb catalog/source/')

    def requires(self):
        '''
        Return the image tasks needed by the targets, keyed by column ID.
        Empty when ``images`` is not set.
        '''
        requirements = {}
        if not self.images:
            # Nothing is ever required without images; skip the queries.
            return requirements

        session = current_session()
        for section_id, subsection_id in self.output():
            resp = session.execute(self._NUMER_IDS_SQL.format(
                section_id=section_id, subsection_id=subsection_id))
            for row in resp:
                column_id = row[0]
                # NOTE(review): only IDs starting with 'uk' get an image here
                # -- confirm this is intended and not an inverted condition.
                if not column_id.startswith('uk'):
                    continue
                if self.format == 'pdf':
                    img = GenerateThumb(measure=column_id, force=False)
                else:
                    img = ImagesForMeasure(measure=column_id, force=False)
                requirements[column_id] = img
        return requirements

    def output(self):
        '''
        Return a dict of targets keyed by ``(section id, subsection id)``,
        plus the special keys ``('licenses', None)`` and ``('sources', None)``.
        '''
        targets = {}
        session = current_session()
        i = 0
        for section in session.query(OBSTag).filter(OBSTag.type == 'section'):
            targets[(section.id, 'tags.boundary')] = LocalTarget(
                'catalog/source/{section}/boundary.rst'.format(
                    section=strip_tag_id(section.id)))
            for subsection in session.query(OBSTag).filter(
                    OBSTag.type == 'subsection'):
                i += 1
                # ``i`` is never reset, so the preview limit applies across
                # all sections, not per section.
                if i > 10 and self.preview:
                    break
                targets[(section.id, subsection.id)] = LocalTarget(
                    'catalog/source/{section}/{subsection}.rst'.format(
                        section=strip_tag_id(section.id),
                        subsection=strip_tag_id(subsection.id)))
        targets[('licenses', None)] = LocalTarget('catalog/source/licenses.rst')
        targets[('sources', None)] = LocalTarget('catalog/source/sources.rst')
        return targets

    def template_globals(self):
        '''Return the variables shared by every template.'''
        image_path = '../img_thumb' if self.format == 'pdf' else '../img'
        return {'IMAGE_PATH': image_path}

    def _write_tag_listing(self, target, template, context_name, tag_type):
        '''Render all tags of ``tag_type``, ordered by name, into ``target``.'''
        session = current_session()
        fhandle = target.open('w')
        tags = session.query(OBSTag).filter(
            OBSTag.type == tag_type).order_by(OBSTag.name)
        context = dict(self.template_globals())
        context[context_name] = tags
        fhandle.write(template.render(**context).encode('utf8'))
        fhandle.close()

    def build_licenses(self, target):
        '''Write the listing of all ``license`` tags to ``target``.'''
        self._write_tag_listing(target, LICENSES_TEMPLATE, 'licenses', 'license')

    def build_sources(self, target):
        '''Write the listing of all ``source`` tags to ``target``.'''
        self._write_tag_listing(target, SOURCES_TEMPLATE, 'sources', 'source')

    def run(self):
        session = current_session()
        for section_subsection, target in self.output().iteritems():
            section_id, subsection_id = section_subsection

            if section_id == 'licenses':
                self.build_licenses(target)
                continue
            elif section_id == 'sources':
                self.build_sources(target)
                continue

            section = session.query(OBSTag).get(section_id)
            subsection = session.query(OBSTag).get(subsection_id)

            # Boundaries are looked up through the tag tables; everything else
            # comes from obs_meta.
            if subsection_id == 'tags.boundary':
                id_query = self._BOUNDARY_IDS_SQL
            else:
                id_query = self._NUMER_IDS_SQL
            resp = session.execute(id_query.format(
                section_id=section_id, subsection_id=subsection_id))

            target.makedirs()
            fhandle = target.open('w')

            columns = []
            for col_id in resp:
                col = session.query(OBSColumn).get(col_id)
                # tags with denominators will appear beneath that denominator
                # unless the denominator is not in this subsection
                if not col.has_denominators() or not any(
                        subsection in denominator.tags
                        for denominator in col.denominators()):
                    columns.append(col)

            columns.sort(key=lambda col: col.name)

            with open('catalog/source/{}.rst'.format(strip_tag_id(section_id)),
                      'w') as section_fhandle:
                # Encoded like the subsection output below, so non-ASCII
                # section text does not fail on write.
                section_fhandle.write(
                    SECTION_TEMPLATE.render(
                        section=section,
                        **self.template_globals()).encode('utf8'))
            if columns:
                fhandle.write(
                    SUBSECTION_TEMPLATE.render(
                        subsection=subsection,
                        columns=columns,
                        format=self.format,
                        **self.template_globals()).encode('utf8'))
            else:
                fhandle.write('')
            fhandle.close()