def remove(self, carto_url=None, api_key=None):
        """Delete this table's CARTO visualizations, then drop the table.

        The table is looked up through ``/api/v1/tables/<tablename>`` and the
        visualization id found there is deleted through ``/api/v1/viz/<id>``.
        This repeats until the lookup stops returning a JSON body with an
        ``id``.  Finally the table is dropped with ``DROP TABLE IF EXISTS``
        and its absence is asserted.

        :param carto_url: Base URL of the CARTO account.  Defaults to the
                          ``CARTODB_URL`` environment variable; when neither
                          is available the visualization cleanup is skipped
                          and only the table is dropped.
        :param api_key: CARTO API key.  Defaults to the ``CARTODB_API_KEY``
                        environment variable.
        :raises KeyError: if no ``api_key`` is given and ``CARTODB_API_KEY``
                          is not set.
        :raises AssertionError: if the table still exists after the drop.
        """
        api_key = api_key or os.environ['CARTODB_API_KEY']
        # With no URL at all the request target would be the literal string
        # "None/api/...", which requests rejects with a ValueError subclass;
        # that error was swallowed below and nothing was ever deleted.
        carto_url = carto_url or os.environ.get('CARTODB_URL')

        if carto_url:
            try:
                # NOTE(review): the loop only ends once the lookup stops
                # yielding an id; a DELETE that never takes effect would keep
                # it spinning -- confirm the API guarantees progress.
                while True:
                    resp = requests.get(
                        '{url}/api/v1/tables/{tablename}?api_key={api_key}'.format(
                            url=carto_url,
                            tablename=self.tablename,
                            api_key=api_key))
                    viz_id = resp.json()['id']
                    # delete dataset by id DELETE
                    # https://observatory.cartodb.com/api/v1/viz/ed483a0b-7842-4610-9f6c-8591273b8e5c
                    try:
                        requests.delete(
                            '{url}/api/v1/viz/{viz_id}?api_key={api_key}'.format(
                                url=carto_url, viz_id=viz_id, api_key=api_key),
                            timeout=1)
                    except requests.Timeout:
                        # Best effort: the next lookup shows whether the
                        # delete went through.
                        pass
            except (ValueError, KeyError):
                # ValueError: the response was not JSON (or the URL was
                # malformed); KeyError: JSON without an 'id'.  Either way
                # there is nothing left to delete.
                pass
        query_cartodb('DROP TABLE IF EXISTS {tablename}'.format(
            tablename=self.tablename))
        assert not self.exists()
 def run(self):
     """Null out outlier areas in ``acris_real_property_groupby_<year>``.

     For both ``bldgarea`` and ``lotarea`` a ``*_orig`` backup column is
     added and filled, then every value whose backup exceeds 50000 is
     replaced with NULL in the working column.

     The statements are safe to re-run: an "already exists" response to
     ``ADD COLUMN`` is tolerated, and the backup copy only fills rows whose
     backup is still NULL, so values that were already nulled out cannot
     overwrite the preserved originals.

     :raises Exception: if any statement other than a duplicate
                        ``ADD COLUMN`` does not return HTTP 200.
     """
     queries = [
         '''
           ALTER TABLE acris_real_property_groupby_{year}
           ADD COLUMN bldgarea_orig Numeric
         ''', '''
           UPDATE acris_real_property_groupby_{year}
           SET bldgarea_orig = bldgarea
           WHERE bldgarea_orig IS NULL
         ''', '''
           UPDATE acris_real_property_groupby_{year}
           SET bldgarea = CASE WHEN bldgarea_orig > 50000 THEN NULL ELSE bldgarea_orig END
         ''', '''
           ALTER TABLE acris_real_property_groupby_{year}
           ADD COLUMN lotarea_orig Numeric
         ''', '''
           UPDATE acris_real_property_groupby_{year}
           SET lotarea_orig = lotarea
           WHERE lotarea_orig IS NULL
         ''', '''
           UPDATE acris_real_property_groupby_{year}
           SET lotarea = CASE WHEN lotarea_orig > 50000 THEN NULL ELSE lotarea_orig END
         '''
     ]
     carto_url = 'https://' + self.username + '.carto.com'
     for q in queries:
         resp = query_cartodb(q.format(year=self.year),
                              carto_url=carto_url,
                              api_key=self.api_key)
         if 'already exists' in resp.text:
             # The backup column is left over from an earlier run.
             continue
         if resp.status_code != 200:
             # Explicit raise instead of assert so the check survives -O.
             raise Exception('Query failed with status {}: {}'.format(
                 resp.status_code, resp.text))
     # Only report completion once every statement has gone through.
     self._complete = True
Beispiel #3
0
 def run(self):
     """Rebuild ``obs_meta`` and every ``obs_meta_<dimension>`` table.

     Runs ``FIRST_AGGREGATE``, then replaces the remote ``obs_meta`` table
     with the result of ``QUERY`` and each ``obs_meta_<dimension>`` table
     with the result of its ``DIMENSIONS`` query.  Each table is removed
     first and then re-created as a public table through the import API.
     Newlines in the SQL are flattened to spaces before it is submitted.
     The task is flagged complete only after all imports were issued.
     """
     query_cartodb(self.FIRST_AGGREGATE)

     # Main metadata table.
     CartoDBTarget(tablename='obs_meta').remove()
     meta_import = {
         'table_name': 'obs_meta',
         'sql': self.QUERY.replace('\n', ' '),
         'privacy': 'public',
     }
     import_api(meta_import)

     # One derived table per dimension.
     for dim_name, dim_sql in self.DIMENSIONS.iteritems():
         dim_table = 'obs_meta_{}'.format(dim_name)
         CartoDBTarget(tablename=dim_table).remove()
         dim_import = {
             'table_name': dim_table,
             'sql': dim_sql.replace('\n', ' '),
             'privacy': 'public',
         }
         import_api(dim_import)

     self._complete = True
 def exists(self):
     """Return True when the remote table can be queried and has a row.

     A one-row probe query is sent for ``self.tablename``.  Any non-200
     response counts as "does not exist"; otherwise the answer is whether
     the reported ``total_rows`` is greater than zero, so a table without
     rows is also reported as not existing.
     """
     probe_sql = 'SELECT row_number() over () FROM "{tablename}" LIMIT 1'.format(
         tablename=self.tablename)
     probe = query_cartodb(probe_sql,
                           api_key=self.api_key,
                           carto_url=self.carto_url)
     return probe.status_code == 200 and probe.json()['total_rows'] > 0
Beispiel #5
0
    def requires(self):
        """Yield one upload task per local table that should be uploaded.

        The versions recorded in the remote ``obs_table`` are compared with
        the local ``OBSTable`` versions.  A table whose local version is
        newer than the remote one, or that has a local version and no remote
        entry at all, is uploaded with ``force=True``; every other table
        uses ``self.force``.

        :returns: generator of ``TableToCartoViaImportAPI`` tasks.
        """
        session = current_session()
        existing_table_versions = {
            row['tablename']: row['version']
            for row in query_cartodb('SELECT * FROM obs_table').json()['rows']
        }
        tables = {
            table.tablename: table.version
            for table in session.query(OBSTable)
            if should_upload(table)
        }

        for tablename, version in tables.items():
            remote_version = existing_table_versions.get(tablename)
            # Spell out the None cases rather than leaning on Python 2's
            # "None sorts before everything" ordering, which is a TypeError
            # on Python 3.
            is_newer = version is not None and (
                remote_version is None or version > remote_version)
            force = True if is_newer else self.force
            yield TableToCartoViaImportAPI(table=tablename, force=force)
    def run(self):
        """Remove remote ``obs_<sha1>`` tables that ``obs_table`` does not list.

        The dataset listing pages ``self.start`` through ``self.end``
        (inclusive) are fetched with curl and scanned for names of the form
        ``obs_`` followed by 40 hex digits.  Every such name that is missing
        from the remote ``obs_table`` is removed.  Removal is best effort: a
        failure is logged and the remaining tables are still processed.
        """
        resp = query_cartodb('SELECT tablename FROM obs_table')
        tablenames = set(r['tablename'] for r in resp.json()['rows'])

        remote_tables = []
        seen = set()
        for page in range(self.start, self.end + 1):
            # [0-9a-f] rather than [0-f]: the latter is an ASCII range that
            # also spans punctuation and upper-case letters.
            listing = shell("curl -s '{cartodb_url}/datasets?page={page}' "
                            "| grep -Eo 'obs_[0-9a-f]{{40}}' | uniq".format(
                                cartodb_url=os.environ['CARTODB_URL'],
                                page=page))
            for line in listing.split('\n'):
                name = line.strip()
                # A page without matches produces an empty line; `uniq` only
                # collapses adjacent duplicates, so de-duplicate here too.
                if name and name not in seen:
                    seen.add(name)
                    remote_tables.append(name)

        for table in remote_tables:
            if table in tablenames:
                LOGGER.info('keeping %s', table)
                continue
            LOGGER.info('removing %s', table)
            try:
                CartoDBTarget(table).remove()
            except Exception as err:
                # Deliberately broad: one failed removal must not abort the
                # rest of the cleanup.
                LOGGER.warning(err)
Beispiel #7
0
    def requires(self):
        """Yield one upload task per local table that has a weighted column.

        Local tables are selected by joining ``obs_table``,
        ``obs_column_table`` and ``obs_column`` and keeping those with at
        least one column of positive weight (one table id is excluded
        explicitly in the query).  A table whose local version is newer than
        the version recorded in the remote ``obs_table``, or that has a local
        version and no remote entry at all, is uploaded with ``force=True``;
        every other table uses ``self.force``.

        :returns: generator of ``TableToCartoViaImportAPI`` tasks.
        """
        existing_table_versions = {
            row['tablename']: row['version']
            for row in query_cartodb('SELECT * FROM obs_table').json()['rows']
        }
        # The join returns one row per (table, column); building a dict
        # collapses them to one entry per table.
        tables = {
            name: local_version
            for name, local_version in current_session().execute('''
            SELECT tablename, t.version
            FROM observatory.obs_table t,
                 observatory.obs_column_table ct,
                 observatory.obs_column c
            WHERE t.id = ct.table_id
              AND c.id = ct.column_id
              AND t.tablename NOT IN ('obs_ffebc3eb689edab4faa757f75ca02c65d7db7327')
              AND c.weight > 0
            ''')
        }

        for tablename, version in tables.items():
            remote_version = existing_table_versions.get(tablename)
            # Spell out the None cases rather than leaning on Python 2's
            # "None sorts before everything" ordering, which is a TypeError
            # on Python 3.
            is_newer = version is not None and (
                remote_version is None or version > remote_version)
            force = True if is_newer else self.force
            yield TableToCartoViaImportAPI(table=tablename, force=force)
Beispiel #8
0
    def _generate_config(self, zoom, lon, lat, boundary=None):
        layers = []
        layers.append(self.BASEMAP)
        session = current_session()
        measure = session.query(OBSColumn).get(self.measure)
        mainquery = '''
SELECT numer_aggregate, numer_type,
       numer_colname, numer_geomref_colname,
       numer_tablename,
       geom_geomref_colname,
       geom_colname, geom_tablename,
       denom_colname, denom_tablename, denom_geomref_colname
FROM observatory.obs_meta
WHERE numer_id = '{measure}' {boundary_clause}
ORDER BY geom_weight DESC, numer_timespan DESC, geom_colname DESC;
        '''
        query = mainquery.format(
            measure=self.measure,
            boundary_clause="AND geom_id = '{}'".format(boundary)
            if boundary else '')

        resp = session.execute(query)
        results = resp.fetchone()

        # how should we determine fallback resolution?
        if results is None:
            query = mainquery.format(measure=self.measure, boundary_clause="")
            resp = session.execute(query)
            results = resp.fetchone()

        numer_aggregate, numer_type, numer_colname, numer_geomref_colname, \
                numer_tablename, geom_geomref_colname, geom_colname, \
                geom_tablename, denom_colname, \
                denom_tablename, denom_geomref_colname = results

        if denom_colname:
            cartosql = "SELECT geom.cartodb_id, geom.{geom_colname} as the_geom, " \
                    "geom.the_geom_webmercator, " \
                    "numer.{numer_colname} / NULLIF(denom.{denom_colname}, 0) measure " \
                    "FROM {geom_tablename} as geom, {numer_tablename} as numer, " \
                    "     {denom_tablename} as denom " \
                    "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} " \
                    "  AND numer.{numer_geomref_colname} = denom.{denom_geomref_colname} "
            statssql = "SELECT  " \
                    'CDB_HeadsTailsBins(array_agg(distinct( ' \
                    '      (numer.{numer_colname} / ' \
                    '      NULLIF(denom.{denom_colname}, 0))::NUMERIC)), 4) as "headtails" ' \
                    "FROM {geom_tablename} as geom, " \
                    "     {numer_tablename} as numer, " \
                    "     {denom_tablename} as denom " \
                    "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} " \
                    "  AND numer.{numer_geomref_colname} = denom.{denom_geomref_colname} "
        elif numer_aggregate == 'sum':
            cartosql = "SELECT geom.cartodb_id, geom.{geom_colname} as the_geom, " \
                    "geom.the_geom_webmercator, " \
                    "numer.{numer_colname} / " \
                    "  ST_Area(geom.the_geom) * 1000000.0 measure " \
                    "FROM {geom_tablename} as geom, {numer_tablename} as numer " \
                    "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
            statssql = "SELECT CDB_HeadsTailsBins(array_agg(distinct( " \
                    '  (numer.{numer_colname} / ST_Area(geom.the_geom) ' \
                    '      * 1000000.0)::NUMERIC)), 4) as "headtails" ' \
                    "FROM {geom_tablename} as geom, " \
                    "     {numer_tablename} as numer " \
                    "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
        else:
            cartosql = "SELECT geom.cartodb_id, geom.{geom_colname} as the_geom, " \
                    "  geom.the_geom_webmercator, " \
                    "  numer.{numer_colname} measure " \
                    "FROM {geom_tablename} as geom, {numer_tablename} as numer " \
                    "  WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
            if numer_type.lower() == 'numeric':
                statssql = "SELECT " \
                        'CDB_HeadsTailsBins(array_agg( ' \
                        '  distinct(numer.{numer_colname}::NUMERIC)), 4) as "headtails" ' \
                        "FROM {geom_tablename} as geom, " \
                        "     {numer_tablename} as numer " \
                        "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
            else:
                statssql = '''
                SELECT array_agg(category) categories FROM (
                SELECT row_number() over () catname, {numer_colname} as category, COUNT(*) cnt
                FROM {numer_tablename}
                GROUP BY {numer_colname} ORDER BY COUNT(*) DESC
                LIMIT 10
                ) foo'''

        cartosql = cartosql.format(geom_colname=geom_colname,
                                   numer_colname=numer_colname,
                                   geom_tablename=geom_tablename,
                                   numer_tablename=numer_tablename,
                                   geom_geomref_colname=geom_geomref_colname,
                                   numer_geomref_colname=numer_geomref_colname,
                                   denom_colname=denom_colname,
                                   denom_tablename=denom_tablename,
                                   denom_geomref_colname=denom_geomref_colname)
        statssql = statssql.format(geom_colname=geom_colname,
                                   numer_colname=numer_colname,
                                   geom_tablename=geom_tablename,
                                   numer_tablename=numer_tablename,
                                   geom_geomref_colname=geom_geomref_colname,
                                   numer_geomref_colname=numer_geomref_colname,
                                   denom_colname=denom_colname,
                                   denom_tablename=denom_tablename,
                                   denom_geomref_colname=denom_geomref_colname)

        resp = query_cartodb(statssql)
        if resp.status_code != 200:
            raise Exception("Unable to obtain statssql: {}".format(resp.text))

        if measure.unit():
            ramp = self.PALETTES.get(measure.unit().id,
                                     self.PALETTES['tags.ratio'])
        else:
            ramp = self.PALETTES['tags.ratio']

        bucket_css = u''
        if numer_type.lower() == 'numeric':
            buckets = resp.json()['rows'][0]['headtails']

            for i, bucket in enumerate(buckets):
                bucket_css = u'''
    [measure <= {bucket}] {{
       polygon-fill: @{i};
    }}
                '''.format(bucket=bucket, i=i + 1) + bucket_css
        else:
            buckets = resp.json()['rows'][0]['categories']
            for i, bucket in enumerate(buckets):
                bucket_css = u'''
    [measure = "{bucket}"] {{
       polygon-fill: @{i};
    }}
                '''.format(bucket=bucket, i=i + 1) + bucket_css

        layers.append({
            'type': 'mapnik',
            'options': {
                'layer_name':
                geom_tablename,
                'cartocss':
                '''/** choropleth visualization */

{ramp}

#data {{
  polygon-opacity: 0.9;
  polygon-gamma: 0.5;
  line-color: #000000;
  line-width: 0.25;
  line-opacity: 0.2;
  line-comp-op: hard-light;
  polygon-fill: @{bucketlen};

  [measure=null]{{
     polygon-fill: #cacdce;
  }}
  {bucket_css}
}}'''.format(ramp=ramp, bucketlen=len(buckets) + 1, bucket_css=bucket_css),
                'cartocss_version':
                "2.1.1",
                'sql':
                cartosql,
                "table_name":
                "\"\"."
            }
        })
        #layers.append(self.LABELS)
        return {
            'layers': layers,
            'center': [lon, lat],
            #'bounds': self.bounds,
            'zoom': zoom
        }
Beispiel #9
0
    def _generate_config(self, zoom, lon, lat, boundary=None):
        layers = []
        layers.append(self.BASEMAP)
        session = current_session()
        measure = session.query(OBSColumn).get(self.measure)
        mainquery = '''
SELECT numer_aggregate,
       numer_colname, numer_geomref_colname,
       numer_tablename,
       geom_geomref_colname,
       geom_colname, geom_tablename,
       denom_colname, denom_tablename, denom_geomref_colname
FROM observatory.obs_meta
WHERE numer_id = '{measure}' {boundary_clause}
ORDER BY geom_weight DESC, numer_timespan DESC, geom_colname DESC;
        '''
        query = mainquery.format(
            measure=self.measure,
            boundary_clause="AND geom_id = '{}'".format(boundary) if boundary else '')

        resp = session.execute(query)
        results = resp.fetchone()

        # how should we determine fallback resolution?
        if results is None:
            query = mainquery.format(
                measure=self.measure,
                boundary_clause="")
            resp = session.execute(query)
            results = resp.fetchone()

        numer_aggregate, numer_colname, numer_geomref_colname, numer_tablename, \
                geom_geomref_colname, geom_colname, geom_tablename, denom_colname, \
                denom_tablename, denom_geomref_colname = results

        if denom_colname:
            cartosql = "SELECT geom.cartodb_id, geom.{geom_colname} as the_geom, " \
                    "geom.the_geom_webmercator, " \
                    "numer.{numer_colname} / NULLIF(denom.{denom_colname}, 0) measure " \
                    "FROM {geom_tablename} as geom, {numer_tablename} as numer, " \
                    "     {denom_tablename} as denom " \
                    "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} " \
                    "  AND numer.{numer_geomref_colname} = denom.{denom_geomref_colname} "
            statssql = "SELECT  " \
                    'CDB_HeadsTailsBins(array_agg(distinct( ' \
                    '      (numer.{numer_colname} / ' \
                    '      NULLIF(denom.{denom_colname}, 0))::NUMERIC)), 4) as "headtails" ' \
                    "FROM {geom_tablename} as geom, " \
                    "     {numer_tablename} as numer, " \
                    "     {denom_tablename} as denom " \
                    "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} " \
                    "  AND numer.{numer_geomref_colname} = denom.{denom_geomref_colname} "
        elif numer_aggregate == 'sum':
            cartosql = "SELECT geom.cartodb_id, geom.{geom_colname} as the_geom, " \
                    "geom.the_geom_webmercator, " \
                    "numer.{numer_colname} / " \
                    "  ST_Area(geom.the_geom_webmercator) * 1000000.0 measure " \
                    "FROM {geom_tablename} as geom, {numer_tablename} as numer " \
                    "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
            statssql = "SELECT CDB_HeadsTailsBins(array_agg(distinct( " \
                    '  (numer.{numer_colname} / ST_Area(geom.the_geom_webmercator) ' \
                    '      * 1000000.0)::NUMERIC)), 4) as "headtails" ' \
                    "FROM {geom_tablename} as geom, " \
                    "     {numer_tablename} as numer " \
                    "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
        else:
            cartosql = "SELECT geom.cartodb_id, geom.{geom_colname} as the_geom, " \
                    "  geom.the_geom_webmercator, " \
                    "  numer.{numer_colname} measure " \
                    "FROM {geom_tablename} as geom, {numer_tablename} as numer " \
                    "  WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "
            statssql = "SELECT " \
                    'CDB_HeadsTailsBins(array_agg( ' \
                    '  distinct(numer.{numer_colname}::NUMERIC)), 4) as "headtails" ' \
                    "FROM {geom_tablename} as geom, " \
                    "     {numer_tablename} as numer " \
                    "WHERE geom.{geom_geomref_colname} = numer.{numer_geomref_colname} "

        cartosql = cartosql.format(geom_colname=geom_colname,
                                   numer_colname=numer_colname,
                                   geom_tablename=geom_tablename,
                                   numer_tablename=numer_tablename,
                                   geom_geomref_colname=geom_geomref_colname,
                                   numer_geomref_colname=numer_geomref_colname,
                                   denom_colname=denom_colname,
                                   denom_tablename=denom_tablename,
                                   denom_geomref_colname=denom_geomref_colname)
        statssql = statssql.format(geom_colname=geom_colname,
                                   numer_colname=numer_colname,
                                   geom_tablename=geom_tablename,
                                   numer_tablename=numer_tablename,
                                   geom_geomref_colname=geom_geomref_colname,
                                   numer_geomref_colname=numer_geomref_colname,
                                   denom_colname=denom_colname,
                                   denom_tablename=denom_tablename,
                                   denom_geomref_colname=denom_geomref_colname)

        resp = query_cartodb(statssql)
        assert resp.status_code == 200
        headtails = resp.json()['rows'][0]['headtails']

        if measure.unit():
            ramp = self.PALETTES.get(measure.unit().id, self.PALETTES['tags.ratio'])
        else:
            ramp = self.PALETTES['tags.ratio']

        bucket_css = u''
        for i, bucket in enumerate(headtails):
            bucket_css = u'''
[measure <= {bucket}] {{
   polygon-fill: @{i};
}}
            '''.format(bucket=bucket, i=i+1) + bucket_css

        layers.append({
            'type': 'mapnik',
            'options': {
                'layer_name': numer_tablename,
                'cartocss': '''/** choropleth visualization */

{ramp}

#data {{
  polygon-opacity: 0.9;
  polygon-gamma: 0.5;
  line-color: #000000;
  line-width: 0.25;
  line-opacity: 0.2;
  line-comp-op: hard-light;
  polygon-fill: @{bucketlen};

  [measure=null]{{
     polygon-fill: #cacdce;
  }}
  {bucket_css}
}}'''.format(
    ramp=ramp,
    bucketlen=len(headtails) + 1,
    bucket_css=bucket_css),
                'cartocss_version': "2.1.1",
                'sql': cartosql,
                "table_name": "\"\"."
            }
        })
        #layers.append(self.LABELS)
        return {
            'layers': layers,
            'center': [lon, lat],
            #'bounds': self.bounds,
            'zoom': zoom
        }