Example #1
 def __init__(self, table, cities):
     """Store the city list and create the archive table if missing."""
     self.cities = cities
     self.prefix = table
     self.archive = self.prefix + "_archive"
     self.max_wait = 5
     if not table_exists(self.archive):
         postgres_query(AQICN_TABLE.format(table=self.archive), commit=True)
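
Every snippet on this page funnels its SQL through the same two project helpers, postgres_query(sql, params=None, commit=False) and table_exists(name), whose implementations are not shown. A minimal sketch of what such helpers could look like on top of psycopg2; the connection string and the exact return behaviour here are assumptions, not the project's actual code:

    import psycopg2

    # Hypothetical connection settings; the real helpers would take theirs
    # from the GeoNode/ogc_server_settings configuration.
    _CONN_STR = ("host=localhost dbname=geonode_data "
                 "user=geonode password=secret")

    def postgres_query(sql, params=None, commit=False):
        """Run one statement; optionally commit; return rows for SELECTs."""
        conn = psycopg2.connect(_CONN_STR)
        try:
            cur = conn.cursor()
            cur.execute(sql, params)
            if commit:
                conn.commit()
            return cur.fetchall() if cur.description else None
        finally:
            conn.close()

    def table_exists(name):
        """Return True if a table or view with this name exists."""
        rows = postgres_query("SELECT to_regclass(%s)", params=(name,))
        return rows[0][0] is not None
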
Example #2
 def create_indicator_table(self, indicator):
     """
     Create a database table for the given water quality indicator
     :param indicator: table name for the indicator
     :return: None
     """
     with open(os.path.join(script_dir,
                            'resources/create_table.sql')) as sql:
         postgres_query(sql.read().format(tablename=indicator), commit=True)
Example #3
 def update_indicator_table(self, csvfile):
     """
     Insert water quality measurement data from a csv file
     into the appropriate indicator database table.
     :param csvfile: CSV file containing measurement data
     :return: None
     """
     date_cols = ("ActivityStartDate", "ActivityEndDate")
     indicator = csvfile.replace('_Result.csv', '')
     if not table_exists(indicator):
         self.create_indicator_table(indicator)
     with open(os.path.join(self.tmp_dir, csvfile), 'r') as csvin:
         csvreader = csv.reader(csvin)
         headers = None
         for row in csvreader:
             if not headers:
                 headers = ['"{}"'.format(x.replace('/', '_')) for x in row]
             else:
                 insert_sql = 'INSERT INTO "{}" ({}) SELECT \n'.format(
                     indicator,
                     ','.join(headers)
                 )
                 query_format = []
                 for i, val in enumerate(row):
                     attribute = headers[i].strip('"')
                     id_idx = headers.index('"ActivityIdentifier"')
                     query_format.append("%s")
                     if attribute in date_cols and val:
                         time_idx = headers.index(
                             '"{}_Time"'.format(
                                 attribute.replace("Date", "Time")))
                         zone_idx = headers.index(
                             '"{}_TimeZoneCode"'.format(
                                 attribute.replace("Date", "Time")))
                         time_str = "{} {} {}".format(
                             val, row[time_idx], row[zone_idx])
                         row[i] = time_str
                     else:
                         if not val or val == '.' or val == 'None':
                             row[i] = None
                 insert_sql += '{}'.format(
                     ','.join('{}'.format(x) for x in query_format)) + \
                     ' WHERE NOT EXISTS (SELECT 1 from ' + \
                     '{} WHERE "ActivityIdentifier" = \'{}\');'.format(
                         indicator, re.sub('\'{1}', '\'\'', row[id_idx]))
                 postgres_query(insert_sql, params=tuple(row), commit=True)
     purge_old_data(indicator, date_cols[0], self.days_to_keep)
     if not table_exists(indicator + self.suffix):
         view_sql = 'CREATE OR REPLACE VIEW ' + indicator + self.suffix + \
             ' AS SELECT i.*, g.wkb_geometry from ' + indicator + ' i ' + \
             ' INNER JOIN ' + self.station_table + ' g on ' + \
             ' i."MonitoringLocationIdentifier" = ' + \
             ' g.monitoringlocationidentifier;'
         postgres_query(view_sql, commit=True)
         self.post_geoserver_vector(indicator + self.suffix)
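
The loop above assembles an idempotent insert for every CSV row: the values travel as bound %s placeholders (params=tuple(row)), while the WHERE NOT EXISTS guard skips rows whose ActivityIdentifier is already in the indicator table. A stripped-down sketch of that statement builder, with hypothetical table and column names:

    def build_insert(table, columns, key_column, key_value):
        """Return an INSERT ... SELECT %s,... WHERE NOT EXISTS statement."""
        col_list = ','.join('"{}"'.format(c) for c in columns)
        placeholders = ','.join(['%s'] * len(columns))
        return ('INSERT INTO "{t}" ({cols}) SELECT {vals} '
                'WHERE NOT EXISTS (SELECT 1 FROM "{t}" '
                'WHERE "{key}" = \'{val}\');'.format(
                    t=table, cols=col_list, vals=placeholders,
                    key=key_column, val=key_value.replace("'", "''")))

    # Hypothetical usage; the real code derives the names from the CSV header:
    # build_insert('Nitrate', ['ActivityIdentifier', 'ResultMeasureValue'],
    #              'ActivityIdentifier', 'WQX-123')
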
Example #4
 def process(self):
     """Create the table if needed and parse all cities in a thread pool."""
     if not table_exists(self.prefix):
         postgres_query(AQICN_TABLE.format(table=self.prefix), commit=True)
     logger.debug("Start %s" % datetime.datetime.now())
     if not self.cities:
         self.getCities()
     logger.debug("There are %s cities" % str(len(self.cities)))
     pool = ThreadPool(self.pool_size)
     for citylist in self.split_list(self.pool_size):
         pool.apply_async(thread_parse, args=(self.prefix, citylist))
     pool.close()
     pool.join()
Example #5
    def purge_old_data(self):
        """
        Remove old data from weekly, monthly, and yearly PostGIS tables
        """
        today = datetime.date.today()
        last_week = (today - datetime.timedelta(days=7)).strftime("%Y-%m-%d")
        last_month = (today - datetime.timedelta(days=30)).strftime("%Y-%m-%d")
        last_year = (today - datetime.timedelta(days=365)).strftime("%Y-%m-%d")

        for interval, table in zip([last_week, last_month, last_year],
                                   self.tables):
            postgres_query(
                "DELETE FROM {} where CAST(time as timestamp) < '{}';".format(
                    table, interval), commit=True)
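
Both the table name and the cutoff date are interpolated directly into the DELETE string above. The table name has to be formatted in (identifiers cannot be bound parameters), but the cutoff could travel as a query parameter instead, assuming postgres_query forwards params to the driver as it does in the other examples; a sketch of that variant:

    postgres_query(
        "DELETE FROM {} WHERE CAST(time AS timestamp) < %s;".format(table),
        params=(interval,), commit=True)
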
Example #6
    def update_layer(self, layer):
        """
        Create or update the MMWR layer in GeoNode
        :param layer: Layer to update (weekly or archive)
        :return: None
        """
        csvfile = "{}.csv".format(self.prefix)
        vrt_file = os.path.join(self.tmp_dir, '{}.vrt'.format(self.prefix))
        csvt_file = os.path.join(self.tmp_dir, '{}.csvt'.format(self.prefix))
        if not os.path.exists(vrt_file):
            with open(vrt_file, 'w') as vrt:
                vrt.write(
                    vrt_content.format(name=csvfile.replace('.csv', ''),
                                       csv=os.path.join(self.tmp_dir,
                                                        csvfile)))
        if not os.path.exists(csvt_file):
            with open(csvt_file, 'w') as csvt:
                csvt.write(csvt_content)

        db = ogc_server_settings.datastore_db
        table = '{}_{}'.format(self.prefix, layer).lower()
        option = 'overwrite' if layer.lower() == 'weekly' else 'append'
        ogr2ogr_exec("-{option} -skipfailures -f PostgreSQL \
            \"PG:host={db_host} user={db_user} password={db_pass} \
            dbname={db_name}\" {vrt} -nln {table}".format(
            db_host=db["HOST"],
            db_user=db["USER"],
            db_pass=db["PASSWORD"],
            db_name=db["NAME"],
            vrt="{}".format(vrt_file),
            option=option,
            table=table))
        if not layer_exists(table, ogc_server_settings.server.get('DATASTORE'),
                            DEFAULT_WORKSPACE):
            constraint = 'ALTER TABLE {table} ADD CONSTRAINT ' \
                         '{table}_unique UNIQUE (place, report_date)'\
                .format(table=table)
            postgres_query(constraint, commit=True)
            self.post_geoserver_vector(table)
        if not style_exists(table):
            with open(os.path.join(script_dir,
                                   'resources/mmwr.sld')) as sldfile:
                sld = sldfile.read().format(layername=table)
                self.set_default_style(table, table, sld)
        self.update_geonode(table,
                            title='{} {}'.format(self.base_title, layer),
                            description=self.description,
                            extra_keywords=['category:Population'])
        self.truncate_gs_cache(table)
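
The .csvt sidecar written alongside the CSV tells the OGR CSV driver the type of each column: a single line of quoted type names, one per column. The csvt_content constant itself is not shown on this page; purely as an illustration of the format, a file covering just the two columns named in the UNIQUE constraint above could read:

    # Hypothetical two-column .csvt; the real file must list one type per
    # column of the {prefix}.csv being loaded.
    csvt_content = '"String","Date"\n'
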
Example #7
 def run(self):
     if not table_exists(self.prefix):
         postgres_query(WHISP_TABLE.format(table=self.prefix), commit=True)
         self.import_archive()
     self.scrape()
     if not layer_exists(self.prefix,
                         ogc_server_settings.server.get('DATASTORE'),
                         DEFAULT_WORKSPACE):
         self.post_geoserver_vector(self.prefix)
     if not style_exists(self.prefix):
         with open(os.path.join(script_dir, 'resources/whisp.sld')) as sld:
             self.set_default_style(self.prefix, self.prefix, sld.read())
     self.update_geonode(self.prefix,
                         title=self.title,
                         description=self.description)
     self.truncate_gs_cache(self.prefix)
     self.cleanup()
Example #8
    def update_layer(self, layer):
        """
        Create or update the MMWR layer in GeoNode
        :param layer: Layer to update (weekly or archive)
        :return: None
        """
        csvfile = "{}.csv".format(self.prefix)
        vrt_file = os.path.join(self.tmp_dir, '{}.vrt'.format(self.prefix))
        csvt_file = os.path.join(self.tmp_dir, '{}.csvt'.format(self.prefix))
        if not os.path.exists(vrt_file):
            with open(vrt_file, 'w') as vrt:
                vrt.write(vrt_content.format(
                    name=csvfile.replace('.csv', ''),
                    csv=os.path.join(self.tmp_dir, csvfile)))
        if not os.path.exists(csvt_file):
            with open(csvt_file, 'w') as csvt:
                csvt.write(csvt_content)

        db = ogc_server_settings.datastore_db
        table = '{}_{}'.format(self.prefix, layer).lower()
        option = 'overwrite' if layer.lower() == 'weekly' else 'append'
        ogr2ogr_exec("-{option} -skipfailures -f PostgreSQL \
            \"PG:host={db_host} user={db_user} password={db_pass} \
            dbname={db_name}\" {vrt} -nln {table}".format(
            db_host=db["HOST"], db_user=db["USER"],
            db_pass=db["PASSWORD"], db_name=db["NAME"],
            vrt="{}".format(vrt_file), option=option, table=table))
        if not layer_exists(table,
                            ogc_server_settings.server.get('DATASTORE'),
                            DEFAULT_WORKSPACE):
            constraint = 'ALTER TABLE {table} ADD CONSTRAINT ' \
                         '{table}_unique UNIQUE (place, report_date)'\
                .format(table=table)
            postgres_query(constraint, commit=True)
            self.post_geoserver_vector(table)
        if not style_exists(table):
            with open(os.path.join(
                    script_dir, 'resources/mmwr.sld')) as sldfile:
                sld = sldfile.read().format(layername=table)
                self.set_default_style(table, table, sld)
        self.update_geonode(
            table,
            title='{} {}'.format(self.base_title, layer),
            description=self.description)
        self.truncate_gs_cache(table)
Example #9
 def run(self):
     if not table_exists(self.prefix):
         postgres_query(WHISP_TABLE.format(table=self.prefix), commit=True)
         self.import_archive()
     self.scrape()
     if not layer_exists(self.prefix,
                         ogc_server_settings.server.get('DATASTORE'),
                         DEFAULT_WORKSPACE):
         self.post_geoserver_vector(self.prefix)
     if not style_exists(self.prefix):
         with open(os.path.join(script_dir, 'resources/whisp.sld')) as sld:
             self.set_default_style(self.prefix, self.prefix, sld.read())
     self.update_geonode(self.prefix,
                         title=self.title,
                         description=self.description)
     self.truncate_gs_cache(self.prefix)
     self.cleanup()
Example #10
    def update_station_table(self, csvfile):
        """
        Insert data on water quality monitoring stations
        from a csv file into the database
        :param csvfile: CSV file containing station data
        :return: None
        """
        vrt_content = ("""<OGRVRTDataSource>
                <OGRVRTLayer name="{name}">
                    <SrcDataSource>{csv}</SrcDataSource>
                    <GeometryType>wkbPoint</GeometryType>
                    <LayerSRS>WGS84</LayerSRS>
                    <GeometryField encoding="PointFromColumns"
                    x="LongitudeMeasure" y="LatitudeMeasure"/>
                </OGRVRTLayer>
            </OGRVRTDataSource>
            """)
        station_table = self.station_table
        needs_index = not table_exists(station_table)

        db = ogc_server_settings.datastore_db
        vrt_file = os.path.join(self.tmp_dir, csvfile.replace('.csv', '.vrt'))
        csv_name = os.path.basename(csvfile).replace(".csv", "")
        if not os.path.exists(vrt_file):
            with open(vrt_file, 'w') as vrt:
                vrt.write(
                    vrt_content.format(name=csv_name,
                                       csv=os.path.join(self.tmp_dir,
                                                        csvfile)))
        ogr2ogr_exec("-append -skipfailures -f PostgreSQL \
            \"PG:host={db_host} user={db_user} password={db_pass} \
            dbname={db_name}\" {vrt} -nln {table}".format(
            db_host=db["HOST"],
            db_user=db["USER"],
            db_pass=db["PASSWORD"],
            db_name=db["NAME"],
            vrt="{}".format(vrt_file),
            table=station_table))
        if needs_index:
            sql = 'ALTER TABLE {} '.format(station_table) + \
                  'ADD CONSTRAINT monitoringlocationidentifier_key ' + \
                  'UNIQUE (monitoringlocationidentifier)'
            logger.debug(sql)
            postgres_query(sql, commit=True)
Example #11
    def update_station_table(self, csvfile):
        """
        Insert data on water quality monitoring stations
        from a csv file into the database
        :param csvfile: CSV file containing station data
        :return: None
        """
        vrt_content = (
            """<OGRVRTDataSource>
                <OGRVRTLayer name="{name}">
                    <SrcDataSource>{csv}</SrcDataSource>
                    <GeometryType>wkbPoint</GeometryType>
                    <LayerSRS>WGS84</LayerSRS>
                    <GeometryField encoding="PointFromColumns"
                    x="LongitudeMeasure" y="LatitudeMeasure"/>
                </OGRVRTLayer>
            </OGRVRTDataSource>
            """)
        station_table = self.station_table
        needs_index = not table_exists(station_table)

        db = ogc_server_settings.datastore_db
        vrt_file = os.path.join(self.tmp_dir, csvfile.replace('.csv', '.vrt'))
        csv_name = os.path.basename(csvfile).replace(".csv", "")
        if not os.path.exists(vrt_file):
            with open(vrt_file, 'w') as vrt:
                vrt.write(vrt_content.format(
                    name=csv_name, csv=os.path.join(self.tmp_dir, csvfile)))
        ogr2ogr_exec("-append -skipfailures -f PostgreSQL \
            \"PG:host={db_host} user={db_user} password={db_pass} \
            dbname={db_name}\" {vrt} -nln {table}".format(
            db_host=db["HOST"], db_user=db["USER"], db_pass=db["PASSWORD"],
            db_name=db["NAME"], vrt="{}".format(vrt_file), table=station_table))
        if needs_index:
            sql = 'ALTER TABLE {} '.format(station_table) + \
                  'ADD CONSTRAINT monitoringlocationidentifier_key ' + \
                  'UNIQUE (monitoringlocationidentifier)'
            logger.debug(sql)
            postgres_query(sql, commit=True)
Example #12
 def save_data(self, city):
     """Write one city's air quality measurements to the database."""
     for item in city['data']:
         if city['data'][item] == '-':
             city['data'][item] = 'NULL'
     measurements = ','.join(x[4:] for x in city['data'].keys())
     values = ','.join(
         [x for x in city['data'].values()])
     city_name = unicode(city['city'])
     kv = ','.join(['{} = {}'.format(k, v) for k, v in zip(
         [x[4:] for x in city['data'].keys()],
         [x for x in city['data'].values()]
     )])
     sql_str = unicode(AQICN_SQL.format(
         table=self.prefix,
         time=city['dateTime'].strftime('%Y-%m-%d %H:%M:%S'),
         lat=city['g'][0],
         lng=city['g'][1],
         city=city_name.replace('\'', '\'\''),
         keys=measurements,
         val=values,
         kv=kv,
         cntry=city['country']
     ))
     postgres_query(sql_str, commit=True)
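
save_data turns one scraped city record into a row: '-' readings become NULL, the first four characters of each measurement key are stripped, and the AQICN_SQL template (not shown here) receives the column list, value list, and key = value pairs. A hypothetical input and the strings it yields; the 4-character key prefix is only illustrative:

    import datetime

    city = {
        'city': u'Sample City',
        'country': 'XX',
        'dateTime': datetime.datetime(2016, 1, 1, 12, 0),
        'g': (10.0, 20.0),  # (lat, lng)
        'data': {'cur_pm25': '55'},
    }
    measurements = ','.join(x[4:] for x in city['data'].keys())  # 'pm25'
    values = ','.join(city['data'].values())                     # '55'
    kv = ','.join('{} = {}'.format(k[4:], v)
                  for k, v in city['data'].items())              # 'pm25 = 55'
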
Example #13
 def update_indicator_table(self, csvfile):
     """
     Insert water quality measurement data from a csv file
     into the appropriate indicator database table.
     :param csvfile: CSV file containing measurement data
     :return: None
     """
     date_cols = ("ActivityStartDate", "ActivityEndDate")
     indicator = csvfile.replace('_Result.csv', '')
     if not table_exists(indicator):
         self.create_indicator_table(indicator)
     with open(os.path.join(self.tmp_dir, csvfile), 'r') as csvin:
         csvreader = csv.reader(csvin)
         headers = None
         for row in csvreader:
             if not headers:
                 headers = ['"{}"'.format(x.replace('/', '_')) for x in row]
             else:
                 insert_sql = 'INSERT INTO "{}" ({}) SELECT \n'.format(
                     indicator,
                     ','.join(headers)
                 )
                 query_format = []
                 for i, val in enumerate(row):
                     attribute = headers[i].strip('"')
                     id_idx = headers.index('"ActivityIdentifier"')
                     query_format.append("%s")
                     if attribute in date_cols and val:
                         time_idx = headers.index(
                             '"{}_Time"'.format(
                                 attribute.replace("Date", "Time")))
                         zone_idx = headers.index(
                             '"{}_TimeZoneCode"'.format(
                                 attribute.replace("Date", "Time")))
                         time_str = "{} {} {}".format(
                             val, row[time_idx], row[zone_idx])
                         row[i] = time_str
                     else:
                         if not val or val == '.' or val == 'None':
                             row[i] = None
                 insert_sql += '{}'.format(
                     ','.join('{}'.format(x) for x in query_format)) + \
                     ' WHERE NOT EXISTS (SELECT 1 from ' + \
                     '{} WHERE "ActivityIdentifier" = \'{}\');'.format(
                         indicator, re.sub('\'{1}', '\'\'', row[id_idx]))
                 try:
                     postgres_query(
                         insert_sql, params=tuple(row), commit=True)
                 except Exception as e:
                     logger.error("The query failed: {} with parameters: {}".
                                  format(insert_sql, row))
                     logger.error(traceback.format_exc())
                     if not self.skip_errors:
                         raise e
     purge_old_data(indicator, date_cols[0], self.days_to_keep)
     if not table_exists(indicator + self.suffix):
         view_sql = 'CREATE OR REPLACE VIEW ' + indicator + self.suffix + \
             ' AS SELECT i.*, g.wkb_geometry from ' + indicator + ' i ' + \
             ' INNER JOIN ' + self.station_table + ' g on ' + \
             ' i."MonitoringLocationIdentifier" = ' + \
             ' g.monitoringlocationidentifier;'
         postgres_query(view_sql, commit=True)
         self.post_geoserver_vector(indicator + self.suffix)
Example #14
 def insert_row(self, data):
     postgres_query(WHISP_SQL.format(
         table=self.prefix, geom=data['the_geom']), params=data, commit=True)
Example #15
 def insert_row(self, data):
     postgres_query(WHISP_SQL.format(table=self.prefix,
                                     geom=data['the_geom']),
                    params=data,
                    commit=True)