def __init__(self, table, cities):
    """
    :param table: base name for the AQICN database tables
    :param cities: list of cities to retrieve air quality data for
    """
    self.cities = cities
    self.prefix = table
    self.archive = self.prefix + "_archive"
    self.max_wait = 5
    if not table_exists(self.archive):
        postgres_query(AQICN_TABLE.format(table=self.archive), commit=True)
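# Hedged sketch, not part of the original module: table_exists() is called
# throughout but defined elsewhere. Assuming postgres_query() returns the
# fetched rows for SELECT statements, it could look roughly like this:
#
# def table_exists(table):
#     result = postgres_query(
#         'SELECT 1 FROM information_schema.tables WHERE table_name = %s;',
#         params=(table.lower(),))
#     return bool(result)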
def create_indicator_table(self, indicator):
    """
    Create a database table for the given water quality indicator
    :param indicator: table name for the indicator
    :return: None
    """
    with open(os.path.join(script_dir, 'resources/create_table.sql')) as sql:
        postgres_query(sql.read().format(tablename=indicator), commit=True)
def process(self):
    """
    Retrieve current air quality data for every city, splitting the
    work across a thread pool
    :return: None
    """
    if not table_exists(self.prefix):
        postgres_query(AQICN_TABLE.format(table=self.prefix), commit=True)
    logger.debug("Start %s" % datetime.datetime.now())
    if not self.cities:
        self.getCities()
    logger.debug("There are %s cities" % str(len(self.cities)))
    pool = ThreadPool(self.pool_size)
    for citylist in self.split_list(self.pool_size):
        pool.apply_async(thread_parse, args=(self.prefix, citylist))
    pool.close()
    pool.join()
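# Hedged sketch (assumption; split_list is defined elsewhere in the scraper):
# it appears to partition self.cities into one chunk per worker thread, e.g.:
#
# def split_list(self, num_chunks):
#     # Round-robin slices give each thread a roughly equal share of cities
#     return [self.cities[i::num_chunks] for i in range(num_chunks)]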
def purge_old_data(self):
    """
    Remove old data from weekly, monthly, and yearly PostGIS tables
    """
    today = datetime.date.today()
    last_week = (today - datetime.timedelta(days=7)).strftime("%Y-%m-%d")
    last_month = (today - datetime.timedelta(days=30)).strftime("%Y-%m-%d")
    last_year = (today - datetime.timedelta(days=365)).strftime("%Y-%m-%d")
    for interval, table in zip([last_week, last_month, last_year],
                               self.tables):
        postgres_query(
            "DELETE FROM {} where CAST(time as timestamp) < '{}';".format(
                table, interval),
            commit=True)
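# Note: zip() pairs the cutoff dates with self.tables positionally, so
# self.tables must be ordered (weekly, monthly, yearly). Illustrative,
# hypothetical table names:
#
# self.tables = ('aqicn_weekly', 'aqicn_monthly', 'aqicn_yearly')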
def update_layer(self, layer):
    """
    Create or update the MMWR layer in GeoNode
    :param layer: Layer to update (weekly or archive)
    :return: None
    """
    csvfile = "{}.csv".format(self.prefix)
    vrt_file = os.path.join(self.tmp_dir, '{}.vrt'.format(self.prefix))
    csvt_file = os.path.join(self.tmp_dir, '{}.csvt'.format(self.prefix))
    if not os.path.exists(vrt_file):
        with open(vrt_file, 'w') as vrt:
            vrt.write(vrt_content.format(
                name=csvfile.replace('.csv', ''),
                csv=os.path.join(self.tmp_dir, csvfile)))
    if not os.path.exists(csvt_file):
        with open(csvt_file, 'w') as csvt:
            csvt.write(csvt_content)
    db = ogc_server_settings.datastore_db
    table = '{}_{}'.format(self.prefix, layer).lower()
    option = 'overwrite' if layer.lower() == 'weekly' else 'append'
    ogr2ogr_exec("-{option} -skipfailures -f PostgreSQL \
\"PG:host={db_host} user={db_user} password={db_pass} \
dbname={db_name}\" {vrt} -nln {table}".format(
        db_host=db["HOST"], db_user=db["USER"], db_pass=db["PASSWORD"],
        db_name=db["NAME"], vrt=vrt_file, option=option, table=table))
    if not layer_exists(table,
                        ogc_server_settings.server.get('DATASTORE'),
                        DEFAULT_WORKSPACE):
        constraint = ('ALTER TABLE {table} ADD CONSTRAINT '
                      '{table}_unique UNIQUE (place, report_date)'
                      .format(table=table))
        postgres_query(constraint, commit=True)
        self.post_geoserver_vector(table)
    if not style_exists(table):
        with open(os.path.join(script_dir, 'resources/mmwr.sld')) as sldfile:
            sld = sldfile.read().format(layername=table)
        self.set_default_style(table, table, sld)
    self.update_geonode(table,
                        title='{} {}'.format(self.base_title, layer),
                        description=self.description,
                        extra_keywords=['category:Population'])
    self.truncate_gs_cache(table)
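# Context note: the .csvt sidecar declares column types for OGR's CSV driver,
# one quoted type per column. csvt_content is defined elsewhere; a
# hypothetical example for a place/date/count layout:
#
# csvt_content = '"String","Date","Integer"'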
def run(self):
    """
    Run a full WHISP update: import archived data, scrape new records,
    then publish the layer, style, and metadata as needed
    :return: None
    """
    if not table_exists(self.prefix):
        postgres_query(WHISP_TABLE.format(table=self.prefix), commit=True)
    self.import_archive()
    self.scrape()
    if not layer_exists(self.prefix,
                        ogc_server_settings.server.get('DATASTORE'),
                        DEFAULT_WORKSPACE):
        self.post_geoserver_vector(self.prefix)
    if not style_exists(self.prefix):
        with open(os.path.join(script_dir, 'resources/whisp.sld')) as sld:
            self.set_default_style(self.prefix, self.prefix, sld.read())
    self.update_geonode(self.prefix,
                        title=self.title,
                        description=self.description)
    self.truncate_gs_cache(self.prefix)
    self.cleanup()
def update_station_table(self, csvfile):
    """
    Insert data on water quality monitoring stations from a csv file
    into the database
    :param csvfile: CSV file containing station data
    :return: None
    """
    vrt_content = ("""<OGRVRTDataSource>
    <OGRVRTLayer name="{name}">
        <SrcDataSource>{csv}</SrcDataSource>
        <GeometryType>wkbPoint</GeometryType>
        <LayerSRS>WGS84</LayerSRS>
        <GeometryField encoding="PointFromColumns"
                       x="LongitudeMeasure" y="LatitudeMeasure"/>
    </OGRVRTLayer>
</OGRVRTDataSource>
""")
    station_table = self.station_table
    needs_index = not table_exists(station_table)
    db = ogc_server_settings.datastore_db
    vrt_file = os.path.join(self.tmp_dir, csvfile.replace('.csv', '.vrt'))
    csv_name = os.path.basename(csvfile).replace(".csv", "")
    if not os.path.exists(vrt_file):
        with open(vrt_file, 'w') as vrt:
            vrt.write(vrt_content.format(
                name=csv_name,
                csv=os.path.join(self.tmp_dir, csvfile)))
    ogr2ogr_exec("-append -skipfailures -f PostgreSQL \
\"PG:host={db_host} user={db_user} password={db_pass} \
dbname={db_name}\" {vrt} -nln {table}".format(
        db_host=db["HOST"], db_user=db["USER"], db_pass=db["PASSWORD"],
        db_name=db["NAME"], vrt=vrt_file, table=station_table))
    if needs_index:
        sql = ('ALTER TABLE {} '.format(station_table) +
               'ADD CONSTRAINT monitoringlocationidentifier_key ' +
               'UNIQUE (monitoringlocationidentifier)')
        logger.debug(sql)
        postgres_query(sql, commit=True)
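# Illustrative example (hypothetical file name and tmp_dir): for
# csvfile = 'station.csv' under /tmp/wqp, the rendered VRT tells ogr2ogr to
# build point geometries from the CSV's longitude/latitude columns:
#
# <OGRVRTDataSource>
#     <OGRVRTLayer name="station">
#         <SrcDataSource>/tmp/wqp/station.csv</SrcDataSource>
#         <GeometryType>wkbPoint</GeometryType>
#         <LayerSRS>WGS84</LayerSRS>
#         <GeometryField encoding="PointFromColumns"
#                        x="LongitudeMeasure" y="LatitudeMeasure"/>
#     </OGRVRTLayer>
# </OGRVRTDataSource>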
def save_data(self, city):
    """
    Insert or update a row of air quality measurements for a city
    :param city: dictionary of city metadata and measurements
    :return: None
    """
    # Replace the site's '-' placeholder for missing measurements
    # with SQL NULL
    for item in city['data']:
        if city['data'][item] == '-':
            city['data'][item] = 'NULL'
    # Measurement keys carry a 4-character prefix; strip it to get
    # the column names
    measurements = ','.join(x[4:] for x in city['data'].keys())
    values = ','.join([x for x in city['data'].values()])
    city_name = unicode(city['city'])
    kv = ','.join(['{} = {}'.format(k, v) for k, v in zip(
        [x[4:] for x in city['data'].keys()],
        [x for x in city['data'].values()]
    )])
    sql_str = unicode(AQICN_SQL.format(
        table=self.prefix,
        time=city['dateTime'].strftime('%Y-%m-%d %H:%M:%S'),
        lat=city['g'][0],
        lng=city['g'][1],
        city=city_name.replace('\'', '\'\''),
        keys=measurements,
        val=values,
        kv=kv,
        cntry=city['country']
    ))
    postgres_query(sql_str, commit=True)
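# Illustrative input (hypothetical values; the 'cur_' prefix is an assumption
# consistent with the 4-character strip above):
#
# city = {
#     'city': 'Beijing',
#     'country': 'CN',
#     'g': [39.9, 116.4],  # [lat, lng]
#     'dateTime': datetime.datetime(2017, 5, 1, 12, 0),
#     'data': {'cur_pm25': '153', 'cur_o3': '-'},
# }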
def update_indicator_table(self, csvfile):
    """
    Insert water quality measurement data from a csv file into the
    appropriate indicator database table.
    :param csvfile: CSV file containing measurement data
    :return: None
    """
    date_cols = ("ActivityStartDate", "ActivityEndDate")
    indicator = csvfile.replace('_Result.csv', '')
    if not table_exists(indicator):
        self.create_indicator_table(indicator)
    with open(os.path.join(self.tmp_dir, csvfile), 'r') as csvin:
        csvreader = csv.reader(csvin)
        headers = None
        for row in csvreader:
            if not headers:
                headers = ['"{}"'.format(x.replace('/', '_'))
                           for x in row]
            else:
                insert_sql = 'INSERT INTO "{}" ({}) SELECT \n'.format(
                    indicator, ','.join(headers))
                id_idx = headers.index('"ActivityIdentifier"')
                query_format = []
                for i, val in enumerate(row):
                    attribute = headers[i].strip('"')
                    query_format.append("%s")
                    if attribute in date_cols and val:
                        # Combine the date with its matching time and
                        # time zone columns into a single timestamp
                        time_idx = headers.index(
                            '"{}_Time"'.format(
                                attribute.replace("Date", "Time")))
                        zone_idx = headers.index(
                            '"{}_TimeZoneCode"'.format(
                                attribute.replace("Date", "Time")))
                        time_str = "{} {} {}".format(
                            val, row[time_idx], row[zone_idx])
                        row[i] = time_str
                    elif not val or val == '.' or val == 'None':
                        row[i] = None
                # Skip rows whose ActivityIdentifier is already present
                insert_sql += ','.join(query_format) + \
                    ' WHERE NOT EXISTS (SELECT 1 from ' + \
                    '{} WHERE "ActivityIdentifier" = \'{}\');'.format(
                        indicator,
                        re.sub('\'{1}', '\'\'', row[id_idx]))
                try:
                    postgres_query(
                        insert_sql, params=tuple(row), commit=True)
                except Exception as e:
                    logger.error(
                        "The query failed: {} with parameters: {}".format(
                            insert_sql, row))
                    logger.error(traceback.format_exc())
                    if not self.skip_errors:
                        raise e
    purge_old_data(indicator, date_cols[0], self.days_to_keep)
    if not table_exists(indicator + self.suffix):
        view_sql = ('CREATE OR REPLACE VIEW ' + indicator + self.suffix +
                    ' AS SELECT i.*, g.wkb_geometry from ' + indicator +
                    ' i INNER JOIN ' + self.station_table + ' g on ' +
                    ' i."MonitoringLocationIdentifier" = ' +
                    ' g.monitoringlocationidentifier;')
        postgres_query(view_sql, commit=True)
        self.post_geoserver_vector(indicator + self.suffix)
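# Usage sketch (hypothetical file name): a download named
# 'Turbidity_Result.csv' creates or updates a "Turbidity" table plus a joined
# view ('Turbidity' + self.suffix) carrying the station geometry:
#
# scraper.update_indicator_table('Turbidity_Result.csv')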
def insert_row(self, data):
    """
    Insert a single WHISP record into the database
    :param data: dictionary of values for the row
    :return: None
    """
    postgres_query(WHISP_SQL.format(table=self.prefix,
                                    geom=data['the_geom']),
                   params=data, commit=True)
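# Hedged sketch (assumption, not the project's constant): since params is a
# dict, WHISP_SQL is presumably an INSERT template with psycopg2 named
# placeholders for the values and str.format() slots for the table name and
# geometry. Column names below are hypothetical:
#
# WHISP_SQL = (
#     "INSERT INTO {table} (report_date, description, the_geom) "
#     "VALUES (%(report_date)s, %(description)s, "
#     "ST_GeomFromText('{geom}', 4326));"
# )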