Example 1
 def update_indicator_table(self, csvfile):
     """
     Insert water quality measurement data from a csv file
     into the appropriate indicator database table.
     :param csvfile: CSV file containing measurement data
     :return: None
     """
     date_cols = ("ActivityStartDate", "ActivityEndDate")
     indicator = csvfile.replace('_Result.csv', '')
     if not table_exists(indicator):
         self.create_indicator_table(indicator)
     with open(os.path.join(self.tmp_dir, csvfile), 'r') as csvin:
         csvreader = csv.reader(csvin)
         headers = None
         for row in csvreader:
             if not headers:
                 headers = ['"{}"'.format(x.replace('/', '_')) for x in row]
             else:
                 insert_sql = 'INSERT INTO "{}" ({}) SELECT \n'.format(
                     indicator,
                     ','.join(headers)
                 )
                 query_format = []
                 # Column used to deduplicate rows; look it up once per record
                 id_idx = headers.index('"ActivityIdentifier"')
                 for i, val in enumerate(row):
                     attribute = headers[i].strip('"')
                     query_format.append("%s")
                     # Merge the separate date, time and time-zone columns
                     # into a single timestamp string for the date columns
                     if attribute in date_cols and val:
                         time_idx = headers.index(
                             '"{}_Time"'.format(
                                 attribute.replace("Date", "Time")))
                         zone_idx = headers.index(
                             '"{}_TimeZoneCode"'.format(
                                 attribute.replace("Date", "Time")))
                         time_str = "{} {} {}".format(
                             val, row[time_idx], row[zone_idx])
                         row[i] = time_str
                     # Normalize empty or placeholder values to NULL
                     elif not val or val in ('.', 'None'):
                         row[i] = None
                 # Only insert the row if no record with the same
                 # ActivityIdentifier already exists in the table
                 insert_sql += ','.join(query_format) + \
                     ' WHERE NOT EXISTS (SELECT 1 from ' + \
                     '{} WHERE "ActivityIdentifier" = \'{}\');'.format(
                         indicator, re.sub("'", "''", row[id_idx]))
                 postgres_query(insert_sql, params=tuple(row), commit=True)
     purge_old_data(indicator, date_cols[0], self.days_to_keep)
     if not table_exists(indicator + self.suffix):
         view_sql = 'CREATE OR REPLACE VIEW ' + indicator + self.suffix + \
             ' AS SELECT i.*, g.wkb_geometry from ' + indicator + ' i ' + \
             ' INNER JOIN ' + self.station_table + ' g on ' + \
             ' i."MonitoringLocationIdentifier" = ' + \
             ' g.monitoringlocationidentifier;'
         postgres_query(view_sql, commit=True)
         self.post_geoserver_vector(indicator + self.suffix)
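
The loader above guards each insert with INSERT ... SELECT ... WHERE NOT EXISTS, so re-running it never duplicates a measurement with the same ActivityIdentifier. Stripped of the project's postgres_query helper, the pattern boils down to the following sketch (assumes an open psycopg2 connection; table and column names are illustrative, not the project's API):

    # Minimal sketch of the duplicate-safe insert pattern; `conn` is an
    # open psycopg2 connection, not the project's postgres_query helper.
    def insert_if_new(conn, table, columns, values, key_col, key_val):
        """Insert a row only if no row with the same key value exists."""
        placeholders = ','.join(['%s'] * len(values))
        col_list = ','.join('"{}"'.format(c) for c in columns)
        sql = ('INSERT INTO "{t}" ({cols}) SELECT {vals} '
               'WHERE NOT EXISTS '
               '(SELECT 1 FROM "{t}" WHERE "{key}" = %s)').format(
                   t=table, cols=col_list, vals=placeholders, key=key_col)
        with conn.cursor() as cur:
            cur.execute(sql, tuple(values) + (key_val,))
        conn.commit()

Binding the key value as a query parameter also removes the need for the manual quote escaping (re.sub) used in the snippet above.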
Example 2
 def __init__(self, table, cities):
     self.cities = cities
     self.prefix = table
     self.archive = self.prefix + "_archive"
     self.max_wait = 5
     if not table_exists(self.archive):
         postgres_query(AQICN_TABLE.format(table=self.archive), commit=True)
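
The constructor lazily creates the archive table on first use: table_exists guards a CREATE statement built from the AQICN_TABLE template. The helper itself is not shown here; a minimal check against PostgreSQL could look like the hypothetical sketch below (signature chosen for illustration, the project's helper may differ):

    # Hypothetical table_exists sketch using an open psycopg2 connection.
    # to_regclass (PostgreSQL 9.4+) returns NULL for unknown relations.
    def table_exists(conn, table):
        """Return True if a relation with this name exists."""
        with conn.cursor() as cur:
            cur.execute("SELECT to_regclass(%s)", (table,))
            return cur.fetchone()[0] is not None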
Example 3
 def process(self):
     if not table_exists(self.prefix):
         postgres_query(AQICN_TABLE.format(table=self.prefix), commit=True)
     logger.debug("Start %s" % datetime.datetime.now())
     if not self.cities:
         self.getCities()
     logger.debug("There are %s cities" % str(len(self.cities)))
     pool = ThreadPool(self.pool_size)
     for citylist in self.split_list(self.pool_size):
         pool.apply_async(thread_parse, args=(self.prefix, citylist))
     pool.close()
     pool.join()
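
process() fans the per-city scraping out over a ThreadPool (presumably multiprocessing.pool.ThreadPool), handing each worker one slice of the city list. split_list is not shown; one plausible chunking implementation is sketched below (an assumption, not the project's code):

    import math

    # Hypothetical split_list: yield self.cities in up to `chunks`
    # roughly equal slices, one per worker thread.
    def split_list(self, chunks):
        size = max(1, math.ceil(len(self.cities) / chunks))
        for i in range(0, len(self.cities), size):
            yield self.cities[i:i + size]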
Example 4
 def run(self):
     if not table_exists(self.prefix):
         postgres_query(WHISP_TABLE.format(table=self.prefix), commit=True)
         self.import_archive()
     self.scrape()
     if not layer_exists(self.prefix,
                         ogc_server_settings.server.get('DATASTORE'),
                         DEFAULT_WORKSPACE):
         self.post_geoserver_vector(self.prefix)
     if not style_exists(self.prefix):
         with open(os.path.join(script_dir, 'resources/whisp.sld')) as sld:
             self.set_default_style(self.prefix, self.prefix, sld.read())
     self.update_geonode(self.prefix,
                         title=self.title,
                         description=self.description)
     self.truncate_gs_cache(self.prefix)
     self.cleanup()
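
run() is the scraper's full pipeline: ensure the table exists, import archived data, scrape, publish the table as a GeoServer vector layer, attach the SLD style, update the GeoNode metadata, invalidate the tile cache, and clean up. layer_exists and style_exists are project helpers; a bare-bones existence check against the GeoServer REST API might look like this hypothetical sketch (placeholder URL and credentials):

    import requests

    # Hypothetical sketch: GeoServer's REST API returns 200 for an
    # existing style and 404 otherwise.
    def style_exists(name, base_url="http://localhost:8080/geoserver",
                     auth=("admin", "geoserver")):
        resp = requests.get(
            "{}/rest/styles/{}.json".format(base_url, name), auth=auth)
        return resp.status_code == 200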
Example 5
 def run(self):
     if not table_exists(self.prefix):
         postgres_query(WHISP_TABLE.format(table=self.prefix), commit=True)
         self.import_archive()
     self.scrape()
     if not layer_exists(self.prefix,
                         ogc_server_settings.server.get('DATASTORE'),
                         DEFAULT_WORKSPACE):
         self.post_geoserver_vector(self.prefix)
     if not style_exists(self.prefix):
         with open(os.path.join(script_dir,
                                'resources/whisp.sld')) as sld:
             self.set_default_style(self.prefix, self.prefix, sld.read())
     self.update_geonode(self.prefix,
                         title=self.title,
                         description=self.description)
     self.truncate_gs_cache(self.prefix)
     self.cleanup()
Example 6
    def update_station_table(self, csvfile):
        """
        Insert data on water quality monitoring stations
        from a csv file into the database
        :param csvfile: CSV file containing station data
        :return: None
        """
        vrt_content = ("""<OGRVRTDataSource>
                <OGRVRTLayer name="{name}">
                    <SrcDataSource>{csv}</SrcDataSource>
                    <GeometryType>wkbPoint</GeometryType>
                    <LayerSRS>WGS84</LayerSRS>
                    <GeometryField encoding="PointFromColumns"
                    x="LongitudeMeasure" y="LatitudeMeasure"/>
                </OGRVRTLayer>
            </OGRVRTDataSource>
            """)
        station_table = self.station_table
        needs_index = not table_exists(station_table)

        db = ogc_server_settings.datastore_db
        vrt_file = os.path.join(self.tmp_dir, csvfile.replace('.csv', '.vrt'))
        csv_name = os.path.basename(csvfile).replace(".csv", "")
        if not os.path.exists(vrt_file):
            with open(vrt_file, 'w') as vrt:
                vrt.write(
                    vrt_content.format(name=csv_name,
                                       csv=os.path.join(self.tmp_dir,
                                                        csvfile)))
        ogr2ogr_exec("-append -skipfailures -f PostgreSQL \
            \"PG:host={db_host} user={db_user} password={db_pass} \
            dbname={db_name}\" {vrt} -nln {table}".format(
            db_host=db["HOST"],
            db_user=db["USER"],
            db_pass=db["PASSWORD"],
            db_name=db["NAME"],
            vrt="{}".format(vrt_file),
            table=station_table))
        if needs_index:
            sql = 'ALTER TABLE {} '.format(station_table) + \
                  'ADD CONSTRAINT monitoringlocationidentifier_key ' + \
                  'UNIQUE (monitoringlocationidentifier)'
            logger.debug(sql)
            postgres_query(sql, commit=True)
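
The VRT file wraps the station CSV so that ogr2ogr can build point geometries from the LongitudeMeasure/LatitudeMeasure columns while appending into PostGIS. ogr2ogr_exec is a project helper; a thin wrapper over the ogr2ogr command-line tool could be as simple as the sketch below (assumes GDAL's ogr2ogr is on the PATH):

    import shlex
    import subprocess

    # Hypothetical ogr2ogr_exec sketch: split the argument string
    # shell-style and run the GDAL ogr2ogr binary, raising on failure.
    def ogr2ogr_exec(argstr):
        subprocess.check_call(["ogr2ogr"] + shlex.split(argstr))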
Example 7
    def update_station_table(self, csvfile):
        """
        Insert data on water quality monitoring stations
        from a csv file into the database
        :param csvfile: CSV file containing station data
        :return: None
        """
        vrt_content = (
            """<OGRVRTDataSource>
                <OGRVRTLayer name="{name}">
                    <SrcDataSource>{csv}</SrcDataSource>
                    <GeometryType>wkbPoint</GeometryType>
                    <LayerSRS>WGS84</LayerSRS>
                    <GeometryField encoding="PointFromColumns"
                    x="LongitudeMeasure" y="LatitudeMeasure"/>
                </OGRVRTLayer>
            </OGRVRTDataSource>
            """)
        station_table = self.station_table
        needs_index = not table_exists(station_table)

        db = ogc_server_settings.datastore_db
        vrt_file = os.path.join(self.tmp_dir, csvfile.replace('.csv', '.vrt'))
        csv_name = os.path.basename(csvfile).replace(".csv", "")
        if not os.path.exists(vrt_file):
            with open(vrt_file, 'w') as vrt:
                vrt.write(vrt_content.format(
                    name=csv_name, csv=os.path.join(self.tmp_dir, csvfile)))
        ogr2ogr_exec("-append -skipfailures -f PostgreSQL \
            \"PG:host={db_host} user={db_user} password={db_pass} \
            dbname={db_name}\" {vrt} -nln {table}".format(
            db_host=db["HOST"], db_user=db["USER"], db_pass=db["PASSWORD"],
            db_name=db["NAME"], vrt="{}".format(vrt_file), table=station_table))
        if needs_index:
            sql = 'ALTER TABLE {} '.format(station_table) + \
                  'ADD CONSTRAINT monitoringlocationidentifier_key ' + \
                  'UNIQUE (monitoringlocationidentifier)'
            logger.debug(sql)
            postgres_query(sql, commit=True)
Example 8
 def update_indicator_table(self, csvfile):
     """
     Insert water quality measurement data from a csv file
     into the appropriate indicator database table.
     :param csvfile: CSV file containing measurement data
     :return: None
     """
     date_cols = ("ActivityStartDate", "ActivityEndDate")
     indicator = csvfile.replace('_Result.csv', '')
     if not table_exists(indicator):
         self.create_indicator_table(indicator)
     with open(os.path.join(self.tmp_dir, csvfile), 'r') as csvin:
         csvreader = csv.reader(csvin)
         headers = None
         for row in csvreader:
             if not headers:
                 headers = ['"{}"'.format(x.replace('/', '_')) for x in row]
             else:
                 insert_sql = 'INSERT INTO "{}" ({}) SELECT \n'.format(
                     indicator,
                     ','.join(headers)
                 )
                 query_format = []
                 for i, val in enumerate(row):
                     attribute = headers[i].strip('"')
                     id_idx = headers.index('"ActivityIdentifier"')
                     query_format.append("%s")
                     if attribute in date_cols and val:
                         time_idx = headers.index(
                             '"{}_Time"'.format(
                                 attribute.replace("Date", "Time")))
                         zone_idx = headers.index(
                             '"{}_TimeZoneCode"'.format(
                                 attribute.replace("Date", "Time")))
                         time_str = "{} {} {}".format(
                             val, row[time_idx], row[zone_idx])
                         row[i] = time_str
                     else:
                         if not val or val == '.' or val == 'None':
                             row[i] = None
                 insert_sql += '{}'.format(
                     ','.join('{}'.format(x) for x in query_format)) + \
                     ' WHERE NOT EXISTS (SELECT 1 from ' + \
                     '{} WHERE "ActivityIdentifier" = \'{}\');'.format(
                         indicator, re.sub('\'{1}', '\'\'', row[id_idx]))
                 try:
                     postgres_query(
                         insert_sql, params=tuple(row), commit=True)
                 except Exception as e:
                     logger.error("The query failed: {} with parameters: {}".
                                  format(insert_sql, row))
                     logger.error(traceback.format_exc())
                     if not self.skip_errors:
                         raise e
     purge_old_data(indicator, date_cols[0], self.days_to_keep)
     if not table_exists(indicator + self.suffix):
         view_sql = 'CREATE OR REPLACE VIEW ' + indicator + self.suffix + \
             ' AS SELECT i.*, g.wkb_geometry from ' + indicator + ' i ' + \
             ' INNER JOIN ' + self.station_table + ' g on ' + \
             ' i."MonitoringLocationIdentifier" = ' + \
             ' g.monitoringlocationidentifier;'
         postgres_query(view_sql, commit=True)
         self.post_geoserver_vector(indicator + self.suffix)
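
This variant wraps each insert in try/except so that a single bad row is logged and, when skip_errors is set, skipped instead of aborting the whole load. A driver for it might simply walk the download directory and feed every *_Result.csv to the method, as in this hypothetical caller (names are illustrative):

    import os

    # Hypothetical driver: hand every downloaded result CSV to the
    # loader above; `handler` is an instance of the class shown here.
    def load_all_results(handler):
        for name in sorted(os.listdir(handler.tmp_dir)):
            if name.endswith('_Result.csv'):
                handler.update_indicator_table(name)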