def pg_srs_constraint(self):
    """Re-create the SRID CHECK constraint on gml_objects.gml_bounded_by
    so it enforces this component's configured target SRID.
    """
    log.info('set srs constraint')
    db = PostGIS(self.cfg.get_dict())
    srid = self.srid
    # Drop the existing constraint first, then add it back with our SRID.
    drop_sql = "ALTER TABLE gml_objects DROP CONSTRAINT enforce_srid_gml_bounded_by;"
    db.tx_execute(drop_sql)
    add_sql = ("ALTER TABLE gml_objects ADD CONSTRAINT enforce_srid_gml_bounded_by "
               "CHECK (st_srid(gml_bounded_by) = (%s));" % srid)
    db.tx_execute(add_sql)
def init(self):
    """One-time init: open the PostGIS connection, then let the HttpInput
    superclass read the file list from the Apache URL."""
    log.info('Init: connect to DB')
    self.db = PostGIS(self.cfg.get_dict())
    self.db.connect()
    # Delegate file-list discovery to the superclass.
    HttpInput.init(self)
def write(self, packet):
    """Execute the SQL carried in packet.data against PostGIS in a
    transaction; the packet is passed through unchanged."""
    if packet.data is None:
        return packet
    log.info('executing SQL')
    pg = PostGIS(self.cfg.get_dict())
    affected = pg.tx_execute(packet.data)
    log.info('executed SQL, rowcount=%d' % affected)
    return packet
def get_feature_types(self):
    """Populate self.feature_type_ids with a qname -> id mapping read
    from the feature_types table."""
    log.info('reading all featuretypes from DB')
    pg = PostGIS(self.cfg.get_dict())
    pg.connect()
    pg.execute("SELECT id,qname FROM feature_types")
    # Each row is (id, qname): map qname to its numeric id.
    for ft_id, qname in pg.cursor:
        self.feature_type_ids[qname] = ft_id
def __init__(self, configdict, section):
    """Initialize SQLite input config plus a persistent Postgres
    connection used for progress tracking."""
    SqliteDbInput.__init__(self, configdict, section)
    # Progress-tracking SQL snippets from config.
    self.progress_query = self.cfg.get('progress_query')
    self.progress_update = self.cfg.get('progress_update')
    # Connect only once to the Postgres DB.
    log.info('Init: connect to Postgres DB')
    self.progress_db = PostGIS(self.cfg.get_dict())
    self.progress_db.connect()
def init(self):
    """Connect once to the DB and resolve the column list, falling back
    to DB metadata when no explicit column names were configured."""
    log.info('Init: connect to DB')
    self.db = PostGIS(self.cfg.get_dict())
    self.db.connect()
    if self.column_names is not None:
        self.columns = self.column_names
    else:
        # No explicit column names given: derive them from DB meta info.
        self.columns = self.db.get_column_names(self.cfg.get('table'), self.cfg.get('schema'))
def init(self, config_dict):
    """Connect to PostGIS and load ANN calibration models plus their
    running-mean state from the DB, then attach them to SENSOR_DEFS.

    :param config_dict: component configuration; must contain 'process_name'.

    Fix vs. original: the local `id` shadowed the builtin, and the inner
    loop variable `state` shadowed the outer `state` dict (harmless only
    because the iterator was captured before rebinding) — renamed to
    `model_id` / `gas_state`. Behavior is unchanged.
    """
    self.config_dict = config_dict
    self.process_name = config_dict['process_name']
    self.db = PostGIS(config_dict)
    self.db.connect()

    ids = dict()
    parameters = dict()
    models = dict()
    state = dict()

    # Query ANN Calibration Model and its State from DB for each calibrated sensor.
    if self.model_query is not None and len(self.sensor_model_names) > 0:
        log.info('Getting calibration models and state from database')
        for k in self.sensor_model_names:
            v = self.sensor_model_names[k]
            model_id, param, model = self.query_model(v)
            ids[k] = model_id
            parameters[k] = param
            models[k] = model
            model_state = self.query_state(model_id)
            state[k] = model_state
    else:
        log.info('No query for fetching calibration models given or no '
                 'mapping for calibration models to gas components given.')

    # Put Model and State info in the Device definitions.
    for k in ids:
        SENSOR_DEFS[k]['converter_model']['model_id'] = ids[k]
    for k in parameters:
        SENSOR_DEFS[k]['converter_model']['running_mean_weights'] = parameters[k]
    for k in models:
        SENSOR_DEFS[k]['converter_model']['mlp_regressor'] = models[k]
    for k, v in state.iteritems():
        # Rehydrate each per-device, per-gas state dict into RunningMean objects.
        for device_id, device_state in v.iteritems():
            for gas, gas_state in device_state.iteritems():
                v[device_id][gas] = RunningMean.from_dict(gas_state)
        SENSOR_DEFS[k]['converter_model']['state'] = v
def init(self):
    """Open the PostGIS connection (once) and preload all device ids."""
    self.db = PostGIS(self.cfg.get_dict())
    self.db.connect()
    # One time: get all device ids
    self.fetch_devices()
def read(self, packet):
    """Stream GML feature blobs from the deegree blobstore into output docs.

    On first call, opens two DB connections: one cursor walking all records
    of the configured SQL, and a second connection used to resolve features
    referenced via xlink:href so they can be embedded in the same document.
    Emits a document whenever max_features_per_doc is reached or the cursor
    is exhausted.
    """
    if packet.is_end_of_stream():
        return packet

    if self.db is None:
        # First time read
        log.info("reading records from blobstore..")
        self.db = PostGIS(self.cfg.get_dict())
        self.db.connect()
        sql = self.cfg.get('sql')
        self.rowcount = self.db.execute(sql)
        self.cur = self.db.cursor
        log.info("Read records rowcount=%d" % self.rowcount)

        # Init separate connection to fetch objects referenced by xlink:href
        self.xlink_db = PostGIS(self.cfg.get_dict())
        self.xlink_db.connect()

    # Query active
    while self.cur is not None:
        if self.buffer is None:
            # Start a fresh output buffer with the container opening tag.
            self.buffer = self.init_buf()
            self.buffer.write(self.start_container)

        # Get next blob record
        record = self.cur.fetchone()

        # End of all records
        if record is None:
            # End of records: start closing
            self.buffer.write(self.end_container)
            self.cur = None
            self.db.commit()

            # Only create doc if there are features in the buffer
            if self.feature_count > 0:
                self.buffer_to_doc(packet)
            packet.set_end_of_doc()
            break
        else:
            # New record: embed feature blob in feature tags and write to buffer
            feature_blob = self.write_feature(record)

            # If we have local xlinks: fetch the related features as well from the DB and
            # output them within the same document (local href resolvable)
            # TODO: in some cases we may need to be recursive (xlinks in xlinked features...)

            # First construct a single query for all xlinks
            xlink_sql = None
            for xlink in self.regex_xlink_href.finditer(feature_blob):
                gml_id = xlink.group(1).strip('"').strip('#')
                # We don't want multiple occurences of the same xlinked feature
                if gml_id in self.xlink_ids:
                    continue

                self.xlink_ids.add(gml_id)
                if xlink_sql is None:
                    xlink_sql = "SELECT binary_object from gml_objects where gml_id = '%s'" % gml_id
                else:
                    # NOTE(review): no space before "OR" — Postgres happens to
                    # tokenize 'id'OR correctly, but a leading space would be
                    # safer; confirm against the target DB before changing.
                    xlink_sql += "OR gml_id = '%s'" % gml_id

            # Should we retrieve and write xlinked features?
            if xlink_sql is not None:
                # Fetch from DB
                self.xlink_db.execute(xlink_sql)
                while True:
                    # Get next blob record
                    xlink_record = self.xlink_db.cursor.fetchone()
                    if xlink_record is None:
                        break
                    self.write_feature(xlink_record)

            # Should we output a doc
            if self.feature_count >= self.max_features_per_doc:
                # End of records: create XML doc
                self.buffer.write(self.end_container)
                self.buffer_to_doc(packet)
                break

    if self.cur is None:
        # All records handled: close off
        packet.set_end_of_stream()

    # log.info("[%s]" % packet.data)
    return packet
def init(self):
    """Connect once to the DB, then resolve the column configuration."""
    log.info('Init: connect to DB')
    self.db = PostGIS(self.cfg.get_dict())
    self.db.connect()
    self.init_columns()
def init(self):
    """Initialize InfluxDB input plus a PostGIS tracking connection, and
    build self.measurements_info: one dict per measurement (device) with
    its harvest time window and the resume point from the progress table.
    """
    InfluxDbInput.init(self)

    # PostGIS for tracking Harvesting progress.
    # Tracking is automatically updated via a TRIGGER (see db-schema-raw).
    postgis_cfg = {
        'host': self.pg_host,
        'port': self.pg_port,
        'database': self.pg_database,
        'user': self.pg_user,
        'password': self.pg_password,
        'schema': self.pg_schema
    }
    self.tracking_db = PostGIS(postgis_cfg)
    self.tracking_db.connect()

    # One time: get all measurements and related info and store in structure
    measurements = self.get_measurement_names()
    for measurement in measurements:
        # Optional mapping from MEASUREMENT name to a device id.
        # Otherwise the device_id is the Measurement name.
        device_id = measurement
        if self.meas_name_to_device_id:
            if measurement not in self.meas_name_to_device_id:
                log.warn('No device_id mapped for measurement (table) %s' % measurement)
                continue

            device_id = self.meas_name_to_device_id[measurement]

        date_start_s, start_ts = self.get_start_time(measurement)
        date_end_s, end_ts = self.get_end_time(measurement)
        # Truncate the start to a whole hour; end_ts is scaled to nanoseconds.
        start_ts = self.date_str_to_whole_hour_nanos(date_start_s)
        end_ts *= NANOS_FACTOR

        # Shift time for current_ts from progress table if already in progress
        # otherwise use start time of measurement.
        current_ts = start_ts
        row_count = self.tracking_db.execute(self.progress_query + device_id)
        if row_count > 0:
            # Already in progress
            progress_rec = self.tracking_db.cursor.fetchone()
            # progress_rec[4] is a YYYYMMDD integer, progress_rec[5] the hour.
            ymd_last = str(progress_rec[4])
            year_last = ymd_last[0:4]
            month_last = ymd_last[4:6]
            day_last = ymd_last[6:]
            hour_last = progress_rec[5]
            # e.g. 2017-11-17T11:00:00.411Z
            # NOTE(review): hour_last - 1 re-harvests the last recorded hour;
            # if hour_last is 0 this produces an invalid "-1" hour string —
            # confirm progress hours are always >= 1.
            date_str = '%s-%s-%sT%d:00:00.000Z' % (year_last, month_last, day_last, hour_last - 1)
            current_ts = self.date_str_to_whole_hour_nanos(date_str)
            # skip to next hour
            # current_ts += (3600 * NANOS_FACTOR)

        # Store all info per device (measurement table) in list of dict
        self.measurements_info.append({
            'name': measurement,
            'date_start_s': date_start_s,
            'start_ts': start_ts,
            'date_end_s': date_end_s,
            'end_ts': end_ts,
            'current_ts': current_ts,
            'device_id': device_id
        })

    print("measurements_info: %s" % str(self.measurements_info))
def write(self, packet):
    """Insert every feature member of the incoming GML document as a blob
    row in gml_objects, including its (first) GML geometry when present.

    On an insert error, commits, reconnects and continues with the
    remaining features (best-effort behavior).
    """
    if packet.data is None:
        return packet

    gml_doc = packet.data
    log.info('inserting features in DB')
    db = PostGIS(self.cfg.get_dict())
    db.connect()

    # print self.to_string(gml_doc, False, False)
    # NS = {'base': 'urn:x-inspire:specification:gmlas:BaseTypes:3.2', 'gml': 'http://www.opengis.net/gml/3.2'}
    # featureMembers = gml_doc.xpath('//base:member/*', namespaces=NS)
    featureMembers = gml_doc.xpath("//*[local-name() = '%s']/*" % self.feature_member_tag)

    count = 0
    gml_ns = None
    for childNode in featureMembers:
        if gml_ns is None:
            # Determine the GML namespace once, from the first feature's nsmap.
            if childNode.nsmap.has_key('gml'):
                gml_ns = childNode.nsmap['gml']
            else:
                if childNode.nsmap.has_key('GML'):
                    gml_ns = childNode.nsmap['GML']

        gml_id = childNode.get('{%s}id' % gml_ns)
        feature_type_id = self.feature_type_ids[childNode.tag]

        # Find a GML geometry in the GML NS
        ogrGeomWKT = None
        # gmlMembers = childNode.xpath(".//gml:Point|.//gml:Curve|.//gml:Surface|.//gml:MultiSurface", namespaces=NS)
        gmlMembers = childNode.xpath(
            ".//*[local-name() = 'Point']|.//*[local-name() = 'Polygon']|.//*[local-name() = 'Curve']|.//*[local-name() = 'Surface']|.//*[local-name() = 'MultiSurface']"
        )
        # Only the first geometry found is stored in gml_bounded_by.
        geom_str = None
        for gmlMember in gmlMembers:
            if geom_str is None:
                geom_str = etree.tostring(gmlMember)
            # no need for GDAL Python bindings for now, maybe when we'll optimize with COPY iso INSERT
            # ogrGeom = ogr.CreateGeometryFromGML(str(gmlStr))
            # if ogrGeom is not None:
            #     ogrGeomWKT = ogrGeom.ExportToWkt()
            #     if ogrGeomWKT is not None:
            #         break

        # Serialize the full feature element as the blob payload.
        blob = etree.tostring(childNode, pretty_print=False, xml_declaration=False, encoding='UTF-8')

        if geom_str is None:
            sql = "INSERT INTO gml_objects(gml_id, ft_type, binary_object) VALUES (%s, %s, %s)"
            parameters = (gml_id, feature_type_id, db.make_bytea(blob))
        else:
            # ST_SetSRID(ST_GeomFromGML(%s)),-1)
            sql = "INSERT INTO gml_objects(gml_id, ft_type, binary_object, gml_bounded_by) VALUES (%s, %s, %s, ST_SetSRID( ST_GeomFromGML(%s),%s) )"
            parameters = (gml_id, feature_type_id, db.make_bytea(blob), geom_str, self.srid)

        if db.execute(sql, parameters) == -1:
            log.error("feat num# = %d error inserting feature blob=%s (but continuing)" % (count, blob))
            # will fail but we will close connection also
            db.commit()
            # proceed...
            log.info('retrying to proceed with remaining features...')
            db = PostGIS(self.cfg.get_dict())
            db.connect()
            # NOTE(review): resetting count here means the final "inserted"
            # log only reflects features after the last error — confirm
            # this is intentional.
            count = 0
        count += 1

    exception = db.commit()
    if exception is not None:
        log.error("error in commit")

    log.info("inserted %s features" % count)
    return packet
def delete_features(self):
    """Remove every feature blob from the gml_objects table."""
    log.info('deleting ALL features in DB')
    pg = PostGIS(self.cfg.get_dict())
    pg.tx_execute("TRUNCATE gml_objects")
def init(self):
    """One-time init: open and keep a PostGIS connection."""
    self.db = PostGIS(self.cfg.get_dict())
    self.db.connect()
def init(self):
    """Initialize InfluxDB input plus a PostGIS connection, and build
    self.measurements_info: one dict per measurement with its time window
    (first/last 'calibrated' sample) and the resume point taken from the
    progress table.
    """
    InfluxDbInput.init(self)

    postgis_cfg = {
        'host': self.pg_host,
        'port': self.pg_port,
        'database': self.pg_database,
        'user': self.pg_user,
        'password': self.pg_password,
        'schema': self.pg_schema
    }
    self.db = PostGIS(postgis_cfg)
    self.db.connect()

    # One time: get all measurements and related info and store in structure
    self.measurements = self.query_db('SHOW MEASUREMENTS')
    for measurement in self.measurements:
        measurement_name = measurement['name']

        # First and last timestamp of 'calibrated' values in this measurement.
        date_start_s = self.query_db(
            'SELECT FIRST(calibrated), time FROM %s' % measurement_name)[0]['time']
        start_ts = self.date_str_to_ts_nanos(date_start_s)
        date_end_s = self.query_db(
            'SELECT LAST(calibrated), time FROM %s' % measurement_name)[0]['time']
        end_ts = self.date_str_to_ts_nanos(date_end_s)

        device_id = measurement_name
        if self.meas_name_to_device_id:
            if measurement_name not in self.meas_name_to_device_id:
                log.error(
                    'No device_id mapped for measurement (table) %s' % measurement_name)
                # NOTE(review): bare Exception with no message — consider
                # raising with the measurement name for easier debugging.
                raise Exception
            device_id = self.meas_name_to_device_id[measurement_name]

        # Shift time for current_ts from progress table if already in progress
        # otherwise use start time of measurement.
        current_ts = start_ts
        row_count = self.db.execute(self.progress_query + device_id)
        if row_count > 0:
            progress_rec = self.db.cursor.fetchone()
            # progress_rec[4] is a YYYYMMDD integer, progress_rec[5] the hour.
            ymd_last = str(progress_rec[4])
            year_last = ymd_last[0:4]
            month_last = ymd_last[4:6]
            day_last = ymd_last[6:]
            hour_last = progress_rec[5]
            # e.g. 2017-11-17T11:00:00.411Z
            date_str = '%s-%s-%sT%d:00:00.0Z' % (year_last, month_last, day_last, hour_last)
            current_ts = self.date_str_to_ts_nanos(date_str)
            # skip to next hour
            current_ts += (3600 * NANOS_FACTOR)

        # Store all info per device (measurement table) in list of dict
        self.measurements_info.append({
            'name': measurement_name,
            'date_start_s': date_start_s,
            'start_ts': start_ts,
            'date_end_s': date_end_s,
            'end_ts': end_ts,
            'current_ts': current_ts,
            'device_id': device_id
        })

    print(str(self.measurements_info))