    def pg_srs_constraint(self):
        log.info('set srs constraint')
        db = PostGIS(self.cfg.get_dict())
        srid = self.srid
        sql = "ALTER TABLE gml_objects DROP CONSTRAINT enforce_srid_gml_bounded_by;"
        db.tx_execute(sql)
        sql = "ALTER TABLE gml_objects ADD CONSTRAINT enforce_srid_gml_bounded_by CHECK (st_srid(gml_bounded_by) = (%s));" % srid
        db.tx_execute(sql)
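
Nearly every snippet below constructs its connection as PostGIS(self.cfg.get_dict()). A minimal sketch of the dict those calls expect, reusing the exact keys of the postgis_cfg dicts built explicitly in the InfluxDbInput examples further down (all values here are hypothetical):

    # Hypothetical connection settings; the key names mirror the postgis_cfg
    # dicts in the InfluxDbInput examples below.
    config_dict = {
        'host': 'localhost',
        'port': '5432',
        'database': 'gis',
        'user': 'postgres',
        'password': 'postgres',
        'schema': 'public'
    }
    db = PostGIS(config_dict)
    db.connect()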
    def init(self):
        # Connect only once to DB
        log.info('Init: connect to DB')
        self.db = PostGIS(self.cfg.get_dict())
        self.db.connect()

        # Let superclass read file list from Apache URL
        HttpInput.init(self)
Example #3
    def write(self, packet):
        if packet.data is None:
            return packet

        log.info('executing SQL')
        db = PostGIS(self.cfg.get_dict())
        rowcount = db.tx_execute(packet.data)
        log.info('executed SQL, rowcount=%d' % rowcount)
        return packet

    def get_feature_types(self):
        log.info('reading all featuretypes from DB')
        db = PostGIS(self.cfg.get_dict())
        db.connect()
        sql = "SELECT id,qname FROM feature_types"
        db.execute(sql)
        cur = db.cursor
        for record in cur:
            self.feature_type_ids[record[1]] = record[0]
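
The SELECT above implies a small lookup table mapping qualified feature type names to integer ids. A plausible minimal DDL, reconstructed from that query rather than taken from the source (the real table may carry more columns):

    # Hypothetical DDL consistent with "SELECT id,qname FROM feature_types";
    # column types are assumptions.
    sql = """
    CREATE TABLE feature_types (
        id    SERIAL PRIMARY KEY,
        qname TEXT NOT NULL  -- qualified (namespaced) feature type name
    )
    """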
    def __init__(self, configdict, section):
        SqliteDbInput.__init__(self, configdict, section)
        self.progress_query = self.cfg.get('progress_query')
        self.progress_update = self.cfg.get('progress_update')

        # Connect only once to DB
        log.info('Init: connect to Postgres DB')
        self.progress_db = PostGIS(self.cfg.get_dict())
        self.progress_db.connect()
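
Later examples issue this query by plain string concatenation (self.tracking_db.execute(self.progress_query + device_id)), so the configured value has to end right where the device id is appended. A hypothetical example value (table and column names invented):

    # Hypothetical configured value; the device id is appended verbatim.
    progress_query = "SELECT * FROM harvesting_progress WHERE device_id = "
    sql = progress_query + '42'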
Example #6
    def init(self):
        # Connect only once to DB
        log.info('Init: connect to DB')
        self.db = PostGIS(self.cfg.get_dict())
        self.db.connect()

        # If no explicit column names given, get from DB meta info
        self.columns = self.column_names
        if self.column_names is None:
            self.columns = self.db.get_column_names(self.cfg.get('table'),
                                                    self.cfg.get('schema'))
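
When column_names is not configured, the columns come from DB metadata via get_column_names(table, schema), exactly as called above. A hedged standalone equivalent with placeholder table and schema names:

    # Look up column names from DB metadata; 'gml_objects' and 'public' are
    # placeholder arguments, config_dict as sketched after the first example.
    db = PostGIS(config_dict)
    db.connect()
    columns = db.get_column_names('gml_objects', 'public')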
Example #7
    def init(self, config_dict):

        self.config_dict = config_dict
        self.process_name = config_dict['process_name']
        self.db = PostGIS(config_dict)
        self.db.connect()

        ids = dict()
        parameters = dict()
        models = dict()
        state = dict()

        # Query ANN Calibration Model and its State from DB for each calibrated sensor.
        if self.model_query is not None and len(self.sensor_model_names) > 0:
            log.info('Getting calibration models and state from database')
            for k in self.sensor_model_names:
                v = self.sensor_model_names[k]
                id, param, model = self.query_model(v)
                ids[k] = id
                parameters[k] = param
                models[k] = model

                model_state = self.query_state(id)
                state[k] = model_state

        else:
            log.info('No query given for fetching calibration models, or no '
                     'mapping from calibration models to gas components.')

        # Put Model and State info in the Device definitions.
        for k in ids:
            SENSOR_DEFS[k]['converter_model']['model_id'] = ids[k]
        for k in parameters:
            SENSOR_DEFS[k]['converter_model']['running_mean_weights'] = parameters[k]
        for k in models:
            SENSOR_DEFS[k]['converter_model']['mlp_regressor'] = models[k]
        for k, v in state.items():
            for device_id, device_state in v.items():
                # NB: use a distinct name to avoid shadowing the outer `state` dict
                for gas, gas_state in device_state.items():
                    v[device_id][gas] = RunningMean.from_dict(gas_state)
            SENSOR_DEFS[k]['converter_model']['state'] = v
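
The triple loop above fixes the shape of the state structure: sensor model key, then device id, then gas, with each leaf a dict that RunningMean.from_dict can revive. A hypothetical instance for illustration (all names and leaf fields invented):

    # Hypothetical state entry matching the nested loops above.
    state = {
        'co_final': {                         # sensor model key
            'device_42': {                    # device id
                'co': {'mean': 0.0, 'n': 0}   # serialized RunningMean (fields assumed)
            }
        }
    }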
Example #8
    def init(self):
        self.db = PostGIS(self.cfg.get_dict())
        self.db.connect()

        # One time: get all device ids
        self.fetch_devices()
Example #9
    def read(self, packet):
        if packet.is_end_of_stream():
            return packet

        if self.db is None:
            # First time read
            log.info("reading records from blobstore..")
            self.db = PostGIS(self.cfg.get_dict())
            self.db.connect()
            sql = self.cfg.get('sql')
            self.rowcount = self.db.execute(sql)
            self.cur = self.db.cursor
            log.info("Read records rowcount=%d" % self.rowcount)

            # Init separate connection to fetch objects referenced by xlink:href
            self.xlink_db = PostGIS(self.cfg.get_dict())
            self.xlink_db.connect()

        # Query active
        while self.cur is not None:
            if self.buffer is None:
                self.buffer = self.init_buf()
                self.buffer.write(self.start_container)

            # Get next blob record
            record = self.cur.fetchone()

            # End of all records
            if record is None:
                # End of records: start closing
                self.buffer.write(self.end_container)
                self.cur = None
                self.db.commit()

                # Only create doc if there are features in the buffer
                if self.feature_count > 0:
                    self.buffer_to_doc(packet)
                packet.set_end_of_doc()
                break
            else:
                # New record: embed feature blob in feature tags and write to buffer
                feature_blob = self.write_feature(record)

                # If we have local xlinks: fetch the related features as well from the DB and
                # output them within the same document (local href resolvable)
                # TODO: in some cases we may need to be recursive (xlinks in xlinked features...)

                # First construct a single query for all xlinks
                xlink_sql = None
                for xlink in self.regex_xlink_href.finditer(feature_blob):
                    gml_id = xlink.group(1).strip('"').strip('#')
                    # We don't want multiple occurrences of the same xlinked feature
                    if gml_id in self.xlink_ids:
                        continue

                    self.xlink_ids.add(gml_id)
                    if xlink_sql is None:
                        xlink_sql = "SELECT binary_object from gml_objects where gml_id = '%s'" % gml_id
                    else:
                        # NB: leading space required, otherwise the previous quote
                        # and OR run together into invalid SQL
                        xlink_sql += " OR gml_id = '%s'" % gml_id

                # Should we retrieve and write xlinked features?
                if xlink_sql is not None:
                    # Fetch from DB
                    self.xlink_db.execute(xlink_sql)
                    while True:
                        # Get next blob record
                        xlink_record = self.xlink_db.cursor.fetchone()
                        if xlink_record is None:
                            break
                        self.write_feature(xlink_record)

                # Should we output a doc
                if self.feature_count >= self.max_features_per_doc:
                    # End of records: create XML doc
                    self.buffer.write(self.end_container)
                    self.buffer_to_doc(packet)
                    break

        if self.cur is None:
            # All records handled: close off
            packet.set_end_of_stream()
            # log.info("[%s]" % packet.data)

        return packet
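
The reader above depends on self.regex_xlink_href, whose group(1) evidently still carries the surrounding quotes and the leading '#' (hence the .strip('"').strip('#')). A regex consistent with that handling, offered as an assumption rather than the source's actual definition:

    import re

    # group(1) keeps the quotes and '#', matching the stripping done above.
    regex_xlink_href = re.compile(r'xlink:href\s*=\s*("#[^"]+")')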
Example #10
    def init(self):
        # Connect only once to DB
        log.info('Init: connect to DB')
        self.db = PostGIS(self.cfg.get_dict())
        self.db.connect()
        self.init_columns()
Example #11
    def init(self):
        InfluxDbInput.init(self)

        # PostGIS for tracking Harvesting progress.
        # Tracking is automatically updated via a TRIGGER (see db-schema-raw).
        postgis_cfg = {
            'host': self.pg_host,
            'port': self.pg_port,
            'database': self.pg_database,
            'user': self.pg_user,
            'password': self.pg_password,
            'schema': self.pg_schema
        }
        self.tracking_db = PostGIS(postgis_cfg)
        self.tracking_db.connect()

        # One time: get all measurements and related info and store in structure
        measurements = self.get_measurement_names()
        for measurement in measurements:
            # Optional mapping from MEASUREMENT name to a device id,
            # otherwise the device_id is the Measurement name itself
            device_id = measurement
            if self.meas_name_to_device_id:
                if measurement not in self.meas_name_to_device_id:
                    log.warn('No device_id mapped for measurement (table) %s' %
                             measurement)
                    continue

                device_id = self.meas_name_to_device_id[measurement]

            date_start_s, start_ts = self.get_start_time(measurement)
            date_end_s, end_ts = self.get_end_time(measurement)
            start_ts = self.date_str_to_whole_hour_nanos(date_start_s)
            end_ts *= NANOS_FACTOR

            # Shift time for current_ts from progress table if already in progress
            # otherwise use start time of measurement.
            current_ts = start_ts
            row_count = self.tracking_db.execute(self.progress_query +
                                                 device_id)
            if row_count > 0:
                # Already in progress
                progress_rec = self.tracking_db.cursor.fetchone()
                ymd_last = str(progress_rec[4])
                year_last = ymd_last[0:4]
                month_last = ymd_last[4:6]
                day_last = ymd_last[6:]
                hour_last = progress_rec[5]
                # e.g. 2017-11-17T11:00:00.411Z
                date_str = '%s-%s-%sT%d:00:00.000Z' % (year_last, month_last,
                                                       day_last, hour_last - 1)
                current_ts = self.date_str_to_whole_hour_nanos(date_str)
                # skip to next hour
                # current_ts += (3600 * NANOS_FACTOR)

            # Store all info per device (measurement table) in list of dict
            self.measurements_info.append({
                'name': measurement,
                'date_start_s': date_start_s,
                'start_ts': start_ts,
                'date_end_s': date_end_s,
                'end_ts': end_ts,
                'current_ts': current_ts,
                'device_id': device_id
            })

        print("measurements_info: %s" % str(self.measurements_info))
Example #12
    def write(self, packet):
        if packet.data is None:
            return packet

        gml_doc = packet.data
        log.info('inserting features in DB')
        db = PostGIS(self.cfg.get_dict())
        db.connect()
        #        print self.to_string(gml_doc, False, False)
        #        NS = {'base': 'urn:x-inspire:specification:gmlas:BaseTypes:3.2', 'gml': 'http://www.opengis.net/gml/3.2'}
        #        featureMembers = gml_doc.xpath('//base:member/*', namespaces=NS)
        featureMembers = gml_doc.xpath("//*[local-name() = '%s']/*" %
                                       self.feature_member_tag)
        count = 0
        gml_ns = None
        for childNode in featureMembers:
            if gml_ns is None:
                if 'gml' in childNode.nsmap:
                    gml_ns = childNode.nsmap['gml']
                elif 'GML' in childNode.nsmap:
                    gml_ns = childNode.nsmap['GML']

            gml_id = childNode.get('{%s}id' % gml_ns)

            feature_type_id = self.feature_type_ids[childNode.tag]

            # Find a GML geometry in the GML NS
            ogrGeomWKT = None
            #            gmlMembers = childNode.xpath(".//gml:Point|.//gml:Curve|.//gml:Surface|.//gml:MultiSurface", namespaces=NS)
            gmlMembers = childNode.xpath(
                ".//*[local-name() = 'Point']|.//*[local-name() = 'Polygon']|.//*[local-name() = 'Curve']|.//*[local-name() = 'Surface']|.//*[local-name() = 'MultiSurface']"
            )
            geom_str = None
            for gmlMember in gmlMembers:
                if geom_str is None:
                    geom_str = etree.tostring(gmlMember)
                #                   no need for GDAL Python bindings for now, maybe when we optimize with COPY instead of INSERT
            #                    ogrGeom = ogr.CreateGeometryFromGML(str(gmlStr))
            #                    if ogrGeom is not None:
            #                        ogrGeomWKT = ogrGeom.ExportToWkt()
            #                        if ogrGeomWKT is not None:
            #                            break

            blob = etree.tostring(childNode,
                                  pretty_print=False,
                                  xml_declaration=False,
                                  encoding='UTF-8')

            if geom_str is None:
                sql = "INSERT INTO gml_objects(gml_id, ft_type, binary_object) VALUES (%s, %s, %s)"
                parameters = (gml_id, feature_type_id, db.make_bytea(blob))
            else:
                # ST_SetSRID(ST_GeomFromGML(%s)),-1)
                sql = "INSERT INTO gml_objects(gml_id, ft_type, binary_object, gml_bounded_by) VALUES (%s, %s, %s, ST_SetSRID( ST_GeomFromGML(%s),%s) )"
                parameters = (gml_id, feature_type_id, db.make_bytea(blob),
                              geom_str, self.srid)

            if db.execute(sql, parameters) == -1:
                log.error(
                    "feat num# = %d error inserting feature blob=%s (but continuing)"
                    % (count, blob))

                # the commit will fail, but it also closes the connection
                db.commit()

                # proceed...
                log.info('retrying to proceed with remaining features...')
                db = PostGIS(self.cfg.get_dict())
                db.connect()
                count = 0

            count += 1

        exception = db.commit()
        if exception is not None:
            log.error("error in commit")

        log.info("inserted %s features" % count)
        return packet
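
Taken together, the INSERT statements above and the SRID constraint from the first example pin down the minimum shape of gml_objects. A reconstructed sketch, not the source's actual DDL (column types and the example SRID 28992 are assumptions):

    # Hypothetical DDL consistent with the INSERTs and the
    # enforce_srid_gml_bounded_by constraint.
    sql = """
    CREATE TABLE gml_objects (
        gml_id         TEXT PRIMARY KEY,
        ft_type        INTEGER REFERENCES feature_types(id),
        binary_object  BYTEA,
        gml_bounded_by GEOMETRY,
        CONSTRAINT enforce_srid_gml_bounded_by
            CHECK (st_srid(gml_bounded_by) = 28992)
    )
    """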
Example #13
    def delete_features(self):
        log.info('deleting ALL features in DB')
        db = PostGIS(self.cfg.get_dict())
        db.tx_execute("TRUNCATE gml_objects")

    def init(self):
        self.db = PostGIS(self.cfg.get_dict())
        self.db.connect()

    def init(self):
        InfluxDbInput.init(self)
        postgis_cfg = {
            'host': self.pg_host,
            'port': self.pg_port,
            'database': self.pg_database,
            'user': self.pg_user,
            'password': self.pg_password,
            'schema': self.pg_schema
        }
        self.db = PostGIS(postgis_cfg)
        self.db.connect()

        # One time: get all measurements and related info and store in structure
        self.measurements = self.query_db('SHOW MEASUREMENTS')
        for measurement in self.measurements:
            measurement_name = measurement['name']
            date_start_s = self.query_db(
                'SELECT FIRST(calibrated), time FROM %s' %
                measurement_name)[0]['time']
            start_ts = self.date_str_to_ts_nanos(date_start_s)
            date_end_s = self.query_db(
                'SELECT LAST(calibrated), time FROM %s' %
                measurement_name)[0]['time']
            end_ts = self.date_str_to_ts_nanos(date_end_s)
            device_id = measurement_name
            if self.meas_name_to_device_id:
                if measurement_name not in self.meas_name_to_device_id:
                    log.error(
                        'No device_id mapped for measurement (table) %s' %
                        measurement_name)
                    raise Exception(
                        'no device_id mapped for measurement (table) %s' %
                        measurement_name)

                device_id = self.meas_name_to_device_id[measurement_name]

            # Shift time for current_ts from progress table if already in progress
            # otherwise use start time of measurement.
            current_ts = start_ts
            row_count = self.db.execute(self.progress_query + device_id)
            if row_count > 0:
                progress_rec = self.db.cursor.fetchone()
                ymd_last = str(progress_rec[4])
                year_last = ymd_last[0:4]
                month_last = ymd_last[4:6]
                day_last = ymd_last[6:]
                hour_last = progress_rec[5]
                # e.g. 2017-11-17T11:00:00.411Z
                date_str = '%s-%s-%sT%d:00:00.0Z' % (year_last, month_last,
                                                     day_last, hour_last)
                current_ts = self.date_str_to_ts_nanos(date_str)
                # skip to next hour
                current_ts += (3600 * NANOS_FACTOR)

            # Store all info per device (measurement table) in list of dict
            self.measurements_info.append({
                'name': measurement_name,
                'date_start_s': date_start_s,
                'start_ts': start_ts,
                'date_end_s': date_end_s,
                'end_ts': end_ts,
                'current_ts': current_ts,
                'device_id': device_id
            })

        print(str(self.measurements_info))
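
Both InfluxDB examples consult meas_name_to_device_id whenever a measurement (table) name does not double as the device id. A hypothetical mapping (all names invented):

    # Hypothetical measurement-name -> device-id mapping.
    meas_name_to_device_id = {
        'measurement_station_1': 'device_42',
        'measurement_station_2': 'device_43'
    }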