# -*- coding: utf-8 -*- # # Writes the payload of a packet as a string to a file. # Based on outputs.fileoutput.FileOutput. # # Author: Frank Steggink # from stetl.component import Config from stetl.filter import Filter from stetl.util import Util from stetl.packet import FORMAT import os log = Util.get_log('packetwriter') class PacketWriter(Filter): """ Writes the payload of a packet as a string to a file. consumes=FORMAT.any, produces=FORMAT.string """ # Start attribute config meta @Config(ptype=str, default=None, required=True) def file_path(self): """ File path to write content to. Required: True
#!/usr/bin/env python # # Extracts arrays of etree GML features from an GML etree document. # # Author: Just van den Broecke # from stetl.util import Util from stetl.filter import Filter from stetl.packet import FORMAT log = Util.get_log('gmlfeatureextractor') class GmlFeatureExtractor(Filter): """ Extract arrays of GML features etree elements from etree docs. consumes=FORMAT.etree_doc, produces=FORMAT.etree_feature_array """ # XPATH Query base for extracting features by (non-namespaced thus local-name) tagname xpath_base = "//*[local-name() = '%s']" # Constructor def __init__(self, configdict, section='gml_feature_extractor'): Filter.__init__(self, configdict, section, consumes=FORMAT.etree_doc, produces=FORMAT.etree_feature_array) log.info("cfg = %s" % self.cfg.to_string()) # Build the Xpath expresion from configures tagnames self.feature_tags = self.cfg.get('feature_tags').split(',')
#!/usr/bin/env python # -*- coding: utf-8 -*- # # Executes the given command and returns the captured output. # # Author: Frank Steggink # import subprocess import os from stetl.component import Config from stetl.filter import Filter from stetl.util import Util from stetl.packet import FORMAT log = Util.get_log('execfilter') class ExecFilter(Filter): """ Executes any command (abstract base class). """ @Config(ptype=str, default='', required=False) def env_args(self): """ Provides of list of environment variables which will be used when executing the given command. Example: env_args = pgpassword=postgres othersetting=value~with~spaces """ pass
# -*- coding: utf-8 -*- # # Output classes for ETL, databases. # # Author: Just van den Broecke # from stetl.output import Output from stetl.util import Util from stetl.packet import FORMAT from stetl.component import Config from stetl.postgis import PostGIS log = Util.get_log('dboutput') class DbOutput(Output): """ Output to any database (abstract base class). """ def __init__(self, configdict, section, consumes): Output.__init__(self, configdict, section, consumes) def write(self, packet): return packet class PostgresDbOutput(DbOutput): """ Output to PostgreSQL database. Input is an SQL string.
#!/usr/bin/env python # # Splits stream of GML lines into etree docs. # # Author: Just van den Broecke # import codecs from deprecated.sphinx import deprecated from stetl.util import Util, etree, StringIO from stetl.filter import Filter from stetl.packet import FORMAT log = Util.get_log('gmlsplitter') @deprecated( version='1.0.4', reason= 'Use the more robust XmlElementStreamerFileInput + XmlAssembler instead!!!' ) class GmlSplitter(Filter): """ Split a stream of text XML lines into documents TODO phase out consumes=FORMAT.xml_line_stream, produces=FORMAT.etree_doc """ def __init__(self, configdict, section='gml_splitter'): Filter.__init__(self, configdict, section,
#!/usr/bin/env python # # Converts Stetl Packet FORMATs. This can be used to connect # Stetl components with different output/input formats. # # Author:Just van den Broecke import json from stetl.component import Config from stetl.util import Util, etree from stetl.filter import Filter from stetl.packet import FORMAT log = Util.get_log("formatconverter") class FormatConverter(Filter): """ Converts (almost) any packet format (if converter available). consumes=FORMAT.any, produces=FORMAT.any but actual formats are changed at initialization based on the input to output format to be converted via the input_format and output_format config parameters. """ # Start attribute config meta # Applying Decorator pattern with the Config class to provide # read-only config values from the configured properties. @Config(ptype=dict, default=None, required=False) def converter_args(self):
#!/usr/bin/env python # -*- coding: utf-8 -*- # # Splits stream of XML elements into etree docs. # # Author: Just van den Broecke # from stetl.util import Util, etree from stetl.filter import Filter from stetl.packet import FORMAT log = Util.get_log('xmlassembler') class XmlAssembler(Filter): """ Split a stream of etree DOM XML elements (usually Features) into etree DOM docs. Consumes and buffers elements until max_elements reached, will then produce an etree doc. consumes=FORMAT.etree_element_stream, produces=FORMAT.etree_doc """ xpath_base = "//*[local-name() = '%s']" # Constructor def __init__(self, configdict, section): Filter.__init__(self, configdict, section, consumes=FORMAT.etree_element_stream, produces=FORMAT.etree_doc)
# # Filter that prepares a GFS file which can be used to load with ogr2ogr. # # Author: Frank Steggink import os import re import subprocess from stetl.component import Config from stetl.filter import Filter from stetl.packet import FORMAT from stetl.util import Util, etree from string import Template log = Util.get_log("gfspreparationfilter") class GfsPreparationFilter(Filter): """ This filter prepares a GFS file, so any GML data will be loaded optimally with ogr2ogr. This is done by limiting the input GFS to only the feature types which actually occur in the data, and by adding feature count elements. """ XSLT_TEMPLATE = """<?xml version="1.0" encoding="UTF-8"?> <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> <xsl:output method="xml" version="1.0" encoding="utf-8" indent="yes" omit-xml-declaration="yes"/> <xsl:strip-space elements="*"/> <xsl:template match="/ | @* | node()"> <xsl:copy> <xsl:apply-templates select="@* | node()" />
# -*- coding: utf-8 -*- # # Output classes for ETL, executing commands. # # Author: Frank Steggink # import subprocess import os import shutil from stetl.component import Config from stetl.output import Output from stetl.util import Util from stetl.packet import FORMAT log = Util.get_log('execoutput') class ExecOutput(Output): """ Executes any command (abstract base class). """ def __init__(self, configdict, section, consumes): Output.__init__(self, configdict, section, consumes) def write(self, packet): return packet def execute_cmd(self, cmd): use_shell = True if os.name == 'nt': use_shell = False
# -*- coding: utf-8 -*- # # Example of user-defined component. # # Author:Just van den Broecke from stetl.util import Util from stetl.filter import Filter from stetl.packet import FORMAT log = Util.get_log("myfilter") class MyFilter(Filter): # Constructor def __init__(self, configdict, section): Filter.__init__(self, configdict, section, consumes=FORMAT.etree_doc, produces=FORMAT.etree_doc) def invoke(self, packet): if packet.data is None: return packet return self.do_something(packet) def do_something(self, packet): log.info("CALLING MyFilter OK!!!!") data = packet.data self.show_data(data) return packet def show_data(self, data):
#!/usr/bin/env python # -*- coding: utf-8 -*- # # Base classes for HTTP output like WFS-T and SOS-T or any other HTTP writing service. # # Author: Just van den Broecke # from stetl.output import Output from stetl.util import Util from stetl.packet import FORMAT from stetl.component import Config import httplib import base64 log = Util.get_log('httpoutput') class HttpOutput(Output): """ Output via HTTP protocol, usually via POST. consumes=FORMAT.any """ @Config(ptype=str, default=None, required=True) def host(self): """ The hostname/IP addr for target request. Required: True Default: None
#!/usr/bin/env python # -*- coding: utf-8 -*- # # Input classes for ETL via GDAL OGR. # # Author: Just van den Broecke # import subprocess from stetl.util import Util from stetl.input import Input from stetl.packet import FORMAT log = Util.get_log("ogrinput") class OgrPostgisInput(Input): """ Input from PostGIS via ogr2ogr command. TODO: look to use Fiona or direct OGR via Python. produces=FORMAT.xml_line_stream """ # TODO make this template configurable so we can have generic ogr2ogr input.... pg_conn_tmpl = "PG:host=%s dbname=%s active_schema=%s user=%s password=%s port=%s" cmd_tmpl = "ogr2ogr|-t_srs|%s|-s_srs|%s|-f|GML|%s|-dsco|FORMAT=%s|-lco|DIM=%s|%s|-SQL|%s|-nln|%s|%s" # Constructor def __init__(self, configdict, section): Input.__init__(self, configdict, section, produces=FORMAT.xml_line_stream)
#!/usr/bin/env python # # Output classes for ETL. # # Author: Just van den Broecke # import subprocess import os import shutil from stetl.component import Config from stetl.output import Output from stetl.util import Util, gdal, ogr, osr from stetl.packet import FORMAT log = Util.get_log('ogroutput') class OgrOutput(Output): """ Direct GDAL OGR output via Python OGR wrapper. Via the Python API http://gdal.org/python OGR Features are written. This output can write almost any geospatial, OGR-defined, dataformat. consumes=FORMAT.ogr_feature or FORMAT.ogr_feature_array """ # Start attribute config meta # Applying Decorator pattern with the Config class to provide # read-only config values from the configured properties.
# -*- coding: utf-8 -*- # # MeasurementsDbInput: Reads RIVM raw AQ/LML file data from measurements table and converts to recordlist # # Author:Just van den Broecke from stetl.util import Util, etree from stetl.inputs.dbinput import PostgresDbInput from stetl.packet import FORMAT from stetl.postgis import PostGIS from datetime import datetime log = Util.get_log("MeasurementsDbInput") class MeasurementsDbInput(PostgresDbInput): """ Reads RIVM raw AQ/LML file data from measurements table and converts to recordlist """ def __init__(self, configdict, section): PostgresDbInput.__init__(self, configdict, section) self.progress_query = self.cfg.get('progress_query') self.progress_update = self.cfg.get('progress_update') self.db = None def after_chain_invoke(self, packet): """ Called right after entire Component Chain invoke. Used to update last id of processed file record. """ log.info('Updating progress table with last_id= %d' % self.last_id)
# # Author: Pieter Marsman - 2016 import sys import traceback from stetl.component import Config from stetl.filter import Filter from stetl.inputs.dbinput import PostgresDbInput from stetl.packet import FORMAT from stetl.util import Util from dateutil import parser from sensordefs import * log = Util.get_log("Extractor") class ExtractFilter(Filter): """ Filter to consume single raw record with sensor (single hour) timeseries values and extract these for each component. Input is a single timeseries record for a single hour with all sensorvalues for a single device within that hour. """ @Config(ptype=list, default=[], required=True) def sensor_names(self): """ The output sensor names to extract. Required: True Default: []
#!/usr/bin/env python # -*- coding: utf-8 -*- # # Filter: XML validation. # # NB: you need to have installed libxml2 2.8.0 or newer! # Older libxml2 versions like 2.7.8 have a bug which causes failure in GML Schema # parsing. See https://bugzilla.gnome.org/show_bug.cgi?id=630130 # # Author:Just van den Broecke # from stetl.util import Util, etree from stetl.filter import Filter from stetl.packet import FORMAT log = Util.get_log("xmlvalidator") class XmlSchemaValidator(Filter): """ Validates an etree doc and prints result to log. consumes=FORMAT.etree_doc, produces=FORMAT.etree_doc """ # Constructor def __init__(self, configdict, section): Filter.__init__(self, configdict, section, consumes=FORMAT.etree_doc, produces=FORMAT.etree_doc) self.enabled = self.cfg.get_bool('enabled', True) self.xsd = self.cfg.get('xsd') log.info("Building the Schema once with (GML XSD) dependencies for schema=%s (be patient...)" % self.xsd)
# -*- coding: utf-8 -*- # # Input classes for ETL, Files. # # Author: Just van den Broecke # from stetl.input import Input from stetl.util import Util, etree from stetl.packet import FORMAT log = Util.get_log('fileinput') class FileInput(Input): """ Abstract base class for specific FileInputs. """ def __init__(self, configdict, section, produces): Input.__init__(self, configdict, section, produces) # path to file or files: can be a dir or files or even multiple, comma separated self.file_path = self.cfg.get('file_path') # The filename pattern according to Python glob.glob self.filename_pattern = self.cfg.get('filename_pattern', '*.[gxGX][mM][lL]') # Recurse into directories ? self.depth_search = self.cfg.get_bool('depth_search', False) # Create the list of files to be used as input self.file_list = Util.make_file_list(self.file_path, None, self.filename_pattern, self.depth_search)
# -*- coding: utf-8 -*- # # Input classes for ETL via HTTP. # # Author: Just van den Broecke # from stetl.input import Input from stetl.util import Util from stetl.packet import FORMAT import urllib import urllib2 log = Util.get_log('httpinput') class HttpInput(Input): """ Input via HTTP protocol. produces=FORMAT.any """ def __init__(self, configdict, section, produces=FORMAT.any): Input.__init__(self, configdict, section, produces) # url and optional parameters self.url = self.cfg.get('url') self.parameters = self.cfg.get('parameters') # http://docs.python.org/2/howto/urllib2.html
#!/usr/bin/env python # # Transformation of an etree doc with XSLT. # # Author:Just van den Broecke from stetl.component import Config from stetl.util import Util, etree from stetl.filter import Filter from stetl.packet import FORMAT log = Util.get_log("xsltfilter") class XsltFilter(Filter): """ Invokes XSLT processor (via lxml) for given XSLT script on an etree doc. consumes=FORMAT.etree_doc, produces=FORMAT.etree_doc """ @Config(ptype=str, required=True) def script(self): """ Path to XSLT script file. """ pass # Constructor def __init__(self, configdict, section): Filter.__init__(self, configdict,
# Expands an archive file into a collection of files. # # Author: Just van den Broecke 2021 # import os.path from stetl.component import Config from stetl.filter import Filter from stetl.util import Util from stetl.packet import FORMAT log = Util.get_log('archiveexpander') class ArchiveExpander(Filter): """ Abstract Base Class. Expands an archive file into a collection of files. consumes=FORMAT.string, produces=FORMAT.string """ # Start attribute config meta @Config(ptype=str, default='temp_dir', required=True) def target_dir(self): """ Target directory to write the extracted files to. """ pass @Config(ptype=bool, default=False, required=False)
# -*- coding: utf-8 -*-
#
# String filtering.
#
# Author:Just van den Broecke
from stetl.component import Config
from stetl.util import Util
from stetl.filter import Filter
from stetl.packet import FORMAT

log = Util.get_log("stringfilter")


class StringFilter(Filter):
    """
    Common base for Filters that operate on string data.

    Subclasses supply the actual work by overriding ``filter_string``.
    """

    def __init__(self, configdict, section, consumes, produces):
        # The packet formats are chosen by the concrete subclass and simply
        # forwarded to the Filter base class.
        Filter.__init__(self, configdict, section, consumes=consumes, produces=produces)

    def invoke(self, packet):
        """
        Run ``filter_string`` on the packet, unless the packet carries no data.

        A packet whose ``data`` is None is returned untouched.
        """
        if packet.data is not None:
            return self.filter_string(packet)
        return packet

    def filter_string(self, packet):
        """
        Hook for subclasses; this base version does nothing and returns None.
        """
        return None
# -*- coding: utf-8 -*- # # Input classes for ETL, databases. # # Author: Just van den Broecke # from stetl.component import Config from stetl.input import Input from stetl.util import Util from stetl.packet import FORMAT from stetl.postgis import PostGIS log = Util.get_log('dbinput') class DbInput(Input): """ Input from any database (abstract base class). """ def __init__(self, configdict, section, produces): Input.__init__(self, configdict, section, produces=produces) def read(self, packet): return packet class SqlDbInput(DbInput): """ Input using a query from any SQL-based RDBMS (abstract base class). """
# Output classes for ETL with SensorThings API. # # Author: Just van den Broecke # from os import path import requests import json import base64 from stetl.util import Util from stetl.packet import FORMAT from stetl.component import Config from stetl.outputs.httpoutput import HttpOutput log = Util.get_log('staoutput') class STAOutput(HttpOutput): """ Output via SensorThings API (STA) over plain HTTP using the HttpOutput base class. See examples: http://www.sensorup.com/docs/?python consumes=FORMAT.record_array """ @Config(ptype=str, default='application/json;charset=UTF-8', required=False) def content_type(self): """
#!/usr/bin/env python # -*- coding: utf-8 -*- # # POST data via WFS Transactional protocol (WFS-T). # # Author: Just van den Broecke # from stetl.component import Config from stetl.output import Output from stetl.util import Util from stetl.packet import FORMAT import httplib log = Util.get_log('wfsoutput') class WFSTOutput(Output): """ Insert features via WFS-T (WFS Transaction) OGC protocol from an etree doc. consumes=FORMAT.etree_doc """ # Start attribute config meta @Config(ptype=str, required=True, default=None) def wfs_host(self): """ Hostname-part of URL e.g. geodata.ngr.nl. """ pass
# -*- coding: utf-8 -*- # # MeasurementsDbInput: Reads SmartEm raw AQ/LML file data from measurements table and converts to recordlist # # Author:Just van den Broecke from stetl.util import Util, etree from stetl.inputs.dbinput import PostgresDbInput from stetl.packet import FORMAT from stetl.postgis import PostGIS from datetime import datetime log = Util.get_log("MeasurementsDbInput") class MeasurementsDbInput(PostgresDbInput): """ Reads SmartEm raw AQ/LML file data from measurements table and converts to recordlist """ def __init__(self, configdict, section): PostgresDbInput.__init__(self, configdict, section) self.progress_query = self.cfg.get('progress_query') self.progress_update = self.cfg.get('progress_update') self.db = None def after_chain_invoke(self, packet): """ Called right after entire Component Chain invoke. Used to update last id of processed file record. """
# -*- coding: utf-8 -*-
#
# Output classes for ETL.
#
# Author: Just van den Broecke
#
from stetl.output import Output
from stetl.util import Util
from stetl.packet import FORMAT

log = Util.get_log('standardoutput')


class StandardOutput(Output):
    """
    Output that prints whatever it receives to standard output.

    consumes=FORMAT.any
    """

    def __init__(self, configdict, section):
        Output.__init__(self, configdict, section, consumes=FORMAT.any)

    def write(self, packet):
        """
        Print the packet's string form to stdout and return the packet.

        A packet whose ``data`` is None is returned without printing anything.
        """
        if packet.data is not None:
            # Default: print to stdout
            print(packet.to_string())
        return packet
# -*- coding: utf-8 -*- # # Writes the payload of a packet as a string to a file. # Based on outputs.fileoutput.FileOutput. # # Author: Frank Steggink # from stetl.component import Config from stetl.filter import Filter from stetl.util import Util from stetl.packet import FORMAT import os log = Util.get_log('packetwriter') class PacketWriter(Filter): """ Writes the payload of a packet as a string to a file. consumes=FORMAT.any, produces=FORMAT.string """ # Start attribute config meta @Config(ptype=str, default=None, required=True) def file_path(self): """ File path to write content to. """ pass
from stetl.component import Config from stetl.inputs.dbinput import PostgresDbInput from stetl.util import Util from smartem.sosinput import SosInput log = Util.get_log("RIVMSosInput") class RIVMSosInput(SosInput, PostgresDbInput): """ Specialized SOS Input for RIVM SOS, adds progress tracking. """ @Config(ptype=str, required=True) def progress_query(self): """ Query to fetch progress for feature Required: True """ def __init__(self, configdict, section): SosInput.__init__(self, configdict, section) PostgresDbInput.__init__(self, configdict, section) self.progress = dict() def init(self): SosInput.init(self) PostgresDbInput.init(self) progress_list = self.do_query(self.progress_query) for progress_row in progress_list:
#!/usr/bin/env python # -*- coding: utf-8 -*- # # Output classes for ETL. # # Author: Just van den Broecke # from stetl.outputs.httpoutput import HttpOutput from stetl.util import Util from stetl.packet import FORMAT log = Util.get_log('sosoutput') class SOSTOutput(HttpOutput): """ Output via SOS-T protocol over plain HTTP. consumes=FORMAT.record """ def __init__(self, configdict, section): HttpOutput.__init__(self, configdict, section, consumes=FORMAT.record_array) self.content_type = self.cfg.get('content_type', 'application/json;charset=UTF-8') self.sos_request = self.cfg.get('sos_request', 'insert-observation') # Template file, to be used as POST body with substituted values self.template_file_ext = self.cfg.get('template_file_ext', 'json')
# Transformation of any input using Python Templating as # meant in: https://wiki.python.org/moin/Templating. # A TemplatingFilter typically is configured with a template file. # The input is typically the Template context, the variables to be substituted. # The output is a string passed to the next Filter or Output. # # Author:Just van den Broecke from stetl.util import Util, ogr, osr from stetl.component import Config from stetl.filter import Filter from stetl.packet import FORMAT from string import Template import os log = Util.get_log("templatingfilter") class TemplatingFilter(Filter): """ Abstract base class for specific template-based filters. See https://wiki.python.org/moin/Templating Subclasses implement a specific template language like Python string.Template, Mako, Genshi, Jinja2, consumes=FORMAT.any, produces=FORMAT.string """ # Start attribute config meta # Applying Decorator pattern with the Config class to provide # read-only config values from the configured properties.
#!/usr/bin/env python # # Transformation of an etree doc with XSLT. # # Author:Just van den Broecke from stetl.component import Config from stetl.util import Util, etree from stetl.filter import Filter from stetl.packet import FORMAT log = Util.get_log("xsltfilter") class XsltFilter(Filter): """ Invokes XSLT processor (via lxml) for given XSLT script on an etree doc. consumes=FORMAT.etree_doc, produces=FORMAT.etree_doc """ @Config(ptype=str, required=True) def script(self): """ Path to XSLT script file. """ pass # Constructor def __init__(self, configdict, section): Filter.__init__(self, configdict, section, consumes=FORMAT.etree_doc, produces=FORMAT.etree_doc)
# -*- coding: utf-8 -*- # # Reads an XML file and returns XML elements. # Based on inputs.fileinput.XmlElementStreamFileInput. # # Author: Frank Steggink # from copy import deepcopy from stetl.component import Config from stetl.filter import Filter from stetl.util import Util, etree from stetl.packet import FORMAT log = Util.get_log('xmlelementreader') class XmlElementReader(Filter): """ Extracts XML elements from a file, outputs each feature element in Packet. Parsing is streaming (no internal DOM buildup) so any file size can be handled. Use this class for your big GML files! consumes=FORMAT.string, produces=FORMAT.etree_element """ # Start attribute config meta @Config(ptype=list, default=None, required=True) def element_tags(self): """ Comma-separated string of XML (feature) element tag names of the elements that should be extracted
# Filter that does nothing, just passes any data through.
#
# Author:Just van den Broecke
from stetl.util import Util
from stetl.filter import Filter
from stetl.packet import FORMAT

log = Util.get_log("nullfilter")


class NullFilter(Filter):
    """
    Pass-through Filter: returns every packet exactly as it was received.
    Mainly used in Test Cases.
    """

    def __init__(self, configdict, section, consumes=FORMAT.any, produces=FORMAT.any):
        # Both formats default to FORMAT.any; the values are forwarded
        # unchanged to the Filter base class.
        Filter.__init__(self, configdict, section, consumes=consumes, produces=produces)

    def invoke(self, packet):
        """
        Return the incoming packet unmodified.
        """
        return packet
#!/usr/bin/env python # # Input classes for ETL. # # Author: Just van den Broecke # import codecs import re from stetl.component import Config from stetl.postgis import PostGIS from stetl.input import Input from stetl.util import Util, etree, StringIO from stetl.packet import FORMAT log = Util.get_log('deegreeinput') class DeegreeBlobstoreInput(Input): """ Read features from deegree Blobstore DB into an etree doc. produces=FORMAT.etree_doc """ # Start attribute config meta @Config(ptype=int, required=False, default=10000) def max_features_per_doc(self): """ Max features to read from input feature GML stream per internal document.
#!/usr/bin/env python # -*- coding: utf-8 -*- # # Output Components for deegree server storage (www.deegree.org). # # Author: Just van den Broecke # # NB deegree also supports WFS-T! # from stetl.postgis import PostGIS from stetl.output import Output from stetl.util import Util, etree from stetl.packet import FORMAT import os log = Util.get_log('deegreeoutput') class DeegreeBlobstoreOutput(Output): """ Insert features into deegree Blobstore from an etree doc. consumes=FORMAT.etree_doc """ def __init__(self, configdict, section): Output.__init__(self, configdict, section, consumes=FORMAT.etree_doc) self.overwrite = self.cfg.get_bool('overwrite') self.srid = self.cfg.get_int('srid', -1) self.feature_member_tag = self.cfg.get('feature_member_tag') self.feature_type_ids = {} def init(self):
# Transformation of any input using Python Templating as # meant in: https://wiki.python.org/moin/Templating. # A TemplatingFilter typically is configured with a template file. # The input is typically the Template context, the variables to be substituted. # The output is a string passed to the next Filter or Output. # # Author:Just van den Broecke import os from stetl.util import Util, ogr, osr from stetl.component import Config from stetl.filter import Filter from stetl.packet import FORMAT from string import Template log = Util.get_log("templatingfilter") class TemplatingFilter(Filter): """ Abstract base class for specific template-based filters. See https://wiki.python.org/moin/Templating Subclasses implement a specific template language like Python string.Template, Mako, Genshi, Jinja2, consumes=FORMAT.any, produces=FORMAT.string """ # Start attribute config meta # Applying Decorator pattern with the Config class to provide # read-only config values from the configured properties.
#!/usr/bin/env python # -*- coding: utf-8 -*- # # Converts Stetl Packet FORMATs. This can be used to connect # Stetl components with different output/input formats. # # Author:Just van den Broecke from stetl.component import Config from stetl.util import Util, etree from stetl.filter import Filter from stetl.packet import FORMAT import json log = Util.get_log("formatconverter") class FormatConverter(Filter): """ Converts (almost) any packet format (if converter available). consumes=FORMAT.any, produces=FORMAT.any but actual formats are changed at initialization based on the input to output format to be converted via the input_format and output_format config parameters. """ # Start attribute config meta # Applying Decorator pattern with the Config class to provide # read-only config values from the configured properties. @Config(ptype=dict, default=None, required=False)
# # Filter to consume a raw record of Smart Emission data (one hour for one device) , refining these, producing records. # # Author: Just van den Broecke - 2015 import sys, traceback from stetl.filter import Filter from stetl.util import Util from stetl.packet import FORMAT from stetl.component import Config import pytz from sensordefs import * log = Util.get_log("RefineFilter") class RefineFilter(Filter): """ Filter to consume single raw record with sensor (single hour) timeseries values and produce refined record for each component. Refinement entails: calibration (e.g. Ohm to ug/m3) and aggregation (hour-values). Input is a single timeseries record for a single hour with all sensorvalues for a single device within that hour. """ @Config(ptype=list, default=[], required=True) def sensor_names(self): """ The output sensor names to refine. Required: True
#!/usr/bin/env python # # Extracts data from a string using a regular expression and generates a record. # # Author: Frank Steggink import re from stetl.component import Config from stetl.filter import Filter from stetl.packet import FORMAT from stetl.util import Util log = Util.get_log("regexfilter") class RegexFilter(Filter): """ Extracts data from a string using a regular expression and returns the named groups as a record. consumes=FORMAT.string, produces=FORMAT.record """ # Start attribute config meta # Applying Decorator pattern with the Config class to provide # read-only config values from the configured properties. @Config(ptype=str, default=None, required=True) def pattern_string(self): """ Regex pattern string. Should contain named groups. """ pass
# # Filter that deals with subfeatures in BGT GML files. # # Author: Frank Steggink import os from copy import deepcopy # We need specifically lxml, because of the incremental XML generation from lxml import etree from stetl.component import Config from stetl.filter import Filter from stetl.packet import FORMAT from stetl.util import Util log = Util.get_log("subfeaturehandler") class SubFeatureHandler(Filter): """ This filter checks whether the data file contains the given parent features. If this is the case, the parent feature and subfeatures are split into different features. """ # Start attribute config meta # Applying Decorator pattern with the Config class to provide # read-only config values from the configured properties. @Config(ptype=str, default=None, required=True) def temp_file(self): """
# Extracts a file from a ZIP file, and saves it as the given file name. # # Author: Frank Steggink # from stetl.component import Config from stetl.filter import Filter from stetl.util import Util from stetl.packet import FORMAT log = Util.get_log('zipfileextractor') BUFFER_SIZE = 1024 * 1024 * 1024 class ZipFileExtractor(Filter): """ Extracts a file from a ZIP file, and saves it as the given file name. consumes=FORMAT.record, produces=FORMAT.string """ # Start attribute config meta @Config(ptype=str, default=None, required=True) def file_path(self): """ File name to write the extracted file to. """ pass @Config(ptype=bool, default=True, required=False) def delete_file(self):
# -*- coding: utf-8 -*-
#
# Filter that does nothing, just passes any data through.
#
# Author:Just van den Broecke
from stetl.util import Util
from stetl.filter import Filter
from stetl.packet import FORMAT

log = Util.get_log("nullfilter")


class NullFilter(Filter):
    """
    Pass-through Filter: returns every packet exactly as it was received.
    Mainly used in Test Cases.
    """

    def __init__(self, configdict, section, consumes=FORMAT.any, produces=FORMAT.any):
        # Both formats default to FORMAT.any; the values are forwarded
        # unchanged to the Filter base class.
        Filter.__init__(self, configdict, section, consumes=consumes, produces=produces)

    def invoke(self, packet):
        """
        Return the incoming packet unmodified.
        """
        return packet
# -*- coding: utf-8 -*- # # RawSensorInput: harvest raw timeseries from CityGIS Sensor REST API. # Use PostGIS DB to track progress of harvesting. # # Author:Just van den Broecke from stetl.util import Util from stetl.inputs.httpinput import HttpInput from stetl.packet import FORMAT from stetl.postgis import PostGIS log = Util.get_log("RawSensorInput") class RawSensorInput(HttpInput): """ Raw Sensor REST API (CityGIS) version for HttpInput: adds check for each file if it is already in DB. """ def __init__(self, configdict, section, produces=FORMAT.record): HttpInput.__init__(self, configdict, section, produces) self.query = self.cfg.get('query') self.db = None def init(self): # Connect only once to DB log.info('Init: connect to DB') self.db = PostGIS(self.cfg.get_dict()) self.db.connect() # Let superclass read file list from Apache URL
#!/usr/bin/env python # -*- coding: utf-8 -*- # # Extracts data from a string using a regular expression and generates a record. # # Author: Frank Steggink from stetl.component import Config from stetl.filter import Filter from stetl.packet import FORMAT from stetl.util import Util import re log = Util.get_log("regexfilter") class RegexFilter(Filter): """ Extracts data from a string using a regular expression and returns the named groups as a record. consumes=FORMAT.string, produces=FORMAT.record """ # Start attribute config meta # Applying Decorator pattern with the Config class to provide # read-only config values from the configured properties. @Config(ptype=str, default=None, required=True) def pattern_string(self): """ Regex pattern string. Should contain named groups. """
# -*- coding: utf-8 -*-
#
# RawSensorLastInput: fetch last raw values from CityGIS/Intemo Raw Sensor REST API.
#
# Author: Just van den Broecke

import time
from datetime import datetime, timedelta
from stetl.component import Config
from stetl.util import Util
from stetl.packet import FORMAT
from smartem.util.utc import zulu_to_gmt
from smartem.rawsensorapi import RawSensorAPIInput

log = Util.get_log("RawSensorAPI")


class RawSensorLastInput(RawSensorAPIInput):
    """
    Raw Sensor REST API (CityGIS) input that fetches the last values for all devices.
    """

    @Config(ptype=list, default=[], required=True)
    def sensor_names(self):
        """
        Names of the output sensors to refine.

        Required: True

        Default: []
        """
        pass
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Input classes for ETL via GDAL OGR.
#
# Author: Just van den Broecke
#
import subprocess
from stetl.component import Config
from stetl.util import Util, gdal, ogr
from stetl.input import Input
from stetl.packet import FORMAT

log = Util.get_log('ogrinput')


class OgrInput(Input):
    """
    Direct GDAL OGR input via the Python OGR wrapper.

    An OGR data source is accessed through the Python API
    (http://gdal.org/python) and the Features are read from each layer.
    Each Layer corresponds to a "doc", so for multi-layer sources the
    'end-of-doc' flag is set after a Layer has been read.

    This input can read almost any geospatial data format. The features can
    be used directly in a Stetl Filter, or passed through a converter, e.g.
    to obtain GeoJSON structures.

    produces=FORMAT.ogr_feature or FORMAT.ogr_feature_array (all features)
    """

    # Start attribute config meta
    # Applying Decorator pattern with the Config class to provide
# -*- coding: utf-8 -*-
#
# Smart Emission DB input classes.
#
# Author: Just van den Broecke

from stetl.component import Config
from stetl.util import Util
from stetl.inputs.dbinput import PostgresDbInput

log = Util.get_log("SmartemDbInput")


class RawDbInput(PostgresDbInput):
    """
    Reads raw Smartem harvested JSON data from the timeseries table and
    converts it to a record list.
    """

    @Config(ptype=str, required=True, default=None)
    def last_gid_query(self):
        """
        Query string that fetches the last gid that was processed.
        """
        pass

    @Config(ptype=str, required=True, default=None)
    def gids_query(self):
        """
        Query string that fetches all gids (ids) still to be processed.
        """
        pass
# -*- coding: utf-8 -*-
#
# Output to File classes.
#
# Author: Just van den Broecke
#
from stetl.output import Output
from stetl.util import Util
from stetl.packet import FORMAT
import os

log = Util.get_log('fileoutput')


class FileOutput(Output):
    """
    Pretty print XML to file from an etree doc.

    consumes=FORMAT.etree_doc
    """

    def __init__(self, configdict, section):
        """Initialise the Output base class and log the current working directory."""
        Output.__init__(self, configdict, section, consumes=FORMAT.etree_doc)
        cwd = os.getcwd()
        log.info("working dir %s" % cwd)

    def write(self, packet):
        """
        Write the packet to the path configured as 'file_path'.

        A packet without data is returned untouched; otherwise the result of
        write_file() is returned.
        """
        if packet.data is not None:
            target_path = self.cfg.get('file_path')
            return self.write_file(packet, target_path)

        # Nothing to write
        return packet
# Packet buffering.
#
# Author: Just van den Broecke
import copy
from stetl.util import Util
from stetl.filter import Filter
from stetl.packet import FORMAT

log = Util.get_log("packetbuffer")


class PacketBuffer(Filter):
    """
    Keeps a copy of every incoming Packet in ``packet_list``.

    Main use is unit-testing: the buffered Packets can be inspected after
    the ETL run is done.
    """

    def __init__(self, configdict, section):
        """Initialise as an any-to-any Filter with an empty buffer."""
        Filter.__init__(self, configdict, section, consumes=FORMAT.any, produces=FORMAT.any)
        self.packet_list = []

    def invoke(self, packet):
        """Buffer a copy of the packet and pass the original through."""
        # Packets may be cleared/reused, so store a copy rather than the
        # Packet itself. Note that copy.copy() makes a shallow copy.
        snapshot = copy.copy(packet)
        self.packet_list.append(snapshot)
        return packet
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Output classes for ETL.
#
# Author: Just van den Broecke
#
from os import sys, path
from stetl.outputs.httpoutput import HttpOutput
from stetl.util import Util
from stetl.packet import FORMAT
from stetl.component import Config

log = Util.get_log('sosoutput')


class SOSTOutput(HttpOutput):
    """
    Output via the SOS-T protocol over plain HTTP.

    consumes=FORMAT.record_array
    """

    @Config(ptype=str, default='application/json;charset=UTF-8', required=True)
    def content_type(self):
        """
        Content type (for template).

        Required: True

        Default: application/json;charset=UTF-8
        """
        pass
# -*- coding: utf-8 -*-
#
# Extracts a file from a ZIP file, and saves it as the given file name.
#
# Author: Frank Steggink
#
from stetl.component import Config
from stetl.filter import Filter
from stetl.util import Util
from stetl.packet import FORMAT

log = Util.get_log('zipfileextractor')

# 1024 ** 3; presumably a size in bytes (1 GiB) -- usage is not visible here, confirm.
BUFFER_SIZE = 1024 * 1024 * 1024


class ZipFileExtractor(Filter):
    """
    Extracts one file from a ZIP file and saves it under the given file name.

    consumes=FORMAT.record, produces=FORMAT.string
    """

    # Start attribute config meta
    @Config(ptype=str, default=None, required=True)
    def file_path(self):
        """
        Name of the file the extracted content is written to.
        """
        pass
# Output classes for ETL, executing commands. # # Author: Frank Steggink # import subprocess import os import shutil from stetl.component import Config from stetl.output import Output from stetl.util import Util from stetl.packet import FORMAT log = Util.get_log('execoutput') class ExecOutput(Output): """ Executes any command (abstract base class). """ @Config(ptype=str, default='', required=False) def env_args(self): """ Provides of list of environment variables which will be used when executing the given command. Example: env_args = pgpassword=postgres othersetting=value~with~spaces """ pass @Config(ptype=str, default='=', required=False) def env_separator(self):