def classify(self):
    log.debug(u'Classifying beekeeper fields {fieldnames}', fieldnames=self.fieldnames)

    # TODO: Can we account for multiple occurrences of "weightX" fields for mapping more than one scale?
    weight_synonyms = u'(weight|wght|gewicht)'
    temperature_synonyms = u'(temperature|temp|temperatur)'
    outside_synonyms = u'(outside|out|air|außen|aussen)'

    candidates = {
        'weight_total': [
            self.from_words(weight_synonyms, 'total', exclude=['stddev']),
            self.from_words(weight_synonyms, exclude=['stddev']),
        ],
        'temperature_outside': [
            self.from_words(temperature_synonyms, outside_synonyms),
            self.from_words(temperature_synonyms),
        ],
        'temperature_inside': [
            self.from_words(temperature_synonyms, 'inside'),
            self.from_words(temperature_synonyms),
        ],
    }
    #pprint(candidates)

    results = SmartBunch()
    for name, patterns in candidates.items():
        fieldname = self.find_match(patterns)
        if fieldname is not None:
            results[name] = fieldname

    log.info(u'Classified beekeeper fields "{fields}" from "{fieldnames}"',
             fields=results.dump(), fieldnames=self.fieldnames)

    return results
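# A minimal, self-contained sketch of the synonym-based matching used by
# `classify` above. It assumes `from_words` composes its word patterns into a
# single case-insensitive regular expression with positive lookaheads for
# required words and negative lookaheads for excluded ones. The helper below
# is hypothetical and only illustrates the idea; it is not the actual
# implementation.
import re

def from_words_sketch(*words, **kwargs):
    """Build a pattern requiring all `words` while rejecting all `exclude` terms."""
    exclude = kwargs.get('exclude') or []
    expression = u''.join(u'(?=.*{})'.format(word) for word in words)
    expression += u''.join(u'(?!.*{})'.format(word) for word in exclude)
    return re.compile(u'^' + expression + u'.*$', re.IGNORECASE)

# "weight_total" should match, its standard deviation sibling should not.
pattern = from_words_sketch(u'(weight|wght|gewicht)', u'total', exclude=[u'stddev'])
assert pattern.match(u'weight_total')
assert not pattern.match(u'weight_total_stddev')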
def topic_to_topology(self, topic):
    """
    Decode MQTT topic segments implementing the »basic strategy«.

    The topology hierarchy is directly specified by the MQTT topic
    and is made up of two path segments::

        realm / node

    The topology identifiers are specified as:

    - "realm" is the designated root realm. You should prefix the topic name
      with this label when opting in for all features of the telemetry platform.
      For other purposes, feel free to publish to any MQTT topic you like.

    - "node" is the node identifier. Choose anything you like. This usually
      gets transmitted from an embedded device node.
    """

    # Decode the topic.
    m = self.matcher.match(topic)
    if m:
        address = SmartBunch(m.groupdict())
    else:
        address = {}

    return address
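# Usage sketch for the »basic strategy« decoder above. The exact pattern
# behind `self.matcher` is not shown in this section; the regular expression
# below is an assumption which matches the documented two-segment layout
# `realm / node`.
import re

matcher = re.compile(r'^(?P<realm>.+?)/(?P<node>.+?)$')
m = matcher.match('hiveeyes/node-42')
print(m.groupdict())
# {'realm': 'hiveeyes', 'node': 'node-42'}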
def topology_to_storage(self, topology, message_type=None):
    """
    Encode topology segment identifiers to a database address.

    A database server usually has the concept of multiple databases, each
    with multiple tables. Databases other than RDBMS might name these
    concepts differently, but the general idea is the same.

    The topology information (realm, node) is mapped to the database name
    and the measurement name to compute the storage location for
    measurements:

    - realm + node = database name
    - sensors | events = table name
    """

    # Derive database table suffix from message type.
    table_suffix = self.get_table_suffix(topology, message_type)

    # Use topology information as blueprint for storage address.
    storage = SmartBunch(topology)

    # Format and sanitize all input parameters used for database addressing.
    # TODO: Investigate using tags additionally to / instead of only "storage.measurement".
    sanitize = self.sanitize_db_identifier
    storage.label = sanitize('{}'.format(storage.node))
    storage.database = sanitize('{}_{}'.format(storage.realm, storage.node))
    storage.measurement = sanitize('{}'.format(table_suffix))
    storage.measurement_events = sanitize('{}'.format('events'))

    return storage
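# `sanitize_db_identifier` is referenced above but not shown in this section.
# A plausible sketch, under the assumption that it replaces characters which
# are unsafe in database and measurement names and lowercases the result; the
# actual rules may differ.
import re

def sanitize_db_identifier_sketch(value):
    """Replace anything which is not alphanumeric with an underscore, then lowercase."""
    return re.sub(r'[^A-Za-z0-9_]', '_', value).lower()

# With the basic strategy, realm "hiveeyes" and node "node-1" would address
# the database "hiveeyes_node_1".
print(sanitize_db_identifier_sketch('{}_{}'.format('hiveeyes', 'node-1')))
# hiveeyes_node_1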
def topology_to_storage(self, topology):
    """
    Encode topology segment identifiers to a database address.

    A database server usually has the concept of multiple databases, each
    with multiple tables. Databases other than RDBMS might name these
    concepts differently, but the general idea is the same.

    Mapping the topology quadruple (realm, network, gateway, node) in the form of:

    - realm + network = database name
    - gateway + node = table name

    is a perfect fit for computing the slot where to store the measurements.
    """

    # TODO: Investigate using tags additionally to / instead of "database.measurement".

    # data: Regular endpoint
    # loop: WeeWX (TODO: Move to specific vendor configuration.)
    if topology.slot.startswith('data') or topology.slot.startswith('loop'):
        suffix = 'sensors'
    elif topology.slot.startswith('event'):
        suffix = 'events'
    else:
        suffix = 'unknown'

    # Use topology information as blueprint for storage address.
    storage = SmartBunch(topology)

    # Format and sanitize all input parameters used for database addressing.
    sanitize = self.sanitize_db_identifier
    storage.label = sanitize('{}-{}'.format(storage.gateway, storage.node))
    storage.database = sanitize('{}_{}'.format(storage.realm, storage.network))
    storage.measurement = sanitize('{}_{}_{}'.format(storage.gateway, storage.node, suffix))
    storage.measurement_events = sanitize('{}_{}_{}'.format(storage.gateway, storage.node, 'events'))

    return storage
def topic_to_topology(self, topic):
    """
    Decode MQTT topic segments implementing the »quadruple hierarchy strategy«.

    The topology hierarchy is directly specified by the MQTT topic and is made
    up of a minimum of four identifiers describing the core structure::

        realm / network / gateway / node

    The topology identifiers are specified as:

    - "realm" is the designated root realm. You should prefix the topic name
      with this label when opting in for all features of the telemetry
      platform. For other purposes, feel free to publish to any MQTT topic
      you like.

    - "network" is your personal realm. Choose anything you like or use an
      `Online GUID Generator <https://www.guidgenerator.com/>`_ to gain
      maximum uniqueness.

    - "gateway" is your gateway identifier. Choose anything you like. This
      does not have to be globally unique, so you might use labels based on
      the names of your sites. While you are the owner of this namespace
      hierarchy, remember these labels might be visible on the collaborative
      ether, though. So the best thing would be to assign nicknames to your
      sites which don't identify their location.

    - "node" is your node identifier. Choose anything you like. This usually
      gets transmitted from an embedded device node. Remember one device node
      might have multiple sensors attached, which is beyond the scope of the
      collection platform: We just accept bunches of named measurement values,
      no matter which sensors they might originate from. In other words: We
      neither need nor favor numeric sensor identifiers, let's give them names!
    """

    # Regular expression pattern for decoding MQTT topic address segments.
    pattern = r'^(?P<realm>.+?)/(?P<network>.+?)/(?P<gateway>.+?)/(?P<node>.+?)(?:/(?P<slot>.+?))?$'

    # Decode the topic.
    p = re.compile(pattern)
    m = p.match(topic)
    if m:
        address = SmartBunch(m.groupdict())
    else:
        address = {}

    return address
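# Worked example for the quadruple decoder above, using the very pattern from
# the function body. The sample topic is illustrative.
import re

pattern = r'^(?P<realm>.+?)/(?P<network>.+?)/(?P<gateway>.+?)/(?P<node>.+?)(?:/(?P<slot>.+?))?$'
m = re.compile(pattern).match('hiveeyes/testdrive/area-42/node-1/data.json')
print(m.groupdict())
# {'realm': 'hiveeyes', 'network': 'testdrive', 'gateway': 'area-42', 'node': 'node-1', 'slot': 'data.json'}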
def get_dashboard_identity(self, storage_location, topology=None):

    # Compute effective topology information.
    topology = topology or {}
    realm = topology.get('realm', 'default')
    network = topology.get('network', storage_location.database)

    # Derive dashboard uid and name from topology information.
    identity = SmartBunch(
        uid=u'{realm}-{network}-instant'.format(realm=realm, network=network),
        name=u'{realm}-{network}'.format(realm=realm, network=network),
        title=u'{realm}-{network}'.format(realm=realm, network=network),
        # TODO: Use the real title after fully upgrading to the new Grafana API (i.e. don't use get-by-slug anymore!)
        #title=u'Hiveeyes raw data in network ' + network,
    )
    #print identity.prettify()
    return identity
def get_dashboard_identity(self, storage_location, topology=None):

    # Compute effective topology information.
    topology = topology or {}
    realm = topology.get('realm', 'default')
    network = topology.get('network', storage_location.database)

    # Derive dashboard uid and name from topology information.
    nodename = u'{gateway} / {node}'.format(gateway=topology.gateway, node=topology.node)
    identity = SmartBunch(
        #uid=u'{realm}-{network}-instant'.format(realm=realm, network=network),
        name=self.strategy.topology_to_label(topology),
        title=self.strategy.topology_to_label(topology),
        # TODO: Use the real title after fully upgrading to the new Grafana API (i.e. don't use get-by-slug anymore!)
        # title=u'Hiveeyes environment cockpit for measurement node {nodename} in network {network}'.format(nodename=nodename, network=network),
        # title=u'Hiveeyes yield cockpit for measurement node {nodename} in network {network}'.format(nodename=nodename, network=network),
    )
    #print identity.prettify()
    return identity
def topology_to_storage(self, topology):
    """
    Encode topology segment identifiers to a database address.

    A database server usually has the concept of multiple databases, each
    with multiple tables. Databases other than RDBMS might name these
    concepts differently, but the general idea is the same.

    Mapping the topology quadruple (realm, network, gateway, node) in the form of:

    - realm + network = database name
    - gateway + node = table name

    is a perfect fit for computing the slot where to store the measurements.
    """

    # TODO: Investigate using tags additionally to / instead of "database.measurement".
    # TODO: Move specific knowledge about WeeWX or Tasmota to a device-specific knowledge base.

    # data:   Regular endpoint
    # loop:   WeeWX
    # SENSOR: Sonoff-Tasmota
    if topology.slot.startswith('data') or topology.slot.startswith('loop') \
            or topology.slot.endswith('SENSOR') or topology.slot.endswith('STATE'):
        suffix = 'sensors'
    elif topology.slot.startswith('event'):
        suffix = 'events'
    else:
        suffix = 'unknown'

    # Use topology information as blueprint for storage address.
    storage = SmartBunch(topology)

    # Format and sanitize all input parameters used for database addressing.
    sanitize = self.sanitize_db_identifier
    storage.label = sanitize('{}-{}'.format(storage.gateway, storage.node))
    storage.database = sanitize('{}_{}'.format(storage.realm, storage.network))
    storage.measurement = sanitize('{}_{}_{}'.format(storage.gateway, storage.node, suffix))
    storage.measurement_events = sanitize('{}_{}_{}'.format(storage.gateway, storage.node, 'events'))

    return storage
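# Worked example of the quadruple storage mapping above, with a hypothetical
# stand-in for `sanitize_db_identifier` (underscore-replace and lowercase, as
# sketched earlier). Values are illustrative.
import re

def _sanitize(value):
    return re.sub(r'[^A-Za-z0-9_]', '_', value).lower()

topology = {'realm': 'hiveeyes', 'network': 'testdrive', 'gateway': 'area-42', 'node': 'node-1'}
print(_sanitize('{}_{}'.format(topology['realm'], topology['network'])))
# hiveeyes_testdrive
print(_sanitize('{}_{}_{}'.format(topology['gateway'], topology['node'], 'sensors')))
# area_42_node_1_sensors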
def get_dashboard_identity(self, storage_location, topology=None):

    # Compute effective topology information.
    topology = topology or {}
    realm = topology.get('realm', 'default')
    if 'network' in topology:
        name = topology.network
    else:
        name = topology.node

    # Derive dashboard uid and name from topology information.
    identity = SmartBunch(
        #uid=u'{realm}-{name}-instant'.format(realm=realm, name=name),
        name=u'{realm}-{name}'.format(realm=realm, name=name),
        title=u'{realm}-{name}'.format(realm=realm, name=name),
        # TODO: Use the real title after fully upgrading to the new Grafana API (i.e. don't use get-by-slug anymore!)
        #title=u'Raw data for realm={realm} network={network}'.format(realm=realm, network=network),
    )
    #print identity.prettify()
    return identity
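# Example of the derived dashboard identity. Assuming a topology decoded by
# the quadruple strategy with realm "hiveeyes" and network "testdrive", the
# variant above would yield:
realm, name = 'hiveeyes', 'testdrive'
print(u'{realm}-{name}'.format(realm=realm, name=name))
# hiveeyes-testdrive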
def topic_to_topology(self, topic):
    """
    Decode MQTT topic segments implementing the »quadruple hierarchy strategy«.

    The topology hierarchy is directly specified by the MQTT topic and is made
    up of a minimum of four identifiers describing the core structure::

        realm / network / gateway / node

    The topology identifiers are specified as:

    - "realm" is the designated root realm. You should prefix the topic name
      with this label when opting in for all features of the telemetry
      platform. For other purposes, feel free to publish to any MQTT topic
      you like.

    - "network" is your personal realm. Choose anything you like or use an
      `Online GUID Generator <https://www.guidgenerator.com/>`_ to gain
      maximum uniqueness.

    - "gateway" is your gateway identifier. Choose anything you like. This
      does not have to be globally unique, so you might use labels based on
      the names of your sites. While you are the owner of this namespace
      hierarchy, remember these labels might be visible on the collaborative
      ether, though. You might want to assign nicknames to your sites to not
      identify their location.

    - "node" is the node identifier. Choose anything you like. This usually
      gets transmitted from an embedded device node.
    """

    # Decode the topic.
    m = self.matcher.match(topic)
    if m:
        address = SmartBunch(m.groupdict())
    else:
        address = {}

    return address
def tame_refresh_interval(self, preset='standard', force=False):
    """
    Tame the refresh interval for all dashboards.

    :param preset: Which taming preset to use. Currently, only "standard"
                   is implemented, which is also the default preset.
    :param force:  Tame a dashboard even when it has been modified recently.

    Introduction
    ------------
    The default dashboard refresh interval of 5 seconds is important for
    instant-on workbench operations. However, once a sensor node has been
    deployed to the field, it usually submits measurements only about every
    5 minutes.

    Problem
    -------
    High refresh rates on many dashboards can increase the overall system
    load significantly, depending on how many users are displaying them in
    their browsers and on the complexity of the database queries issued when
    rendering the dashboards.

    Solution
    --------
    In order to reduce the overall load on the data acquisition system, the
    refresh interval of all dashboards which have not been updated since a
    configurable threshold time is decreased, following the rules of built-in
    presets.

    The default "standard" preset currently implements these rules:

    - Leave all dashboards untouched which have been updated during the last 14 days.
    - Apply a refresh interval of 5 minutes to all dashboards tagged with "live".
    - Completely disable refreshing for all dashboards tagged with "historical".
    - Apply a refresh interval of 30 minutes to all other dashboards.
    """

    dashboard_list = self.grafana_api.get_dashboards()
    log.info('Taming dashboard refresh interval with preset="{preset}" for {count} dashboards',
             preset=preset, count=len(dashboard_list))

    # Threshold date: 14 days in the past.
    before_14_days = arrow.utcnow().shift(days=-14)

    for dashboard_meta in dashboard_list:
        dashboard_meta = SmartBunch.bunchify(dashboard_meta)
        #print dashboard_meta.prettify()

        whoami = u'title="{title}", uid="{uid}"'.format(
            title=dashboard_meta['title'], uid=dashboard_meta['uid'])

        # Request dashboard by uid.
        dashboard_uid = dashboard_meta['uid']
        response = self.grafana_api.get_dashboard_by_uid(dashboard_uid)
        response = SmartBunch.bunchify(response)

        # Get effective dashboard information from response.
        folder_id = response.meta.folderId
        dashboard = response.dashboard

        # Compute new dashboard refresh interval by applying the taming rules.
        # Interval units: Mwdhmsy

        # 1. Check dashboard modification time against threshold.
        modification_time = arrow.get(response.meta.updated)
        if not force and modification_time > before_14_days:
            log.debug('Skip taming dashboard with {whoami}, it has recently been modified', whoami=whoami)
            continue

        # 2. Choose the refresh interval by looking at the dashboard tags.
        if 'live' in dashboard_meta.tags:
            refresh_interval = '5m'
        elif 'historical' in dashboard_meta.tags:
            refresh_interval = None
        else:
            refresh_interval = '30m'

        # Skip the update procedure if the refresh interval hasn't changed at all.
        if refresh_interval == dashboard.refresh:
            continue

        # Set new refresh interval.
        dashboard.refresh = refresh_interval

        # Update dashboard.
        log.debug('Taming dashboard with {whoami} to refresh interval of {interval}',
                  whoami=whoami, interval=refresh_interval)
        response = self.grafana_api.grafana_client.dashboards.db.create(
            dashboard=dashboard, folderId=folder_id)

        # Report about the outcome.
        if response['status'] == 'success':
            log.info('Successfully tamed dashboard with {whoami}', whoami=whoami)
        else:
            log.warn('Failed taming dashboard with {whoami}', whoami=whoami)
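# The tag-based taming rules above, extracted into a pure function. This is a
# hypothetical refactoring sketch for illustration and testing, not part of
# the original implementation.
def compute_refresh_interval(tags):
    """Map dashboard tags to a refresh interval following the "standard" preset."""
    if 'live' in tags:
        return '5m'
    if 'historical' in tags:
        # `None` disables refreshing altogether.
        return None
    return '30m'

assert compute_refresh_interval(['live']) == '5m'
assert compute_refresh_interval(['historical']) is None
assert compute_refresh_interval([]) == '30m'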