#!/usr/bin/env python3 import src.core.data_utils as du import src.core.error_utils as eu import requests import time egauge_error = eu.error_template("`egauge` data-acquisition step") # primary entry point for scrape of egauges. # Returns any acquired data & updated nonce. def acquire(project,config,state): starts,stops = setup_times(project,config,state) gauges = config['gauges'] nonce = {k:v for k,v in starts.items()} data = [] fltr = lambda r : r.timestamp for gid in gauges: print('querying egauge: {}...'.format(gid)) raw = query(gauges[gid],starts[gid],stops[gid]) if not raw: continue rows = fmt_query(gid,raw) if not rows: continue fltr = lambda r: r.timestamp nonce[gid] = max(rows,key=fltr).timestamp data += rows print('egauge queries complete...\n') if 'filter' in config: data = run_filters(config['filter'],data) state['nonce'] = nonce
#!/usr/bin/env python3 from src.core.error_utils import error_template import src.core.data_utils as du from collections import namedtuple import time # Defines order of # execution in reshape runtime. ORD = 2 field_error = error_template('`field` based data-reshaping step') # primary entry point. def reshape(project,config,state,data): mkerr = field_error('attempting to run declared sub-steps') settings = config.get('settings',{}) default = ["modify","generate"] declared = [k for k in config if not k == 'settings'] for step in declared: if not step in default: error = mkerr('unrecognized sub-step section: ' + step) raise Exception(error) order = settings.get('in-order',default) rows = [r for r in data] for step in order: if not step in declared: continue if step == 'modify': state,rows = run_modifications(project,config,state,rows) elif step == 'generate': state,rows = run_generators(project,config,state,rows) else:
#!/usr/bin/env python3 from src.core.error_utils import errdata, error_template import src.core.data_utils as du import src.core.file_utils as fu # Determines order of application # during mulit-phase reshaping process. ORD = 0 reshape_error = error_template('`value` based data-reshaping step') # reshape : proj -> conf -> state -> data -> (state,data) def reshape(project,config,state,data): settings = config['settings'] actions = ['filter','generate','replace'] if 'in-order' in settings: order = settings['in-order'] else: order = [a for a in actions if a in config] errors = [] for action in order: substate = state[action] if action in state else {} if action == 'filter': substate,data = run_filters(project,config,substate,data) elif action == 'generate': substate,data = run_generators(project,config,substate,data) elif action == 'replace': substate,data = run_replacements(project,config,substate,data) else: raise Exception('unknown action: ' + action) if substate: state[action] = substate else: state.pop(action,None)
#!/usr/bin/env python3 from src.core.data_utils import Row, get_uid_generator, check_config, make_time_specs from src.core.error_utils import error_template import requests import datetime # an experimental shortcut to nicer errors :) webctrl_error = error_template('`webctrl` data-acquisition step') # Primary entry point for webctrl scrape. # Returns data & updated state. def acquire(project, config, state, start_time, end_time): settings = config['settings'] # check that config is valid, parse sensor # parameters, and generate time specifications # for all active sensors. params, times = setup(project, config, state) # generate a wrapper around `exec_query` with # user-supplied configurations pre-applied. query = new_query(config['settings'], start_time, end_time) # initialize collectors for formatted # data and the new `nonce` values. nonce, buffs, data = {}, {}, [] # iteratively scrape all sensors. for uid, spec in params.items(): # break out the webctrl-path and identity values from `spec`. path, *ident = (spec[k] for k in ('path', 'node', 'name', 'unit')) # pull start-time and maximum step from `times`. start, step = times[uid]['init'], times[uid]['step'] # get the buffer if it exists (default to empty list).
#!/usr/bin/env python3 from src.core.data_utils import Row from src.core.error_utils import error_template,mklog import src.core.file_utils as fu import src.core.pgrm_utils as pu import time import os.path as path import os static_error = error_template('`static` data-acquisition step') # primary entrr point: project -> config -> state -> (state,data) def acquire(project,config,state): print('running `static` data-acquisition method...\n') # get contents of the `settings` field; defaults to empty dict. settings = config.get('settings',{}) # get the list of parser specifications. parsers = config['parser'] # quick dir formatter; appends '/' if needed, but passes `None`s. dir_fmt = lambda d: (d if d.endswith('/') else d + '/') if d else d # generate the default directory to find files to parse. src_default = settings.get('source','tmp/inputs/{}/'.format(project)) # generate the default directory to move successfully parsed files to. fmt_default = settings.get('on-fmt','tmp/archive/{}/static/'.format(project)) # generate the default directory to move unsuccessfully parsed files to. err_default = settings.get('on-err','tmp/errors/{}/static/'.format(project)) # default directory to save data from parsed files individually raw_default = settings.get('on-raw',False) # an integer representing the current time; this is used by the # `save_raw` function to ensure that files from different sources # are all sorted by the same timestamp.
import src.core.error_utils as error_utils
from collections import namedtuple
import time
import toml
import os.path as path
import os
import sys
import csv

# Default data type to be returned by all data-acquisition
# scripts. Key requirement for interoperability between
# various steps/components ( esp. acquisition & reshaping ).
Row = namedtuple('row', ['node', 'name', 'unit', 'timestamp', 'value'])

# error template for errors relating to `data_utils`
data_error = error_utils.error_template('`data_utils`')


def fmt_string(target):
    """Normalize `target`: strip, join space-separated words with '-', lowercase."""
    words = str(target).strip().split(' ')
    return '-'.join(w for w in words if w).lower()


def row_generator(node, name, unit):
    """Return a closure that builds `Row`s with fixed, normalized identity fields.

    The returned callable accepts `(t, v)` and coerces both to float.
    """
    ident = tuple(fmt_string(part) for part in (node, name, unit))
    def gen(t, v):
        return Row(*ident, float(t), float(v))
    return gen