def _apply_units_to_numpy_data_readers(parameters, data):
    """
    Apply units to data originally loaded by :class:`NumPyLoadTxtReader` or
    :class:`NumPyGenFromTxtReader`.

    Units come from two optional places in ``parameters``: the third element
    of each entry in ``parameters["header"]["fields"]`` and the ``"units"``
    mapping in ``parameters["data"]``. Items of ``data`` are scaled in place.

    :param parameters: Dictionary of data source parameters read from JSON
        file.
    :type parameters: dict
    :param data: Dictionary of data read
    :type data: dict
    :returns: ``data`` with units applied
    :rtype: dict
    """
    header = parameters.get('header')  # ``None`` if there is no header
    if header:
        # map each header field name to whatever follows it in the field
        field_specs = dict(
            (field[0], field[1:]) for field in header['fields']
        )
        for name, spec in field_specs.iteritems():
            # units, if given, are the 2nd element after the field name
            if len(spec) <= 1:
                continue
            data[name] *= UREG(str(spec[1]))
    # units of the remaining data, ``None`` if not specified
    units_by_name = parameters['data'].get('units')
    if units_by_name:
        for name, units in units_by_name.iteritems():
            data[name] *= UREG(str(units))
    return data
def test_hdf5_reader():
    """
    Test :class:`carousel.contrib.readers.HDF5Reader`

    Two layouts are exercised: datasets addressed by node only, and a table
    dataset addressed by node and member name. The test files are removed
    even if an assertion fails.

    :return: readers and data
    """
    setup_hdf5_test_data()
    try:
        # test 1: load data from hdf5 dataset array by node
        params = {
            'GHI': {'units': 'W/m**2', 'extras': {'node': '/data/GHI'}},
            'DNI': {'units': 'W/m**2', 'extras': {'node': '/data/DNI'}},
            'Tdry': {'units': 'degC', 'extras': {'node': '/data/Tdry'}}
        }
        reader1 = HDF5Reader(params)
        assert isinstance(reader1, DataReader)
        data1 = reader1.load_data(H5TEST1)
        assert np.allclose(data1['GHI'], H5TABLE['GlobalHorizontalRadiation'])
        assert data1['GHI'].units == UREG('W/m**2')
        assert np.allclose(data1['DNI'], H5TABLE['DirectNormalRadiation'])
        assert data1['DNI'].units == UREG('W/m**2')
        assert np.allclose(data1['Tdry'], H5TABLE['DryBulbTemperature'])
        assert data1['Tdry'].units == UREG.degC
        # test 2: load data from hdf5 dataset table by node and member name
        params['GHI']['extras']['node'] = 'data'
        params['GHI']['extras']['member'] = 'GlobalHorizontalRadiation'
        params['DNI']['extras']['node'] = 'data'
        params['DNI']['extras']['member'] = 'DirectNormalRadiation'
        params['Tdry']['extras']['node'] = 'data'
        params['Tdry']['extras']['member'] = 'DryBulbTemperature'
        reader2 = HDF5Reader(params)
        # check the second reader and its data, not the first again
        assert isinstance(reader2, DataReader)
        data2 = reader2.load_data(H5TEST2)
        assert np.allclose(data2['GHI'], H5TABLE['GlobalHorizontalRadiation'])
        assert data2['GHI'].units == UREG('W/m**2')
        assert np.allclose(data2['DNI'], H5TABLE['DirectNormalRadiation'])
        assert data2['DNI'].units == UREG('W/m**2')
        assert np.allclose(data2['Tdry'], H5TABLE['DryBulbTemperature'])
        assert data2['Tdry'].units == UREG.degC
    finally:
        teardown_hdf5_test_data()
    return reader1, data1, reader2, data2
def load_data(self, filename, *args, **kwargs):
    """
    Load text data from different sheets.

    The data is first loaded by the superclass. Each parameter is then
    matched against its regular expression, the matched groups are converted
    to a float array and the parameter's units are applied.

    :param filename: Name of file passed to the superclass ``load_data``.
    :returns: Data with the text parameters replaced by quantities.
    :raises AttributeError: if a parameter's "method" is not in ``RE_METH``
    :raises MixedTextNoMatchError: if the pattern doesn't match the data
    """
    # load text data
    data = super(MixedTextXLS, self).load_data(filename)
    # iterate through sheets in parameters
    for sheet_params in self.parameters.itervalues():
        # iterate through the parameters on each sheet
        for param, pval in sheet_params.iteritems():
            pattern = pval.get('pattern', EFG_PATTERN)  # get pattern
            method_name = pval.get('method', 'search')  # get re method
            # whitelist re methods, getattr could be considered harmful
            if method_name not in RE_METH:
                allowed = ', '.join('"%s"' % meth for meth in RE_METH)
                raise AttributeError(
                    'Only %s regex methods are allowed.' % allowed
                )
            re_meth = getattr(re, method_name)
            match = re_meth(pattern, data[param])  # get matches
            if not match:
                raise MixedTextNoMatchError(re_meth, pattern, data[param])
            try:
                # a single match object
                match = match.groups()
            except AttributeError:
                # otherwise a sequence of match objects
                match = [m.groups() for m in match]
            npdata = np.array(match, dtype=float).squeeze()
            # missing or null units mean dimensionless
            data[param] = npdata * UREG(str(pval.get('units') or ''))
    return data
def _apply_units(data_data, data_units, fname):
    """
    Apply units to data.

    Fields listed in ``data_units`` are multiplied by their units, string
    fields without units are converted to lists, and every other field is
    returned as read.

    :param data_data: NumPy structured array with data from fname.
    :type data_data: :class:`numpy.ndarray`
    :param data_units: Units of fields in data_data.
    :type data_units: dict
    :param fname: Name of file from which data_data was read.
    :type fname: str
    :returns: Dictionary of data with units applied.
    :rtype: dict
    :raises: :exc:`~carousel.core.exceptions.UnnamedDataError`
    """
    field_names = data_data.dtype.names
    # data without field names can't be mapped to parameters
    if not field_names:
        raise UnnamedDataError(fname)
    data = {}  # dictionary of data read by NumPy
    for field in field_names:
        column = data_data[field]
        if field in data_units:
            # units specified in parameters are converted to string first
            data[field] = column * UREG(str(data_units[field]))
        elif np.issubdtype(column.dtype, str):
            # string data without units is returned as a list
            data[field] = column.tolist()
        else:
            data[field] = column
    return data
def __init__(self):
    """
    Build the per-output meta dictionaries from ``self.parameters``.

    Every key of ``self.parameters`` gets an entry in ``initial_value``,
    ``size``, ``uncertainty``, ``isconstant``, ``isproperty``,
    ``timeseries``, ``output_source`` and ``outputs``. ``variance`` and
    ``jacobian`` are created empty.
    """
    #: outputs initial value
    self.initial_value = {}
    #: size of outputs
    self.size = {}
    #: outputs uncertainty
    self.uncertainty = {}
    #: variance
    self.variance = {}
    #: jacobian
    self.jacobian = {}
    #: outputs isconstant flag
    self.isconstant = {}
    #: outputs isproperty flag
    self.isproperty = {}
    #: name of corresponding time series, ``None`` if no time series
    self.timeseries = {}
    #: name of :class:`Output` superclass
    self.output_source = {}
    #: calculation outputs
    self.outputs = {}
    for k, v in self.parameters.iteritems():
        self.initial_value[k] = v.get('init')  # returns None if missing
        self.size[k] = v.get('size') or 1  # minimum size is 1
        self.uncertainty[k] = None  # uncertainty for outputs is calculated
        self.isconstant[k] = v.get('isconstant', False)  # True or False
        self.isproperty[k] = v.get('isproperty', False)  # True or False
        # missing units, JSON ``null`` and ``None`` all mean non-dimensional;
        # ``str(None)`` would otherwise be parsed as the unit "None"
        units = str(v.get('units') or '')
        # NOTE: np.empty would be faster, but the outputs start as zeros
        self.outputs[k] = Q_(np.zeros((1, self.size[k])), UREG(units))
        # NOTE: Initial values are assigned and outputs resized when
        # simulation "start" method is called from the model.
        self.timeseries[k] = v.get('timeseries')  # None if not time series
        self.output_source[k] = self.__class__.__name__  # output source
def apply_units_to_cache(self, data):
    """
    Apply units to cached data of a :class:`ParameterizedXLS` data reader.

    The parameter item is scaled in place by its units, then the remaining
    data is handled by the superclass.

    :param data: Cached data.
    :returns: whatever the superclass ``apply_units_to_cache`` returns
    """
    spec = self.parameters['parameter']
    name = spec['name']
    units = str(spec['units'])
    data[name] *= UREG(units)
    # NOTE(review): this permanently removes "parameter" from
    # ``self.parameters``, so a second call raises KeyError above - confirm
    # that callers only ever call this once per reader
    self.parameters.pop('parameter')
    return super(ParameterizedXLS, self).apply_units_to_cache(data)
def test_lazy_loop_calculator_cls():
    """
    Test the lazy loop calculator class.

    :return: output registry after the calculation
    """
    formula_name = 'pythagorian_thm'
    formula_args = ['adjacent', 'opposite']
    # calculation: both formula arguments come from the data registry
    calc = {
        'formula': formula_name,
        'args': {
            'data': {'adjacent': 'a', 'opposite': 'b'},
            'outputs': {}
        },
        'returns': ['c']
    }
    # formulas
    formula_reg = FormulaRegistry()
    wrapped_formula = UREG.wraps(*PYTHAGOREAN_UNITS)(f_pythagorian_thm)
    formula_reg.register(
        {formula_name: wrapped_formula},
        args={formula_name: ['adjacent', 'opposite']},
        units={formula_name: PYTHAGOREAN_UNITS},
        isconstant={formula_name: None}
    )
    # data
    data_reg = DataRegistry()
    sides = {
        'a': [3., 5., 7., 9., 11.] * UREG('cm'),
        'b': [4., 12., 24., 40., 60.] * UREG('cm')
    }
    data_reg.register(
        sides, uncertainty=None, variance=None,
        isconstant={'a': True, 'b': True}
    )
    # outputs
    out_reg = OutputRegistry()
    out_reg.register({'c': np.zeros(5) * UREG.m})
    # repeat args are listed as formula names, not data reg names!
    calculator = LazyLoopingCalculator(repeat_args=formula_args)
    calculator.calculate(calc, formula_reg, data_reg, out_reg)
    hypotenuse = out_reg['c']
    assert np.allclose(hypotenuse.m, PYTHAGOREAN_TRIPLES)  # check magnitudes
    assert out_reg['c'].u == UREG.m  # output units are meters
    return out_reg
def load_data(self, filename, *args, **kwargs):
    """
    Load parameterized data from different sheets.

    The superclass loads one item per data key and sheet, named
    ``<key>_<sheet index>``. These are stacked row-wise into a single item
    per data key, and the parameter values are added under the parameter
    name.

    :param filename: Name of file passed to the superclass ``load_data``.
    :returns: Data with parameter and concatenated parameterized data.
    :rtype: dict
    """
    # load parameterized data
    data = super(ParameterizedXLS, self).load_data(filename)
    # add parameter to data
    parameter = self.parameterization['parameter']
    parameter_name = parameter['name']
    parameter_values = parameter['values']
    # null units mean dimensionless
    parameter_units = str(parameter['units'] or '')
    data[parameter_name] = parameter_values * UREG(parameter_units)
    # number of sheets
    num_sheets = len(parameter['sheets'])
    # parse and concatenate parameterized data
    for key in self.parameterization['data']:
        # default to dimensionless *before* converting to string, otherwise
        # missing units become the string "None"
        units = str(self.parameterization['data'][key].get('units') or '')
        datalist = []
        for n in range(num_sheets):
            k = key + '_' + str(n)
            datalist.append(data[k].reshape((1, -1)))
            data.pop(k)  # remove unused data keys
        data[key] = np.concatenate(datalist, axis=0) * UREG(units)
    return data
def apply_units_to_cache(self, data):
    """
    Apply units to cached data read using :class:`JSONReader`.

    Each item named in ``self.parameters`` is scaled in place by its units;
    missing or null units mean dimensionless. Items for which this raises
    :exc:`TypeError` are left unchanged.

    :param data: Cached data.
    :type data: dict
    :return: data with units
    """
    for name, spec in self.parameters.iteritems():
        try:
            data[name] *= UREG(str(spec.get('units') or ''))
        except TypeError:
            # best effort: leave this item as it was cached
            pass
    return data
def __init__(self):
    """
    Build the per-calculation meta dictionaries.

    Defaults for ``always_calc``, ``frequency``, ``dependencies``,
    ``calculator`` and ``is_dynamic`` are read from the meta class and
    applied to every calculation; a calculation's own parameters override
    the default when they contain a value that is not ``None``.
    """
    meta = getattr(self, CalcBase._meta_attr)
    parameters = getattr(self, CalcBase._param_attr)
    #: ``True`` if always calculated (day and night)
    self.always_calc = dict.fromkeys(
        parameters, getattr(meta, 'always_calc', False)
    )
    freq = getattr(meta, 'frequency', [1, ''])
    #: frequency calculation is calculated in intervals or units of time
    self.frequency = dict.fromkeys(parameters, freq[0] * UREG(str(freq[1])))
    # each calculation gets its own copy of the default dependencies, so
    # changing one calculation's list can't change the others
    default_dependencies = getattr(meta, 'dependencies', [])
    #: dependencies
    self.dependencies = {k: default_dependencies[:] for k in parameters}
    #: name of :class:`Calc` superclass
    self.calc_source = dict.fromkeys(parameters, self.__class__.__name__)
    #: calculator
    self.calculator = dict.fromkeys(
        parameters, getattr(meta, 'calculator', Calculator)
    )
    #: ``True`` if calculations are dynamic, ``False`` if static
    self.is_dynamic = dict.fromkeys(
        parameters, getattr(meta, 'is_dynamic', False)
    )
    #: calculations
    self.calcs = {}
    # meta that an individual calculation may override
    keys = ('dependencies', 'always_calc', 'frequency', 'calculator',
            'is_dynamic')
    for k, v in parameters.iteritems():
        self.calcs[k] = {
            key: v[key] for key in ('formula', 'args', 'returns')
        }
        for key in keys:
            value = v.get(key)
            if value is not None:
                getattr(self, key)[k] = value
def index_registry(args, reg, ts=None, idx=None):
    """
    Index into a :class:`~carousel.core.Registry` to return arguments
    from :class:`~carousel.core.data_sources.DataRegistry` and
    :class:`~carousel.core.outputs.OutputRegistry` based on the
    calculation parameter file.

    :param args: Arguments field from the calculation parameter file.
    :param reg: Registry in which to index to get the arguments.
    :type reg: :class:`~carousel.core.data_sources.DataRegistry`,
        :class:`~carousel.core.outputs.OutputRegistry`
    :param ts: Time step [units of time].
    :param idx: [None] Index of current time step for dynamic calculations.
    :returns: Dictionary of formula argument names and indexed values.
    :rtype: dict

    Required arguments for static and dynamic calculations are specified in
    the calculation parameter file by the "args" key. Arguments can be from
    either the data registry or the outputs registry, which is denoted by
    the "data" and "outputs" keys. Each argument is a dictionary whose key
    is the name of the argument in the formula specified and whose value
    can be one of the following:

    * The name of the argument in the registry ::

        {"args": {"outputs": {"T_bypass": "T_bypass_diode"}}}

      maps the formula argument "T_bypass" to the outputs registry item
      "T_bypass_diode".

    * A list with the name of the argument in the registry as the first
      element and a negative integer denoting the index relative to the
      current timestep as the second element ::

        {"args": {"data": {"T_cell": ["Tcell", -1]}}}

      indexes the previous timestep of "Tcell" from the data registry.

    * A list with the name of the argument in the registry as the first
      element and a list of positive integers denoting the index into the
      item from the registry as the second element ::

        {"args": {"data": {"cov": ["bypass_diode_covariance", [2]]}}}

      indexes the third element of "bypass_diode_covariance".

    * A list with the name of the argument in the registry as the first
      element, a negative real number denoting the time relative to the
      current timestep as the second element, and the units of the time as
      the third ::

        {"args": {"data": {"T_cell": ["Tcell", -1, 'day']}}}

      indexes the entire previous day of "Tcell".
    """
    # TODO: move this to new Registry method or __getitem__
    # TODO: replace idx with datetime object and use timeseries to
    # interpolate into data, not necessary for outputs since that will
    # conform to idx
    rargs = dict.fromkeys(args)  # make dictionary from arguments
    # iterate over arguments
    for k, v in args.iteritems():
        # switch based on string type instead of sequence
        if isinstance(v, basestring):
            # var           ------------------ states ------------------
            # idx           ===== not None =====    ======= None =======
            # isconstant    True    False   None    True    False   None
            # is_dynamic    no      yes     yes     no      no      no
            # compare idx to None so that time step 0 is still dynamic, and
            # only look up isconstant for names since lists aren't hashable
            is_dynamic = idx is not None and not reg.isconstant.get(v)
            # the default assumes the current index
            rargs[k] = reg[v][idx] if is_dynamic else reg[v]
        elif len(v) < 3:
            if reg.isconstant[v[0]]:
                # only get indices specified by v[1]
                # tuples interpreted as a list of indices, see
                # NumPy basic indexing: Dealing with variable
                # numbers of indices within programs
                rargs[k] = reg[v[0]][tuple(v[1])]
            elif v[1] < 0:
                # specified offset from current index
                rargs[k] = reg[v[0]][idx + v[1]]
            else:
                # get indices specified by v[1] at current index
                rargs[k] = reg[v[0]][idx][tuple(v[1])]
        else:
            # specified timedelta from current index
            # NOTE(review): dt is used as a slice bound without being
            # converted to an integer - confirm it is always a whole number
            # of time steps
            dt = 1 + (v[1] * UREG(str(v[2])) / ts).item()
            # TODO: deal with fractions of timestep
            rargs[k] = reg[v[0]][(idx + dt):(idx + 1)]
    return rargs
def __init__(self, simfile=None, **kwargs):
    """
    Set up the simulation from a parameter file and/or keyword arguments.

    :param simfile: Name of a JSON file with the simulation parameters.
        It may also be given as the ``simfile`` keyword or as an existing
        ``param_file`` attribute.
    :param kwargs: Simulation parameters; these update any parameters that
        are already set.

    The ``interval`` and ``sim_length`` parameters may each be a string, a
    quantity or a ``(magnitude, units)`` sequence, and are converted to
    quantities.
    """

    def as_quantity(value):
        """Convert a string or (magnitude, units) sequence to a quantity."""
        if isinstance(value, basestring):
            return UREG(value)
        if isinstance(value, Q_):
            return value
        # call the registry instead of indexing it, which is deprecated
        return value[0] * UREG(str(value[1]))

    # check if simulation file is first argument or is in keyword arguments
    simfile = simfile or kwargs.get('simfile')  # defaults to None
    # check if simulation file is still None or already set as an attribute
    simfile = simfile or getattr(self, 'param_file', None)
    #: parameter file
    self.param_file = simfile
    # read and load JSON parameter map file as "parameters"
    if self.param_file is not None:
        with open(self.param_file, 'r') as fp:
            #: parameters from file for simulation
            self.parameters = json.load(fp)
    # if there are no parameters yet, then use kwargs
    if not hasattr(self, 'parameters'):
        self.parameters = kwargs
    else:
        # use any keyword arguments instead of parameters
        self.parameters.update(kwargs)
    # make pycharm happy - attributes assigned in loop by attrs
    self.thresholds = {}
    self.display_frequency = 0
    self.display_fields = {}
    self.write_frequency = 0
    self.write_fields = {}
    # pop deprecated attribute names
    for k, v in self.deprecated.iteritems():
        val = self.parameters.pop(v, None)
        # update parameters if deprecated attr used and no new attr
        if val and k not in self.parameters:
            self.parameters[k] = val
    # Attributes
    for k, v in self.attrs.iteritems():
        setattr(self, k, self.parameters.get(k, v))
    # member docstrings are in documentation since attrs are generated
    if self.ID is None:
        # generate id from object
        self.ID = id_maker(self)
    if self.path is not None:
        # expand environment variables, ~ and make absolute path
        self.path = os.path.expandvars(os.path.expanduser(self.path))
        self.path = os.path.abspath(self.path)
    # convert simulation interval to Pint Quantity
    self.interval = as_quantity(self.interval)
    # convert simulation length to Pint Quantity
    self.sim_length = as_quantity(self.sim_length)
    # convert simulation length to interval units to calc total intervals
    sim_to_interval_units = self.sim_length.to(self.interval.units)
    #: total number of intervals simulated
    self.number_intervals = np.ceil(sim_to_interval_units / self.interval)
    #: interval index, start at zero
    self.interval_idx = 0
    #: pause status
    self._ispaused = False
    #: finished status
    self._iscomplete = False
    #: initialized status
    self._isinitialized = False
    #: order of calculations
    self.calc_order = []
    #: command queue
    self.cmd_queue = Queue.Queue()
    #: index iterator
    self.idx_iter = self.index_iterator()
    #: data loaded status
    self._is_data_loaded = False
def register(self, newdata, *args, **kwargs):
    """
    Register data in registry. Meta for each data is specified by
    positional or keyword arguments after the new data and consists of the
    following:

    * ``uncertainty`` - Map of uncertainties in percent corresponding to new
      keys. The uncertainty keys must be a subset of the new data keys.
    * ``variance`` - Square of the uncertainty (no units).
    * ``isconstant``: Map corresponding to new keys whose values are
      ``True`` if constant or ``False`` if periodic. These keys must be a
      subset of the new data keys.
    * ``timeseries``: Name of corresponding time series data, ``None`` if no
      time series. _EG_: DNI data ``timeseries`` attribute might be set to a
      date/time data that it corresponds to. More than one data can have the
      same ``timeseries`` data.
    * ``data_source``: the
      :class:`~carousel.core.data_sources.DataSource` superclass that was
      used to acquire this data. This can be used to group data from a
      specific source together.

    :param newdata: New data to add to registry. When registering new data,
        keys are not allowed to override existing keys in the data
        registry.
    :type newdata: mapping
    :raises: :exc:`~carousel.core.exceptions.UncertaintyPercentUnitsError`
        if an uncertainty doesn't have units of percent
    :raises: :exc:`~carousel.core.exceptions.UncertaintyVarianceError` if a
        variance has no matching uncertainty or isn't its square
    :raises: :exc:`TypeError` if an ``isconstant`` value isn't boolean
    """
    # positional meta is matched to the meta names in order
    kwargs.update(zip(self.meta_names, args))
    uncertainty = kwargs['uncertainty']
    variance = kwargs['variance']
    isconstant = kwargs['isconstant']
    # check uncertainty is percent
    if uncertainty:
        percent = UREG('percent')
        for k0, d in uncertainty.iteritems():
            for k1, v01 in d.iteritems():
                units = v01.units
                if units != percent:
                    keys = '%s-%s' % (k0, k1)
                    raise UncertaintyPercentUnitsError(keys, units)
    # check variance is square of uncertainty
    if variance and uncertainty:
        for k0, d in variance.iteritems():
            for k1, v01 in d.iteritems():
                keys = '%s-%s' % (k0, k1)
                # test for a missing uncertainty before looking it up, so it
                # is reported as a variance error and not as a KeyError
                if k0 not in uncertainty or k1 not in uncertainty[k0]:
                    raise UncertaintyVarianceError(keys, v01)
                v2 = np.asarray(uncertainty[k0][k1].to('fraction').m) ** 2.0
                if not np.allclose(np.asarray(v01), v2):
                    raise UncertaintyVarianceError(keys, v01)
    # check that isconstant is boolean
    if isconstant:
        for k, v in isconstant.iteritems():
            if not isinstance(v, bool):
                classname = self.__class__.__name__
                error_msg = [
                    '%s meta "isconstant" should be' % classname,
                    'boolean, but it was "%s" for "%s".' % (v, k)
                ]
                raise TypeError(' '.join(error_msg))
    # call super method, meta must be passed as kwargs!
    super(DataRegistry, self).register(newdata, **kwargs)
its units are, what the data will be called in calculations and any other meta-data the registry requires. """ from carousel.core import (UREG, Registry, CarouselJSONEncoder, CommonBase, Parameter) from carousel.core.data_readers import JSONReader from carousel.core.exceptions import (UncertaintyPercentUnitsError, UncertaintyVarianceError) import json import os import time from copy import copy import numpy as np DFLT_UNC = 1.0 * UREG('percent') # default uncertainty class DataParameter(Parameter): """ Field for data parameters. """ _attrs = ['units', 'uncertainty', 'isconstant', 'timeseries'] class DataRegistry(Registry): """ A registry for data sources. The meta names are: ``uncertainty``, ``variance``, ``isconstant``, ``timeseries`` and ``data_source`` """ #: meta names
def _read_header(f, header_param):
    """
    Read and parse data from 1st line of a file.

    :param f: :func:`file` or :class:`~StringIO.StringIO` object from which
        to read 1st line.
    :type f: file
    :param header_param: Parameters used to parse the data from the header.
        Contains "delimiter" and "fields".
    :type header_param: dict
    :returns: Dictionary of data read from header.
    :rtype: dict
    :raises: :exc:`~carousel.core.exceptions.UnnamedDataError` if
        "fields" is missing
    :raises: :exc:`TypeError` if a field type isn't supported

    The **header_param** argument contains keys to read the 1st line of
    **f**. If "delimiter" is ``None`` or missing, the default delimiter is a
    comma, otherwise "delimiter" can be any single character, integer or
    sequence of ``int``.

    * single character -- a delimiter
    * single integer -- uniform fixed width
    * sequence of ``int`` -- fixed widths, the number of fields should
      correspond to the length of the sequence.

    NOTE(review): the delimiter is converted to a string and passed to
    :class:`csv.DictReader`, so the fixed width forms don't look supported
    by this function - confirm.

    The "fields" key is a list of (parameter-name, parameter-type[,
    parameter-units]) lists.
    """
    # default delimiter is a comma, can't be None
    header_delim = header_param.get('delimiter')
    if header_delim is None:
        # covers both a missing key and an explicit ``None`` or JSON null
        header_delim = ','
    header_delim = str(header_delim)
    # don't allow unnamed fields
    if 'fields' not in header_param:
        raise UnnamedDataError(f.name)
    header_fields = {field[0]: field[1:] for field in header_param['fields']}
    # header_names can't be generator b/c DictReader needs list, and can't
    # be dictionary b/c must be same order as 'fields' to match data read by
    # csv
    header_names = [field[0] for field in header_param['fields']]
    # read header
    header_str = StringIO(f.readline())  # read the 1st line
    # use csv because it will preserve quoted fields with commas
    # make a csv.DictReader from header string, use header names for
    # fieldnames and set delimiter to header delimiter
    header_reader = csv.DictReader(header_str, header_names,
                                   delimiter=header_delim,
                                   skipinitialspace=True)
    data = next(header_reader)  # parse the header dictionary
    # iterate over items in data
    for k, v in data.iteritems():
        header_type = header_fields[k][0]  # spec'd type
        # type names come from the parameter file, so they are matched
        # against a whitelist instead of being evaluated
        if isinstance(header_type, basestring):
            type_name = header_type.lower()
            if type_name.startswith('int'):
                header_type = int  # coerce to integer
            elif type_name.startswith('long'):
                header_type = long  # coerce to long integer
            elif type_name.startswith('float'):
                header_type = float  # to floating decimal point
            elif type_name.startswith('str'):
                header_type = str  # coerce to string
            elif type_name.startswith('bool'):
                # NOTE: bool() of any non-empty string is ``True``
                header_type = bool  # coerce to boolean
            else:
                raise TypeError('"%s" is not a supported type.' % header_type)
        data[k] = header_type(v)  # cast v to type
        # check for units in 3rd element
        if len(header_fields[k]) > 1:
            units = UREG(str(header_fields[k][1]))  # spec'd units
            data[k] = data[k] * units  # apply units
    return data
def load_data(self, filename, *args, **kwargs):
    """
    Load parameters from Excel spreadsheet.

    Each parameter names a sheet and a two element range in its "extras".
    Two integers select a cell, an integer and a list select part of a row
    or column, and two lists select a 2-D block. Real data is returned as a
    quantity with the parameter's units, data that is all strings is
    returned as read and empty data is returned as ``None``.

    :param filename: Name of Excel workbook with data.
    :type filename: str
    :returns: Data read from Excel workbook.
    :rtype: dict
    :raises: :exc:`ValueError` if data is neither all real nor all strings
    """
    # workbook read from file
    workbook = open_workbook(filename, verbosity=True)
    data = {}  # an empty dictionary to store data
    # iterate through the parameters
    for param, pval in self.parameters.iteritems():
        sheet = pval['extras']['sheet']
        # get each worksheet from the workbook
        worksheet = workbook.sheet_by_name(sheet)
        # split the parameter's range elements
        prng0, prng1 = pval['extras']['range']
        # missing "units", json ``null`` and Python ``None`` all OK!
        # convert to str from unicode, None to '' (dimensionless)
        punits = str(pval.get('units') or '')
        # replace None with empty list
        if prng0 is None:
            prng0 = []
        if prng1 is None:
            prng1 = []
        # FIXME: Use duck-typing here instead of type-checking!
        # if both elements in range are `int` then parameter is a cell
        if isinstance(prng0, int) and isinstance(prng1, int):
            datum = worksheet.cell_value(prng0, prng1)
        # if the either element is a `list` then parameter is a slice
        elif isinstance(prng0, list) and isinstance(prng1, int):
            datum = worksheet.col_values(prng1, *prng0)
        elif isinstance(prng0, int) and isinstance(prng1, list):
            datum = worksheet.row_values(prng0, *prng1)
        # if both elements are `list` then parameter is 2-D
        else:
            datum = []
            for col in xrange(prng0[1], prng1[1]):
                datum.append(worksheet.col_values(col, prng0[0], prng1[0]))
        # duck typing that datum is real
        try:
            # builtin float, the ``np.float`` alias is deprecated
            npdatum = np.array(datum, dtype=float)
        except ValueError:
            # check for iterable:
            # if `datum` can't be coerced to float, then it must be
            # *string* & strings *are* iterables, so don't check!
            # check for strings:
            # data must be real or *all* strings!
            # empty string, None or JSON null also OK
            # all([]) == True but any([]) == False
            if not datum:
                data[param] = None  # convert empty to None
            elif all(isinstance(_, basestring) for _ in datum):
                data[param] = datum  # all str is OK (EG all 'TMY')
            elif all(not _ for _ in datum):
                data[param] = None  # convert list of empty to None
            else:
                # not all real or str; bare raise keeps the traceback
                raise
        else:
            data[param] = npdatum * UREG(punits)
        # FYI: only put one statement into try-except test otherwise
        # might catch different error than expected. use ``else`` as
        # option to execute only if exception *not* raised.
    return data