def __init__(
    self,
    params=None,
    expected_params=None,
    input_names=None,
    output_names=None,
    debug_mode=None,
    error_method=None,
):
    """Set up naming, bookkeeping and parameter state for the stage.

    Parameters
    ----------
    params : ParamSelector, Mapping, or other
        A `ParamSelector` is used as-is. A mapping whose keys are exactly
        "regular_params", "selector_param_sets" and "selections" is
        expanded into ``ParamSelector(**params)``. Anything else is handed
        over as ``ParamSelector(regular_params=params)``.

    expected_params : None, string, or sequence of strings
        Names that must be present in `params` (checked by
        `_check_params`).

    input_names, output_names : None, string, or sequence of strings
        Stored privately; None is replaced by an empty list.

    debug_mode, error_method
        Stored unchanged when truthy; any falsy value is stored as None.

    """
    # Strings are accepted for convenience; normalize them up front so a
    # single name and several names go through the same interface
    expected_params = arg_str_seq_none(expected_params, "expected_params")
    input_names = arg_str_seq_none(input_names, "input_names")
    output_names = arg_str_seq_none(output_names, "output_names")

    # The last two components of the dotted module path name the stage
    # and the service that implements it
    dotted_path = self.__module__.split(".")

    self.stage_name = dotted_path[-2]
    """Name of the stage (e.g. flux, osc, aeff, reco, pid, etc."""

    self.service_name = dotted_path[-1]
    """Name of the specific service implementing the stage."""

    self.expected_params = expected_params
    """The full set of parameters (by name) that must be present in
    `params`"""

    if input_names is None:
        input_names = []
    if output_names is None:
        output_names = []
    self._input_names = input_names
    self._output_names = output_names

    self._source_code_hash = None

    self.outputs = None
    """Last-computed outputs; None if no outputs have been computed yet."""

    self._attrs_to_hash = set()
    """Attributes of the stage that are to be included in its hash value"""

    self.full_hash = True
    """Whether to do full hashing if true, otherwise do fast hashing"""

    # Build (or adopt) the ParamSelector holding this stage's params
    selector_kwarg_names = {
        "regular_params", "selector_param_sets", "selections"
    }
    if (isinstance(params, Mapping)
            and set(params.keys()) == selector_kwarg_names):
        selector = ParamSelector(**params)
    elif isinstance(params, ParamSelector):
        selector = params
    else:
        selector = ParamSelector(regular_params=params)
    self._param_selector = selector

    # Pull the currently-selected params out of the selector; only once
    # they pass both checks do they become this stage's params
    selected = self._param_selector.params
    self._check_params(selected)
    self.validate_params(selected)
    self._params = selected

    # Falsy settings (None, False, empty string, ...) collapse to None
    self._debug_mode = debug_mode if debug_mode else None
    self._error_method = error_method if error_method else None

    self.inputs = None
def parse_pipeline_config(config):
    """Parse pipeline config.

    Parameters
    ----------
    config : string or PISAConfigParser
        A string is passed to `from_file` to obtain the parser object.

    Returns
    -------
    stage_dicts : OrderedDict
        Keys are (stage_name, service_name) tuples and values are OrderedDicts
        with keys the argnames and values the arguments' values. Some known arg
        values are parsed out fully into Python objects, while the rest remain
        as strings that must be used or parsed elsewhere. One additional key,
        'detector_name', holds the value of the "detector_name" option of the
        "pipeline" section (None if that option is absent).

    Raises
    ------
    TypeError
        If `config` is neither a string nor a PISAConfigParser.
    NoSectionError
        If the config has no "binning" section.
    IOError
        If a stage/service listed in the pipeline "order" has no section.
    ValueError
        If a param already defined by a previous stage is given further
        specs in a later stage's section.

    """
    # Note: imports placed here to avoid circular imports
    from pisa.core.binning import MultiDimBinning, OneDimBinning
    from pisa.core.param import ParamSelector

    # `basestring` only exists on Python 2; fall back to `str` on Python 3 so
    # the type check below works on both
    try:
        string_types = basestring  # pylint: disable=undefined-variable
    except NameError:
        string_types = str

    if isinstance(config, string_types):
        config = from_file(config)
    elif isinstance(config, PISAConfigParser):
        pass
    else:
        raise TypeError(
            '`config` must either be a string or PISAConfigParser. Got %s '
            'instead.' % type(config)
        )

    if not config.has_section('binning'):
        raise NoSectionError(
            "Could not find 'binning'. Only found sections: %s"
            % config.sections()
        )

    # Create binning objects: each "<binning>.order" option lists the
    # dimensions, each of which is defined by a "<binning>.<dim>" option
    binning_dict = {}
    for name in config['binning'].keys():
        if not name.endswith('.order'):
            continue

        order = split(config.get('binning', name))
        binning, _ = split(name, sep='.')
        bins = []
        for bin_name in order:
            try:
                def_raw = config.get('binning', binning + '.' + bin_name)
            except Exception:
                dims_defined = [
                    split(dim, sep='.')[1]
                    for dim in config['binning'].keys()
                    if dim.startswith(binning + '.')
                    and not dim.endswith('.order')
                ]
                logging.error(
                    "Failed to find definition of '%s' dimension of '%s'"
                    " binning entry. Only found definition(s) of: %s",
                    bin_name, binning, dims_defined
                )
                raise

            # NOTE(review): `eval` executes arbitrary code found in the
            # config file; only safe as long as configs are trusted input.
            try:
                kwargs = eval(def_raw)  # pylint: disable=eval-used
            except Exception:
                logging.error(
                    "Failed to evaluate definition of '%s' dimension of"
                    " '%s' binning entry:\n'%s'",
                    bin_name, binning, def_raw
                )
                raise

            try:
                bins.append(OneDimBinning(bin_name, **kwargs))
            except Exception:
                logging.error(
                    "Failed to instantiate new `OneDimBinning` from '%s'"
                    " dimension of '%s' binning entry with definition:\n"
                    "'%s'\n", bin_name, binning, kwargs
                )
                raise

        binning_dict[binning] = MultiDimBinning(bins)

    # Pipeline section
    section = 'pipeline'

    # Get and parse the order of the stages (and which services implement them)
    order = [split(x, STAGE_SEP) for x in split(config.get(section, 'order'))]

    param_selections = []
    if config.has_option(section, 'param_selections'):
        param_selections = split(config.get(section, 'param_selections'))

    detector_name = None
    if config.has_option(section, 'detector_name'):
        detector_name = config.get(section, 'detector_name')

    # Parse [stage.<stage_name>] sections and store to stage_dicts
    stage_dicts = OrderedDict()
    for stage, service in order:
        old_section_header = 'stage%s%s' % (STAGE_SEP, stage)
        new_section_header = '%s%s%s' % (stage, STAGE_SEP, service)
        if config.has_section(old_section_header):
            logging.warning(
                '"%s" is an old-style section header, in the future use "%s"',
                old_section_header, new_section_header
            )
            section = old_section_header
        elif config.has_section(new_section_header):
            section = new_section_header
        else:
            raise IOError(
                'missing section in cfg for stage "%s" service "%s"'
                % (stage, service)
            )

        # Instantiate dict to store args to pass to this stage
        service_kwargs = OrderedDict()

        param_selector = ParamSelector(selections=param_selections)
        service_kwargs['params'] = param_selector

        n_params = 0
        for fullname in config.options(section):
            try:
                value = config.get(section, fullname)
            except Exception:
                logging.error(
                    'Unable to obtain value of option "%s" in section "%s".',
                    fullname, section
                )
                raise

            # See if this matches a param specification
            param_match = PARAM_RE.match(fullname)
            if param_match is not None:
                n_params += 1

                param_match_dict = param_match.groupdict()
                param_subfields = param_match_dict['subfields'].split('.')

                # Figure out what the dotted fields represent...
                infodict = interpret_param_subfields(subfields=param_subfields)

                # If field is an attr, skip since these are located manually
                if infodict['attr'] is not None:
                    continue

                # Check if this param already exists in a previous stage; if
                # so, make sure there are no specs for this param, but just a
                # link to the previous param object that is already
                # instantiated.
                for kw in stage_dicts.values():
                    # Stage did not get a `params` argument from config
                    if 'params' not in kw:
                        continue

                    # Retrieve the param from the ParamSelector
                    try:
                        param = kw['params'].get(
                            name=infodict['pname'],
                            selector=infodict['selector']
                        )
                    except KeyError:
                        continue

                    # Make sure there are no other specs (in this section) for
                    # the param defined in a previous section
                    for a in PARAM_ATTRS:
                        if config.has_option(section, '%s.%s' % (fullname, a)):
                            raise ValueError("Parameter spec. '%s' of '%s' "
                                             "found in section '%s', but "
                                             "parameter exists in previous "
                                             "stage!" % (a, fullname, section))

                    break

                # Param *not* found in a previous stage (i.e., no explicit
                # `break` encountered in `for` loop above); therefore must
                # instantiate it.
                else:
                    param = parse_param(
                        config=config,
                        section=section,
                        selector=infodict['selector'],
                        fullname=fullname,
                        pname=infodict['pname'],
                        value=value
                    )

                param_selector.update(param, selector=infodict['selector'])

            # If it's not a param spec but contains 'binning', assume it's a
            # binning spec for CAKE stages
            elif 'binning' in fullname:
                service_kwargs[fullname] = binning_dict[value]

            # it's gonna be a PI stage
            elif '_specs' in fullname:
                value = parse_string_literal(value)
                # is it None?
                if value is None:
                    service_kwargs[fullname] = value
                # is it evts?
                elif value in ['evnts', 'events']:
                    service_kwargs[fullname] = 'events'
                # so it gotta be a binning
                else:
                    service_kwargs[fullname] = binning_dict[value]

            # it's a list of in/output names
            elif fullname.endswith('_names'):
                value = split(value)
                service_kwargs[fullname] = value

            # Otherwise it's some other stage instantiation argument; identify
            # this by its full name and try to interpret and instantiate a
            # Python object using the string
            else:
                try:
                    value = parse_quantity(value)
                    value = value.nominal_value * value.units
                except ValueError:
                    value = parse_string_literal(value)
                service_kwargs[fullname] = value

        # If no params actually specified in config, remove 'params' from the
        # service's keyword args
        if n_params == 0:
            service_kwargs.pop('params')

        # Store the service's kwargs to the stage_dicts
        stage_dicts[(stage, service)] = service_kwargs

    stage_dicts['detector_name'] = detector_name

    return stage_dicts
class BaseStage(object):
    """
    PISA base stage base class. Should encompass all behaviors common to
    (almost) all stages.

    Specialization should be done via subclasses.

    Parameters
    ----------
    params : ParamSelector, dict of ParamSelector kwargs, ParamSet, or object
        instantiable to ParamSet

    expected_params : list of strings
        List containing required `params` names.

    input_names : None or list of strings

    output_names : None or list of strings

    debug_mode : None, bool, or string
        If None, False, or empty string, the stage runs normally.

        Otherwise, the stage runs in debug mode. This disables caching (forcing
        recomputation of any nominal transforms, transforms, and outputs).
        Services that subclass from the `Stage` class can then implement
        further custom behavior when this mode is set by reading the value of
        the `self.debug_mode` attribute.

    error_method : None, bool, or string
        If None, False, or empty string, the stage does not compute errors for
        the transforms and does not apply any (additional) error to produce its
        outputs. (If the inputs already have errors, these are propagated.)

        Otherwise, this specifies the method by which the stage should compute
        errors for the transforms to be applied in producing outputs from the
        stage.

    Notes
    -----
    The following methods can be overridden in derived classes where
    applicable:
        validate_params
            Perform validation on any parameters.

    """

    def __init__(
        self,
        params=None,
        expected_params=None,
        input_names=None,
        output_names=None,
        debug_mode=None,
        error_method=None,
    ):
        # Allow for string inputs, but have to populate into lists for
        # consistent interfacing to one or multiple of these things
        expected_params = arg_str_seq_none(expected_params, "expected_params")
        input_names = arg_str_seq_none(input_names, "input_names")
        output_names = arg_str_seq_none(output_names, "output_names")

        # Stage and service names are the last two components of the dotted
        # path of the module that defines the (sub)class
        module_path = self.__module__.split(".")

        self.stage_name = module_path[-2]
        """Name of the stage (e.g. flux, osc, aeff, reco, pid, etc."""

        self.service_name = module_path[-1]
        """Name of the specific service implementing the stage."""

        self.expected_params = expected_params
        """The full set of parameters (by name) that must be present in
        `params`"""

        self._input_names = [] if input_names is None else input_names
        self._output_names = [] if output_names is None else output_names

        self._source_code_hash = None

        self.outputs = None
        """Last-computed outputs; None if no outputs have been computed yet."""

        self._attrs_to_hash = set()
        """Attributes of the stage that are to be included in its hash value"""

        self.full_hash = True
        """Whether to do full hashing if true, otherwise do fast hashing"""

        # A mapping with exactly these keys is treated as the kwargs for
        # instantiating a ParamSelector
        param_selector_keys = {
            "regular_params", "selector_param_sets", "selections"
        }
        if (isinstance(params, Mapping)
                and set(params.keys()) == param_selector_keys):
            self._param_selector = ParamSelector(**params)
        elif isinstance(params, ParamSelector):
            self._param_selector = params
        else:
            self._param_selector = ParamSelector(regular_params=params)

        # Get the params from the ParamSelector, validate, and set as the
        # params object for this stage
        p = self._param_selector.params
        self._check_params(p)
        self.validate_params(p)
        self._params = p

        # Any falsy value (None, False, empty string) is stored as None
        self._debug_mode = debug_mode if debug_mode else None
        self._error_method = error_method if error_method else None

        self.inputs = None

    def setup(self):
        """Does nothing in the base class; hook for subclasses to override."""

    def run(self, inputs=None):  # pylint: disable=unused-argument
        """Does nothing in the base class and returns None; hook for
        subclasses to override."""
        return None

    def select_params(self, selections, error_on_missing=False):
        """Apply the `selections` to contained ParamSet.

        Parameters
        ----------
        selections : string or iterable
        error_on_missing : bool
            If True, the KeyError raised by the param selector when a
            selection is not found is propagated; otherwise it is suppressed
            and only a trace-level message is logged.

        """
        try:
            # Always request strictness from the selector; whether a miss is
            # fatal is decided here, based on `error_on_missing`
            self._param_selector.select_params(selections,
                                               error_on_missing=True)
        except KeyError:
            msg = "Not all of the selections %s found in this stage." % (
                selections, )
            if error_on_missing:
                # logging.error(msg)
                raise
            logging.trace(msg)
        else:
            logging.trace("`selections` = %s yielded `params` = %s" %
                          (selections, self.params))

    def _check_params(self, params):
        """Make sure that `expected_params` is defined and that exactly the
        params specified in self.expected_params are present.

        Raises
        ------
        ValueError
            If the names in `params.names` differ from `expected_params`; the
            message lists the excess and/or missing names.

        """
        assert self.expected_params is not None
        exp_p, got_p = set(self.expected_params), set(params.names)
        if exp_p == got_p:
            return

        excess = got_p.difference(exp_p)
        missing = exp_p.difference(got_p)
        err_strs = []
        if excess:
            err_strs.append("Excess params provided: %s" %
                            ", ".join(sorted(excess)))
        if missing:
            err_strs.append("Missing params: %s" % ", ".join(sorted(missing)))

        raise ValueError("Expected parameters: %s;\n" %
                         ", ".join(sorted(exp_p)) + ";\n".join(err_strs))

    @property
    def params(self):
        """Params"""
        return self._params

    @property
    def param_selections(self):
        """Param selections"""
        return sorted(deepcopy(self._param_selector.param_selections))

    @property
    def input_names(self):
        """Names of input objects (e.g. names of input maps)"""
        return deepcopy(self._input_names)

    @property
    def output_names(self):
        """Names of output objects (e.g. names of output maps)"""
        return deepcopy(self._output_names)

    @property
    def source_code_hash(self):
        """Hash for the source code of this object's class.

        Not meant to be perfect, but should suffice for tracking provenance of
        an object stored to disk that were produced by a Stage.
        """
        # Computed on first access, then reused
        if self._source_code_hash is None:
            self._source_code_hash = hash_obj(
                inspect.getsource(self.__class__), full_hash=self.full_hash)
        return self._source_code_hash

    @property
    def hash(self):
        """Combines source_code_hash and params.hash for checking/tagging
        provenance of persisted (on-disk) objects."""
        objects_to_hash = [self.source_code_hash, self.params.hash]
        # Sorted so the result does not depend on set iteration order
        for attr in sorted(self._attrs_to_hash):
            objects_to_hash.append(
                hash_obj(getattr(self, attr), full_hash=self.full_hash))
        return hash_obj(objects_to_hash, full_hash=self.full_hash)

    def __hash__(self):
        return self.hash

    def include_attrs_for_hashes(self, attrs):
        """Include a class attribute or attributes to be included when
        computing hashes (for all that apply: nominal transforms, transforms,
        and/or outputs).

        This is a convenience that allows some customization of hashing (and
        hence caching) behavior without having to override the hash-computation
        methods (`_derive_nominal_transforms_hash`, `_derive_transforms_hash`,
        and `_derive_outputs_hash`).

        Parameters
        ----------
        attrs : string or sequence thereof
            Name of the attribute(s) to include for hashes. Each must be an
            existing attribute of the object at the time this method is
            invoked.

        Raises
        ------
        ValueError
            If any name in `attrs` is not an attribute of the object; in that
            case none of the passed names are added.

        """
        # `basestring` only exists on Python 2; fall back to `str` on Python 3
        try:
            string_types = basestring  # pylint: disable=undefined-variable
        except NameError:
            string_types = str

        if isinstance(attrs, string_types):
            attrs = [attrs]

        # Validate that all are actually attrs before setting any
        for attr in attrs:
            assert isinstance(attr, string_types)
            if not hasattr(self, attr):
                raise ValueError('"%s" not an attribute of the class; not'
                                 " adding *any* of the passed attributes %s to"
                                 " attrs to hash." % (attr, attrs))

        # Include the attribute names
        self._attrs_to_hash.update(attrs)

    @property
    def debug_mode(self):
        """Read-only attribute indicating whether or not the stage is being run
        in debug mode. None indicates non-debug mode, while non-none value
        indicates a debug mode."""
        return self._debug_mode

    def validate_params(self, params):  # pylint: disable=unused-argument
        """Override this method to test if params are valid; e.g., check range
        and dimensionality. Invalid params should be indicated by raising an
        exception; no value should be returned."""
        return

    @property
    def error_method(self):
        """Read-only attribute indicating whether or not the stage will compute
        errors for its transforms and outputs (whichever is applicable). Errors
        on inputs are propagated regardless of this setting."""
        return self._error_method
class Stage():
    """
    PISA stage base class. Should be used to implement PISA Pi stages

    Specialization should be done via subclasses.

    Parameters
    ----------
    data : ContainerSet or None
        object to be passed along

    params : ParamSelector, dict of ParamSelector kwargs, ParamSet, or object
        instantiable to ParamSet

    expected_params : list of strings
        List containing required `params` names.

    debug_mode : None, bool, or string
        If None, False, or empty string, the stage runs normally.

        Otherwise, the stage runs in debug mode. This disables caching (forcing
        recomputation of any nominal transforms, transforms, and outputs).
        Services that subclass from the `Stage` class can then implement
        further custom behavior when this mode is set by reading the value of
        the `self.debug_mode` attribute.

    error_method
        Stored unchanged and exposed through the `error_method` property.

    calc_mode : pisa.core.binning.MultiDimBinning, str, or None
        Specify in what to do the calculation

    apply_mode : pisa.core.binning.MultiDimBinning, str, or None
        Specify in what to do the application

    profile : bool
        If True, the wall-clock duration of every call to `setup_function`,
        `compute_function` and `apply_function` is recorded in `setup_times`,
        `calc_times` and `apply_times`, respectively.

    """

    def __init__(
        self,
        data=None,
        params=None,
        expected_params=None,
        debug_mode=None,
        error_method=None,
        calc_mode=None,
        apply_mode=None,
        profile=False,
    ):
        # Allow for string inputs, but have to populate into lists for
        # consistent interfacing to one or multiple of these things
        expected_params = arg_str_seq_none(expected_params, "expected_params")

        # Stage and service names are the last two components of the dotted
        # path of the module that defines the (sub)class
        module_path = self.__module__.split(".")

        self.stage_name = module_path[-2]
        """Name of the stage (e.g. flux, osc, aeff, reco, pid, etc."""

        self.service_name = module_path[-1]
        """Name of the specific service implementing the stage."""

        self.expected_params = expected_params
        """The full set of parameters (by name) that must be present in
        `params`"""

        self._source_code_hash = None
        """Cached hash of the class' source code; None until first computed
        (see `source_code_hash`)."""

        self._attrs_to_hash = set()
        """Attributes of the stage that are to be included in its hash value"""

        self.full_hash = True
        """Whether to do full hashing if true, otherwise do fast hashing"""

        # A mapping with exactly these keys is treated as the kwargs for
        # instantiating a ParamSelector
        param_selector_keys = {
            "regular_params", "selector_param_sets", "selections"
        }
        if (isinstance(params, Mapping)
                and set(params.keys()) == param_selector_keys):
            self._param_selector = ParamSelector(**params)
        elif isinstance(params, ParamSelector):
            self._param_selector = params
        else:
            self._param_selector = ParamSelector(regular_params=params)

        # Get the params from the ParamSelector, validate, and set as the
        # params object for this stage
        p = self._param_selector.params
        self._check_params(p)
        self.validate_params(p)
        self._params = p

        # Any falsy value (None, False, empty string) is stored as None
        self._debug_mode = debug_mode if debug_mode else None

        self.calc_mode = calc_mode
        self.apply_mode = apply_mode
        self.data = data

        self._error_method = error_method

        self.param_hash = None
        """Value of `params.values_hash` at the last computation; used by
        `compute` to skip recomputation when params did not change."""

        self.profile = profile
        self.setup_times = []
        self.calc_times = []
        self.apply_times = []

    def __repr__(self):
        return 'Stage "%s"' % (self.__class__.__name__)

    @staticmethod
    def _format_times(times):
        """Summarize a list of durations as total, call count and average."""
        tot = np.sum(times)
        n = len(times)
        ave = 0. if n == 0 else tot / n
        return 'Total time %.5f s, n calls: %i, time/call: %.5f s' % (
            tot, n, ave)

    def report_profile(self, detailed=False):
        """Print the timings recorded for setup, calc and apply.

        Parameters
        ----------
        detailed : bool
            If True, additionally print the duration of each individual run.

        """
        print(self.stage_name, self.service_name)
        sections = (
            ('- setup: ', self.setup_times),
            ('- calc: ', self.calc_times),
            ('- apply: ', self.apply_times),
        )
        for label, times in sections:
            print(label, self._format_times(times))
            if detailed:
                print(
                    ' Individual runs: ',
                    ', '.join(
                        '%i: %.3f s' % (i, t) for i, t in enumerate(times)
                    )
                )

    def select_params(self, selections, error_on_missing=False):
        """Apply the `selections` to contained ParamSet.

        Parameters
        ----------
        selections : string or iterable
        error_on_missing : bool
            If True, the KeyError raised by the param selector when a
            selection is not found is propagated; otherwise it is suppressed
            and only a trace-level message is logged.

        """
        try:
            # Always request strictness from the selector; whether a miss is
            # fatal is decided here, based on `error_on_missing`
            self._param_selector.select_params(selections,
                                               error_on_missing=True)
        except KeyError:
            msg = "Not all of the selections %s found in this stage." % (
                selections, )
            if error_on_missing:
                # logging.error(msg)
                raise
            logging.trace(msg)
        else:
            logging.trace("`selections` = %s yielded `params` = %s" %
                          (selections, self.params))

    def _check_params(self, params):
        """Make sure that `expected_params` is defined and that exactly the
        params specified in self.expected_params are present.

        Raises
        ------
        ValueError
            If the names in `params.names` differ from `expected_params`; the
            message lists the excess and/or missing names.

        """
        assert self.expected_params is not None
        exp_p, got_p = set(self.expected_params), set(params.names)
        if exp_p == got_p:
            return

        excess = got_p.difference(exp_p)
        missing = exp_p.difference(got_p)
        err_strs = []
        if excess:
            err_strs.append("Excess params provided: %s" %
                            ", ".join(sorted(excess)))
        if missing:
            err_strs.append("Missing params: %s" % ", ".join(sorted(missing)))

        raise ValueError("Expected parameters: %s;\n" %
                         ", ".join(sorted(exp_p)) + ";\n".join(err_strs))

    @property
    def params(self):
        """Params"""
        return self._params

    @property
    def param_selections(self):
        """Param selections"""
        return sorted(deepcopy(self._param_selector.param_selections))

    @property
    def source_code_hash(self):
        """Hash for the source code of this object's class.

        Not meant to be perfect, but should suffice for tracking provenance of
        an object stored to disk that were produced by a Stage.
        """
        # Computed on first access, then reused
        if self._source_code_hash is None:
            self._source_code_hash = hash_obj(
                inspect.getsource(self.__class__), full_hash=self.full_hash)
        return self._source_code_hash

    @property
    def hash(self):
        """Combines source_code_hash and params.hash for checking/tagging
        provenance of persisted (on-disk) objects."""
        objects_to_hash = [self.source_code_hash, self.params.hash]
        # Sorted so the result does not depend on set iteration order
        for attr in sorted(self._attrs_to_hash):
            objects_to_hash.append(
                hash_obj(getattr(self, attr), full_hash=self.full_hash))
        return hash_obj(objects_to_hash, full_hash=self.full_hash)

    def __hash__(self):
        return self.hash

    def include_attrs_for_hashes(self, attrs):
        """Include a class attribute or attributes to be included when
        computing hashes (for all that apply: nominal transforms, transforms,
        and/or outputs).

        This is a convenience that allows some customization of hashing (and
        hence caching) behavior without having to override the hash-computation
        methods (`_derive_nominal_transforms_hash`, `_derive_transforms_hash`,
        and `_derive_outputs_hash`).

        Parameters
        ----------
        attrs : string or sequence thereof
            Name of the attribute(s) to include for hashes. Each must be an
            existing attribute of the object at the time this method is
            invoked.

        Raises
        ------
        ValueError
            If any name in `attrs` is not an attribute of the object; in that
            case none of the passed names are added.

        """
        if isinstance(attrs, str):
            attrs = [attrs]

        # Validate that all are actually attrs before setting any
        for attr in attrs:
            assert isinstance(attr, str)
            if not hasattr(self, attr):
                raise ValueError('"%s" not an attribute of the class; not'
                                 " adding *any* of the passed attributes %s to"
                                 " attrs to hash." % (attr, attrs))

        # Include the attribute names
        self._attrs_to_hash.update(attrs)

    @property
    def debug_mode(self):
        """Read-only attribute indicating whether or not the stage is being run
        in debug mode. None indicates non-debug mode, while non-none value
        indicates a debug mode."""
        return self._debug_mode

    def validate_params(self, params):  # pylint: disable=unused-argument, no-self-use
        """Override this method to test if params are valid; e.g., check range
        and dimensionality. Invalid params should be indicated by raising an
        exception; no value should be returned."""
        return

    @property
    def error_method(self):
        """Read-only attribute indicating whether or not the stage will compute
        errors for its transforms and outputs (whichever is applicable). Errors
        on inputs are propagated regardless of this setting."""
        return self._error_method

    @property
    def is_map(self):
        """Value of `self.data.is_map`."""
        return self.data.is_map

    def _call_timed(self, func, times):
        """Call `func()`; if profiling is on, append its duration to `times`."""
        if not self.profile:
            func()
            return
        start_t = time()
        func()
        end_t = time()
        times.append(end_t - start_t)

    def setup(self):
        """Validate `data`, switch it to `calc_mode` (if set), run the
        service's `setup_function` and invalidate the cached param hash.

        Raises
        ------
        TypeError
            If `data` is neither None nor a ContainerSet.

        """
        # check that data is a ContainerSet (downstream modules assume this)
        if self.data is not None:
            if not isinstance(self.data, ContainerSet):
                raise TypeError(
                    "`data` must be a `pisa.core.container.ContainerSet`")

        if self.calc_mode is not None:
            self.data.representation = self.calc_mode

        # call the user-defined setup function
        self._call_timed(self.setup_function, self.setup_times)

        # invalidate param hash:
        self.param_hash = -1

    def setup_function(self):
        """Implement in services (subclasses of Stage)"""

    def compute(self):
        """Run the service's `compute_function`, unless the params' values
        hash is unchanged since the last computation."""
        # simplest caching algorithm: don't compute if params didn't change
        new_param_hash = self.params.values_hash
        if new_param_hash == self.param_hash:
            logging.trace("cached output")
            return

        if self.calc_mode is not None:
            self.data.representation = self.calc_mode

        self._call_timed(self.compute_function, self.calc_times)

        # Only remember the hash once the computation went through
        self.param_hash = new_param_hash

    def compute_function(self):
        """Implement in services (subclasses of Stage)"""

    def apply(self):
        """Switch `data` to `apply_mode` (if set) and run the service's
        `apply_function`."""
        if self.apply_mode is not None:
            self.data.representation = self.apply_mode

        self._call_timed(self.apply_function, self.apply_times)

    def apply_function(self):
        """Implement in services (subclasses of Stage)"""

    def run(self):
        """Call `compute` followed by `apply`; returns None."""
        self.compute()
        self.apply()
        return None