def execute(self, **kwargs):
    """
    Override the default implementation of execute to update parameter specifications/types
    when wrapping functions where the types are not known a priori.

    :param kwargs: Custom analysis parameters

    :return: The result of execute_analysis()
    """
    # Update the dtype of all the input parameters to ensure we save them correctly to file
    log_helper.debug(__name__, "Setting parameters based on the given inputs")
    ana_dtypes = data_dtypes.get_dtypes()
    for k, v in kwargs.iteritems():
        for param in self.parameters:
            if param['name'] == k:
                if hasattr(v, 'dtype'):
                    param['dtype'] = ana_dtypes['ndarray']
                else:
                    param['dtype'] = type(v)
    # Determine the custom parameters
    custom_parameters = kwargs
    # Execute the analysis as usual
    result = super(analysis_generic, self).execute(**custom_parameters)
    return result
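# Usage sketch (hypothetical; assumes an analysis_generic instance created via
# from_function, shown further below). execute() records the dtype of each
# keyword input before dispatching to the parent implementation:
#
#     ana = analysis_generic.from_function(my_func)   # my_func is hypothetical
#     out = ana.execute(msidata=np.zeros((10, 10)))   # 'msidata' dtype recorded as ndarray
#     out = ana.execute(threshold=0.5)                # 'threshold' dtype recorded as float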
def __init__(self, analysis_objects=None):
    """
    Initialize the workflow executor

    :param analysis_objects: A list of analysis objects to be executed
    """
    super(workflow_executor_base, self).__init__()
    log_helper.debug(__name__, "Creating workflow executor")
    if analysis_objects is not None:
        if not isinstance(analysis_objects, (list, set)):
            analysis_objects = [analysis_objects, ]
    log_helper.log_var(__name__, analysis_objects=analysis_objects, level='DEBUG')
    self.run_info = run_info_dict()
    self.analysis_tasks = analysis_task_list(analysis_objects) \
        if analysis_objects is not None \
        else analysis_task_list()
    self.mpi_comm = mpi_helper.get_comm_world()
    self.mpi_root = 0
    self.workflow_identifier = "we"
    # self.parameters = []  # Inherited from parameter_manager and set in parent class
    dtypes = data_dtypes.get_dtypes()
    self.add_parameter(name='profile_time_and_usage',
                       help='Enable/disable profiling of time and usage of the whole workflow',
                       required=False,
                       default=False,
                       dtype=dtypes['bool'])
    self.add_parameter(name='profile_memory',
                       help='Enable/disable profiling of memory usage of the whole workflow',
                       required=False,
                       default=False,
                       dtype=dtypes['bool'])
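# Usage sketch (hypothetical; workflow_executor_base is a base class, so a
# concrete executor subclass, here called my_executor, is assumed, and the
# dict-style parameter assignment is assumed from the parameter_manager
# interface):
#
#     executor = my_executor(analysis_objects=[ana1, ana2])  # or a single object
#     executor['profile_time_and_usage'] = True   # parameter added in __init__ above
#     executor.execute()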
def __write_omsi_analysis_data__(cls, data_group, ana_data):
    """
    Private helper function used to write the data defined by an analysis_data object to HDF5.

    :param data_group: The h5py data group to which the data should be written.
    :param ana_data: The analysis_data object with the description of the data to be written.
    :type ana_data: omsi.analysis.analysis_data
    """
    from omsi.datastructures.analysis_data import analysis_data, data_dtypes
    curr_dtype = ana_data['dtype']
    try:
        if curr_dtype == data_dtypes.get_dtypes()['ndarray']:
            curr_dtype = ana_data['data'].dtype
    except TypeError:
        pass
    try:
        if curr_dtype == data_dtypes.get_dtypes()['bool']:
            curr_dtype = bool
    except TypeError:
        pass
    try:
        if curr_dtype == data_dtypes.get_dtypes()['str']:
            curr_dtype = omsi_format_common.str_type
    except TypeError:
        pass

    # Create link in HDF5 to an existing dataset within the file
    if isinstance(ana_data, analysis_data) and isinstance(ana_data['dtype'], int):
        if curr_dtype == ana_data.ana_hdf5link:
            linkobject = data_group.file.get(ana_data['data'])
            data_group[ana_data['name']] = linkobject
            omsiobj = omsi_file_common.get_omsi_object(linkobject)
            try:
                # Check if we already have a type attribute
                _ = data_group[ana_data['name']].attrs[omsi_format_common.type_attribute]
            except KeyError:
                # Generate the type attribute from scratch
                if omsiobj is not None:
                    omsiobjtype = omsiobj.__class__.__name__
                else:
                    omsiobjtype = ""
                data_group[ana_data['name']].attrs[omsi_format_common.type_attribute] = omsiobjtype
    # Create a new string-type dataset
    elif (curr_dtype == omsi_format_common.str_type) or (curr_dtype == h5py.special_dtype(vlen=str)):
        tempdata = data_group.require_dataset(name=unicode(ana_data['name']),
                                              shape=(1,),
                                              dtype=omsi_format_common.str_type)
        if len(unicode(ana_data['data'])) > 0:
            if omsi_format_common.str_type_unicode:
                tempdata[0] = unicode(ana_data['data'])
            else:
                tempdata[0] = str(ana_data['data'])
        else:
            warnings.warn("WARNING: " + ana_data['name'] +
                          " dataset generated but not written. The given dataset was empty.")
    # Create a new dataset to store the current numpy-type dataset
    elif 'numpy' in str(type(ana_data['data'])):
        # Decide whether we want to enable chunking for the current analysis dataset
        chunks = None
        if ana_data['data'].size > 1000:
            chunks = True
        # Write the current analysis dataset
        if ana_data['data'].dtype.type in [np.string_, np.unicode_]:
            tempdata = data_group.require_dataset(name=ana_data['name'],
                                                  shape=ana_data['data'].shape,
                                                  dtype=omsi_format_common.str_type,
                                                  chunks=chunks)
        else:
            tempdata = data_group.require_dataset(name=ana_data['name'],
                                                  shape=ana_data['data'].shape,
                                                  dtype=ana_data['data'].dtype,
                                                  chunks=chunks)
        if ana_data['data'].size > 0:
            try:
                tempdata[:] = ana_data['data']
            except TypeError:
                tempdata[()] = ana_data['data']
        else:
            warnings.warn("WARNING: " + ana_data['name'] +
                          " dataset generated but not written. The given dataset was empty.")
    # Unknown dtype. Attempt to convert the dataset to numpy and write it to file.
    else:
        # Safely convert scalars to numpy but warn in case we see something else
        from omsi.datastructures.analysis_data import data_dtypes
        default_dtypes = data_dtypes.get_dtypes()
        if ana_data['dtype'] not in default_dtypes.keys() and ana_data['dtype'] not in default_dtypes.values():
            warnings.warn("WARNING: " + str(ana_data['name']) +
                          ": The data specified by the analysis object is not " +
                          "in numpy format. Attempting to convert the data to numpy")
        try:
            dat = np.asarray(ana_data['data'])
            if len(dat.shape) == 0:
                dat = dat[np.newaxis]  # np.asarray([ana_data['data']])
            try:
                tempdata = data_group.require_dataset(name=ana_data['name'],
                                                      shape=dat.shape,
                                                      dtype=str(dat.dtype))
            except TypeError:
                # Some Unicode types are not well-understood by h5py
                if 'U' in str(dat.dtype) or 'S' in str(dat.dtype):
                    tempdata = data_group.require_dataset(name=ana_data['name'],
                                                          shape=dat.shape,
                                                          dtype=omsi_format_common.str_type)
                else:
                    raise
            if dat.size > 0:
                try:
                    tempdata[:] = dat
                except TypeError:
                    tempdata[()] = dat
            else:
                warnings.warn(ana_data['name'] +
                              " dataset generated but not written. The given dataset was empty.")
        except:
            warnings.warn("ERROR: " + str(ana_data['name']) +
                          ": The data specified by the analysis could not be " +
                          "converted to numpy for writing to HDF5")
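# Dtype-resolution sketch (hypothetical values; this helper is private and is
# normally invoked internally during file writing, so the lines below only
# illustrate which branch above each analysis_data object would take):
#
#     dts = data_dtypes.get_dtypes()
#     analysis_data(name='msidata', data=np.zeros((5, 5)), dtype=dts['ndarray'])
#       -> curr_dtype resolves to the array's own dtype; written as a numpy dataset
#     analysis_data(name='label', data=u'peak map', dtype=dts['str'])
#       -> curr_dtype resolves to str_type; written as a variable-length string dataset
#     analysis_data(name='scale', data=0.5, dtype=float)
#       -> falls through to the final branch and is converted via np.asarray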
def from_function(cls, analysis_function, output_names=None, parameter_specs=None, name_key="undefined"):
    """
    Create a generic analysis class for a given analysis function.

    This functionality is useful to ease quick scripting on analyses but should
    not be used in production.

    NOTE: __analysis_function is a reserved parameter name used to store the
    analysis function and may not be used as an input parameter for the
    analysis function.

    :param analysis_function: The analysis function to be wrapped for provenance tracking and storage
    :param output_names: Optionally, define a list of the names of the outputs
    :param parameter_specs: Optional list of omsi.datastructures.analysis_data.parameter_data with
        additional information about the parameters of the function.
    :param name_key: The name for the analysis, i.e., the analysis identifier

    :return: A new generic analysis class
    """
    log_helper.debug(__name__, "Creating generic analysis from function")
    ana_dtypes = data_dtypes.get_dtypes()
    generic_analysis = cls(name_key=name_key)
    generic_analysis.real_analysis_type = analysis_function.__code__.co_name
    function_argcount = analysis_function.__code__.co_argcount  # Get the number of function parameters
    function_args = analysis_function.__code__.co_varnames[0:function_argcount]  # Get the function arguments
    # Get the default values for the function parameters
    function_defaults = ()
    if hasattr(analysis_function, 'func_defaults'):
        if analysis_function.func_defaults is not None:
            function_defaults = analysis_function.func_defaults
    function_nondefaults = function_argcount - len(function_defaults)
    default_pos = 0
    # Add all parameters of the function to our generic analysis
    for varindex, varname in enumerate(function_args):
        # Determine the default value (if any) for the current parameter
        has_default = varindex >= function_nondefaults
        default = None
        if has_default:
            default = function_defaults[default_pos]
            default_pos += 1
        # Check if the user has supplied an additional specification for the current parameter
        param_spec = None
        if parameter_specs is not None:
            for ps in parameter_specs:
                if isinstance(ps, dict) or isinstance(ps, parameter_data):
                    if ps['name'] == varname:
                        param_spec = ps
                else:
                    raise ValueError("Invalid parameter specification. " +
                                     "Spec is not a dict or parameter_data object")
        # Try to determine the dtype from the default values of the function
        dtype = None
        if default is not None:
            if isinstance(default, list) or isinstance(default, np.ndarray):
                dtype = ana_dtypes['ndarray']
            elif isinstance(default, bool):
                dtype = ana_dtypes['bool']
            elif isinstance(default, basestring):
                dtype = str
            else:
                for k, v in ana_dtypes.iteritems():
                    try:
                        if isinstance(default, v):
                            dtype = v
                            break
                    except:
                        pass
        # Add the parameter to our analysis
        if param_spec is None:
            generic_analysis.add_parameter(name=varname,
                                           help=' ',
                                           dtype=dtype,
                                           default=default)
        else:
            generic_analysis.add_parameter(
                name=varname,
                help=' ' if 'help' not in param_spec else param_spec['help'],
                dtype=dtype if 'dtype' not in param_spec else param_spec['dtype'],
                required=(not has_default) if 'required' not in param_spec else param_spec['required'],
                default=default if 'default' not in param_spec else param_spec['default'],
                choices=None if 'choices' not in param_spec else param_spec['choices'],
                group=None if 'group' not in param_spec else param_spec['group'],
                data=None if 'data' not in param_spec else param_spec['data'])
    # Add the analysis function as an internal parameter to our analysis
    generic_analysis.add_parameter(name='__analysis_function',
                                   help='The analysis function we want to execute',
                                   dtype=ana_dtypes['ndarray'])
    # Assign the names of the outputs
    if output_names is not None:
        generic_analysis.data_names = output_names
    # Pickle the analysis function and save it
    generic_analysis['__analysis_function'] = np.fromstring(cloudpickle.dumps(analysis_function),
                                                            cls.PICKLE_DTYPE)
    # Return our initialized analysis
    return generic_analysis
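# A minimal usage sketch (hypothetical function and names; assumes numpy is
# imported as np, as elsewhere in this module):
#
#     def total_intensity(msidata, axis=0):
#         return np.sum(msidata, axis=axis)
#
#     ana = analysis_generic.from_function(analysis_function=total_intensity,
#                                          output_names=['total'],
#                                          name_key='total_intensity')
#     result = ana.execute(msidata=np.random.rand(10, 10, 100), axis=2)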