Code Example #1
File: generic.py Project: biorack/BASTet
    def execute(self, **kwargs):
        """
        Override the default implementation of execute to update parameter specifications/types
        when wrapping functions whose types are not known a priori.

        :param kwargs: Custom analysis parameters

        :return: The result of execute_analysis()
        """
        # Update the dtype of all the input parameters to ensure we save them correctly to file
        log_helper.debug(__name__, "Setting parameters based on the given inputs")
        ana_dtypes = data_dtypes.get_dtypes()
        for k, v in kwargs.iteritems():
            for param in self.parameters:
                if param['name'] == k:
                    if hasattr(v, 'dtype'):
                        param['dtype'] = ana_dtypes['ndarray']
                    else:
                        param['dtype'] = type(v)
        # Determine the custom parameters
        custom_parameters = kwargs

        # Execute the analysis as usual
        result = super(analysis_generic, self).execute(**custom_parameters)
        return result
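
The loop above boils down to: for each keyword argument, find the matching parameter specification and record either the generic ndarray dtype (for any value exposing a .dtype attribute) or the plain Python type. Below is a minimal, self-contained sketch of that pattern in Python 3 syntax; the parameter list and dtype table are placeholders, not BASTet's real structures.

import numpy as np

ana_dtypes = {'ndarray': 'ndarray'}  # stand-in for data_dtypes.get_dtypes()
parameters = [{'name': 'msidata', 'dtype': None},
              {'name': 'threshold', 'dtype': None}]

def patch_parameter_dtypes(parameters, **kwargs):
    for k, v in kwargs.items():
        for param in parameters:
            if param['name'] == k:
                # numpy arrays (and anything array-like) expose .dtype;
                # plain Python values fall back to their type
                param['dtype'] = ana_dtypes['ndarray'] if hasattr(v, 'dtype') else type(v)

patch_parameter_dtypes(parameters, msidata=np.zeros((2, 2)), threshold=0.5)
# parameters[0]['dtype'] == 'ndarray'; parameters[1]['dtype'] == float
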
Code Example #2
File: base.py Project: biorack/BASTet
    def __init__(self,
                 analysis_objects=None):
        """
        Initialize the workflow executor

        :param analysis_objects: A list of analysis objects to be executed
        """
        super(workflow_executor_base, self).__init__()
        log_helper.debug(__name__, "Creating workflow executor")
        if analysis_objects is not None:
            if not isinstance(analysis_objects, list) and not isinstance(analysis_objects, set):
                analysis_objects = [analysis_objects, ]
        log_helper.log_var(__name__, analysis_objects=analysis_objects, level='DEBUG')
        self.run_info = run_info_dict()
        self.analysis_tasks = analysis_task_list(analysis_objects) \
            if analysis_objects is not None \
            else analysis_task_list()
        self.mpi_comm = mpi_helper.get_comm_world()
        self.mpi_root = 0
        self.workflow_identifier = "we"
        # self.parameters = []  # Inherited from parameter_manager and set in parent class

        dtypes = data_dtypes.get_dtypes()
        self.add_parameter(name='profile_time_and_usage',
                           help='Enable/disable profiling of time and usage of the whole workflow',
                           required=False,
                           default=False,
                           dtype=dtypes['bool'])
        self.add_parameter(name='profile_memory',
                           help='Enable/disable profiling of memory usage of the whole workflow',
                           required=False,
                           default=False,
                           dtype=dtypes['bool'])
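
The constructor's input handling is worth noting: a caller may pass a single analysis object, a list, or a set, and the executor normalizes all three to a list before building the task list. A minimal sketch of that normalization, with illustrative names:

def normalize_analysis_objects(analysis_objects):
    # Accept a single object, a list, or a set; always return a list.
    if analysis_objects is None:
        return []
    if not isinstance(analysis_objects, (list, set)):
        analysis_objects = [analysis_objects]
    return list(analysis_objects)

assert normalize_analysis_objects(None) == []
assert normalize_analysis_objects('ana1') == ['ana1']
assert normalize_analysis_objects(['ana1', 'ana2']) == ['ana1', 'ana2']
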
Code Example #3
File: analysis.py Project: biorack/BASTet
    def __write_omsi_analysis_data__(cls,
                                     data_group,
                                     ana_data):
        """
        Private helper function used to write the data defined by an analysis_data object to HDF5.

        :param data_group: The h5py data group to which the data should be written.
        :param ana_data: The analysis_data object with the description of the data to be written.
        :type ana_data: omsi.analysis.analysis_data
        """
        from omsi.datastructures.analysis_data import analysis_data, data_dtypes
        curr_dtype = ana_data['dtype']
        try:
            if curr_dtype == data_dtypes.get_dtypes()['ndarray']:
                curr_dtype = ana_data['data'].dtype
        except TypeError:
            pass
        try:
            if curr_dtype == data_dtypes.get_dtypes()['bool']:
                curr_dtype = bool
        except TypeError:
            pass
        try:
            if curr_dtype == data_dtypes.get_dtypes()['str']:
                curr_dtype = omsi_format_common.str_type
        except TypeError:
            pass

        # Create link in HDF5 to an existing dataset within the file
        if isinstance(ana_data, analysis_data) and isinstance(ana_data['dtype'], int):
            if curr_dtype == ana_data.ana_hdf5link:
                linkobject = data_group.file.get(ana_data['data'])
                data_group[ana_data['name']] = linkobject
                omsiobj = omsi_file_common.get_omsi_object(linkobject)
                try:
                    # Check if we already have a type attribute
                    _ = data_group[ana_data['name']].attrs[omsi_format_common.type_attribute]
                except:
                    # Generate the type attribute from scratch
                    if omsiobj is not None:
                        omsiobjtype = omsiobj.__class__.__name__
                    else:
                        omsiobjtype = ""
                    data_group[ana_data['name']].attrs[
                        omsi_format_common.type_attribute] = omsiobjtype
        # Create a new string-type dataset
        elif (curr_dtype == omsi_format_common.str_type) or (curr_dtype == h5py.special_dtype(vlen=str)):
            tempdata = data_group.require_dataset(name=unicode(ana_data['name']),
                                                  shape=(1,),
                                                  dtype=omsi_format_common.str_type)
            if len(unicode(ana_data['data'])) > 0:
                if omsi_format_common.str_type_unicode:
                    tempdata[0] = unicode(ana_data['data'])
                else:
                    tempdata[0] = str(ana_data['data'])
            else:
                warnings.warn("WARNING: " + ana_data['name'] +
                              " dataset generated but not written. The given dataset was empty.")
        # Create a new dataset to store the current numpy-type dataset
        elif 'numpy' in str(type(ana_data['data'])):
            # Decide whether we want to enable chunking for the current
            # analysis dataset
            chunks = None
            if ana_data['data'].size > 1000:
                chunks = True
            # Write the current analysis dataset
            if ana_data['data'].dtype.type in [np.string_, np.unicode_]:
                tempdata = data_group.require_dataset(name=ana_data['name'],
                                                      shape=ana_data['data'].shape,
                                                      dtype=omsi_format_common.str_type,
                                                      chunks=chunks)
            else:
                tempdata = data_group.require_dataset(name=ana_data['name'],
                                                      shape=ana_data['data'].shape,
                                                      dtype=ana_data['data'].dtype,
                                                      chunks=chunks)
            if ana_data['data'].size > 0:
                try:
                    tempdata[:] = ana_data['data']
                except TypeError:
                    tempdata[()] = ana_data['data']
            else:
                warnings.warn("WARNING: " + ana_data['name'] +
                              " dataset generated but not written. The given dataset was empty.")
        # Unknown dtype. Attempt to convert the dataset to numpy and write it to
        # file.
        else:
            # Safely convert scalars to numpy but warn in case we see something else
            from omsi.datastructures.analysis_data import data_dtypes
            default_dtypes = data_dtypes.get_dtypes()
            if ana_data['dtype'] not in default_dtypes.keys() and ana_data['dtype'] not in default_dtypes.values():
                warnings.warn("WARNING: " + str(ana_data['name']) +
                              ": The data specified by the analysis object is not " +
                              "in numpy format. Attempting to convert the data to numpy")
            try:
                dat = np.asarray(ana_data['data'])
                if len(dat.shape) == 0:
                    dat = dat[np.newaxis]  # np.asarray([ana_data['data']])
                try:
                    tempdata = data_group.require_dataset(name=ana_data['name'],
                                                          shape=dat.shape,
                                                          dtype=str(dat.dtype))
                except TypeError:  # Some Unicode types are not well-understood by h5py
                    if 'U' in str(dat.dtype) or 'S' in str(dat.dtype):
                        tempdata = data_group.require_dataset(name=ana_data['name'],
                                                              shape=dat.shape,
                                                              dtype=omsi_format_common.str_type)
                    else:
                        raise
                if dat.size > 0:
                    try:
                        tempdata[:] = dat
                    except TypeError:
                        tempdata[()] = dat
                else:
                    warnings.warn(ana_data['name'] + " dataset generated but not written. The given dataset was empty.")
            except:
                warnings.warn("ERROR: " + str(ana_data['name']) +
                              ": The data specified by the analysis could not be " +
                              "converted to numpy for writing to HDF5")
Code Example #4
File: generic.py Project: biorack/BASTet
    def from_function(cls, analysis_function, output_names=None, parameter_specs=None, name_key="undefined"):
        """
        Create a generic analysis class for a given analysis function.

        This functionality is useful for quick scripting of analyses but should not be used in production.

        NOTE: __analysis_function is a reserved parameter name used to store the analysis function and may
        not be used as an input parameter for the analysis function.

        :param analysis_function: The analysis function to be wrapped for provenance tracking and storage
        :param output_names: Optionally, define a list of the names of the outputs
        :param parameter_specs: Optional list of omsi.datastructures.analysis_data.parameter_data with
            additional information about the parameters of the function.
        :param name_key: The name for the analysis, i.e., the analysis identifier

        :return: A new generic analysis class
        """
        log_helper.debug(__name__, "Creating generic analysis from function")
        ana_dtypes = data_dtypes.get_dtypes()
        generic_analysis = cls(name_key=name_key)
        generic_analysis.real_analysis_type = analysis_function.__code__.co_name
        function_argcount = analysis_function.__code__.co_argcount   # Get the number of function parameters
        function_args = analysis_function.__code__.co_varnames[0:function_argcount] # Get the function arguments
        # Get the default values for the function parameters
        function_defaults = ()
        if hasattr(analysis_function, 'func_defaults'):
            if analysis_function.func_defaults is not None:
                function_defaults = analysis_function.func_defaults
        function_nondefaults = function_argcount - len(function_defaults)
        default_pos = 0
        # Add all parameters of the function to our generic analysis
        for varindex, varname in enumerate(function_args):
            # Determine the default value (if any) for the current parameter
            has_default = varindex >= function_nondefaults
            default = None
            if has_default:
                default = function_defaults[default_pos]
                default_pos += 1
            # Check if the user has supplied an additional specification for the current parameter
            param_spec = None
            if parameter_specs is not None:
                for ps in parameter_specs:
                    if isinstance(ps, dict) or isinstance(ps, parameter_data):
                        if ps['name'] == varname:
                            param_spec = ps
                    else:
                        raise ValueError("Invalid parameter specification. Spec is not a dict or parameter_data object")
            # Try to determine the dtype from the default values of the function
            dtype = None
            if default is not None:
                if isinstance(default, list) or isinstance(default, np.ndarray):
                    dtype = ana_dtypes['ndarray']
                elif isinstance(default, bool):
                    dtype = ana_dtypes['bool']
                elif isinstance(default, basestring):
                    dtype = str
                else:
                    for k, v in ana_dtypes.iteritems():
                        try:
                            if isinstance(default, v):
                                dtype = v
                                break
                        except:
                            pass
            # Add the parameter to our analysis
            if param_spec is None:
                generic_analysis.add_parameter(name=varname,
                                               help=' ',
                                               dtype=dtype,
                                               default=default)
            else:
                generic_analysis.add_parameter(
                        name=varname,
                        help=' ' if 'help' not in param_spec else param_spec['help'],
                        dtype=dtype if 'dtype' not in param_spec else param_spec['dtype'],
                        required=(not has_default) if 'required' not in param_spec else param_spec['required'],
                        default=default if 'default' not in param_spec else param_spec['default'],
                        choices=None if 'choices' not in param_spec else param_spec['choices'],
                        group=None if 'group' not in param_spec else param_spec['group'],
                        data=None if 'data' not in param_spec else param_spec['data'])
        # Add the analysis function as an internal parameter to our analysis
        generic_analysis.add_parameter(name='__analysis_function',
                                       help='The analysis function we want to execute',
                                       dtype=ana_dtypes['ndarray'])
        # Assign the names of the outputs
        if output_names is not None:
            generic_analysis.data_names = output_names
        # Pickle out analysis function and save it
        generic_analysis['__analysis_function'] = np.fromstring(cloudpickle.dumps(analysis_function), cls.PICKLE_DTYPE)
        # Return our initialized analysis
        return generic_analysis
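
Hypothetical usage of from_function, assuming it is exposed as a classmethod of omsi.analysis.generic.analysis_generic (the class named in the super() call of Code Example #1); the toy analysis function and the import path are assumptions, not confirmed BASTet API:

import numpy as np
from omsi.analysis.generic import analysis_generic  # assumed import path

def threshold_peaks(msidata, threshold=0.05):
    # Toy analysis: zero out intensities below a relative threshold
    return np.where(msidata >= threshold * msidata.max(), msidata, 0)

# Wrap the plain function so its execution is tracked like any other analysis
ana = analysis_generic.from_function(analysis_function=threshold_peaks,
                                     output_names=['peak_image'],
                                     name_key='threshold_peaks')
result = ana.execute(msidata=np.random.rand(10, 10, 5), threshold=0.1)
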