Exemple #1
0
 def from_parsed_config(cls, pipeline_config, pipeline):
     """
     Create a Step instance from one parsed step section of a Pipeline
     configuration.

     `pipeline_config` is a mapping describing a single Step. The keys read
     here are:
       - 'python_class': full dotted path of the Step (sub)class to
         instantiate, e.g. package.subPackage.subsubpackage.className
       - 'name': name given to the new Step instance (required)
       - 'config_file': optional Step configuration file
       - 'input' / 'output': optional input/output info (default: [])

     `pipeline` is the Pipeline the Step belongs to; its `log.debug` is used
     for diagnostics and it is passed on to the Step constructor.

     If a spec file is found for the Step class (via
     `utilities.find_spec_file`), it is handed to the config parser together
     with the Step configuration file so the latter can be validated.

     Returns the new Step instance. The 'parameters' section of the Step
     configuration file, if any, is passed to the constructor as keyword
     arguments; a Step without a configuration file gets no extra keyword
     arguments.

     Raises KeyError if 'python_class' or 'name' is missing from
     `pipeline_config`.
     """
     # Resolve the class to instantiate. `subclassof=cls` is passed to the
     # importer so that it can enforce that the class derives from `cls`.
     step_class = utilities.import_class(pipeline_config['python_class'],
                                         subclassof=cls)
     step_name = pipeline_config['name']

     # Do we have a spec file? If so it is used for validation below. If not
     # keep going.
     step_spec_file = utilities.find_spec_file(step_class)
     if step_spec_file:
         pipeline.log.debug("Step %s specfile: %s"
                            % (step_name, step_spec_file))
     else:
         pipeline.log.debug("No spec file for Step %s." % (step_name))

     # Default to no extra parameters so that a Step without a configuration
     # file can still be instantiated (previously `parameters` was left
     # unbound in that case and the call below raised UnboundLocalError).
     parameters = {}
     step_config_file = pipeline_config.get('config_file', None)
     if step_config_file:
         step_config = config_parser.loads(step_config_file,
                                           specfile=step_spec_file)
         parameters = step_config.get('parameters', {})

     # Now we have everything we need to create a Step instance.
     return step_class(name=step_name,
                       pipeline=pipeline,
                       input_info=pipeline_config.get('input', []),
                       output_info=pipeline_config.get('output', []),
                       **parameters)
Exemple #2
0
def pipeline_from_config_file(config_file):
    """
    Build and return a fully configured Pipeline from the ConfigObj/INI
    configuration file `config_file`.

    The 'pipeline' section of the parsed file supplies the Pipeline name,
    system, optional log level and local log mode, and the list of step
    sections. If a spec file exists for the Pipeline class, the parser is
    given it so the configuration can be validated while being read.
    """
    # Look up the (optional) spec file and parse, validating if possible.
    pipeline_spec = utilities.find_spec_file(Pipeline)
    full_config = config_parser.loads(config_file, specfile=pipeline_spec)
    section = full_config['pipeline']

    # The Pipeline is first created without any steps; they are attached
    # at the end, once they have all been instantiated.
    pipeline_kwargs = {
        'name': section['name'],
        'system': section['system'],
        'log_level': section.get('log_level', DEFAULT_LOG_LEVEL),
        'local_logs': section.get('local_log_mode', DEFAULT_LOCAL_LOGS),
    }
    pipeline = Pipeline(**pipeline_kwargs)

    # Each entry of the "steps" list names the Step class to instantiate
    # and its configuration file, plus hints on which data the Step consumes
    # and produces. Data travels between steps in memory: the Pipeline keeps
    # a dictionary (the clipboard); before a Step runs, its inputs are put
    # in Step.inbox (a list, in the order given by the inbox parameter of
    # that Step's section), and after it completes, Step.outbox is read
    # back into the clipboard (in the order given by the outbox parameter).
    step_instances = []
    for step_section in section['steps']:
        step_instances.append(Step.from_parsed_config(step_section, pipeline))

    # Steps are handed to the Pipeline only now so that, while initializing,
    # each of them could already pull what it needed from the Pipeline
    # object it belongs to.
    pipeline.configure(step_instances)
    return pipeline
Exemple #3
0
def pipeline_from_config_file(config_file):
    """
    Read the ConfigObj/INI file `config_file` and return the Pipeline it
    describes, with all of its Steps instantiated and attached.

    Only the 'pipeline' section of the file is used. When a spec file is
    found for the Pipeline class it is passed to the parser for validation.
    """
    # Parse (and validate, when a spec file is available) the configuration.
    spec = utilities.find_spec_file(Pipeline)
    cfg = config_parser.loads(config_file, specfile=spec)['pipeline']

    # Gather the constructor arguments; log settings fall back to the
    # module defaults when absent from the file.
    pipeline_name = cfg['name']
    system = cfg['system']
    log_level = cfg.get('log_level', DEFAULT_LOG_LEVEL)
    local_logs = cfg.get('local_log_mode', DEFAULT_LOCAL_LOGS)

    # Start with a step-less Pipeline; the steps are added at the very end.
    result = Pipeline(name=pipeline_name,
                      system=system,
                      log_level=log_level,
                      local_logs=local_logs)

    # The steps array is the only part needing special handling: for every
    # entry a Step of the appropriate class is created and given its own
    # config file. The entries also say which data each Step consumes and
    # produces. That data is exchanged in memory through a Pipeline-level
    # dictionary, the clipboard: before a Step executes, its inputs are
    # placed in Step.inbox (a list ordered as in the Step's inbox
    # parameter); once it completes, Step.outbox (assumed ordered as in the
    # Step's outbox parameter) is copied back to the clipboard.
    created_steps = []
    for step_cfg in cfg['steps']:
        new_step = Step.from_parsed_config(step_cfg, result)
        created_steps.append(new_step)

    # Attaching the steps last lets each Step use the Pipeline object it
    # belongs to during its own initialization.
    result.configure(created_steps)
    return result
Exemple #4
0
 def from_config_file(cls, config_file, pipeline=DEFAULT_PIPELINE, name=''):
     """
     Create a Step instance from a ConfigObj/INI configuration file
     `config_file` and register it with `pipeline`.

     This is used in scripts where users create Steps manually without using
     a Pipeline class. In these cases, we just use the default Pipeline
     instance (`DEFAULT_PIPELINE`) unless another `pipeline` is given.

     `name` is the name of the new Step. If it is empty, a name of the form
     'StepNNNNNN' is generated from the number of Steps already in
     `pipeline.steps`.

     The 'parameters' section of the configuration file, if any, is passed
     to the Step constructor as keyword arguments. If a spec file is found
     for `cls`, it is handed to the config parser for validation.

     Returns the new Step instance, which has also been appended to
     `pipeline.steps`.
     """
     # Since we do not have a proper Pipeline instance with its configuration
     # file to give us our name, we will generate one, based on the number of
     # Steps already added to `pipeline`. As in the original logic, nothing
     # is generated if a Step with the same (empty) name is already present.
     # NOTE(review): the generated name is not checked for uniqueness.
     if not name and not any(s.name == name for s in pipeline.steps):
         name = 'Step%06d' % (len(pipeline.steps))

     # Do we have a spec file? If so, do parameter and input/output key
     # validation as well. If not keep going.
     spec_file = utilities.find_spec_file(cls)
     if spec_file:
         pipeline.log.debug("Step %s specfile: %s" % (name, spec_file))
     else:
         # Report the actual Step name (the message used to be formatted
         # with the literal string 'name').
         pipeline.log.debug("No spec file for Step %s." % (name))

     # Now do the actual parsing and, if we do have a spec file, validate as
     # well.
     config = config_parser.loads(config_file, specfile=spec_file)
     parameters = config.get('parameters', {})

     # Create the Step instance. Steps created this way carry no input or
     # output info.
     step_instance = cls(name=name,
                         pipeline=pipeline,
                         input_info=[],
                         output_info=[],
                         **parameters)

     # Add the step instance to pipeline.steps.
     pipeline.steps.append(step_instance)

     return step_instance