Beispiel #1
0
    def _init_exec_context_set(self):
        """
        Build and validate the set of execution contexts for this job.

        The values of the job's execution.context parameter are collected into
        self._exec_contexts (duplicates collapse into one entry), and each
        distinct value is checked with Contexts.is_exec_context.

        Args:
            self: class instance

        Returns:
            On success: True.
            On failure: the result of self._fatal(msg) for the first invalid
                context encountered (presumably False - confirm against
                _fatal).

        """
        # distinct execution contexts named in the job parameters
        self._exec_contexts = set(self._job['execution']['context'].values())

        # stop at the first value that is not a recognized execution context
        for exec_context in self._exec_contexts:
            if Contexts.is_exec_context(exec_context):
                continue

            msg = 'invalid exec context: {}'.format(exec_context)
            Log.an().error(msg)
            return self._fatal(msg)

        Log.some().debug('execution contexts: %s', self._exec_contexts)

        return True
Beispiel #2
0
    def _init_data_context_set(self):
        """
        Collect and validate the data contexts used by this job.

        A data context is the URI scheme of a workflow input or of the job
        output URI. Schemes are added to the existing self._data_contexts
        collection (it is not created here), then every entry is checked with
        Contexts.is_data_context.

        Args:
            self: class instance

        Returns:
            On success: True.
            On failure: False.

        """
        # scheme of each workflow input; only the first element of an input's
        # 'value' is parsed
        for input_key in self._workflow['inputs']:
            first_value = self._workflow['inputs'][input_key]['value'][0]
            parsed_input = URIParser.parse(first_value)
            if not parsed_input:
                msg = 'invalid input uri: {}'.format(first_value)
                Log.an().error(msg)
                return self._fatal(msg)

            self._data_contexts.add(parsed_input['scheme'])

        # scheme of the job output URI
        output_uri = self._job['output_uri']
        parsed_output = URIParser.parse(output_uri)
        if not parsed_output:
            msg = 'invalid base of job output uri: {}'.format(output_uri)
            Log.an().error(msg)
            return self._fatal(msg)

        self._data_contexts.add(parsed_output['scheme'])

        # every collected scheme must be a known data context
        for data_context in self._data_contexts:
            if Contexts.is_data_context(data_context):
                continue

            msg = 'invalid data context: {}'.format(data_context)
            Log.an().error(msg)
            return self._fatal(msg)

        Log.some().debug('data contexts: %s', self._data_contexts)

        return True
Beispiel #3
0
    def _init_job_uris(self):
        """
        Validate the job's base work/output URIs and derive the job-specific
        URIs beneath them.

        A job directory name is slugified from the job name; the hashed
        variant appends the first 8 characters of the job id. For each data
        scheme backing an execution context, the hashed directory is appended
        to that scheme's work URI and the parsed result is stored in
        self._parsed_job_work_uri. The output URI gets the plain directory
        when the job's 'no_output_hash' is truthy, otherwise the hashed one,
        and the parsed result is stored in self._parsed_job_output_uri.

        Args:
            self: class instance

        Returns:
            On success: True.
            On failure: False.

        """
        # job directory name, with and without the job-id suffix
        dir_slug = slugify(self._job['name'], regex_pattern=r'[^-a-z0-9_]+')
        hashed_dir_slug = '{}-{}'.format(dir_slug, self._job['job_id'][:8])

        # data schemes behind the execution contexts; a set, so exec contexts
        # that share a scheme are only processed once
        work_schemes = {
            Contexts.get_data_scheme_of_exec_context(exec_context)
            for exec_context in self._exec_contexts
        }

        for scheme in work_schemes:
            # a work_uri is mandatory for every scheme in use
            if scheme not in self._job['work_uri']:
                msg = 'missing work_uri for context: {}'.format(scheme)
                Log.an().error(msg)
                return self._fatal(msg)

            base_work_uri = self._job['work_uri'][scheme]
            parsed_base = URIParser.parse(base_work_uri)
            if not parsed_base:
                msg = 'invalid base of job work uri for context: {}->{}'.format(
                    scheme, base_work_uri
                )
                Log.an().error(msg)
                return self._fatal(msg)

            # a root path needs no extra '/' before the job directory
            separator = '' if parsed_base['chopped_path'] == '/' else '/'
            full_job_work_uri = '{}{}{}'.format(
                parsed_base['chopped_uri'], separator, hashed_dir_slug
            )

            # re-parse to make sure the combined URI is still valid
            parsed_work = URIParser.parse(full_job_work_uri)
            if not parsed_work:
                msg = 'invalid job work uri for context: {}->{}'.format(
                    scheme, full_job_work_uri
                )
                Log.an().error(msg)
                return self._fatal(msg)

            self._parsed_job_work_uri[scheme] = parsed_work

        # base output URI
        base_output_uri = self._job['output_uri']
        parsed_output_base = URIParser.parse(base_output_uri)
        if not parsed_output_base:
            msg = 'invalid base of job output uri: {}'.format(base_output_uri)
            Log.an().error(msg)
            return self._fatal(msg)

        # job directory under the output URI: plain or hashed name
        separator = '' if parsed_output_base['chopped_path'] == '/' else '/'
        full_job_output_uri = '{}{}{}'.format(
            parsed_output_base['chopped_uri'],
            separator,
            dir_slug if self._job['no_output_hash'] else hashed_dir_slug
        )

        # re-parse to make sure the combined URI is still valid
        parsed_output = URIParser.parse(full_job_output_uri)
        if not parsed_output:
            msg = 'invalid job output uri: {}'.format(full_job_output_uri)
            Log.an().error(msg)
            return self._fatal(msg)

        self._parsed_job_output_uri = parsed_output

        return True
Beispiel #4
0
    def _init_graph_structure(self):
        """
        Populate the graph with placeholder nodes and dependency edges.

        One node is added per workflow input ('input.<name>') and per
        workflow step ('step.<name>'). Each node carries: name, type ('input'
        or 'step'), contexts (dict keyed by data scheme, plus 'final' for
        steps listed in the workflow's final_output), source_context,
        exec_context, and node (None for now; per the original notes it will
        later reference a WorkflowInput or WorkflowStep object). Step nodes
        also carry the step definition under 'step'.

        Edges then run from each dependency (input or step) to the step that
        depends on it, and the dependency's contexts dict gains the data
        scheme of the depending step's execution context.

        Args:
            None.

        Returns:
            On failure: Raises WorkflowDAGException.

        """
        # ---- placeholder nodes for workflow inputs ----
        for input_name in self._workflow['inputs']:

            # the input's URI scheme is its source context
            input_value = self._workflow['inputs'][input_name]['value']
            parsed_input_uri = URIParser.parse(input_value)
            if not parsed_input_uri:
                msg = 'invalid input uri: {}'.format(input_value)
                Log.an().error(msg)
                raise WorkflowDAGException(msg)

            input_scheme = parsed_input_uri['scheme']
            input_attrs = {
                'name': '{}'.format(input_name),
                'type': 'input',
                'contexts': {input_scheme: ''},
                'source_context': input_scheme,
                'exec_context': None,
                'node': None
            }

            try:
                self._graph.add_node(
                    'input.{}'.format(input_name), **input_attrs
                )
            except nx.NetworkXException as err:
                msg = 'cannot add input node "{}" to graph [{}]'.format(
                    input_name, str(err))
                Log.an().error(msg)
                raise WorkflowDAGException(msg)

        # ---- placeholder nodes for workflow steps ----
        for step_name, step in self._workflow['steps'].items():

            # data scheme of the step's execution context is its source
            step_exec_context = step['execution']['context']
            step_scheme = Contexts.get_data_scheme_of_exec_context(
                step_exec_context)
            if not step_scheme:
                msg = 'invalid execution context ({}) for step {}'.format(
                    step_exec_context, step_name)
                Log.an().error(msg)
                raise WorkflowDAGException(msg)

            step_contexts = {step_scheme: ''}
            if step_name in self._workflow['final_output']:
                step_contexts['final'] = ''

            step_attrs = {
                'name': '{}'.format(step_name),
                'type': 'step',
                'step': step,
                'contexts': step_contexts,
                'source_context': step_scheme,
                'exec_context': step_exec_context,
                'node': None
            }

            try:
                self._graph.add_node(
                    'step.{}'.format(step_name), **step_attrs
                )
            except nx.NetworkXException as err:
                msg = 'cannot add step node "{}" to graph [{}]'.format(
                    step_name, str(err))
                Log.an().error(msg)
                raise WorkflowDAGException(msg)

        # ---- edges, and extra contexts implied by dependencies ----
        for step_name, step in self._workflow['steps'].items():

            target_node = 'step.{}'.format(step_name)

            # all input, parameter, or step dependencies of this step
            deps = self._get_step_dependencies(step)

            for dep in deps:
                dep_info = deps[dep]

                if dep_info['name'] == 'workflow':
                    # refers to a workflow input or a workflow parameter
                    source_node = 'input.{}'.format(dep_info['var'])

                    if source_node not in self._graph.nodes:
                        # not an input, so it must be a parameter; parameters
                        # get no edge
                        if dep_info['var'] not in self._parameters:
                            msg = ('invalid dependency for step "{}", '
                                   'parameter or input "{}" does not exist'
                                   ).format(step_name, dep_info['var'])
                            Log.an().error(msg)
                            raise WorkflowDAGException(msg)

                        continue

                else:
                    # refers to another step
                    source_node = 'step.{}'.format(dep_info['name'])

                    if not self._graph.has_node(source_node):
                        msg = ('invalid dependency for step "{}", '
                               'step "{}" does not exist').format(
                                   step_name, source_node)
                        Log.an().error(msg)
                        raise WorkflowDAGException(msg)

                # edge from the dependency (input or step) to this step
                try:
                    self._graph.add_edge(source_node, target_node)
                except nx.NetworkXException as err:
                    msg = ('cannot add edge from node "{}" to '
                           'node "{}" [{}]').format(
                               source_node, target_node, str(err))
                    Log.an().error(msg)
                    raise WorkflowDAGException(msg)

                # record this step's data scheme as a context key on the
                # dependency node
                target_scheme = Contexts.get_data_scheme_of_exec_context(
                    step['execution']['context'])
                self._graph.nodes[source_node]['contexts'][target_scheme] = ''
Beispiel #5
0
    def _init_context_uris(self):
        """
        Generate all context URIs for this workflow run.

        Fills self._context_uris and self._parsed_context_uris, each with an
        'inputs' and a 'steps' section keyed by context and then by node name:

        - inputs: for every data scheme (those behind the execution contexts
          plus self._data_contexts), an input whose source context matches
          keeps its original URI; otherwise, if a job work URI exists for
          that context, a '_input-<slug>' directory is created under it via
          DataManager.mkdir and the input URI is switched to that base.
        - steps: for every data scheme behind an execution context, each step
          gets '<job work uri>/<slug>'; under the 'final' context each step
          gets '<job output uri>/<slug>'.

        Args:
            None.

        Returns:
            On failure: Raises WorkflowDAGException.

        """
        self._context_uris['inputs'] = {}
        self._context_uris['steps'] = {'final': {}}
        self._parsed_context_uris['inputs'] = {}
        self._parsed_context_uris['steps'] = {'final': {}}

        # data schemes behind the execution contexts; built once and shared
        # by the input pass and the step pass below
        exec_data_schemes = {
            Contexts.get_data_scheme_of_exec_context(con)
            for con in self._exec_contexts
        }

        # init all data contexts for inputs
        for context in exec_data_schemes | self._data_contexts:

            self._context_uris['inputs'][context] = {}
            self._parsed_context_uris['inputs'][context] = {}

            for node_name in self._topo_sort:

                node = self._graph.nodes[node_name]
                if node['type'] != 'input':
                    continue

                if node['source_context'] == context:
                    # use original input URI
                    input_value = self._workflow['inputs'][
                        node['name']]['value']
                    parsed_uri = URIParser.parse(input_value)
                    if not parsed_uri:
                        msg = 'invalid input uri: {}'.format(input_value)
                        # log before raising, as every other failure path
                        # in this method does
                        Log.an().error(msg)
                        raise WorkflowDAGException(msg)

                    self._context_uris['inputs'][context][node['name']]\
                        = parsed_uri['chopped_uri']
                    self._parsed_context_uris['inputs'][context]\
                        [node['name']] = parsed_uri
                    continue

                # skip if _parsed_job_work_uri is not defined for this
                # context: that implies no execution is defined for the
                # context, so no staging location is needed at the work_uri
                if context not in self._parsed_job_work_uri:
                    continue

                # switch context of input URI
                new_base_uri = '{}/_input-{}'.format(
                    self._parsed_job_work_uri[context]['chopped_uri'],
                    slugify(node['name'], regex_pattern=r'[^-a-z0-9_]+'))

                # create new base URI
                if not DataManager.mkdir(
                        uri=new_base_uri,
                        recursive=True,
                        **{context: self._context_options[context]}):
                    msg = 'cannot create new base uri for input: {}'\
                        .format(new_base_uri)
                    Log.an().error(msg)
                    raise WorkflowDAGException(msg)

                # switch input URI base
                input_value = self._workflow['inputs'][node['name']]['value']
                switched_uri = URIParser.switch_context(
                    input_value, new_base_uri)
                if not switched_uri:
                    msg = (
                        'cannot switch input uri context to '
                        'new base URI: {}->{}'
                    ).format(input_value, new_base_uri)
                    Log.an().error(msg)
                    raise WorkflowDAGException(msg)

                self._context_uris['inputs'][context][node['name']]\
                    = switched_uri['chopped_uri']
                self._parsed_context_uris['inputs'][context]\
                    [node['name']] = switched_uri

        # init work contexts for steps
        # NOTE(review): the URIParser.parse results stored for steps (here
        # and for the 'final' context below) are not checked for failure.
        for context in exec_data_schemes:

            self._context_uris['steps'][context] = {}
            self._parsed_context_uris['steps'][context] = {}

            for node_name in self._topo_sort:

                node = self._graph.nodes[node_name]
                if node['type'] != 'step':
                    continue

                step_uri = '{}/{}'.format(
                    self._parsed_job_work_uri[context]['chopped_uri'],
                    slugify(node['name'], regex_pattern=r'[^-a-z0-9_]+')
                )
                self._context_uris['steps'][context][node['name']] = step_uri
                self._parsed_context_uris['steps'][context][node['name']]\
                    = URIParser.parse(step_uri)

        # init final contexts for steps
        for node_name in self._topo_sort:

            node = self._graph.nodes[node_name]
            if node['type'] != 'step':
                continue

            final_uri = '{}/{}'.format(
                self._parsed_job_output_uri['chopped_uri'],
                slugify(node['name'], regex_pattern=r'[^-a-z0-9_]+')
            )
            self._context_uris['steps']['final'][node['name']] = final_uri
            self._parsed_context_uris['steps']['final'][node['name']]\
                = URIParser.parse(final_uri)