コード例 #1
0
    def test_task_get_stdout(self):
        """
        Check that Workflow.get_stdout(<workflow_id>, <task_id>) returns
        a non-empty result for a known workflow/task id pair.
        """
        # Hard-coded identifiers of the workflow and task being queried.
        workflow_id = '4488969848362445219'
        task_id = '4488969848354891944'

        task_stdout = Workflow().get_stdout(workflow_id, task_id)

        # Only the presence of some output is checked, not its content.
        self.assertTrue(len(task_stdout) > 0)
コード例 #2
0
    def test_task_get_stdout(self):
        """
        Check that Workflow.get_stdout(<workflow_id>, <task_id>) returns
        a non-empty result when the client is built from self.gbdx.
        """
        # Hard-coded identifiers of the workflow and task being queried.
        workflow_id = '4488969848362445219'
        task_id = '4488969848354891944'

        client = Workflow(self.gbdx)
        task_stdout = client.get_stdout(workflow_id, task_id)

        # Only the presence of some output is checked, not its content.
        self.assertTrue(len(task_stdout) > 0)
コード例 #3
0
class Workflow(object):
    '''
    A workflow assembled from a list of task objects.

    The instance builds the workflow definition dictionary from its tasks,
    launches it through a ``WF`` client created in ``__init__`` and exposes
    read-only properties (status, events, stdout, ...) that query that client
    using the id returned by the launch call.

    Every read-only property has a setter that raises NotImplementedError so
    that an accidental assignment fails loudly.
    '''

    # Sub-workflow states treated as finished when inspecting a batch workflow.
    # NOTE(review): "canceled" is not in this list - confirm against the
    # batch workflow API whether that state can occur.
    _BATCH_FINAL_STATES = ("succeeded", "failed", "timedout")

    def __init__(self, tasks, **kwargs):
        '''
        Create a workflow from a list of tasks.

        Args:
            tasks: Iterable of task objects. Each one must expose
                   ``batch_values`` (falsy when the task has none).
            name (optional): Workflow name. Defaults to the first 8
                   characters of a random UUID.
            callback (optional): Value stored under the "callback" key of
                   the generated definition. Defaults to None.
        '''
        self.workflow = WF()
        self.name = kwargs.get('name', str(uuid.uuid4())[:8])
        self.id = None  # set by execute()
        self.callback = kwargs.get('callback', None)
        self.definition = None  # set by generate_workflow_description()
        self.tasks = tasks

        # Gather the batch values of every task; None means "not a batch workflow".
        collected = []
        for task in self.tasks:
            if task.batch_values:
                collected.extend(task.batch_values)
        self.batch_values = collected or None

    def savedata(self, output, location=None):
        '''
        Save output data from any task in this workflow to S3

        Args:
               output: Reference task output (e.g. task.outputs.output1).

               location (optional): Subfolder under which the output will be saved.
                                    It will be placed under the account directory in gbd-customer-data bucket:
                                    s3://gbd-customer-data/{account_id}/{location}
                                    Leave blank to save to: workflow_output/{workflow_id}/{task_name}/{port_name}

        Returns:
            None
        '''
        output.persist = True
        if location:
            output.persist_location = location

    def workflow_skeleton(self):
        '''
        Return an empty workflow definition carrying only this workflow's name.

        Returns:
            dict with an empty "tasks" list and the "name" key
        '''
        return {
            "tasks": [],
            "name": self.name
        }

    def list_workflow_outputs(self):
        '''
        Get a list of outputs from the workflow that are saved to S3. To get resolved locations call workflow status.

        Args:
            None

        Returns:
            list of "<task name>:<output port name>" strings, one per output
            port whose ``persist`` flag is truthy
        '''
        return [
            task.name + ':' + port_name
            for task in self.tasks
            for port_name in task.outputs._portnames
            if getattr(task.outputs, port_name).persist
        ]

    def generate_workflow_description(self):
        '''
        Generate the workflow definition used to launch the workflow against the gbdx api.

        The result is also stored in ``self.definition``.

        Args:
            None

        Returns:
            dict describing the workflow

        Raises:
            WorkflowError: if the workflow has no tasks.
        '''
        if not self.tasks:
            raise WorkflowError('Workflow contains no tasks, and cannot be executed.')

        self.definition = self.workflow_skeleton()

        if self.batch_values:
            self.definition["batch_values"] = self.batch_values

        # Every value currently assigned to any input port of any task.
        all_input_port_values = [
            getattr(t.inputs, input_port_name).value
            for t in self.tasks
            for input_port_name in t.inputs._portnames
        ]

        for task in self.tasks:
            # A multiplex output port is only kept in the task definition when
            # some task input refers to it ("source:<task>:<port>"); the
            # unreferenced ones are handed to the task as ports to exclude.
            unreferenced_multiplex_ports = [
                portname
                for portname in task.outputs._portnames
                if getattr(task.outputs, portname).is_multiplex
                and 'source:' + task.name + ':' + portname not in all_input_port_values
            ]

            task_def = task.generate_task_workflow_json(
                output_multiplex_ports_to_exclude=unreferenced_multiplex_ports)
            self.definition['tasks'].append(task_def)

        if self.callback:
            self.definition['callback'] = self.callback

        return self.definition

    def execute(self):
        '''
        Execute the workflow.

        Regenerates the definition, then launches it through the batch
        endpoint when batch values exist and the regular endpoint otherwise.

        Args:
            None

        Returns:
            Workflow_id
        '''
        self.generate_workflow_description()

        if self.batch_values:
            self.id = self.workflow.launch_batch_workflow(self.definition)
        else:
            self.id = self.workflow.launch(self.definition)

        return self.id

    @property
    def task_ids(self):
        '''
        Get the task IDs of a launched workflow

        Args:
            None

        Returns:
            List of task IDs

        Raises:
            WorkflowError: if the workflow has no id yet.
            NotImplementedError: for batch workflows.
        '''
        if not self.id:
            raise WorkflowError('Workflow is not running.  Cannot get task IDs.')

        if self.batch_values:
            raise NotImplementedError("Query Each Workflow Id within the Batch Workflow for task IDs.")

        wf = self.workflow.get(self.id)

        return [task['id'] for task in wf['tasks']]

    @task_ids.setter
    def task_ids(self, value):
        raise NotImplementedError("Cannot set workflow task_ids, readonly.")

    def cancel(self):
        '''
        Cancel a running workflow.

        Args:
            None

        Returns:
            None

        Raises:
            WorkflowError: if the workflow has no id yet.
        '''
        if not self.id:
            raise WorkflowError('Workflow is not running.  Cannot cancel.')

        if self.batch_values:
            self.workflow.batch_workflow_cancel(self.id)
        else:
            self.workflow.cancel(self.id)

    @property
    def status(self):
        '''
        Fetch the current status of the launched workflow.

        Returns:
            Whatever the client returns from ``batch_workflow_status`` (batch
            workflows) or ``status`` (regular workflows).

        Raises:
            WorkflowError: if the workflow has no id yet.
        '''
        if not self.id:
            raise WorkflowError('Workflow is not running.  Cannot check status.')

        if self.batch_values:
            return self.workflow.batch_workflow_status(self.id)
        return self.workflow.status(self.id)

    @status.setter
    def status(self, value):
        raise NotImplementedError("Cannot set workflow status, readonly.")

    @property
    def events(self):
        '''
        Fetch the events of the launched workflow.

        Returns:
            Whatever the client's ``events`` call returns for this workflow id.

        Raises:
            WorkflowError: if the workflow has no id yet.
            NotImplementedError: for batch workflows.
        '''
        if not self.id:
            # The original message was copy-pasted from `status`; name the
            # operation that actually failed.
            raise WorkflowError('Workflow is not running.  Cannot get events.')
        if self.batch_values:
            raise NotImplementedError("Query Each Workflow Id within the Batch Workflow for Events")
        return self.workflow.events(self.id)

    @events.setter
    def events(self, value):
        raise NotImplementedError("Cannot set workflow events, readonly.")

    @property
    def complete(self):
        '''
        Whether the workflow has finished.

        Returns:
            False when the workflow has no id. For batch workflows, True when
            every sub workflow is in a final state. Otherwise True when the
            status state is 'complete'.
        '''
        if not self.id:
            return False

        if self.batch_values:
            return all(workflow.get("state") in self._BATCH_FINAL_STATES
                       for workflow in self.status['workflows'])
        return self.status['state'] == 'complete'

    @complete.setter
    def complete(self, value):
        raise NotImplementedError("Cannot set workflow complete, readonly.")

    @property
    def failed(self):
        '''
        Whether the workflow completed with a 'failed' event.

        Returns:
            bool; False when the workflow has no id.

        Raises:
            NotImplementedError: for batch workflows.
        '''
        if not self.id:
            return False
        if self.batch_values:
            raise NotImplementedError("Query Each Workflow Id within the Batch Workflow for Current State")
        status = self.status
        return status['state'] == 'complete' and status['event'] == 'failed'

    @failed.setter
    def failed(self, value):
        raise NotImplementedError("Cannot set workflow failed, readonly.")

    @property
    def canceled(self):
        '''
        Whether the workflow completed with a 'canceled' event.

        Returns:
            bool; False when the workflow has no id.

        Raises:
            NotImplementedError: for batch workflows.
        '''
        if not self.id:
            return False
        if self.batch_values:
            raise NotImplementedError("Query Each Workflow Id within the Batch Workflow for Current State")
        status = self.status
        return status['state'] == 'complete' and status['event'] == 'canceled'

    @canceled.setter
    def canceled(self, value):
        raise NotImplementedError("Cannot set workflow canceled, readonly.")

    @property
    def succeeded(self):
        '''
        Whether the workflow completed with a 'succeeded' event.

        Returns:
            False when the workflow has no id. For batch workflows, True when
            every sub workflow is in state "succeeded". Otherwise True when
            the status is 'complete' with event 'succeeded'.
        '''
        if not self.id:
            return False

        if self.batch_values:
            return all(workflow.get("state") == "succeeded" for workflow in self.status['workflows'])

        status = self.status
        return status['state'] == 'complete' and status['event'] == 'succeeded'

    @succeeded.setter
    def succeeded(self, value):
        raise NotImplementedError("Cannot set workflow succeeded, readonly.")

    @property
    def running(self):
        '''
        Whether the workflow is still running.

        Returns:
            False when the workflow has no id. For batch workflows, True when
            any sub workflow is not in a final state. Otherwise True when the
            status is 'running' with event 'started'.
        '''
        if not self.id:
            return False
        if self.batch_values:
            return any(workflow.get("state") not in self._BATCH_FINAL_STATES
                       for workflow in self.status['workflows'])
        status = self.status
        return status['state'] == 'running' and status['event'] == 'started'

    @running.setter
    def running(self, value):
        raise NotImplementedError("Cannot set workflow running, readonly.")

    @property
    def timedout(self):
        '''
        Whether the workflow completed with a 'timedout' event.

        Returns:
            bool; False when the workflow has no id.

        Raises:
            NotImplementedError: for batch workflows.
        '''
        if not self.id:
            return False
        if self.batch_values:
            raise NotImplementedError("Query Each Workflow Id within the Batch Workflow for Current State")
        status = self.status
        return status['state'] == 'complete' and status['event'] == 'timedout'

    @timedout.setter
    def timedout(self, value):
        raise NotImplementedError("Cannot set workflow timedout, readonly.")

    def _collect_task_output(self, key, fetch):
        '''
        Build one record per task of the launched workflow.

        Args:
            key: Name of the record field holding the fetched text
                 ('stdout' or 'stderr').
            fetch: Callable taking (workflow_id, task_id) and returning the
                   text to store under ``key``.

        Returns:
            List of dicts with the keys 'id', 'taskType', 'name' and ``key``.
        '''
        wf = self.workflow.get(self.id)
        return [
            {
                'id': task['id'],
                'taskType': task['taskType'],
                'name': task['name'],
                key: fetch(self.id, task['id'])
            }
            for task in wf['tasks']
        ]

    @property
    def stdout(self):
        '''
        Get stdout from all the tasks of a workflow.

        Args:
            None

        Returns:
            List of tasks with their stdout, formatted like this:
            [
                {
                    "id": "4488895771403082552",
                    "taskType": "AOP_Strip_Processor",
                    "name": "Task1",
                    "stdout": "............"
                }
            ]

        Raises:
            WorkflowError: if the workflow has no id yet.
            NotImplementedError: for batch workflows.
        '''
        if not self.id:
            raise WorkflowError('Workflow is not running.  Cannot get stdout.')
        if self.batch_values:
            raise NotImplementedError("Query Each Workflow Id within the Batch Workflow for stdout.")

        return self._collect_task_output('stdout', self.workflow.get_stdout)

    @stdout.setter
    def stdout(self, value):
        raise NotImplementedError("Cannot set workflow stdout, readonly.")

    @property
    def stderr(self):
        '''
        Get stderr from all the tasks of a workflow.

        Args:
            None

        Returns:
            List of tasks with their stderr, formatted like this:
            [
                {
                    "id": "4488895771403082552",
                    "taskType": "AOP_Strip_Processor",
                    "name": "Task1",
                    "stderr": "............"
                }
            ]

        Raises:
            WorkflowError: if the workflow has no id yet.
            NotImplementedError: for batch workflows.
        '''
        if not self.id:
            raise WorkflowError('Workflow is not running.  Cannot get stderr.')
        if self.batch_values:
            raise NotImplementedError("Query Each Workflow Id within the Batch Workflow for stderr.")

        return self._collect_task_output('stderr', self.workflow.get_stderr)

    @stderr.setter
    def stderr(self, value):
        raise NotImplementedError("Cannot set workflow stderr, readonly.")