コード例 #1
0
    def execute_script(self, args, context):
        """Execute a R script in the given context.

        Standard output and standard error are redirected while the script
        is prepared and evaluated. The original streams are restored in all
        cases, including when a dataset listed in the context cannot be
        resolved and a ValueError is raised.

        Parameters
        ----------
        args: vizier.viztrail.command.ModuleArguments
            User-provided command arguments
        context: vizier.engine.task.base.TaskContext
            Context in which a task is being executed

        Returns
        -------
        vizier.engine.task.processor.ExecResult

        Raises
        ------
        ValueError
            If the datastore returns None for a dataset in the context
        """
        # Get R script from user arguments
        source = args.get_value(cmd.PARA_R_SOURCE)
        outputs = ModuleOutputs()
        # Remember the original streams so they can always be restored
        out = sys.stdout
        err = sys.stderr
        stream = list()
        try:
            # Redirect standard output and standard error streams
            sys.stdout = OutputStream(tag='out', stream=stream)
            sys.stderr = OutputStream(tag='err', stream=stream)
            # Map every dataset name in the context to the identifier of the
            # dataset handle returned by the datastore.
            mimir_table_names = dict()
            for ds_name_o in context.datasets:
                dataset_id = context.datasets[ds_name_o]
                dataset = context.datastore.get_dataset(dataset_id)
                if dataset is None:
                    # Propagates to the caller; the enclosing finally block
                    # still reverses the stream redirection.
                    raise ValueError('unknown dataset \'' + ds_name_o + '\'')
                mimir_table_names[ds_name_o] = dataset.identifier
            # Run the r code. Failures of the evaluation itself are reported
            # through the module outputs instead of being raised.
            try:
                evalresp = mimir.evalR(mimir_table_names, source)
                ostd = evalresp['stdout']
                oerr = evalresp['stderr']
                if not ostd == '':
                    outputs.stdout.append(HtmlOutput(ostd))
                if not oerr == '':
                    outputs.stderr.append(TextOutput(oerr))
            except Exception as ex:
                outputs.error(ex)
        finally:
            # Make sure to reverse redirection of output streams
            sys.stdout = out
            sys.stderr = err
        # Set module outputs from everything captured by the redirected
        # streams
        for tag, text in stream:
            text = ''.join(text).strip()
            if tag == 'out':
                outputs.stdout.append(HtmlOutput(text))
            else:
                outputs.stderr.append(TextOutput(text))
        provenance = ModuleProvenance()
        # Return execution result. Any entry on stderr marks the run as
        # failed.
        return ExecResult(
            is_success=(len(outputs.stderr) == 0),
            outputs=outputs,
            provenance=provenance
        )
コード例 #2
0
    def compute_empty_dataset(self, args, context):
        """Execute empty dataset command.

        Builds a view from a SELECT over the default column definitions,
        registers it with the datastore of the context, and reports the new
        dataset under the lower-cased user-provided name.

        Parameters
        ----------
        args: vizier.viztrail.command.ModuleArguments
            User-provided command arguments
        context: vizier.engine.task.base.TaskContext
            Context in which a task is being executed

        Returns
        -------
        vizier.engine.task.processor.ExecResult

        Raises
        ------
        ValueError
            If the dataset name already exists in the context or is not a
            valid name
        """
        outputs = ModuleOutputs()
        # Pairs of (SQL default value, column name)
        default_columns = [("''", "unnamed_column")]
        ds_name = args.get_value(pckg.PARA_NAME).lower()
        if ds_name in context.datasets:
            raise ValueError('dataset \'' + ds_name + '\' exists')
        if not is_valid_name(ds_name):
            raise ValueError('invalid dataset name \'' + ds_name + '\'')
        try:
            select_items = ", ".join(
                value + " AS " + label for value, label in default_columns
            )
            query = "SELECT {};".format(select_items)
            view_name, _dependencies = mimir.createView(dict(), query)
            # Column identifiers are the positions in the default list
            columns = [
                MimirDatasetColumn(identifier=position, name_in_dataset=label)
                for position, (_value, label) in enumerate(default_columns)
            ]
            handle = context.datastore.register_dataset(
                table_name=view_name,
                columns=columns,
                row_counter=1
            )
            descriptor = DatasetDescriptor(
                identifier=handle.identifier,
                columns=handle.columns,
                row_count=handle.row_count
            )
            # The empty read dictionary explicitly declares a lack of
            # dependencies.
            provenance = ModuleProvenance(
                write={ds_name: descriptor},
                read=dict()
            )
            message = "Empty dataset '{}' created".format(ds_name)
            outputs.stdout.append(TextOutput(message))
        except Exception as ex:
            provenance = ModuleProvenance()
            outputs.error(ex)
        succeeded = len(outputs.stderr) == 0
        return ExecResult(
            is_success=succeeded,
            outputs=outputs,
            provenance=provenance
        )
コード例 #3
0
 def test_init(self):
     """Output streams start empty and keep the lists handed to the
     constructor.
     """
     # A fresh object has two empty streams
     fresh = ModuleOutputs()
     for stream in (fresh.stderr, fresh.stdout):
         self.assertEqual(len(stream), 0)
     fresh.stdout.append(TextOutput(value='Hello World'))
     fresh.stderr.append(OutputObject(type='ERROR', value='Some Error'))
     # Re-create the object from the populated streams
     rebuilt = ModuleOutputs(stdout=fresh.stdout, stderr=fresh.stderr)
     for stream in (rebuilt.stderr, rebuilt.stdout):
         self.assertEqual(len(stream), 1)
     self.assertTrue(rebuilt.stdout[0].is_text)
     self.assertFalse(rebuilt.stderr[0].is_text)
コード例 #4
0
    def execute_script(self, args, context):
        """Execute a Markdown script in the given context.

        The Markdown source is passed through unchanged as a Markdown output
        object. Anything written to the redirected standard streams during
        execution is appended to the module outputs afterwards.

        Parameters
        ----------
        args: vizier.viztrail.command.ModuleArguments
            User-provided command arguments
        context: vizier.engine.task.base.TaskContext
            Context in which a task is being executed

        Returns
        -------
        vizier.engine.task.processor.ExecResult
        """
        # Get Markdown script from user arguments
        markdown = args.get_value(cmd.PARA_MARKDOWN_SOURCE)
        # Redirect standard output and standard error streams
        saved_stdout, saved_stderr = sys.stdout, sys.stderr
        captured = list()
        sys.stdout = OutputStream(tag='out', stream=captured)
        sys.stderr = OutputStream(tag='err', stream=captured)
        outputs = ModuleOutputs()
        try:
            # TODO: the markdown should be validated here. Until then the
            # error text is always empty.
            rendered, problems = markdown, ''
            if rendered != '':
                outputs.stdout.append(MarkdownOutput(rendered))
            if problems != '':
                outputs.stderr.append(TextOutput(problems))
        except Exception as ex:
            outputs.error(ex)
        finally:
            # Make sure to reverse redirection of output streams
            sys.stdout, sys.stderr = saved_stdout, saved_stderr
        # Add everything that was captured from the redirected streams
        for tag, chunks in captured:
            content = ''.join(chunks).strip()
            if tag != 'out':
                outputs.stderr.append(TextOutput(content))
            else:
                outputs.stdout.append(MarkdownOutput(content))
        provenance = ModuleProvenance()
        # Any entry on stderr marks the execution as failed
        succeeded = len(outputs.stderr) == 0
        return ExecResult(
            is_success=succeeded,
            outputs=outputs,
            provenance=provenance
        )
コード例 #5
0
def exec_command(task_id, command, context, processor):
    """Run a command with the given package task processor.

    Any exception raised while computing the command is converted into a
    failed execution result whose outputs carry the error.

    Parameters
    ----------
    task_id: string
        Unique task identifier
    command : vizier.viztrail.command.ModuleCommand
        Specification of the command that is to be executed
    context: vizier.engine.task.base.TaskContext
        Context for the executed task
    processor: vizier.engine.task.processor.TaskProcessor
        Task processor to execute the given command

    Returns
    -------
    (string, vizier.engine.task.processor.ExecResult)
        The unchanged task identifier paired with the execution result
    """
    try:
        call_args = dict(
            command_id=command.command_id,
            arguments=command.arguments,
            context=context
        )
        exec_result = processor.compute(**call_args)
    except Exception as err:
        exec_result = ExecResult(
            is_success=False,
            outputs=ModuleOutputs().error(err)
        )
    return task_id, exec_result
コード例 #6
0
    def __init__(self,
                 is_success: bool = True,
                 outputs: ModuleOutputs = None,
                 provenance: ModuleProvenance = None,
                 updated_arguments: ModuleArguments = None):
        """Initialize the result components.

        Outputs and provenance default to None and are replaced by a fresh
        object per result. Default instances created in the signature would
        be built once at definition time and shared by every result that
        omits them.

        Parameters
        ----------
        is_success: bool
            Flag indicating if execution was successful
        outputs: vizier.viztrail.module.output.ModuleOutputs, optional
            Outputs to STDOUT and STDERR generated during task execution.
            A new empty ModuleOutputs object is used if None.
        provenance: vizier.viztrail.module.provenance.ModuleProvenance, optional
            Provenance information about datasets that were read and written
            during task execution. A new ModuleProvenance object is used if
            None.
        updated_arguments: vizier.viztrail.command.ModuleArguments, optional
            If provided, the module's arguments will be overridden by the provided
            argument list.  This functionality should *only* be used when the module
            needs to infer/guess some of its arguments (e.g., Load Dataset needs to
            actually try to load the dataset to have type/name information for
            columns).  If updated arguments are provided, it is up to the processor
            to guarantee that the updated arguments are *idempotent* with its
            current execution (although idempotence with changes to the data and/or
            processor implementation need not be enforced.)
        """
        self.is_success = is_success
        # Create per-instance defaults so results never share mutable state
        self.outputs = outputs if outputs is not None else ModuleOutputs()
        self.provenance = (
            provenance if provenance is not None else ModuleProvenance()
        )
        self.updated_arguments = updated_arguments
コード例 #7
0
def OUTPUTS(obj):
    """Build a ModuleOutputs object from the default dictionary
    serialization of a pair of module output streams.

    Each entry of the 'stdout' and 'stderr' lists is turned into an
    OutputObject using its 'type' and 'value' elements.

    Parameters
    ----------
    obj: dict
        Default output serialization for a pair of module output streams

    Returns
    -------
    vizier.viztrail.module.output.ModuleOutputs

    Raises
    ------
    ValueError
        If a required key is missing from the serialization
    """
    def to_objects(stream_key):
        # Convert all serialized entries of one stream
        return [
            OutputObject(type=entry['type'], value=entry['value'])
            for entry in obj[stream_key]
        ]

    try:
        return ModuleOutputs(
            stdout=to_objects('stdout'),
            stderr=to_objects('stderr')
        )
    except KeyError as ex:
        raise ValueError(ex)
コード例 #8
0
    def compute_drop_dataset(self, args, context):
        """Execute drop dataset command.

        Parameters
        ----------
        args: vizier.viztrail.command.ModuleArguments
            User-provided command arguments
        context: vizier.engine.task.base.TaskContext
            Context in which a task is being executed

        Returns
        -------
        vizier.engine.task.processor.ExecResult
        """
        # Look up the dataset by its lower-cased name. The lookup is expected
        # to raise an exception if the specified dataset does not exist.
        ds_name = args.get_value(pckg.PARA_DATASET).lower()
        ds = context.get_dataset(ds_name)
        # Remove the entry from a local copy of the dataset dictionary. The
        # copy is not used afterwards; the deletion is reported through the
        # provenance object.
        remaining = dict(context.datasets)
        remaining.pop(ds_name)
        message = TextOutput('Dataset \'' + ds_name + '\' deleted')
        outputs = ModuleOutputs(stdout=[message])
        provenance = ModuleProvenance(
            read=dict(),
            write=dict(),
            delete=[ds_name]
        )
        return ExecResult(outputs=outputs, provenance=provenance)
コード例 #9
0
 def test_single_append(self):
     """Append one module to an empty viztrail branch and read it back."""
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     viztrail = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties={},
         base_path=base_path
     )
     branch = viztrail.get_default_branch()
     command = python_cell(source='print 2+2')
     now = get_current_time()
     module = OSModuleHandle.create_module(
         command=command,
         external_form='print 2+2',
         state=MODULE_SUCCESS,
         outputs=ModuleOutputs(stdout=[TextOutput('4')]),
         provenance=ModuleProvenance(),
         timestamp=ModuleTimestamp(
             created_at=now,
             started_at=now,
             finished_at=now
         ),
         module_folder=viztrail.modules_folder,
         object_store=viztrail.object_store
     )
     workflow = branch.append_workflow(
         modules=[module],
         action=ACTION_INSERT,
         command=command
     )
     # One file is expected for the workflow handle and one for the new
     # module
     workflow_file = os.path.join(branch.base_path, workflow.identifier)
     self.assertTrue(os.path.isfile(workflow_file))
     module_file = os.path.join(workflow.modules[-1].module_path)
     self.assertTrue(os.path.isfile(module_file))
     # Reload the viztrail and inspect the module at the branch head
     reloaded = OSViztrailHandle.load_viztrail(base_path)
     head_module = reloaded.get_default_branch().get_head().modules[-1]
     self.assertEqual(head_module.external_form, 'print 2+2')
     self.assertEqual(head_module.outputs.stdout[-1].value, '4')
コード例 #10
0
ファイル: module.py プロジェクト: mikebrachmann/web-api-async
    def set_success(self,
            finished_at: Optional[datetime] = None,
            outputs: Optional[ModuleOutputs] = None,
            provenance: Optional[ModuleProvenance] = None,
            updated_arguments: Optional[ModuleArguments] = None
        ):
        """Set status of the module to success. The finished_at property of the
        timestamp is set to the given value or the current time (if None).

        If case of a successful module execution the database state and module
        provenance information are also adjusted together with the module
        output streams.

        The defaults are resolved at call time. Defaults computed in the
        signature would be evaluated once at definition time, freezing the
        timestamp and sharing one outputs/provenance object between calls.

        Parameters
        ----------
        finished_at: datetime.datetime, optional
            Timestamp when module finished running. The current time is used
            if None.
        outputs: vizier.viztrail.module.output.ModuleOutputs, optional
            Output streams for module. A new empty object is used if None.
        provenance: vizier.viztrail.module.provenance.ModuleProvenance, optional
            Provenance information about datasets that were read and written by
            previous execution of the module. A new object is used if None.
        updated_arguments: vizier.viztrail.command.ModuleArguments, optional
            Passed through unchanged to the parent implementation
        """
        # Resolve defaults here so the parent implementation always receives
        # concrete values.
        if finished_at is None:
            finished_at = get_current_time()
        if outputs is None:
            outputs = ModuleOutputs()
        if provenance is None:
            provenance = ModuleProvenance()
        # Update state, timestamp, database state, outputs and provenance
        # information.
        super().set_success(finished_at, outputs, provenance, updated_arguments)
        # Materialize module state
        self.write_safe()
コード例 #11
0
    def set_error(
        self,
        task_id: str,
        finished_at: Optional[datetime] = None,
        outputs: Optional[ModuleOutputs] = None
    ) -> Optional[bool]:
        """Set status of the module that is associated with the given task
        identifier to error. The finished_at property of the timestamp is set
        to the given value or the current time (if None). The module outputs
        are adjusted to the given value. The output streams are empty if no
        value is given for the outputs parameter.

        Cancels all pending modules in the workflow.

        Returns True if the state of the workflow was changed and False
        otherwise. The result is None if the project or task did not exist.

        This is the abstract declaration; it always raises
        NotImplementedError. Both optional parameters default to None so
        that no timestamp or outputs object is created once at definition
        time and then reused for every call.

        Parameters
        ----------
        task_id : string
            Unique task identifier
        finished_at: datetime.datetime, optional
            Timestamp when module finished running
        outputs: vizier.viztrail.module.output.ModuleOutputs, optional
            Output streams for module

        Returns
        -------
        bool

        Raises
        ------
        NotImplementedError
            Always raised by this declaration
        """
        raise NotImplementedError
コード例 #12
0
ファイル: base.py プロジェクト: mikebrachmann/web-api-async
    def set_success(self,
                    finished_at: Optional[datetime] = None,
                    outputs: Optional[ModuleOutputs] = None,
                    provenance: Optional[ModuleProvenance] = None,
                    updated_arguments: Optional[ModuleArguments] = None):
        """Set status of the module to success. The finished_at property of the
        timestamp is set to the given value or the current time (if None).

        If case of a successful module execution the database state and module
        provenance information are also adjusted together with the module
        output streams.

        The defaults are resolved at call time. Defaults computed in the
        signature would be evaluated once at definition time, freezing the
        timestamp and sharing one outputs/provenance object between modules.

        Parameters
        ----------
        finished_at: datetime.datetime, optional
            Timestamp when module finished running. The current time is used
            if None.
        outputs: vizier.viztrail.module.output.ModuleOutputs, optional
            Output streams for module. A new empty object is used if None.
        provenance: vizier.viztrail.module.provenance.ModuleProvenance, optional
            Provenance information about datasets that were read and written by
            previous execution of the module. A new object is used if None.
        updated_arguments: vizier.viztrail.command.ModuleArguments, optional
            If given, replaces the arguments of the module command
        """
        if finished_at is None:
            finished_at = get_current_time()
        if outputs is None:
            outputs = ModuleOutputs()
        if provenance is None:
            provenance = ModuleProvenance()
        # Update state, timestamp, database state, outputs and provenance
        # information.
        self.state = MODULE_SUCCESS
        self.timestamp.finished_at = finished_at
        # If the module is set to success straight from pending state the
        # started_at timestamp may not have been set.
        if self.timestamp.started_at is None:
            self.timestamp.started_at = self.timestamp.finished_at
        if updated_arguments is not None:
            self.command.arguments = updated_arguments
        self.outputs = outputs
        self.provenance = provenance
コード例 #13
0
def execute(task_id, project_id, command_doc, context, resources):
    """Execute the given command and report the outcome to the workflow
    controller of the project.

    The function has no return value. The controller is first notified that
    the task is running and afterwards that it succeeded or failed.

    Parameters:
    -----------
    task_id: string
        Unique task identifier
    project_id: string
        Unique project identifier
    command_doc : dict
        Dictionary serialization of the module command
    context: dict
        Dictionary of available resources in the database state. The key is
        the resource name. Values are resource identifiers.
    resources: dict
        Optional information about resources that were generated during a
        previous execution of the command
    """
    # Create a remote workflow controller for the given task and signal that
    # the task started to run
    controller = worker_env.get_controller(project_id)
    controller.set_running(task_id=task_id, started_at=get_current_time())
    command = ModuleCommand.from_dict(command_doc)
    if command.package_id not in worker_env.processors:
        # Unknown package: the result is an error with a single message
        message = 'unknown package \'' + str(command.package_id) + '\''
        exec_result = ExecResult(
            is_success=False,
            outputs=ModuleOutputs(stderr=[TextOutput(message)])
        )
    else:
        processor = worker_env.processors[command.package_id]
        task_context = TaskContext(
            project_id=project_id,
            datastore=worker_env.datastores.get_datastore(project_id),
            filestore=worker_env.filestores.get_filestore(project_id),
            datasets=context[labels.CONTEXT_DATASETS],
            resources=resources,
            dataobjects=context[labels.CONTEXT_DATAOBJECTS]
        )
        _, exec_result = exec_command(
            task_id=task_id,
            command=command,
            context=task_context,
            processor=processor
        )
    # Notify the workflow controller that the task has finished
    if not exec_result.is_success:
        controller.set_error(
            task_id=task_id,
            outputs=exec_result.outputs
        )
    else:
        controller.set_success(
            task_id=task_id,
            outputs=exec_result.outputs,
            provenance=exec_result.provenance
        )
コード例 #14
0
 def test_load_with_dataset_delete(self):
     """Test loading workflows where each module creates a new dataset and
     deletes the previous dataset (except for the first module).
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     viztrail = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties={},
         base_path=base_path
     )
     branch = viztrail.get_default_branch()
     # Append five modules
     for i in range(5):
         now = get_current_time()
         # Every module but the first deletes the dataset of its predecessor
         deleted_datasets = ['DS' + str(i - 1)] if i > 0 else list()
         command = python_cell(source='print ' + str(i) + '+' + str(i))
         module = OSModuleHandle.create_module(
             command=command,
             external_form='print ' + str(i) + '+' + str(i),
             state=MODULE_SUCCESS,
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(
                 write={
                     'DS' + str(i): DatasetDescriptor(
                         identifier=str(i),
                         name='DS' + str(i),
                         columns=[
                             DatasetColumn(identifier=j, name=str(j))
                             for j in range(i)
                         ],
                     )
                 },
                 delete=deleted_datasets
             ),
             timestamp=ModuleTimestamp(
                 created_at=now,
                 started_at=now,
                 finished_at=now
             ),
             module_folder=viztrail.modules_folder,
             object_store=viztrail.object_store
         )
         if branch.head is None:
             modules = [module]
         else:
             modules = branch.head.modules + [module]
         branch.append_workflow(
             modules=modules,
             action=ACTION_INSERT,
             command=command
         )
     reloaded = OSViztrailHandle.load_viztrail(base_path)
     workflow = reloaded.get_default_branch().get_head()
     self.assertEqual(len(workflow.modules), 5)
     # Replay the provenance of each module. Exactly one dataset is expected
     # in the state after every step.
     state = {}
     for i, module in enumerate(workflow.modules[:5]):
         state = module.provenance.get_database_state(state)
         self.assertEqual(len(state), 1)
         key = 'DS' + str(i)
         self.assertTrue(key in state)
         self.assertEqual(len(state[key].columns), i)
コード例 #15
0
    def compute_simple_chart(self, args, context):
        """Execute simple chart command.

        Creates a chart view over a dataset from the user-provided series
        definitions, runs the chart query, and returns the chart as module
        output together with the provenance of the read dataset.

        Parameters
        ----------
        args: vizier.viztrail.command.ModuleArguments
            User-provided command arguments
        context: vizier.engine.task.base.TaskContext
            Context in which a task is being executed

        Returns
        -------
        vizier.engine.task.processor.ExecResult

        Raises
        ------
        ValueError
            If the chart name is not a valid name
        """
        # Resolve the dataset. The lookup is expected to raise an exception
        # if the dataset name is unknown.
        ds_name = args.get_value(pckg.PARA_DATASET)
        ds = context.get_dataset(ds_name)
        # The chart name falls back to a name derived from the dataset if the
        # user did not provide one
        fallback_name = ds_name + ' Plot'
        chart_name = args.get_value(
            pckg.PARA_NAME,
            default_value=fallback_name
        )
        if chart_name is None or chart_name == '':
            chart_name = fallback_name
        if not is_valid_name(chart_name):
            raise ValueError('invalid chart name \'' + str(chart_name) + '\'')
        chart_args = args.get_value(cmd.PARA_CHART)
        # Create a new chart view handle; series definitions are added below
        view = ChartViewHandle(
            dataset_name=ds_name,
            chart_name=chart_name,
            chart_type=chart_args.get_value(cmd.PARA_CHART_TYPE),
            grouped_chart=chart_args.get_value(cmd.PARA_CHART_GROUPED)
        )
        # The data series for x-axis values is optional. If present it is
        # added first and the x-axis index is set to 0.
        if args.has(cmd.PARA_XAXIS):
            add_data_series(
                args=args.get_value(cmd.PARA_XAXIS),
                view=view,
                dataset=ds,
                col_arg_id=cmd.PARA_XAXIS_COLUMN,
                range_arg_id=cmd.PARA_XAXIS_RANGE
            )
            view.x_axis = 0
        # Add the definition of every data series
        for series in args.get_value(cmd.PARA_SERIES):
            add_data_series(args=series, view=view, dataset=ds)
        # Execute the query and wrap the result as module output
        rows = ChartQuery.exec_query(ds, view)
        outputs = ModuleOutputs(stdout=[ChartOutput(view=view, rows=rows)])
        provenance = ModuleProvenance(
            read={ds_name: ds.identifier},
            write=dict(),
            charts=[view]
        )
        return ExecResult(outputs=outputs, provenance=provenance)
コード例 #16
0
 def test_timestamps(self):
     """Test reading and writing modules with different timestamp values."""
     def reload(handle):
         # Read the module back from its file
         return OSModuleHandle.load_module(
             identifier=handle.identifier,
             module_path=handle.module_path
         )

     original = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         outputs=ModuleOutputs(),
         provenance=ModuleProvenance(),
         timestamp=ModuleTimestamp(),
         module_folder=MODULE_DIR
     )
     loaded = reload(original)
     # Set started_at and check that created_at is unchanged after writing
     created_at = loaded.timestamp.created_at
     started_at = to_datetime('2018-11-26T13:00:00.000000')
     loaded.timestamp.started_at = started_at
     loaded.write_module()
     loaded = reload(original)
     self.assertEqual(loaded.timestamp.created_at, created_at)
     self.assertEqual(loaded.timestamp.started_at, started_at)
     # Overwrite created_at and set finished_at
     finished_at = to_datetime('2018-11-26T13:00:00.000010')
     loaded.timestamp.created_at = finished_at
     loaded.timestamp.finished_at = finished_at
     loaded.write_module()
     loaded = reload(original)
     self.assertEqual(loaded.timestamp.created_at, finished_at)
     self.assertEqual(loaded.timestamp.started_at, started_at)
     self.assertEqual(loaded.timestamp.finished_at, finished_at)
     # Create a module with explicit timestamps but without finished_at
     second = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         outputs=ModuleOutputs(),
         provenance=ModuleProvenance(),
         timestamp=ModuleTimestamp(
             created_at=created_at,
             started_at=started_at
         ),
         module_folder=MODULE_DIR
     )
     loaded = reload(second)
     self.assertEqual(loaded.timestamp.created_at, created_at)
     self.assertEqual(loaded.timestamp.started_at, started_at)
     self.assertIsNone(loaded.timestamp.finished_at)
コード例 #17
0
 def test_load_active(self):
     """Test loading workflows with active modules."""
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     viztrail = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties=None,
         base_path=base_path
     )
     branch = viztrail.get_default_branch()
     # Append five modules, one workflow version per module
     for i in range(5):
         now = get_current_time()
         command = python_cell(source='print ' + str(i) + '+' + str(i))
         module = OSModuleHandle.create_module(
             command=command,
             external_form='print ' + str(i) + '+' + str(i),
             state=MODULE_SUCCESS,
             datasets=dict(),
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(
                 created_at=now,
                 started_at=now,
                 finished_at=now
             ),
             module_folder=viztrail.modules_folder,
             object_store=viztrail.object_store
         )
         if branch.head is None:
             modules = [module]
         else:
             modules = branch.head.modules + [module]
         branch.append_workflow(
             modules=modules,
             action=ACTION_INSERT,
             command=command
         )
         self.assertEqual(len(branch.get_history()), (i + 1))
     # This is a hack to simulate loading workflows with active modules.
     # Change state of last two modules in branch head to an active state.
     for position in (-2, -1):
         active = branch.get_head().modules[position]
         active.state = MODULE_RUNNING
         active.write_module()
     viztrail = OSViztrailHandle.load_viztrail(base_path)
     branch = viztrail.get_default_branch()
     for position in range(3):
         self.assertTrue(branch.get_head().modules[position].is_success)
     for position in (3, 4):
         self.assertTrue(branch.get_head().modules[position].is_canceled)
     # Change state of last module in second workflow to an active state
     active = branch.get_head().modules[1]
     active.state = MODULE_RUNNING
     active.write_module()
     viztrail = OSViztrailHandle.load_viztrail(base_path)
     branch = viztrail.get_default_branch()
     second_version = branch.get_history()[1].identifier
     workflow = branch.get_workflow(second_version)
     self.assertTrue(workflow.modules[0].is_success)
     self.assertTrue(workflow.modules[1].is_canceled)
コード例 #18
0
 def test_datasets(self):
     """Test reading and writing modules with dataset information."""
     def roundtrip(state):
         # Create a module in the given state and read it back from disk
         created = OSModuleHandle.create_module(
             command=python_cell(source='print 2+2'),
             external_form='TEST MODULE',
             state=state,
             outputs=ModuleOutputs(),
             provenance=ModuleProvenance(write=DATASETS),
             timestamp=ModuleTimestamp(),
             module_folder=MODULE_DIR,
             datasets=DATASETS
         )
         return OSModuleHandle.load_module(
             identifier=created.identifier,
             module_path=created.module_path,
             prev_state=dict()
         )

     # A pending module is loaded without datasets
     pending = roundtrip(MODULE_PENDING)
     self.assertEqual(len(pending.datasets), 0)
     # A successful module is loaded with both datasets
     loaded = roundtrip(MODULE_SUCCESS)
     self.assertEqual(len(loaded.datasets), 2)
     self.assertEqual(loaded.datasets['DS1'].identifier, 'ID1')
     self.assertEqual(len(loaded.datasets['DS1'].columns), 0)
     self.assertEqual(loaded.datasets['DS1'].row_count, 0)
     second = loaded.datasets['DS2']
     self.assertEqual(second.identifier, 'ID2')
     self.assertEqual(len(second.columns), 2)
     expected_columns = [(0, 'ABC', 'int'), (1, 'xyz', 'real')]
     for position, (col_id, col_name, col_type) in enumerate(expected_columns):
         column = second.columns[position]
         self.assertEqual(column.identifier, col_id)
         self.assertEqual(column.name, col_name)
         self.assertEqual(column.data_type, col_type)
     self.assertEqual(second.row_count, 100)
コード例 #19
0
ファイル: module.py プロジェクト: mikebrachmann/web-api-async
 def set_canceled(self,
         finished_at: datetime = None,
         outputs: ModuleOutputs = None
     ) -> None:
     """Set status of the module to canceled. The finished_at property of the
     timestamp is set to the given value or the current time (if None). The
     module outputs are set to the given value. If no outputs are given the
     module output streams will be empty.

     Both defaults are resolved at call time. Defaults computed in the
     signature would be evaluated once at definition time, freezing the
     timestamp and sharing one outputs object between calls.

     Parameters
     ----------
     finished_at: datetime.datetime, optional
         Timestamp when module was canceled. The current time is used if
         None.
     outputs: vizier.viztrail.module.output.ModuleOutputs, optional
         Output streams for module. A new empty object is used if None.
     """
     # Resolve defaults here so the parent implementation always receives
     # concrete values.
     if finished_at is None:
         finished_at = get_current_time()
     if outputs is None:
         outputs = ModuleOutputs()
     super().set_canceled(finished_at, outputs)
     # Materialize module state
     self.write_safe()
コード例 #20
0
ファイル: base.py プロジェクト: mikebrachmann/web-api-async
    def set_error(
        self,
        task_id: str,
        finished_at: Optional[datetime] = None,
        outputs: Optional[ModuleOutputs] = None
    ) -> Optional[bool]:
        """Set status of the module that is associated with the given task
        identifier to error. The finished_at property of the timestamp is set
        to the given value or the current time (if None). The module outputs
        are adjusted to the given value. The output streams are empty if no
        value is given for the outputs parameter.

        Cancels all modules that follow the failed module in the workflow.

        The optional arguments are resolved on every call. Default values
        such as ``get_current_time()`` in the signature would be evaluated
        only once at definition time and then shared by all calls.

        Parameters
        ----------
        task_id : string
            Unique task identifier
        finished_at: datetime.datetime, optional
            Timestamp when module finished running
        outputs: vizier.viztrail.module.output.ModuleOutputs, optional
            Output streams for module

        Returns
        -------
        bool
            True if the state of the workflow was changed and False
            otherwise. The result is None if the task, workflow, or module
            could not be found.
        """
        print("ERROR: {}".format(task_id))
        if finished_at is None:
            finished_at = get_current_time()
        if outputs is None:
            outputs = ModuleOutputs()
        with self.backend.lock:
            # Get task handle and remove it from the internal index. The result
            # is None if the task does not exist.
            task = pop_task(tasks=self.tasks, task_id=task_id)
            if task is None:
                return None
            # Get the handle for the head workflow of the specified branch and
            # the index for the module matching the identifier in the task.
            workflow, module_index = self.get_task_module(task)
            if workflow is None or module_index == -1:
                return None
            # Notify the backend that the task is finished
            self.backend.task_finished(task_id)
            module = workflow.modules[module_index]
            if not module.is_active:
                # The module already reached a final state; nothing to change
                return False
            module.set_error(finished_at=finished_at, outputs=outputs)
            # Cancel everything downstream of the failed module. Timestamp
            # and outputs are passed explicitly so that each canceled module
            # gets the time of the failure and its own empty output streams.
            for m in workflow.modules[module_index + 1:]:
                m.set_canceled(
                    finished_at=finished_at,
                    outputs=ModuleOutputs()
                )
            return True
コード例 #21
0
 def test_safe_write(self):
     """Check module state after a state update whose write fails."""
     # Build the components of the original module one by one
     cell = python_cell(source='print 2+2')
     created_ts = ModuleTimestamp()
     initial_outputs = ModuleOutputs(stdout=[TextOutput('ABC')])
     descriptor = DatasetDescriptor(identifier='ID2', name='ID2')
     initial_provenance = ModuleProvenance(
         read={'DS1': 'ID1'},
         write={'DS1': descriptor})
     handle = OSModuleHandle.create_module(
         command=cell,
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         module_folder=MODULE_DIR,
         timestamp=created_ts,
         outputs=initial_outputs,
         provenance=initial_provenance)
     self.assertTrue(handle.is_pending)
     handle.set_running(external_form='TEST MODULE')
     self.assertTrue(handle.is_running)
     # The None entry in STDERR is expected to make the update end in error
     # state (presumably because the outputs cannot be written -- confirm)
     broken_outputs = ModuleOutputs(stderr=[None])
     handle.set_success(outputs=broken_outputs)
     self.assertTrue(handle.is_error)
     # The stored module is expected to still be in running state
     handle = OSModuleHandle.load_module(
         identifier=handle.identifier,
         module_path=handle.module_path)
     self.assertTrue(handle.is_running)
コード例 #22
0
ファイル: base.py プロジェクト: mikebrachmann/web-api-async
    def __init__(self,
                 command: ModuleCommand,
                 external_form: Optional[str],
                 identifier: Optional[str] = None,
                 state: int = MODULE_PENDING,
                 timestamp: Optional[ModuleTimestamp] = None,
                 outputs: Optional[ModuleOutputs] = None,
                 provenance: Optional[ModuleProvenance] = None):
        """Initialize the module handle. For new modules, datasets and outputs
        are initially empty.

        The timestamp, outputs and provenance objects default to None and are
        created per instance. Instances created in the signature would be
        built once at definition time and shared by every handle that relies
        on the defaults.

        Parameters
        ----------
        command : vizier.viztrail.command.ModuleCommand
            Specification of the module (i.e., package, name, and arguments)
        external_form: string
            Printable representation of module command
        identifier : string, optional
            Unique module identifier
        state: int
            Module state (one of PENDING, RUNNING, CANCELED, ERROR, SUCCESS).
            MODULE_PENDING is used if None.
        timestamp: vizier.viztrail.module.timestamp.ModuleTimestamp, optional
            Module timestamp. A new timestamp is created if None.
        outputs: vizier.viztrail.module.output.ModuleOutputs, optional
            Module output streams STDOUT and STDERR. New output streams are
            created if None.
        provenance: vizier.viztrail.module.provenance.ModuleProvenance, optional
            Provenance information about datasets that were read and written
            by previous execution of the module. A new provenance object is
            created if None.
        """
        super(ModuleHandle, self).__init__(
            state=state if state is not None else MODULE_PENDING)
        self.identifier = identifier
        self.command = command
        self.external_form = external_form
        self.outputs = outputs if outputs is not None else ModuleOutputs()
        self.provenance = (
            provenance if provenance is not None else ModuleProvenance()
        )
        self.timestamp = (
            timestamp if timestamp is not None else ModuleTimestamp()
        )
コード例 #23
0
 def write_safe(self):
     """Write the current module state to the object store without letting
     a failure propagate.

     Any exception raised by write_module() is caught. In that case the
     module is switched to error state, its outputs are replaced by a single
     STDERR entry holding the exception message, and its datasets are
     cleared. This keeps a module whose state change could not be written
     in error state (i.e., the workflow cannot further be executed).
     """
     try:
         self.write_module()
     except Exception as write_failure:
         self.state = mstate.MODULE_ERROR
         # Surface the reason for the failed write on STDERR
         message = TextOutput(str(write_failure))
         self.outputs = ModuleOutputs(stderr=[message])
         self.datasets = dict()
コード例 #24
0
 def test_completed_append(self):
     """Append ten completed workflows to a branch, delete the last module,
     and check the head workflow in memory and after reloading the viztrail.
     """
     def assert_head_after_delete(workflow):
         # Nine modules remain and the descriptor carries the delete action
         # together with the Python code command.
         self.assertEqual(len(workflow.modules), 9)
         self.assertEqual(workflow.descriptor.identifier, '0000000A')
         self.assertEqual(workflow.descriptor.action, ACTION_DELETE)
         self.assertEqual(workflow.descriptor.package_id, PACKAGE_PYTHON)
         self.assertEqual(workflow.descriptor.command_id, PYTHON_CODE)

     viztrail_dir = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(viztrail_dir)
     viztrail = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties=None,
         base_path=viztrail_dir)
     branch = viztrail.get_default_branch()
     for n in range(10):
         now = get_current_time()
         source = 'print {0}+{0}'.format(n)
         cell = python_cell(source=source)
         appended = OSModuleHandle.create_module(
             command=cell,
             external_form=source,
             state=MODULE_SUCCESS,
             datasets=dict(),
             outputs=ModuleOutputs(stdout=[TextOutput(str(n + n))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(
                 created_at=now,
                 started_at=now,
                 finished_at=now),
             module_folder=viztrail.modules_folder,
             object_store=viztrail.object_store)
         # Extend the modules of the current head, if there is one
         if branch.head is None:
             workflow_modules = [appended]
         else:
             workflow_modules = branch.head.modules + [appended]
         branch.append_workflow(
             modules=workflow_modules,
             action=ACTION_INSERT,
             command=cell)
     # Drop the last module of the head workflow
     current = branch.get_head().modules
     deleted_head = branch.append_workflow(
         modules=current[:-1],
         action=ACTION_DELETE,
         command=current[-1].command)
     assert_head_after_delete(deleted_head)
     # Reload the viztrail and repeat the checks on the stored state
     viztrail = OSViztrailHandle.load_viztrail(viztrail_dir)
     branch = viztrail.get_default_branch()
     workflows = branch.get_history()
     self.assertEqual(len(workflows), 11)
     assert_head_after_delete(branch.get_head())
コード例 #25
0
 def test_running(self):
     """Move a module from pending to running and check the module state
     in memory and after loading it from the object store.
     """
     def assert_running(handle):
         # A running module has a start time, no datasets or outputs, and
         # all provenance components set.
         self.assertTrue(handle.is_running)
         self.assertIsNotNone(handle.timestamp.started_at)
         self.assertEqual(len(handle.datasets), 0)
         self.assertEqual(len(handle.outputs.stderr), 0)
         self.assertEqual(len(handle.outputs.stdout), 0)
         self.assertIsNotNone(handle.provenance.read)
         self.assertIsNotNone(handle.provenance.write)
         self.assertIsNotNone(handle.provenance.resources)

     def assert_overrides(handle):
         # The explicitly given start time and external form are in effect.
         self.assertEqual(
             handle.timestamp.started_at,
             handle.timestamp.created_at)
         self.assertEqual(handle.external_form, 'Some form')

     def reload(handle):
         # Read the module back from the object store.
         return OSModuleHandle.load_module(
             identifier=handle.identifier,
             module_path=handle.module_path)

     # Create original module
     handle = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         module_folder=MODULE_DIR,
         timestamp=ModuleTimestamp(),
         datasets={'DS1': DS1},
         outputs=ModuleOutputs(stdout=[TextOutput('ABC')]),
         provenance=ModuleProvenance(
             read={'DS1': 'ID1'},
             write={'DS1': DatasetDescriptor(identifier='ID2')},
             resources={'fileid': '0123456789'}))
     self.assertTrue(handle.is_pending)
     handle.set_running(external_form='TEST MODULE')
     assert_running(handle)
     # Read module from object store and ensure that all changes have been
     # materialized properly
     handle = reload(handle)
     assert_running(handle)
     # Set running with all optional parameters
     handle.set_running(
         started_at=handle.timestamp.created_at,
         external_form='Some form')
     assert_overrides(handle)
     handle = reload(handle)
     assert_overrides(handle)
コード例 #26
0
 def test_state(self):
     """Ensure that exactly one state flag is True after each transition."""
     # Create original module
     handle = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         module_folder=MODULE_DIR,
         timestamp=ModuleTimestamp(),
         outputs=ModuleOutputs(stdout=[TextOutput('ABC')]),
         provenance=ModuleProvenance(
             read={'DS1': 'ID1'},
             write={'DS1': DatasetDescriptor(identifier='ID2',
                                             name='ID2')}))
     # Flags are checked in this fixed order after every state change
     state_flags = (
         'is_pending',
         'is_canceled',
         'is_error',
         'is_running',
         'is_success',
     )

     def assert_only(active_flag):
         # The named flag has to be True, all other flags False.
         for flag in state_flags:
             value = getattr(handle, flag)
             if flag == active_flag:
                 self.assertTrue(value)
             else:
                 self.assertFalse(value)

     # Pending
     assert_only('is_pending')
     # Running
     handle.set_running(external_form='TEST MODULE')
     assert_only('is_running')
     # Canceled
     handle.set_canceled()
     assert_only('is_canceled')
     # Error
     handle.set_error()
     assert_only('is_error')
     # Success
     handle.set_success()
     assert_only('is_success')
コード例 #27
0
ファイル: module.py プロジェクト: mikebrachmann/web-api-async
    def __init__(self,
            identifier: str,
            command: ModuleCommand,
            external_form: str,
            module_path: str,
            state: int = mstate.MODULE_PENDING,
            timestamp: "Optional[ModuleTimestamp]" = None,
            outputs: "Optional[ModuleOutputs]" = None,
            provenance: "Optional[ModuleProvenance]" = None,
            object_store: "Optional[ObjectStore]" = None
        ):
        """Initialize the module handle. For new modules, datasets and outputs
        are initially empty.

        All optional components default to None and are created per call.
        Instances created in the signature would be built once at definition
        time and shared by every handle that relies on the defaults.

        NOTE(review): the Optional annotations are quoted because the typing
        imports of this file are not visible here; unquote them if
        ``Optional`` is already imported.

        Parameters
        ----------
        identifier : string
            Unique module identifier
        command : vizier.viztrail.command.ModuleCommand
            Specification of the module (i.e., package, name, and arguments)
        external_form: string
            Printable representation of module command
        module_path: string
            Path to module resource in object store
        state: int
            Module state (one of PENDING, RUNNING, CANCELED, ERROR, SUCCESS)
        timestamp: vizier.viztrail.module.timestamp.ModuleTimestamp, optional
            Module timestamp. A new timestamp is created if None.
        outputs: vizier.viztrail.module.output.ModuleOutputs, optional
            Module output streams STDOUT and STDERR. New output streams are
            created if None.
        provenance: vizier.viztrail.module.provenance.ModuleProvenance, optional
            Provenance information about datasets that were read and written
            by previous execution of the module. A new provenance object is
            created if None.
        object_store: vizier.core.io.base.ObjectStore, optional
            Object store implementation to access and maintain resources. A
            new DefaultObjectStore is created if None.
        """
        super(OSModuleHandle, self).__init__(
            identifier=identifier,
            command=command,
            external_form=external_form,
            state=state,
            timestamp=(
                timestamp if timestamp is not None else ModuleTimestamp()
            ),
            outputs=outputs if outputs is not None else ModuleOutputs(),
            provenance=(
                provenance if provenance is not None else ModuleProvenance()
            ),
        )
        self.module_path = module_path
        self.object_store = (
            object_store if object_store is not None else DefaultObjectStore()
        )
コード例 #28
0
    def create_exec_result(self,
                           dataset_name,
                           input_dataset=None,
                           output_dataset=None,
                           database_state=None,
                           stdout=None,
                           resources=None):
        """Create execution result object for a successfully completed task.
        Assumes that a single dataset has been modified.

        Note that this method is not suitable to generate the result object for
        the drop dataset and rename dataset commands.

        Parameters
        ----------
        dataset_name: string
            Name of the manipulated dataset
        input_dataset: vizier.datastore.dataset.DatasetDescriptor, optional
            Descriptor for the input dataset. The provenance has no read
            information if None.
        output_dataset: vizier.datastore.dataset.DatasetDescriptor, optional
            Descriptor for the resulting dataset. The dataset name is mapped
            to None in the write provenance if no descriptor is given.
        database_state: dict, optional
            Identifier for datasets in the database state against which a task
            was executed (keyed by user-provided name). Not used by this
            method.
        stdout: list(string), optional
            Lines in the command output. No output is generated if None.
        resources: dict, optional
            Optional resources that were generated by the command

        Returns
        -------
        vizier.engine.task.processor.ExecResult
        """
        # Build a new descriptor from the identifier, columns and row count
        # of the output dataset
        if output_dataset is not None:
            ds = DatasetDescriptor(identifier=output_dataset.identifier,
                                   columns=output_dataset.columns,
                                   row_count=output_dataset.row_count)
        else:
            ds = None
        # The read provenance is only available if an input dataset is given
        if input_dataset is not None:
            read = {dataset_name: input_dataset.identifier}
        else:
            read = None
        # stdout defaults to None; iterating None would raise a TypeError
        lines = stdout if stdout is not None else []
        return ExecResult(
            outputs=ModuleOutputs(stdout=[TextOutput(line)
                                          for line in lines]),
            provenance=ModuleProvenance(
                read=read,
                write={dataset_name: ds},
                resources=resources))
コード例 #29
0
    def __init__(self, is_success=True, outputs=None, provenance=None):
        """Store the components of an execution result.

        Parameters
        ----------
        is_success: bool
            Flag indicating if execution was successful
        outputs: vizier.viztrail.module.output.ModuleOutputs, optional
            Outputs to STDOUT and STDERR generated during task execution.
            A new ModuleOutputs object is used if None.
        provenance: vizier.viztrail.module.provenance.ModuleProvenance, optional
            Provenance information about datasets that were read and written
            during task execution. A new ModuleProvenance object is used if
            None.
        """
        self.is_success = is_success
        # Fall back to new objects for the components that are not given
        if outputs is None:
            self.outputs = ModuleOutputs()
        else:
            self.outputs = outputs
        if provenance is None:
            self.provenance = ModuleProvenance()
        else:
            self.provenance = provenance
コード例 #30
0
 def test_multi_append(self):
     """Test appending modules to viztrail branch.

     Appends ten workflows to the default branch of a new viztrail, each
     one extending the modules of the previous head by one module. The
     branch history is checked after every append and again after the
     viztrail has been loaded from its base path.
     """
     # Create a new viztrail in a fresh folder below the repository directory
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(identifier='ABC',
                                           properties=None,
                                           base_path=base_path)
     branch = vt.get_default_branch()
     # Append ten modules
     for i in range(10):
         # Use one timestamp for created, started and finished
         ts = get_current_time()
         command = python_cell(source='print ' + str(i) + '+' + str(i))
         module = OSModuleHandle.create_module(
             command=command,
             external_form='print ' + str(i) + '+' + str(i),
             state=MODULE_SUCCESS,
             datasets=dict(),
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(created_at=ts,
                                       started_at=ts,
                                       finished_at=ts),
             module_folder=vt.modules_folder,
             object_store=vt.object_store)
         # The new workflow contains the modules of the current branch head
         # (if any) followed by the new module
         if not branch.head is None:
             modules = branch.head.modules + [module]
         else:
             modules = [module]
         branch.append_workflow(modules=modules,
                                action=ACTION_INSERT,
                                command=command)
         # Every append is expected to add one entry to the branch history
         self.assertEqual(len(branch.get_history()), (i + 1))
     # Load the viztrail from its base path and check the history again
     vt = OSViztrailHandle.load_viztrail(base_path)
     branch = vt.get_default_branch()
     history = branch.get_history()
     self.assertEqual(len(history), 10)
     for i in range(10):
         # The workflow at history position i is expected to have i + 1
         # modules
         wf = branch.get_workflow(history[i].identifier)
         self.assertEqual(len(wf.modules), (i + 1))
         for m in range(i + 1):
             # Module m was created with 'print m+m' as external form and
             # str(m + m) as its only STDOUT entry
             module = wf.modules[m]
             self.assertEqual(module.external_form,
                              'print ' + str(m) + '+' + str(m))
             self.assertEqual(module.outputs.stdout[-1].value, str(m + m))