def test_python_cell(self):
    """Check argument validation for the Python cell command.

    Covers a valid cell, a source value of the wrong data type, a round trip
    of the serialized arguments through ModuleCommand, and the two failure
    cases of a missing mandatory argument and an unknown extra argument.
    """
    def serialize_arguments():
        # List serialization of the arguments of a valid Python cell.
        return python_cell(source='ABC', validate=True).arguments.to_list()

    def make_command(arguments):
        # Build a Python cell command from a serialized argument list.
        return ModuleCommand(
            package_id=pycell.PACKAGE_PYTHON,
            command_id=pycell.PYTHON_CODE,
            arguments=arguments,
            packages=PACKAGES
        )

    # A cell with a string source validates without raising.
    python_cell(source='ABC', validate=True)
    # A source value of an invalid data type has to be rejected.
    self.assertRaises(ValueError, python_cell, source=[], validate=True)
    # The serialized arguments must be accepted when a command instance is
    # re-created from them.
    serialized = serialize_arguments()
    make_command(serialized)
    # Without its only mandatory element the serialization is invalid.
    del serialized[0]
    self.assertRaises(ValueError, make_command, serialized)
    # An argument with an unknown identifier makes the validation fail.
    serialized = serialize_arguments()
    serialized.append(ARG(id='someUnknownLabel', value=''))
    self.assertRaises(ValueError, make_command, serialized)
Beispiel #2
0
def execute_task():
    """Execute a task against a given project context.

    The request body is validated first. Any missing element, as well as a
    ValueError raised while creating or executing the command, is reported
    as srv.InvalidRequest. On success the execution result is returned via
    jsonify().

    Request
    -------
    {
      "id": "string",
      "command": {
        "packageId": "string",
        "commandId": "string",
        "arguments": [
          null
        ]
      },
      "context": [
        {
          "id": "string",
          "name": "string"
        }
      ],
      "resources": {}
    }
    """
    # The request body has to be Json with the task identifier, the command,
    # and the context. Resources are optional.
    body = srv.validate_json_request(
        request,
        required=[labels.ID, labels.COMMAND, labels.CONTEXT],
        optional=[labels.RESOURCES]
    )
    # The command specification needs package, command, and arguments.
    command_doc = body[labels.COMMAND]
    command_labels = (
        labels.COMMAND_PACKAGE,
        labels.COMMAND_ID,
        labels.COMMAND_ARGS
    )
    for label in command_labels:
        if label not in command_doc:
            raise srv.InvalidRequest(
                "missing element '" + label + "' in command specification"
            )
    # Database state: map each dataset name to the dataset identifier.
    context = {}
    for dataset in body[labels.CONTEXT]:
        for label in (labels.ID, labels.NAME):
            if label not in dataset:
                raise srv.InvalidRequest(
                    "missing element '" + label + "' in dataset identifier"
                )
        context[dataset[labels.NAME]] = dataset[labels.ID]
    # Resources default to None if the request does not contain them.
    if labels.RESOURCES in body:
        resources = body[labels.RESOURCES]
    else:
        resources = None
    try:
        # Creating the command and running the task may both raise a
        # ValueError, which is translated into an invalid request.
        command = ModuleCommand(
            package_id=command_doc[labels.COMMAND_PACKAGE],
            command_id=command_doc[labels.COMMAND_ID],
            arguments=command_doc[labels.COMMAND_ARGS],
            packages=api.engine.packages
        )
        # Execute module. Result should not be None.
        result = api.tasks.execute_task(
            project_id=config.project_id,
            task_id=body[labels.ID],
            command=command,
            context=context,
            resources=resources
        )
        return jsonify(result)
    except ValueError as ex:
        raise srv.InvalidRequest(str(ex))
Beispiel #3
0
def execute(task_id, project_id, command_doc, context, resources):
    """Execute the given command and report the outcome to the controller.

    The workflow controller of the project is notified that the task is
    running before the command is executed, and is notified of success or
    error afterwards. A command whose package has no registered processor
    is reported as an error.

    Parameters:
    -----------
    task_id: string
        Unique task identifier
    project_id: string
        Unique project identifier
    command_doc : dict
        Dictionary serialization of the module command
    context: dict
        Database state for the task. The entries labels.CONTEXT_DATASETS
        and labels.CONTEXT_DATAOBJECTS are read and passed on to the task
        context.
    resources: dict
        Optional information about resources that were generated during a
        previous execution of the command
    """
    # Obtain the remote workflow controller and signal the start of the task.
    controller = worker_env.get_controller(project_id)
    controller.set_running(task_id=task_id, started_at=get_current_time())
    # Deserialize the command and look for a processor for its package.
    command = ModuleCommand.from_dict(command_doc)
    package_id = command.package_id
    if package_id not in worker_env.processors:
        # Unknown package: create an error result instead of executing.
        message = 'unknown package \'' + str(package_id) + '\''
        outcome = ExecResult(
            is_success=False,
            outputs=ModuleOutputs(stderr=[TextOutput(message)])
        )
    else:
        processor = worker_env.processors[package_id]
        task_context = TaskContext(
            project_id=project_id,
            datastore=worker_env.datastores.get_datastore(project_id),
            filestore=worker_env.filestores.get_filestore(project_id),
            datasets=context[labels.CONTEXT_DATASETS],
            resources=resources,
            dataobjects=context[labels.CONTEXT_DATAOBJECTS]
        )
        # Only the second element of the returned pair is needed here.
        _, outcome = exec_command(
            task_id=task_id,
            command=command,
            context=task_context,
            processor=processor
        )
    # Tell the workflow controller how the task ended.
    if not outcome.is_success:
        controller.set_error(task_id=task_id, outputs=outcome.outputs)
    else:
        controller.set_success(
            task_id=task_id,
            outputs=outcome.outputs,
            provenance=outcome.provenance
        )
Beispiel #4
0
    def insert_workflow_module(self, project_id, branch_id, before_module_id,
                               package_id, command_id, arguments):
        """Insert a new module into the workflow of a project branch.

        The module command is identified by the package and command
        identifier. Arguments is a list of command arguments. The position
        of the new module is given by the identifier of the module before
        which it is inserted.

        Raises ValueError if the command is unknown or the command arguments
        cannot be validated.

        Parameters
        ----------
        project_id : string
            Unique project identifier
        branch_id: string
            Unique workflow branch identifier
        before_module_id: string
            Identifier of the module that the new module is inserted before
        package_id: string
            Unique package identifier
        command_id: string
            Unique command identifier
        arguments: list
            List of dictionaries representing the user-provided command
            arguments

        Returns
        -------
        dict()
            Result of serialwf.WORKFLOW_HANDLE for the branch head. None if
            the project or branch does not exist or if the engine returns
            no modules.
        """
        # Look up project and branch before validating the command so that
        # unknown resources are reported first.
        project = self.engine.projects.get_project(project_id)
        if project is None:
            return None
        branch = project.viztrail.get_branch(branch_id)
        if branch is None:
            return None
        # Creating the command validates it against the engine's packages.
        command = ModuleCommand(
            package_id=package_id,
            command_id=command_id,
            arguments=arguments,
            packages=self.engine.packages
        )
        # The engine returns the list of affected modules.
        affected_modules = self.engine.insert_workflow_module(
            project_id=project_id,
            branch_id=branch_id,
            before_module_id=before_module_id,
            command=command
        )
        if affected_modules is None:
            return None
        return serialwf.WORKFLOW_HANDLE(
            project=project,
            branch=branch,
            workflow=branch.head,
            urls=self.urls
        )
Beispiel #5
0
 def test_error(self) -> None:
     """Run a command whose processor raises an exception instead of
     returning an execution result and verify that the controller ends up
     in the error state with output on stderr only.
     """
     artifacts: Dict[str, ArtifactDescriptor] = dict()
     failing_command = ModuleCommand(
         package_id='error',
         command_id='error',
         arguments=[],
         packages=None
     )
     controller = FakeWorkflowController()
     task = TaskHandle(
         task_id='000',
         project_id=self.PROJECT_ID,
         controller=controller
     )
     self.backend.execute_async(
         task=task,
         command=failing_command,
         artifacts=artifacts
     )
     # Wait for the asynchronous execution before inspecting the controller.
     time.sleep(2)
     self.assertEqual(controller.task_id, '000')
     self.assertEqual(controller.state, 'ERROR')
     # Nothing on stdout, at least one entry on stderr.
     self.assertEqual(len(controller.outputs.stdout), 0)
     self.assertNotEqual(len(controller.outputs.stderr), 0)
Beispiel #6
0
    def load_module(identifier,
                    module_path,
                    prev_state=None,
                    object_store=None):
        """Load a module handle from the given object store.

        If the object cannot be read (the store raises a ValueError) the
        result is a module in error state with an unknown command.

        Parameters
        ----------
        identifier: string
            Unique module identifier
        module_path: string
            Resource path for module object
        prev_state: dict(string: vizier.datastore.dataset.DatasetDescriptor)
            Dataset descriptors keyed by the user-provided name that exist in
            the database state of the previous module (in sequence of
            occurrence in the workflow)
        object_store: vizier.core.io.base.ObjectStore, optional
            Object store implementation to access and maintain resources

        Returns
        -------
        vizier.viztrail.objectstore.module.OSModuleHandle
        """
        # Fall back to the default object store if none is given.
        store = DefaultObjectStore() if object_store is None else object_store
        # Reading may raise a ValueError to indicate that the module does not
        # exist (a system error condition). In that case a new module in
        # error state is returned.
        try:
            obj = store.read_object(object_path=module_path)
        except ValueError:
            unknown_command = ModuleCommand(
                package_id=UNKNOWN_ID,
                command_id=UNKNOWN_ID
            )
            return OSModuleHandle(
                identifier=identifier,
                command=unknown_command,
                external_form='fatal error: object not found',
                module_path=module_path,
                state=mstate.MODULE_ERROR,
                object_store=store
            )
        # Module command
        command_doc = obj[KEY_COMMAND]
        command = ModuleCommand(
            package_id=command_doc[KEY_PACKAGE_ID],
            command_id=command_doc[KEY_COMMAND_ID],
            arguments=command_doc[KEY_ARGUMENTS]
        )
        # Module timestamps. Start and finish time are optional.
        ts_doc = obj[KEY_TIMESTAMP]
        created_at = to_datetime(ts_doc[KEY_CREATED_AT])
        started_at = (
            to_datetime(ts_doc[KEY_STARTED_AT])
            if KEY_STARTED_AT in ts_doc else None
        )
        finished_at = (
            to_datetime(ts_doc[KEY_FINISHED_AT])
            if KEY_FINISHED_AT in ts_doc else None
        )
        timestamp = ModuleTimestamp(
            created_at=created_at,
            started_at=started_at,
            finished_at=finished_at
        )
        # Module output streams
        outputs_doc = obj[KEY_OUTPUTS]
        outputs = ModuleOutputs(
            stdout=get_output_stream(outputs_doc[KEY_STDOUT]),
            stderr=get_output_stream(outputs_doc[KEY_STDERR])
        )
        # Module provenance. Every component is None if it is not part of
        # the serialization.
        prov_doc = obj[KEY_PROVENANCE]
        read_prov = None
        if KEY_PROVENANCE_READ in prov_doc:
            # Dataset name -> dataset identifier
            read_prov = {
                entry[KEY_DATASET_NAME]: entry[KEY_DATASET_ID]
                for entry in prov_doc[KEY_PROVENANCE_READ]
            }
        write_prov = None
        if KEY_PROVENANCE_WRITE in prov_doc:
            # Dataset name -> dataset descriptor
            write_prov = dict()
            for entry in prov_doc[KEY_PROVENANCE_WRITE]:
                columns = [
                    DatasetColumn(
                        identifier=col[KEY_COLUMN_ID],
                        name=col[KEY_COLUMN_NAME],
                        data_type=col[KEY_COLUMN_TYPE]
                    )
                    for col in entry[KEY_DATASET_COLUMNS]
                ]
                descriptor = DatasetDescriptor(
                    identifier=entry[KEY_DATASET_ID],
                    columns=columns,
                    row_count=entry[KEY_DATASET_ROWCOUNT]
                )
                write_prov[entry[KEY_DATASET_NAME]] = descriptor
        delete_prov = (
            prov_doc[KEY_PROVENANCE_DELETE]
            if KEY_PROVENANCE_DELETE in prov_doc else None
        )
        res_prov = (
            prov_doc[KEY_PROVENANCE_RESOURCES]
            if KEY_PROVENANCE_RESOURCES in prov_doc else None
        )
        charts_prov = None
        if KEY_PROVENANCE_CHARTS in prov_doc:
            charts_prov = [
                ChartViewHandle.from_dict(chart)
                for chart in prov_doc[KEY_PROVENANCE_CHARTS]
            ]
        provenance = ModuleProvenance(
            read=read_prov,
            write=write_prov,
            delete=delete_prov,
            resources=res_prov,
            charts=charts_prov
        )
        # The database state is derived from the previous state only for
        # modules in SUCCESS state. In all other cases it is empty.
        if obj[KEY_STATE] == mstate.MODULE_SUCCESS and prev_state is not None:
            datasets = provenance.get_database_state(prev_state)
        else:
            datasets = dict()
        return OSModuleHandle(
            identifier=identifier,
            command=command,
            external_form=obj[KEY_EXTERNAL_FORM],
            module_path=module_path,
            state=obj[KEY_STATE],
            timestamp=timestamp,
            datasets=datasets,
            outputs=outputs,
            provenance=provenance,
            object_store=store
        )
 def test_create_plot(self):
     """Test validation of the create plot command.

     Covers valid plots with and without an x-axis range, series with
     values of invalid data type, a round trip of the serialized arguments
     through ModuleCommand, and the two failure cases of a missing
     mandatory argument and an unknown extra argument.
     """
     create_plot(dataset_name='ds',
                 chart_name='My Chart',
                 series=[{
                     'column': 1
                 }],
                 validate=True)
     create_plot(dataset_name='ds',
                 chart_name='My Chart',
                 series=[{
                     'column': 1
                 }],
                 xaxis_range='0:10',
                 validate=True)
     # Have an error raised if values of invalid data type are given
     with self.assertRaises(ValueError):
         create_plot(dataset_name='ds',
                     chart_name='My Chart',
                     series=[{
                         'column': 'abc'
                     }],
                     xaxis_range='0:10',
                     validate=True)
     with self.assertRaises(ValueError):
         create_plot(dataset_name='ds',
                     chart_name='My Chart',
                     series=[{
                         'column': 1,
                         'label': [],
                         'range': '0-10'
                     }],
                     xaxis_range='0:10',
                     validate=True)
     # Get dictionary serialization of command arguments. Ensure that we
     # can create a valid command instance from the returned result.
     obj = create_plot(dataset_name='ds',
                       chart_name='My Chart',
                       series=[{
                           'column': 1
                       }],
                       xaxis_range='0:10',
                       validate=True).arguments.to_list()
     ModuleCommand(package_id=plot.PACKAGE_PLOT,
                   command_id=plot.PLOT_SIMPLE_CHART,
                   arguments=obj,
                   packages=PACKAGES)
     # Delete a mandatory element from the serialization to ensure that
     # validation fails. The element is located by its argument identifier
     # and the test fails explicitly if it is not part of the serialization,
     # instead of silently deleting an unrelated argument.
     index = next(
         (i for i, arg in enumerate(obj) if arg[ARG_ID] == pckg.PARA_NAME),
         None
     )
     self.assertIsNotNone(index)
     del obj[index]
     with self.assertRaises(ValueError):
         ModuleCommand(package_id=plot.PACKAGE_PLOT,
                       command_id=plot.PLOT_SIMPLE_CHART,
                       arguments=obj,
                       packages=PACKAGES)
     # Add an unknown argument to ensure that the validation fails
     obj = create_plot(dataset_name='ds',
                       chart_name='My Chart',
                       series=[{
                           'column': 1
                       }],
                       xaxis_range='0:10',
                       validate=True).arguments.to_list()
     obj.append(ARG(id='someUnknownLabel', value=''))
     with self.assertRaises(ValueError):
         ModuleCommand(package_id=plot.PACKAGE_PLOT,
                       command_id=plot.PLOT_SIMPLE_CHART,
                       arguments=obj,
                       packages=PACKAGES)
Beispiel #8
0
    def load_module(
            identifier: str,
            module_path: str,
            prev_state: Optional[Dict[str, ArtifactDescriptor]] = None,
            object_store: Optional[ObjectStore] = None
        ) -> "OSModuleHandle":
        """Load module from given object store.

        If the object cannot be read (the store raises a ValueError) the
        result is a module in error state with an unknown command.

        Parameters
        ----------
        identifier: string
            Unique module identifier
        module_path: string
            Resource path for module object
        prev_state: dict(string: vizier.datastore.dataset.DatasetDescriptor)
            Dataset descriptors keyed by the user-provided name that exist in
            the database state of the previous module (in sequence of
            occurrence in the workflow). The value is not read by this
            implementation.
        object_store: vizier.core.io.base.ObjectStore, optional
            Object store implementation to access and maintain resources. A
            new DefaultObjectStore is created for the call if the argument
            is None.

        Returns
        -------
        vizier.viztrail.objectstore.module.OSModuleHandle
        """
        # Make sure the object store is not None. The default store is
        # created here and not in the signature because default values are
        # evaluated only once, when the function is defined.
        if object_store is None:
            object_store = DefaultObjectStore()
        # Read object from store. This may raise a ValueError to indicate that
        # the module does not exist (in a system error condition). In this
        # case we return a new module that is in error state.
        try:
            obj = cast(Dict[str, Any], object_store.read_object(object_path=module_path))
        except ValueError:
            return OSModuleHandle(
                identifier=identifier,
                command=ModuleCommand(
                    package_id=UNKNOWN_ID,
                    command_id=UNKNOWN_ID,
                    arguments=list(),
                    packages=None
                ),
                external_form='fatal error: object not found',
                module_path=module_path,
                state=mstate.MODULE_ERROR,
                object_store=object_store
            )
        # Create module command
        command = ModuleCommand(
            package_id=obj[KEY_COMMAND][KEY_PACKAGE_ID],
            command_id=obj[KEY_COMMAND][KEY_COMMAND_ID],
            arguments=obj[KEY_COMMAND][KEY_ARGUMENTS],
            packages=None
        )
        # Create module timestamps. Start and finish time are optional.
        created_at = to_datetime(obj[KEY_TIMESTAMP][KEY_CREATED_AT])
        if KEY_STARTED_AT in obj[KEY_TIMESTAMP]:
            started_at: Optional[datetime] = to_datetime(obj[KEY_TIMESTAMP][KEY_STARTED_AT])
        else:
            started_at = None
        if KEY_FINISHED_AT in obj[KEY_TIMESTAMP]:
            finished_at: Optional[datetime] = to_datetime(obj[KEY_TIMESTAMP][KEY_FINISHED_AT])
        else:
            finished_at = None
        timestamp = ModuleTimestamp(
            created_at=created_at,
            started_at=started_at,
            finished_at=finished_at
        )
        # Create module output streams.
        outputs = ModuleOutputs(
            stdout=get_output_stream(obj[KEY_OUTPUTS][KEY_STDOUT]),
            stderr=get_output_stream(obj[KEY_OUTPUTS][KEY_STDERR])
        )
        # Create module provenance information. Read and write provenance
        # are None if they are not part of the serialization.
        read_prov = None
        if KEY_PROVENANCE_READ in obj[KEY_PROVENANCE]:
            read_prov = dict()
            for ds in obj[KEY_PROVENANCE][KEY_PROVENANCE_READ]:
                read_prov[ds[KEY_DATASET_NAME]] = ds[KEY_DATASET_ID]
        write_prov = None
        if KEY_PROVENANCE_WRITE in obj[KEY_PROVENANCE]:
            write_prov = dict()
            for ds in obj[KEY_PROVENANCE][KEY_PROVENANCE_WRITE]:
                # Entries that carry a data object type are generic
                # artifacts. All other entries are datasets.
                if KEY_DATAOBJECT_TYPE in ds:
                    descriptor = ArtifactDescriptor(
                        identifier=ds[KEY_DATAOBJECT_ID],
                        name=ds[KEY_DATAOBJECT_NAME],
                        artifact_type=ds[KEY_DATAOBJECT_TYPE])
                else:
                    descriptor = DatasetDescriptor(
                        identifier=ds[KEY_DATASET_ID],
                        name=ds[KEY_DATASET_NAME],
                        columns=[
                            DatasetColumn(
                                identifier=col[KEY_COLUMN_ID],
                                name=col[KEY_COLUMN_NAME],
                                data_type=col[KEY_COLUMN_TYPE]
                            ) for col in ds[KEY_DATASET_COLUMNS]
                        ]
                    )
                # NOTE(review): the key is read via KEY_DATASET_NAME for
                # artifacts as well, while the artifact name above uses
                # KEY_DATAOBJECT_NAME. Confirm that both constants refer to
                # the same label.
                write_prov[ds[KEY_DATASET_NAME]] = descriptor
        # The remaining provenance components default to empty collections.
        if KEY_PROVENANCE_DELETE in obj[KEY_PROVENANCE]:
            delete_prov = set(obj[KEY_PROVENANCE][KEY_PROVENANCE_DELETE])
        else:
            delete_prov = set()
        if KEY_PROVENANCE_RESOURCES in obj[KEY_PROVENANCE]:
            res_prov = cast(Dict[str, Any], obj[KEY_PROVENANCE][KEY_PROVENANCE_RESOURCES])
        else:
            res_prov = dict()
        if KEY_PROVENANCE_CHARTS in obj[KEY_PROVENANCE]:
            # A chart is serialized either as a list, whose first two
            # elements are used as (name, chart document), or as a plain
            # chart document that is paired with the name "Chart".
            charts_prov = [
                (
                    c[0],
                    ChartViewHandle.from_dict(c[1])  # type: ignore[no-untyped-call]
                ) if isinstance(c, list) else
                (
                    "Chart",
                    ChartViewHandle.from_dict(c)
                )
                for c in obj[KEY_PROVENANCE][KEY_PROVENANCE_CHARTS]
            ]
        else:
            charts_prov = list()
        provenance = ModuleProvenance(
            read=read_prov,
            write=write_prov,
            delete=delete_prov,
            resources=res_prov,
            charts=charts_prov
        )
        # Return module handle
        return OSModuleHandle(
            identifier=identifier,
            command=command,
            external_form=obj[KEY_EXTERNAL_FORM],
            module_path=module_path,
            state=obj[KEY_STATE],
            timestamp=timestamp,
            outputs=outputs,
            provenance=provenance,
            object_store=object_store,
        )
Beispiel #9
0
    def append_workflow_module(
            self, project_id: str, branch_id: str,
            command: ModuleCommand) -> Optional[ModuleHandle]:
        """Append a module to the workflow at the head of the given viztrail
        branch and execute the modified workflow. The resulting workflow
        becomes the new head of the branch.

        Returns the handle for the new module in the modified workflow. The
        result is None if the specified project or branch do not exist.

        Parameters
        ----------
        project_id: string
            Unique project identifier
        branch_id : string
            Unique branch identifier
        command : vizier.viztrail.command.ModuleCommand
            Specification of the command that is to be executed by the appended
            workflow module

        Returns
        -------
        vizier.viztrail.module.base.ModuleHandle
        """
        with self.backend.lock:
            # Look up the branch. Nothing to do if it does not exist.
            branch = self.projects.get_branch(
                project_id=project_id,
                branch_id=branch_id
            )
            if branch is None:
                return None
            # Collect the modules of the current branch head together with
            # the flags that describe the state of that workflow. An empty
            # or missing head yields an empty, inactive workflow.
            head = branch.get_head()
            if head is None or len(head.modules) == 0:
                modules = list()
                is_active = False
                is_error = False
            else:
                modules = head.modules
                is_active = head.is_active
                last_module = head.modules[-1]
                is_error = last_module.is_error or last_module.is_canceled
            context = compute_context(modules)
            # External representation of the command. Only those entries of
            # the context that are datasets are passed on.
            command_declaration = self.packages[command.package_id].get(
                command.command_id
            )
            context_datasets = {
                name: cast(DatasetDescriptor, context[name])
                for name in context if context[name].is_dataset
            }
            external_form = command.to_external_form(
                command=command_declaration,
                datasets=context_datasets
            )
            if not is_active and self.backend.can_execute(command):
                # The workflow is not active and the backend can execute
                # the command synchronously: run it immediately and append
                # the completed module.
                ts_start = get_current_time()
                task = TaskHandle(
                    task_id=get_unique_identifier(),
                    project_id=project_id,
                    controller=self
                )
                result = self.backend.execute(
                    task=task,
                    command=command,
                    artifacts=context
                )
                ts = ModuleTimestamp(
                    created_at=ts_start,
                    started_at=ts_start,
                    finished_at=get_current_time()
                )
                # The module handle depends on the execution outcome. Only
                # successful executions carry provenance.
                if not result.is_success:
                    module = ModuleHandle(
                        state=mstate.MODULE_ERROR,
                        command=command,
                        external_form=external_form,
                        timestamp=ts,
                        outputs=result.outputs
                    )
                else:
                    module = ModuleHandle(
                        state=mstate.MODULE_SUCCESS,
                        command=command,
                        external_form=external_form,
                        timestamp=ts,
                        outputs=result.outputs,
                        provenance=result.provenance
                    )
                workflow = branch.append_workflow(
                    modules=modules,
                    action=wf.ACTION_APPEND,
                    command=command,
                    pending_modules=[module]
                )
            else:
                # Append a module that has not been executed yet. It is
                # pending while the workflow is active and canceled if the
                # last module is in error or canceled state. Otherwise the
                # backend determines the state.
                if is_active:
                    state = mstate.MODULE_PENDING
                elif is_error:
                    state = mstate.MODULE_CANCELED
                else:
                    state = self.backend.next_task_state()
                unexecuted_module = ModuleHandle(
                    state=state,
                    command=command,
                    external_form=external_form,
                    provenance=ModuleProvenance(unexecuted=True)
                )
                workflow = branch.append_workflow(
                    modules=modules,
                    action=wf.ACTION_APPEND,
                    command=command,
                    pending_modules=[unexecuted_module]
                )
                # Start execution unless the workflow is already active or
                # the new module is canceled.
                if not (is_active or state == mstate.MODULE_CANCELED):
                    self.execute_module(
                        project_id=project_id,
                        branch_id=branch_id,
                        module=workflow.modules[-1],
                        artifacts=context
                    )
        return workflow.modules[-1]