def test_python_cell(self):
    """Check argument validation for the Python cell command.

    Covers three situations: a valid command, a command whose serialized
    argument list lacks its first (mandatory) entry, and a command whose
    argument list carries an extra, unknown argument.
    """
    def serialized_arguments():
        # List serialization of the arguments of a valid Python cell
        return python_cell(source='ABC', validate=True).arguments.to_list()

    def make_command(arguments):
        # Re-create the Python cell command from a serialized argument list
        return ModuleCommand(
            package_id=pycell.PACKAGE_PYTHON,
            command_id=pycell.PYTHON_CODE,
            arguments=arguments,
            packages=PACKAGES
        )

    # A string source is accepted ...
    python_cell(source='ABC', validate=True)
    # ... whereas a value of an invalid data type is rejected
    with self.assertRaises(ValueError):
        python_cell(source=[], validate=True)
    # The serialized arguments of a valid command can be used to create a
    # valid command instance again.
    arguments = serialized_arguments()
    make_command(arguments)
    # Without the only mandatory element the validation has to fail
    del arguments[0]
    with self.assertRaises(ValueError):
        make_command(arguments)
    # An additional unknown argument has to fail the validation as well
    arguments = serialized_arguments()
    arguments.append(ARG(id='someUnknownLabel', value=''))
    with self.assertRaises(ValueError):
        make_command(arguments)
Beispiel #2
0
def execute_task():
    """Execute a task against a given project context.

    The command specification and every context entry of the request body
    are checked for their mandatory elements before the task is handed to
    the API. A ValueError raised while creating or executing the command is
    re-raised as an invalid request.

    Request
    -------
    {
      "id": "string",
      "command": {
        "packageId": "string",
        "commandId": "string",
        "arguments": [
          null
        ]
      },
      "context": [
        {
          "id": "string",
          "name": "string"
        }
      ],
      "resources": {}
    }
    """
    # Validate the request body. Identifier, command, and context are
    # required elements; resources are optional.
    body = srv.validate_json_request(
        request,
        required=[labels.ID, labels.COMMAND, labels.CONTEXT],
        optional=[labels.RESOURCES]
    )
    # The command specification needs package, command identifier and
    # arguments
    command_spec = body[labels.COMMAND]
    mandatory = (labels.COMMAND_PACKAGE, labels.COMMAND_ID, labels.COMMAND_ARGS)
    for key in mandatory:
        if key not in command_spec:
            raise srv.InvalidRequest(
                "missing element '" + key + "' in command specification"
            )
    # Build the database state as a mapping from dataset name to dataset
    # identifier
    context = {}
    for dataset in body[labels.CONTEXT]:
        for key in (labels.ID, labels.NAME):
            if key not in dataset:
                raise srv.InvalidRequest(
                    "missing element '" + key + "' in dataset identifier"
                )
        context[dataset[labels.NAME]] = dataset[labels.ID]
    try:
        # Creating the command validates it and may raise a ValueError
        command = ModuleCommand(
            package_id=command_spec[labels.COMMAND_PACKAGE],
            command_id=command_spec[labels.COMMAND_ID],
            arguments=command_spec[labels.COMMAND_ARGS],
            packages=api.engine.packages
        )
        if labels.RESOURCES in body:
            resources = body[labels.RESOURCES]
        else:
            resources = None
        # Execute module. Result should not be None.
        result = api.tasks.execute_task(
            project_id=config.project_id,
            task_id=body[labels.ID],
            command=command,
            context=context,
            resources=resources
        )
        return jsonify(result)
    except ValueError as ex:
        raise srv.InvalidRequest(str(ex))
Beispiel #3
0
    def insert_workflow_module(self, project_id, branch_id, before_module_id,
                               package_id, command_id, arguments):
        """Insert a new module into the workflow of the identified project
        branch. The position is given by the identifier of an existing module
        that is passed on to the engine as 'before_module_id'. The module
        command is identified by the package and command identifier.
        Arguments is a list of command arguments.

        Raises ValueError if the command is unknown or the command arguments
        cannot be validated.

        Parameters
        ----------
        project_id : string
            Unique project identifier
        branch_id: string
            Unique workflow branch identifier
        before_module_id: string
            Identifier of the module that the engine uses as the insert
            position
        package_id: string
            Unique package identifier
        command_id: string
            Unique command identifier
        arguments: list
            List of dictionaries representing the user-provided command
            arguments

        Returns
        -------
        dict()
            Serialization of the workflow at the branch head. The result is
            None if the project or branch is unknown or if the engine does not
            return a list of modules.
        """
        # Look up project and branch first so that unknown resources are
        # reported before the command is validated.
        project = self.engine.projects.get_project(project_id)
        if project is None:
            return None
        branch = project.viztrail.get_branch(branch_id)
        if branch is None:
            return None
        # Creating the module command ensures that it is a valid command
        command = ModuleCommand(
            package_id=package_id,
            command_id=command_id,
            arguments=arguments,
            packages=self.engine.packages
        )
        # The engine returns the list of affected modules
        modules = self.engine.insert_workflow_module(
            project_id=project_id,
            branch_id=branch_id,
            before_module_id=before_module_id,
            command=command
        )
        if modules is None:
            return None
        return serialwf.WORKFLOW_HANDLE(
            project=project,
            branch=branch,
            workflow=branch.head,
            urls=self.urls
        )
Beispiel #4
0
 def test_error(self) -> None:
     """Run a command whose processor raises an exception instead of
     returning an execution result and check that the controller ends up in
     error state with output on stderr only.
     """
     controller = FakeWorkflowController()
     task = TaskHandle(
         task_id='000',
         project_id=self.PROJECT_ID,
         controller=controller
     )
     command = ModuleCommand(
         package_id='error',
         command_id='error',
         arguments=[],
         packages=None
     )
     artifacts: Dict[str, ArtifactDescriptor] = dict()
     self.backend.execute_async(
         task=task,
         command=command,
         artifacts=artifacts
     )
     # Give the asynchronous execution time to report back to the controller
     time.sleep(2)
     self.assertEqual(controller.task_id, '000')
     self.assertEqual(controller.state, 'ERROR')
     # Nothing on stdout, at least one entry on stderr
     self.assertEqual(len(controller.outputs.stdout), 0)
     self.assertNotEqual(len(controller.outputs.stderr), 0)
Beispiel #5
0
    def load_module(identifier,
                    module_path,
                    prev_state=None,
                    object_store=None):
        """Read a serialized module from an object store and create the
        module handle for it.

        If the object cannot be read (the store raises a ValueError) a handle
        in error state with an unknown command is returned instead.

        Parameters
        ----------
        identifier: string
            Unique module identifier
        module_path: string
            Resource path for module object
        prev_state: dict(string: vizier.datastore.dataset.DatasetDescriptor)
            Dataset descriptors keyed by the user-provided name that exist in
            the database state of the previous module (in sequence of
            occurrence in the workflow). Only used if the loaded module is in
            success state.
        object_store: vizier.core.io.base.ObjectStore, optional
            Object store implementation to access and maintain resources. A
            DefaultObjectStore is created if no store is given.

        Returns
        -------
        vizier.viztrail.objectstore.module.OSModuleHandle
        """
        store = DefaultObjectStore() if object_store is None else object_store
        # A ValueError from the store signals that the module object does not
        # exist. The result then is a module in error state.
        try:
            doc = store.read_object(object_path=module_path)
        except ValueError:
            unknown_command = ModuleCommand(package_id=UNKNOWN_ID,
                                            command_id=UNKNOWN_ID)
            return OSModuleHandle(
                identifier=identifier,
                command=unknown_command,
                external_form='fatal error: object not found',
                module_path=module_path,
                state=mstate.MODULE_ERROR,
                object_store=store)
        # Module command
        cmd_doc = doc[KEY_COMMAND]
        command = ModuleCommand(package_id=cmd_doc[KEY_PACKAGE_ID],
                                command_id=cmd_doc[KEY_COMMAND_ID],
                                arguments=cmd_doc[KEY_ARGUMENTS])
        # Module timestamps. Start and finish time are optional.
        ts_doc = doc[KEY_TIMESTAMP]
        timestamp = ModuleTimestamp(
            created_at=to_datetime(ts_doc[KEY_CREATED_AT]),
            started_at=(to_datetime(ts_doc[KEY_STARTED_AT])
                        if KEY_STARTED_AT in ts_doc else None),
            finished_at=(to_datetime(ts_doc[KEY_FINISHED_AT])
                         if KEY_FINISHED_AT in ts_doc else None))
        # Module output streams
        out_doc = doc[KEY_OUTPUTS]
        outputs = ModuleOutputs(
            stdout=get_output_stream(out_doc[KEY_STDOUT]),
            stderr=get_output_stream(out_doc[KEY_STDERR]))
        # Module provenance information. Every element is optional and is
        # None if it is missing from the serialization.
        prov_doc = doc[KEY_PROVENANCE]
        if KEY_PROVENANCE_READ in prov_doc:
            # Dataset name -> dataset identifier
            read_prov = {
                ds[KEY_DATASET_NAME]: ds[KEY_DATASET_ID]
                for ds in prov_doc[KEY_PROVENANCE_READ]
            }
        else:
            read_prov = None
        if KEY_PROVENANCE_WRITE in prov_doc:
            # Dataset name -> dataset descriptor
            write_prov = dict()
            for ds in prov_doc[KEY_PROVENANCE_WRITE]:
                columns = [
                    DatasetColumn(identifier=col[KEY_COLUMN_ID],
                                  name=col[KEY_COLUMN_NAME],
                                  data_type=col[KEY_COLUMN_TYPE])
                    for col in ds[KEY_DATASET_COLUMNS]
                ]
                write_prov[ds[KEY_DATASET_NAME]] = DatasetDescriptor(
                    identifier=ds[KEY_DATASET_ID],
                    columns=columns,
                    row_count=ds[KEY_DATASET_ROWCOUNT])
        else:
            write_prov = None
        if KEY_PROVENANCE_DELETE in prov_doc:
            delete_prov = prov_doc[KEY_PROVENANCE_DELETE]
        else:
            delete_prov = None
        if KEY_PROVENANCE_RESOURCES in prov_doc:
            res_prov = prov_doc[KEY_PROVENANCE_RESOURCES]
        else:
            res_prov = None
        if KEY_PROVENANCE_CHARTS in prov_doc:
            charts_prov = [
                ChartViewHandle.from_dict(chart)
                for chart in prov_doc[KEY_PROVENANCE_CHARTS]
            ]
        else:
            charts_prov = None
        provenance = ModuleProvenance(read=read_prov,
                                      write=write_prov,
                                      delete=delete_prov,
                                      resources=res_prov,
                                      charts=charts_prov)
        # The database state is derived from the previous state only for
        # modules in SUCCESS state. In all other cases it is empty.
        state = doc[KEY_STATE]
        if state == mstate.MODULE_SUCCESS and prev_state is not None:
            datasets = provenance.get_database_state(prev_state)
        else:
            datasets = dict()
        return OSModuleHandle(identifier=identifier,
                              command=command,
                              external_form=doc[KEY_EXTERNAL_FORM],
                              module_path=module_path,
                              state=state,
                              timestamp=timestamp,
                              datasets=datasets,
                              outputs=outputs,
                              provenance=provenance,
                              object_store=store)
 def test_create_plot(self):
     """Test validation of the create plot command.

     Covers valid commands (with and without x-axis range), series values of
     invalid data type, a serialization that lacks a mandatory argument, and
     a serialization with an additional unknown argument.
     """
     create_plot(dataset_name='ds',
                 chart_name='My Chart',
                 series=[{
                     'column': 1
                 }],
                 validate=True)
     create_plot(dataset_name='ds',
                 chart_name='My Chart',
                 series=[{
                     'column': 1
                 }],
                 xaxis_range='0:10',
                 validate=True)
     # Have an error raised if values of invalid data type are given
     with self.assertRaises(ValueError):
         create_plot(dataset_name='ds',
                     chart_name='My Chart',
                     series=[{
                         'column': 'abc'
                     }],
                     xaxis_range='0:10',
                     validate=True)
     with self.assertRaises(ValueError):
         create_plot(dataset_name='ds',
                     chart_name='My Chart',
                     series=[{
                         'column': 1,
                         'label': [],
                         'range': '0-10'
                     }],
                     xaxis_range='0:10',
                     validate=True)
     # Get dictionary serialization of command arguments. Ensure that we
     # can create a valid command instance from the returned result.
     obj = create_plot(dataset_name='ds',
                       chart_name='My Chart',
                       series=[{
                           'column': 1
                       }],
                       xaxis_range='0:10',
                       validate=True).arguments.to_list()
     ModuleCommand(package_id=plot.PACKAGE_PLOT,
                   command_id=plot.PLOT_SIMPLE_CHART,
                   arguments=obj,
                   packages=PACKAGES)
     # Delete a mandatory element from the serialization to ensure that
     # validation fails. Fail explicitly if the element is not part of the
     # serialization; otherwise an unrelated argument would be removed and
     # the check below could pass for the wrong reason.
     index = next(
         (i for i, arg in enumerate(obj) if arg[ARG_ID] == pckg.PARA_NAME),
         None
     )
     self.assertIsNotNone(
         index,
         'mandatory argument not found in serialization'
     )
     del obj[index]
     with self.assertRaises(ValueError):
         ModuleCommand(package_id=plot.PACKAGE_PLOT,
                       command_id=plot.PLOT_SIMPLE_CHART,
                       arguments=obj,
                       packages=PACKAGES)
     # Add an unknown argument to ensure that the validation fails
     obj = create_plot(dataset_name='ds',
                       chart_name='My Chart',
                       series=[{
                           'column': 1
                       }],
                       xaxis_range='0:10',
                       validate=True).arguments.to_list()
     obj.append(ARG(id='someUnknownLabel', value=''))
     with self.assertRaises(ValueError):
         ModuleCommand(package_id=plot.PACKAGE_PLOT,
                       command_id=plot.PLOT_SIMPLE_CHART,
                       arguments=obj,
                       packages=PACKAGES)
Beispiel #7
0
    def load_module(
        identifier: str,
        module_path: str,
        prev_state: Optional[Dict[str, ArtifactDescriptor]] = None,
        object_store: Optional[ObjectStore] = None
    ) -> "OSModuleHandle":
        """Load module from given object store.

        If the module object cannot be read (the store raises a ValueError) a
        module handle in error state with an unknown command is returned.

        Parameters
        ----------
        identifier: string
            Unique module identifier
        module_path: string
            Resource path for module object
        prev_state: dict(string: vizier.datastore.dataset.DatasetDescriptor)
            Dataset descriptors keyed by the user-provided name that exist in
            the database state of the previous module (in sequence of
            occurrence in the workflow). The value is not used by this
            implementation; the parameter is kept for interface compatibility.
        object_store: vizier.core.io.base.ObjectStore, optional
            Object store implementation to access and maintain resources. A
            new DefaultObjectStore is created for the call if no store is
            given.

        Returns
        -------
        vizier.viztrail.objectstore.module.OSModuleHandle
        """
        # Make sure the object store is not None. The default store is created
        # here instead of in the signature, where it would be evaluated only
        # once at definition time and shared between all calls.
        if object_store is None:
            object_store = DefaultObjectStore()
        # Read object from store. This may raise a ValueError to indicate that
        # the module does not exist (in a system error condition). In this
        # case we return a new module that is in error state.
        try:
            obj = cast(Dict[str, Any], object_store.read_object(object_path=module_path))
        except ValueError:
            return OSModuleHandle(
                identifier=identifier,
                command=ModuleCommand(
                    package_id=UNKNOWN_ID,
                    command_id=UNKNOWN_ID,
                    arguments=list(),
                    packages=None
                ),
                external_form='fatal error: object not found',
                module_path=module_path,
                state=mstate.MODULE_ERROR,
                object_store=object_store
            )
        # Create module command
        command = ModuleCommand(
            package_id=obj[KEY_COMMAND][KEY_PACKAGE_ID],
            command_id=obj[KEY_COMMAND][KEY_COMMAND_ID],
            arguments=obj[KEY_COMMAND][KEY_ARGUMENTS],
            packages=None
        )
        # Create module timestamps. Start and finish time are optional.
        created_at = to_datetime(obj[KEY_TIMESTAMP][KEY_CREATED_AT])
        if KEY_STARTED_AT in obj[KEY_TIMESTAMP]:
            started_at: Optional[datetime] = to_datetime(obj[KEY_TIMESTAMP][KEY_STARTED_AT])
        else:
            started_at = None
        if KEY_FINISHED_AT in obj[KEY_TIMESTAMP]:
            finished_at: Optional[datetime] = to_datetime(obj[KEY_TIMESTAMP][KEY_FINISHED_AT])
        else:
            finished_at = None
        timestamp = ModuleTimestamp(
            created_at=created_at,
            started_at=started_at,
            finished_at=finished_at
        )
        # Create module output streams.
        outputs = ModuleOutputs(
            stdout=get_output_stream(obj[KEY_OUTPUTS][KEY_STDOUT]),
            stderr=get_output_stream(obj[KEY_OUTPUTS][KEY_STDERR])
        )
        # Create module provenance information. Read and write provenance are
        # None if they are missing from the serialization.
        read_prov = None
        if KEY_PROVENANCE_READ in obj[KEY_PROVENANCE]:
            read_prov = dict()
            for ds in obj[KEY_PROVENANCE][KEY_PROVENANCE_READ]:
                read_prov[ds[KEY_DATASET_NAME]] = ds[KEY_DATASET_ID]
        write_prov = None
        if KEY_PROVENANCE_WRITE in obj[KEY_PROVENANCE]:
            write_prov = dict()
            for ds in obj[KEY_PROVENANCE][KEY_PROVENANCE_WRITE]:
                # Entries that carry a data object type are generic
                # artifacts. All other entries are datasets with columns.
                if KEY_DATAOBJECT_TYPE in ds:
                    descriptor = ArtifactDescriptor(
                        identifier=ds[KEY_DATAOBJECT_ID],
                        name=ds[KEY_DATAOBJECT_NAME],
                        artifact_type=ds[KEY_DATAOBJECT_TYPE])
                else:
                    descriptor = DatasetDescriptor(
                        identifier=ds[KEY_DATASET_ID],
                        name=ds[KEY_DATASET_NAME],
                        columns=[
                            DatasetColumn(
                                identifier=col[KEY_COLUMN_ID],
                                name=col[KEY_COLUMN_NAME],
                                data_type=col[KEY_COLUMN_TYPE]
                            ) for col in ds[KEY_DATASET_COLUMNS]
                        ]
                    )
                # NOTE(review): the entry is keyed by KEY_DATASET_NAME for
                # artifacts as well, although their name is read from
                # KEY_DATAOBJECT_NAME - confirm that both keys are present
                # (or identical) in serialized artifacts.
                write_prov[ds[KEY_DATASET_NAME]] = descriptor
        # Deleted names, resources and charts default to empty collections
        if KEY_PROVENANCE_DELETE in obj[KEY_PROVENANCE]:
            delete_prov = set(obj[KEY_PROVENANCE][KEY_PROVENANCE_DELETE])
        else:
            delete_prov = set()
        if KEY_PROVENANCE_RESOURCES in obj[KEY_PROVENANCE]:
            res_prov = cast(Dict[str, Any], obj[KEY_PROVENANCE][KEY_PROVENANCE_RESOURCES])
        else:
            res_prov = dict()
        if KEY_PROVENANCE_CHARTS in obj[KEY_PROVENANCE]:
            # Charts are serialized either as a [name, chart] list or as a
            # plain chart object. The latter is given the name "Chart".
            charts_prov = [
                (
                    c[0],
                    ChartViewHandle.from_dict(c[1])  # type: ignore[no-untyped-call]
                ) if isinstance(c, list) else
                (
                    "Chart",
                    ChartViewHandle.from_dict(c)
                )
                for c in obj[KEY_PROVENANCE][KEY_PROVENANCE_CHARTS]
            ]
        else:
            charts_prov = list()
        provenance = ModuleProvenance(
            read=read_prov,
            write=write_prov,
            delete=delete_prov,
            resources=res_prov,
            charts=charts_prov
        )
        # Return module handle
        return OSModuleHandle(
            identifier=identifier,
            command=command,
            external_form=obj[KEY_EXTERNAL_FORM],
            module_path=module_path,
            state=obj[KEY_STATE],
            timestamp=timestamp,
            outputs=outputs,
            provenance=provenance,
            object_store=object_store,
        )