Ejemplo n.º 1
0
    def __init__(self,
            identifier: str,
            properties: ObjectAnnotationSet,
            branches: Optional[List[BranchHandle]] = None,
            default_branch: Optional[BranchHandle] = None,
            created_at: Optional[datetime] = None
    ):
        """Initialize the viztrail descriptor.

        Parameters
        ----------
        identifier : string
            Unique viztrail identifier
        properties: ObjectAnnotationSet
            Handler for user-defined properties
        branches: list(vizier.viztrail.branch.BranchHandle), optional
            List of branches in the viztrail
        default_branch: vizier.viztrail.branch.BranchHandle, optional
            Default branch for the viztrail
        created_at : datetime.datetime, optional
            Timestamp of project creation (UTC). If None, the time at which
            this constructor is called is used.
        """
        super(ViztrailHandle, self).__init__(
            properties=properties
        )
        self.identifier = identifier
        # Index the given branches by their identifier. A later branch with a
        # duplicate identifier replaces an earlier one.
        if branches is not None:
            self.branches = {b.identifier: b for b in branches}
        else:
            self.branches = dict()
        self.default_branch = default_branch
        # The default must be None (not a call in the signature): a default of
        # get_current_time() would be evaluated only once, when the function
        # is defined, and every new viztrail would share that timestamp.
        self.created_at = created_at if created_at is not None else get_current_time()
Ejemplo n.º 2
0
    def set_success(self,
                    finished_at: Optional[datetime] = None,
                    outputs: Optional[ModuleOutputs] = None,
                    provenance: Optional[ModuleProvenance] = None,
                    updated_arguments: Optional[ModuleArguments] = None):
        """Set status of the module to success. The finished_at property of the
        timestamp is set to the given value or the current time (if None).

        In case of a successful module execution the module provenance
        information is also adjusted together with the module output streams.

        Parameters
        ----------
        finished_at: datetime.datetime, optional
            Timestamp when module execution finished. Defaults to the time of
            the call.
        outputs: vizier.viztrail.module.output.ModuleOutputs, optional
            Output streams for module. Defaults to a new, empty ModuleOutputs
            object.
        provenance: vizier.viztrail.module.provenance.ModuleProvenance, optional
            Provenance information about datasets that were read and written
            by the execution of the module. Defaults to a new
            ModuleProvenance object.
        updated_arguments: ModuleArguments, optional
            If given, replaces the arguments of the module command.
        """
        # Resolve defaults per call. Calls in the signature would be evaluated
        # once at definition time, freezing the timestamp and sharing a single
        # outputs/provenance instance between all modules.
        if finished_at is None:
            finished_at = get_current_time()
        if outputs is None:
            outputs = ModuleOutputs()
        if provenance is None:
            provenance = ModuleProvenance()
        # Update state, timestamp, outputs and provenance information.
        self.state = MODULE_SUCCESS
        self.timestamp.finished_at = finished_at
        # If the module is set to success straight from pending state the
        # started_at timestamp may not have been set.
        if self.timestamp.started_at is None:
            self.timestamp.started_at = self.timestamp.finished_at
        if updated_arguments is not None:
            self.command.arguments = updated_arguments
        self.outputs = outputs
        self.provenance = provenance
Ejemplo n.º 3
0
    def __init__(self,
            created_at: Optional[datetime] = None,
            started_at: Optional[datetime] = None,
            finished_at: Optional[datetime] = None):
        """Initialize the three timestamp components of a module.

        When created_at is omitted, the creation time defaults to the current
        time and neither of the other two timestamps may be given.

        Parameters
        ----------
        created_at: datetime.datetime, optional
            Time when module was first created
        started_at: datetime.datetime, optional
            Time when module execution started
        finished_at: datetime.datetime, optional
            Time when module execution finished

        Raises
        ------
        ValueError
            If created_at is None but started_at or finished_at is not None.
        """
        if created_at is None:
            # A start or finish time without a creation time is inconsistent.
            if started_at is not None or finished_at is not None:
                raise ValueError('invalid timestamp information')
            created_at = get_current_time()
        self.created_at = created_at
        self.started_at = started_at
        self.finished_at = finished_at
Ejemplo n.º 4
0
    def set_success(self,
            finished_at: Optional[datetime] = None,
            outputs: Optional[ModuleOutputs] = None,
            provenance: Optional[ModuleProvenance] = None,
            updated_arguments: Optional[ModuleArguments] = None
        ):
        """Set status of the module to success and materialize the new module
        state. The finished_at property of the timestamp is set to the given
        value or the current time (if None).

        Parameters
        ----------
        finished_at: datetime.datetime, optional
            Timestamp when module execution finished. Defaults to the time of
            the call.
        outputs: vizier.viztrail.module.output.ModuleOutputs, optional
            Output streams for module. Defaults to a new ModuleOutputs object.
        provenance: vizier.viztrail.module.provenance.ModuleProvenance, optional
            Provenance information about datasets that were read and written
            by the execution of the module. Defaults to a new
            ModuleProvenance object.
        updated_arguments: ModuleArguments, optional
            Passed through unchanged to the super class implementation.
        """
        # Resolve the defaults here rather than in the signature: default
        # expressions are evaluated once at definition time, which would
        # freeze the timestamp and share one outputs/provenance object.
        if finished_at is None:
            finished_at = get_current_time()
        if outputs is None:
            outputs = ModuleOutputs()
        if provenance is None:
            provenance = ModuleProvenance()
        # Update state, timestamp, outputs and provenance information.
        super().set_success(finished_at, outputs, provenance, updated_arguments)
        # Materialize module state
        self.write_safe()
Ejemplo n.º 5
0
    def __init__(self,
                 identifier: str,
                 action: str,
                 package_id: Optional[str] = None,
                 command_id: Optional[str] = None,
                 created_at: Optional[datetime] = None):
        """Initialize the workflow descriptor.

        Only the branch create action may omit the package and command
        identifiers; every other action requires both.

        Parameters
        ----------
        identifier: string
            Unique workflow identifier
        action: string
            Identifier of the action that created the workflow version (create,
            insert, delete, or replace)
        package_id: string, optional
            Identifier of the package the module command is from
        command_id: string, optional
            Identifier of the module command
        created_at: datetime.datetime, optional
            Timestamp of workflow creation (UTC)

        Raises
        ------
        ValueError
            If action is not ACTION_CREATE and package_id or command_id is
            None.
        """
        if action != ACTION_CREATE:
            if package_id is None or command_id is None:
                raise ValueError('invalid workflow provenance information')
        self.identifier = identifier
        self.action = action
        self.package_id = package_id
        self.command_id = command_id
        # init_value selects between the given timestamp and the current time.
        now = get_current_time()
        self.created_at = init_value(created_at, now)
Ejemplo n.º 6
0
    def upload_file(self, filename, provenance=None):
        """Register a file on disk in the file index and return its handle.

        Parameters
        ----------
        filename: string
            Path to file on disk
        provenance: dict, optional
            Optional file provenance information

        Returns
        -------
        FileHandle
        """
        base_name = os.path.basename(filename)
        lowered_name = base_name.lower()
        # Copy the provenance so the caller's dictionary is not modified.
        properties = dict() if provenance is None else dict(provenance)
        properties[FH_UPLOAD_NAME] = base_name
        # The base name of the file doubles as its identifier and the given
        # path is used as the file location.
        f_handle = FileHandle(
            base_name,
            lowered_name,
            filename,
            get_current_time(),
            properties=properties
        )
        # Add file to file index and persist the index
        self.files[base_name] = f_handle
        self.write_index(self.files)
        return f_handle
Ejemplo n.º 7
0
 def set_success(self,
                 task_id: str,
                 finished_at: Optional[datetime] = None,
                 result: Optional[ExecResult] = None):
     """Record that the given task finished successfully.

     Stores the task identifier and the outputs of the execution result on
     this object and sets the state to 'SUCCESS'.

     Parameters
     ----------
     task_id: string
         Unique task identifier
     finished_at: datetime.datetime, optional
         Accepted for interface compatibility; not used by this
         implementation.
     result: ExecResult, optional
         Result of the task execution. Defaults to a new ExecResult object.
     """
     # Create the default result per call. A default of ExecResult() in the
     # signature would be built once at definition time and shared between
     # all calls (the same holds for a get_current_time() default).
     if result is None:
         result = ExecResult()
     self.task_id = task_id
     self.outputs = result.outputs
     self.state = 'SUCCESS'
Ejemplo n.º 8
0
 def to_file(self):
     """Serialize the viztrail and write it to the viztrail file in the
     viztrail directory. The last modified at timestamp is set to the
     current time before the state is written.
     """
     self.last_modified_at = get_current_time()
     # Build the serialization step by step (key order is preserved)
     doc = dict()
     doc['id'] = self.identifier
     doc['env'] = self.exec_env.identifier
     # One entry per branch listing the serialized workflow versions
     branch_docs = list()
     for branch_id in self.branches:
         versions = [w.to_dict() for w in self.branches[branch_id].workflows]
         branch_docs.append({'id': branch_id, 'versions': versions})
     doc['branches'] = branch_docs
     doc['timestamps'] = {
         'createdAt': self.created_at.isoformat(),
         'lastModifiedAt': self.last_modified_at.isoformat()
     }
     doc['versionCounter'] = self.version_counter.value
     doc['moduleCounter'] = self.module_counter.value
     # Write viztrail serialization to file
     target = os.path.join(self.fs_dir, VIZTRAIL_FILE)
     with open(target, 'w') as f:
         dump_json(doc, f)
 def test_single_append(self):
     """Append one successful module to an empty branch and verify that it
     is written to disk and can be read back after reloading the viztrail.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties={},
         base_path=base_path
     )
     branch = vt.get_default_branch()
     source = 'print 2+2'
     command = python_cell(source=source)
     now = get_current_time()
     module = OSModuleHandle.create_module(
         command=command,
         external_form=source,
         state=MODULE_SUCCESS,
         outputs=ModuleOutputs(stdout=[TextOutput('4')]),
         provenance=ModuleProvenance(),
         timestamp=ModuleTimestamp(
             created_at=now,
             started_at=now,
             finished_at=now
         ),
         module_folder=vt.modules_folder,
         object_store=vt.object_store
     )
     wf = branch.append_workflow(
         modules=[module],
         action=ACTION_INSERT,
         command=command
     )
     # Both the workflow handle and the new module must be backed by a file
     workflow_file = os.path.join(branch.base_path, wf.identifier)
     self.assertTrue(os.path.isfile(workflow_file))
     module_file = os.path.join(wf.modules[-1].module_path)
     self.assertTrue(os.path.isfile(module_file))
     # Reload the viztrail and inspect the module at the branch head
     reloaded = OSViztrailHandle.load_viztrail(base_path)
     head_module = reloaded.get_default_branch().get_head().modules[-1]
     self.assertEqual(head_module.external_form, 'print 2+2')
     self.assertEqual(head_module.outputs.stdout[-1].value, '4')
Ejemplo n.º 10
0
    def __init__(self,
                 source_branch: Optional[str] = None,
                 workflow_id: Optional[str] = None,
                 module_id: Optional[str] = None,
                 created_at: Optional[datetime] = None):
        """Initialize the provenance object.

        The three source identifiers must either all be given or all be None.

        Parameters
        ----------
        source_branch : string, optional
            Unique identifier of source branch
        workflow_id: string, optional
            Identifier of source workflow
        module_id: string, optional
            Identifier of module at which the new branch started
        created_at: datetime.datetime, optional
            Timestamp of branch creation (UTC). Defaults to the current time.

        Raises
        ------
        ValueError
            If at least one but not all of source_branch, workflow_id and
            module_id are None.
        """
        # Reject a partially specified source (some identifiers given, others
        # missing).
        given = [
            value is not None
            for value in (source_branch, workflow_id, module_id)
        ]
        if any(given) and not all(given):
            raise ValueError('invalid arguments for branch provenance')
        self.source_branch = source_branch
        self.workflow_id = workflow_id
        self.module_id = module_id
        if created_at is None:
            created_at = get_current_time()
        self.created_at = created_at
Ejemplo n.º 11
0
    def __init__(self,
                 version,
                 action=None,
                 package_id=None,
                 command_id=None,
                 created_at=None):
        """Initialize the workflow descriptor.

        Parameters
        ----------
        version: int
            Workflow version identifier
        action: string, optional
            Identifier of the action that created the workflow version (create,
            insert, delete, or replace)
        package_id: string, optional
            Identifier of the package the module command is from
        command_id: string, optional
            Identifier of the module command
        created_at: datetime.datetime, optional
            Timestamp of workflow creation (UTC). Defaults to the current
            time.
        """
        self.version = version
        self.action = action
        self.package_id = package_id
        self.command_id = command_id
        # Fall back to the current time if no creation timestamp is given
        if created_at is None:
            created_at = get_current_time()
        self.created_at = created_at
Ejemplo n.º 12
0
    def set_error(
        self,
        task_id: str,
        finished_at: Optional[datetime] = None,
        outputs: Optional[ModuleOutputs] = None
    ) -> Optional[bool]:
        """Set status of the module that is associated with the given task
        identifier to error. The finished_at property of the timestamp is set
        to the given value or the current time (if None). The module outputs
        are adjusted to the given value. The output streams are empty if no
        value is given for the outputs parameter.

        Cancels all pending modules in the workflow.

        Returns True if the state of the workflow was changed and False
        otherwise. The result is None if the project or task did not exist.

        This is the abstract declaration; implementations are expected to
        resolve None values for finished_at and outputs at call time. The
        defaults are None (rather than calls in the signature) because default
        expressions are evaluated only once, when the method is defined.

        Parameters
        ----------
        task_id : string
            Unique task identifier
        finished_at: datetime.datetime, optional
            Timestamp when module execution finished
        outputs: vizier.viztrail.module.output.ModuleOutputs, optional
            Output streams for module

        Returns
        -------
        bool

        Raises
        ------
        NotImplementedError
            Always; the method has to be overridden.
        """
        raise NotImplementedError
Ejemplo n.º 13
0
    def append_workflow(self, modules, action, command, pending_modules=None):
        """Append a workflow as the new head of the branch. The new workflow may
        contain modules that have not been persisted previously (pending
        modules). These modules are persisted as part of the workflow being
        created.

        Parameters
        ----------
        modules: list(vizier.viztrail.module.ModuleHandle)
            List of modules in the workflow that are completed
        action: string
            Identifier of the action that created the workflow
        command: vizier.viztrail.module.ModuleCommand
            Specification of the executed command that created the workflow
        pending_modules: list(vizier.viztrail.module.ModuleHandle), optional
            List of modules in the workflow that need to be materialized

        Returns
        -------
        vizier.viztrail.workflow.base.WorkflowHandle
        """
        all_modules = list(modules)
        to_materialize = pending_modules if pending_modules is not None else []
        for pending in to_materialize:
            ts = pending.timestamp
            # A running module without a start time is considered to have
            # started when it was created.
            if pending.is_running and ts.started_at is None:
                ts.started_at = ts.created_at
            materialized = OSModuleHandle.create_module(
                command=pending.command,
                external_form=pending.external_form,
                state=pending.state,
                timestamp=pending.timestamp,
                datasets=pending.datasets,
                outputs=pending.outputs,
                provenance=pending.provenance,
                module_folder=self.modules_folder,
                object_store=self.object_store
            )
            all_modules.append(materialized)
        # Write handle for workflow at branch head
        module_ids = [m.identifier for m in all_modules]
        descriptor = write_workflow_handle(
            modules=module_ids,
            workflow_count=len(self.workflows),
            base_path=self.base_path,
            object_store=self.object_store,
            action=action,
            command=command,
            created_at=get_current_time()
        )
        new_head = WorkflowHandle(
            identifier=descriptor.identifier,
            branch_id=self.identifier,
            modules=all_modules,
            descriptor=descriptor
        )
        self.workflows.append(new_head.descriptor)
        # Move the current head (if any) to the cache before replacing it
        if self.head is not None:
            self.add_to_cache(self.head)
        self.head = new_head
        return new_head
Ejemplo n.º 14
0
def execute(task_id, project_id, command_doc, context, resources):
    """Execute the given command and report the outcome to the workflow
    controller of the project.

    Parameters
    ----------
    task_id: string
        Unique task identifier
    project_id: string
        Unique project identifier
    command_doc : dict
        Dictionary serialization of the module command
    context: dict
        Dictionary of available resources in the database state. The key is
        the resource name. Values are resource identifiers.
    resources: dict
        Optional information about resources that were generated during a
        previous execution of the command
    """
    # Create a remote workflow controller for the given task and notify it
    # that the task started to run
    controller = worker_env.get_controller(project_id)
    controller.set_running(task_id=task_id, started_at=get_current_time())
    command = ModuleCommand.from_dict(command_doc)
    if command.package_id not in worker_env.processors:
        # No processor for the package: report the problem on stderr
        message = 'unknown package \'' + str(command.package_id) + '\''
        exec_result = ExecResult(
            is_success=False,
            outputs=ModuleOutputs(stderr=[TextOutput(message)])
        )
    else:
        processor = worker_env.processors[command.package_id]
        task_context = TaskContext(
            project_id=project_id,
            datastore=worker_env.datastores.get_datastore(project_id),
            filestore=worker_env.filestores.get_filestore(project_id),
            datasets=context[labels.CONTEXT_DATASETS],
            resources=resources,
            dataobjects=context[labels.CONTEXT_DATAOBJECTS]
        )
        _, exec_result = exec_command(
            task_id=task_id,
            command=command,
            context=task_context,
            processor=processor
        )
    # Notify the workflow controller that the task has finished
    if not exec_result.is_success:
        controller.set_error(
            task_id=task_id,
            outputs=exec_result.outputs
        )
        return
    controller.set_success(
        task_id=task_id,
        outputs=exec_result.outputs,
        provenance=exec_result.provenance
    )
 def test_load_with_dataset_delete(self):
     """Test loading workflows where each module creates a new dataset and
     deletes the previous dataset (except for the first module).
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties={},
         base_path=base_path
     )
     branch = vt.get_default_branch()
     # Append five modules
     for i in range(5):
         now = get_current_time()
         # Every module but the first deletes its predecessor's dataset
         deleted = ['DS' + str(i - 1)] if i > 0 else list()
         source = 'print ' + str(i) + '+' + str(i)
         command = python_cell(source=source)
         ds_name = 'DS' + str(i)
         module = OSModuleHandle.create_module(
             command=command,
             external_form=source,
             state=MODULE_SUCCESS,
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(
                 write={
                     ds_name: DatasetDescriptor(
                         identifier=str(i),
                         name=ds_name,
                         columns=[
                             DatasetColumn(identifier=j, name=str(j))
                             for j in range(i)
                         ],
                     )
                 },
                 delete=deleted
             ),
             timestamp=ModuleTimestamp(
                 created_at=now,
                 started_at=now,
                 finished_at=now
             ),
             module_folder=vt.modules_folder,
             object_store=vt.object_store
         )
         if branch.head is None:
             modules = [module]
         else:
             modules = branch.head.modules + [module]
         branch.append_workflow(
             modules=modules,
             action=ACTION_INSERT,
             command=command
         )
     vt = OSViztrailHandle.load_viztrail(base_path)
     workflow = vt.get_default_branch().get_head()
     self.assertEqual(len(workflow.modules), 5)
     # After each module only the dataset written by that module remains
     state = {}
     for i in range(5):
         state = workflow.modules[i].provenance.get_database_state(state)
         self.assertEqual(len(state), 1)
         key = 'DS' + str(i)
         self.assertTrue(key in state)
         self.assertEqual(len(state[key].columns), i)
 def test_load_active(self):
     """Test loading workflows with active modules."""
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties=None,
         base_path=base_path
     )
     branch = vt.get_default_branch()
     # Append five modules
     for i in range(5):
         now = get_current_time()
         source = 'print ' + str(i) + '+' + str(i)
         command = python_cell(source=source)
         module = OSModuleHandle.create_module(
             command=command,
             external_form=source,
             state=MODULE_SUCCESS,
             datasets=dict(),
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(
                 created_at=now,
                 started_at=now,
                 finished_at=now
             ),
             module_folder=vt.modules_folder,
             object_store=vt.object_store
         )
         if branch.head is None:
             modules = [module]
         else:
             modules = branch.head.modules + [module]
         branch.append_workflow(
             modules=modules,
             action=ACTION_INSERT,
             command=command
         )
         self.assertEqual(len(branch.get_history()), (i + 1))
     # This is a hack to simulate loading workflows with active modules
     # Change state of last two modules in branch head to an active state
     for offset in (-2, -1):
         active = branch.get_head().modules[offset]
         active.state = MODULE_RUNNING
         active.write_module()
     vt = OSViztrailHandle.load_viztrail(base_path)
     branch = vt.get_default_branch()
     # The first three modules are successful, the last two are canceled
     for idx in (0, 1, 2):
         self.assertTrue(branch.get_head().modules[idx].is_success)
     for idx in (3, 4):
         self.assertTrue(branch.get_head().modules[idx].is_canceled)
     # Change state of last module in second workflow to an active state
     active = branch.get_head().modules[1]
     active.state = MODULE_RUNNING
     active.write_module()
     vt = OSViztrailHandle.load_viztrail(base_path)
     branch = vt.get_default_branch()
     second = branch.get_workflow(branch.get_history()[1].identifier)
     self.assertTrue(second.modules[0].is_success)
     self.assertTrue(second.modules[1].is_canceled)
Ejemplo n.º 17
0
 def set_canceled(self,
         finished_at: Optional[datetime] = None,
         outputs: Optional[ModuleOutputs] = None
     ) -> None:
     """Set status of the module to canceled and materialize the new module
     state. The finished_at property of the timestamp is set to the given
     value or the current time (if None). The module outputs are set to the
     given value. If no outputs are given a new ModuleOutputs object is used.

     Parameters
     ----------
     finished_at: datetime.datetime, optional
         Timestamp when module execution finished. Defaults to the time of
         the call.
     outputs: vizier.viztrail.module.output.ModuleOutputs, optional
         Output streams for module
     """
     # Resolve defaults per call. Default expressions in the signature are
     # evaluated once at definition time, which would freeze the timestamp
     # and share a single outputs object between all canceled modules.
     if finished_at is None:
         finished_at = get_current_time()
     if outputs is None:
         outputs = ModuleOutputs()
     super().set_canceled(finished_at, outputs)
     # Materialize module state
     self.write_safe()
Ejemplo n.º 18
0
    def set_error(
        self,
        task_id: str,
        finished_at: Optional[datetime] = None,
        outputs: Optional[ModuleOutputs] = None
    ) -> Optional[bool]:
        """Set status of the module that is associated with the given task
        identifier to error. The finished_at property of the timestamp is set
        to the given value or the current time (if None). The module outputs
        are adjusted to the given value. A new ModuleOutputs object is used if
        no value is given for the outputs parameter.

        Cancels all modules that follow the failed module in the workflow.

        Returns True if the state of the workflow was changed and False
        otherwise. The result is None if the task, its workflow, or its module
        could not be found.

        Parameters
        ----------
        task_id : string
            Unique task identifier
        finished_at: datetime.datetime, optional
            Timestamp when module execution finished. Defaults to the time of
            the call.
        outputs: vizier.viztrail.module.output.ModuleOutputs, optional
            Output streams for module

        Returns
        -------
        bool
        """
        # Resolve defaults per call. Default expressions in the signature are
        # evaluated once at definition time, so every error would otherwise
        # carry the import-time timestamp and share one outputs object.
        if finished_at is None:
            finished_at = get_current_time()
        if outputs is None:
            outputs = ModuleOutputs()
        print("ERROR: {}".format(task_id))
        with self.backend.lock:
            # Get task handle and remove it from the internal index. The result
            # is None if the task does not exist.
            task = pop_task(tasks=self.tasks, task_id=task_id)
            if task is None:
                return None
            # Get the handle for the head workflow of the specified branch and
            # the index for the module matching the identifier in the task.
            workflow, module_index = self.get_task_module(task)
            if workflow is None or module_index == -1:
                return None
            # Notify the backend that the task is finished
            self.backend.task_finished(task_id)
            module = workflow.modules[module_index]
            if not module.is_active:
                # The module is no longer active; nothing changes.
                return False
            module.set_error(finished_at=finished_at, outputs=outputs)
            # Cancel every module that comes after the failed one
            for m in workflow.modules[module_index + 1:]:
                m.set_canceled()
            return True
Ejemplo n.º 19
0
    def __init__(self,
                 identifier: str,
                 properties: PersistentAnnotationSet,
                 base_path: str,
                 branches: List[BranchHandle],
                 default_branch: Optional[BranchHandle],
                 object_store: Optional[ObjectStore] = None,
                 created_at: Optional[datetime] = None,
                 branch_index: Optional[str] = None,
                 branch_folder: Optional[str] = None,
                 modules_folder: Optional[str] = None):
        """Initialize the viztrail descriptor.

        Parameters
        ----------
        identifier : string
            Unique viztrail identifier
        properties: PersistentAnnotationSet
            Handler for user-defined properties
        base_path: string
            Identifier for folder containing viztrail resources
        branches: list(vizier.viztrail.branch.BranchHandle)
            List of branches in the viztrail
        default_branch: vizier.viztrail.branch.BranchHandle
            Default branch for the viztrail
        object_store: vizier.core.io.base.ObjectStore, optional
            Object store implementation to access and maintain resources.
            Defaults to a new DefaultObjectStore.
        created_at : datetime.datetime, optional
            Timestamp of project creation (UTC). Defaults to the time of the
            call.
        branch_index: string, optional
            Path to branch index list
        branch_folder: string, optional
            Path to branches folder
        modules_folder: string, optional
            Path to modules folder
        """
        # Resolve defaults per call. Default expressions in the signature are
        # evaluated once at definition time, which would give every viztrail
        # the import-time timestamp and one shared object store instance.
        if created_at is None:
            created_at = get_current_time()
        if object_store is None:
            object_store = DefaultObjectStore()
        super(OSViztrailHandle, self).__init__(identifier=identifier,
                                               properties=properties,
                                               branches=branches,
                                               default_branch=default_branch,
                                               created_at=created_at)
        # Initialize the object store and identifier for all subfolders.
        self.base_path = base_path
        self.object_store = object_store
        self.branch_folder = init_value(
            branch_folder, self.object_store.join(base_path, FOLDER_BRANCHES))
        self.branch_index = init_value(
            branch_index,
            self.object_store.join(self.branch_folder, OBJ_BRANCHINDEX))
        self.modules_folder = init_value(
            modules_folder, self.object_store.join(base_path, FOLDER_MODULES))
 def test_completed_append(self):
     """Test appending a completed workflow to a branch."""
     def assert_delete_head(workflow):
         # The head must be the workflow created by the delete action
         self.assertEqual(len(workflow.modules), 9)
         self.assertEqual(workflow.descriptor.identifier, '0000000A')
         self.assertEqual(workflow.descriptor.action, ACTION_DELETE)
         self.assertEqual(workflow.descriptor.package_id, PACKAGE_PYTHON)
         self.assertEqual(workflow.descriptor.command_id, PYTHON_CODE)

     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties=None,
         base_path=base_path
     )
     branch = vt.get_default_branch()
     # Build up a branch with ten successful modules, one workflow per module
     for i in range(10):
         now = get_current_time()
         source = 'print ' + str(i) + '+' + str(i)
         command = python_cell(source=source)
         module = OSModuleHandle.create_module(
             command=command,
             external_form=source,
             state=MODULE_SUCCESS,
             datasets=dict(),
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(
                 created_at=now,
                 started_at=now,
                 finished_at=now
             ),
             module_folder=vt.modules_folder,
             object_store=vt.object_store
         )
         if branch.head is None:
             modules = [module]
         else:
             modules = branch.head.modules + [module]
         branch.append_workflow(
             modules=modules,
             action=ACTION_INSERT,
             command=command
         )
     # Append a workflow that drops the last module of the current head
     head_modules = branch.get_head().modules
     wf = branch.append_workflow(
         modules=head_modules[:-1],
         action=ACTION_DELETE,
         command=head_modules[-1].command
     )
     assert_delete_head(wf)
     # The same state is expected after reloading the viztrail
     vt = OSViztrailHandle.load_viztrail(base_path)
     branch = vt.get_default_branch()
     history = branch.get_history()
     self.assertEqual(len(history), 11)
     assert_delete_head(branch.get_head())
Ejemplo n.º 21
0
    def __init__(self,
                 identifier,
                 branches,
                 env_id,
                 command_repository,
                 properties,
                 created_at=None,
                 last_modified_at=None):
        """Initialize the viztrail identifier and branch dictionary.

        The two timestamps have to be given together (existing viztrail) or
        omitted together (new viztrail). In the latter case both are set to
        the current time.

        Parameters
        ----------
        identifier : string
            Unique viztrail identifier
        branches : dict(ViztrailBranch)
            Dictionary of branches.
        env_id: string
            Unique execution environment identifier
        command_repository: dict
            Dictionary containing specifications for all commands that are
            supported by the execution environment.
        properties: vizier.core.properties.ObjectPropertiesHandler
            Handler for user-defined properties that are associated with this
            viztrail
        created_at : datetime.datetime, optional
            Timestamp of project creation (UTC)
        last_modified_at : datetime.datetime, optional
            Timestamp when project was last modified (UTC)

        Raises
        ------
        ValueError
            If exactly one of created_at and last_modified_at is given.
        """
        self.identifier = identifier
        self.branches = branches
        self.env_id = env_id
        self.command_repository = command_repository
        self.properties = properties
        # A missing created_at timestamp marks a newly created viztrail, for
        # which last_modified_at must be omitted as well. An existing
        # viztrail has to provide both timestamps.
        if created_at is not None:
            if last_modified_at is None:
                # The creation time is known but the modification time was
                # not supplied.
                raise ValueError('missing value for \'last_modified\'')
            self.created_at = created_at
            self.last_modified_at = last_modified_at
        else:
            if last_modified_at is not None:
                # A modification time makes no sense without a creation time.
                raise ValueError('unexpected value for \'last_modified\'')
            self.created_at = get_current_time()
            self.last_modified_at = self.created_at
Ejemplo n.º 22
0
    def unload_dataset(self,
                       dataset_name,
                       format='csv',
                       options=None,
                       filename=""):
        """Export the dataset with the given name and return a handle for
        the exported file.

        The export target is a file in the current working directory that is
        named after a newly generated unique identifier. The given filename
        only contributes the (lower-cased) name recorded in the returned
        handle. If filename is empty no target path is passed to the export
        call.

        NOTE(review): the original documentation states that a ValueError is
        raised if the dataset could not be exported. This method does not
        raise it itself; presumably it originates from the export call.

        Parameters
        ----------
        dataset_name: string
            Name of the dataset to unload

        format: string
            Format for output (csv, json, etc.)

        options: list, optional
            Options for data unload. The sequence is converted to a Scala
            sequence before it is handed to the export call. Defaults to an
            empty list.

        filename: string
            The output filename - may be empty if outputting to a database

        Returns
        -------
        vizier.filestore.base.FileHandle
        """
        # Use a fresh list per call instead of a shared mutable default.
        if options is None:
            options = []
        name = os.path.basename(filename).lower()
        # Create a new unique identifier for the file.
        identifier = get_unique_identifier()

        abspath = ""
        if not filename == "":
            # Normalise Windows separators before appending the identifier.
            abspath = os.path.abspath((r'%s' %
                                       os.getcwd().replace('\\', '/')) + '/' +
                                      identifier)
        mimir._mimir.unloadDataSource(dataset_name, abspath, format,
                                      mimir._jvmhelper.to_scala_seq(options))

        created_at = get_current_time()
        output_file = abspath
        # Describe the exported file with a file handle
        f_handle = FileHandle(identifier,
                              name,
                              output_file,
                              created_at,
                              properties=dict())
        return f_handle
Ejemplo n.º 23
0
    def set_running(self,
            started_at: Optional[datetime] = None,
            external_form: Optional[str] = None
        ):
        """Set status of the module to running and materialize the new module
        state. The started_at property of the timestamp is set to the given
        value or the current time (if None).

        Parameters
        ----------
        started_at: datetime.datetime, optional
            Timestamp when module started running
        external_form: string, optional
            Adjusted external representation for the module command.
        """
        # Resolve the timestamp at call time. A call expression as the
        # parameter default would be evaluated only once, when the function
        # is defined, and every later call would reuse that stale value.
        if started_at is None:
            started_at = get_current_time()
        super().set_running(started_at, external_form)
        # Materialize module state
        self.write_safe()
 def test_multi_append(self):
     """Append ten modules to the default branch, one workflow per module,
     and verify the branch history before and after reloading the viztrail.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties=None,
         base_path=base_path
     )
     branch = vt.get_default_branch()
     # Every iteration extends the current branch head by one module
     for idx in range(10):
         now = get_current_time()
         source = 'print ' + str(idx) + '+' + str(idx)
         command = python_cell(source=source)
         module = OSModuleHandle.create_module(
             command=command,
             external_form=source,
             state=MODULE_SUCCESS,
             datasets=dict(),
             outputs=ModuleOutputs(stdout=[TextOutput(str(idx + idx))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(
                 created_at=now,
                 started_at=now,
                 finished_at=now
             ),
             module_folder=vt.modules_folder,
             object_store=vt.object_store
         )
         # The first workflow consists of the new module only
         head = branch.head
         modules = [module] if head is None else head.modules + [module]
         branch.append_workflow(
             modules=modules,
             action=ACTION_INSERT,
             command=command
         )
         self.assertEqual(len(branch.get_history()), idx + 1)
     # Reload the viztrail and check every workflow in the history
     vt = OSViztrailHandle.load_viztrail(base_path)
     branch = vt.get_default_branch()
     history = branch.get_history()
     self.assertEqual(len(history), 10)
     for pos in range(10):
         wf = branch.get_workflow(history[pos].identifier)
         expected_count = pos + 1
         self.assertEqual(len(wf.modules), expected_count)
         for m in range(expected_count):
             module = wf.modules[m]
             expected_form = 'print ' + str(m) + '+' + str(m)
             self.assertEqual(module.external_form, expected_form)
             self.assertEqual(module.outputs.stdout[-1].value, str(m + m))
Ejemplo n.º 25
0
    def get_workflow(self, branch_id=DEFAULT_BRANCH, version=-1):
        """Get the workflow with the given version number from the workflow
        history of the given branch.

        Returns None if the branch or the workflow version do not exist. If
        the branch has no workflows yet and the version is not positive, an
        empty workflow handle (version -1, no modules) is returned instead.

        Parameters
        ----------
        branch_id: string, optional
            Unique branch identifier
        version: int, optional
            Workflow version number. A negative value selects the branch
            HEAD.

        Returns
        -------
        WorkflowHandle
        """
        # Return None if branch does not exist
        if branch_id not in self.branches:
            return None
        branch = self.branches[branch_id]
        if version <= 0 and len(branch.workflows) == 0:
            # Returns an empty workflow if the branch does not contain any
            # executed workflows yet.
            return WorkflowHandle(branch_id, -1, get_current_time(), [])
        # Get version number of branch HEAD if negative version is given
        wf_file = None
        if version < 0 and len(branch.workflows) > 0:
            wf_file = workflow_file(self.fs_dir, branch.workflows[-1].version)
        else:
            for wf_desc in branch.workflows:
                if wf_desc.version == version:
                    wf_file = workflow_file(self.fs_dir, version)
                    break
        # Return None if version number is not in branch (indicated by an non-
        # existing workflow file)
        if wf_file is None:
            return None
        # Read workflow handle from file. The file is parsed as Json first;
        # if that fails for any reason it is read again as Yaml.
        try:
            with open(wf_file, 'r') as f:
                doc = load_json(f.read())
        except Exception:
            # Catch Exception rather than everything so that interpreter
            # exits and keyboard interrupts are not swallowed by the fallback.
            # NOTE(review): CLoader is presumably PyYAML's full C loader,
            # which can construct arbitrary objects - confirm that workflow
            # files are always trusted input.
            with open(wf_file, 'r') as f:
                doc = yaml.load(f.read(), Loader=CLoader)
        return WorkflowHandle(
            branch_id, doc['version'], to_datetime(doc['createdAt']),
            [ModuleHandle.from_dict(m) for m in doc['modules']])
Ejemplo n.º 26
0
 def init(self):
     """Create the API components and the service descriptor. Expected to
     be called before the first request is handled.
     """
     config = self.config
     webservice = config.webservice
     # Factory for all resource urls that are used by the components
     urls = ContainerApiUrlFactory(
         base_url=config.app_base_url,
         api_doc_url=webservice.doc_url
     )
     self.urls = urls
     engine = get_engine(config)
     self.engine = engine
     projects = engine.projects
     self.projects = projects
     # Component APIs share the project cache and the url factory
     self.datasets = VizierDatastoreApi(
         projects=projects,
         urls=urls,
         defaults=webservice.defaults
     )
     self.views = VizierDatasetViewApi(projects=projects, urls=urls)
     self.files = VizierFilestoreApi(projects=projects, urls=urls)
     self.tasks = VizierContainerTaskApi(
         engine=engine,
         controller_url=config.controller_url
     )
     # Assemble the service descriptor from its individual sections
     defaults = {'maxFileSize': webservice.defaults.max_file_size}
     environment = {
         'name': engine.name,
         'version': VERSION_INFO,
         'backend': config.engine.backend.identifier,
         'packages': list(engine.packages.keys())
     }
     links = serialize.HATEOAS({
         'self': urls.service_descriptor(),
         'doc': urls.api_doc()
     })
     self.service_descriptor = {
         'name': webservice.name,
         'startedAt': get_current_time().isoformat(),
         'defaults': defaults,
         'environment': environment,
         labels.LINKS: links
     }
Ejemplo n.º 27
0
    def set_error(self,
            finished_at: Optional[datetime] = None,
            outputs: Optional[ModuleOutputs] = None
        ):
        """Set status of the module to error and materialize the new module
        state. The finished_at property of the timestamp is set to the given
        value or the current time (if None). The module outputs are adjusted
        to the given value. The output streams are empty if no value is given
        for the outputs parameter.

        Parameters
        ----------
        finished_at: datetime.datetime, optional
            Timestamp when module execution finished
        outputs: vizier.viztrail.module.output.ModuleOutputs, optional
            Output streams for module
        """
        # Resolve both defaults at call time. Call expressions used as
        # parameter defaults are evaluated only once, when the function is
        # defined, which would freeze the timestamp and share one outputs
        # object between all calls.
        if finished_at is None:
            finished_at = get_current_time()
        if outputs is None:
            outputs = ModuleOutputs()
        super().set_error(finished_at, outputs)
        # Materialize module state
        self.write_safe()
Ejemplo n.º 28
0
    def set_running(self,
                    started_at: Optional[datetime] = None,
                    external_form: Optional[str] = None) -> None:
        """Set status of the module to running. The started_at property of the
        timestamp is set to the given value or the current time (if None).

        Parameters
        ----------
        started_at: datetime.datetime, optional
            Timestamp when module started running
        external_form: string, optional
            Adjusted external representation for the module command.
        """
        # Resolve the timestamp at call time. A call expression as the
        # parameter default would be evaluated only once, when the function
        # is defined, and every later call would reuse that stale value.
        if started_at is None:
            started_at = get_current_time()
        # Keep the current external form unless a new one is given
        if external_form is not None:
            self.external_form = external_form
        # Update state and timestamp information. Clear outputs.
        self.state = MODULE_RUNNING
        self.timestamp.started_at = started_at
        self.outputs = ModuleOutputs()
Ejemplo n.º 29
0
    def set_error(self, finished_at=None, outputs=None):
        """Mark the module as failed and persist the new state.

        The finish timestamp falls back to the current time and the outputs
        fall back to empty output streams when the respective argument is
        None. The module's datasets are reset to an empty dictionary.

        Parameters
        ----------
        finished_at: datetime.datetime, optional
            Timestamp when module execution finished
        outputs: vizier.viztrail.module.output.ModuleOutputs, optional
            Output streams for module
        """
        self.state = mstate.MODULE_ERROR
        # Fall back to the current time if no timestamp is given
        if finished_at is None:
            finished_at = get_current_time()
        self.timestamp.finished_at = finished_at
        # Fall back to empty output streams if no outputs are given
        if outputs is None:
            outputs = ModuleOutputs()
        self.outputs = outputs
        # Clear database state
        self.datasets = {}
        # Write the modified module state to the object store
        self.write_safe()
Ejemplo n.º 30
0
 def set_canceled(
     self,
     finished_at: datetime = get_current_time(),
     outputs: ModuleOutputs = ModuleOutputs()
 ) -> None:
     """Set status of the module to canceled. The finished_at property of the
     timestamp is set to the given value or the current time (if None). The
     module outputs are set to the given value. If no outputs are given the
     module output streams will be empty.
     
     Parameters
     ----------
     finished_at: datetime.datetime, optional
         Timestamp when module started running
     outputs: vizier.viztrail.module.output.ModuleOutputs, optional
         Output streams for module
     """
     # Update state, timestamp and output information. Clear database state.
     self.state = MODULE_CANCELED
     self.timestamp.finished_at = finished_at
     self.outputs = outputs