def test_init(self):
    """Check that the provenance constructor rejects incomplete branch
    points.

    A branch point is described by the three arguments source_branch,
    workflow_id, and module_id. The test expects the constructor to accept
    the case where none of them is given and the case where all of them are
    given, and to raise a ValueError for every partial combination.
    """
    # Valid combinations: no branch point at all or a complete branch point.
    BranchProvenance()
    BranchProvenance(source_branch='A', workflow_id='B', module_id='C')
    # Every combination that sets at least one but not all three arguments.
    partial_branch_points = [
        {'source_branch': 'A', 'workflow_id': 'B'},
        {'source_branch': 'A'},
        {'source_branch': 'A', 'module_id': 'C'},
        {'module_id': 'C'},
        {'workflow_id': 'B', 'module_id': 'C'},
        {'workflow_id': 'B'},
    ]
    for kwargs in partial_branch_points:
        with self.assertRaises(ValueError):
            BranchProvenance(**kwargs)
# Example #2
# 0
    def load_branch(
            identifier: str,
            is_default: bool,
            base_path: str,
            modules_folder: str,
            object_store: Optional[ObjectStore] = None
        ):
        """Restore a branch handle from its persisted resources.

        Reads the branch provenance document and one descriptor for every
        workflow object found in the branch folder. If the branch history is
        not empty, the workflow that sorts last by identifier is treated as
        the branch head and its modules are read as well.

        Parameters
        ----------
        identifier: string
            Unique branch identifier
        is_default: bool
            True if this is the default branch for its viztrail
        base_path: string
            Path to folder containing branch resources
        modules_folder: string
            Path to folder containing workflow modules
        object_store: vizier.core.io.base.ObjectStore, optional
            Object store implementation to access and maintain resources. A
            DefaultObjectStore is created if no store is given.

        Returns
        -------
        vizier.viztrail.objectstore.branch.OSBranchHandle
        """
        store = DefaultObjectStore() if object_store is None else object_store
        # The metadata document always carries the creation timestamp. It
        # has exactly four entries when the three values that define the
        # branch point were stored alongside the timestamp.
        metadata = cast(
            Dict[str, Any],
            store.read_object(store.join(base_path, OBJ_METADATA))
        )
        timestamp = to_datetime(metadata[KEY_CREATED_AT])
        if len(metadata) != 4:
            provenance = BranchProvenance(created_at=timestamp)
        else:
            provenance = BranchProvenance(
                source_branch=metadata[KEY_SOURCE_BRANCH],
                workflow_id=metadata[KEY_WORKFLOW_ID],
                module_id=metadata[KEY_MODULE_ID],
                created_at=timestamp
            )
        # Every object in the base folder that is not one of the predefined
        # branch objects is read as a workflow descriptor.
        reserved_names = (OBJ_METADATA, OBJ_PROPERTIES)
        workflows = []
        for name in store.list_objects(base_path):
            if name in reserved_names:
                continue
            wf_doc = cast(
                Dict[str, Any],
                store.read_object(store.join(base_path, name))
            )
            wf_desc = wf_doc[KEY_WORKFLOW_DESCRIPTOR]
            descriptor = WorkflowDescriptor(
                identifier=wf_doc[KEY_WORKFLOW_ID],
                action=wf_desc[KEY_ACTION],
                package_id=wf_desc[KEY_PACKAGE_ID],
                command_id=wf_desc[KEY_COMMAND_ID],
                created_at=to_datetime(wf_desc[KEY_CREATED_AT])
            )
            workflows.append(descriptor)
        # Order the branch history by ascending workflow identifier so that
        # the last list element is the branch head.
        workflows.sort(key=lambda wf: wf.identifier)
        head = None
        if workflows:
            latest = workflows[-1]
            head = read_workflow(
                branch_id=identifier,
                workflow_descriptor=latest,
                workflow_path=store.join(base_path, latest.identifier),
                modules_folder=modules_folder,
                object_store=store
            )
        branch_properties = PersistentAnnotationSet(
            object_path=store.join(base_path, OBJ_PROPERTIES),
            object_store=store
        )
        return OSBranchHandle(
            identifier=identifier,
            is_default=is_default,
            base_path=base_path,
            modules_folder=modules_folder,
            provenance=provenance,
            properties=branch_properties,
            workflows=workflows,
            head=head,
            object_store=store
        )
# Example #3
# 0
    def create_branch(
        self, project_id, branch_id=None, workflow_id=None, module_id=None,
        properties=None
    ):
        """Create a new branch in the project with the given identifier.

        The branch point is defined by the combination of branch_id,
        workflow_id, and module_id. An empty branch is created if all three
        values are None. Otherwise, the new branch contains the modules of
        the referenced workflow up to and including the referenced module.

        The properties for the new branch are set from the given properties
        dictionary.

        Returns None if the project is unknown. Raises ValueError if the
        source branch, the workflow, or the module of the branch point does
        not exist.

        Parameters
        ----------
        project_id: string
            Unique project identifier
        branch_id: string, optional
            Unique identifier of the source branch
        workflow_id: string, optional
            Unique identifier of the workflow in the source branch
        module_id: string, optional
            Unique identifier of the last module that is copied to the new
            branch
        properties: dict, optional
            Properties for new workflow branch

        Returns
        -------
        dict
        """
        # Nothing to do if the project does not exist.
        project = self.projects.get_project(project_id)
        if project is None:
            return None
        branch_point = (branch_id, workflow_id, module_id)
        if all(value is None for value in branch_point):
            # No branch point given: the new branch starts out empty.
            branch = project.viztrail.create_branch(properties=properties)
        else:
            # Resolve the branch point step by step and fail on the first
            # component that cannot be found.
            source_branch = project.viztrail.get_branch(branch_id)
            if source_branch is None:
                raise ValueError(
                    "unknown source branch '{!s}'".format(branch_id)
                )
            workflow = source_branch.get_workflow(workflow_id)
            if workflow is None:
                raise ValueError("unknown workflow '{!s}'".format(workflow_id))
            # Collect module identifiers up to and including the first module
            # that matches the branch point. The else-branch is only reached
            # if the scan finishes without finding that module.
            modules = []
            for wf_module in workflow.modules:
                current_id = wf_module.identifier
                modules.append(current_id)
                if current_id == module_id:
                    break
            else:
                raise ValueError("unknown module '{!s}'".format(module_id))
            branch = project.viztrail.create_branch(
                provenance=BranchProvenance(
                    source_branch=source_branch.identifier,
                    workflow_id=workflow_id,
                    module_id=module_id
                ),
                properties=properties,
                modules=modules
            )
        return serialize.BRANCH_DESCRIPTOR(
            branch=branch,
            project=project,
            urls=self.urls
        )
# Example #4
# 0
    def create_branch(
        identifier: str,
        base_path: str,
        modules_folder: str,
        is_default: bool = False,
        provenance: Optional[BranchProvenance] = None,
        properties: Optional[Dict[str, Any]] = None,
        created_at: Optional[datetime] = None,
        modules: Optional[List[str]] = None,
        object_store: Optional[ObjectStore] = None
    ):
        """Create a new branch. If a list of modules is given, the new branch
        contains exactly one workflow that is made up of these modules.
        Otherwise, the branch is empty.

        Raises ValueError if the base path does not exist or if any of the
        modules in the given list is in an active state.

        Parameters
        ----------
        identifier: string
            Unique branch identifier
        base_path: string
            path to the folder for branch resources
        modules_folder: string
            Path to module resources folder
        is_default: bool, optional
            True if this is the default branch for its viztrail
        provenance: vizier.viztrail.branch.BranchProvenance, optional
            Branch provenance information
        properties: dict, optional
            Initial set of branch properties
        created_at: datetime.datetime, optional
            Creation timestamp for the branch. Only used if no provenance
            object is given; the timestamp of a given provenance object
            always takes precedence.
        modules: list(string), optional
            List of module identifier for the modules in the workflow at the
            head of the branch
        object_store: vizier.core.io.base.ObjectStore, optional
            Object store implementation to access and maintain resources

        Returns
        -------
        vizier.viztrail.objectstore.branch.OSBranchHandle
        """
        # Make sure the object store is not None
        if object_store is None:
            object_store = DefaultObjectStore()
        # If base path does not exist raise an exception
        if not object_store.exists(base_path):
            raise ValueError('base path does not exist')
        # Read module handles first to ensure that none of the modules is in
        # an active state. This happens before anything is written so that a
        # rejected request leaves no partial branch behind.
        wf_modules = None
        if modules is not None:
            wf_modules = read_workflow_modules(
                modules_list=modules,
                modules_folder=modules_folder,
                object_store=object_store
            )
            for m in wf_modules:
                if m.is_active:
                    raise ValueError('cannot branch from active workflow')
        # Set provenance object if not given. The created_at argument was
        # previously ignored; it is now used for the provenance timestamp when
        # the caller supplied a timestamp but no provenance object.
        if provenance is None:
            if created_at is None:
                provenance = BranchProvenance()
            else:
                provenance = BranchProvenance(created_at=created_at)
        # Write provenance information to disk
        doc: Dict[str, Any] = {KEY_CREATED_AT: provenance.created_at.isoformat()}
        if provenance.source_branch is not None:
            # If one property is not None all are expected to be not None
            doc[KEY_SOURCE_BRANCH] = provenance.source_branch
            doc[KEY_WORKFLOW_ID] = provenance.workflow_id
            doc[KEY_MODULE_ID] = provenance.module_id
        object_store.write_object(
            object_path=object_store.join(base_path, OBJ_METADATA),
            content=doc
        )
        # Create the initial workflow if the list of modules is given
        workflows = []
        head = None
        if modules is not None:
            # Write handle for workflow at branch head
            descriptor = write_workflow_handle(
                modules=modules,
                workflow_count=0,
                base_path=base_path,
                object_store=object_store,
                action=ACTION_CREATE,
                created_at=provenance.created_at
            )
            workflows.append(descriptor)
            # Set the new workflow as the branch head
            head = WorkflowHandle(
                identifier=descriptor.identifier,
                branch_id=identifier,
                modules=wf_modules,
                descriptor=descriptor
            )
        # Return handle for new viztrail branch
        return OSBranchHandle(
            identifier=identifier,
            is_default=is_default,
            base_path=base_path,
            modules_folder=modules_folder,
            provenance=provenance,
            properties=PersistentAnnotationSet(
                object_path=object_store.join(base_path, OBJ_PROPERTIES),
                object_store=object_store,
                properties=properties
            ),
            workflows=workflows,
            head=head,
            object_store=object_store
        )
    def create_viztrail(identifier: str,
                        base_path: str,
                        object_store: Optional[ObjectStore] = None,
                        properties: Optional[Dict[str, Any]] = None):
        """Create a new viztrail resource inside an existing base folder.

        Creates the subfolders for branches and modules, writes an empty
        branch index and the viztrail metadata, creates an empty default
        branch, and finally materializes the branch index that contains the
        default branch.

        Parameters
        ----------
        identifier: string
            Unique viztrail identifier
        base_path: string
            Identifier for folder containing viztrail resources. The folder
            is expected to exist.
        object_store: vizier.core.io.base.ObjectStore, optional
            Object store implementation to access and maintain resources. A
            DefaultObjectStore is created if no store is given.
        properties: dict(string, any), optional
            Dictionary of properties for the new viztrail

        Returns
        -------
        OSViztrailHandle
        """
        store = object_store if object_store is not None else DefaultObjectStore()
        # Folder for branches together with an (initially empty) branch index.
        branch_folder = store.join(base_path, FOLDER_BRANCHES)
        store.create_folder(base_path, identifier=FOLDER_BRANCHES)
        branch_index = store.join(branch_folder, OBJ_BRANCHINDEX)
        empty_index: List[str] = []
        store.write_object(object_path=branch_index, content=empty_index)
        # Folder for the modules of all workflows in the viztrail.
        modules_folder = store.join(base_path, FOLDER_MODULES)
        store.create_folder(base_path, identifier=FOLDER_MODULES)
        # Persist the viztrail metadata. The same timestamp is reused for the
        # default branch below.
        created_at = get_current_time()
        metadata = {
            KEY_IDENTIFIER: identifier,
            KEY_CREATED_AT: created_at.isoformat()
        }
        store.write_object(
            object_path=store.join(base_path, OBJ_METADATA),
            content=metadata
        )
        # Every viztrail starts with an empty default branch.
        branch_provenance = BranchProvenance(created_at=created_at)
        default_branch = create_branch(
            provenance=branch_provenance,
            properties={PROPERTY_NAME: DEFAULT_BRANCH},
            modules=None,
            branch_folder=branch_folder,
            modules_folder=modules_folder,
            object_store=store,
            is_default=True,
            created_at=created_at
        )
        # Overwrite the empty index with one that lists the default branch.
        write_branch_index(
            branches={default_branch.identifier: default_branch},
            object_path=branch_index,
            object_store=store
        )
        viztrail_properties = PersistentAnnotationSet(
            object_path=store.join(base_path, OBJ_PROPERTIES),
            object_store=store,
            properties=properties
        )
        return OSViztrailHandle(
            identifier=identifier,
            properties=viztrail_properties,
            branches=[default_branch],
            default_branch=default_branch,
            created_at=created_at,
            base_path=base_path,
            object_store=store,
            branch_index=branch_index,
            branch_folder=branch_folder,
            modules_folder=modules_folder
        )