def test_create_and_delete_branch(self):
     """Create a named branch, check that it is still there after the
     viztrail is reloaded, and verify that deleting it removes its folder.
     """
     vt_dir = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(vt_dir)
     handle = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties=None,
         base_path=vt_dir
     )
     # The test expects a single branch right after creation
     self.assertEqual(len(handle.branches), 1)
     created = handle.create_branch(properties={PROPERTY_NAME: 'My Branch'})
     self.assertEqual(len(handle.branches), 2)
     # The new branch is expected to be empty
     self.assertIsNone(created.head)
     self.assertEqual(len(created.workflows), 0)
     # Read the viztrail from disk and repeat the checks on the loaded branch
     handle = OSViztrailHandle.load_viztrail(vt_dir)
     self.assertEqual(len(handle.branches), 2)
     self.assertTrue(created.identifier in handle.branches)
     loaded = handle.get_branch(created.identifier)
     self.assertEqual(loaded.name, 'My Branch')
     self.assertIsNone(loaded.head)
     self.assertEqual(len(loaded.workflows), 0)
     # The branch folder and both of its object files have to exist
     branch_dir = os.path.join(
         vt_dir, viztrail.FOLDER_BRANCHES, loaded.identifier)
     self.assertTrue(os.path.isdir(branch_dir))
     for obj_name in (br.OBJ_METADATA, br.OBJ_PROPERTIES):
         self.assertTrue(os.path.isfile(os.path.join(branch_dir, obj_name)))
     # Deleting the branch removes the folder; this also holds after reload
     handle.delete_branch(loaded.identifier)
     self.assertFalse(os.path.isdir(branch_dir))
     self.assertEqual(len(handle.branches), 1)
     handle = OSViztrailHandle.load_viztrail(vt_dir)
     self.assertEqual(len(handle.branches), 1)
 def test_load_active(self):
     """Test loading workflows with active modules.

     Modules that were written to disk in a running state are expected to
     be reported as canceled after the viztrail has been loaded again.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(identifier='ABC',
                                           properties=None,
                                           base_path=base_path)
     branch = vt.get_default_branch()
     # Append five modules. Each append is expected to add one entry to the
     # branch history.
     for i in range(5):
         ts = get_current_time()
         source = 'print ' + str(i) + '+' + str(i)
         command = python_cell(source=source)
         module = OSModuleHandle.create_module(
             command=command,
             external_form=source,
             state=MODULE_SUCCESS,
             datasets=dict(),
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(created_at=ts,
                                       started_at=ts,
                                       finished_at=ts),
             module_folder=vt.modules_folder,
             object_store=vt.object_store)
         if branch.head is not None:
             modules = branch.head.modules + [module]
         else:
             modules = [module]
         branch.append_workflow(modules=modules,
                                action=ACTION_INSERT,
                                command=command)
         self.assertEqual(len(branch.get_history()), i + 1)
     # This is a hack to simulate loading workflows with active modules:
     # change the state of the last two modules in the branch head to an
     # active state and persist the change.
     for m in branch.get_head().modules[-2:]:
         m.state = MODULE_RUNNING
         m.write_module()
     vt = OSViztrailHandle.load_viztrail(base_path)
     branch = vt.get_default_branch()
     # The first three modules keep their state, the two active ones are
     # expected to show up as canceled
     for index in range(3):
         self.assertTrue(branch.get_head().modules[index].is_success)
     for index in (3, 4):
         self.assertTrue(branch.get_head().modules[index].is_canceled)
     # Change state of last module in second workflow to an active state
     m = branch.get_head().modules[1]
     m.state = MODULE_RUNNING
     m.write_module()
     vt = OSViztrailHandle.load_viztrail(base_path)
     branch = vt.get_default_branch()
     wf = branch.get_workflow(branch.get_history()[1].identifier)
     self.assertTrue(wf.modules[0].is_success)
     self.assertTrue(wf.modules[1].is_canceled)
 def test_default_branch(self):
     """Check the rules for the default branch of a viztrail: it cannot be
     deleted, the default flag is persisted, and the default can be moved
     to another branch.
     """
     vt_dir = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(vt_dir)
     handle = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties=None,
         base_path=vt_dir
     )
     self.assertEqual(len(handle.branches), 1)
     first = handle.get_default_branch()
     self.assertTrue(handle.is_default_branch(first.identifier))
     # Deleting the default branch through the viztrail should raise
     # ValueError
     self.assertRaises(ValueError, handle.delete_branch, first.identifier)
     # Deleting the default branch folder through the branch handle should
     # raise RuntimeError
     self.assertTrue(first.is_default)
     self.assertRaises(RuntimeError, first.delete_branch)
     # The same is expected once the viztrail has been read from disk again
     handle = OSViztrailHandle.load_viztrail(vt_dir)
     self.assertEqual(len(handle.branches), 1)
     first = handle.get_default_branch()
     self.assertRaises(ValueError, handle.delete_branch, first.identifier)
     self.assertTrue(first.is_default)
     self.assertRaises(RuntimeError, first.delete_branch)
     # A newly added branch must not become the default
     second = handle.create_branch(properties={PROPERTY_NAME: 'My Branch'})
     self.assertFalse(second.is_default)
     self.assertNotEqual(
         handle.get_default_branch().identifier, second.identifier)
     self.assertFalse(handle.is_default_branch(second.identifier))
     handle = OSViztrailHandle.load_viztrail(vt_dir)
     self.assertNotEqual(
         handle.get_default_branch().identifier, second.identifier)
     # Make the second branch the default branch
     second = handle.set_default_branch(second.identifier)
     self.assertTrue(second.is_default)
     self.assertFalse(handle.get_branch(first.identifier).is_default)
     self.assertEqual(
         handle.get_default_branch().identifier, second.identifier)
     # The first branch is no longer the default and can be deleted now
     self.assertTrue(handle.delete_branch(first.identifier))
     handle = OSViztrailHandle.load_viztrail(vt_dir)
     self.assertIsNone(handle.get_branch(first.identifier))
     self.assertEqual(
         handle.get_default_branch().identifier, second.identifier)
     # Using an unknown branch as the default should raise ValueError
     self.assertRaises(
         ValueError, handle.set_default_branch, first.identifier)
 def test_single_append(self):
     """Append one module to the empty default branch and read the module
     back after loading the viztrail from disk.
     """
     vt_dir = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(vt_dir)
     handle = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties={},
         base_path=vt_dir
     )
     default_branch = handle.get_default_branch()
     cell = python_cell(source='print 2+2')
     now = get_current_time()
     new_module = OSModuleHandle.create_module(
         command=cell,
         external_form='print 2+2',
         state=MODULE_SUCCESS,
         outputs=ModuleOutputs(stdout=[TextOutput('4')]),
         provenance=ModuleProvenance(),
         timestamp=ModuleTimestamp(
             created_at=now, started_at=now, finished_at=now),
         module_folder=handle.modules_folder,
         object_store=handle.object_store
     )
     workflow = default_branch.append_workflow(
         modules=[new_module],
         action=ACTION_INSERT,
         command=cell
     )
     # One file is expected for the workflow handle and one for the module
     workflow_file = os.path.join(
         default_branch.base_path, workflow.identifier)
     self.assertTrue(os.path.isfile(workflow_file))
     self.assertTrue(os.path.isfile(workflow.modules[-1].module_path))
     # Reload the viztrail and inspect the last module of the branch head
     handle = OSViztrailHandle.load_viztrail(vt_dir)
     loaded_module = handle.get_default_branch().get_head().modules[-1]
     self.assertEqual(loaded_module.external_form, 'print 2+2')
     self.assertEqual(loaded_module.outputs.stdout[-1].value, '4')
 def test_create_load_delete(self):
     """Ensure that creating, loading, and deleting a viztrail works.

     Checks the folders and files that are expected on disk after
     creation, that a changed name is still set after a reload, and that
     deleting the viztrail removes its base folder.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(
         identifier='DEF',
         properties={PROPERTY_NAME: 'My Viztrail'},
         base_path=base_path)
     # Ensure that all files and subfolders are created. The checks use the
     # absolute base path that was handed to create_viztrail, so they do not
     # depend on the current working directory.
     self.assertTrue(os.path.isdir(base_path))
     for folder in (viztrail.FOLDER_BRANCHES, viztrail.FOLDER_MODULES):
         self.assertTrue(os.path.isdir(os.path.join(base_path, folder)))
     expected_files = [
         os.path.join(base_path, viztrail.FOLDER_BRANCHES,
                      viztrail.OBJ_BRANCHINDEX),
         os.path.join(base_path, viztrail.OBJ_METADATA),
         os.path.join(base_path, viztrail.OBJ_PROPERTIES),
     ]
     for filename in expected_files:
         self.assertTrue(os.path.isfile(filename))
     # Update name property
     self.assertEqual(vt.identifier, 'DEF')
     self.assertEqual(vt.name, 'My Viztrail')
     vt.name = 'A Name'
     self.assertEqual(vt.name, 'A Name')
     # Load viztrail from disk; the new name is expected to be persisted
     vt = OSViztrailHandle.load_viztrail(base_path)
     self.assertEqual(vt.identifier, 'DEF')
     self.assertEqual(vt.name, 'A Name')
     # Delete viztrail
     vt.delete_viztrail()
     self.assertFalse(os.path.exists(base_path))
 def test_load_with_dataset_delete(self):
     """Test loading workflows where each module creates a new dataset and
     deletes the previous dataset (except for the first module).
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(identifier='ABC',
                                           properties={},
                                           base_path=base_path)
     branch = vt.get_default_branch()
     # Append five modules. Module i writes dataset 'DS<i>' with i columns
     # and (for i > 0) deletes the dataset written by its predecessor.
     for i in range(5):
         ts = get_current_time()
         ds_name = 'DS' + str(i)
         deleted_datasets = ['DS' + str(i - 1)] if i > 0 else []
         source = 'print ' + str(i) + '+' + str(i)
         command = python_cell(source=source)
         descriptor = DatasetDescriptor(
             identifier=str(i),
             name=ds_name,
             columns=[
                 DatasetColumn(identifier=j, name=str(j))
                 for j in range(i)
             ],
         )
         module = OSModuleHandle.create_module(
             command=command,
             external_form=source,
             state=MODULE_SUCCESS,
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(write={ds_name: descriptor},
                                         delete=deleted_datasets),
             timestamp=ModuleTimestamp(created_at=ts,
                                       started_at=ts,
                                       finished_at=ts),
             module_folder=vt.modules_folder,
             object_store=vt.object_store)
         if branch.head is not None:
             modules = branch.head.modules + [module]
         else:
             modules = [module]
         branch.append_workflow(modules=modules,
                                action=ACTION_INSERT,
                                command=command)
     vt = OSViztrailHandle.load_viztrail(base_path)
     workflow = vt.get_default_branch().get_head()
     self.assertEqual(len(workflow.modules), 5)
     # Replay the provenance module by module. After each step exactly one
     # dataset is expected: the one written by the current module.
     datasets = {}
     for i in range(5):
         module = workflow.modules[i]
         datasets = module.provenance.get_database_state(datasets)
         self.assertEqual(len(datasets), 1)
         key = 'DS' + str(i)
         self.assertIn(key, datasets)
         self.assertEqual(len(datasets[key].columns), i)
    def __init__(self,
                 base_path: str,
                 object_store: Optional[ObjectStore] = None):
        """Initialize the repository for the given base directory.

        The base directory and the viztrails index object are created if
        they do not exist. Every viztrail whose identifier is listed in the
        index is loaded and kept in the viztrails dictionary.

        Parameters
        ----------
        base_path: string
            Path to the base directory for viztrail resources
        object_store: vizier.core.io.base.ObjectStore, optional
            Store for objects that represent viztrail resources. If not
            given, a DefaultObjectStore is used.

        Raises
        ------
        ValueError
            If the base path is None
        """
        # Raise an exception if the base directory argument is not given
        if base_path is None:
            raise ValueError('missing path for base directory')
        # Create the base directory if it does not exist. exist_ok avoids the
        # race between testing for the directory and creating it.
        self.base_path = base_path
        os.makedirs(self.base_path, exist_ok=True)
        # The object store element is optional. If not given the default object
        # store is used.
        self.object_store: ObjectStore = (
            object_store if object_store is not None else DefaultObjectStore()
        )
        # Initialize the viztrails index. Create the index file if it does not
        # exist.
        self.viztrails_index = self.object_store.join(self.base_path,
                                                      OBJ_VIZTRAILINDEX)
        if not self.object_store.exists(self.viztrails_index):
            self.object_store.create_object(parent_folder=self.base_path,
                                            identifier=OBJ_VIZTRAILINDEX,
                                            content=list())
        # Load all viztrails that are listed in the index.
        # NOTE(review): the index is created above with list content while the
        # cast declares a dictionary; iteration yields the identifiers in
        # either case. Confirm the stored format.
        self.viztrails: Dict[str, OSViztrailHandle] = dict()
        for identifier in cast(
                Dict[str, Any],
                self.object_store.read_object(self.viztrails_index)):
            vt = OSViztrailHandle.load_viztrail(
                base_path=self.object_store.join(self.base_path, identifier),
                object_store=self.object_store)
            # We just got the identifier from the repository... the loaded
            # viztrail had better exist.
            assert vt is not None
            self.viztrails[vt.identifier] = vt
 def test_completed_append(self):
     """Append a workflow that results from deleting the last module and
     verify its descriptor, both in memory and after a reload.
     """
     vt_dir = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(vt_dir)
     handle = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties=None,
         base_path=vt_dir
     )
     default_branch = handle.get_default_branch()

     def check_delete_workflow(workflow):
         # Expected shape of the workflow created by the delete action
         self.assertEqual(len(workflow.modules), 9)
         self.assertEqual(workflow.descriptor.identifier, '0000000A')
         self.assertEqual(workflow.descriptor.action, ACTION_DELETE)
         self.assertEqual(workflow.descriptor.package_id, PACKAGE_PYTHON)
         self.assertEqual(workflow.descriptor.command_id, PYTHON_CODE)

     for n in range(10):
         now = get_current_time()
         source = 'print ' + str(n) + '+' + str(n)
         cell = python_cell(source=source)
         new_module = OSModuleHandle.create_module(
             command=cell,
             external_form=source,
             state=MODULE_SUCCESS,
             datasets={},
             outputs=ModuleOutputs(stdout=[TextOutput(str(n + n))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(
                 created_at=now, started_at=now, finished_at=now),
             module_folder=handle.modules_folder,
             object_store=handle.object_store
         )
         # Extend the module list of the current head, if there is one
         if default_branch.head is None:
             module_list = [new_module]
         else:
             module_list = default_branch.head.modules + [new_module]
         default_branch.append_workflow(
             modules=module_list,
             action=ACTION_INSERT,
             command=cell
         )
     # Append a workflow that drops the last module of the current head
     current = default_branch.get_head().modules
     check_delete_workflow(
         default_branch.append_workflow(
             modules=current[:-1],
             action=ACTION_DELETE,
             command=current[-1].command
         )
     )
     # After a reload the history holds eleven entries and the head is the
     # workflow created by the delete action
     handle = OSViztrailHandle.load_viztrail(vt_dir)
     default_branch = handle.get_default_branch()
     versions = default_branch.get_history()
     self.assertEqual(len(versions), 11)
     check_delete_workflow(default_branch.get_head())
 def test_multi_append(self):
     """Append ten modules one by one and verify every workflow version
     in the branch history after the viztrail is reloaded.
     """
     vt_dir = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(vt_dir)
     handle = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties=None,
         base_path=vt_dir
     )
     default_branch = handle.get_default_branch()
     # Each append is expected to add one entry to the branch history
     for n in range(10):
         now = get_current_time()
         source = 'print ' + str(n) + '+' + str(n)
         cell = python_cell(source=source)
         new_module = OSModuleHandle.create_module(
             command=cell,
             external_form=source,
             state=MODULE_SUCCESS,
             datasets={},
             outputs=ModuleOutputs(stdout=[TextOutput(str(n + n))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(
                 created_at=now, started_at=now, finished_at=now),
             module_folder=handle.modules_folder,
             object_store=handle.object_store
         )
         if default_branch.head is None:
             module_list = [new_module]
         else:
             module_list = default_branch.head.modules + [new_module]
         default_branch.append_workflow(
             modules=module_list,
             action=ACTION_INSERT,
             command=cell
         )
         self.assertEqual(len(default_branch.get_history()), n + 1)
     # Reload; version k of the history should contain the first k modules
     handle = OSViztrailHandle.load_viztrail(vt_dir)
     default_branch = handle.get_default_branch()
     versions = default_branch.get_history()
     self.assertEqual(len(versions), 10)
     for size, descriptor in enumerate(versions, start=1):
         workflow = default_branch.get_workflow(descriptor.identifier)
         self.assertEqual(len(workflow.modules), size)
         for pos, loaded in enumerate(workflow.modules):
             self.assertEqual(
                 loaded.external_form,
                 'print ' + str(pos) + '+' + str(pos))
             self.assertEqual(
                 loaded.outputs.stdout[-1].value, str(pos + pos))
 def test_create_and_delete_branch_with_default_workflow(self):
     """Ensure that creating and loading branches works if the head workflow
     for the new branch is given.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(
         identifier='DEF',
         properties={PROPERTY_NAME: 'My Viztrail'},
         base_path=base_path)
     self.assertEqual(vt.last_modified_at,
                      vt.default_branch.last_modified_at)
     # Create five modules and collect their identifiers; the new branch is
     # created from the list of module identifiers
     modules = [
         OSModuleHandle.create_module(
             command=python_cell(source='print ' + str(i)),
             external_form='TEST MODULE ' + str(i),
             state=MODULE_SUCCESS,
             outputs=ModuleOutputs(),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(),
             datasets=dict(),
             module_folder=vt.modules_folder,
         ).identifier
         for i in range(5)
     ]
     branch = vt.create_branch(properties={PROPERTY_NAME: 'My Branch'},
                               modules=modules)
     # The branch is expected to start with one workflow as its head
     self.assertIsNotNone(branch.head)
     self.assertEqual(len(branch.workflows), 1)
     # The same state is expected after loading the viztrail from disk
     vt = OSViztrailHandle.load_viztrail(base_path)
     branch = vt.get_branch(branch.identifier)
     self.assertIsNotNone(branch.head)
     self.assertEqual(len(branch.workflows), 1)
     wf = branch.get_workflow(branch.head.identifier)
     self.assertEqual(len(wf.modules), 5)
     for i in range(5):
         self.assertEqual(wf.modules[i].external_form,
                          'TEST MODULE ' + str(i))
     # The original repeated this assertion twice; once is sufficient
     self.assertEqual(vt.last_modified_at, branch.last_modified_at)
 def test_load_with_missing_modules(self):
     """Test loading workflows for which a module file is missing.

     The file of one module is removed before the viztrail is loaded. The
     branch head and the affected module are expected to be in an error
     state afterwards.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(identifier='ABC',
                                           properties=None,
                                           base_path=base_path)
     branch = vt.get_default_branch()
     # Append five modules. Each append is expected to add one entry to the
     # branch history.
     for i in range(5):
         ts = get_current_time()
         source = 'print ' + str(i) + '+' + str(i)
         command = python_cell(source=source)
         module = OSModuleHandle.create_module(
             command=command,
             external_form=source,
             state=MODULE_SUCCESS,
             datasets=dict(),
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(created_at=ts,
                                       started_at=ts,
                                       finished_at=ts),
             module_folder=vt.modules_folder,
             object_store=vt.object_store)
         if branch.head is not None:
             modules = branch.head.modules + [module]
         else:
             modules = [module]
         branch.append_workflow(modules=modules,
                                action=ACTION_INSERT,
                                command=command)
         self.assertEqual(len(branch.get_history()), i + 1)
     # Delete the file for the third module to simulate an error condition in
     # which a file wasn't written properly
     missing_file = branch.head.modules[2].module_path
     os.remove(missing_file)
     self.assertFalse(os.path.isfile(missing_file))
     vt = OSViztrailHandle.load_viztrail(base_path)
     branch = vt.get_default_branch()
     self.assertTrue(branch.head.get_state().is_error)
     self.assertTrue(branch.head.modules[2].is_error)
# Example #12
# 0
 def test_branch_cache(self):
     """Test the workflow cache that is maintained by a branch.

     Appends more workflows than DEFAULT_CACHE_SIZE and checks which
     workflows are in the cache, before and after the viztrail is
     reloaded from disk.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(identifier='ABC',
                                           properties={},
                                           base_path=base_path)
     branch = vt.get_default_branch()
     command = python_cell(source='print 2+2')

     def make_module(handle):
         # Create one successful module in the given viztrail
         return OSModuleHandle.create_module(
             command=command,
             external_form='print 2+2',
             state=MODULE_SUCCESS,
             timestamp=ModuleTimestamp(created_at=get_current_time(),
                                       started_at=get_current_time(),
                                       finished_at=get_current_time()),
             outputs=ModuleOutputs(stdout=[TextOutput('4')]),
             provenance=ModuleProvenance(),
             module_folder=handle.modules_folder,
             object_store=handle.object_store)

     def cached_ids(branch_handle):
         # Identifiers of all workflows in the cache of the given branch
         return [w.identifier for w in branch_handle.cache]

     wf = branch.append_workflow(modules=[make_module(vt)],
                                 action=ACTION_INSERT,
                                 command=command)
     # The workflow that was just appended is not expected in the cache
     self.assertNotIn(wf.identifier, cached_ids(branch))
     # Each further append is expected to grow the cache by one entry, with
     # the first workflow staying in the cache
     for i in range(DEFAULT_CACHE_SIZE):
         branch.append_workflow(
             modules=branch.head.modules + [make_module(vt)],
             action=ACTION_INSERT,
             command=command)
         self.assertEqual(len(branch.cache), i + 1)
         self.assertIn(wf.identifier, cached_ids(branch))
     # One more append exceeds the cache size; the first workflow is
     # expected to be evicted
     branch.append_workflow(
         modules=branch.head.modules + [make_module(vt)],
         action=ACTION_INSERT,
         command=command)
     self.assertEqual(len(branch.cache), DEFAULT_CACHE_SIZE)
     self.assertNotIn(wf.identifier, cached_ids(branch))
     # The cache is expected to be empty after loading the viztrail
     vt = OSViztrailHandle.load_viztrail(base_path)
     branch = vt.get_default_branch()
     self.assertEqual(len(branch.cache), 0)
     self.assertNotIn(wf.identifier, cached_ids(branch))
     # Reading a workflow is expected to add it to the cache
     branch.get_workflow(wf.identifier)
     self.assertIn(wf.identifier, cached_ids(branch))
     # Reading all other workflows fills the cache and is expected to evict
     # the first workflow again
     for wf_desc in branch.get_history():
         if wf_desc.identifier != wf.identifier:
             branch.get_workflow(wf_desc.identifier)
     self.assertEqual(len(branch.cache), DEFAULT_CACHE_SIZE)
     self.assertNotIn(wf.identifier, cached_ids(branch))