コード例 #1
0
 def test_single_append(self):
     """Append one module to an empty branch and verify it is persisted.

     Asserts that a file exists for the new workflow handle and for the
     appended module, then reloads the viztrail and checks the external form
     and the last stdout value of the module at the branch head.
     """
     source = 'print 2+2'
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     trail = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties={},
         base_path=base_path,
     )
     branch = trail.get_default_branch()
     command = python_cell(source=source)
     now = get_current_time()
     handle = OSModuleHandle.create_module(
         command=command,
         external_form=source,
         state=MODULE_SUCCESS,
         outputs=ModuleOutputs(stdout=[TextOutput('4')]),
         provenance=ModuleProvenance(),
         timestamp=ModuleTimestamp(
             created_at=now, started_at=now, finished_at=now),
         module_folder=trail.modules_folder,
         object_store=trail.object_store,
     )
     workflow = branch.append_workflow(
         modules=[handle], action=ACTION_INSERT, command=command)
     # Both the workflow handle and the new module should exist as files
     workflow_file = os.path.join(branch.base_path, workflow.identifier)
     self.assertTrue(os.path.isfile(workflow_file))
     self.assertTrue(os.path.isfile(workflow.modules[-1].module_path))
     # Reload from disk and inspect the last module of the branch head
     reloaded = OSViztrailHandle.load_viztrail(base_path)
     head_module = reloaded.get_default_branch().get_head().modules[-1]
     self.assertEqual(head_module.external_form, source)
     self.assertEqual(head_module.outputs.stdout[-1].value, '4')
コード例 #2
0
ファイル: branch.py プロジェクト: sanchitcop19/web-api-async
    def append_workflow(self, modules, action, command, pending_modules=None):
        """Make a new workflow the head of this branch.

        The workflow consists of the given completed modules followed by the
        pending modules. Pending modules have not been persisted previously;
        each one is materialized via OSModuleHandle.create_module as part of
        creating the workflow. The previous branch head (if any) is handed to
        add_to_cache before it is replaced.

        Parameters
        ----------
        modules: list(vizier.viztrail.module.ModuleHandle)
            List of modules in the workflow that are completed
        action: string
            Identifier of the action that created the workflow
        command: vizier.viztrail.module.ModuleCommand
            Specification of the executed command that created the workflow
        pending_modules: list(vizier.viztrail.module.ModuleHandle), optional
            List of modules in the workflow that need to be materialized

        Returns
        -------
        vizier.viztrail.workflow.base.WorkflowHandle
        """
        all_modules = list(modules)
        if pending_modules is not None:
            for pending in pending_modules:
                # A running module needs a start time; fall back to the time
                # of creation if none has been recorded yet
                if pending.is_running and pending.timestamp.started_at is None:
                    pending.timestamp.started_at = pending.timestamp.created_at
                all_modules.append(
                    OSModuleHandle.create_module(
                        command=pending.command,
                        external_form=pending.external_form,
                        state=pending.state,
                        timestamp=pending.timestamp,
                        datasets=pending.datasets,
                        outputs=pending.outputs,
                        provenance=pending.provenance,
                        module_folder=self.modules_folder,
                        object_store=self.object_store,
                    )
                )
        # Persist the handle for the workflow that becomes the branch head
        module_ids = [m.identifier for m in all_modules]
        descriptor = write_workflow_handle(
            modules=module_ids,
            workflow_count=len(self.workflows),
            base_path=self.base_path,
            object_store=self.object_store,
            action=action,
            command=command,
            created_at=get_current_time(),
        )
        new_head = WorkflowHandle(
            identifier=descriptor.identifier,
            branch_id=self.identifier,
            modules=all_modules,
            descriptor=descriptor,
        )
        self.workflows.append(new_head.descriptor)
        # Move the current head (if any) to the cache before replacing it
        previous_head = self.head
        if previous_head is not None:
            self.add_to_cache(previous_head)
        self.head = new_head
        return new_head
コード例 #3
0
 def test_load_with_dataset_delete(self):
     """Load workflows in which every module replaces the previous dataset.

     Module i writes dataset 'DS<i>' with i columns and (except for the first
     module) deletes 'DS<i-1>'. After reloading the viztrail, replaying the
     provenance of the head modules must leave exactly one dataset at each
     step.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     trail = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties={},
         base_path=base_path,
     )
     branch = trail.get_default_branch()
     # Append five modules, creating one workflow per module
     for i in range(5):
         now = get_current_time()
         deleted = ['DS' + str(i - 1)] if i > 0 else []
         source = 'print {0}+{0}'.format(i)
         command = python_cell(source=source)
         handle = OSModuleHandle.create_module(
             command=command,
             external_form=source,
             state=MODULE_SUCCESS,
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(
                 write={
                     'DS' + str(i): DatasetDescriptor(
                         identifier=str(i),
                         name='DS' + str(i),
                         columns=[
                             DatasetColumn(identifier=j, name=str(j))
                             for j in range(i)
                         ],
                     )
                 },
                 delete=deleted,
             ),
             timestamp=ModuleTimestamp(
                 created_at=now, started_at=now, finished_at=now),
             module_folder=trail.modules_folder,
             object_store=trail.object_store,
         )
         if branch.head is None:
             modules = [handle]
         else:
             modules = branch.head.modules + [handle]
         branch.append_workflow(
             modules=modules, action=ACTION_INSERT, command=command)
     reloaded = OSViztrailHandle.load_viztrail(base_path)
     workflow = reloaded.get_default_branch().get_head()
     self.assertEqual(len(workflow.modules), 5)
     # Replay the provenance module by module; only the dataset written by
     # the current module should remain in the state
     state = {}
     for i, module in enumerate(workflow.modules):
         state = module.provenance.get_database_state(state)
         self.assertEqual(len(state), 1)
         key = 'DS' + str(i)
         self.assertTrue(key in state)
         self.assertEqual(len(state[key].columns), i)
コード例 #4
0
 def test_timestamps(self):
     """Write modules with different timestamp values and read them back."""

     def reload(handle):
         # Read the persisted version of the given module handle
         return OSModuleHandle.load_module(
             identifier=handle.identifier,
             module_path=handle.module_path,
         )

     original = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         outputs=ModuleOutputs(),
         provenance=ModuleProvenance(),
         timestamp=ModuleTimestamp(),
         module_folder=MODULE_DIR,
     )
     loaded = reload(original)
     # Set started_at only; created_at has to survive the round trip
     created_at = loaded.timestamp.created_at
     started_at = to_datetime('2018-11-26T13:00:00.000000')
     loaded.timestamp.started_at = started_at
     loaded.write_module()
     loaded = reload(original)
     self.assertEqual(loaded.timestamp.created_at, created_at)
     self.assertEqual(loaded.timestamp.started_at, started_at)
     # Overwrite created_at and set finished_at to the same value
     finished_at = to_datetime('2018-11-26T13:00:00.000010')
     loaded.timestamp.created_at = finished_at
     loaded.timestamp.finished_at = finished_at
     loaded.write_module()
     loaded = reload(original)
     self.assertEqual(loaded.timestamp.created_at, finished_at)
     self.assertEqual(loaded.timestamp.started_at, started_at)
     self.assertEqual(loaded.timestamp.finished_at, finished_at)
     # New module that is created with explicit created_at and started_at
     original = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         outputs=ModuleOutputs(),
         provenance=ModuleProvenance(),
         timestamp=ModuleTimestamp(
             created_at=created_at, started_at=started_at),
         module_folder=MODULE_DIR,
     )
     loaded = reload(original)
     self.assertEqual(loaded.timestamp.created_at, created_at)
     self.assertEqual(loaded.timestamp.started_at, started_at)
     self.assertIsNone(loaded.timestamp.finished_at)
コード例 #5
0
 def test_load_active(self):
     """Load workflows that contain modules persisted in an active state.

     Modules whose persisted state is MODULE_RUNNING are expected to be
     reported as canceled once the viztrail has been loaded.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     trail = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties=None,
         base_path=base_path,
     )
     branch = trail.get_default_branch()
     # Append five modules, creating one workflow per module
     for i in range(5):
         now = get_current_time()
         source = 'print {0}+{0}'.format(i)
         command = python_cell(source=source)
         handle = OSModuleHandle.create_module(
             command=command,
             external_form=source,
             state=MODULE_SUCCESS,
             datasets={},
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(
                 created_at=now, started_at=now, finished_at=now),
             module_folder=trail.modules_folder,
             object_store=trail.object_store,
         )
         if branch.head is None:
             modules = [handle]
         else:
             modules = branch.head.modules + [handle]
         branch.append_workflow(
             modules=modules, action=ACTION_INSERT, command=command)
         self.assertEqual(len(branch.get_history()), (i + 1))
     # This is a hack to simulate loading workflows with active modules:
     # rewrite the last two modules of the branch head in an active state
     for pos in (-2, -1):
         active = branch.get_head().modules[pos]
         active.state = MODULE_RUNNING
         active.write_module()
     trail = OSViztrailHandle.load_viztrail(base_path)
     branch = trail.get_default_branch()
     for pos in range(3):
         self.assertTrue(branch.get_head().modules[pos].is_success)
     for pos in (3, 4):
         self.assertTrue(branch.get_head().modules[pos].is_canceled)
     # Rewrite the second module (the last module of the second workflow)
     # in an active state
     active = branch.get_head().modules[1]
     active.state = MODULE_RUNNING
     active.write_module()
     trail = OSViztrailHandle.load_viztrail(base_path)
     branch = trail.get_default_branch()
     second = branch.get_workflow(branch.get_history()[1].identifier)
     self.assertTrue(second.modules[0].is_success)
     self.assertTrue(second.modules[1].is_canceled)
コード例 #6
0
 def test_datasets(self):
     """Write modules with dataset information and read them back.

     A module loaded in pending state is expected to expose no datasets,
     whereas a module in success state exposes the datasets in DATASETS.
     """

     def create_and_load(state):
         # Persist a module in the given state and read it back
         created = OSModuleHandle.create_module(
             command=python_cell(source='print 2+2'),
             external_form='TEST MODULE',
             state=state,
             outputs=ModuleOutputs(),
             provenance=ModuleProvenance(write=DATASETS),
             timestamp=ModuleTimestamp(),
             module_folder=MODULE_DIR,
             datasets=DATASETS,
         )
         return OSModuleHandle.load_module(
             identifier=created.identifier,
             module_path=created.module_path,
             prev_state={},
         )

     loaded = create_and_load(MODULE_PENDING)
     self.assertEqual(len(loaded.datasets), 0)
     loaded = create_and_load(MODULE_SUCCESS)
     self.assertEqual(len(loaded.datasets), 2)
     # First dataset: no columns and no rows
     self.assertEqual(loaded.datasets['DS1'].identifier, 'ID1')
     self.assertEqual(len(loaded.datasets['DS1'].columns), 0)
     self.assertEqual(loaded.datasets['DS1'].row_count, 0)
     # Second dataset: two typed columns and 100 rows
     ds2 = loaded.datasets['DS2']
     self.assertEqual(ds2.identifier, 'ID2')
     self.assertEqual(len(ds2.columns), 2)
     expected_columns = [('ABC', 'int'), ('xyz', 'real')]
     for pos, (name, data_type) in enumerate(expected_columns):
         column = ds2.columns[pos]
         self.assertEqual(column.identifier, pos)
         self.assertEqual(column.name, name)
         self.assertEqual(column.data_type, data_type)
     self.assertEqual(ds2.row_count, 100)
コード例 #7
0
 def test_completed_append(self):
     """Append a completed workflow to a branch.

     Ten single-module inserts are followed by a workflow that drops the last
     module (ACTION_DELETE). The resulting head is checked both in memory
     and after reloading the viztrail.
     """

     def check_delete_head(workflow):
         # Expected properties of the workflow created by the delete action
         self.assertEqual(len(workflow.modules), 9)
         self.assertEqual(workflow.descriptor.identifier, '0000000A')
         self.assertEqual(workflow.descriptor.action, ACTION_DELETE)
         self.assertEqual(workflow.descriptor.package_id, PACKAGE_PYTHON)
         self.assertEqual(workflow.descriptor.command_id, PYTHON_CODE)

     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     trail = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties=None,
         base_path=base_path,
     )
     branch = trail.get_default_branch()
     for i in range(10):
         now = get_current_time()
         source = 'print {0}+{0}'.format(i)
         command = python_cell(source=source)
         handle = OSModuleHandle.create_module(
             command=command,
             external_form=source,
             state=MODULE_SUCCESS,
             datasets={},
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(
                 created_at=now, started_at=now, finished_at=now),
             module_folder=trail.modules_folder,
             object_store=trail.object_store,
         )
         if branch.head is None:
             modules = [handle]
         else:
             modules = branch.head.modules + [handle]
         branch.append_workflow(
             modules=modules, action=ACTION_INSERT, command=command)
     # New head without the last module of the current head
     head_modules = branch.get_head().modules
     deleted = branch.append_workflow(
         modules=head_modules[:-1],
         action=ACTION_DELETE,
         command=head_modules[-1].command,
     )
     check_delete_head(deleted)
     # The same workflow has to be the head after reloading the viztrail
     trail = OSViztrailHandle.load_viztrail(base_path)
     branch = trail.get_default_branch()
     history = branch.get_history()
     self.assertEqual(len(history), 11)
     check_delete_head(branch.get_head())
コード例 #8
0
 def test_state(self):
     """Ensure that exactly one state flag is True for each module state."""
     flags = (
         'is_pending',
         'is_canceled',
         'is_error',
         'is_running',
         'is_success',
     )

     def expect_only(handle, active):
         # Check every flag in a fixed order; only `active` may be set
         for flag in flags:
             value = getattr(handle, flag)
             if flag == active:
                 self.assertTrue(value)
             else:
                 self.assertFalse(value)

     # Create original module in pending state
     module = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         module_folder=MODULE_DIR,
         timestamp=ModuleTimestamp(),
         outputs=ModuleOutputs(stdout=[TextOutput('ABC')]),
         provenance=ModuleProvenance(
             read={'DS1': 'ID1'},
             write={
                 'DS1': DatasetDescriptor(identifier='ID2', name='ID2')
             },
         ),
     )
     expect_only(module, 'is_pending')
     # Step through the remaining states one after the other
     module.set_running(external_form='TEST MODULE')
     expect_only(module, 'is_running')
     module.set_canceled()
     expect_only(module, 'is_canceled')
     module.set_error()
     expect_only(module, 'is_error')
     module.set_success()
     expect_only(module, 'is_success')
コード例 #9
0
 def test_running(self):
     """Update module state from pending to running.

     After set_running the module is expected to have a start time, no
     datasets and no outputs, while its provenance is still present. The
     same must hold for the module when read back from the object store.
     """

     def check_running(handle):
         # Expected shape of a module right after it was set to running
         self.assertTrue(handle.is_running)
         self.assertIsNotNone(handle.timestamp.started_at)
         self.assertEqual(len(handle.datasets), 0)
         self.assertEqual(len(handle.outputs.stderr), 0)
         self.assertEqual(len(handle.outputs.stdout), 0)
         self.assertIsNotNone(handle.provenance.read)
         self.assertIsNotNone(handle.provenance.write)
         self.assertIsNotNone(handle.provenance.resources)

     def reload(handle):
         # Read the persisted version of the given module handle
         return OSModuleHandle.load_module(
             identifier=handle.identifier,
             module_path=handle.module_path,
         )

     # Create original module in pending state
     module = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         module_folder=MODULE_DIR,
         timestamp=ModuleTimestamp(),
         datasets={'DS1': DS1},
         outputs=ModuleOutputs(stdout=[TextOutput('ABC')]),
         provenance=ModuleProvenance(
             read={'DS1': 'ID1'},
             write={'DS1': DatasetDescriptor(identifier='ID2')},
             resources={'fileid': '0123456789'},
         ),
     )
     self.assertTrue(module.is_pending)
     module.set_running(external_form='TEST MODULE')
     check_running(module)
     # Read module from object store and ensure that all changes have been
     # materialized properly
     module = reload(module)
     check_running(module)
     # Set running with all optional parameters
     module.set_running(
         started_at=module.timestamp.created_at,
         external_form='Some form',
     )
     self.assertEqual(
         module.timestamp.started_at, module.timestamp.created_at)
     self.assertEqual(module.external_form, 'Some form')
     module = reload(module)
     self.assertEqual(
         module.timestamp.started_at, module.timestamp.created_at)
     self.assertEqual(module.external_form, 'Some form')
0
 def test_multi_append(self):
     """Append ten modules to a branch, one workflow per module.

     After reloading the viztrail, workflow i in the branch history must
     contain the first i + 1 modules with their external form and output.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     trail = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties=None,
         base_path=base_path,
     )
     branch = trail.get_default_branch()
     # Append ten modules
     for i in range(10):
         now = get_current_time()
         source = 'print {0}+{0}'.format(i)
         command = python_cell(source=source)
         handle = OSModuleHandle.create_module(
             command=command,
             external_form=source,
             state=MODULE_SUCCESS,
             datasets={},
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(
                 created_at=now, started_at=now, finished_at=now),
             module_folder=trail.modules_folder,
             object_store=trail.object_store,
         )
         if branch.head is None:
             modules = [handle]
         else:
             modules = branch.head.modules + [handle]
         branch.append_workflow(
             modules=modules, action=ACTION_INSERT, command=command)
         self.assertEqual(len(branch.get_history()), (i + 1))
     trail = OSViztrailHandle.load_viztrail(base_path)
     branch = trail.get_default_branch()
     history = branch.get_history()
     self.assertEqual(len(history), 10)
     # Every workflow in the history extends its predecessor by one module
     for i, entry in enumerate(history):
         workflow = branch.get_workflow(entry.identifier)
         self.assertEqual(len(workflow.modules), (i + 1))
         for m, module in enumerate(workflow.modules):
             self.assertEqual(
                 module.external_form, 'print {0}+{0}'.format(m))
             self.assertEqual(module.outputs.stdout[-1].value, str(m + m))
コード例 #11
0
 def test_create_and_delete_branch_with_default_workflow(self):
     """Ensure that creating and loading branches works if the head workflow
     for the new branch is given.

     A branch is created from the identifiers of five persisted modules. The
     branch must have a head workflow with these five modules, both right
     after creation and after the viztrail has been reloaded.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(
         identifier='DEF',
         properties={PROPERTY_NAME: 'My Viztrail'},
         base_path=base_path)
     self.assertEqual(vt.last_modified_at,
                      vt.default_branch.last_modified_at)
     # Create five modules and collect their identifiers
     modules = [
         OSModuleHandle.create_module(
             command=python_cell(source='print ' + str(i)),
             external_form='TEST MODULE ' + str(i),
             state=MODULE_SUCCESS,
             outputs=ModuleOutputs(),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(),
             datasets=dict(),
             module_folder=vt.modules_folder,
         ).identifier
         for i in range(5)
     ]
     branch = vt.create_branch(properties={PROPERTY_NAME: 'My Branch'},
                               modules=modules)
     self.assertIsNotNone(branch.head)
     self.assertEqual(len(branch.workflows), 1)
     # Reload the viztrail and make sure the branch head was persisted
     vt = OSViztrailHandle.load_viztrail(base_path)
     branch = vt.get_branch(branch.identifier)
     self.assertIsNotNone(branch.head)
     self.assertEqual(len(branch.workflows), 1)
     wf = branch.get_workflow(branch.head.identifier)
     self.assertEqual(len(wf.modules), 5)
     for i in range(5):
         self.assertEqual(wf.modules[i].external_form,
                          'TEST MODULE ' + str(i))
     # The original repeated this assertion twice verbatim; once suffices
     self.assertEqual(vt.last_modified_at, branch.last_modified_at)
コード例 #12
0
 def test_load_with_missing_modules(self):
     """Load a workflow for which the file of one module is missing.

     The file of the third module is removed before the viztrail is
     reloaded. The head workflow and the affected module are then expected
     to be in error state.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     trail = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties=None,
         base_path=base_path,
     )
     branch = trail.get_default_branch()
     # Append five modules, creating one workflow per module
     for i in range(5):
         now = get_current_time()
         source = 'print {0}+{0}'.format(i)
         command = python_cell(source=source)
         handle = OSModuleHandle.create_module(
             command=command,
             external_form=source,
             state=MODULE_SUCCESS,
             datasets={},
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(
                 created_at=now, started_at=now, finished_at=now),
             module_folder=trail.modules_folder,
             object_store=trail.object_store,
         )
         if branch.head is None:
             modules = [handle]
         else:
             modules = branch.head.modules + [handle]
         branch.append_workflow(
             modules=modules, action=ACTION_INSERT, command=command)
         self.assertEqual(len(branch.get_history()), (i + 1))
     # Delete the file for the third module to simulate an error condition
     # in which a file wasn't written properly
     os.remove(branch.head.modules[2].module_path)
     self.assertFalse(os.path.isfile(branch.head.modules[2].module_path))
     trail = OSViztrailHandle.load_viztrail(base_path)
     branch = trail.get_default_branch()
     self.assertTrue(branch.head.get_state().is_error)
     self.assertTrue(branch.head.modules[2].is_error)
コード例 #13
0
 def test_safe_write(self):
     """Update module state with write error.

     set_success is called with outputs whose stderr list contains None. The
     in-memory module is then expected to be in error state while the
     persisted module is still running.
     NOTE(review): presumably the None output makes the write fail, so the
     file on disk is left untouched - confirm against set_success.
     """
     provenance = ModuleProvenance(
         read={'DS1': 'ID1'},
         write={'DS1': DatasetDescriptor(identifier='ID2', name='ID2')},
     )
     # Create original module in pending state
     module = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         module_folder=MODULE_DIR,
         timestamp=ModuleTimestamp(),
         outputs=ModuleOutputs(stdout=[TextOutput('ABC')]),
         provenance=provenance,
     )
     self.assertTrue(module.is_pending)
     module.set_running(external_form='TEST MODULE')
     self.assertTrue(module.is_running)
     broken_outputs = ModuleOutputs(stderr=[None])
     module.set_success(outputs=broken_outputs)
     self.assertTrue(module.is_error)
     # The persisted module must not reflect the failed update
     persisted = OSModuleHandle.load_module(
         identifier=module.identifier,
         module_path=module.module_path,
     )
     self.assertTrue(persisted.is_running)
コード例 #14
0
 def test_create_branch_of_active_workflow(self):
     """Ensure that an exception is raised when attempting to branch off a
     workflow with active modules. None of the branch resources should be
     created.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(
         identifier='DEF',
         properties={PROPERTY_NAME: 'My Viztrail'},
         base_path=base_path,
     )
     # Create one branch and remember how many entries the branches folder
     # contains afterwards
     branch = vt.create_branch(properties={PROPERTY_NAME: 'My Branch'})
     branches_dir = os.path.join(base_path, viztrail.FOLDER_BRANCHES)
     self.assertTrue(
         os.path.isdir(os.path.join(branches_dir, branch.identifier)))
     entries_before = len(os.listdir(branches_dir))
     # Create five modules. The last one is set to running, i.e., is active
     handles = [
         OSModuleHandle.create_module(
             command=python_cell(source='print ' + str(i)),
             external_form='TEST MODULE ' + str(i),
             state=MODULE_SUCCESS,
             outputs=ModuleOutputs(),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(),
             datasets={},
             module_folder=vt.modules_folder,
         )
         for i in range(5)
     ]
     modules = [handle.identifier for handle in handles]
     handles[-1].set_running(external_form='TEST MODULE')
     with self.assertRaises(ValueError):
         vt.create_branch(
             properties={PROPERTY_NAME: 'My Branch'},
             modules=modules,
         )
     # Ensure that no additional entry in the branches folder is created
     entries_after = len(
         os.listdir(os.path.join(base_path, viztrail.FOLDER_BRANCHES)))
     self.assertEqual(entries_before, entries_after)
コード例 #15
0
 def test_read_write_module(self):
     """Persist a module with a plot command and read it back."""
     # Data series with decreasing amounts of optional information
     series = [
         {'column': 1, 'range': '0:50', 'label': 'A'},
         {'column': 2, 'range': '51:100', 'label': 'B'},
         {'column': 3, 'label': 'C'},
         {'column': 4},
     ]
     command = create_plot(
         dataset_name='dataset',
         chart_name='My Chart',
         series=series,
         chart_type='bar',
         chart_grouped=False,
         xaxis_range='0:100',
         xaxis_column=None,
     )
     created = OSModuleHandle.create_module(
         command=command,
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         outputs=ModuleOutputs(),
         provenance=ModuleProvenance(),
         timestamp=ModuleTimestamp(),
         module_folder=MODULE_DIR,
     )
     loaded = OSModuleHandle.load_module(
         identifier=created.identifier,
         module_path=created.module_path,
     )
     self.assertTrue(os.path.isfile(loaded.module_path))
     self.assertEqual(loaded.external_form, 'TEST MODULE')
     self.assertTrue(loaded.is_pending)
コード例 #16
0
 def test_outputs(self):
     """Write modules with output information and read them back.

     Covers modules without output, with error output only, with standard
     output only, and with both output streams.
     """

     def roundtrip(outputs):
         # Persist a pending module with the given outputs and read it back
         created = OSModuleHandle.create_module(
             command=python_cell(source='print 2+2'),
             external_form='TEST MODULE',
             state=MODULE_PENDING,
             outputs=outputs,
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(),
             module_folder=MODULE_DIR,
         )
         return OSModuleHandle.load_module(
             identifier=created.identifier,
             module_path=created.module_path,
         )

     # Module without any output
     loaded = roundtrip(ModuleOutputs())
     self.assertEqual(len(loaded.outputs.stderr), 0)
     self.assertEqual(len(loaded.outputs.stdout), 0)
     # Module with error output
     loaded = roundtrip(ModuleOutputs(stderr=[TextOutput('Some text')]))
     self.assertEqual(len(loaded.outputs.stderr), 1)
     self.assertTrue(loaded.outputs.stderr[0].is_text)
     self.assertEqual(loaded.outputs.stderr[0].value, 'Some text')
     self.assertEqual(len(loaded.outputs.stdout), 0)
     # Module with standard output
     loaded = roundtrip(
         ModuleOutputs(
             stdout=[
                 TextOutput('Some text'),
                 OutputObject(type='chart', value='123'),
             ]
         )
     )
     self.assertEqual(len(loaded.outputs.stdout), 2)
     self.assertTrue(loaded.outputs.stdout[0].is_text)
     self.assertEqual(loaded.outputs.stdout[0].value, 'Some text')
     self.assertFalse(loaded.outputs.stdout[1].is_text)
     self.assertEqual(loaded.outputs.stdout[1].value, '123')
     self.assertEqual(len(loaded.outputs.stderr), 0)
     # Module with standard error and standard output
     loaded = roundtrip(
         ModuleOutputs(
             stderr=[TextOutput('Some text')],
             stdout=[
                 TextOutput('Some text'),
                 OutputObject(type='chart', value='123'),
             ],
         )
     )
     self.assertEqual(len(loaded.outputs.stdout), 2)
     self.assertEqual(len(loaded.outputs.stderr), 1)
コード例 #17
0
 def test_provenance(self):
     """Round-trip modules with different provenance through the store.

     Written and read back are modules with empty provenance, with read
     and resource information only, with write information only, with
     read, write and delete information, and finally with a chart view.
     """
     def store_and_reload(provenance, **extra):
         # Create a pending module with the given provenance (plus any
         # additional create_module() arguments) and load it again.
         written = OSModuleHandle.create_module(
             command=python_cell(source='print 2+2'),
             external_form='TEST MODULE',
             state=MODULE_PENDING,
             outputs=ModuleOutputs(),
             provenance=provenance,
             timestamp=ModuleTimestamp(),
             module_folder=MODULE_DIR,
             **extra)
         return OSModuleHandle.load_module(
             identifier=written.identifier,
             module_path=written.module_path)

     # Module without any provenance information
     loaded = store_and_reload(ModuleProvenance(), datasets=DATASETS)
     self.assertIsNone(loaded.provenance.read)
     self.assertIsNone(loaded.provenance.write)
     self.assertIsNone(loaded.provenance.delete)
     self.assertIsNone(loaded.provenance.resources)

     # Module that has read provenance (and resources) but no write
     # provenance
     loaded = store_and_reload(
         ModuleProvenance(read={'DS1': 'ID1'},
                          resources={'fileId': '0123456789'}))
     self.assertIsNotNone(loaded.provenance.read)
     self.assertEqual(len(loaded.provenance.read), 1)
     self.assertEqual(loaded.provenance.read['DS1'], 'ID1')
     self.assertEqual(loaded.provenance.resources['fileId'], '0123456789')
     self.assertIsNone(loaded.provenance.write)

     # Module that only has write provenance
     loaded = store_and_reload(ModuleProvenance(write=DATASETS))
     self.assertIsNotNone(loaded.provenance.write)
     self.assertEqual(len(loaded.provenance.write), 2)
     self.assertEqual(loaded.provenance.write['DS1'].identifier, 'ID1')
     self.assertEqual(loaded.provenance.write['DS2'].identifier, 'ID2')
     self.assertIsNone(loaded.provenance.read)

     # Module with read, write and delete provenance
     loaded = store_and_reload(
         ModuleProvenance(read={'DS1': 'ID1'},
                          write=DATASETS,
                          delete=['A', 'B']))
     self.assertIsNotNone(loaded.provenance.read)
     self.assertEqual(len(loaded.provenance.read), 1)
     self.assertEqual(loaded.provenance.read['DS1'], 'ID1')
     self.assertIsNotNone(loaded.provenance.write)
     self.assertEqual(len(loaded.provenance.write), 2)
     self.assertEqual(loaded.provenance.write['DS1'].identifier, 'ID1')
     self.assertEqual(loaded.provenance.write['DS2'].identifier, 'ID2')
     self.assertEqual(loaded.provenance.delete, ['A', 'B'])

     # Module with a chart view that has three data series
     series = [
         DataSeriesHandle(column='COL1',
                          label='SERIES1',
                          range_start=0,
                          range_end=100),
         DataSeriesHandle(column='COL2',
                          range_start=101,
                          range_end=200),
         DataSeriesHandle(column='COL3',
                          label='SERIES2')
     ]
     chart = ChartViewHandle(identifier='A',
                             dataset_name='DS1',
                             chart_name='My Chart',
                             data=series,
                             x_axis=1,
                             chart_type='bar',
                             grouped_chart=True)
     loaded = store_and_reload(ModuleProvenance(charts=[chart]))
     self.assertEqual(len(loaded.provenance.charts), 1)
     restored = loaded.provenance.charts[0]
     self.assertEqual(chart.identifier, restored.identifier)
     self.assertEqual(chart.dataset_name, restored.dataset_name)
     self.assertEqual(chart.chart_name, restored.chart_name)
     self.assertEqual(chart.x_axis, restored.x_axis)
     self.assertEqual(chart.chart_type, restored.chart_type)
     self.assertEqual(chart.grouped_chart, restored.grouped_chart)
     self.assertEqual(len(restored.data), 3)
     # Compare the restored data series pairwise with the original ones
     for got, expected in zip(restored.data, chart.data):
         self.assertEqual(got.column, expected.column)
         self.assertEqual(got.label, expected.label)
         self.assertEqual(got.range_start, expected.range_start)
         self.assertEqual(got.range_end, expected.range_end)
コード例 #18
0
 def test_error(self):
     """Move a pending module into the error state and verify the result.

     The state change is checked on the in-memory handle as well as on a
     handle that is re-loaded from the store, first for set_error()
     without arguments and then with an explicit timestamp and outputs.
     """
     def assert_provenance_kept(mod):
         # The provenance given at creation time is expected to still be
         # present after the module was set to error.
         self.assertIsNotNone(mod.provenance.read)
         self.assertIsNotNone(mod.provenance.write)
         self.assertIsNotNone(mod.provenance.resources)
         self.assertEqual(mod.provenance.resources['fileid'], '0123456789')

     # Create the original (pending) module
     module = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         module_folder=MODULE_DIR,
         outputs=ModuleOutputs(stdout=[TextOutput('ABC')]),
         provenance=ModuleProvenance(
             read={'DS1': 'ID1'},
             write={
                 'DS1': DatasetDescriptor(identifier='ID2', name='ID2')
             },
             resources={'fileid': '0123456789'}),
         timestamp=ModuleTimestamp())

     # Set error without arguments: the outputs are expected to be empty
     # and a finished_at timestamp is expected to be set
     module.set_error()
     self.assertTrue(module.is_error)
     self.assertIsNotNone(module.timestamp.finished_at)
     self.assertEqual(len(module.outputs.stderr), 0)
     self.assertEqual(len(module.outputs.stdout), 0)
     assert_provenance_kept(module)

     # Read the module from the object store and ensure that all changes
     # have been materialized properly
     module = OSModuleHandle.load_module(
         identifier=module.identifier,
         module_path=module.module_path)
     self.assertTrue(module.is_error)
     self.assertIsNotNone(module.timestamp.finished_at)
     self.assertEqual(len(module.outputs.stderr), 0)
     self.assertEqual(len(module.outputs.stdout), 0)
     assert_provenance_kept(module)

     # Set error again, this time with timestamp and output information
     ts = get_current_time()
     error_outputs = ModuleOutputs(stderr=[TextOutput('Some Error')])
     module.set_error(finished_at=ts, outputs=error_outputs)
     self.assertTrue(module.is_error)
     self.assertIsNotNone(module.timestamp.finished_at)
     self.assertEqual(module.timestamp.finished_at, ts)
     self.assertEqual(len(module.outputs.stderr), 1)
     self.assertEqual(module.outputs.stderr[0].value, 'Some Error')
     self.assertEqual(len(module.outputs.stdout), 0)
     assert_provenance_kept(module)

     # The re-loaded module has to show the same timestamp and outputs
     module = OSModuleHandle.load_module(
         identifier=module.identifier,
         module_path=module.module_path)
     self.assertTrue(module.is_error)
     self.assertIsNotNone(module.timestamp.finished_at)
     self.assertEqual(module.timestamp.finished_at, ts)
     self.assertEqual(len(module.outputs.stderr), 1)
     self.assertEqual(module.outputs.stderr[0].value, 'Some Error')
     self.assertEqual(len(module.outputs.stdout), 0)
     assert_provenance_kept(module)
コード例 #19
0
 def test_branch_cache(self):
     """Test that the workflow cache of a branch is bounded in size.

     More workflows are appended than the cache can hold, and the first
     workflow is expected to be dropped once DEFAULT_CACHE_SIZE would be
     exceeded. Afterwards the viztrail is loaded again: the cache is
     expected to start out empty, to be filled by get_workflow(), and to
     drop the first workflow again once all other workflows in the branch
     history have been fetched.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(identifier='ABC',
                                           properties={},
                                           base_path=base_path)
     branch = vt.get_default_branch()
     command = python_cell(source='print 2+2')

     def new_module():
         # Create one more successfully executed module in the viztrail's
         # module folder. Only used before the viztrail is re-loaded.
         return OSModuleHandle.create_module(
             command=command,
             external_form='print 2+2',
             state=MODULE_SUCCESS,
             timestamp=ModuleTimestamp(created_at=get_current_time(),
                                       started_at=get_current_time(),
                                       finished_at=get_current_time()),
             outputs=ModuleOutputs(stdout=[TextOutput('4')]),
             provenance=ModuleProvenance(),
             module_folder=vt.modules_folder,
             object_store=vt.object_store)

     def cached_ids(from_branch):
         # Identifiers of all workflows currently held in the branch cache
         return [w.identifier for w in from_branch.cache]

     module = new_module()
     wf = branch.append_workflow(modules=[module],
                                 action=ACTION_INSERT,
                                 command=command)
     # Directly after the append the new workflow is not expected to be in
     # the cache
     self.assertNotIn(wf.identifier, cached_ids(branch))
     # Fill the cache up to its limit. The first workflow has to stay in
     # the cache the whole time.
     for i in range(DEFAULT_CACHE_SIZE):
         module = new_module()
         branch.append_workflow(modules=branch.head.modules + [module],
                                action=ACTION_INSERT,
                                command=command)
         self.assertEqual(len(branch.cache), (i + 1))
         self.assertIn(wf.identifier, cached_ids(branch))
     # One more workflow exceeds the limit and evicts the first workflow
     module = new_module()
     branch.append_workflow(modules=branch.head.modules + [module],
                            action=ACTION_INSERT,
                            command=command)
     self.assertEqual(len(branch.cache), DEFAULT_CACHE_SIZE)
     self.assertNotIn(wf.identifier, cached_ids(branch))
     # A freshly loaded branch starts with an empty cache
     vt = OSViztrailHandle.load_viztrail(base_path)
     branch = vt.get_default_branch()
     self.assertEqual(len(branch.cache), 0)
     self.assertNotIn(wf.identifier, cached_ids(branch))
     # Fetching a workflow puts it into the cache
     branch.get_workflow(wf.identifier)
     self.assertIn(wf.identifier, cached_ids(branch))
     # Fetching all remaining workflows evicts the first workflow again
     for wf_desc in branch.get_history():
         if wf_desc.identifier != wf.identifier:
             branch.get_workflow(wf_desc.identifier)
     self.assertEqual(len(branch.cache), DEFAULT_CACHE_SIZE)
     self.assertNotIn(wf.identifier, cached_ids(branch))
コード例 #20
0
 def test_cache_active_workflows(self):
     """Test caching for workflows that are active.

     The first workflow contains a pending module. It is expected to stay
     in the cache even when the cache grows beyond DEFAULT_CACHE_SIZE.
     After the pending module is set to error, the next append is expected
     to shrink the cache back to DEFAULT_CACHE_SIZE by evicting that
     workflow together with the one that followed it.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(identifier='ABC',
                                           properties={},
                                           base_path=base_path)
     branch = vt.get_default_branch()
     command = python_cell(source='print 2+2')

     def new_module():
         # Create one more successfully executed module in the viztrail's
         # module folder.
         return OSModuleHandle.create_module(
             command=command,
             external_form='print 2+2',
             state=MODULE_SUCCESS,
             timestamp=ModuleTimestamp(created_at=get_current_time(),
                                       started_at=get_current_time(),
                                       finished_at=get_current_time()),
             outputs=ModuleOutputs(stdout=[TextOutput('4')]),
             provenance=ModuleProvenance(),
             module_folder=vt.modules_folder,
             object_store=vt.object_store)

     def cached_ids():
         # Identifiers of all workflows currently held in the branch cache
         return [w.identifier for w in branch.cache]

     # The first workflow consists of a single pending module
     pending_module = OSModuleHandle.create_module(
         command=command,
         external_form='print 2+2',
         state=MODULE_PENDING,
         timestamp=ModuleTimestamp(created_at=get_current_time()),
         outputs=ModuleOutputs(),
         provenance=ModuleProvenance(),
         module_folder=vt.modules_folder,
         object_store=vt.object_store)
     wf = branch.append_workflow(modules=[pending_module],
                                 action=ACTION_INSERT,
                                 command=command)
     # Directly after the append the new workflow is not expected to be in
     # the cache
     self.assertNotIn(wf.identifier, cached_ids())
     # Fill the cache up to its limit
     for i in range(DEFAULT_CACHE_SIZE):
         module = new_module()
         branch.append_workflow(modules=branch.head.modules + [module],
                                action=ACTION_INSERT,
                                command=command)
         self.assertEqual(len(branch.cache), (i + 1))
         self.assertIn(wf.identifier, cached_ids())
     module = new_module()
     branch.append_workflow(modules=branch.head.modules + [module],
                            action=ACTION_INSERT,
                            command=command)
     # The active workflow should not be removed, so the cache holds one
     # entry more than its default size
     self.assertEqual(len(branch.cache), DEFAULT_CACHE_SIZE + 1)
     self.assertIn(wf.identifier, cached_ids())
     # Set module state to error and append another workflow. This should
     # evict two workflows
     second_wf = branch.cache[1]
     third_wf = branch.cache[2]
     pending_module.set_error()
     module = new_module()
     branch.append_workflow(modules=branch.head.modules + [module],
                            action=ACTION_INSERT,
                            command=command)
     # The first workflow is no longer active. It and the second workflow
     # should have been removed, while the third workflow remains cached.
     self.assertEqual(len(branch.cache), DEFAULT_CACHE_SIZE)
     self.assertNotIn(wf.identifier, cached_ids())
     self.assertNotIn(second_wf.identifier, cached_ids())
     self.assertIn(third_wf.identifier, cached_ids())
コード例 #21
0
 def test_success(self) -> None:
     """Update module state from pending to success.

     The state change is checked on the in-memory handle as well as on a
     handle that is re-loaded from the store, first for set_success()
     without arguments and then with timestamp, outputs and provenance.
     """
     # Create original module
     module = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         module_folder=MODULE_DIR,
         timestamp=ModuleTimestamp(),
         outputs=ModuleOutputs(stdout=[TextOutput('ABC')]),
         provenance=ModuleProvenance(
             read={'DS1': 'ID1'},
             write={'DS1': DatasetDescriptor(identifier='ID2',
                                             name='ID2')}))
     self.assertTrue(module.is_pending)
     module.set_running(external_form='TEST MODULE')
     # Set success without arguments: outputs and read/write provenance
     # are expected to be empty afterwards
     module.set_success()
     self.assertTrue(module.is_success)
     self.assertIsNotNone(module.timestamp.started_at)
     self.assertIsNotNone(module.timestamp.finished_at)
     self.assertEqual(len(module.outputs.stderr), 0)
     self.assertEqual(len(module.outputs.stdout), 0)
     # assertEqual reports the actual value if the comparison fails
     self.assertEqual(module.provenance.read, {})
     self.assertEqual(module.provenance.write, {})
     # Read module from object store and ensure that all changes have been
     # materialized properly
     module = OSModuleHandle.load_module(identifier=module.identifier,
                                         module_path=module.module_path)
     self.assertTrue(module.is_success)
     self.assertIsNotNone(module.timestamp.started_at)
     self.assertIsNotNone(module.timestamp.finished_at)
     self.assertEqual(len(module.outputs.stderr), 0)
     self.assertEqual(len(module.outputs.stdout), 0)
     self.assertEqual(module.provenance.read, {})
     self.assertEqual(module.provenance.write, {})
     # Set success with all optional parameters
     ts = get_current_time()
     module.set_success(
         finished_at=ts,
         outputs=ModuleOutputs(stdout=[TextOutput('XYZ')]),
         provenance=ModuleProvenance(
             read={'DS1': 'ID1'},
             write={'DS1': DatasetDescriptor(identifier='ID2',
                                             name='ID2')}))
     self.assertTrue(module.is_success)
     self.assertIsNotNone(module.timestamp.started_at)
     self.assertIsNotNone(module.timestamp.finished_at)
     self.assertEqual(module.timestamp.finished_at, ts)
     self.assertEqual(len(module.outputs.stderr), 0)
     self.assertEqual(len(module.outputs.stdout), 1)
     self.assertEqual(module.outputs.stdout[0].value, 'XYZ')
     self.assertIsNotNone(module.provenance.read)
     self.assertEqual(module.provenance.read['DS1'], 'ID1')
     self.assertIsNotNone(module.provenance.write)
     self.assertEqual(module.provenance.write['DS1'].identifier, 'ID2')
     # NOTE(review): the module is loaded twice in a row. The result of the
     # first load (without prev_state) is only used to supply identifier
     # and path for the second load and is never checked itself - confirm
     # whether both calls are intended.
     module = OSModuleHandle.load_module(identifier=module.identifier,
                                         module_path=module.module_path)
     module = OSModuleHandle.load_module(identifier=module.identifier,
                                         module_path=module.module_path,
                                         prev_state=dict())
     self.assertTrue(module.is_success)
     self.assertIsNotNone(module.timestamp.started_at)
     self.assertIsNotNone(module.timestamp.finished_at)
     self.assertEqual(module.timestamp.finished_at, ts)
     self.assertEqual(len(module.outputs.stderr), 0)
     self.assertEqual(len(module.outputs.stdout), 1)
     self.assertEqual(module.outputs.stdout[0].value, 'XYZ')
     self.assertIsNotNone(module.provenance.read)
     self.assertEqual(module.provenance.read['DS1'], 'ID1')
     self.assertIsNotNone(module.provenance.write)
     self.assertEqual(module.provenance.write['DS1'].identifier, 'ID2')