def test_single_append(self):
    """Test appending a single module to an empty viztrail branch.

    Creates a viztrail on disk, appends a workflow with one successful
    module to the default branch, checks that files for the workflow handle
    and the module exist, and finally reloads the viztrail to verify the
    module at the branch head.
    """
    base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(base_path)
    vt = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties={},
        base_path=base_path
    )
    branch = vt.get_default_branch()
    command = python_cell(source='print 2+2')
    ts = get_current_time()
    module = OSModuleHandle.create_module(
        command=command,
        external_form='print 2+2',
        state=MODULE_SUCCESS,
        outputs=ModuleOutputs(stdout=[TextOutput('4')]),
        provenance=ModuleProvenance(),
        timestamp=ModuleTimestamp(
            created_at=ts,
            started_at=ts,
            finished_at=ts
        ),
        module_folder=vt.modules_folder,
        object_store=vt.object_store
    )
    wf = branch.append_workflow(
        modules=[module],
        action=ACTION_INSERT,
        command=command
    )
    # We expect that there exists a file for the workflow handle and one for
    # the new module
    workflow_file = os.path.join(branch.base_path, wf.identifier)
    self.assertTrue(os.path.isfile(workflow_file))
    # module_path is used as is; joining a single path component is a no-op
    self.assertTrue(os.path.isfile(wf.modules[-1].module_path))
    # Load the viztrail and get the module at the branch head
    vt = OSViztrailHandle.load_viztrail(base_path)
    module = vt.get_default_branch().get_head().modules[-1]
    self.assertEqual(module.external_form, 'print 2+2')
    self.assertEqual(module.outputs.stdout[-1].value, '4')
def append_workflow(self, modules, action, command, pending_modules=None):
    """Append a workflow as the new head of the branch.

    The new workflow may contain modules that have not been persisted
    previously (pending modules). These modules are persisted as part of
    the workflow being created.

    Parameters
    ----------
    modules: list(vizier.viztrail.module.ModuleHandle)
        List of modules in the workflow that are completed
    action: string
        Identifier of the action that created the workflow
    command: vizier.viztrail.module.ModuleCommand
        Specification of the executed command that created the workflow
    pending_modules: list(vizier.viztrail.module.ModuleHandle), optional
        List of modules in the workflow that need to be materialized

    Returns
    -------
    vizier.viztrail.workflow.base.WorkflowHandle
    """
    # Copy the list so that the caller's list is not modified when the
    # materialized pending modules are appended
    workflow_modules = list(modules)
    if pending_modules is not None:
        for pm in pending_modules:
            # Make sure the started_at timestamp is set if the module is
            # running
            if pm.is_running and pm.timestamp.started_at is None:
                pm.timestamp.started_at = pm.timestamp.created_at
            module = OSModuleHandle.create_module(
                command=pm.command,
                external_form=pm.external_form,
                state=pm.state,
                timestamp=pm.timestamp,
                datasets=pm.datasets,
                outputs=pm.outputs,
                provenance=pm.provenance,
                module_folder=self.modules_folder,
                object_store=self.object_store
            )
            workflow_modules.append(module)
    # Write handle for workflow at branch head
    descriptor = write_workflow_handle(
        modules=[m.identifier for m in workflow_modules],
        workflow_count=len(self.workflows),
        base_path=self.base_path,
        object_store=self.object_store,
        action=action,
        command=command,
        created_at=get_current_time()
    )
    # Get new workflow and replace the branch head. Move the current head
    # to the cache.
    workflow = WorkflowHandle(
        identifier=descriptor.identifier,
        branch_id=self.identifier,
        modules=workflow_modules,
        descriptor=descriptor
    )
    self.workflows.append(workflow.descriptor)
    if self.head is not None:
        self.add_to_cache(self.head)
    self.head = workflow
    return workflow
def test_load_with_dataset_delete(self):
    """Test loading workflows where each module creates a new dataset and
    deletes the previous dataset (except for the first module).
    """
    base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(base_path)
    vt = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties={},
        base_path=base_path
    )
    branch = vt.get_default_branch()
    # Append five modules. Module i writes dataset 'DS<i>' with i columns
    # and deletes the dataset written by its predecessor.
    for i in range(5):
        ts = get_current_time()
        deleted_datasets = list()
        if i > 0:
            deleted_datasets.append('DS' + str(i - 1))
        command = python_cell(source='print ' + str(i) + '+' + str(i))
        module = OSModuleHandle.create_module(
            command=command,
            external_form='print ' + str(i) + '+' + str(i),
            state=MODULE_SUCCESS,
            outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
            provenance=ModuleProvenance(
                write={
                    'DS' + str(i): DatasetDescriptor(
                        identifier=str(i),
                        name='DS' + str(i),
                        columns=[
                            DatasetColumn(identifier=j, name=str(j))
                            for j in range(i)
                        ],
                    )
                },
                delete=deleted_datasets
            ),
            timestamp=ModuleTimestamp(
                created_at=ts,
                started_at=ts,
                finished_at=ts
            ),
            module_folder=vt.modules_folder,
            object_store=vt.object_store
        )
        # The new workflow extends the modules of the current head (if any)
        if branch.head is not None:
            modules = branch.head.modules + [module]
        else:
            modules = [module]
        branch.append_workflow(
            modules=modules,
            action=ACTION_INSERT,
            command=command
        )
    vt = OSViztrailHandle.load_viztrail(base_path)
    workflow = vt.get_default_branch().get_head()
    self.assertEqual(len(workflow.modules), 5)
    # Replay the provenance module by module. After each step exactly one
    # dataset is expected to remain.
    datasets = {}
    for i in range(5):
        module = workflow.modules[i]
        datasets = module.provenance.get_database_state(datasets)
        self.assertEqual(len(datasets), 1)
        key = 'DS' + str(i)
        self.assertIn(key, datasets)
        self.assertEqual(len(datasets[key].columns), i)
def test_timestamps(self):
    """Test reading and writing modules with different timestamp values.

    Timestamps are modified on a loaded module handle, written back, and
    compared against the values obtained after loading the module again.
    """
    def load(handle):
        # Read the module that belongs to the given handle from disk
        return OSModuleHandle.load_module(
            identifier=handle.identifier,
            module_path=handle.module_path
        )

    written = OSModuleHandle.create_module(
        command=python_cell(source='print 2+2'),
        external_form='TEST MODULE',
        state=MODULE_PENDING,
        outputs=ModuleOutputs(),
        provenance=ModuleProvenance(),
        timestamp=ModuleTimestamp(),
        module_folder=MODULE_DIR
    )
    loaded = load(written)
    # Set the started_at timestamp and make sure created_at is unaffected
    created_at = loaded.timestamp.created_at
    started_at = to_datetime('2018-11-26T13:00:00.000000')
    loaded.timestamp.started_at = started_at
    loaded.write_module()
    loaded = load(written)
    self.assertEqual(loaded.timestamp.created_at, created_at)
    self.assertEqual(loaded.timestamp.started_at, started_at)
    # Overwrite created_at and set finished_at
    finished_at = to_datetime('2018-11-26T13:00:00.000010')
    loaded.timestamp.created_at = finished_at
    loaded.timestamp.finished_at = finished_at
    loaded.write_module()
    loaded = load(written)
    self.assertEqual(loaded.timestamp.created_at, finished_at)
    self.assertEqual(loaded.timestamp.started_at, started_at)
    self.assertEqual(loaded.timestamp.finished_at, finished_at)
    # Create a module with given created_at and started_at timestamps
    written = OSModuleHandle.create_module(
        command=python_cell(source='print 2+2'),
        external_form='TEST MODULE',
        state=MODULE_PENDING,
        outputs=ModuleOutputs(),
        provenance=ModuleProvenance(),
        timestamp=ModuleTimestamp(
            created_at=created_at,
            started_at=started_at
        ),
        module_folder=MODULE_DIR
    )
    loaded = load(written)
    self.assertEqual(loaded.timestamp.created_at, created_at)
    self.assertEqual(loaded.timestamp.started_at, started_at)
    self.assertIsNone(loaded.timestamp.finished_at)
def test_load_active(self):
    """Test loading workflows with active modules.

    Modules whose persisted state is running are expected to be reported
    as canceled after the viztrail has been loaded again.
    """
    base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(base_path)
    vt = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties=None,
        base_path=base_path
    )
    branch = vt.get_default_branch()
    # Append five modules
    for i in range(5):
        ts = get_current_time()
        command = python_cell(source='print ' + str(i) + '+' + str(i))
        module = OSModuleHandle.create_module(
            command=command,
            external_form='print ' + str(i) + '+' + str(i),
            state=MODULE_SUCCESS,
            datasets=dict(),
            outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
            provenance=ModuleProvenance(),
            timestamp=ModuleTimestamp(
                created_at=ts,
                started_at=ts,
                finished_at=ts
            ),
            module_folder=vt.modules_folder,
            object_store=vt.object_store
        )
        # The new workflow extends the modules of the current head (if any)
        if branch.head is not None:
            modules = branch.head.modules + [module]
        else:
            modules = [module]
        branch.append_workflow(
            modules=modules,
            action=ACTION_INSERT,
            command=command
        )
        self.assertEqual(len(branch.get_history()), (i + 1))
    # This is a hack to simulate loading workflows with active modules
    # Change state of last two modules in branch head to an active state
    m = branch.get_head().modules[-2]
    m.state = MODULE_RUNNING
    m.write_module()
    m = branch.get_head().modules[-1]
    m.state = MODULE_RUNNING
    m.write_module()
    vt = OSViztrailHandle.load_viztrail(base_path)
    branch = vt.get_default_branch()
    self.assertTrue(branch.get_head().modules[0].is_success)
    self.assertTrue(branch.get_head().modules[1].is_success)
    self.assertTrue(branch.get_head().modules[2].is_success)
    self.assertTrue(branch.get_head().modules[3].is_canceled)
    self.assertTrue(branch.get_head().modules[4].is_canceled)
    # Change state of last module in second workflow to an active state
    m = branch.get_head().modules[1]
    m.state = MODULE_RUNNING
    m.write_module()
    vt = OSViztrailHandle.load_viztrail(base_path)
    branch = vt.get_default_branch()
    wf = branch.get_workflow(branch.get_history()[1].identifier)
    self.assertTrue(wf.modules[0].is_success)
    self.assertTrue(wf.modules[1].is_canceled)
def test_datasets(self):
    """Test reading and writing modules with dataset information.

    The same module is written once in pending and once in success state.
    The test expects no datasets on the loaded pending module and the full
    dataset descriptors on the loaded successful module.
    """
    def store_and_load(state):
        # Write a module in the given state and read it back from disk
        written = OSModuleHandle.create_module(
            command=python_cell(source='print 2+2'),
            external_form='TEST MODULE',
            state=state,
            outputs=ModuleOutputs(),
            provenance=ModuleProvenance(write=DATASETS),
            timestamp=ModuleTimestamp(),
            module_folder=MODULE_DIR,
            datasets=DATASETS
        )
        return OSModuleHandle.load_module(
            identifier=written.identifier,
            module_path=written.module_path,
            prev_state=dict()
        )

    # Pending module
    loaded = store_and_load(MODULE_PENDING)
    self.assertEqual(len(loaded.datasets), 0)
    # Successful module
    loaded = store_and_load(MODULE_SUCCESS)
    self.assertEqual(len(loaded.datasets), 2)
    # First dataset has neither columns nor rows
    ds1 = loaded.datasets['DS1']
    self.assertEqual(ds1.identifier, 'ID1')
    self.assertEqual(len(loaded.datasets['DS1'].columns), 0)
    self.assertEqual(loaded.datasets['DS1'].row_count, 0)
    # Second dataset has two typed columns and 100 rows
    ds2 = loaded.datasets['DS2']
    self.assertEqual(ds2.identifier, 'ID2')
    self.assertEqual(len(ds2.columns), 2)
    first_col = ds2.columns[0]
    self.assertEqual(first_col.identifier, 0)
    self.assertEqual(first_col.name, 'ABC')
    self.assertEqual(first_col.data_type, 'int')
    second_col = ds2.columns[1]
    self.assertEqual(second_col.identifier, 1)
    self.assertEqual(second_col.name, 'xyz')
    self.assertEqual(second_col.data_type, 'real')
    self.assertEqual(ds2.row_count, 100)
def test_completed_append(self):
    """Test appending a completed workflow to a branch.

    After ten insert workflows, a workflow that contains all but the last
    module of the branch head is appended with a delete action. The
    resulting workflow is verified directly and again after the viztrail
    has been loaded from disk.
    """
    base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(base_path)
    vt = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties=None,
        base_path=base_path
    )
    branch = vt.get_default_branch()
    for i in range(10):
        ts = get_current_time()
        command = python_cell(source='print ' + str(i) + '+' + str(i))
        module = OSModuleHandle.create_module(
            command=command,
            external_form='print ' + str(i) + '+' + str(i),
            state=MODULE_SUCCESS,
            datasets=dict(),
            outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
            provenance=ModuleProvenance(),
            timestamp=ModuleTimestamp(
                created_at=ts,
                started_at=ts,
                finished_at=ts
            ),
            module_folder=vt.modules_folder,
            object_store=vt.object_store
        )
        # The new workflow extends the modules of the current head (if any)
        if branch.head is not None:
            modules = branch.head.modules + [module]
        else:
            modules = [module]
        branch.append_workflow(
            modules=modules,
            action=ACTION_INSERT,
            command=command
        )

    def assert_delete_workflow(workflow):
        # Expected properties of the workflow created by the delete action
        self.assertEqual(len(workflow.modules), 9)
        self.assertEqual(workflow.descriptor.identifier, '0000000A')
        self.assertEqual(workflow.descriptor.action, ACTION_DELETE)
        self.assertEqual(workflow.descriptor.package_id, PACKAGE_PYTHON)
        self.assertEqual(workflow.descriptor.command_id, PYTHON_CODE)

    # Append a workflow without the last module of the current head
    head_modules = branch.get_head().modules
    wf = branch.append_workflow(
        modules=head_modules[:-1],
        action=ACTION_DELETE,
        command=head_modules[-1].command
    )
    assert_delete_workflow(wf)
    # Reload the viztrail and verify the persisted branch head
    vt = OSViztrailHandle.load_viztrail(base_path)
    branch = vt.get_default_branch()
    history = branch.get_history()
    self.assertEqual(len(history), 11)
    assert_delete_workflow(branch.get_head())
def test_state(self):
    """Ensure that only one of the state flags is True at the same time."""
    # Create original module
    provenance = ModuleProvenance(
        read={'DS1': 'ID1'},
        write={'DS1': DatasetDescriptor(identifier='ID2', name='ID2')}
    )
    module = OSModuleHandle.create_module(
        command=python_cell(source='print 2+2'),
        external_form='TEST MODULE',
        state=MODULE_PENDING,
        module_folder=MODULE_DIR,
        timestamp=ModuleTimestamp(),
        outputs=ModuleOutputs(stdout=[TextOutput('ABC')]),
        provenance=provenance
    )
    state_flags = (
        'is_pending',
        'is_canceled',
        'is_error',
        'is_running',
        'is_success'
    )

    def assert_only(active_flag):
        # Exactly the given flag is expected to be set; all others are not
        for flag in state_flags:
            value = getattr(module, flag)
            if flag == active_flag:
                self.assertTrue(value)
            else:
                self.assertFalse(value)

    # Pending
    assert_only('is_pending')
    # Running
    module.set_running(external_form='TEST MODULE')
    assert_only('is_running')
    # Canceled
    module.set_canceled()
    assert_only('is_canceled')
    # Error
    module.set_error()
    assert_only('is_error')
    # Success
    module.set_success()
    assert_only('is_success')
def test_running(self):
    """Update module state from pending to running."""
    # Create original module
    provenance = ModuleProvenance(
        read={'DS1': 'ID1'},
        write={'DS1': DatasetDescriptor(identifier='ID2')},
        resources={'fileid': '0123456789'}
    )
    module = OSModuleHandle.create_module(
        command=python_cell(source='print 2+2'),
        external_form='TEST MODULE',
        state=MODULE_PENDING,
        module_folder=MODULE_DIR,
        timestamp=ModuleTimestamp(),
        datasets={'DS1': DS1},
        outputs=ModuleOutputs(stdout=[TextOutput('ABC')]),
        provenance=provenance
    )

    def reload(handle):
        # Read the module that belongs to the given handle from disk
        return OSModuleHandle.load_module(
            identifier=handle.identifier,
            module_path=handle.module_path
        )

    def assert_running(handle):
        # A running module has a start time, no datasets or outputs, and
        # keeps its provenance information
        self.assertTrue(handle.is_running)
        self.assertIsNotNone(handle.timestamp.started_at)
        self.assertEqual(len(handle.datasets), 0)
        self.assertEqual(len(handle.outputs.stderr), 0)
        self.assertEqual(len(handle.outputs.stdout), 0)
        self.assertIsNotNone(handle.provenance.read)
        self.assertIsNotNone(handle.provenance.write)
        self.assertIsNotNone(handle.provenance.resources)

    self.assertTrue(module.is_pending)
    module.set_running(external_form='TEST MODULE')
    assert_running(module)
    # Read module from object store and ensure that all changes have been
    # materialized properly
    module = reload(module)
    assert_running(module)
    # Set running with all optional parameters
    module.set_running(
        started_at=module.timestamp.created_at,
        external_form='Some form'
    )
    self.assertEqual(
        module.timestamp.started_at,
        module.timestamp.created_at
    )
    self.assertEqual(module.external_form, 'Some form')
    module = reload(module)
    self.assertEqual(
        module.timestamp.started_at,
        module.timestamp.created_at
    )
    self.assertEqual(module.external_form, 'Some form')
def test_multi_append(self):
    """Test appending modules to viztrail branch.

    Ten workflows are appended, each adding one module to the modules of
    the previous head. After reloading the viztrail every workflow in the
    branch history is checked for its modules and their outputs.
    """
    base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(base_path)
    vt = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties=None,
        base_path=base_path
    )
    branch = vt.get_default_branch()
    # Append ten modules
    for i in range(10):
        ts = get_current_time()
        command = python_cell(source='print ' + str(i) + '+' + str(i))
        module = OSModuleHandle.create_module(
            command=command,
            external_form='print ' + str(i) + '+' + str(i),
            state=MODULE_SUCCESS,
            datasets=dict(),
            outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
            provenance=ModuleProvenance(),
            timestamp=ModuleTimestamp(
                created_at=ts,
                started_at=ts,
                finished_at=ts
            ),
            module_folder=vt.modules_folder,
            object_store=vt.object_store
        )
        # The new workflow extends the modules of the current head (if any)
        if branch.head is not None:
            modules = branch.head.modules + [module]
        else:
            modules = [module]
        branch.append_workflow(
            modules=modules,
            action=ACTION_INSERT,
            command=command
        )
        self.assertEqual(len(branch.get_history()), (i + 1))
    vt = OSViztrailHandle.load_viztrail(base_path)
    branch = vt.get_default_branch()
    history = branch.get_history()
    self.assertEqual(len(history), 10)
    # The i-th workflow in the history contains the first i + 1 modules
    for i in range(10):
        wf = branch.get_workflow(history[i].identifier)
        self.assertEqual(len(wf.modules), (i + 1))
        for m in range(i + 1):
            module = wf.modules[m]
            self.assertEqual(
                module.external_form,
                'print ' + str(m) + '+' + str(m)
            )
            self.assertEqual(module.outputs.stdout[-1].value, str(m + m))
def test_create_and_delete_branch_with_default_workflow(self):
    """Ensure that creating and loading branches works if the head workflow
    for the new branch is given.
    """
    base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(base_path)
    vt = OSViztrailHandle.create_viztrail(
        identifier='DEF',
        properties={PROPERTY_NAME: 'My Viztrail'},
        base_path=base_path
    )
    self.assertEqual(
        vt.last_modified_at,
        vt.default_branch.last_modified_at
    )
    # Create five modules. Only the module identifiers are passed on when
    # the branch is created.
    modules = list()
    for i in range(5):
        identifier = OSModuleHandle.create_module(
            command=python_cell(source='print ' + str(i)),
            external_form='TEST MODULE ' + str(i),
            state=MODULE_SUCCESS,
            outputs=ModuleOutputs(),
            provenance=ModuleProvenance(),
            timestamp=ModuleTimestamp(),
            datasets=dict(),
            module_folder=vt.modules_folder,
        ).identifier
        modules.append(identifier)
    branch = vt.create_branch(
        properties={PROPERTY_NAME: 'My Branch'},
        modules=modules
    )
    self.assertIsNotNone(branch.head)
    self.assertEqual(len(branch.workflows), 1)
    # Reload the viztrail and verify the persisted branch
    vt = OSViztrailHandle.load_viztrail(base_path)
    branch = vt.get_branch(branch.identifier)
    self.assertIsNotNone(branch.head)
    self.assertEqual(len(branch.workflows), 1)
    wf = branch.get_workflow(branch.head.identifier)
    self.assertEqual(len(wf.modules), 5)
    for i in range(5):
        self.assertEqual(
            wf.modules[i].external_form,
            'TEST MODULE ' + str(i)
        )
    # The original repeated this assertion verbatim; one copy is sufficient
    self.assertEqual(vt.last_modified_at, branch.last_modified_at)
def test_load_with_missing_modules(self):
    """Test loading workflows for which a module file is missing.

    The file of one module is removed from disk before the viztrail is
    loaded. The test expects the missing module and the state of the
    branch head to be reported as error.
    """
    base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(base_path)
    vt = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties=None,
        base_path=base_path
    )
    branch = vt.get_default_branch()
    # Append five modules
    for i in range(5):
        ts = get_current_time()
        command = python_cell(source='print ' + str(i) + '+' + str(i))
        module = OSModuleHandle.create_module(
            command=command,
            external_form='print ' + str(i) + '+' + str(i),
            state=MODULE_SUCCESS,
            datasets=dict(),
            outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
            provenance=ModuleProvenance(),
            timestamp=ModuleTimestamp(
                created_at=ts,
                started_at=ts,
                finished_at=ts
            ),
            module_folder=vt.modules_folder,
            object_store=vt.object_store
        )
        # The new workflow extends the modules of the current head (if any)
        if branch.head is not None:
            modules = branch.head.modules + [module]
        else:
            modules = [module]
        branch.append_workflow(
            modules=modules,
            action=ACTION_INSERT,
            command=command
        )
        self.assertEqual(len(branch.get_history()), (i + 1))
    # Delete the file for the third module to simulate an error condition in
    # which a file wasn't written properly
    os.remove(branch.head.modules[2].module_path)
    self.assertFalse(os.path.isfile(branch.head.modules[2].module_path))
    vt = OSViztrailHandle.load_viztrail(base_path)
    branch = vt.get_default_branch()
    self.assertTrue(branch.head.get_state().is_error)
    self.assertTrue(branch.head.modules[2].is_error)
def test_safe_write(self):
    """Update module state with write error.

    Setting the module to success with an invalid output entry is expected
    to leave the handle in error state while the module on disk remains in
    the previously written running state.
    """
    # Create original module
    provenance = ModuleProvenance(
        read={'DS1': 'ID1'},
        write={'DS1': DatasetDescriptor(identifier='ID2', name='ID2')}
    )
    module = OSModuleHandle.create_module(
        command=python_cell(source='print 2+2'),
        external_form='TEST MODULE',
        state=MODULE_PENDING,
        module_folder=MODULE_DIR,
        timestamp=ModuleTimestamp(),
        outputs=ModuleOutputs(stdout=[TextOutput('ABC')]),
        provenance=provenance
    )
    self.assertTrue(module.is_pending)
    module.set_running(external_form='TEST MODULE')
    self.assertTrue(module.is_running)
    # None is not a valid output object
    invalid_outputs = ModuleOutputs(stderr=[None])
    module.set_success(outputs=invalid_outputs)
    self.assertTrue(module.is_error)
    # The module on disk is expected to be unchanged
    persisted = OSModuleHandle.load_module(
        identifier=module.identifier,
        module_path=module.module_path
    )
    self.assertTrue(persisted.is_running)
def test_create_branch_of_active_workflow(self):
    """Ensure that an exception is raised when attempting to branch off a
    workflow with active modules. None of the branch resources should be
    created.
    """
    base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(base_path)
    vt = OSViztrailHandle.create_viztrail(
        identifier='DEF',
        properties={PROPERTY_NAME: 'My Viztrail'},
        base_path=base_path
    )
    branches_dir = os.path.join(base_path, viztrail.FOLDER_BRANCHES)
    # Create one branch
    branch = vt.create_branch(properties={PROPERTY_NAME: 'My Branch'})
    branch_path = os.path.join(branches_dir, branch.identifier)
    self.assertTrue(os.path.isdir(branch_path))
    # Remember the folder content before the failing create request
    entries_before = os.listdir(branches_dir)
    # Create five modules. The last one is active
    handles = [
        OSModuleHandle.create_module(
            command=python_cell(source='print ' + str(i)),
            external_form='TEST MODULE ' + str(i),
            state=MODULE_SUCCESS,
            outputs=ModuleOutputs(),
            provenance=ModuleProvenance(),
            timestamp=ModuleTimestamp(),
            datasets=dict(),
            module_folder=vt.modules_folder,
        )
        for i in range(5)
    ]
    modules = [handle.identifier for handle in handles]
    handles[-1].set_running(external_form='TEST MODULE')
    with self.assertRaises(ValueError):
        vt.create_branch(
            properties={PROPERTY_NAME: 'My Branch'},
            modules=modules
        )
    # Ensure that no additional entry in the branches folder is created
    entries_after = os.listdir(branches_dir)
    self.assertEqual(len(entries_before), len(entries_after))
def test_read_write_module(self):
    """Test reading and writing modules.

    Writes a pending module for a plot command and verifies that the
    module can be loaded from the file that was created.
    """
    # Data series with decreasing amount of optional information
    series = [
        {'column': 1, 'range': '0:50', 'label': 'A'},
        {'column': 2, 'range': '51:100', 'label': 'B'},
        {'column': 3, 'label': 'C'},
        {'column': 4}
    ]
    plot_command = create_plot(
        dataset_name='dataset',
        chart_name='My Chart',
        series=series,
        chart_type='bar',
        chart_grouped=False,
        xaxis_range='0:100',
        xaxis_column=None,
    )
    written = OSModuleHandle.create_module(
        command=plot_command,
        external_form='TEST MODULE',
        state=MODULE_PENDING,
        outputs=ModuleOutputs(),
        provenance=ModuleProvenance(),
        timestamp=ModuleTimestamp(),
        module_folder=MODULE_DIR,
    )
    loaded = OSModuleHandle.load_module(
        identifier=written.identifier,
        module_path=written.module_path
    )
    self.assertTrue(os.path.isfile(loaded.module_path))
    self.assertEqual(loaded.external_form, 'TEST MODULE')
    self.assertTrue(loaded.is_pending)
def test_outputs(self):
    """Test reading and writing modules with output information."""
    def store_and_load(outputs):
        # Write a pending module with the given outputs and read it back
        written = OSModuleHandle.create_module(
            command=python_cell(source='print 2+2'),
            external_form='TEST MODULE',
            state=MODULE_PENDING,
            outputs=outputs,
            provenance=ModuleProvenance(),
            timestamp=ModuleTimestamp(),
            module_folder=MODULE_DIR
        )
        return OSModuleHandle.load_module(
            identifier=written.identifier,
            module_path=written.module_path
        )

    # Module without any output
    loaded = store_and_load(ModuleOutputs())
    self.assertEqual(len(loaded.outputs.stderr), 0)
    self.assertEqual(len(loaded.outputs.stdout), 0)
    # Module with error output
    loaded = store_and_load(
        ModuleOutputs(stderr=[TextOutput('Some text')])
    )
    self.assertEqual(len(loaded.outputs.stderr), 1)
    first_error = loaded.outputs.stderr[0]
    self.assertTrue(first_error.is_text)
    self.assertEqual(loaded.outputs.stderr[0].value, 'Some text')
    self.assertEqual(len(loaded.outputs.stdout), 0)
    # Module with standard output
    loaded = store_and_load(
        ModuleOutputs(
            stdout=[
                TextOutput('Some text'),
                OutputObject(type='chart', value='123')
            ]
        )
    )
    self.assertEqual(len(loaded.outputs.stdout), 2)
    self.assertTrue(loaded.outputs.stdout[0].is_text)
    self.assertEqual(loaded.outputs.stdout[0].value, 'Some text')
    self.assertFalse(loaded.outputs.stdout[1].is_text)
    self.assertEqual(loaded.outputs.stdout[1].value, '123')
    self.assertEqual(len(loaded.outputs.stderr), 0)
    # Module with standard error and standard output
    loaded = store_and_load(
        ModuleOutputs(
            stderr=[TextOutput('Some text')],
            stdout=[
                TextOutput('Some text'),
                OutputObject(type='chart', value='123')
            ]
        )
    )
    self.assertEqual(len(loaded.outputs.stdout), 2)
    self.assertEqual(len(loaded.outputs.stderr), 1)
def test_provenance(self):
    """Test reading and writing modules with provenance information."""
    def store_and_load(provenance, **extra):
        # Write a pending module with the given provenance (plus optional
        # additional create_module arguments) and read it back
        written = OSModuleHandle.create_module(
            command=python_cell(source='print 2+2'),
            external_form='TEST MODULE',
            state=MODULE_PENDING,
            outputs=ModuleOutputs(),
            provenance=provenance,
            timestamp=ModuleTimestamp(),
            module_folder=MODULE_DIR,
            **extra
        )
        return OSModuleHandle.load_module(
            identifier=written.identifier,
            module_path=written.module_path
        )

    # Module without provenance information
    loaded = store_and_load(ModuleProvenance(), datasets=DATASETS)
    self.assertIsNone(loaded.provenance.read)
    self.assertIsNone(loaded.provenance.write)
    self.assertIsNone(loaded.provenance.delete)
    self.assertIsNone(loaded.provenance.resources)
    # Module that only has read provenance
    loaded = store_and_load(
        ModuleProvenance(
            read={'DS1': 'ID1'},
            resources={'fileId': '0123456789'}
        )
    )
    self.assertIsNotNone(loaded.provenance.read)
    self.assertEqual(len(loaded.provenance.read), 1)
    self.assertEqual(loaded.provenance.read['DS1'], 'ID1')
    self.assertEqual(loaded.provenance.resources['fileId'], '0123456789')
    self.assertIsNone(loaded.provenance.write)
    # Module that only has write provenance
    loaded = store_and_load(ModuleProvenance(write=DATASETS))
    self.assertIsNotNone(loaded.provenance.write)
    self.assertEqual(len(loaded.provenance.write), 2)
    self.assertEqual(loaded.provenance.write['DS1'].identifier, 'ID1')
    self.assertEqual(loaded.provenance.write['DS2'].identifier, 'ID2')
    self.assertIsNone(loaded.provenance.read)
    # Module with read, write, and delete provenance
    loaded = store_and_load(
        ModuleProvenance(
            read={'DS1': 'ID1'},
            write=DATASETS,
            delete=['A', 'B']
        )
    )
    self.assertIsNotNone(loaded.provenance.read)
    self.assertEqual(len(loaded.provenance.read), 1)
    self.assertEqual(loaded.provenance.read['DS1'], 'ID1')
    self.assertIsNotNone(loaded.provenance.write)
    self.assertEqual(len(loaded.provenance.write), 2)
    self.assertEqual(loaded.provenance.write['DS1'].identifier, 'ID1')
    self.assertEqual(loaded.provenance.write['DS2'].identifier, 'ID2')
    self.assertEqual(loaded.provenance.delete, ['A', 'B'])
    # Module with chart
    series = [
        DataSeriesHandle(
            column='COL1',
            label='SERIES1',
            range_start=0,
            range_end=100
        ),
        DataSeriesHandle(
            column='COL2',
            range_start=101,
            range_end=200
        ),
        DataSeriesHandle(
            column='COL3',
            label='SERIES2'
        )
    ]
    chart = ChartViewHandle(
        identifier='A',
        dataset_name='DS1',
        chart_name='My Chart',
        data=series,
        x_axis=1,
        chart_type='bar',
        grouped_chart=True
    )
    loaded = store_and_load(ModuleProvenance(charts=[chart]))
    self.assertEqual(len(loaded.provenance.charts), 1)
    c = loaded.provenance.charts[0]
    self.assertEqual(chart.identifier, c.identifier)
    self.assertEqual(chart.dataset_name, c.dataset_name)
    self.assertEqual(chart.chart_name, c.chart_name)
    self.assertEqual(chart.x_axis, c.x_axis)
    self.assertEqual(chart.chart_type, c.chart_type)
    self.assertEqual(chart.grouped_chart, c.grouped_chart)
    self.assertEqual(len(c.data), 3)
    # Compare the three data series of the loaded chart with the original
    for i in range(3):
        actual, expected = c.data[i], chart.data[i]
        self.assertEqual(actual.column, expected.column)
        self.assertEqual(actual.label, expected.label)
        self.assertEqual(actual.range_start, expected.range_start)
        self.assertEqual(actual.range_end, expected.range_end)
def test_error(self):
    """Update module state from pending to error."""
    # Create original module
    provenance = ModuleProvenance(
        read={'DS1': 'ID1'},
        write={'DS1': DatasetDescriptor(identifier='ID2', name='ID2')},
        resources={'fileid': '0123456789'}
    )
    module = OSModuleHandle.create_module(
        command=python_cell(source='print 2+2'),
        external_form='TEST MODULE',
        state=MODULE_PENDING,
        module_folder=MODULE_DIR,
        outputs=ModuleOutputs(stdout=[TextOutput('ABC')]),
        provenance=provenance,
        timestamp=ModuleTimestamp()
    )

    def reload(handle):
        # Read the module that belongs to the given handle from disk
        return OSModuleHandle.load_module(
            identifier=handle.identifier,
            module_path=handle.module_path
        )

    def assert_provenance_kept(handle):
        # The provenance information is expected to be unchanged
        self.assertIsNotNone(handle.provenance.read)
        self.assertIsNotNone(handle.provenance.write)
        self.assertIsNotNone(handle.provenance.resources)
        self.assertEqual(
            handle.provenance.resources['fileid'],
            '0123456789'
        )

    def assert_plain_error(handle):
        # Error state that was set without any optional arguments
        self.assertTrue(handle.is_error)
        self.assertIsNotNone(handle.timestamp.finished_at)
        self.assertEqual(len(handle.outputs.stderr), 0)
        self.assertEqual(len(handle.outputs.stdout), 0)
        assert_provenance_kept(handle)

    def assert_error_with_output(handle, finished_at):
        # Error state that was set with timestamp and error output
        self.assertTrue(handle.is_error)
        self.assertIsNotNone(handle.timestamp.finished_at)
        self.assertEqual(handle.timestamp.finished_at, finished_at)
        self.assertEqual(len(handle.outputs.stderr), 1)
        self.assertEqual(handle.outputs.stderr[0].value, 'Some Error')
        self.assertEqual(len(handle.outputs.stdout), 0)
        assert_provenance_kept(handle)

    module.set_error()
    assert_plain_error(module)
    # Read module from object store and ensure that all changes have been
    # materialized properly
    module = reload(module)
    assert_plain_error(module)
    # Set error with timestamp and output information
    ts = get_current_time()
    module.set_error(
        finished_at=ts,
        outputs=ModuleOutputs(stderr=[TextOutput('Some Error')])
    )
    assert_error_with_output(module, ts)
    module = reload(module)
    assert_error_with_output(module, ts)
def test_branch_cache(self):
    """Test that the workflow cache of a branch is bounded in size.

    Appends DEFAULT_CACHE_SIZE + 2 workflows to the default branch of a new
    viztrail. The test expects that a newly appended workflow is not in the
    cache, that every replaced branch head is added to the cache, and that
    the first workflow is dropped from the cache once more than
    DEFAULT_CACHE_SIZE workflows have been cached. The same size limit is
    then checked for a freshly loaded viztrail whose cache is filled by
    calling get_workflow() for every workflow in the branch history.
    """
    base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(base_path)
    vt = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties={},
        base_path=base_path
    )
    branch = vt.get_default_branch()
    command = python_cell(source='print 2+2')

    def new_module():
        """Create a module handle in state MODULE_SUCCESS."""
        return OSModuleHandle.create_module(
            command=command,
            external_form='print 2+2',
            state=MODULE_SUCCESS,
            timestamp=ModuleTimestamp(
                created_at=get_current_time(),
                started_at=get_current_time(),
                finished_at=get_current_time()
            ),
            outputs=ModuleOutputs(stdout=[TextOutput('4')]),
            provenance=ModuleProvenance(),
            module_folder=vt.modules_folder,
            object_store=vt.object_store
        )

    def append_to_head():
        """Append a workflow that extends the current head by one module."""
        module = new_module()
        return branch.append_workflow(
            modules=branch.head.modules + [module],
            action=ACTION_INSERT,
            command=command
        )

    def cached_ids(branch_handle):
        """Return the identifiers of all workflows in the branch cache."""
        return [w.identifier for w in branch_handle.cache]

    # The first workflow becomes the branch head and is therefore not
    # expected to be in the cache.
    wf = branch.append_workflow(
        modules=[new_module()],
        action=ACTION_INSERT,
        command=command
    )
    self.assertNotIn(wf.identifier, cached_ids(branch))
    # Each further append moves the previous head into the cache. The cache
    # grows by one entry per append and keeps the first workflow.
    for i in range(DEFAULT_CACHE_SIZE):
        append_to_head()
        self.assertEqual(len(branch.cache), (i + 1))
        self.assertIn(wf.identifier, cached_ids(branch))
    # One more append exceeds the cache size. The first workflow is expected
    # to be evicted.
    append_to_head()
    self.assertEqual(len(branch.cache), DEFAULT_CACHE_SIZE)
    self.assertNotIn(wf.identifier, cached_ids(branch))
    # A freshly loaded viztrail is expected to start with an empty cache
    # that is filled as workflows are requested.
    loaded_vt = OSViztrailHandle.load_viztrail(base_path)
    loaded_branch = loaded_vt.get_default_branch()
    self.assertEqual(len(loaded_branch.cache), 0)
    self.assertNotIn(wf.identifier, cached_ids(loaded_branch))
    loaded_branch.get_workflow(wf.identifier)
    self.assertIn(wf.identifier, cached_ids(loaded_branch))
    # Requesting all other workflows is expected to push the first workflow
    # out of the cache again.
    for wf_desc in loaded_branch.get_history():
        if wf_desc.identifier != wf.identifier:
            loaded_branch.get_workflow(wf_desc.identifier)
    self.assertEqual(len(loaded_branch.cache), DEFAULT_CACHE_SIZE)
    self.assertNotIn(wf.identifier, cached_ids(loaded_branch))
def test_cache_active_workflows(self):
    """Test caching for workflows that are active.

    The first workflow contains a module in state MODULE_PENDING. The test
    expects that this workflow stays in the branch cache even when the
    cache grows beyond DEFAULT_CACHE_SIZE, and that it is evicted with the
    next append after the pending module has been set to error state.
    """
    base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(base_path)
    vt = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties={},
        base_path=base_path
    )
    branch = vt.get_default_branch()
    command = python_cell(source='print 2+2')

    def append_to_head():
        """Append a workflow that adds a MODULE_SUCCESS module to the head."""
        module = OSModuleHandle.create_module(
            command=command,
            external_form='print 2+2',
            state=MODULE_SUCCESS,
            timestamp=ModuleTimestamp(
                created_at=get_current_time(),
                started_at=get_current_time(),
                finished_at=get_current_time()
            ),
            outputs=ModuleOutputs(stdout=[TextOutput('4')]),
            provenance=ModuleProvenance(),
            module_folder=vt.modules_folder,
            object_store=vt.object_store
        )
        return branch.append_workflow(
            modules=branch.head.modules + [module],
            action=ACTION_INSERT,
            command=command
        )

    def cached_ids():
        """Return the identifiers of all workflows in the branch cache."""
        return [w.identifier for w in branch.cache]

    # The first workflow consists of a single pending module
    pending_module = OSModuleHandle.create_module(
        command=command,
        external_form='print 2+2',
        state=MODULE_PENDING,
        timestamp=ModuleTimestamp(created_at=get_current_time()),
        outputs=ModuleOutputs(),
        provenance=ModuleProvenance(),
        module_folder=vt.modules_folder,
        object_store=vt.object_store
    )
    wf = branch.append_workflow(
        modules=[pending_module],
        action=ACTION_INSERT,
        command=command
    )
    # The workflow is the branch head and not expected to be in the cache
    self.assertNotIn(wf.identifier, cached_ids())
    # Fill the cache up to the default size
    for i in range(DEFAULT_CACHE_SIZE):
        append_to_head()
        self.assertEqual(len(branch.cache), (i + 1))
        self.assertIn(wf.identifier, cached_ids())
    append_to_head()
    # The active workflow should not be removed. The cache is expected to
    # exceed the default size by one entry instead.
    self.assertEqual(len(branch.cache), DEFAULT_CACHE_SIZE + 1)
    self.assertIn(wf.identifier, cached_ids())
    # Set module state to error and append another workflow. This should
    # evict two workflows
    second_wf = branch.cache[1]
    third_wf = branch.cache[2]
    pending_module.set_error()
    append_to_head()
    # The cache is expected to be back at the default size. The first
    # workflow and the workflow that was at cache position 1 are gone while
    # the workflow that was at position 2 is still cached.
    self.assertEqual(len(branch.cache), DEFAULT_CACHE_SIZE)
    self.assertNotIn(wf.identifier, cached_ids())
    self.assertNotIn(second_wf.identifier, cached_ids())
    self.assertIn(third_wf.identifier, cached_ids())
def test_success(self) -> None:
    """Switch a pending module to the success state and check persistence.

    The module is first set to running and then to success without any
    arguments; the test expects empty outputs and empty provenance in that
    case. Afterwards set_success is called with timestamp, outputs and
    provenance. After each transition the handle is read again through
    OSModuleHandle.load_module and the same expectations are re-checked.
    """
    def check_success_timestamps(handle):
        """Assert success state with started_at and finished_at set."""
        self.assertTrue(handle.is_success)
        self.assertIsNotNone(handle.timestamp.started_at)
        self.assertIsNotNone(handle.timestamp.finished_at)

    def check_default_success(handle):
        """Assert the expected state after set_success() without arguments."""
        check_success_timestamps(handle)
        self.assertEqual(len(handle.outputs.stderr), 0)
        self.assertEqual(len(handle.outputs.stdout), 0)
        self.assertTrue(handle.provenance.read == {})
        self.assertTrue(handle.provenance.write == {})

    def check_explicit_success(handle, finished_at):
        """Assert the expected state after set_success() with all options."""
        check_success_timestamps(handle)
        self.assertEqual(handle.timestamp.finished_at, finished_at)
        self.assertEqual(len(handle.outputs.stderr), 0)
        self.assertEqual(len(handle.outputs.stdout), 1)
        self.assertEqual(handle.outputs.stdout[0].value, 'XYZ')
        self.assertIsNotNone(handle.provenance.read)
        self.assertEqual(handle.provenance.read['DS1'], 'ID1')
        self.assertIsNotNone(handle.provenance.write)
        self.assertEqual(handle.provenance.write['DS1'].identifier, 'ID2')

    # Create the original module in pending state
    command = python_cell(source='print 2+2')
    initial_timestamp = ModuleTimestamp()
    initial_outputs = ModuleOutputs(stdout=[TextOutput('ABC')])
    initial_provenance = ModuleProvenance(
        read={'DS1': 'ID1'},
        write={'DS1': DatasetDescriptor(identifier='ID2', name='ID2')}
    )
    module = OSModuleHandle.create_module(
        command=command,
        external_form='TEST MODULE',
        state=MODULE_PENDING,
        module_folder=MODULE_DIR,
        timestamp=initial_timestamp,
        outputs=initial_outputs,
        provenance=initial_provenance
    )
    self.assertTrue(module.is_pending)
    module.set_running(external_form='TEST MODULE')
    module.set_success()
    check_default_success(module)
    # Read the module again and ensure that all changes have been
    # materialized properly
    module = OSModuleHandle.load_module(
        identifier=module.identifier,
        module_path=module.module_path
    )
    check_default_success(module)
    # Set success with all optional parameters
    ts = get_current_time()
    new_outputs = ModuleOutputs(stdout=[TextOutput('XYZ')])
    new_provenance = ModuleProvenance(
        read={'DS1': 'ID1'},
        write={'DS1': DatasetDescriptor(identifier='ID2', name='ID2')}
    )
    module.set_success(
        finished_at=ts,
        outputs=new_outputs,
        provenance=new_provenance
    )
    check_explicit_success(module, ts)
    # NOTE(review): the module is loaded twice in a row, once without and
    # once with an empty prev_state. The first result only supplies the
    # arguments for the second call. Presumably a leftover; confirm before
    # removing either call.
    module = OSModuleHandle.load_module(
        identifier=module.identifier,
        module_path=module.module_path
    )
    module = OSModuleHandle.load_module(
        identifier=module.identifier,
        module_path=module.module_path,
        prev_state={}
    )
    check_explicit_success(module, ts)