def __init__(self,
        identifier: str,
        properties: ObjectAnnotationSet,
        branches: Optional[List[BranchHandle]] = None,
        default_branch: Optional[BranchHandle] = None,
        created_at: Optional[datetime] = None
    ):
    """Initialize the viztrail descriptor.

    Parameters
    ----------
    identifier : string
        Unique viztrail identifier
    properties: ObjectAnnotationSet
        Handler for user-defined properties. Passed through to the
        superclass initializer.
    branches: list(vizier.viztrail.branch.BranchHandle), optional
        List of branches in the viztrail
    default_branch: vizier.viztrail.branch.BranchHandle, optional
        Default branch for the viztrail
    created_at : datetime.datetime, optional
        Timestamp of project creation (UTC). If None, the current time at
        the moment of the call is used.
    """
    super(ViztrailHandle, self).__init__(
        properties=properties
    )
    self.identifier = identifier
    # Index the given branches (if any) by their identifier.
    self.branches = dict()
    if branches is not None:
        for b in branches:
            self.branches[b.identifier] = b
    self.default_branch = default_branch
    # The default used to be get_current_time() in the signature, which is
    # evaluated only once when the function is defined. Resolving None here
    # yields a fresh timestamp for every newly created viztrail.
    if created_at is None:
        created_at = get_current_time()
    self.created_at = created_at
def set_success(self,
        finished_at: Optional[datetime] = None,
        outputs: Optional[ModuleOutputs] = None,
        provenance: Optional[ModuleProvenance] = None,
        updated_arguments: Optional[ModuleArguments] = None
    ):
    """Set status of the module to success. The finished_at property of the
    timestamp is set to the given value or the current time (if None).

    The module outputs and provenance are replaced by the given values. If
    updated arguments are given they replace the arguments of the module
    command.

    Parameters
    ----------
    finished_at: datetime.datetime, optional
        Timestamp when module execution finished
    outputs: vizier.viztrail.module.output.ModuleOutputs, optional
        Output streams for module. A new empty ModuleOutputs object is used
        if None.
    provenance: vizier.viztrail.module.provenance.ModuleProvenance, optional
        Provenance information about datasets that were read and written by
        previous execution of the module. A new ModuleProvenance object is
        used if None.
    updated_arguments: ModuleArguments, optional
        Replacement for the arguments of the module command
    """
    # Defaults are resolved per call. Default expressions in the signature
    # are evaluated once at definition time, which would freeze the
    # timestamp and share one outputs/provenance object between modules.
    if finished_at is None:
        finished_at = get_current_time()
    if outputs is None:
        outputs = ModuleOutputs()
    if provenance is None:
        provenance = ModuleProvenance()
    self.state = MODULE_SUCCESS
    self.timestamp.finished_at = finished_at
    # If the module is set to success straight from pending state the
    # started_at timestamp may not have been set.
    if self.timestamp.started_at is None:
        self.timestamp.started_at = self.timestamp.finished_at
    if updated_arguments is not None:
        self.command.arguments = updated_arguments
    self.outputs = outputs
    self.provenance = provenance
def __init__(self,
        created_at: Optional[datetime] = None,
        started_at: Optional[datetime] = None,
        finished_at: Optional[datetime] = None
    ):
    """Initialize the three timestamp components.

    A missing created_at timestamp is only valid if the other two
    timestamps are missing as well. In that case created_at is set to the
    current time.

    Parameters
    ----------
    created_at: datetime.datetime, optional
        Time when module was first created
    started_at: datetime.datetime, optional
        Time when module execution started
    finished_at: datetime.datetime, optional
        Time when module execution finished

    Raises
    ------
    ValueError
        If created_at is None but started_at or finished_at is not None
    """
    if created_at is None:
        # Without a creation time there cannot be a start or finish time.
        if started_at is not None or finished_at is not None:
            raise ValueError('invalid timestamp information')
        created_at = get_current_time()
    self.created_at = created_at
    self.started_at = started_at
    self.finished_at = finished_at
def set_success(self,
        finished_at: Optional[datetime] = None,
        outputs: Optional[ModuleOutputs] = None,
        provenance: Optional[ModuleProvenance] = None,
        updated_arguments: Optional[ModuleArguments] = None
    ):
    """Set status of the module to success and persist the module state.

    Delegates the state change to the superclass implementation and then
    calls write_safe() to materialize the module.

    Parameters
    ----------
    finished_at: datetime.datetime, optional
        Timestamp when module execution finished. The current time is used
        if None.
    outputs: vizier.viztrail.module.output.ModuleOutputs, optional
        Output streams for module. A new empty ModuleOutputs object is used
        if None.
    provenance: vizier.viztrail.module.provenance.ModuleProvenance, optional
        Provenance information about datasets that were read and written by
        previous execution of the module. A new ModuleProvenance object is
        used if None.
    updated_arguments: ModuleArguments, optional
        Replacement for the arguments of the module command
    """
    # Resolve the defaults here, at call time. Default expressions in the
    # signature are evaluated once at definition time, so every call would
    # otherwise see the same stale timestamp and the same shared objects.
    # Resolving before the super() call keeps this method correct no matter
    # how the superclass treats None.
    if finished_at is None:
        finished_at = get_current_time()
    if outputs is None:
        outputs = ModuleOutputs()
    if provenance is None:
        provenance = ModuleProvenance()
    super().set_success(finished_at, outputs, provenance, updated_arguments)
    # Materialize module state
    self.write_safe()
def __init__(self,
        identifier: str,
        action: str,
        package_id: Optional[str] = None,
        command_id: Optional[str] = None,
        created_at: Optional[datetime] = None
    ):
    """Initialize the workflow descriptor.

    Only the branch create action may omit the package and command
    identifier. For every other action both values have to be given.

    Parameters
    ----------
    identifier: string
        Unique workflow identifier
    action: string
        Identifier of the action that created the workflow version (create,
        insert, delete, or replace)
    package_id: string, optional
        Identifier of the package the module command is from
    command_id: string, optional
        Identifier of the module command
    created_at: datetime.datetime, optional
        Timestamp of workflow creation (UTC)

    Raises
    ------
    ValueError
        If action is not the create action and package_id or command_id is
        None
    """
    if action != ACTION_CREATE:
        if package_id is None or command_id is None:
            raise ValueError('invalid workflow provenance information')
    self.identifier = identifier
    self.action = action
    self.package_id = package_id
    self.command_id = command_id
    # init_value is given the current time as the fallback for created_at.
    fallback_time = get_current_time()
    self.created_at = init_value(created_at, fallback_time)
def upload_file(self, filename, provenance=None):
    """Register a file on disk in the file index.

    The file itself is not copied; the handle points at the given path.

    Parameters
    ----------
    filename: string
        Path to file on disk
    provenance: dict, optional
        Optional file provenance information. The values are copied into
        the properties of the new file handle.

    Returns
    -------
    FileHandle
    """
    base_name = os.path.basename(filename)
    # Copy the provenance so that the caller's dictionary is not modified.
    properties = dict(provenance) if provenance is not None else dict()
    properties[FH_UPLOAD_NAME] = base_name
    # The base name of the file doubles as the file identifier. The
    # lower-cased base name is used as the file name.
    f_handle = FileHandle(
        base_name,
        base_name.lower(),
        filename,
        get_current_time(),
        properties=properties
    )
    # Add file to file index and persist the index
    self.files[base_name] = f_handle
    self.write_index(self.files)
    return f_handle
def set_success(self,
        task_id: str,
        finished_at: Optional[datetime] = None,
        result: Optional[ExecResult] = None
    ):
    """Record a successful task. Stores the task identifier and the outputs
    of the execution result and sets the state to 'SUCCESS'.

    Parameters
    ----------
    task_id : string
        Unique task identifier
    finished_at: datetime.datetime, optional
        Accepted for interface compatibility; not used by this
        implementation.
    result: ExecResult, optional
        Execution result whose outputs are recorded. A new ExecResult
        object is used if None.
    """
    # Create the default result per call. A default expression in the
    # signature is evaluated once at definition time, which would share a
    # single ExecResult object between all calls.
    if result is None:
        result = ExecResult()
    self.task_id = task_id
    self.outputs = result.outputs
    self.state = 'SUCCESS'
def to_file(self):
    """Serialize the viztrail and write it to the viztrail file in the
    viztrail directory. Updates the last modified at timestamp to the
    current time before writing.
    """
    self.last_modified_at = get_current_time()
    # Build the document step by step in the order in which the elements
    # appear in the serialization.
    doc = dict()
    doc['id'] = self.identifier
    doc['env'] = self.exec_env.identifier
    branch_docs = list()
    for branch_id in self.branches:
        workflows = self.branches[branch_id].workflows
        branch_docs.append({
            'id': branch_id,
            'versions': [w.to_dict() for w in workflows]
        })
    doc['branches'] = branch_docs
    doc['timestamps'] = {
        'createdAt': self.created_at.isoformat(),
        'lastModifiedAt': self.last_modified_at.isoformat()
    }
    doc['versionCounter'] = self.version_counter.value
    doc['moduleCounter'] = self.module_counter.value
    # Write viztrail serialization to file
    target_file = os.path.join(self.fs_dir, VIZTRAIL_FILE)
    with open(target_file, 'w') as f:
        dump_json(doc, f)
def test_single_append(self):
    """Appending one module to an empty branch creates the workflow and
    module files, and the module can be read back after reloading the
    viztrail.
    """
    base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(base_path)
    viztrail = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties={},
        base_path=base_path
    )
    branch = viztrail.get_default_branch()
    source = 'print 2+2'
    command = python_cell(source=source)
    # All three module timestamps share the same point in time
    now = get_current_time()
    module = OSModuleHandle.create_module(
        command=command,
        external_form=source,
        state=MODULE_SUCCESS,
        outputs=ModuleOutputs(stdout=[TextOutput('4')]),
        provenance=ModuleProvenance(),
        timestamp=ModuleTimestamp(
            created_at=now,
            started_at=now,
            finished_at=now
        ),
        module_folder=viztrail.modules_folder,
        object_store=viztrail.object_store
    )
    wf = branch.append_workflow(
        modules=[module],
        action=ACTION_INSERT,
        command=command
    )
    # There has to be one file for the workflow handle and one file for the
    # new module
    workflow_file_path = os.path.join(branch.base_path, wf.identifier)
    self.assertTrue(os.path.isfile(workflow_file_path))
    module_file_path = os.path.join(wf.modules[-1].module_path)
    self.assertTrue(os.path.isfile(module_file_path))
    # Reload the viztrail and inspect the last module at the branch head
    viztrail = OSViztrailHandle.load_viztrail(base_path)
    head = viztrail.get_default_branch().get_head()
    module = head.modules[-1]
    self.assertEqual(module.external_form, 'print 2+2')
    self.assertEqual(module.outputs.stdout[-1].value, '4')
def __init__(self,
        source_branch: Optional[str] = None,
        workflow_id: Optional[str] = None,
        module_id: Optional[str] = None,
        created_at: Optional[datetime] = None
    ):
    """Initialize the branch provenance.

    The source branch, workflow and module identifier have to be given
    together or omitted together.

    Parameters
    ----------
    source_branch : string, optional
        Unique identifier of source branch
    workflow_id: string, optional
        Identifier of source workflow
    module_id: string, optional
        Identifier of module at which the new branch started
    created_at: datetime.datetime, optional
        Timestamp of branch creation (UTC). The current time is used if
        None.

    Raises
    ------
    ValueError
        If some but not all of source_branch, workflow_id and module_id are
        None
    """
    # Mixed settings (some given, some missing) are invalid
    is_given = [
        arg is not None for arg in (source_branch, workflow_id, module_id)
    ]
    if any(is_given) and not all(is_given):
        raise ValueError('invalid arguments for branch provenance')
    self.source_branch = source_branch
    self.workflow_id = workflow_id
    self.module_id = module_id
    if created_at is None:
        created_at = get_current_time()
    self.created_at = created_at
def __init__(self, version, action=None, package_id=None, command_id=None, created_at=None):
    """Initialize the workflow descriptor.

    Parameters
    ----------
    version: int
        Workflow version identifier
    action: string, optional
        Identifier of the action that created the workflow version (create,
        insert, delete, or replace)
    package_id: string, optional
        Identifier of the package the module command is from
    command_id: string, optional
        Identifier of the module command
    created_at: datetime.datetime, optional
        Timestamp of workflow creation (UTC). The current time is used if
        None.
    """
    self.version = version
    self.action = action
    self.package_id = package_id
    self.command_id = command_id
    # Fall back to the current time for descriptors without a timestamp
    if created_at is None:
        created_at = get_current_time()
    self.created_at = created_at
def set_error(
        self,
        task_id: str,
        finished_at: Optional[datetime] = None,
        outputs: Optional[ModuleOutputs] = None
    ) -> Optional[bool]:
    """Set status of the module that is associated with the given task
    identifier to error. The finished_at property of the timestamp is set
    to the given value or the current time (if None). The module outputs
    are adjusted to the given value. The output streams are empty if no
    value is given for the outputs parameter.

    Cancels all pending modules in the workflow.

    Returns True if the state of the workflow was changed and False
    otherwise. The result is None if the project or task did not exist.

    This is the abstract declaration; implementations are expected to
    resolve None for finished_at and outputs at call time.

    Parameters
    ----------
    task_id : string
        Unique task identifier
    finished_at: datetime.datetime, optional
        Timestamp when module execution finished
    outputs: vizier.viztrail.module.output.ModuleOutputs, optional
        Output streams for module

    Returns
    -------
    bool

    Raises
    ------
    NotImplementedError
        Always, since this method has to be overridden
    """
    # The defaults are None instead of get_current_time() / ModuleOutputs().
    # Default expressions are evaluated once when the function is defined,
    # which contradicts the documented "current time (if None)" contract.
    raise NotImplementedError
def append_workflow(self, modules, action, command, pending_modules=None):
    """Create a new workflow and make it the head of the branch.

    Pending modules are modules that have not been persisted yet. Each of
    them is materialized via OSModuleHandle.create_module and appended to
    the list of completed modules.

    Parameters
    ----------
    modules: list(vizier.viztrail.module.ModuleHandle)
        List of modules in the workflow that are completed
    action: string
        Identifier of the action that created the workflow
    command: vizier.viztrail.module.ModuleCommand
        Specification of the executed command that created the workflow
    pending_modules: list(vizier.viztrail.module.ModuleHandle), optional
        List of modules in the workflow that need to be materialized

    Returns
    -------
    vizier.viztrail.workflow.base.WorkflowHandle
    """
    workflow_modules = list(modules)
    to_materialize = pending_modules if pending_modules is not None else []
    for pending in to_materialize:
        # A running module needs a started_at timestamp. Use the creation
        # time if none has been set.
        if pending.is_running:
            if pending.timestamp.started_at is None:
                pending.timestamp.started_at = pending.timestamp.created_at
        workflow_modules.append(
            OSModuleHandle.create_module(
                command=pending.command,
                external_form=pending.external_form,
                state=pending.state,
                timestamp=pending.timestamp,
                datasets=pending.datasets,
                outputs=pending.outputs,
                provenance=pending.provenance,
                module_folder=self.modules_folder,
                object_store=self.object_store
            )
        )
    # Write handle for workflow at branch head
    module_ids = [m.identifier for m in workflow_modules]
    descriptor = write_workflow_handle(
        modules=module_ids,
        workflow_count=len(self.workflows),
        base_path=self.base_path,
        object_store=self.object_store,
        action=action,
        command=command,
        created_at=get_current_time()
    )
    new_head = WorkflowHandle(
        identifier=descriptor.identifier,
        branch_id=self.identifier,
        modules=workflow_modules,
        descriptor=descriptor
    )
    self.workflows.append(new_head.descriptor)
    # The previous head (if any) moves to the cache before it is replaced
    if self.head is not None:
        self.add_to_cache(self.head)
    self.head = new_head
    return new_head
def execute(task_id, project_id, command_doc, context, resources):
    """Execute the given command and report the outcome to the workflow
    controller of the project.

    The controller is notified when the task starts running and again with
    either success or error once the result is available. A command from a
    package without a registered processor results in an error.

    Parameters
    ----------
    task_id: string
        Unique task identifier
    project_id: string
        Unique project identifier
    command_doc : dict
        Dictionary serialization of the module command
    context: dict
        Dictionary of available resources in the database state. The key is
        the resource name. Values are resource identifiers.
    resources: dict
        Optional information about resources that were generated during a
        previous execution of the command
    """
    # Get the workflow controller for the project and signal the task start
    controller = worker_env.get_controller(project_id)
    controller.set_running(task_id=task_id, started_at=get_current_time())
    command = ModuleCommand.from_dict(command_doc)
    package_id = command.package_id
    if package_id not in worker_env.processors:
        # No processor is registered for the package of the command
        error_text = "unknown package '" + str(package_id) + "'"
        exec_result = ExecResult(
            is_success=False,
            outputs=ModuleOutputs(stderr=[TextOutput(error_text)])
        )
    else:
        processor = worker_env.processors[package_id]
        task_context = TaskContext(
            project_id=project_id,
            datastore=worker_env.datastores.get_datastore(project_id),
            filestore=worker_env.filestores.get_filestore(project_id),
            datasets=context[labels.CONTEXT_DATASETS],
            resources=resources,
            dataobjects=context[labels.CONTEXT_DATAOBJECTS]
        )
        _, exec_result = exec_command(
            task_id=task_id,
            command=command,
            context=task_context,
            processor=processor
        )
    # Report the final task state to the workflow controller
    if not exec_result.is_success:
        controller.set_error(
            task_id=task_id,
            outputs=exec_result.outputs
        )
    else:
        controller.set_success(
            task_id=task_id,
            outputs=exec_result.outputs,
            provenance=exec_result.provenance
        )
def test_load_with_dataset_delete(self):
    """Load workflows in which every module writes a new dataset and
    (except for the first module) deletes the dataset of its predecessor.
    """
    base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(base_path)
    viztrail = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties={},
        base_path=base_path
    )
    branch = viztrail.get_default_branch()
    # Append five modules
    for i in range(5):
        now = get_current_time()
        ds_name = 'DS' + str(i)
        # Every module but the first deletes the previous dataset
        deleted_datasets = list()
        if i > 0:
            deleted_datasets.append('DS' + str(i - 1))
        source = 'print ' + str(i) + '+' + str(i)
        command = python_cell(source=source)
        # Dataset i has i columns
        descriptor = DatasetDescriptor(
            identifier=str(i),
            name=ds_name,
            columns=[
                DatasetColumn(identifier=j, name=str(j)) for j in range(i)
            ],
        )
        module = OSModuleHandle.create_module(
            command=command,
            external_form=source,
            state=MODULE_SUCCESS,
            outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
            provenance=ModuleProvenance(
                write={ds_name: descriptor},
                delete=deleted_datasets
            ),
            timestamp=ModuleTimestamp(
                created_at=now,
                started_at=now,
                finished_at=now
            ),
            module_folder=viztrail.modules_folder,
            object_store=viztrail.object_store
        )
        if branch.head is None:
            modules = [module]
        else:
            modules = branch.head.modules + [module]
        branch.append_workflow(
            modules=modules,
            action=ACTION_INSERT,
            command=command
        )
    # Reload the viztrail and replay the database state module by module
    viztrail = OSViztrailHandle.load_viztrail(base_path)
    workflow = viztrail.get_default_branch().get_head()
    self.assertEqual(len(workflow.modules), 5)
    datasets = {}
    for i in range(5):
        datasets = workflow.modules[i].provenance.get_database_state(datasets)
        # Only the dataset written by module i remains in the state
        self.assertEqual(len(datasets), 1)
        key = 'DS' + str(i)
        self.assertTrue(key in datasets)
        self.assertEqual(len(datasets[key].columns), i)
def test_load_active(self):
    """Load workflows that contain active modules and check that those
    modules are reported as canceled after loading.
    """
    base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(base_path)
    viztrail = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties=None,
        base_path=base_path
    )
    branch = viztrail.get_default_branch()
    # Append five modules
    for i in range(5):
        now = get_current_time()
        source = 'print ' + str(i) + '+' + str(i)
        command = python_cell(source=source)
        module = OSModuleHandle.create_module(
            command=command,
            external_form=source,
            state=MODULE_SUCCESS,
            datasets=dict(),
            outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
            provenance=ModuleProvenance(),
            timestamp=ModuleTimestamp(
                created_at=now,
                started_at=now,
                finished_at=now
            ),
            module_folder=viztrail.modules_folder,
            object_store=viztrail.object_store
        )
        if branch.head is None:
            modules = [module]
        else:
            modules = branch.head.modules + [module]
        branch.append_workflow(
            modules=modules,
            action=ACTION_INSERT,
            command=command
        )
        self.assertEqual(len(branch.get_history()), (i + 1))
    # This is a hack to simulate loading workflows with active modules:
    # overwrite the state of the last two modules in the branch head with
    # an active state, second to last module first.
    for position in (-2, -1):
        active = branch.get_head().modules[position]
        active.state = MODULE_RUNNING
        active.write_module()
    viztrail = OSViztrailHandle.load_viztrail(base_path)
    branch = viztrail.get_default_branch()
    self.assertTrue(branch.get_head().modules[0].is_success)
    self.assertTrue(branch.get_head().modules[1].is_success)
    self.assertTrue(branch.get_head().modules[2].is_success)
    self.assertTrue(branch.get_head().modules[3].is_canceled)
    self.assertTrue(branch.get_head().modules[4].is_canceled)
    # Change state of last module in second workflow to an active state
    active = branch.get_head().modules[1]
    active.state = MODULE_RUNNING
    active.write_module()
    viztrail = OSViztrailHandle.load_viztrail(base_path)
    branch = viztrail.get_default_branch()
    second_workflow_id = branch.get_history()[1].identifier
    wf = branch.get_workflow(second_workflow_id)
    self.assertTrue(wf.modules[0].is_success)
    self.assertTrue(wf.modules[1].is_canceled)
def set_canceled(self,
        finished_at: Optional[datetime] = None,
        outputs: Optional[ModuleOutputs] = None
    ) -> None:
    """Set status of the module to canceled and persist the module state.

    Delegates the state change to the superclass implementation and then
    calls write_safe() to materialize the module.

    Parameters
    ----------
    finished_at: datetime.datetime, optional
        Timestamp when module execution finished. The current time is used
        if None.
    outputs: vizier.viztrail.module.output.ModuleOutputs, optional
        Output streams for module. A new empty ModuleOutputs object is used
        if None.
    """
    # Resolve the defaults at call time. Default expressions in the
    # signature are evaluated once at definition time, which would stamp
    # every canceled module with the import time and share one outputs
    # object between modules.
    if finished_at is None:
        finished_at = get_current_time()
    if outputs is None:
        outputs = ModuleOutputs()
    super().set_canceled(finished_at, outputs)
    # Materialize module state
    self.write_safe()
def set_error(
        self,
        task_id: str,
        finished_at: Optional[datetime] = None,
        outputs: Optional[ModuleOutputs] = None
    ) -> Optional[bool]:
    """Set status of the module that is associated with the given task
    identifier to error. The finished_at property of the timestamp is set
    to the given value or the current time (if None). The module outputs
    are adjusted to the given value. The output streams are empty if no
    value is given for the outputs parameter.

    Cancels all modules that follow the failed module in the workflow.

    Returns True if the state of the workflow was changed and False
    otherwise. The result is None if the task, its workflow or its module
    could not be found.

    Parameters
    ----------
    task_id : string
        Unique task identifier
    finished_at: datetime.datetime, optional
        Timestamp when module execution finished
    outputs: vizier.viztrail.module.output.ModuleOutputs, optional
        Output streams for module

    Returns
    -------
    bool
    """
    print("ERROR: {}".format(task_id))
    # Resolve the defaults at call time. Default expressions in the
    # signature are evaluated once at definition time, which would report a
    # stale timestamp and share one outputs object between calls.
    if finished_at is None:
        finished_at = get_current_time()
    if outputs is None:
        outputs = ModuleOutputs()
    with self.backend.lock:
        # Get task handle and remove it from the internal index. The result
        # is None if the task does not exist.
        task = pop_task(tasks=self.tasks, task_id=task_id)
        if task is None:
            return None
        # Get the handle for the head workflow of the specified branch and
        # the index for the module matching the identifier in the task.
        workflow, module_index = self.get_task_module(task)
        if workflow is None or module_index == -1:
            return None
        # Notify the backend that the task is finished
        self.backend.task_finished(task_id)
        module = workflow.modules[module_index]
        if not module.is_active:
            return False
        module.set_error(finished_at=finished_at, outputs=outputs)
        # Cancel all modules after the failed one. The resolved timestamp
        # is passed explicitly so that the canceled modules do not depend
        # on the default value of set_canceled.
        for m in workflow.modules[module_index + 1:]:
            m.set_canceled(finished_at=finished_at)
        return True
def __init__(self,
        identifier: str,
        properties: PersistentAnnotationSet,
        base_path: str,
        branches: List[BranchHandle],
        default_branch: Optional[BranchHandle],
        object_store: Optional[ObjectStore] = None,
        created_at: Optional[datetime] = None,
        branch_index: Optional[str] = None,
        branch_folder: Optional[str] = None,
        modules_folder: Optional[str] = None
    ):
    """Initialize the viztrail descriptor.

    Parameters
    ----------
    identifier : string
        Unique viztrail identifier
    properties: PersistentAnnotationSet
        Handler for user-defined properties
    base_path: string
        Identifier for folder containing viztrail resources
    branches: list(vizier.viztrail.branch.BranchHandle)
        List of branches in the viztrail
    default_branch: vizier.viztrail.branch.BranchHandle
        Default branch for the viztrail
    object_store: vizier.core.io.base.ObjectStore, optional
        Object store implementation to access and maintain resources. A new
        DefaultObjectStore is created if None.
    created_at : datetime.datetime, optional
        Timestamp of project creation (UTC). The current time is used if
        None.
    branch_index: string, optional
        Path to branch index list
    branch_folder: string, optional
        Path to branches folder
    modules_folder: string, optional
        Path to modules folder
    """
    # Resolve the defaults at call time. Default expressions in the
    # signature are evaluated once at definition time, which would give
    # every viztrail the import time as its creation time and would
    # instantiate the object store as a side effect of importing the
    # module.
    if created_at is None:
        created_at = get_current_time()
    if object_store is None:
        object_store = DefaultObjectStore()
    super(OSViztrailHandle, self).__init__(
        identifier=identifier,
        properties=properties,
        branches=branches,
        default_branch=default_branch,
        created_at=created_at
    )
    # Initialize the object store and identifier for all subfolders. Paths
    # that are not given are derived from the base path.
    self.base_path = base_path
    self.object_store = object_store
    self.branch_folder = init_value(
        branch_folder,
        self.object_store.join(base_path, FOLDER_BRANCHES)
    )
    self.branch_index = init_value(
        branch_index,
        self.object_store.join(self.branch_folder, OBJ_BRANCHINDEX)
    )
    self.modules_folder = init_value(
        modules_folder,
        self.object_store.join(base_path, FOLDER_MODULES)
    )
def test_completed_append(self):
    """Append a workflow of completed modules to a branch and verify the
    workflow before and after reloading the viztrail.
    """
    base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(base_path)
    viztrail = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties=None,
        base_path=base_path
    )
    branch = viztrail.get_default_branch()
    # Build a history of ten workflows, each adding one module
    for i in range(10):
        now = get_current_time()
        source = 'print ' + str(i) + '+' + str(i)
        command = python_cell(source=source)
        module = OSModuleHandle.create_module(
            command=command,
            external_form=source,
            state=MODULE_SUCCESS,
            datasets=dict(),
            outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
            provenance=ModuleProvenance(),
            timestamp=ModuleTimestamp(
                created_at=now,
                started_at=now,
                finished_at=now
            ),
            module_folder=viztrail.modules_folder,
            object_store=viztrail.object_store
        )
        if branch.head is None:
            modules = [module]
        else:
            modules = branch.head.modules + [module]
        branch.append_workflow(
            modules=modules,
            action=ACTION_INSERT,
            command=command
        )
    # Append a workflow that drops the last module of the current head
    head_modules = branch.get_head().modules
    wf = branch.append_workflow(
        modules=head_modules[:-1],
        action=ACTION_DELETE,
        command=head_modules[-1].command
    )
    self.assertEqual(len(wf.modules), 9)
    self.assertEqual(wf.descriptor.identifier, '0000000A')
    self.assertEqual(wf.descriptor.action, ACTION_DELETE)
    self.assertEqual(wf.descriptor.package_id, PACKAGE_PYTHON)
    self.assertEqual(wf.descriptor.command_id, PYTHON_CODE)
    # The reloaded viztrail has to show the same branch head
    viztrail = OSViztrailHandle.load_viztrail(base_path)
    branch = viztrail.get_default_branch()
    self.assertEqual(len(branch.get_history()), 11)
    wf = branch.get_head()
    self.assertEqual(len(wf.modules), 9)
    self.assertEqual(wf.descriptor.identifier, '0000000A')
    self.assertEqual(wf.descriptor.action, ACTION_DELETE)
    self.assertEqual(wf.descriptor.package_id, PACKAGE_PYTHON)
    self.assertEqual(wf.descriptor.command_id, PYTHON_CODE)
def __init__(self, identifier, branches, env_id, command_repository, properties, created_at=None, last_modified_at=None):
    """Initialize the viztrail identifier and branch dictionary.

    The two timestamps have to be given together (existing viztrail) or
    omitted together (new viztrail). For a new viztrail both timestamps are
    set to the current time.

    Parameters
    ----------
    identifier : string
        Unique viztrail identifier
    branches : dict(ViztrailBranch)
        Dictionary of branches.
    env_id: string
        Unique execution environment identifier
    command_repository: dict
        Dictionary containing specifications for all commands that are
        supported by the execution environment.
    properties: vizier.core.properties.ObjectPropertiesHandler
        Handler for user-defined properties that are associated with this
        viztrail
    created_at : datetime.datetime, optional
        Timestamp of project creation (UTC)
    last_modified_at : datetime.datetime, optional
        Timestamp when project was last modified (UTC)

    Raises
    ------
    ValueError
        If exactly one of created_at and last_modified_at is None
    """
    self.identifier = identifier
    self.branches = branches
    self.env_id = env_id
    self.command_repository = command_repository
    self.properties = properties
    if created_at is not None:
        # Existing viztrail: the last modified timestamp is required. The
        # error message used to read 'unexpected value' although the value
        # is missing in this case.
        if last_modified_at is None:
            raise ValueError('missing value for \'last_modified\'')
        self.created_at = created_at
        self.last_modified_at = last_modified_at
    else:
        # New viztrail: a last modified timestamp must not be given. The
        # error message used to read 'missing value' although the value is
        # present in this case.
        if last_modified_at is not None:
            raise ValueError('unexpected value for \'last_modified\'')
        self.created_at = get_current_time()
        self.last_modified_at = self.created_at
def unload_dataset(self, dataset_name, format='csv', options=None, filename=""):
    """Export the dataset with the given name via Mimir and return a file
    handle for the result.

    NOTE(review): earlier documentation states that ValueError is raised if
    the dataset could not be exported. Nothing in this function raises it
    directly; presumably it originates from the Mimir call - confirm.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset to unload
    format: string
        Format for output (csv, json, etc.)
    options: list, optional
        Options for data unload. Converted to a Scala sequence before being
        passed to Mimir. An empty list is used if None.
    filename: string
        The output filename - may be empty if outputting to a database

    Returns
    -------
    vizier.filestore.base.FileHandle
    """
    # Use a fresh list per call instead of a mutable default argument that
    # would be shared between all calls.
    if options is None:
        options = []
    name = os.path.basename(filename).lower()
    # Create a new unique identifier for the file.
    identifier = get_unique_identifier()
    # The output path is only set if a file name is given. The file is
    # placed in the current working directory and named after the
    # generated identifier.
    abspath = ""
    if filename != "":
        working_dir = os.getcwd().replace('\\', '/')
        abspath = os.path.abspath(working_dir + '/' + identifier)
    mimir._mimir.unloadDataSource(
        dataset_name,
        abspath,
        format,
        mimir._jvmhelper.to_scala_seq(options)
    )
    created_at = get_current_time()
    # The handle is returned without being added to a file index
    return FileHandle(
        identifier,
        name,
        abspath,
        created_at,
        properties=dict()
    )
def set_running(self,
        started_at: Optional[datetime] = None,
        external_form: Optional[str] = None
    ):
    """Set status of the module to running and persist the module state.

    Delegates the state change to the superclass implementation and then
    calls write_safe() to materialize the module.

    Parameters
    ----------
    started_at: datetime.datetime, optional
        Timestamp when module started running. The current time is used if
        None.
    external_form: string, optional
        Adjusted external representation for the module command.
    """
    # Resolve the default at call time. A default expression in the
    # signature is evaluated once at definition time, which would record
    # the import time as the start time of every module.
    if started_at is None:
        started_at = get_current_time()
    super().set_running(started_at, external_form)
    # Materialize module state
    self.write_safe()
def test_multi_append(self):
    """Append ten modules one by one and verify the complete workflow
    history after reloading the viztrail.
    """
    base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(base_path)
    viztrail = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties=None,
        base_path=base_path
    )
    branch = viztrail.get_default_branch()
    # Append ten modules
    for i in range(10):
        now = get_current_time()
        source = 'print ' + str(i) + '+' + str(i)
        command = python_cell(source=source)
        module = OSModuleHandle.create_module(
            command=command,
            external_form=source,
            state=MODULE_SUCCESS,
            datasets=dict(),
            outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
            provenance=ModuleProvenance(),
            timestamp=ModuleTimestamp(
                created_at=now,
                started_at=now,
                finished_at=now
            ),
            module_folder=viztrail.modules_folder,
            object_store=viztrail.object_store
        )
        if branch.head is None:
            modules = [module]
        else:
            modules = branch.head.modules + [module]
        branch.append_workflow(
            modules=modules,
            action=ACTION_INSERT,
            command=command
        )
        self.assertEqual(len(branch.get_history()), (i + 1))
    # Workflow i in the reloaded history contains the first i + 1 modules
    viztrail = OSViztrailHandle.load_viztrail(base_path)
    branch = viztrail.get_default_branch()
    history = branch.get_history()
    self.assertEqual(len(history), 10)
    for i in range(10):
        wf = branch.get_workflow(history[i].identifier)
        self.assertEqual(len(wf.modules), (i + 1))
        for m in range(i + 1):
            module = wf.modules[m]
            expected_form = 'print ' + str(m) + '+' + str(m)
            self.assertEqual(module.external_form, expected_form)
            self.assertEqual(module.outputs.stdout[-1].value, str(m + m))
def get_workflow(self, branch_id=DEFAULT_BRANCH, version=-1):
    """Get the workflow with the given version number from the workflow
    history of the given branch.

    A negative version number refers to the branch head. If the branch has
    no workflows yet and the version is not positive, an empty workflow
    handle is returned.

    Parameters
    ----------
    branch_id: string, optional
        Unique branch identifier
    version: int, optional
        Workflow version number

    Returns
    -------
    WorkflowHandle
        None if the branch does not exist or the version is not part of the
        branch history
    """
    # Return None if branch does not exist
    if branch_id not in self.branches:
        return None
    branch = self.branches[branch_id]
    if version <= 0 and len(branch.workflows) == 0:
        # Returns an empty workflow if the branch does not contain any
        # executed workflows yet.
        return WorkflowHandle(branch_id, -1, get_current_time(), [])
    # Locate the workflow file. A negative version selects the branch head.
    wf_file = None
    if version < 0 and len(branch.workflows) > 0:
        wf_file = workflow_file(self.fs_dir, branch.workflows[-1].version)
    else:
        for wf_desc in branch.workflows:
            if wf_desc.version == version:
                wf_file = workflow_file(self.fs_dir, version)
                break
    # Return None if version number is not in branch (indicated by a
    # non-existing workflow file)
    if wf_file is None:
        return None
    # Read the file once. Errors while opening or reading the file
    # propagate to the caller.
    with open(wf_file, 'r') as f:
        content = f.read()
    try:
        doc = load_json(content)
    except Exception:
        # Fall back to YAML for files that cannot be parsed as JSON. Only
        # Exception is caught so that KeyboardInterrupt and SystemExit are
        # no longer swallowed by the fallback.
        # NOTE(review): yaml.load with a full loader can construct
        # arbitrary objects. Confirm that workflow files are always
        # trusted, or switch to a safe loader.
        doc = yaml.load(content, Loader=CLoader)
    return WorkflowHandle(
        branch_id,
        doc['version'],
        to_datetime(doc['createdAt']),
        [ModuleHandle.from_dict(m) for m in doc['modules']]
    )
def init(self):
    """Initialize the API components and the service descriptor before the
    first request.
    """
    config = self.config
    # Url factory and engine have to exist before the sub-APIs are created
    self.urls = ContainerApiUrlFactory(
        base_url=config.app_base_url,
        api_doc_url=config.webservice.doc_url
    )
    self.engine = get_engine(config)
    self.projects = self.engine.projects
    # Sub-APIs for datasets, dataset views, files and tasks
    self.datasets = VizierDatastoreApi(
        projects=self.projects,
        urls=self.urls,
        defaults=config.webservice.defaults
    )
    self.views = VizierDatasetViewApi(projects=self.projects, urls=self.urls)
    self.files = VizierFilestoreApi(projects=self.projects, urls=self.urls)
    self.tasks = VizierContainerTaskApi(
        engine=self.engine,
        controller_url=config.controller_url
    )
    # Assemble the service descriptor element by element
    descriptor = dict()
    descriptor['name'] = config.webservice.name
    descriptor['startedAt'] = get_current_time().isoformat()
    descriptor['defaults'] = {
        'maxFileSize': config.webservice.defaults.max_file_size
    }
    descriptor['environment'] = {
        'name': self.engine.name,
        'version': VERSION_INFO,
        'backend': config.engine.backend.identifier,
        'packages': list(self.engine.packages.keys())
    }
    descriptor[labels.LINKS] = serialize.HATEOAS({
        'self': self.urls.service_descriptor(),
        'doc': self.urls.api_doc()
    })
    self.service_descriptor = descriptor
def set_error(self,
        finished_at: Optional[datetime] = None,
        outputs: Optional[ModuleOutputs] = None
    ):
    """Set status of the module to error and persist the module state.

    Delegates the state change to the superclass implementation and then
    calls write_safe() to materialize the module.

    Parameters
    ----------
    finished_at: datetime.datetime, optional
        Timestamp when module execution finished. The current time is used
        if None.
    outputs: vizier.viztrail.module.output.ModuleOutputs, optional
        Output streams for module. A new empty ModuleOutputs object is used
        if None.
    """
    # Resolve the defaults at call time. Default expressions in the
    # signature are evaluated once at definition time, which would stamp
    # every failed module with the import time and share one outputs
    # object between modules.
    if finished_at is None:
        finished_at = get_current_time()
    if outputs is None:
        outputs = ModuleOutputs()
    super().set_error(finished_at, outputs)
    # Materialize module state
    self.write_safe()
def set_running(self,
        started_at: Optional[datetime] = None,
        external_form: Optional[str] = None
    ) -> None:
    """Set status of the module to running. The started_at property of the
    timestamp is set to the given value or the current time (if None). The
    module outputs are reset to an empty ModuleOutputs object.

    Parameters
    ----------
    started_at: datetime.datetime, optional
        Timestamp when module started running
    external_form: string, optional
        Adjusted external representation for the module command. The
        current external form is kept if None.
    """
    if external_form is not None:
        self.external_form = external_form
    self.state = MODULE_RUNNING
    # Resolve the default at call time. A default expression in the
    # signature is evaluated once at definition time, which would record
    # the import time as the start time of every module.
    if started_at is None:
        started_at = get_current_time()
    self.timestamp.started_at = started_at
    # Clear outputs from any previous execution
    self.outputs = ModuleOutputs()
def set_error(self, finished_at=None, outputs=None):
    """Switch the module into the error state and persist it.

    The database state of the module is cleared. Missing arguments are
    replaced by the current time and an empty set of output streams.

    Parameters
    ----------
    finished_at: datetime.datetime, optional
        Timestamp when module execution finished
    outputs: vizier.viztrail.module.output.ModuleOutputs, optional
        Output streams for module
    """
    self.state = mstate.MODULE_ERROR
    # Fall back to the current time if no timestamp is given
    if finished_at is None:
        finished_at = get_current_time()
    self.timestamp.finished_at = finished_at
    # Fall back to empty output streams if no outputs are given
    if outputs is None:
        outputs = ModuleOutputs()
    self.outputs = outputs
    # A module in error state has no datasets
    self.datasets = dict()
    # Materialize module state
    self.write_safe()
def set_canceled(
        self,
        finished_at: Optional[datetime] = None,
        outputs: Optional[ModuleOutputs] = None
    ) -> None:
    """Set status of the module to canceled. The finished_at property of
    the timestamp is set to the given value or the current time (if None).
    The module outputs are set to the given value. If no outputs are given
    the module output streams will be empty.

    Parameters
    ----------
    finished_at: datetime.datetime, optional
        Timestamp when module execution finished
    outputs: vizier.viztrail.module.output.ModuleOutputs, optional
        Output streams for module
    """
    # Resolve the defaults at call time. Default expressions in the
    # signature are evaluated once at definition time, which would stamp
    # every canceled module with the import time and share one outputs
    # object between modules.
    if finished_at is None:
        finished_at = get_current_time()
    if outputs is None:
        outputs = ModuleOutputs()
    self.state = MODULE_CANCELED
    self.timestamp.finished_at = finished_at
    self.outputs = outputs