def get_run_files(run: RunObject, state: WorkflowState, rundir: str) -> Tuple[List[RunFile], List[str]]:
    """Create the list of output files for a successful run.

    The list of files depends on whether output files are specified in the
    workflow specification or not. If output files are specified, only those
    files are considered. Otherwise, all result files that are listed in the
    run state are considered. In both cases, files that do not exist in the
    run directory are skipped.

    Parameters
    ----------
    run: flowserv.model.base.RunObject
        Handle for a workflow run.
    state: flowserv.model.workflow.state.WorkflowState
        SUCCESS state for the workflow run.
    rundir: string
        Directory containing run result files.

    Returns
    -------
    list of RunFile, list
        The run file handles that were created, and the value returned by
        ``walk()`` for the existing files (iterated here as pairs of file
        object and file key).
    """
    outputs = run.outputs()
    if outputs:
        # List only files for output specifications in the workflow handle.
        # Note that (i) the result of run.outputs() is always a dictionary
        # and (ii) that the keys in that dictionary are not necessarily equal
        # to the file sources, which is why the sources are read from the
        # values.
        filekeys = [f.source for f in outputs.values()]
    else:
        # List all files that were generated by the workflow run as output.
        filekeys = state.files
    # Ensure that each file exists before it is considered for the run. The
    # walk function is used to get the list of all files that need to be
    # retained for the run.
    candidates = ((os.path.join(rundir, key), key) for key in filekeys)
    walklist = [
        (filename, key)
        for filename, key in candidates
        if os.path.exists(filename)
    ]
    # Get files that will be copied to the file store.
    # NOTE(review): storefiles is iterated below and also returned to the
    # caller; this assumes walk() returns a re-iterable sequence and not a
    # one-shot iterator -- confirm against the walk() implementation.
    storefiles = walk(files=walklist)
    runfiles = list()
    for fh, filekey in storefiles:
        mime_type, _ = mimetypes.guess_type(url=fh.filename)
        runfiles.append(
            RunFile(
                key=filekey,
                name=filekey,
                mime_type=mime_type,
                size=fh.size()
            )
        )
    return runfiles, storefiles
def store_run_files(run: RunObject, files: List[str], source: StorageVolume, target: StorageVolume) -> List[RunFile]:
    """Copy the result files of a successful run to the target volume.

    The list of files that are copied depends on whether output files are
    specified in the workflow specification or not. If output files are
    specified, the given list of files is ignored and only the sources of the
    specified output files are copied. Otherwise, all files in the given list
    are copied.

    Each file is loaded from the source volume and stored on the target volume
    under the same key.

    Parameters
    ----------
    run: flowserv.model.base.RunObject
        Handle for a workflow run.
    files: list of string
        List of result files for a successful workflow run.
    source: flowserv.volume.base.StorageVolume
        Storage volume containing the run (result) files for a successful
        workflow run.
    target: flowserv.volume.base.StorageVolume
        Storage volume for persisting run result files.

    Returns
    -------
    list of RunFile
        One run file handle for each file that was copied.
    """
    outputs = run.outputs()
    if outputs:
        # Copy only files for output specifications in the workflow handle.
        # Note that (i) the result of run.outputs() is always a dictionary
        # and (ii) that the keys in that dictionary are not necessarily equal
        # to the file sources, which is why the sources are read from the
        # values.
        files = [f.source for f in outputs.values()]
    # Copy files to the target volume and create a handle for each of them.
    runfiles = list()
    for key in files:
        f = source.load(key)
        target.store(file=f, dst=key)
        # The MIME type is guessed from the file key only.
        mime_type, _ = mimetypes.guess_type(url=key)
        runfiles.append(
            RunFile(key=key, name=key, mime_type=mime_type, size=f.size())
        )
    return runfiles
def run_handle(self, run: RunObject, group: Optional[GroupObject] = None) -> Dict:
    """Get serialization for a run handle.

    Starts from the run descriptor and adds the workflow identifier, the
    (optional) group identifier, the run arguments, the parameter declarations
    of the workflow group (if a group is given), and state-dependent
    information such as timestamps, error messages, and result files.

    Parameters
    ----------
    run: flowserv.model.base.RunObject
        Workflow run handle
    group: flowserv.model.base.GroupObject, default=None
        Workflow group handle. Missing for post-processing workflows

    Returns
    -------
    dict
    """
    handle = self.run_descriptor(run)
    # Workflow and (optional) group that the run belongs to.
    handle[RUN_WORKFLOW] = run.workflow_id
    if run.group_id is not None:
        handle[RUN_GROUP] = run.group_id
    # Arguments that the run was started with.
    handle[RUN_ARGUMENTS] = run.arguments
    # Parameter declarations are taken from the group (if given).
    if group is not None:
        handle[RUN_PARAMETERS] = [
            para.to_dict() for para in group.parameters.values()
        ]
    # State-dependent information.
    if not run.is_pending():
        handle[RUN_STARTED] = run.state().started_at
    if run.is_canceled() or run.is_error():
        handle[RUN_FINISHED] = run.state().stopped_at
        handle[RUN_ERRORS] = run.state().messages
        return handle
    if not run.is_success():
        return handle
    handle[RUN_FINISHED] = run.state().finished_at
    specs = run.outputs()
    # Serialize the result files. By default, only the file identifier and
    # name are included. If an output specification exists for the file name,
    # the name is replaced by the specification key and all optional
    # properties that are set are added.
    serialized = []
    for runfile in run.files:
        entry = {FILE_ID: runfile.file_id, FILE_NAME: runfile.name}
        if runfile.name in specs:
            spec = specs[runfile.name]
            entry[FILE_NAME] = spec.key
            optional = (
                (FILE_TITLE, spec.title),
                (FILE_CAPTION, spec.caption),
                (FILE_WIDGET, spec.widget),
                (FILE_FORMAT, spec.format),
            )
            for label, value in optional:
                if value is not None:
                    entry[label] = value
        serialized.append(entry)
    handle[RUN_FILES] = serialized
    return handle