def test_cancel_run_helloworld(async_service):
    """Test cancelling a helloworld run."""
    # -- Setup ----------------------------------------------------------------
    #
    # Start a new run for the workflow template.
    with async_service() as api:
        workflow_id = create_workflow(api, source=BENCHMARK_DIR)
        user_id = create_user(api)
    with async_service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
        names = io_file(data=['Alice', 'Bob', 'Zoe'], format='plain/text')
        file_id = upload_file(api, group_id, names)
        args = [
            serialize_arg('names', serialize_fh(file_id)),
            serialize_arg('sleeptime', 10),
            serialize_arg('greeting', 'Hi')
        ]
        run_id = start_run(api, group_id, arguments=args)
    # Poll run after sleeping for one second.
    time.sleep(1)
    with async_service(user_id=user_id) as api:
        run = api.runs().get_run(run_id=run_id)
        assert run['state'] in st.ACTIVE_STATES
    # -- Cancel the active run ------------------------------------------------
    with async_service(user_id=user_id) as api:
        run = api.runs().cancel_run(run_id=run_id, reason='done')
        assert run['state'] == st.STATE_CANCELED
        assert run['messages'][0] == 'done'
    with async_service(user_id=user_id) as api:
        run = api.runs().get_run(run_id=run_id)
        assert run['state'] == st.STATE_CANCELED
        assert run['messages'][0] == 'done'
def test_run_helloworld_sync(sync_service, specfile, state):
    """Execute the helloworld example."""
    # -- Setup ----------------------------------------------------------------
    #
    # Start a new run for the workflow template.
    with sync_service() as api:
        workflow_id = create_workflow(api, source=TEMPLATE_DIR, specfile=specfile)
        user_id = create_user(api)
    with sync_service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
        names = io_file(data=['Alice', 'Bob'], format='plain/text')
        file_id = upload_file(api, group_id, names)
        args = [
            serialize_arg('names', serialize_fh(file_id, 'data/names.txt')),
            serialize_arg('sleeptime', 3)
        ]
        run_id = start_run(api, group_id, arguments=args)
    # -- Validate the run handle against the expected state -------------------
    with sync_service(user_id=user_id) as api:
        r = api.runs().get_run(run_id)
        serialize.validate_run_handle(r, state=state)
        if state == st.STATE_SUCCESS:
            # The run should have the greetings.txt file as a result.
            files = dict()
            for obj in r['files']:
                files[obj['name']] = obj['id']
            assert len(files) == 1
            fh = api.runs().get_result_file(
                run_id=run_id,
                file_id=files['results/greetings.txt']
            )
            value = fh.open().read().decode('utf-8').strip()
            assert 'Hello Alice!' in value
            assert 'Hello Bob!' in value
def start_run(ctx, group, configfile):
    """Start new workflow run."""
    group_id = ctx.obj.get_group(ctx.params)
    config = factory.read_config(configfile) if configfile else None
    with service() as api:
        doc = api.groups().get_group(group_id=group_id)
        config = config if config else doc[glbls.ENGINE_CONFIG]
        # Create list of file descriptors for uploaded files that are
        # included in the submission handle.
        files = []
        for fh in doc[glbls.GROUP_UPLOADS]:
            files.append((
                fh[flbls.FILE_ID],
                fh[flbls.FILE_NAME],
                fh[flbls.FILE_DATE][:19]
            ))
        # Create list of additional user-provided template parameters.
        parameters = ParameterIndex.from_dict(doc[glbls.GROUP_PARAMETERS])
        # Read values for all parameters.
        user_input = read(parameters.sorted(), files=files)
        args = [serialize_arg(key, val) for key, val in user_input.items()]
        # Start the run and print returned run state information.
        doc = api.runs().start_run(group_id=group_id, arguments=args, config=config)
        run_id = doc[labels.RUN_ID]
        run_state = doc[labels.RUN_STATE]
        click.echo('started run {} is {}'.format(run_id, run_state))
def test_run_helloworld_sync_env(sync_service):
    """Successfully execute the helloworld example that contains a notebook
    step in the Python environment that runs flowServ.
    """
    # -- Setup ----------------------------------------------------------------
    #
    # Start a new run for the workflow template.
    with sync_service() as api:
        workflow_id = create_workflow(api, source=BENCHMARK_DIR, specfile=SPEC_FILE)
        user_id = create_user(api)
    with sync_service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
        args = [serialize_arg('greeting', 'Hey there')]
        run_id = start_run(api, group_id, arguments=args)
    # -- Validate the run handle against the expected state -------------------
    with sync_service(user_id=user_id) as api:
        r = api.runs().get_run(run_id)
        serialize.validate_run_handle(r, state=st.STATE_SUCCESS)
        # The run should have the greetings.txt file as a result.
        files = dict()
        for obj in r['files']:
            files[obj['name']] = obj['id']
        assert len(files) == 2
        fh = api.runs().get_result_file(
            run_id=run_id,
            file_id=files['results/greetings.txt']
        )
        value = fh.open().read().decode('utf-8').strip()
        assert 'Hey there Alice!' in value
        assert 'Hey there Bob!' in value
def test_deserialize_run_argument():
    """Test deserialization of run arguments."""
    key, value = deserialize_arg(serialize_arg('names', 'names.txt'))
    assert key == 'names'
    assert value == 'names.txt'
    with pytest.raises(ValueError):
        deserialize_arg({'id': 'names'})
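# For reference, a minimal sketch of the (de)serialization pair that the test
# above exercises. This is a hypothetical illustration: the 'name'/'value'
# key names are inferred from the test's expectations, not copied from the
# flowserv implementation.
def _serialize_arg_sketch(name, value):
    """Hypothetical serializer: wrap a (name, value)-pair in a dictionary."""
    return {'name': name, 'value': value}


def _deserialize_arg_sketch(doc):
    """Hypothetical deserializer: raise a ValueError for documents that do
    not contain both expected keys (e.g. {'id': 'names'} in the test above).
    """
    if 'name' not in doc or 'value' not in doc:
        raise ValueError("expected 'name' and 'value' in {}".format(doc))
    return doc['name'], doc['value']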
def run_erroneous_workflow(service, specfile):
    """Execute the modified helloworld example."""
    with service() as api:
        # Create workflow template, user, and the workflow group.
        workflow_id = create_workflow(
            api,
            source=TEMPLATE_DIR,
            specfile=specfile
        )
        user_id = create_user(api)
    with service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
        # Upload the names file.
        names = io_file(data=NAMES, format='plain/text')
        file_id = upload_file(api, group_id, names)
        # Run the workflow.
        arguments = [
            serialize_arg('names', serialize_fh(file_id)),
            serialize_arg('greeting', 'Hi')
        ]
        run_id = start_run(api, group_id, arguments=arguments)
    # Poll workflow state every second.
    run = poll_run(service, run_id, user_id)
    assert run['state'] == st.STATE_SUCCESS
    with service() as api:
        wh = api.workflows().get_workflow(workflow_id=workflow_id)
    attempts = 0
    while 'postproc' not in wh:
        time.sleep(1)
        with service() as api:
            wh = api.workflows().get_workflow(workflow_id=workflow_id)
        attempts += 1
        if attempts > 60:
            break
    assert 'postproc' in wh
    serialize.validate_workflow_handle(wh)
    attempts = 0
    while wh['postproc']['state'] in st.ACTIVE_STATES:
        time.sleep(1)
        with service() as api:
            wh = api.workflows().get_workflow(workflow_id=workflow_id)
        attempts += 1
        if attempts > 60:
            break
    assert wh['postproc']['state'] not in st.ACTIVE_STATES
    serialize.validate_workflow_handle(wh)
    assert wh['postproc']['state'] == st.STATE_ERROR
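# The helpers above and below call a `poll_run` function that is not shown in
# this section. A minimal sketch of what such a helper could look like is
# given here; the 60-second timeout and the use of `get_run` are assumptions
# and not part of the original module.
def poll_run(service, run_id, user_id, timeout=60):
    """Hypothetical helper: poll a run every second until it leaves the
    active states or the timeout expires, then return the run handle.
    """
    with service(user_id=user_id) as api:
        run = api.runs().get_run(run_id=run_id)
    while run['state'] in st.ACTIVE_STATES and timeout > 0:
        time.sleep(1)
        timeout -= 1
        with service(user_id=user_id) as api:
            run = api.runs().get_run(run_id=run_id)
    return run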
def test_run_helloworld_async(async_service, target):
    """Execute the helloworld example."""
    # -- Setup ----------------------------------------------------------------
    #
    # Start a new run for the workflow template.
    with async_service() as api:
        workflow_id = create_workflow(api, source=BENCHMARK_DIR)
        user_id = create_user(api)
    with async_service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
        names = io_file(data=['Alice', 'Bob', 'Zoe'], format='plain/text')
        file_id = upload_file(api, group_id, names)
        args = [
            serialize_arg('names', serialize_fh(file_id, target)),
            serialize_arg('sleeptime', 1),
            serialize_arg('greeting', 'Hi')
        ]
        run_id = start_run(api, group_id, arguments=args)
    # Poll workflow state every second.
    with async_service(user_id=user_id) as api:
        run = api.runs().get_run(run_id=run_id)
    watch_dog = 30
    while run['state'] in st.ACTIVE_STATES and watch_dog:
        time.sleep(1)
        watch_dog -= 1
        with async_service(user_id=user_id) as api:
            run = api.runs().get_run(run_id=run_id)
    assert run['state'] == st.STATE_SUCCESS
    files = dict()
    for f in run['files']:
        files[f['name']] = f['id']
    fh = api.runs().get_result_file(
        run_id=run_id,
        file_id=files['results/greetings.txt']
    )
    greetings = fh.open().read().decode('utf-8').strip()
    assert 'Hi Alice' in greetings
    assert 'Hi Bob' in greetings
    assert 'Hi Zoe' in greetings
    fh = api.runs().get_result_file(
        run_id=run_id,
        file_id=files['results/analytics.json']
    )
    assert json.load(fh.open()) is not None
def run_postproc_workflow(
    postproc_spec: Dict, workflow: WorkflowObject, ranking: List,
    runs: List, run_manager: RunManager, backend: WorkflowController
):
    """Run post-processing workflow for a workflow template."""
    workflow_spec = postproc_spec.get('workflow')
    pp_inputs = postproc_spec.get('inputs', {})
    pp_files = pp_inputs.get('files', [])
    # Prepare temporary directory with result files for all runs in the
    # ranking. The created directory is the only run argument.
    strace = None
    try:
        datadir = postutil.prepare_postproc_data(
            input_files=pp_files,
            ranking=ranking,
            run_manager=run_manager
        )
        dst = pp_inputs.get('runs', postbase.RUNS_DIR)
        run_args = {
            postbase.PARA_RUNS: InputFile(source=FSFile(datadir), target=dst)
        }
        arg_list = [
            serialize_arg(postbase.PARA_RUNS, serialize_fh(datadir, dst))
        ]
    except Exception as ex:
        logging.error(ex)
        strace = util.stacktrace(ex)
        run_args = dict()
        arg_list = []
    # Create a new run for the workflow. The identifier for the run group
    # is None.
    run = run_manager.create_run(
        workflow=workflow,
        arguments=arg_list,
        runs=runs
    )
    if strace is not None:
        # If there were data preparation errors set the created run into an
        # error state and return.
        run_manager.update_run(
            run_id=run.run_id,
            state=run.state().error(messages=strace)
        )
    else:
        # Execute the post-processing workflow asynchronously if there were
        # no data preparation errors.
        postproc_state, rundir = backend.exec_workflow(
            run=run,
            template=WorkflowTemplate(
                workflow_spec=workflow_spec,
                parameters=postbase.PARAMETERS
            ),
            arguments=run_args,
            config=workflow.engine_config
        )
        # Update the post-processing workflow run state if it is no longer
        # pending for execution.
        if not postproc_state.is_pending():
            run_manager.update_run(
                run_id=run.run_id,
                state=postproc_state,
                rundir=rundir
            )
        # Remove the temporary input folder.
        shutil.rmtree(datadir)
def start_run(
    self, arguments: Dict, config: Optional[Dict] = None,
    poll_interval: Optional[int] = None
) -> Run:
    """Run the associated workflow for the given set of arguments.

    Parameters
    ----------
    arguments: dict
        Dictionary of user-provided arguments.
    config: dict, default=None
        Optional implementation-specific configuration settings that can be
        used to overwrite settings that were initialized at object creation.
    poll_interval: int, default=None
        Optional poll interval that is used to check the state of a run
        until it is no longer in active state.

    Returns
    -------
    flowserv.client.app.run.Run
    """
    arguments = self._parameters.set_defaults(arguments=arguments)
    with self.service() as api:
        # Upload any argument values as files that are either of type
        # StringIO or BytesIO.
        arglist = list()
        for key, val in arguments.items():
            # Convert arguments to the format that is expected by the run
            # manager. We pay special attention to file parameters. Input
            # files may be represented as strings, IO buffers or file
            # objects.
            para = self._parameters.get(key)
            if para is None:
                raise err.UnknownParameterError(key)
            if para.is_file():
                # Upload a given file prior to running the application.
                upload_file = None
                target = None
                if isinstance(val, str):
                    upload_file = FSFile(val)
                elif isinstance(val, StringIO):
                    buf = BytesIO(val.read().encode('utf8'))
                    upload_file = IOBuffer(buf)
                elif isinstance(val, BytesIO):
                    upload_file = IOBuffer(val)
                elif isinstance(val, IOHandle):
                    upload_file = val
                else:
                    msg = 'invalid argument {} for {}'.format(key, val)
                    raise err.InvalidArgumentError(msg)
                fh = api.uploads().upload_file(
                    group_id=self.group_id,
                    file=upload_file,
                    name=key
                )
                val = serialize_fh(fh[filelbls.FILE_ID], target=target)
            arglist.append(serialize_arg(key, val))
        # Execute the run and return the serialized run handle.
        run = api.runs().start_run(
            group_id=self.group_id,
            arguments=arglist,
            config=config
        )
    rh = Run(doc=run, service=self.service)
    # Wait for the run to finish if it is active and a poll interval is
    # given.
    while poll_interval and rh.is_active():
        time.sleep(poll_interval)
        rh = self.poll_run(run_id=rh.run_id)
    pprun = self.get_postproc_results()
    if pprun is not None:
        while poll_interval and pprun.is_active():
            time.sleep(poll_interval)
            pprun = self.get_postproc_results()
    return rh
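# A hedged usage sketch for the `start_run` method above, assuming a workflow
# application object `wf` obtained elsewhere; the argument names match the
# helloworld template used throughout this section. Illustration only, not
# part of the original module:
#
#     from io import StringIO
#
#     run = wf.start_run(
#         {'names': StringIO('Alice\nBob'), 'sleeptime': 1, 'greeting': 'Hi'},
#         poll_interval=1
#     )
#     # With a poll interval the call blocks until the run finishes.
#     assert not run.is_active()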
def run_postproc_workflow(
    workflow: WorkflowObject, ranking: List[RunResult], keys: List[str],
    run_manager: RunManager, tmpstore: StorageVolume,
    staticfs: StorageVolume, backend: WorkflowController
):
    """Run post-processing workflow for a workflow template.

    Parameters
    ----------
    workflow: flowserv.model.base.WorkflowObject
        Handle for the workflow that triggered the post-processing workflow
        run.
    ranking: list(flowserv.model.ranking.RunResult)
        List of runs in the current result ranking.
    keys: list of string
        Sorted list of run identifier for runs in the ranking.
    run_manager: flowserv.model.run.RunManager
        Manager for workflow runs.
    tmpstore: flowserv.volume.base.StorageVolume
        Temporary storage volume where the created post-processing files are
        stored. This volume will be erased after the workflow is started.
    staticfs: flowserv.volume.base.StorageVolume
        Storage volume that contains the static files from the workflow
        template.
    backend: flowserv.controller.base.WorkflowController
        Backend that is used to execute the post-processing workflow.
    """
    # Get workflow specification and the list of input files from the
    # post-processing statement.
    postproc_spec = workflow.postproc_spec
    workflow_spec = postproc_spec.get('workflow')
    pp_inputs = postproc_spec.get('inputs', {})
    pp_files = pp_inputs.get('files', [])
    # Prepare temporary directory with result files for all runs in the
    # ranking. The created directory is the only run argument.
    strace = None
    try:
        prepare_postproc_data(
            input_files=pp_files,
            ranking=ranking,
            run_manager=run_manager,
            store=tmpstore
        )
        dst = pp_inputs.get('runs', RUNS_DIR)
        run_args = {PARA_RUNS: InputDirectory(store=tmpstore, target=RUNS_DIR)}
        arg_list = [serialize_arg(PARA_RUNS, dst)]
    except Exception as ex:
        logging.error(ex, exc_info=True)
        strace = util.stacktrace(ex)
        run_args = dict()
        arg_list = []
    # Create a new run for the workflow. The identifier for the run group
    # is None.
    run = run_manager.create_run(
        workflow=workflow,
        arguments=arg_list,
        runs=keys
    )
    if strace is not None:
        # If there were data preparation errors set the created run into an
        # error state and return.
        run_manager.update_run(
            run_id=run.run_id,
            state=run.state().error(messages=strace)
        )
    else:
        # Execute the post-processing workflow asynchronously if there were
        # no data preparation errors.
        try:
            postproc_state, runstore = backend.exec_workflow(
                run=run,
                template=WorkflowTemplate(
                    workflow_spec=workflow_spec,
                    parameters=PARAMETERS
                ),
                arguments=run_args,
                staticfs=staticfs,
                config=workflow.engine_config
            )
        except Exception as ex:
            # Make sure to catch exceptions and set the run into an error
            # state.
            postproc_state = run.state().error(messages=util.stacktrace(ex))
            runstore = None
        # Update the post-processing workflow run state if it is no longer
        # pending for execution.
        if not postproc_state.is_pending():
            run_manager.update_run(
                run_id=run.run_id,
                state=postproc_state,
                runstore=runstore
            )
    # Erase the temporary storage volume.
    tmpstore.erase()
def start_run(
    self, group_id: str, arguments: List[Dict],
    config: Optional[Dict] = None
) -> Dict:
    """Start a new workflow run for the given group. The user-provided
    arguments are expected to be a list of (name, value)-pairs. The name
    identifies the template parameter. The data type of the value depends
    on the type of the parameter.

    Returns a serialization of the handle for the started run.

    Raises an unauthorized access error if the user does not have the
    necessary access to modify the workflow group.

    Parameters
    ----------
    group_id: string
        Unique workflow group identifier.
    arguments: list(dict)
        List of user-provided arguments for template parameters.
    config: dict, default=None
        Optional implementation-specific configuration settings that can be
        used to overwrite settings that were initialized at object creation.

    Returns
    -------
    dict

    Raises
    ------
    flowserv.error.InvalidArgumentError
    flowserv.error.MissingArgumentError
    flowserv.error.UnauthorizedAccessError
    flowserv.error.UnknownFileError
    flowserv.error.UnknownParameterError
    flowserv.error.UnknownWorkflowGroupError
    """
    # Raise an error if the user does not have rights to start new runs
    # for the workflow group or if the workflow group does not exist.
    if not self.auth.is_group_member(group_id=group_id, user_id=self.user_id):
        raise err.UnauthorizedAccessError()
    # Get handle for the given user group to enable access to uploaded
    # files and the identifier of the associated workflow.
    group = self.group_manager.get_group(group_id)
    # Get the template from the workflow that the workflow group belongs
    # to. Get a modified copy of the template based on the (potentially)
    # modified workflow specification and parameters of the workflow group.
    template = group.workflow.get_template(
        workflow_spec=group.workflow_spec,
        parameters=group.parameters
    )
    # Create instances of the template arguments from the given list of
    # values. At this point we only distinguish between scalar values and
    # input files. Also create a mapping for the argument list that is
    # stored in the database.
    run_args = dict()
    serialized_args = list()
    for arg in arguments:
        arg_id, arg_val = deserialize_arg(arg)
        # Raise an error if multiple values are given for the same
        # argument.
        if arg_id in run_args:
            raise err.DuplicateArgumentError(arg_id)
        para = template.parameters.get(arg_id)
        if para is None:
            raise err.UnknownParameterError(arg_id)
        if is_fh(arg_val):
            file_id, target = deserialize_fh(arg_val)
            # The argument value is expected to be the identifier of a
            # previously uploaded file. This will raise an exception if
            # the file identifier is unknown.
            fileobj = self.group_manager.get_uploaded_file(
                group_id=group_id,
                file_id=file_id
            ).fileobj
            run_args[arg_id] = para.cast(value=(fileobj, target))
        else:
            run_args[arg_id] = para.cast(arg_val)
        # Actor values as parameter values cannot be serialized. For now,
        # we only store the serialized workflow step but no information
        # about the additional input files.
        if isinstance(arg_val, ActorValue):
            arg_val = arg_val.spec
        serialized_args.append(serialize_arg(name=arg_id, value=arg_val))
    # Before we start creating directories and copying files make sure
    # that there are values for all template parameters (either in the
    # arguments dictionary or set as default values).
    template.validate_arguments(run_args)
    # Start the run.
    run = self.run_manager.create_run(group=group, arguments=serialized_args)
    run_id = run.run_id
    # Use the default engine configuration if the configuration argument
    # was not given.
    config = config if config else group.engine_config
    staticdir = dirs.workflow_staticdir(group.workflow.workflow_id)
    state, runstore = self.backend.exec_workflow(
        run=run,
        template=template,
        arguments=run_args,
        staticfs=self.fs.get_store_for_folder(key=staticdir),
        config=config
    )
    # Update the run state if it is no longer pending for execution. Make
    # sure to call the update run method for the server to ensure that
    # results are inserted and post-processing workflows started.
    if not state.is_pending():
        self.update_run(run_id=run_id, state=state, runstore=runstore)
        return self.get_run(run_id)
    return self.serialize.run_handle(run, group)
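# The file-handle arguments consumed above are produced by `serialize_fh` and
# detected by `is_fh`/`deserialize_fh`. Based solely on how they are used in
# this section, the serialized document pairs a previously uploaded file's
# identifier with an optional target path. A plausible, purely hypothetical
# shape with guessed key names:
#
#     serialize_arg('names', serialize_fh(file_id, 'data/names.txt'))
#     # -> {'name': 'names',
#     #     'value': {'file': {'id': '<file-id>', 'target': 'data/names.txt'}}}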
def test_postproc_workflow(fsconfig, tmpdir):
    """Execute the modified helloworld example."""
    # -- Setup ----------------------------------------------------------------
    #
    # It is important here that we do not use the SQLite in-memory database
    # since this fails (for unknown reason; presumably due to different
    # threads) when the post-processing run is updated.
    # --
    env = Config().basedir(tmpdir).run_async().auth()
    env.update(fsconfig)
    service = LocalAPIFactory(env=env)
    # Start a new run for the workflow template.
    with service() as api:
        # Need to set the file store in the backend to the new instance as
        # well. Otherwise, the post-processing workflow may attempt to use
        # the backend which was initialized previously with a different
        # file store.
        workflow_id = create_workflow(
            api,
            source=TEMPLATE_DIR,
            specfile=SPEC_FILE
        )
        user_id = create_user(api)
    # Create four groups and run the workflow with a slightly different
    # input file for each group.
    for i in range(4):
        with service(user_id=user_id) as api:
            group_id = create_group(api, workflow_id)
            names = io_file(data=NAMES[:(i + 1)], format='plain/text')
            file_id = upload_file(api, group_id, names)
            # Set the template argument values.
            arguments = [
                serialize_arg('names', serialize_fh(file_id)),
                serialize_arg('greeting', 'Hi')
            ]
            run_id = start_run(api, group_id, arguments=arguments)
        # Poll workflow state every second.
        run = poll_run(service, run_id, user_id)
        assert run['state'] == st.STATE_SUCCESS
        with service() as api:
            wh = api.workflows().get_workflow(workflow_id=workflow_id)
        attempts = 0
        while 'postproc' not in wh:
            time.sleep(1)
            with service() as api:
                wh = api.workflows().get_workflow(workflow_id=workflow_id)
            attempts += 1
            if attempts > 60:
                break
        assert 'postproc' in wh
        serialize.validate_workflow_handle(wh)
        attempts = 0
        while wh['postproc']['state'] in st.ACTIVE_STATES:
            time.sleep(1)
            with service() as api:
                wh = api.workflows().get_workflow(workflow_id=workflow_id)
            attempts += 1
            if attempts > 60:
                break
        serialize.validate_workflow_handle(wh)
        with service() as api:
            ranking = api.workflows().get_ranking(workflow_id=workflow_id)
        serialize.validate_ranking(ranking)
        for fobj in wh['postproc']['files']:
            if fobj['name'] == 'results/compare.json':
                file_id = fobj['id']
        with service(user_id=user_id) as api:
            fh = api.workflows().get_result_file(
                workflow_id=workflow_id,
                file_id=file_id
            )
        compare = util.read_object(fh.open())
        assert len(compare) == (i + 1)
    # Access the post-processing result files.
    with service() as api:
        fh = api.workflows().get_result_archive(workflow_id=workflow_id)
        assert fh.name.startswith('run')
        assert fh.mime_type == 'application/gzip'