def test_deserialize_file_argument():
    """Serialized file-handle arguments round-trip through deserialize_fh and
    malformed documents raise a ValueError.
    """
    # Round-trip with an explicit target path.
    doc = serialize_fh('0000', 'names.txt')
    assert deserialize_fh(doc) == ('0000', 'names.txt')
    # Round-trip without a target path yields None as the target.
    doc = serialize_fh('0000')
    assert deserialize_fh(doc) == ('0000', None)
    # A dictionary that is not a valid file-handle document is rejected.
    with pytest.raises(ValueError):
        deserialize_fh({'fileId': '0000'})
def test_run_helloworld_sync(sync_service, specfile, state): """Execute the helloworld example.""" # -- Setup ---------------------------------------------------------------- # # Start a new run for the workflow template. with sync_service() as api: workflow_id = create_workflow(api, source=TEMPLATE_DIR, specfile=specfile) user_id = create_user(api) with sync_service(user_id=user_id) as api: group_id = create_group(api, workflow_id) names = io_file(data=['Alice', 'Bob'], format='plain/text') file_id = upload_file(api, group_id, names) args = [ serialize_arg('names', serialize_fh(file_id, 'data/names.txt')), serialize_arg('sleeptime', 3) ] run_id = start_run(api, group_id, arguments=args) # -- Validate the run handle against the expected state ------------------- with sync_service(user_id=user_id) as api: r = api.runs().get_run(run_id) serialize.validate_run_handle(r, state=state) if state == st.STATE_SUCCESS: # The run should have the greetings.txt file as a result. files = dict() for obj in r['files']: files[obj['name']] = obj['id'] assert len(files) == 1 fh = api.runs().get_result_file( run_id=run_id, file_id=files['results/greetings.txt']) value = fh.open().read().decode('utf-8').strip() assert 'Hello Alice!' in value assert 'Hello Bob!' in value
def start_hello_world(api, group_id):
    """Start a new run for the Hello World template.

    Returns the run identifier and the identifier for the input file.

    Parameters
    ----------
    api: flowserv.service.api.API
        Service API manager.
    group_id: string
        Unique group identifier.

    Returns
    -------
    string, string
    """
    # Upload the names file that serves as the only workflow input.
    upload = api.uploads().upload_file(
        group_id=group_id,
        file=io_file(data=['Alice', 'Bob'], format='txt/plain'),
        name='n.txt'
    )
    file_id = upload['id']
    # Create the run, passing the uploaded file as the 'names' argument.
    run = api.runs().start_run(
        group_id=group_id,
        arguments=[{'name': 'names', 'value': serialize_fh(file_id=file_id)}]
    )
    run_id = run['id']
    # Trigger execution of the created run via the backend engine.
    api.runs().backend.start(run_id)
    return run_id, file_id
def test_cancel_run_helloworld(async_service): """Test cancelling a helloworld run.""" # -- Setup ---------------------------------------------------------------- # # Start a new run for the workflow template. with async_service() as api: workflow_id = create_workflow(api, source=BENCHMARK_DIR) user_id = create_user(api) with async_service(user_id=user_id) as api: group_id = create_group(api, workflow_id) names = io_file(data=['Alice', 'Bob', 'Zoe'], format='plain/text') file_id = upload_file(api, group_id, names) args = [ serialize_arg('names', serialize_fh(file_id)), serialize_arg('sleeptime', 10), serialize_arg('greeting', 'Hi') ] run_id = start_run(api, group_id, arguments=args) # Poll run after sleeping for one second. time.sleep(1) with async_service(user_id=user_id) as api: run = api.runs().get_run(run_id=run_id) assert run['state'] in st.ACTIVE_STATES # -- Cancel the active run ------------------------------------------------ with async_service(user_id=user_id) as api: run = api.runs().cancel_run( run_id=run_id, reason='done' ) assert run['state'] == st.STATE_CANCELED assert run['messages'][0] == 'done' with async_service(user_id=user_id) as api: run = api.runs().get_run(run_id=run_id) assert run['state'] == st.STATE_CANCELED assert run['messages'][0] == 'done'
def test_read_file_parameter_with_uploads(tmpdir):
    """Reading a file parameter against a list of previously uploaded files
    yields a serialized file-handle argument.
    """
    params = [File(name='A', index=0, target='target1')]
    scanner = Scanner(reader=ListReader(['f1']))
    uploads = [('f1', 'F', '123')]
    result = cli.read(params, scanner, files=uploads)
    assert len(result) == 1
    assert result['A'] == serialize_fh('f1', target='target1')
def test_cancel_run(prepare_submission): """Test cancelling a submission run.""" # Create user, submission and upload the run file. client, headers, benchmark_id, submission_id, file_id = prepare_submission # -- Start run ------------------------------------------------------------ url = SUBMISSION_RUN.format(config.API_PATH(), submission_id) body = { rlbls.RUN_ARGUMENTS: [{ 'name': 'names', 'value': serialize_fh(file_id) }, { 'name': 'greeting', 'value': 'Hi' }, { 'name': 'sleeptime', 'value': 5 }] } r = client.post(url, json=body, headers=headers) assert r.status_code == 201 run_id = r.json['id'] # -- Cancel and delete run ------------------------------------------------ url = RUN_CANCEL.format(config.API_PATH(), run_id) r = client.put(url, json={rlbls.CANCEL_REASON: 'Test'}, headers=headers) assert r.status_code == 200 url = RUN_CANCEL.format(config.API_PATH(), run_id) # Error when cancelling inactive run or providing invalid body. r = client.put(url, headers=headers) assert r.status_code == 400 r = client.put(url, json={'messgae': 'invalid'}, headers=headers) assert r.status_code == 400
def run_postproc_workflow(
    postproc_spec: Dict, workflow: WorkflowObject, ranking: List,
    runs: List, run_manager: RunManager, backend: WorkflowController
):
    """Run post-processing workflow for a workflow template.

    Prepares a temporary directory with the result files of all runs in the
    ranking, creates a new run (group identifier None) and executes the
    post-processing workflow via the given backend. If data preparation
    fails, the created run is set into an error state instead.

    Parameters
    ----------
    postproc_spec: dict
        Post-processing specification ('workflow' spec plus 'inputs').
    workflow: flowserv WorkflowObject
        Workflow the post-processing belongs to.
    ranking: list
        Current run ranking used to collect input files.
    runs: list
        Run identifiers associated with the post-processing run.
    run_manager: RunManager
        Manager used to create and update the run.
    backend: WorkflowController
        Engine that executes the workflow.
    """
    workflow_spec = postproc_spec.get('workflow')
    pp_inputs = postproc_spec.get('inputs', {})
    pp_files = pp_inputs.get('files', [])
    # Prepare temporary directory with result files for all runs in the
    # ranking. The created directory is the only run argument.
    strace = None
    # BUGFIX: datadir must be initialized so the cleanup below does not raise
    # a NameError when prepare_postproc_data() fails before assigning it.
    datadir = None
    try:
        datadir = postutil.prepare_postproc_data(
            input_files=pp_files,
            ranking=ranking,
            run_manager=run_manager
        )
        dst = pp_inputs.get('runs', postbase.RUNS_DIR)
        run_args = {
            postbase.PARA_RUNS: InputFile(source=FSFile(datadir), target=dst)
        }
        arg_list = [
            serialize_arg(postbase.PARA_RUNS, serialize_fh(datadir, dst))
        ]
    except Exception as ex:
        logging.error(ex)
        strace = util.stacktrace(ex)
        run_args = dict()
        arg_list = []
    # Create a new run for the workflow. The identifier for the run group is
    # None.
    run = run_manager.create_run(
        workflow=workflow,
        arguments=arg_list,
        runs=runs
    )
    if strace is not None:
        # If there were data preparation errors set the created run into an
        # error state and return.
        run_manager.update_run(
            run_id=run.run_id,
            state=run.state().error(messages=strace)
        )
    else:
        # Execute the post-processing workflow asynchronously if there were
        # no data preparation errors.
        postproc_state, rundir = backend.exec_workflow(
            run=run,
            template=WorkflowTemplate(
                workflow_spec=workflow_spec,
                parameters=postbase.PARAMETERS
            ),
            arguments=run_args,
            config=workflow.engine_config
        )
        # Update the post-processing workflow run state if it is no longer
        # pending for execution.
        if not postproc_state.is_pending():
            run_manager.update_run(
                run_id=run.run_id,
                state=postproc_state,
                rundir=rundir
            )
    # Remove the temporary input folder on every path where it was actually
    # created (data preparation may have failed before or after creating it).
    if datadir is not None:
        shutil.rmtree(datadir)
def run_erroneous_workflow(service, specfile):
    """Execute the modified helloworld example.

    The main workflow run is expected to succeed; the post-processing run is
    expected to end in an error state.
    """
    with service() as api:
        # Create workflow template, user, and the workflow group.
        workflow_id = create_workflow(
            api,
            source=TEMPLATE_DIR,
            specfile=specfile
        )
        user_id = create_user(api)
    with service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
        # Upload the names file.
        names = io_file(data=NAMES, format='txt/plain')
        file_id = upload_file(api, group_id, names)
        # Run the workflow.
        arguments = [
            serialize_arg('names', serialize_fh(file_id)),
            serialize_arg('greeting', 'Hi')
        ]
        run_id = start_run(api, group_id, arguments=arguments)
    # Poll workflow state every second.
    run = poll_run(service, run_id, user_id)
    assert run['state'] == st.STATE_SUCCESS
    with service() as api:
        wh = api.workflows().get_workflow(workflow_id=workflow_id)
    # Wait (up to ~60s) for the post-processing information to appear in the
    # workflow handle.
    attmpts = 0
    while 'postproc' not in wh:
        time.sleep(1)
        with service() as api:
            wh = api.workflows().get_workflow(workflow_id=workflow_id)
        attmpts += 1
        if attmpts > 60:
            break
    assert 'postproc' in wh
    serialize.validate_workflow_handle(wh)
    # Wait (up to ~60s) for the post-processing run to leave an active state.
    attmpts = 0
    while wh['postproc']['state'] in st.ACTIVE_STATES:
        time.sleep(1)
        with service() as api:
            wh = api.workflows().get_workflow(workflow_id=workflow_id)
        attmpts += 1
        if attmpts > 60:
            break
    assert wh['postproc']['state'] not in st.ACTIVE_STATES
    serialize.validate_workflow_handle(wh)
    # The erroneous template is expected to fail during post-processing.
    assert wh['postproc']['state'] == st.STATE_ERROR
def read_file(
    para: Parameter, scanner: Scanner,
    files: Optional[Tuple[str, str, str]] = None
):
    """Read value for a file parameter.

    Parameters
    ----------
    para: flowserv.model.parameter.base.Parameter
        Workflow template parameter declaration
    scanner: flowserv.scanner.Scanner
        Input scanner.
    files: list, default=None
        List of tuples representing uploaded files. Each tuple has three
        elements: file_id, name, timestamp.
    """
    # Distinguish between the case where a list of uploaded files is given
    # (prompt for a file identifier) and the case where it is not (prompt
    # for a path on disk).
    has_uploads = files is not None
    if has_uploads:
        print('\nSelect file identifier from uploaded files:\n')
        table = ResultTable(
            headline=['ID', 'Name', 'Created at'],
            types=[PARA_STRING] * 3
        )
        for row in files:
            table.add(list(row))
        for line in table.format():
            print(line)
        print('\n{}'.format(para.prompt()), end='')
        filename = scanner.next_string()
    else:
        filename = scanner.next_file()
    # Determine the target path. Only prompt the user when the parameter
    # declaration does not fix a target; fall back to the declared default
    # when the user enters nothing.
    if para.target is not None:
        target_path = para.target
    else:
        print('Target Path:', end='')
        target_path = scanner.next_string()
        if target_path == '':
            target_path = para.default
    # The type of the returned value depends on whether the list of uploaded
    # files was given or not.
    if has_uploads:
        return serialize_fh(file_id=filename, target=target_path)
    return InputFile(FSFile(filename), target_path)
def test_run_helloworld_async(async_service, target): """Execute the helloworld example.""" # -- Setup ---------------------------------------------------------------- # # Start a new run for the workflow template. with async_service() as api: workflow_id = create_workflow(api, source=BENCHMARK_DIR) user_id = create_user(api) with async_service(user_id=user_id) as api: group_id = create_group(api, workflow_id) names = io_file(data=['Alice', 'Bob', 'Zoe'], format='plain/text') file_id = upload_file(api, group_id, names) args = [ serialize_arg('names', serialize_fh(file_id, target)), serialize_arg('sleeptime', 1), serialize_arg('greeting', 'Hi') ] run_id = start_run(api, group_id, arguments=args) # Poll workflow state every second. with async_service(user_id=user_id) as api: run = api.runs().get_run(run_id=run_id) watch_dog = 30 while run['state'] in st.ACTIVE_STATES and watch_dog: time.sleep(1) watch_dog -= 1 with async_service(user_id=user_id) as api: run = api.runs().get_run(run_id=run_id) assert run['state'] == st.STATE_SUCCESS files = dict() for f in run['files']: files[f['name']] = f['id'] fh = api.runs().get_result_file( run_id=run_id, file_id=files['results/greetings.txt'] ) greetings = fh.open().read().decode('utf-8').strip() assert 'Hi Alice' in greetings assert 'Hi Bob' in greetings assert 'Hi Zoe' in greetings fh = api.runs().get_result_file( run_id=run_id, file_id=files['results/analytics.json'] ) assert json.load(fh.open()) is not None
def test_delete_run(prepare_submission): """Test deleting a submission run.""" # Create user, submission and upload the run file. client, headers, benchmark_id, submission_id, file_id = prepare_submission # -- Start run ------------------------------------------------------------ url = SUBMISSION_RUN.format(config.API_PATH(), submission_id) body = { rlbls.RUN_ARGUMENTS: [{ 'name': 'names', 'value': serialize_fh(file_id) }, { 'name': 'greeting', 'value': 'Hi' }, { 'name': 'sleeptime', 'value': 0 }] } r = client.post(url, json=body, headers=headers) assert r.status_code == 201 run_id = r.json['id'] url = RUN_GET.format(config.API_PATH(), run_id) r = client.get(url, headers=headers) assert r.status_code == 200 obj = r.json while obj['state'] == st.STATE_RUNNING: time.sleep(1) r = client.get(url, headers=headers) assert r.status_code == 200 obj = r.json assert obj['state'] == st.STATE_SUCCESS # -- Delete run ----------------------------------------------------------- url = RUNS_LIST.format(config.API_PATH(), submission_id) r = client.get(url, headers=headers) doc = r.json assert len(doc[rlbls.RUN_LIST]) == 1 url = RUN_DELETE.format(config.API_PATH(), run_id) r = client.delete(url, headers=headers) assert r.status_code == 204 url = RUNS_LIST.format(config.API_PATH(), submission_id) r = client.get(url, headers=headers) doc = r.json assert len(doc[rlbls.RUN_LIST]) == 0
def start_run(
    self, arguments: Dict, config: Optional[Dict] = None,
    poll_interval: Optional[int] = None
) -> Run:
    """Run the associated workflow for the given set of arguments.

    Parameters
    ----------
    arguments: dict
        Dictionary of user-provided arguments.
    config: dict, default=None
        Optional implementation-specific configuration settings that can be
        used to overwrite settings that were initialized at object creation.
    poll_interval: int, default=None
        Optional poll interval that is used to check the state of a run
        until it is no longer in active state.

    Returns
    -------
    flowserv.client.app.run.Run
    """
    arguments = self._parameters.set_defaults(arguments=arguments)
    with self.service() as api:
        # Upload any argument values as files that are either of type
        # StringIO or BytesIO.
        arglist = list()
        for key, val in arguments.items():
            # Convert arguments to the format that is expected by the run
            # manager. We pay special attention to file parameters. Input
            # files may be represented as strings, IO buffers or file
            # objects.
            para = self._parameters.get(key)
            if para is None:
                raise err.UnknownParameterError(key)
            if para.is_file():
                # Upload a given file prior to running the application.
                upload_file = None
                target = None
                if isinstance(val, str):
                    # A string value is interpreted as a path on disk.
                    upload_file = FSFile(val)
                elif isinstance(val, StringIO):
                    # Text buffers are re-encoded as UTF-8 byte buffers
                    # before upload.
                    buf = BytesIO(val.read().encode('utf8'))
                    upload_file = IOBuffer(buf)
                elif isinstance(val, BytesIO):
                    upload_file = IOBuffer(val)
                elif isinstance(val, IOHandle):
                    upload_file = val
                else:
                    msg = 'invalid argument {} for {}'.format(key, val)
                    raise err.InvalidArgumentError(msg)
                fh = api.uploads().upload_file(
                    group_id=self.group_id,
                    file=upload_file,
                    name=key
                )
                # Replace the raw value with a serialized handle for the
                # uploaded file.
                val = serialize_fh(fh[filelbls.FILE_ID], target=target)
            arglist.append(serialize_arg(key, val))
        # Execute the run and return the serialized run handle.
        run = api.runs().start_run(
            group_id=self.group_id,
            arguments=arglist,
            config=config
        )
        rh = Run(doc=run, service=self.service)
    # Wait for run to finish if active and a poll interval is given.
    while poll_interval and rh.is_active():
        time.sleep(poll_interval)
        rh = self.poll_run(run_id=rh.run_id)
    # Also wait for any post-processing run to finish when polling.
    pprun = self.get_postproc_results()
    if pprun is not None:
        while poll_interval and pprun.is_active():
            time.sleep(poll_interval)
            pprun = self.get_postproc_results()
    return rh
def test_check_file_argument():
    """Test checking an argument value for representing an input file."""
    # A serialized file handle is recognized as a file argument.
    assert is_fh(serialize_fh('0000'))
    # Arbitrary dictionaries are not recognized as file handles.
    for doc in ({'id': 'names'}, {'dtype': '$record', 'value': []}):
        assert not is_fh(doc)
def test_postproc_workflow(fsconfig, tmpdir):
    """Execute the modified helloworld example."""
    # -- Setup ----------------------------------------------------------------
    #
    # It is important here that we do not use the SQLite in-memory database
    # since this fails (for unknown reason; presumably due to different
    # threads) when the post-processing run is updated.
    # --
    env = Config().basedir(tmpdir).run_async().auth()
    env.update(fsconfig)
    service = LocalAPIFactory(env=env)
    # Start a new run for the workflow template.
    with service() as api:
        # Need to set the file store in the backend to the new instance as
        # well. Otherwise, the post processing workflow may attempt to use
        # the backend which was initialized prior with a different file store.
        workflow_id = create_workflow(
            api,
            source=TEMPLATE_DIR,
            specfile=SPEC_FILE
        )
        user_id = create_user(api)
    # Create four groups and run the workflow with a slightly different input
    # file
    for i in range(4):
        with service(user_id=user_id) as api:
            group_id = create_group(api, workflow_id)
            # Each iteration uses one more name from the NAMES list.
            names = io_file(data=NAMES[:(i + 1)], format='plain/text')
            file_id = upload_file(api, group_id, names)
            # Set the template argument values
            arguments = [
                serialize_arg('names', serialize_fh(file_id)),
                serialize_arg('greeting', 'Hi')
            ]
            run_id = start_run(api, group_id, arguments=arguments)
        # Poll workflow state every second.
        run = poll_run(service, run_id, user_id)
        assert run['state'] == st.STATE_SUCCESS
        with service() as api:
            wh = api.workflows().get_workflow(workflow_id=workflow_id)
        # Wait (up to ~60s) for post-processing information to appear in the
        # workflow handle.
        attmpts = 0
        while 'postproc' not in wh:
            time.sleep(1)
            with service() as api:
                wh = api.workflows().get_workflow(workflow_id=workflow_id)
            attmpts += 1
            if attmpts > 60:
                break
        assert 'postproc' in wh
        serialize.validate_workflow_handle(wh)
        # Wait (up to ~60s) for the post-processing run to finish.
        attmpts = 0
        while wh['postproc']['state'] in st.ACTIVE_STATES:
            time.sleep(1)
            with service() as api:
                wh = api.workflows().get_workflow(workflow_id=workflow_id)
            attmpts += 1
            if attmpts > 60:
                break
        serialize.validate_workflow_handle(wh)
        with service() as api:
            ranking = api.workflows().get_ranking(workflow_id=workflow_id)
        serialize.validate_ranking(ranking)
        # Locate the compare.json result of the post-processing run.
        for fobj in wh['postproc']['files']:
            if fobj['name'] == 'results/compare.json':
                file_id = fobj['id']
        with service(user_id=user_id) as api:
            fh = api.workflows().get_result_file(
                workflow_id=workflow_id,
                file_id=file_id
            )
        # The comparison should cover one entry per completed group run.
        compare = util.read_object(fh.open())
        assert len(compare) == (i + 1)
    # Access the post-processing result files.
    with service() as api:
        fh = api.workflows().get_result_archive(workflow_id=workflow_id)
    assert fh.name.startswith('run')
    assert fh.mime_type == 'application/gzip'
def test_submission_run(prepare_submission):
    """Tests start and monitor a run and access run resources."""
    # Create user, submission and upload the run file.
    client, headers, benchmark_id, submission_id, file_id = prepare_submission
    # -- Start run ------------------------------------------------------------
    url = SUBMISSION_RUN.format(config.API_PATH(), submission_id)
    body = {
        rlbls.RUN_ARGUMENTS: [{
            'name': 'names',
            'value': serialize_fh(file_id)
        }, {
            'name': 'greeting',
            'value': 'Hi'
        }, {
            'name': 'sleeptime',
            'value': 2
        }]
    }
    r = client.post(url, json=body, headers=headers)
    assert r.status_code == 201
    run_id = r.json['id']
    # -- Monitor run state ----------------------------------------------------
    url = RUN_GET.format(config.API_PATH(), run_id)
    r = client.get(url, headers=headers)
    assert r.status_code == 200
    obj = r.json
    while obj['state'] == st.STATE_RUNNING:
        time.sleep(1)
        r = client.get(url, headers=headers)
        assert r.status_code == 200
        obj = r.json
    assert obj['state'] == st.STATE_SUCCESS
    # -- Run resources --------------------------------------------------------
    resources = {r['name']: r for r in obj['files']}
    assert len(resources) == 2
    assert 'results/greetings.txt' in resources
    assert 'results/analytics.json' in resources
    result_file_id = resources['results/greetings.txt']['id']
    res_url = RUN_FILE.format(config.API_PATH(), run_id, result_file_id)
    r = client.get(res_url, headers=headers)
    assert r.status_code == 200
    data = str(r.data)
    assert 'Hi Alice' in data
    assert 'Hi Bob' in data
    # Run archive
    url = RUN_ARCHIVE.format(config.API_PATH(), run_id)
    r = client.get(url, headers=headers)
    assert r.status_code == 200
    # -- Workflow resources ---------------------------------------------------
    # Wait (up to ~10s) for post-processing information to appear in the
    # benchmark handle.
    url = BENCHMARK_GET.format(config.API_PATH(), benchmark_id)
    b = client.get(url).json
    counter = 0
    while 'postproc' not in b:
        counter += 1
        if counter == 10:
            break
        time.sleep(1)
        b = client.get(url).json
    assert counter < 10
    # Wait (up to ~10s) for the post-processing run to succeed.
    counter = 0
    while b['postproc']['state'] != st.STATE_SUCCESS:
        counter += 1
        if counter == 10:
            break
        time.sleep(1)
        b = client.get(url).json
    assert counter < 10
    url = BENCHMARK_ARCHIVE.format(config.API_PATH(), benchmark_id)
    r = client.get(url)
    assert r.status_code == 200
    assert 'results.tar.gz' in r.headers['Content-Disposition']
    resource_id = b['postproc']['files'][0]['id']
    url = BENCHMARK_FILE.format(config.API_PATH(), benchmark_id, resource_id)
    r = client.get(url)
    assert r.status_code == 200
    assert 'results/compare.json' in r.headers['Content-Disposition']
    # -- Leaderboard ----------------------------------------------------------
    # Exercise the leaderboard endpoint with progressively extended query
    # strings.
    url = BENCHMARK_LEADERBOARD.format(config.API_PATH(), benchmark_id)
    r = client.get(url)
    assert r.status_code == 200
    url += '?includeAll'
    r = client.get(url)
    assert r.status_code == 200
    url += '=true'
    r = client.get(url)
    assert r.status_code == 200
    url += '&orderBy=max_len:asc,max_line:desc,avg_count'
    r = client.get(url)
    assert r.status_code == 200
    # Error for runs with invalid arguments.
    url = SUBMISSION_RUN.format(config.API_PATH(), submission_id)
    # 'sleepfor' is not a declared template parameter and must be rejected.
    body = {
        rlbls.RUN_ARGUMENTS: [{
            'name': 'names',
            'value': serialize_fh(file_id)
        }, {
            'name': 'greeting',
            'value': 'Hi'
        }, {
            'name': 'sleepfor',
            'value': 2
        }]
    }
    r = client.post(url, json=body, headers=headers)
    assert r.status_code == 400