Ejemplo n.º 1
0
def test_deserialize_file_argument():
    """Round-trip (de)serialization of file arguments via serialize_fh."""
    # With an explicit target path both values survive the round trip.
    serialized = serialize_fh('0000', 'names.txt')
    file_id, target = deserialize_fh(serialized)
    assert file_id == '0000'
    assert target == 'names.txt'
    # Without a target the deserialized target is None.
    file_id, target = deserialize_fh(serialize_fh('0000'))
    assert file_id == '0000'
    assert target is None
    # A plain dictionary that is not a serialized handle raises an error.
    with pytest.raises(ValueError):
        deserialize_fh({'fileId': '0000'})
Ejemplo n.º 2
0
def test_run_helloworld_sync(sync_service, specfile, state):
    """Execute the helloworld example."""
    # -- Setup: create the workflow template and a user -----------------------
    with sync_service() as api:
        workflow_id = create_workflow(
            api,
            source=TEMPLATE_DIR,
            specfile=specfile
        )
        user_id = create_user(api)
    # Create a group, upload the input names file, and start the run.
    with sync_service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
        file_id = upload_file(
            api, group_id,
            io_file(data=['Alice', 'Bob'], format='plain/text'))
        run_id = start_run(api, group_id, arguments=[
            serialize_arg('names', serialize_fh(file_id, 'data/names.txt')),
            serialize_arg('sleeptime', 3)
        ])
    # -- Validate the run handle against the expected state -------------------
    with sync_service(user_id=user_id) as api:
        run = api.runs().get_run(run_id)
        serialize.validate_run_handle(run, state=state)
        if state == st.STATE_SUCCESS:
            # The run should have the greetings.txt file as a result.
            result_files = {obj['name']: obj['id'] for obj in run['files']}
            assert len(result_files) == 1
            fh = api.runs().get_result_file(
                run_id=run_id, file_id=result_files['results/greetings.txt'])
            greetings = fh.open().read().decode('utf-8').strip()
            assert 'Hello Alice!' in greetings
            assert 'Hello Bob!' in greetings
Ejemplo n.º 3
0
def start_hello_world(api, group_id):
    """Start a new run for the Hello World template. Returns the run identifier
    and the identifier for the input file.

    Parameters
    ----------
    api: flowserv.service.api.API
        Service API manager.
    group_id: string
        Unique group identifier.

    Returns
    -------
    string, string
    """
    # Upload the names file that serves as the only workflow input.
    names_file = io_file(data=['Alice', 'Bob'], format='txt/plain')
    file_id = api.uploads().upload_file(
        group_id=group_id,
        file=names_file,
        name='n.txt'
    )['id']
    # Start the run with the uploaded file as the 'names' argument.
    run_args = [{'name': 'names', 'value': serialize_fh(file_id=file_id)}]
    run_id = api.runs().start_run(
        group_id=group_id,
        arguments=run_args
    )['id']
    api.runs().backend.start(run_id)
    return run_id, file_id
def test_cancel_run_helloworld(async_service):
    """Test cancelling a helloworld run."""
    # -- Setup: template, user, group, and a long-running (sleeptime=10) run --
    with async_service() as api:
        workflow_id = create_workflow(api, source=BENCHMARK_DIR)
        user_id = create_user(api)
    with async_service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
        file_id = upload_file(
            api, group_id,
            io_file(data=['Alice', 'Bob', 'Zoe'], format='plain/text'))
        run_id = start_run(api, group_id, arguments=[
            serialize_arg('names', serialize_fh(file_id)),
            serialize_arg('sleeptime', 10),
            serialize_arg('greeting', 'Hi')
        ])
    # Poll run after sleeping for one second; it should still be active.
    time.sleep(1)
    with async_service(user_id=user_id) as api:
        run = api.runs().get_run(run_id=run_id)
    assert run['state'] in st.ACTIVE_STATES
    # -- Cancel the active run and check state and cancellation message -------
    with async_service(user_id=user_id) as api:
        run = api.runs().cancel_run(run_id=run_id, reason='done')
        assert run['state'] == st.STATE_CANCELED
        assert run['messages'][0] == 'done'
    # The canceled state is persisted and visible in a fresh request.
    with async_service(user_id=user_id) as api:
        run = api.runs().get_run(run_id=run_id)
        assert run['state'] == st.STATE_CANCELED
        assert run['messages'][0] == 'done'
Ejemplo n.º 5
0
def test_read_file_parameter_with_uploads(tmpdir):
    """Test reading a file parameter with a given list of upload files."""
    # The scanner input 'f1' selects the single uploaded file by identifier.
    sc = Scanner(reader=ListReader(['f1']))
    params = [File(name='A', index=0, target='target1')]
    uploads = [('f1', 'F', '123')]
    arguments = cli.read(params, sc, files=uploads)
    assert len(arguments) == 1
    assert arguments['A'] == serialize_fh('f1', target='target1')
Ejemplo n.º 6
0
def test_cancel_run(prepare_submission):
    """Test cancelling a submission run."""
    # Create user, submission and upload the run file.
    client, headers, benchmark_id, submission_id, file_id = prepare_submission
    # -- Start run ------------------------------------------------------------
    run_url = SUBMISSION_RUN.format(config.API_PATH(), submission_id)
    arguments = [
        {'name': 'names', 'value': serialize_fh(file_id)},
        {'name': 'greeting', 'value': 'Hi'},
        {'name': 'sleeptime', 'value': 5}
    ]
    r = client.post(
        run_url, json={rlbls.RUN_ARGUMENTS: arguments}, headers=headers)
    assert r.status_code == 201
    run_id = r.json['id']
    # -- Cancel and delete run ------------------------------------------------
    cancel_url = RUN_CANCEL.format(config.API_PATH(), run_id)
    r = client.put(
        cancel_url, json={rlbls.CANCEL_REASON: 'Test'}, headers=headers)
    assert r.status_code == 200
    cancel_url = RUN_CANCEL.format(config.API_PATH(), run_id)
    # Error when cancelling inactive run or providing invalid body.
    r = client.put(cancel_url, headers=headers)
    assert r.status_code == 400
    # NOTE: 'messgae' is a deliberately invalid key used to trigger the 400.
    r = client.put(cancel_url, json={'messgae': 'invalid'}, headers=headers)
    assert r.status_code == 400
Ejemplo n.º 7
0
def run_postproc_workflow(postproc_spec: Dict, workflow: WorkflowObject,
                          ranking: List, runs: List, run_manager: RunManager,
                          backend: WorkflowController):
    """Run post-processing workflow for a workflow template.

    Prepares a temporary directory containing the result files of all runs in
    the given ranking, creates a new run (with group identifier None) for the
    post-processing workflow, and executes it via the given backend. If data
    preparation fails, the created run is set directly into an error state.

    Parameters
    ----------
    postproc_spec: dict
        Post-processing section of the template; read for its 'workflow'
        specification and the optional 'inputs' mapping ('files', 'runs').
    workflow: WorkflowObject
        Workflow that the post-processing run belongs to.
    ranking: list
        Current run ranking whose result files become the input data.
    runs: list
        Runs that are associated with the created post-processing run.
    run_manager: RunManager
        Manager used to create and update the post-processing run.
    backend: WorkflowController
        Engine that executes the post-processing workflow.
    """
    workflow_spec = postproc_spec.get('workflow')
    pp_inputs = postproc_spec.get('inputs', {})
    pp_files = pp_inputs.get('files', [])
    # Prepare temporary directory with result files for all
    # runs in the ranking. The created directory is the only
    # run argument
    strace = None
    try:
        datadir = postutil.prepare_postproc_data(input_files=pp_files,
                                                 ranking=ranking,
                                                 run_manager=run_manager)
        # Target location for the runs folder inside the workflow run.
        dst = pp_inputs.get('runs', postbase.RUNS_DIR)
        run_args = {
            postbase.PARA_RUNS: InputFile(source=FSFile(datadir), target=dst)
        }
        arg_list = [
            serialize_arg(postbase.PARA_RUNS, serialize_fh(datadir, dst))
        ]
    except Exception as ex:
        # Data preparation failed; remember the stack trace so the run can be
        # created and then immediately set into an error state below.
        logging.error(ex)
        strace = util.stacktrace(ex)
        run_args = dict()
        arg_list = []
    # Create a new run for the workflow. The identifier for the run group is
    # None.
    run = run_manager.create_run(workflow=workflow,
                                 arguments=arg_list,
                                 runs=runs)
    if strace is not None:
        # If there were data preparation errors set the created run into an
        # error state and return.
        run_manager.update_run(run_id=run.run_id,
                               state=run.state().error(messages=strace))
    else:
        # Execute the post-processing workflow asynchronously if
        # there were no data preparation errors.
        postproc_state, rundir = backend.exec_workflow(
            run=run,
            template=WorkflowTemplate(workflow_spec=workflow_spec,
                                      parameters=postbase.PARAMETERS),
            arguments=run_args,
            config=workflow.engine_config)
        # Update the post-processing workflow run state if it is
        # no longer pending for execution.
        if not postproc_state.is_pending():
            run_manager.update_run(run_id=run.run_id,
                                   state=postproc_state,
                                   rundir=rundir)
        # Remove the temporary input folder
        shutil.rmtree(datadir)
def _poll_workflow_handle(service, workflow_id, condition, max_attempts=60):
    """Poll the workflow handle every second while ``condition(wh)`` holds.

    Gives up after ``max_attempts`` polls. Returns the last retrieved handle.
    """
    with service() as api:
        wh = api.workflows().get_workflow(workflow_id=workflow_id)
    attempts = 0
    while condition(wh):
        time.sleep(1)
        with service() as api:
            wh = api.workflows().get_workflow(workflow_id=workflow_id)
        attempts += 1
        if attempts > max_attempts:
            break
    return wh


def run_erroneous_workflow(service, specfile):
    """Execute the modified helloworld example.

    The main run is expected to succeed while the post-processing run is
    expected to end in an error state.
    """
    with service() as api:
        # Create workflow template, user, and the workflow group.
        workflow_id = create_workflow(
            api,
            source=TEMPLATE_DIR,
            specfile=specfile
        )
        user_id = create_user(api)
    with service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
        # Upload the names file.
        names = io_file(data=NAMES, format='txt/plain')
        file_id = upload_file(api, group_id, names)
        # Run the workflow.
        arguments = [
            serialize_arg('names', serialize_fh(file_id)),
            serialize_arg('greeting', 'Hi')
        ]
        run_id = start_run(api, group_id, arguments=arguments)
    # Poll workflow state every second; the main run should succeed.
    run = poll_run(service, run_id, user_id)
    assert run['state'] == st.STATE_SUCCESS
    # Wait for the post-processing information to appear in the handle.
    wh = _poll_workflow_handle(
        service, workflow_id, lambda wh: 'postproc' not in wh)
    assert 'postproc' in wh
    serialize.validate_workflow_handle(wh)
    # Wait for the post-processing run to leave an active state.
    wh = _poll_workflow_handle(
        service, workflow_id,
        lambda wh: wh['postproc']['state'] in st.ACTIVE_STATES)
    assert wh['postproc']['state'] not in st.ACTIVE_STATES
    serialize.validate_workflow_handle(wh)
    assert wh['postproc']['state'] == st.STATE_ERROR
Ejemplo n.º 9
0
def read_file(
    para: Parameter, scanner: Scanner, files: Optional[Tuple[str, str, str]] = None
):
    """Read value for a file parameter.

    Prompts the user (via the scanner) either to pick one of the given
    uploaded files by identifier or to enter a local file path, and to provide
    a target path when the parameter does not declare one.

    Parameters
    ----------
    para: flowserv.model.parameter.base.Parameter
        Workflow template parameter declaration
    scanner: flowserv.scanner.Scanner
        Input scanner.
    files: list, default=None
        List of tuples representing uploaded files. Each tuple has three
        elements: file_id, name, timestamp.

    Returns
    -------
    dict or InputFile
        Serialized file handle when ``files`` is given; an ``InputFile``
        referencing a local path otherwise.
    """
    # NOTE(review): the annotation Optional[Tuple[str, str, str]] looks wrong;
    # the loop below unpacks 3-tuples from `files`, so it should presumably be
    # Optional[List[Tuple[str, str, str]]] — confirm and fix the import.
    # Distinguish between the case where a list of uploaded files
    # is given or not.
    if files is not None:
        print('\nSelect file identifier from uploaded files:\n')
        # Render the available uploads as an ID/Name/Created-at table.
        table = ResultTable(
            headline=['ID', 'Name', 'Created at'],
            types=[PARA_STRING] * 3
        )
        for file_id, name, created_at in files:
            table.add([file_id, name, created_at])
        for line in table.format():
            print(line)
        print('\n{}'.format(para.prompt()), end='')
        filename = scanner.next_string()
    else:
        filename = scanner.next_file()
    target_path = None
    if para.target is None:
        # No declared target: ask the user, falling back to the default.
        print('Target Path:', end='')
        target_path = scanner.next_string()
        if target_path == '':
            target_path = para.default
    else:
        target_path = para.target
    # The type of the returned value depends on whether the list of
    # uploaded files is given or not.
    if files is not None:
        return serialize_fh(file_id=filename, target=target_path)
    else:
        return InputFile(FSFile(filename), target_path)
def test_run_helloworld_async(async_service, target):
    """Execute the helloworld example."""
    # -- Setup: template, user, group and start of the run --------------------
    with async_service() as api:
        workflow_id = create_workflow(api, source=BENCHMARK_DIR)
        user_id = create_user(api)
    with async_service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
        file_id = upload_file(
            api, group_id,
            io_file(data=['Alice', 'Bob', 'Zoe'], format='plain/text'))
        run_id = start_run(api, group_id, arguments=[
            serialize_arg('names', serialize_fh(file_id, target)),
            serialize_arg('sleeptime', 1),
            serialize_arg('greeting', 'Hi')
        ])
    # Poll workflow state every second (at most 30 attempts).
    with async_service(user_id=user_id) as api:
        run = api.runs().get_run(run_id=run_id)
    remaining_polls = 30
    while run['state'] in st.ACTIVE_STATES and remaining_polls:
        time.sleep(1)
        remaining_polls -= 1
        with async_service(user_id=user_id) as api:
            run = api.runs().get_run(run_id=run_id)
    assert run['state'] == st.STATE_SUCCESS
    files = {f['name']: f['id'] for f in run['files']}
    # Validate the greetings result file content.
    fh = api.runs().get_result_file(
        run_id=run_id,
        file_id=files['results/greetings.txt']
    )
    greetings = fh.open().read().decode('utf-8').strip()
    assert 'Hi Alice' in greetings
    assert 'Hi Bob' in greetings
    assert 'Hi Zoe' in greetings
    # The analytics result file contains parseable JSON.
    fh = api.runs().get_result_file(
        run_id=run_id,
        file_id=files['results/analytics.json']
    )
    assert json.load(fh.open()) is not None
Ejemplo n.º 11
0
def test_delete_run(prepare_submission):
    """Test deleting a submission run."""
    # Create user, submission and upload the run file.
    client, headers, benchmark_id, submission_id, file_id = prepare_submission
    # -- Start run ------------------------------------------------------------
    run_url = SUBMISSION_RUN.format(config.API_PATH(), submission_id)
    body = {rlbls.RUN_ARGUMENTS: [
        {'name': 'names', 'value': serialize_fh(file_id)},
        {'name': 'greeting', 'value': 'Hi'},
        {'name': 'sleeptime', 'value': 0}
    ]}
    r = client.post(run_url, json=body, headers=headers)
    assert r.status_code == 201
    run_id = r.json['id']
    # Poll the run until it leaves the RUNNING state.
    get_url = RUN_GET.format(config.API_PATH(), run_id)
    r = client.get(get_url, headers=headers)
    assert r.status_code == 200
    obj = r.json
    while obj['state'] == st.STATE_RUNNING:
        time.sleep(1)
        r = client.get(get_url, headers=headers)
        assert r.status_code == 200
        obj = r.json
    assert obj['state'] == st.STATE_SUCCESS
    # -- Delete run -----------------------------------------------------------
    list_url = RUNS_LIST.format(config.API_PATH(), submission_id)
    r = client.get(list_url, headers=headers)
    assert len(r.json[rlbls.RUN_LIST]) == 1
    r = client.delete(
        RUN_DELETE.format(config.API_PATH(), run_id), headers=headers)
    assert r.status_code == 204
    # After deletion the run listing for the submission is empty.
    r = client.get(list_url, headers=headers)
    assert len(r.json[rlbls.RUN_LIST]) == 0
Ejemplo n.º 12
0
    def start_run(self,
                  arguments: Dict,
                  config: Optional[Dict] = None,
                  poll_interval: Optional[int] = None) -> Run:
        """Run the associated workflow for the given set of arguments.

        File-typed argument values are uploaded to the run group before the
        run is started. If a poll interval is given, the method blocks until
        the run (and any post-processing run) is no longer active.

        Parameters
        ----------
        arguments: dict
            Dictionary of user-provided arguments.
        config: dict, default=None
            Optional implementation-specific configuration settings that can be
            used to overwrite settings that were initialized at object creation.
        poll_interval: int, default=None
            Optional poll interval that is used to check the state of a run
            until it is no longer in active state.

        Returns
        -------
        flowserv.client.app.run.Run

        Raises
        ------
        flowserv.error.UnknownParameterError
            If an argument key does not match a declared template parameter.
        flowserv.error.InvalidArgumentError
            If a file argument value is not a string, IO buffer or IO handle.
        """
        arguments = self._parameters.set_defaults(arguments=arguments)
        with self.service() as api:
            # Upload any argument values as files that are either of type
            # StringIO or BytesIO.
            arglist = list()
            for key, val in arguments.items():
                # Convert arguments to the format that is expected by the run
                # manager. We pay special attention to file parameters. Input
                # files may be represented as strings, IO buffers or file
                # objects.
                para = self._parameters.get(key)
                if para is None:
                    raise err.UnknownParameterError(key)
                if para.is_file():
                    # Upload a given file prior to running the application.
                    upload_file = None
                    # NOTE(review): target is never assigned a non-None value
                    # below, so serialize_fh always receives target=None —
                    # confirm whether a per-parameter target was intended.
                    target = None
                    if isinstance(val, str):
                        upload_file = FSFile(val)
                    elif isinstance(val, StringIO):
                        # Re-encode text buffers as bytes for the upload.
                        buf = BytesIO(val.read().encode('utf8'))
                        upload_file = IOBuffer(buf)
                    elif isinstance(val, BytesIO):
                        upload_file = IOBuffer(val)
                    elif isinstance(val, IOHandle):
                        upload_file = val
                    else:
                        msg = 'invalid argument {} for {}'.format(key, val)
                        raise err.InvalidArgumentError(msg)
                    fh = api.uploads().upload_file(group_id=self.group_id,
                                                   file=upload_file,
                                                   name=key)
                    # Replace the raw value with the serialized file handle.
                    val = serialize_fh(fh[filelbls.FILE_ID], target=target)
                arglist.append(serialize_arg(key, val))
            # Execute the run and return the serialized run handle.
            run = api.runs().start_run(group_id=self.group_id,
                                       arguments=arglist,
                                       config=config)
            rh = Run(doc=run, service=self.service)
            # Wait for run to finish if active and a poll interval is given.
            while poll_interval and rh.is_active():
                time.sleep(poll_interval)
                rh = self.poll_run(run_id=rh.run_id)
            # Also wait for an associated post-processing run, if one exists.
            pprun = self.get_postproc_results()
            if pprun is not None:
                while poll_interval and pprun.is_active():
                    time.sleep(poll_interval)
                    pprun = self.get_postproc_results()
            return rh
Ejemplo n.º 13
0
def test_check_file_argument():
    """Test checking an argument value for representing an input file."""
    # A serialized file handle is recognized as a file argument.
    assert is_fh(serialize_fh('0000'))
    # Arbitrary dictionaries are not recognized as file handles.
    for value in ({'id': 'names'}, {'dtype': '$record', 'value': []}):
        assert not is_fh(value)
def test_postproc_workflow(fsconfig, tmpdir):
    """Execute the modified helloworld example.

    Runs the workflow four times with growing input lists and, after each
    run, waits for the post-processing workflow to finish and validates the
    ranking and the post-processing result file.
    """
    # -- Setup ----------------------------------------------------------------
    #
    # It is important here that we do not use the SQLite in-memory database
    # since this fails (for unknown reason; presumably due to different threads)
    # when the post-processing run is updated.
    # --
    env = Config().basedir(tmpdir).run_async().auth()
    env.update(fsconfig)
    service = LocalAPIFactory(env=env)
    # Start a new run for the workflow template.
    with service() as api:
        # Need to set the file store in the backend to the new instance as
        # well. Otherwise, the post processing workflow may attempt to use
        # the backend which was initialized prior with a different file store.
        workflow_id = create_workflow(
            api,
            source=TEMPLATE_DIR,
            specfile=SPEC_FILE
        )
        user_id = create_user(api)
    # Create four groups and run the workflow with a slightly different input
    # file
    for i in range(4):
        with service(user_id=user_id) as api:
            group_id = create_group(api, workflow_id)
            names = io_file(data=NAMES[:(i + 1)], format='plain/text')
            file_id = upload_file(api, group_id, names)
            # Set the template argument values
            arguments = [
                serialize_arg('names', serialize_fh(file_id)),
                serialize_arg('greeting', 'Hi')
            ]
            run_id = start_run(api, group_id, arguments=arguments)
        # Poll workflow state every second.
        run = poll_run(service, run_id, user_id)
        assert run['state'] == st.STATE_SUCCESS
        with service() as api:
            wh = api.workflows().get_workflow(workflow_id=workflow_id)
        # Wait (at most ~60s) for post-processing info to appear in the handle.
        attmpts = 0
        while 'postproc' not in wh:
            time.sleep(1)
            with service() as api:
                wh = api.workflows().get_workflow(workflow_id=workflow_id)
            attmpts += 1
            if attmpts > 60:
                break
        assert 'postproc' in wh
        serialize.validate_workflow_handle(wh)
        # Wait (at most ~60s) for the post-processing run to finish.
        attmpts = 0
        while wh['postproc']['state'] in st.ACTIVE_STATES:
            time.sleep(1)
            with service() as api:
                wh = api.workflows().get_workflow(workflow_id=workflow_id)
            attmpts += 1
            if attmpts > 60:
                break
        serialize.validate_workflow_handle(wh)
        # The workflow ranking must be a valid serialization.
        with service() as api:
            ranking = api.workflows().get_ranking(workflow_id=workflow_id)
        serialize.validate_ranking(ranking)
        # Locate the compare.json post-processing result file.
        for fobj in wh['postproc']['files']:
            if fobj['name'] == 'results/compare.json':
                file_id = fobj['id']
        with service(user_id=user_id) as api:
            fh = api.workflows().get_result_file(
                workflow_id=workflow_id,
                file_id=file_id
            )
        # The comparison includes one entry per completed run so far.
        compare = util.read_object(fh.open())
        assert len(compare) == (i + 1)
    # Access the post-processing result files.
    with service() as api:
        fh = api.workflows().get_result_archive(workflow_id=workflow_id)
    assert fh.name.startswith('run')
    assert fh.mime_type == 'application/gzip'
Ejemplo n.º 15
0
def test_submission_run(prepare_submission):
    """Tests start and monitor a run and access run resources."""
    # Create user, submission and upload the run file.
    client, headers, benchmark_id, submission_id, file_id = prepare_submission
    # -- Start run ------------------------------------------------------------
    url = SUBMISSION_RUN.format(config.API_PATH(), submission_id)
    body = {
        rlbls.RUN_ARGUMENTS: [{
            'name': 'names',
            'value': serialize_fh(file_id)
        }, {
            'name': 'greeting',
            'value': 'Hi'
        }, {
            'name': 'sleeptime',
            'value': 2
        }]
    }
    r = client.post(url, json=body, headers=headers)
    assert r.status_code == 201
    run_id = r.json['id']
    # -- Monitor run state ----------------------------------------------------
    url = RUN_GET.format(config.API_PATH(), run_id)
    r = client.get(url, headers=headers)
    assert r.status_code == 200
    obj = r.json
    # Poll every second until the run leaves the RUNNING state.
    while obj['state'] == st.STATE_RUNNING:
        time.sleep(1)
        r = client.get(url, headers=headers)
        assert r.status_code == 200
        obj = r.json
    assert obj['state'] == st.STATE_SUCCESS
    # -- Run resources --------------------------------------------------------
    # The run produces exactly two result files.
    resources = {r['name']: r for r in obj['files']}
    assert len(resources) == 2
    assert 'results/greetings.txt' in resources
    assert 'results/analytics.json' in resources
    # Fetch the greetings file and check its content.
    result_file_id = resources['results/greetings.txt']['id']
    res_url = RUN_FILE.format(config.API_PATH(), run_id, result_file_id)
    r = client.get(res_url, headers=headers)
    assert r.status_code == 200
    data = str(r.data)
    assert 'Hi Alice' in data
    assert 'Hi Bob' in data
    # Run archive
    url = RUN_ARCHIVE.format(config.API_PATH(), run_id)
    r = client.get(url, headers=headers)
    assert r.status_code == 200
    # -- Workflow resources ---------------------------------------------------
    url = BENCHMARK_GET.format(config.API_PATH(), benchmark_id)
    b = client.get(url).json
    # Wait (at most ~10s) for post-processing info to appear in the handle.
    counter = 0
    while 'postproc' not in b:
        counter += 1
        if counter == 10:
            break
        time.sleep(1)
        b = client.get(url).json
    assert counter < 10
    # Wait (at most ~10s) for the post-processing run to succeed.
    counter = 0
    while b['postproc']['state'] != st.STATE_SUCCESS:
        counter += 1
        if counter == 10:
            break
        time.sleep(1)
        b = client.get(url).json
    assert counter < 10
    # The benchmark result archive is downloadable.
    url = BENCHMARK_ARCHIVE.format(config.API_PATH(), benchmark_id)
    r = client.get(url)
    assert r.status_code == 200
    assert 'results.tar.gz' in r.headers['Content-Disposition']
    # Download the first post-processing result file.
    resource_id = b['postproc']['files'][0]['id']
    url = BENCHMARK_FILE.format(config.API_PATH(), benchmark_id, resource_id)
    r = client.get(url)
    assert r.status_code == 200
    assert 'results/compare.json' in r.headers['Content-Disposition']
    # -- Leaderboard ----------------------------------------------------------
    # The leaderboard endpoint accepts optional query parameters; each of the
    # progressively extended URLs below must return HTTP 200.
    url = BENCHMARK_LEADERBOARD.format(config.API_PATH(), benchmark_id)
    r = client.get(url)
    assert r.status_code == 200
    url += '?includeAll'
    r = client.get(url)
    assert r.status_code == 200
    url += '=true'
    r = client.get(url)
    assert r.status_code == 200
    url += '&orderBy=max_len:asc,max_line:desc,avg_count'
    r = client.get(url)
    assert r.status_code == 200
    # Error for runs with invalid arguments.
    # NOTE: 'sleepfor' is a deliberately unknown parameter name.
    url = SUBMISSION_RUN.format(config.API_PATH(), submission_id)
    body = {
        rlbls.RUN_ARGUMENTS: [{
            'name': 'names',
            'value': serialize_fh(file_id)
        }, {
            'name': 'greeting',
            'value': 'Hi'
        }, {
            'name': 'sleepfor',
            'value': 2
        }]
    }
    r = client.post(url, json=body, headers=headers)
    assert r.status_code == 400