def test_store_and_copy_folder(store_id, tmpdir):
    """Test uploading and downloading folder files."""
    # -- Setup ----------------------------------------------------------------
    # Initialize the file store and create the input file structure.
    fs = create_store(store_id, os.path.join(tmpdir, 'fs'))
    files = create_files(os.path.join(tmpdir, 'data'))
    # -- Store all files in the file store (file D, the last file in the
    # returned file list, is stored as E.json instead of docs/D.json) --------
    file_d, _ = files[-1]
    files = files[:-1] + [(file_d, 'E.json')]
    KEY = '0000'
    fs.store_files(files=files, dst=KEY)
    assert json.load(fs.load_file(os.path.join(KEY, FILE_A)).open()) == DATA1
    assert json.load(fs.load_file(os.path.join(KEY, FILE_B)).open()) == DATA2
    assert json.load(fs.load_file(os.path.join(KEY, FILE_C)).open()) == DATA3
    assert json.load(fs.load_file(os.path.join(KEY, FILE_DATA)).open()) == EXDATA  # noqa: E501
    assert json.load(fs.load_file(os.path.join(KEY, 'E.json')).open()) == DATA4
    with pytest.raises(err.UnknownFileError):
        fs.load_file(os.path.join(KEY, FILE_D)).open()
    # -- Download files -------------------------------------------------------
    DOWNLOAD = os.path.join(tmpdir, 'download')
    fs.copy_folder(key=KEY, dst=DOWNLOAD)
    assert util.read_object(os.path.join(DOWNLOAD, FILE_A)) == DATA1
    assert util.read_object(os.path.join(DOWNLOAD, FILE_B)) == DATA2
    assert util.read_object(os.path.join(DOWNLOAD, FILE_C)) == DATA3
    assert util.read_object(os.path.join(DOWNLOAD, FILE_DATA)) == EXDATA
    assert util.read_object(os.path.join(DOWNLOAD, 'E.json')) == DATA4
    assert not os.path.exists(os.path.join(DOWNLOAD, FILE_D))
Example #2
def ENGINECONFIG(env: Dict, validate: Optional[bool] = False) -> Dict:
    """Read engine configuration information from the file that is specified
    by the environment variable *FLOWSERV_SERIAL_ENGINECONFIG*.

    Returns an empty dictionary if the environment variable is not set. If the
    validate flag is True, the read document is validated against the
    configuration document schema that is defined in ``config.json``.

    Parameters
    ----------
    env: dict
        Configuration object that provides access to configuration
        parameters in the environment.
    validate: bool, default=False
        Validate the read configuration object if True.

    Returns
    -------
    dict
    """
    filename = env.get(FLOWSERV_SERIAL_ENGINECONFIG)
    if not filename:
        return dict()
    doc = util.read_object(filename=filename)
    if validate:
        validator.validate(doc)
    return doc
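A minimal usage sketch for ENGINECONFIG, assuming the module's own imports (util, FLOWSERV_SERIAL_ENGINECONFIG) are in scope; the temporary file and the 'workers' key are purely illustrative:

import json
import tempfile

# Write a small engine configuration document to a temporary Json file.
with tempfile.NamedTemporaryFile('w', suffix='.json', delete=False) as f:
    json.dump({'workers': 2}, f)
    config_file = f.name
# Point the environment variable at the file and read the document back.
env = {FLOWSERV_SERIAL_ENGINECONFIG: config_file}
assert ENGINECONFIG(env) == {'workers': 2}
# An unset variable yields an empty dictionary.
assert ENGINECONFIG(dict()) == dict()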
Example #3
def read_run_results(run: RunObject, schema: ResultSchema, rundir: str):
    """Read the run results from the result file that is specified in the workflow
    result schema. If the file is not found we currently do not raise an error.

    Parameters
    ----------
    run: flowserv.model.base.RunObject
        Handle for a workflow run.
    schema: flowserv.model.template.schema.ResultSchema
        Workflow result schema specification that contains the reference to the
        result file key.
    rundir: string
        Directory containing run result files.
    """
    filename = os.path.join(rundir, schema.result_file)
    if os.path.exists(filename):
        results = util.read_object(filename)
        # Create a dictionary of result values.
        values = dict()
        for col in schema.columns:
            val = util.jquery(doc=results, path=col.jpath())
            col_id = col.column_id
            if val is None and col.required:
                msg = "missing value for '{}'".format(col_id)
                raise err.ConstraintViolationError(msg)
            elif val is not None:
                values[col_id] = col.cast(val)
        run.result = values
Example #4
def test_access_run_result_files_local(database, tmpdir):
    """Test accessing run result files."""
    # -- Setup ----------------------------------------------------------------
    env = Config().basedir(tmpdir).auth()
    fs = FS(env=env)
    workflow_id, group_id, run_id, user_id = success_run(database, fs, tmpdir)
    local_service = LocalAPIFactory(env=env, db=database, engine=StateEngine())
    # -- Read result files ----------------------------------------------------
    with local_service(user_id=user_id) as api:
        # Map file names to file handles.
        r = api.runs().get_run(run_id=run_id)
        files = dict()
        for fh in r['files']:
            files[fh['name']] = fh['id']
        # Read content of result files.
        fh = api.runs().get_result_file(run_id=run_id,
                                        file_id=files['run/results/B.json'])
        results = util.read_object(fh.open())
        assert results == {'B': 1}
    # -- Error when user 2 attempts to read file ------------------------------
    with database.session() as session:
        user_2 = create_user(session, active=True)
    with local_service(user_id=user_2) as api:
        with pytest.raises(err.UnauthorizedAccessError):
            api.runs().get_result_file(run_id=run_id,
                                       file_id=files['run/results/B.json'])
    # -- With an open access policy user 2 can read the data file -------------
    env = Config().basedir(tmpdir).open_access()
    local_service = LocalAPIFactory(env=env, db=database, engine=StateEngine())
    with local_service(user_id=user_2) as api:
        api.runs().get_result_file(run_id=run_id,
                                   file_id=files['run/results/B.json'])
Example #5
def read_run_results(run: RunObject, schema: ResultSchema,
                     runstore: StorageVolume):
    """Read the run results from the result file that is specified in the workflow
    result schema. If the file is not found we currently do not raise an error.

    Parameters
    ----------
    run: flowserv.model.base.RunObject
        Handle for a workflow run.
    schema: flowserv.model.template.schema.ResultSchema
        Workflow result schema specification that contains the reference to the
        result file key.
    runstore: flowserv.volume.base.StorageVolume
        Storage volume containing the run (result) files for a successful
        workflow run.
    """
    with runstore.load(schema.result_file).open() as f:
        results = util.read_object(f)
    # Create a dictionary of result values.
    values = dict()
    for col in schema.columns:
        val = util.jquery(doc=results, path=col.jpath())
        col_id = col.column_id
        if val is None and col.required:
            msg = "missing value for '{}'".format(col_id)
            raise err.ConstraintViolationError(msg)
        elif val is not None:
            values[col_id] = col.cast(val)
    run.result = values
Example #6
def test_load_config_from_json_file():
    """Test loading worker factory configuration from a Json file."""
    worker = WorkerFactory.load_json(JSON_FILE).get('test')
    assert worker.variables['a'] == 0
    # Passing the file content directly to the object constructor should yield
    # the same result.
    worker = WorkerFactory(util.read_object(JSON_FILE)).get('test')
    assert worker.variables['a'] == 0
Example #7
def multi_by_x(filename: str, x: int) -> int:
    """Read input file with single integer value (in Json format) and multiplies
    the value with the given x.

    Expects a Json object with format: {"value": v}

    Returns the multiplication result.
    """
    doc = util.read_object(filename=filename)
    return doc['value'] * x
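A hedged usage sketch for multi_by_x; util.write_object follows the signature shown in Example #22 below, and the temporary path is hypothetical:

import os
import tempfile

# Write {"value": 3} to a Json file and multiply it by 4.
filename = os.path.join(tempfile.mkdtemp(), 'value.json')
util.write_object(filename=filename, obj={'value': 3})
assert multi_by_x(filename=filename, x=4) == 12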
Example #8
def create_group(ctx, workflow, name, members, configfile):
    """Create a new user group."""
    workflow_id = ctx.obj.get_workflow(ctx.params)
    config = util.read_object(configfile) if configfile else None
    with service() as api:
        doc = api.groups().create_group(
            workflow_id=workflow_id,
            name=name,
            members=members.split(',') if members is not None else None,
            engine_config=config)
    group_id = doc[labels.GROUP_ID]
    click.echo('export {}={}'.format(ctx.obj.vars['group'], group_id))
Example #9
def test_parse_top_tagger_template():
    """Test parsing the Top-Tagger template that contains parameter references
    as workflow steps.
    """
    template = WorkflowTemplate.from_dict(
        doc=util.read_object(TEMPLATE_TOPTAGGER))
    doc = {'environment': 'test', 'commands': ['python analyze']}
    args = {'tagger': ActorValue(spec=doc)}
    steps, _, _ = parser.parse_template(template=template, arguments=args)
    assert len(steps) == 2
    step = steps[0]
    assert step.image == 'test'
    assert step.commands == ['python analyze']
Example #10
    def worker_config(self) -> Union[Dict, List]:
        """Get the configuration settings for workers that are used by the
        serial workflow controller.

        If the configuration is not set an empty dictionary is returned.

        Returns
        -------
        dict or list
        """
        wconf = self.get(FLOWSERV_SERIAL_WORKERS, dict())
        if wconf and isinstance(wconf, str):
            wconf = util.read_object(filename=wconf)
        return wconf if wconf else dict()
Example #11
def test_load_file_and_write(store_id, tmpdir):
    """Test getting a previously uploaded file and writing the content to the
    file system.
    """
    # -- Setup ----------------------------------------------------------------
    # Initialize the file store and create the input file structure. Upload
    # only file A.
    fs = create_store(store_id, os.path.join(tmpdir, 'fs'))
    files = create_files(os.path.join(tmpdir, 'data'))
    KEY = '0000'
    fs.store_files(files=[files[0]], dst=KEY)
    # -- Read and write file A.
    filename = os.path.join(tmpdir, 'tmp')
    fs.load_file(os.path.join(KEY, FILE_A)).store(filename)
    assert util.read_object(filename) == DATA1
Example #12
def read_config_obj(filename: Union[str, Dict]) -> Dict:
    """Read configuration object from a file.

    This function only attempts to read an object from disk if the filename
    argument is a string.

    Parameters
    ----------
    filename: str or dict
        Path to a file on disk, or a dictionary that is returned as-is.

    Returns
    -------
    dict
    """
    if isinstance(filename, dict):
        return filename
    return util.read_object(filename=filename)
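A short sketch of the pass-through behavior; the dictionary content and file name are illustrative:

# A dictionary argument is returned unchanged.
assert read_config_obj({'workers': 2}) == {'workers': 2}
# A string argument is treated as a file path and loaded from disk:
# config = read_config_obj('config.yaml')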
Example #13
def test_parse_hello_world_notebook_template():
    """Extract commands and output files from the 'Hello world' template
    that included a notebook step.
    """
    template = WorkflowTemplate.from_dict(
        doc=util.read_object(TEMPLATE_NOTEBOOK))
    steps, args, output_files = parser.parse_template(
        template=template, arguments={'greeting': 'Hey'})
    assert len(steps) == 2
    step = steps[0]
    assert step.notebook == 'notebooks/HelloWorld.ipynb'
    assert step.inputs == ['data/names.txt', 'notebooks/HelloWorld.ipynb']
    assert step.outputs == ['results/greetings.txt']
    assert output_files == ['results/greetings.txt', 'results/analytics.json']
    assert args == {
        'inputfile': 'data/names.txt',
        'outputfile': 'results/greetings.txt',
        'greeting': 'Hey'
    }
Example #14
    def __init__(self, basedir):
        """Read the run result index file in the given base directory to
        initialize the result handles.

        Parameters
        ----------
        basedir: string
            Base directory for run results that have been made available to
            the post-processing workflow.
        """
        self.runs = list()
        doc = util.read_object(filename=os.path.join(basedir, base.RUNS_FILE))
        for obj in doc:
            run_id = obj[base.LABEL_ID]
            name = obj[base.LABEL_NAME]
            files = dict()
            for filename in obj[base.LABEL_FILES]:
                files[filename] = os.path.join(basedir, run_id, filename)
            run = Run(run_id=run_id, name=name, files=files)
            self.runs.append(run)
Example #15
def test_parse_hello_world_template():
    """Extract commands and output files from the 'Hello world' template."""
    template = WorkflowTemplate.from_dict(
        doc=util.read_object(TEMPLATE_HELLOWORLD))
    steps, args, output_files = parser.parse_template(template=template,
                                                      arguments={
                                                          'names': 'names.txt',
                                                          'sleeptime': 10
                                                      })
    assert len(steps) == 1
    step = steps[0]
    assert step.image == 'python:3.7'
    assert len(step.commands) == 2
    assert output_files == ['results/greetings.txt', 'results/analytics.json']
    assert args == {
        'inputfile': 'names.txt',
        'outputfile': 'results/greetings.txt',
        'sleeptime': 10,
        'greeting': 'Hello'
    }
Example #16
def create_workflow(ctx, key, name, description, instructions, specfile,
                    manifest, template, configfile, ignore_postproc):
    """Create a new workflow for a given template."""
    config = util.read_object(configfile) if configfile else None
    with service() as api:
        # The create_workflow() method is only supported by the local API. If
        # an attempt is made to create a new workflow via a remote API an
        # error will be raised.
        doc = api.workflows().create_workflow(
            source=template,
            identifier=key,
            name=name,
            description=description,
            instructions=read_instructions(instructions),
            specfile=specfile,
            manifestfile=manifest,
            engine_config=config,
            ignore_postproc=ignore_postproc)
    workflow_id = doc[labels.WORKFLOW_ID]
    click.echo('export {}={}'.format(ctx.obj.vars['workflow'], workflow_id))
Example #17
def read_config(filename: str,
                format: Optional[str] = None,
                validate: Optional[bool] = False) -> Dict:
    """Read worker configuration object from a given file.

    Parameters
    ----------
    filename: str
        Input file name
    format: string, optional
        Optional file format identifier.
    validate: bool, default=False
        Validate the given worker specifications against the `workerSpec`
        schema if True.

    Returns
    -------
    dict
    """
    return convert_config(doc=util.read_object(filename, format=format),
                          validate=validate)
Example #18
def main(rundir, outputfile):
    """Write greeting for every name in a given input file to the output file.
    The optional waiting period delays the output between each input name.

    """
    # Read avg_count for all runs in the ranking
    results = list()
    for run in Runs(rundir):
        filename = run.get_file(name='results/analytics.json')
        doc = util.read_object(filename=filename)
        results.append(doc)
    # Write analytics results. Ensure that output directory exists:
    # influenced by http://stackoverflow.com/a/12517490
    if not os.path.exists(os.path.dirname(outputfile)):
        try:
            os.makedirs(os.path.dirname(outputfile))
        except OSError as exc:  # guard against race condition
            if exc.errno != errno.EEXIST:
                raise
    with open(outputfile, "at") as f:
        json.dump(results, f)
Example #19
def test_parse_hello_world_template():
    """Extract commands and output files from the 'Hello world' template."""
    template = WorkflowTemplate.from_dict(
        doc=util.read_object(TEMPLATE_HELLOWORLD))
    steps, args, output_files = parser.parse_template(template=template,
                                                      arguments={
                                                          'names': 'names.txt',
                                                          'sleeptime': 10
                                                      })
    assert len(steps) == 1
    step = steps[0]
    assert step.image == 'python:2.7'
    assert len(step.commands) == 1
    assert step.commands[
        0] == '${python} "${helloworld}" --inputfile "${inputfile}" --outputfile "${outputfile}" --sleeptime ${sleeptime}'  # noqa: E501
    assert output_files == ['results/greetings.txt']
    assert args == {
        'helloworld': 'code/helloworld.py',
        'inputfile': 'names.txt',
        'outputfile': 'results/greetings.txt',
        'sleeptime': '10'
    }  # noqa: E501
Example #20
def main(rundir, outputfile):
    """
    Create summary of analytics results for all runs.
    """
    # Read avg_count for all runs in the ranking
    results = list()
    for run in Runs(rundir):
        filename = run.get_file(name='results/analytics.json')
        doc = util.read_object(filename=filename)
        results.append(doc)
        # Delay execution to allow for testing running post-processing
        # workflows
        time.sleep(1)
    # Write analytics results. Ensure that output directory exists:
    # influenced by http://stackoverflow.com/a/12517490
    if not os.path.exists(os.path.dirname(outputfile)):
        try:
            os.makedirs(os.path.dirname(outputfile))
        except OSError as exc:  # guard against race condition
            if exc.errno != errno.EEXIST:
                raise
    with open(outputfile, "at") as f:
        json.dump(results, f)
Example #21
def read_config_obj(var: str, env: Dict) -> Dict:
    """Read configuration object from a given environment variables.

    If the variable is set and contains a dictionary as its value, that value
    is returned. Otherwise, it is assumed that the variable references a Json
    or Yaml file that contains the configuration object.

    Parameters
    ----------
    var: string
        Name of the environment variable.
    env: dict
        Dictionary representing the current environment settings.

    Returns
    -------
    dict
    """
    obj = env.get(var)
    if not obj:
        return None
    if isinstance(obj, dict):
        return obj
    return read_object(filename=obj)
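A sketch with hypothetical variable names, covering the three cases (inline dictionary, unset variable, file reference):

env = {'MY_CONFIG': {'workers': 2}}
# An inline dictionary value is returned as-is.
assert read_config_obj(var='MY_CONFIG', env=env) == {'workers': 2}
# An unset variable yields None.
assert read_config_obj(var='UNSET_VAR', env=env) is None
# A string value is interpreted as a path to a Json or Yaml file:
# read_config_obj(var='CONF_FILE', env={'CONF_FILE': 'config.yaml'})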
Example #22
def test_read_write_object(tmpdir):
    """Test reading and writing dictionary objects to file in Json format
    and in Yaml format.
    """
    doc = {'A': 1, 'B': 2, 'C': {'D': 3}}
    json_file = os.path.join(str(tmpdir), 'file.json')
    txt_file = os.path.join(str(tmpdir), 'file.txt')
    yaml_file = os.path.join(str(tmpdir), 'file.yaml')
    # Read and write Json file
    util.write_object(filename=json_file, obj=doc)
    obj = util.read_object(filename=json_file)
    assert obj == doc
    obj = util.read_object(filename=json_file, format=util.FORMAT_JSON)
    assert obj == doc
    util.write_object(filename=json_file, obj=doc, format=util.FORMAT_YAML)
    obj = util.read_object(filename=json_file, format=util.FORMAT_YAML)
    assert obj == doc
    with pytest.raises(JSONDecodeError):
        util.read_object(filename=json_file)
    # Yaml format
    util.write_object(filename=yaml_file, obj=doc)
    obj = util.read_object(filename=yaml_file)
    assert obj == doc
    obj = util.read_object(filename=yaml_file, format=util.FORMAT_YAML)
    assert obj == doc
    util.write_object(filename=yaml_file, obj=doc, format=util.FORMAT_JSON)
    obj = util.read_object(filename=yaml_file, format=util.FORMAT_JSON)
    assert obj == doc
    doc = util.read_object(filename=yaml_file)
    buf = io.BytesIO(str(doc).encode("utf-8"))
    obj = util.read_object(filename=buf, format=util.FORMAT_YAML)
    assert doc == obj
    # The Yaml parser can read Json files
    obj = util.read_object(filename=yaml_file)
    assert obj == doc
    # File with non-standard suffix is written in Yaml format
    util.write_object(filename=txt_file, obj=doc)
    obj = util.read_object(filename=txt_file)
    assert obj == doc
    obj = util.read_object(filename=txt_file, format=util.FORMAT_YAML)
    assert obj == doc
    with pytest.raises(JSONDecodeError):
        util.read_object(filename=txt_file, format=util.FORMAT_JSON)
    with pytest.raises(ValueError):
        util.read_object(filename=txt_file, format='UNKNOWN')
    with pytest.raises(ValueError):
        util.write_object(filename=txt_file, obj=doc, format='UNKNOWN')
Example #23
def test_postproc_workflow(fsconfig, tmpdir):
    """Execute the modified helloworld example."""
    # -- Setup ----------------------------------------------------------------
    #
    # It is important here that we do not use the SQLite in-memory database
    # since this fails (for unknown reason; presumably due to different threads)
    # when the post-processing run is updated.
    # --
    env = Config().basedir(tmpdir).run_async().auth()
    env.update(fsconfig)
    service = LocalAPIFactory(env=env)
    # Start a new run for the workflow template.
    with service() as api:
        # Need to set the file store in the backend to the new instance as
        # well. Otherwise, the post-processing workflow may attempt to use
        # a backend that was initialized earlier with a different file store.
        workflow_id = create_workflow(
            api,
            source=TEMPLATE_DIR,
            specfile=SPEC_FILE
        )
        user_id = create_user(api)
    # Create four groups and run the workflow with a slightly different
    # input file for each group.
    for i in range(4):
        with service(user_id=user_id) as api:
            group_id = create_group(api, workflow_id)
            names = io_file(data=NAMES[:(i + 1)], format='plain/text')
            file_id = upload_file(api, group_id, names)
            # Set the template argument values
            arguments = [
                serialize_arg('names', serialize_fh(file_id)),
                serialize_arg('greeting', 'Hi')
            ]
            run_id = start_run(api, group_id, arguments=arguments)
        # Poll workflow state every second.
        run = poll_run(service, run_id, user_id)
        assert run['state'] == st.STATE_SUCCESS
        with service() as api:
            wh = api.workflows().get_workflow(workflow_id=workflow_id)
        attempts = 0
        while 'postproc' not in wh:
            time.sleep(1)
            with service() as api:
                wh = api.workflows().get_workflow(workflow_id=workflow_id)
            attempts += 1
            if attempts > 60:
                break
        assert 'postproc' in wh
        serialize.validate_workflow_handle(wh)
        attempts = 0
        while wh['postproc']['state'] in st.ACTIVE_STATES:
            time.sleep(1)
            with service() as api:
                wh = api.workflows().get_workflow(workflow_id=workflow_id)
            attempts += 1
            if attempts > 60:
                break
        serialize.validate_workflow_handle(wh)
        with service() as api:
            ranking = api.workflows().get_ranking(workflow_id=workflow_id)
        serialize.validate_ranking(ranking)
        for fobj in wh['postproc']['files']:
            if fobj['name'] == 'results/compare.json':
                file_id = fobj['id']
        with service(user_id=user_id) as api:
            fh = api.workflows().get_result_file(
                workflow_id=workflow_id,
                file_id=file_id
            )
        compare = util.read_object(fh.open())
        assert len(compare) == (i + 1)
    # Access the post-processing result files.
    with service() as api:
        fh = api.workflows().get_result_archive(workflow_id=workflow_id)
    assert fh.name.startswith('run')
    assert fh.mime_type == 'application/gzip'
Example #24
    def load(basedir,
             manifestfile=None,
             name=None,
             description=None,
             instructions=None,
             specfile=None,
             existing_names=set()):
        """Read the workflow manifest from file. By default, an attempt is made
        to read a file with one the following names in the basedir (in the
        given order): flowserv.json, flowserv.yaml, flowserv.yml. If the
        manifest file parameter is given the specified file is being read
        instead.

        The parameters name, description, instructions, and specfile are used
        to override the respective properties in the manifest file.

        Raises a ValueError if no manifest file is found or if no name or
        workflow specification is present in the resulting manifest object.

        Parameters
        ----------
        basedir: string
            Path to the base directory containing the workflow files. This
            directory is used when reading the manifest file (if not given as
            argument) and the instructions file (if not given as argument).
        manifestfile: string, default=None
            Path to manifest file. If not given an attempt is made to read one
            of the default manifest file names in the base directory.
        name: string
            Unique workflow name
        description: string
            Optional short description for display in workflow listings
        instructions: string
            File containing instructions for workflow users.
        specfile: string
            Path to the workflow template specification file (absolute or
            relative to the workflow directory)
        existing_names: set, default=set()
            Set of names for existing projects.

        Returns
        -------
        flowserv.model.workflow.manifest.WorkflowManifest

        Raises
        ------
        IOError, OSError, ValueError, flowserv.error.InvalidManifestError
        """
        doc = dict()
        if manifestfile is not None:
            doc = util.read_object(manifestfile)
        else:
            # Attempt to read default manifest files.
            for filename in MANIFEST_FILES:
                filename = os.path.join(basedir, filename)
                if os.path.isfile(filename):
                    doc = util.read_object(filename)
                    break
        # Validate the manifest file.
        try:
            util.validate_doc(doc,
                              optional=[
                                  'name', 'description', 'instructions',
                                  'files', 'specfile'
                              ])
            for obj in doc.get('files', []):
                util.validate_doc(obj,
                                  mandatory=['source'],
                                  optional=['target'])
        except ValueError as ex:
            raise err.InvalidManifestError(str(ex))
        # Override metadata with given arguments
        if name is not None:
            doc['name'] = name
        if description is not None:
            doc['description'] = description
        # Raise error if no name or no workflow specification is present.
        if 'name' not in doc:
            raise err.InvalidManifestError('missing name')
        if 'specfile' not in doc and specfile is None:
            raise err.InvalidManifestError('missing workflow specification')
        # Ensure that the name is valid and unique.
        doc['name'] = unique_name(doc['name'], existing_names)
        # Read the instructions file if specified.
        if instructions is not None or 'instructions' in doc:
            filename = getfile(basedir=basedir,
                               manifest_value=doc.get('instructions'),
                               user_argument=instructions)
            with open(filename, 'r') as f:
                doc['instructions'] = f.read().strip()
        # Get the workflow specification file.
        filename = getfile(basedir=basedir,
                           manifest_value=doc.get('specfile'),
                           user_argument=specfile)
        return WorkflowManifest(basedir=basedir,
                                name=doc['name'],
                                workflow_spec=util.read_object(filename),
                                description=doc.get('description'),
                                instructions=doc.get('instructions'),
                                files=doc.get('files'))
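A sketch of the default-file lookup, assuming that load is exposed as a static method on WorkflowManifest and that a relative specfile resolves against the base directory (both assumptions; only the unbound signature is shown above):

import os
import tempfile

basedir = tempfile.mkdtemp()
# Hypothetical minimal manifest and workflow specification files.
util.write_object(
    filename=os.path.join(basedir, 'flowserv.json'),
    obj={'name': 'My Workflow', 'specfile': 'workflow.yaml'})
util.write_object(
    filename=os.path.join(basedir, 'workflow.yaml'),
    obj={'workflow': dict()})
manifest = WorkflowManifest.load(basedir=basedir)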
Example #25
def test_load_config_from_file():
    """Test loading worker factory configuration from a file."""
    # Passing the file content directly to the object constructor should work
    # the same as using the static load method.
    worker = WorkerFactory(util.read_object(JSON_FILE)).get('test')
    assert worker.variables['a'] == 0