Example 1
def test_engine_volume_manager(tmpdir):
    """Test creating the volume manager for a workflow run from the engine
    configuration and the default run store.
    """
    runstore = FileSystemStorage(basedir=tmpdir, identifier=DEFAULT_STORE)
    # Minimal arguments.
    volumes = volume_manager(specs=[], runstore=runstore, runfiles=[])
    assert len(volumes._storespecs) == 1
    assert len(volumes.files) == 0
    # Only runstore given.
    volumes = volume_manager(specs=[], runstore=runstore, runfiles=['a', 'b'])
    assert len(volumes._storespecs) == 1
    assert volumes.files['a'] == [DEFAULT_STORE]
    assert volumes.files['b'] == [DEFAULT_STORE]
    # Multiple stores with files.
    doc_ignore = runstore.to_dict()
    doc_ignore['files'] = ['c', 'd']
    doc_fs = FStore(basedir=tmpdir, identifier='s0')
    doc_fs['files'] = ['a', 'c']
    volumes = volume_manager(
        specs=[doc_ignore, doc_fs,
               FStore(basedir=tmpdir, identifier='s1')],
        runstore=runstore,
        runfiles=['a', 'b'])
    assert len(volumes._storespecs) == 3
    assert volumes.files['a'] == [DEFAULT_STORE, 's0']
    assert volumes.files['b'] == [DEFAULT_STORE]
    assert volumes.files['c'] == ['s0']
    assert volumes.files.get('d') is None


def test_manager_update(tmpdir):
    """Test the update method for the volume manager."""
    volumes = VolumeManager(
        stores=[
            FStore(basedir=tmpdir, identifier=DEFAULT_STORE),
            FStore(basedir=tmpdir, identifier='s1')
        ],
        files={'f1': [DEFAULT_STORE]}
    )
    default_store = volumes.get(identifier=DEFAULT_STORE)
    s1 = volumes.get(identifier='s1')
    assert volumes.files == {'f1': [DEFAULT_STORE]}
    volumes.update(files=['f1', 'f2'], store=s1)
    assert volumes.files == {'f1': ['s1'], 'f2': ['s1']}
    volumes.update(files=['f2'], store=default_store)
    assert volumes.files == {'f1': ['s1'], 'f2': [DEFAULT_STORE]}
    volumes.update(files=['f2'], store=s1)
    assert volumes.files == {'f1': ['s1'], 'f2': ['s1']}


def test_base_algorithm_run(dataset, tmpdir):
    """Step through the base algorithm's execute method with an empty
    workflow.
    """
    workflow = SerialWorkflow()
    r = run_workflow(workflow=workflow,
                     arguments={},
                     df=dataset,
                     worker=Subprocess(),
                     volume=FStore(basedir=str(tmpdir)))
    assert r.returncode is None


def test_manager_init(tmpdir):
    """Test edge cases for the volume manager initialization."""
    default_store = FStore(basedir=tmpdir, identifier=DEFAULT_STORE)
    # Ensure we can instantiate the volume manager if a default store is given.
    volumes = VolumeManager(stores=[default_store])
    assert volumes.files == dict()
    volumes = VolumeManager(stores=[default_store], files={'f1': [DEFAULT_STORE]})
    assert volumes.files == {'f1': [DEFAULT_STORE]}
    # Error cases when no default store is given.
    with pytest.raises(ValueError):
        VolumeManager(stores=list())
    with pytest.raises(ValueError):
        VolumeManager(stores=[FStore(basedir=tmpdir, identifier='0000')])
    # Error for unknown storage volume.
    with pytest.raises(err.UnknownObjectError):
        VolumeManager(
            stores=[default_store],
            files={'f1': ['unknown']}
        )
Example 5
def DefaultVolume(basedir: str) -> VolumeManager:
    """Helper method to create a volume manager with a single file system store
    as the default store.

    Parameters
    ----------
    basedir: str
        Base directory for the created file system store.

    Returns
    -------
    flowserv.volume.manager.VolumeManager
    """
    return VolumeManager(
        stores=[FStore(basedir=basedir, identifier=DEFAULT_STORE)])
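
A minimal usage sketch for the helper above (the base directory is a placeholder; only VolumeManager calls already shown in the earlier examples are used):

volumes = DefaultVolume(basedir='/tmp/demo-run')
default_store = volumes.get(identifier=DEFAULT_STORE)
assert volumes.files == dict()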
Example 6
"""

ENV = [(FLOWSERV_BASEDIR, API_DEFAULTDIR(), None),
       (FLOWSERV_API_HOST, DEFAULT_HOST, None),
       (FLOWSERV_API_NAME, DEFAULT_NAME, None),
       (FLOWSERV_API_PATH, DEFAULT_PATH, None),
       (FLOWSERV_API_PORT, DEFAULT_PORT, to_int),
       (FLOWSERV_API_PROTOCOL, DEFAULT_PROTOCOL, None),
       (FLOWSERV_APP, None, None),
       (FLOWSERV_AUTH_LOGINTTL, DEFAULT_LOGINTTL, to_int),
       (FLOWSERV_AUTH, AUTH_DEFAULT, None),
       (FLOWSERV_BACKEND_CLASS, None, None),
       (FLOWSERV_BACKEND_MODULE, None, None),
       (FLOWSERV_POLL_INTERVAL, DEFAULT_POLL_INTERVAL, to_float),
       (FLOWSERV_ACCESS_TOKEN, None, None),
       (FLOWSERV_CLIENT, LOCAL_CLIENT, None),
       (FLOWSERV_DB, None, None),
       (FLOWSERV_WEBAPP, 'False', to_bool),
       (FLOWSERV_FILESTORE, FStore(basedir=API_DEFAULTDIR()), read_config_obj)]


def env() -> Config:
    """Get configuration parameters from the environment."""
    config = Config()
    for var, default, cast in ENV:
        value = os.environ.get(var, default)
        if value is not None:
            if cast is not None:
                value = cast(value)
            config[var] = value
    return config
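
A small sketch of how the ENV table above is consumed by env(); it assumes that the FLOWSERV_API_PORT constant holds the literal environment variable name 'FLOWSERV_API_PORT' and that to_int casts the raw string value to an integer:

os.environ['FLOWSERV_API_PORT'] = '8080'
config = env()
assert config[FLOWSERV_API_PORT] == 8080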
Example 7
def run_workflow(workflow: SerialWorkflow,
                 arguments: Dict,
                 df: pd.DataFrame,
                 worker: Optional[Dict] = None,
                 volume: Optional[Dict] = None,
                 managers: Optional[Dict] = None,
                 verbose: Optional[bool] = True) -> RunResult:
    """Run a given workflow representing a Metanome profiling algorithm on the
    given data frame.

    Returns the run result. If execution of the Metanome algorithm fails, a
    RuntimeError is raised.

    This implementation assumes that all algorithms operate on a single input
    file that contains a serialization of the data frame and that they all
    produce a single output file in JSON format.

    Parameters
    ----------
    workflow: flowserv.controller.serial.workflow.base.SerialWorkflow
        Serial workflow to run a Metanome profiling algorithm on a given data
        frame.
    arguments: dict
        Dictionary of algorithm-specific input arguments.
    df: pd.DataFrame
        Input data frame.
    worker: dict, default=None
        Optional configuration for the main worker.
    volume: dict, default=None
        Optional configuration for the volume that is associated with the main
        worker.
    managers: dict, default=None
        Mapping of workflow step identifiers to the workers that are used to
        execute them.
    verbose: bool, default=True
        Output run logs if True.

    Returns
    -------
    flowserv.controller.serial.workflow.result.RunResult
    """
    # Create a temporary run directory for input and output files.
    rundir = tempfile.mkdtemp()
    # Create a subfolder for input and output files. This is important when
    # running the workflow in a Docker container since this folder will be
    # mounted automatically as a volume into the container to provide access
    # to the files.
    os.makedirs(os.path.join(rundir, 'data'))
    # Create a copy of the workflow-specific arguments and add the data frame
    # and the input and output files.
    args = dict(arguments)
    args['df'] = df
    args['inputfile'] = DATA_FILE
    args['outputfile'] = RESULT_FILE
    # Create factory objects for storage volumes.
    stores = [FStore(basedir=rundir, identifier=DEFAULT_STORE)]
    if volume:
        stores.append(volume)
    volumes = VolumeManager(stores=stores, files=[])
    # Create the factory for workers. Include the mapping of workflow steps to
    # the workers that are responsible for their execution.
    workers = WorkerPool(workers=[worker] if worker else [], managers=managers)
    # Run the workflow and return the result. Make sure to clean up the
    # temporary run folder. This assumes that the workflow steps have read any
    # output files into main memory or copied them to a target destination.
    try:
        r = workflow.run(arguments=args, workers=workers, volumes=volumes)
        # Output STDOUT and STDERR before raising a potential error.
        if verbose:
            for line in r.log:
                print(line)
        # Raise error if run execution was not successful.
        r.raise_for_status()
        return r
    finally:
        # Remove the created run directory.
        shutil.rmtree(rundir)
Example 8
def local_service(database, tmpdir):
    """Create a local API factory for test purposes."""
    env = Config().basedir(tmpdir).volume(FStore(basedir=str(tmpdir))).auth()
    return LocalAPIFactory(env=env, db=database, engine=StateEngine())


def test_run_with_two_steps(tmpdir):
    """Test executing a sequence of two code steps that operate on the same
    file in different storage volumes.
    """
    # -- Setup ----------------------------------------------------------------
    # Create two separate storage volumes.
    vol1_dir = os.path.join(tmpdir, 'v1')
    os.makedirs(vol1_dir)
    vol2_dir = os.path.join(tmpdir, 'v2')
    volumes = VolumeManager(
        stores=[
            FStore(basedir=vol1_dir, identifier=DEFAULT_STORE),
            FStore(basedir=vol2_dir, identifier='v2')
        ],
        files={'data.json': [DEFAULT_STORE]}
    )
    # Create data.json file in v1.
    with open(os.path.join(vol1_dir, 'data.json'), 'w') as f:
        json.dump({"value": 5}, f)
    # Use separate workers for each step.
    workers = WorkerPool(
        workers=[
            Code(identifier='w1', volume=DEFAULT_STORE),
            Code(identifier='w2', volume='v2')
        ],
        managers={'s1': 'w1', 's2': 'w2'}
    )
    # Create workflow steps.
    steps = [
        CodeStep(identifier='s1',
                 func=multi_by_x,
                 arg='s1',
                 varnames={'x': 'x1'},
                 inputs=['data.json']),
        CodeStep(identifier='s2',
                 func=multi_by_x,
                 arg='s2',
                 varnames={'x': 'x2'},
                 inputs=['data.json'])
    ]
    # Initialize the workflow context arguments.
    arguments = {'filename': 'data.json', 'x1': 2, 'x2': 3}
    # -- Test workflow run ----------------------------------------------------
    run_result = exec_workflow(steps=steps,
                               workers=workers,
                               volumes=volumes,
                               result=RunResult(arguments=arguments))
    assert len(run_result.steps) == 2
    assert run_result.context == {
        'filename': 'data.json',
        'x1': 2,
        'x2': 3,
        's1': 10,
        's2': 15
    }
    assert os.path.isfile(os.path.join(vol2_dir, 'data.json'))
    # Error case.
    os.unlink(os.path.join(vol1_dir, 'data.json'))
    run_result = exec_workflow(steps=steps,
                               workers=workers,
                               volumes=volumes,
                               result=RunResult(arguments=arguments))
    assert len(run_result.steps) == 1
    assert run_result.context == {'filename': 'data.json', 'x1': 2, 'x2': 3}
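
The two code steps above call a multi_by_x helper that is not included in this excerpt. Below is a plausible sketch, assuming the CodeStep conventions used in the test (the filename parameter is bound to the context value 'filename', x is bound via varnames to 'x1'/'x2', and the return value is stored under the step's arg key):

import json


def multi_by_x(filename: str, x: int) -> int:
    # Read a JSON document with a single 'value' entry and multiply it by x.
    with open(filename, 'r') as f:
        doc = json.load(f)
    return doc['value'] * x

With the data.json value of 5 and x1=2, x2=3 this would reproduce the s1=10 and s2=15 results asserted above.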