def test_fs_volume_path_expression():
    """Test getting a path object for a file that is relative to the base
    directory of the storage volume.
    """
    volume = FileSystemStorage(basedir=BENCHMARK_DIR)
    path = volume.path('helloworld', 'code', 'helloworld.py')
    assert path.is_file()
def test_engine_volume_manager(tmpdir):
    """Test creating the volume manager for a workflow run from the engine
    configuration and the default run store.
    """
    runstore = FileSystemStorage(basedir=tmpdir, identifier=DEFAULT_STORE)
    # Minimal arguments.
    volumes = volume_manager(specs=[], runstore=runstore, runfiles=[])
    assert len(volumes._storespecs) == 1
    assert len(volumes.files) == 0
    # Only runstore given.
    volumes = volume_manager(specs=[], runstore=runstore, runfiles=['a', 'b'])
    assert len(volumes._storespecs) == 1
    assert volumes.files['a'] == [DEFAULT_STORE]
    assert volumes.files['b'] == [DEFAULT_STORE]
    # Multiple stores with files.
    doc_ignore = runstore.to_dict()
    doc_ignore['files'] = ['c', 'd']
    doc_fs = FStore(basedir=tmpdir, identifier='s0')
    doc_fs['files'] = ['a', 'c']
    volumes = volume_manager(
        specs=[doc_ignore, doc_fs,
               FStore(basedir=tmpdir, identifier='s1')],
        runstore=runstore,
        runfiles=['a', 'b'])
    assert len(volumes._storespecs) == 3
    assert volumes.files['a'] == [DEFAULT_STORE, 's0']
    assert volumes.files['b'] == [DEFAULT_STORE]
    assert volumes.files['c'] == ['s0']
    assert volumes.files.get('d') is None
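# FStore is a helper from the test suite that builds a store specification
# dict. A minimal sketch, assuming it mirrors the FileSystemStorage.to_dict()
# format shown in test_fs_volume_serialization below (name and shape are
# assumptions):
def fstore_sketch(basedir, identifier):
    # Hypothetical stand-in for FStore; returns a file system store spec.
    return {
        'type': FS_STORE,
        'id': identifier,
        'args': [{'key': 'basedir', 'value': basedir}]
    }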
def test_multi_success_runs(database, tmpdir):
    """Test rankings for workflows where each group has multiple successful
    runs.
    """
    # -- Setup ----------------------------------------------------------------
    # Create a database with two workflows and four groups each. Each group
    # has three active runs. Then set all runs for the first workflow into
    # success state. Increase a counter for the 'count' value as we update
    # runs.
    workflows = init(database, tmpdir)
    fs = FileSystemStorage(basedir=tmpdir)
    workflow_id, groups = workflows[0]
    count = 0
    asc_order = list()
    count_order = list()
    desc_order = list()
    with database.session() as session:
        for group_id, runs in groups:
            for i, run_id in enumerate(runs):
                tmprundir = os.path.join(tmpdir, 'runs', run_id)
                run_success(run_manager=RunManager(session=session, fs=fs),
                            run_id=run_id,
                            store=fs.get_store_for_folder(key=tmprundir),
                            values={
                                'count': count,
                                'avg': 1.0,
                                'name': run_id
                            })
                count += 1
                if i == 0:
                    asc_order.append(run_id)
                count_order.append(run_id)
            # The last run in each group has the highest count value.
            desc_order.append(run_id)
    # -- Test get ranking with one result per group ---------------------------
    with database.session() as session:
        wfrepo = WorkflowManager(session=session, fs=fs)
        rankings = RankingManager(session=session)
        wf = wfrepo.get_workflow(workflow_id)
        ranking = rankings.get_ranking(wf)
        rank_order = [e.run_id for e in ranking]
        assert rank_order == desc_order[::-1]
        ranking = rankings.get_ranking(
            wf, order_by=[SortColumn(column_id='count', sort_desc=False)])
        rank_order = [e.run_id for e in ranking]
        assert rank_order == asc_order
        # The run execution time is reported as a timedelta.
        assert isinstance(ranking[0].exectime(), timedelta)
    # -- Test get ranking with all results per group --------------------------
    with database.session() as session:
        wfrepo = WorkflowManager(session=session, fs=fs)
        rankings = RankingManager(session=session)
        wf = wfrepo.get_workflow(workflow_id)
        ranking = rankings.get_ranking(wf, include_all=True)
        rank_order = [e.run_id for e in ranking]
        assert rank_order == count_order[::-1]
        ranking = rankings.get_ranking(
            wf,
            order_by=[SortColumn(column_id='count', sort_desc=False)],
            include_all=True)
        rank_order = [e.run_id for e in ranking]
        assert rank_order == count_order
def test_fs_volume_serialization():
    """Test serialization for a file system storage volume object."""
    doc = FileSystemStorage(basedir='.', identifier='0000').to_dict()
    assert doc == {'type': FS_STORE, 'id': '0000', 'args': [{'key': 'basedir', 'value': '.'}]}
    fs = FileSystemStorage.from_dict(doc)
    assert isinstance(fs, FileSystemStorage)
    assert fs.identifier == '0000'
    assert fs.basedir == '.'
def test_fs_volume_load_file(basedir, data_e):
    """Test loading a file from a file system storage volume."""
    store = FileSystemStorage(basedir=basedir)
    with store.load(key='examples/data/data.json').open() as f:
        doc = json.load(f)
    assert doc == data_e
    # -- Error case for unknown file.
    with pytest.raises(err.UnknownFileError):
        store.load(key='examples/data/unknown.json')
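# The fixtures used by these volume tests (basedir, emptydir, data_a, data_e,
# filenames_all) come from a conftest module that is not shown here. A minimal
# sketch, inferred from the file keys and contents the tests assert on (exact
# layout and values are assumptions):
import json
import os

import pytest


@pytest.fixture
def data_a():
    return {'A': 1}


@pytest.fixture
def data_e():
    return {'E': 1}


@pytest.fixture
def filenames_all():
    return {
        'A.json', 'docs/D.json', 'examples/B.json', 'examples/C.json',
        'examples/data/data.json'
    }


@pytest.fixture
def basedir(tmpdir, data_a, data_e, filenames_all):
    # Materialize one JSON file per key; A.json holds data_a and
    # examples/data/data.json holds data_e.
    source = os.path.join(str(tmpdir), 'source')
    contents = {'A.json': data_a, 'examples/data/data.json': data_e}
    for key in filenames_all:
        filename = os.path.join(source, *key.split('/'))
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        with open(filename, 'w') as f:
            json.dump(contents.get(key, {}), f)
    return source


@pytest.fixture
def emptydir(tmpdir):
    # Separate empty directory used as the copy target.
    target = os.path.join(str(tmpdir), 'target')
    os.makedirs(target)
    return target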
def test_fixed_variables(tmpdir):
    """Test proper behavior for setting fixed variables in the worker environment."""
    step = ContainerStep(identifier='test', image='test', commands=['${python} $run $me'])
    arguments = {'run': 'my_model.py', 'me': 1}
    engine = ContainerTestEngine(variables=dict())
    engine.exec(step=step, context=arguments, store=FileSystemStorage(tmpdir))
    assert engine.commands == ['python my_model.py 1']
    engine = ContainerTestEngine(variables={'run': 'static.py'})
    engine.exec(step=step, context=arguments, store=FileSystemStorage(tmpdir))
    assert engine.commands == ['python static.py 1']
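# ContainerTestEngine is a test double that records the expanded command list
# instead of executing it. A minimal sketch of the substitution behavior the
# test exercises, assuming string.Template semantics and a default 'python'
# variable provided by the worker environment (class and variable names are
# assumptions):
from string import Template


class ContainerTestEngineSketch:
    def __init__(self, variables):
        # Fixed variables that cannot be overridden by the run context.
        self.variables = dict(variables)
        self.commands = None

    def exec(self, step, context, store):
        # Merge the run context with the fixed variables; fixed values win.
        mapping = {'python': 'python'}
        mapping.update(context)
        mapping.update(self.variables)
        self.commands = [Template(cmd).substitute(mapping) for cmd in step.commands]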
def test_remote_volume_copy_all(mock_ssh, basedir, emptydir, filenames_all,
                                data_a):
    """Test copying the full directory of a storage volume."""
    source = FileSystemStorage(basedir=basedir)
    with ssh.ssh_client('test', sep=os.sep) as client:
        target = RemoteStorage(remotedir=emptydir, client=client)
        source.copy(src=None, dst=None, store=target)
        files = {key: file for key, file in target.walk(src='')}
    assert set(files.keys()) == filenames_all
    with files['A.json'].open() as f:
        assert json.load(f) == data_a
def test_remote_volume_copy_file(mock_ssh, basedir, emptydir, data_e):
    """Test copying a file from a storage volume."""
    source = FileSystemStorage(basedir=basedir)
    with ssh.ssh_client('test', sep=os.sep) as client:
        target = RemoteStorage(remotedir=emptydir, client=client)
        source.copy(src='examples/data/data.json',
                    dst='static/examples/data/data.json',
                    store=target)
        files = {key: file for key, file in target.walk(src='static')}
    assert set(files.keys()) == {'static/examples/data/data.json'}
    with files['static/examples/data/data.json'].open() as f:
        assert json.load(f) == data_e
def test_fs_volume_init(basedir):
    """Test initializing the file system storage volume."""
    store = FileSystemStorage(basedir=basedir)
    assert store.identifier is not None
    assert basedir in store.describe()
    store.close()
    store = FileSystemStorage(basedir=basedir, identifier='0000')
    assert store.identifier == '0000'
    store.close()
def test_upload_file(database, tmpdir):
    """Test uploading files."""
    # -- Setup ----------------------------------------------------------------
    #
    # Create a database with two groups for a single workflow. Upload one file
    # for each group.
    fs = FileSystemStorage(basedir=tmpdir)
    with database.session() as session:
        user_1 = model.create_user(session, active=True)
        workflow_id = model.create_workflow(session)
        group_1 = model.create_group(session, workflow_id, users=[user_1])
    # -- Test upload file -----------------------------------------------------
    data = {'A': 1}
    with database.session() as session:
        manager = WorkflowGroupManager(session=session, fs=fs)
        fh = manager.upload_file(group_id=group_1,
                                 file=io_file(data={'A': 1}),
                                 name='A.json')
        assert fh.name == 'A.json'
        assert fh.mime_type == 'application/json'
        fh = manager.get_uploaded_file(group_id=group_1, file_id=fh.file_id)
        assert json.load(fh.open()) == data
    # -- Test error cases -----------------------------------------------------
    with database.session() as session:
        with pytest.raises(err.ConstraintViolationError):
            manager.upload_file(group_id=group_1,
                                file=io_file(data={'A': 1}),
                                name=' ')
        with pytest.raises(err.UnknownWorkflowGroupError):
            manager.upload_file(group_id='UNKNOWN',
                                file=io_file(data={'A': 1}),
                                name=' ')
def test_list_runs(database, tmpdir):
    """Test retrieving a list of run descriptors."""
    # -- Setup ----------------------------------------------------------------
    #
    # Create two runs: one in running state and one in error state.
    fs = FileSystemStorage(basedir=tmpdir)
    with database.session() as session:
        user_id = model.create_user(session, active=True)
        workflow_id = model.create_workflow(session)
        group_id = model.create_group(session, workflow_id, users=[user_id])
        groups = WorkflowGroupManager(session=session, fs=fs)
        runs = RunManager(session=session, fs=fs)
        group = groups.get_group(group_id)
        # Run 1 in running state
        r = runs.create_run(group=group)
        run_1 = r.run_id
        runs.update_run(run_id=run_1, state=r.state().start())
        # Run 2 in error state
        r = runs.create_run(group=group)
        run_2 = r.run_id
        runs.update_run(run_id=run_2, state=r.state().error())
    # -- Test get listing -----------------------------------------------------
    with database.session() as session:
        runs = RunManager(session=session, fs=fs)
        run_index = dict()
        for run in runs.list_runs(group_id):
            run_index[run.run_id] = run
        assert len(run_index) == 2
        assert run_index[run_1].state().is_running()
        assert run_index[run_2].state().is_error()
    # -- Test polling runs ----------------------------------------------------
    with database.session() as session:
        runs = RunManager(session=session, fs=fs)
        assert len(runs.list_runs(group_id)) == 2
        assert len(runs.list_runs(group_id, state=st.STATE_ERROR)) == 1
        assert len(runs.list_runs(group_id, state=st.STATE_SUCCESS)) == 0
def test_workflow_leaderboard_serialization(database, tmpdir):
    """Test serialization of a workflow leaderboard."""
    schema = validator('WorkflowLeaderboard')
    view = WorkflowSerializer()
    with database.session() as session:
        manager = WorkflowManager(session=session,
                                  fs=FileSystemStorage(basedir=tmpdir))
        workflow = manager.create_workflow(source=BENCHMARK_DIR,
                                           name='Test',
                                           specfile=SPEC_FILE)
        ts = util.utc_now()
        ranking = [
            RunResult(run_id='0',
                      group_id='1',
                      group_name='A',
                      created_at=ts,
                      started_at=ts,
                      finished_at=ts,
                      values={
                          'len': 1,
                          'count': 10
                      })
        ]
        doc = view.workflow_leaderboard(workflow, ranking=ranking)
        schema.validate(doc)
def test_obsolete_runs(database, tmpdir):
    """Test deleting runs that were created before a given date."""
    # -- Setup ----------------------------------------------------------------
    fs = FileSystemStorage(basedir=tmpdir)
    # Create two runs (one SUCCESS and one ERROR) before a timestamp t1
    _, _, run_1, _ = success_run(database, fs, tmpdir)
    _, _, run_2 = error_run(database, fs, ['There were errors'])
    time.sleep(1)
    t1 = util.utc_now()
    # Create another SUCCESS run after timestamp t1
    _, _, run_3, _ = success_run(database, fs, tmpdir)
    # -- Test delete run with state filter ------------------------------------
    with database.session() as session:
        runs = RunManager(session=session, fs=fs)
        assert runs.delete_obsolete_runs(date=t1, state=st.STATE_ERROR) == 1
        # After deleting the error run the two success runs still exist.
        runs.get_run(run_id=run_1)
        with pytest.raises(err.UnknownRunError):
            runs.get_run(run_id=run_2)
        runs.get_run(run_id=run_3)
    # -- Test delete all runs prior to a given date ---------------------------
    with database.session() as session:
        runs = RunManager(session=session, fs=fs)
        assert runs.delete_obsolete_runs(date=t1) == 1
        # After deleting run_1, only the later success run (run_3) still
        # exists.
        with pytest.raises(err.UnknownRunError):
            runs.get_run(run_id=run_1)
        runs.get_run(run_id=run_3)
def test_list_files(database, tmpdir):
    """Test listing uploaded files."""
    # -- Setup ----------------------------------------------------------------
    #
    # Create a database with two groups for a single workflow. The first group
    # has two uploaded files and the second group has one.
    fn = 'data.json'
    fs = FileSystemStorage(basedir=tmpdir)
    with database.session() as session:
        user_1 = model.create_user(session, active=True)
        workflow_id = model.create_workflow(session)
        group_1 = model.create_group(session, workflow_id, users=[user_1])
        group_2 = model.create_group(session, workflow_id, users=[user_1])
        manager = WorkflowGroupManager(session=session, fs=fs)
        manager.upload_file(group_id=group_1,
                            file=io_file(data={'A': 1}),
                            name=fn)
        manager.upload_file(group_id=group_1,
                            file=io_file(data={'A': 2}),
                            name=fn)
        manager.upload_file(group_id=group_2,
                            file=io_file(data={'A': 3}),
                            name=fn)
    # -- Test list files for groups -------------------------------------------
    with database.session() as session:
        manager = WorkflowGroupManager(session=session, fs=fs)
        files = manager.list_uploaded_files(group_id=group_1)
        assert len(files) == 2
        files = manager.list_uploaded_files(group_id=group_2)
        assert len(files) == 1
def test_run_parameters(database, tmpdir):
    """Test creating run with template arguments."""
    # -- Setup ----------------------------------------------------------------
    fs = FileSystemStorage(basedir=tmpdir)
    with database.session() as session:
        user_id = model.create_user(session, active=True)
        workflow_id = model.create_workflow(session)
        group_id = model.create_group(session, workflow_id, users=[user_id])
    # Prepare run arguments
    filename = os.path.join(str(tmpdir), 'results.json')
    util.write_object(filename=filename, obj={'A': 1})
    arguments = [{'id': 'A', 'value': 10}, {'id': 'B', 'value': True}]
    # -- Test create run with arguments ---------------------------------------
    with database.session() as session:
        groups = WorkflowGroupManager(session=session, fs=fs)
        runs = RunManager(session=session, fs=fs)
        run = runs.create_run(
            group=groups.get_group(group_id),
            arguments=arguments
        )
        run_id = run.run_id
    with database.session() as session:
        runs = RunManager(session=session, fs=fs)
        run = runs.get_run(run_id)
        assert run.arguments == arguments
def test_workflow_postproc_client(local_service, hello_world, tmpdir):
    """Test preparing and accessing post-processing results."""
    # -- Setup ----------------------------------------------------------------
    #
    # Create four groups for the 'Hello World' workflow with one successful
    # run each.
    with local_service() as api:
        user_1 = create_user(api)
        workflow_id = hello_world(api).workflow_id
    with local_service(user_id=user_1) as api:
        create_ranking(api, workflow_id, 4)
    # -- Get ranking in decreasing order of avg_count. ------------------------
    with local_service(user_id=user_1) as api:
        workflow = api.workflows().workflow_repo.get_workflow(workflow_id)
        ranking = api.workflows().ranking_manager.get_ranking(workflow)
        # Prepare data for the post-processing workflow.
        prepare_postproc_data(
            input_files=['results/analytics.json'],
            ranking=ranking,
            run_manager=api.runs().run_manager,
            store=FileSystemStorage(
                basedir=os.path.join(tmpdir, 'postproc_run')))
        # Test the post-processing client that accesses the prepared data.
        runs = Runs(os.path.join(tmpdir, 'postproc_run'))
        assert len(runs) == 4
        assert [r.run_id for r in ranking] == [r.run_id for r in runs]
        for i in range(len(runs)):
            run = runs.get_run(runs.at_rank(i).run_id)
            assert run.get_file(name='results/analytics.json') is not None
            assert os.path.isfile(run.get_file(name='results/analytics.json'))
            assert run.get_file(name='results/greeting.txt') is None
def test_cancel_run(database, tmpdir):
    """Test setting run state to canceled."""
    # -- Setup ----------------------------------------------------------------
    fs = FileSystemStorage(basedir=tmpdir)
    with database.session() as session:
        user_id = model.create_user(session, active=True)
        workflow_id = model.create_workflow(session)
        group_id = model.create_group(session, workflow_id, users=[user_id])
    # -- Test set run to canceled state ----------------------------------------
    with database.session() as session:
        groups = WorkflowGroupManager(session=session, fs=fs)
        runs = RunManager(session=session, fs=fs)
        run = runs.create_run(group=groups.get_group(group_id))
        run_id = run.run_id
        state = run.state()
        runs.update_run(run_id=run_id, state=state.cancel())
    with database.session() as session:
        runs = RunManager(session=session, fs=fs)
        run = runs.get_run(run_id)
        state = run.state()
        assert not state.is_active()
        assert not state.is_pending()
        assert not state.is_running()
        assert state.is_canceled()
        assert not state.is_error()
        assert not state.is_success()
        assert len(state.messages) == 1
def test_invalid_state_transitions(database, tmpdir):
    """Test error cases for invalid state transitions."""
    # -- Setup ----------------------------------------------------------------
    fs = FileSystemStorage(basedir=tmpdir)
    with database.session() as session:
        user_id = model.create_user(session, active=True)
        workflow_id = model.create_workflow(session)
        group_id = model.create_group(session, workflow_id, users=[user_id])
    # -- Test set active run to pending ---------------------------------------
    with database.session() as session:
        groups = WorkflowGroupManager(session=session, fs=fs)
        runs = RunManager(session=session, fs=fs)
        run = runs.create_run(group=groups.get_group(group_id))
        run_id = run.run_id
        state = run.state()
        runs.update_run(run_id=run_id, state=state.start())
        with pytest.raises(err.ConstraintViolationError):
            runs.update_run(run_id=run_id, state=st.StatePending())
    # Cancel run
    with database.session() as session:
        runs = RunManager(session=session, fs=fs)
        runs.update_run(run_id=run_id, state=state.cancel())
    # -- Test cannot set run to any of the inactive states --------------------
    with database.session() as session:
        groups = WorkflowGroupManager(session=session, fs=fs)
        runs = RunManager(session=session, fs=fs)
        assert runs.update_run(run_id=run_id, state=state.cancel()) is None
        with pytest.raises(err.ConstraintViolationError):
            runs.update_run(run_id=run_id, state=state.error())
        with pytest.raises(err.ConstraintViolationError):
            runs.update_run(run_id=run_id, state=state.success())
def test_get_workflow(database, tmpdir):
    """Test retrieving workflows from the repository."""
    # -- Setup ----------------------------------------------------------------
    #
    # Create two workflows.
    fs = FileSystemStorage(basedir=tmpdir)
    with database.session() as session:
        manager = WorkflowManager(session=session, fs=fs)
        wf = manager.create_workflow(name='A', source=BENCHMARK_DIR)
        workflow_1 = wf.workflow_id
        wf = manager.create_workflow(name='B',
                                     description='Workflow B',
                                     source=BENCHMARK_DIR,
                                     instructions=INSTRUCTION_FILE,
                                     specfile=TEMPLATE_WITHOUT_SCHEMA)
        workflow_2 = wf.workflow_id
    # -- Test getting workflow handles ----------------------------------------
    with database.session() as session:
        manager = WorkflowManager(session=session, fs=fs)
        wf = manager.get_workflow(workflow_1)
        assert wf.name == 'A'
        assert wf.description == 'Hello World Demo'
        assert wf.instructions is not None
        template = wf.get_template()
        assert template.result_schema is not None
        wf = manager.get_workflow(workflow_2)
        assert wf.name == 'B'
        assert wf.description == 'Workflow B'
        assert wf.instructions == '# Hello World'
        template = wf.get_template()
        assert template.result_schema is None
def test_create_workflow_with_alt_spec(database, tmpdir):
    """Test creating workflows with alternative specification files."""
    # -- Setup ----------------------------------------------------------------
    fs = FileSystemStorage(basedir=tmpdir)
    # -- Template without schema ----------------------------------------------
    with database.session() as session:
        manager = WorkflowManager(session=session, fs=fs)
        wf = manager.create_workflow(source=BENCHMARK_DIR,
                                     specfile=TEMPLATE_WITHOUT_SCHEMA)
        workflow_id = wf.workflow_id
        assert wf.name == 'Hello World'
        template = wf.get_template()
        assert template.result_schema is None
    with database.session() as session:
        manager = WorkflowManager(session=session, fs=fs)
        wf = manager.get_workflow(workflow_id=workflow_id)
        assert wf.name == 'Hello World'
        template = wf.get_template()
        assert template.result_schema is None
    # -- Template with post-processing step -----------------------------------
    with database.session() as session:
        manager = WorkflowManager(session=session, fs=fs)
        wf = manager.create_workflow(name='Top Tagger',
                                     source=BENCHMARK_DIR,
                                     specfile=TEMPLATE_TOPTAGGER)
        workflow_id = wf.workflow_id
        assert wf.get_template().postproc_spec is not None
    with database.session() as session:
        manager = WorkflowManager(session=session, fs=fs)
        wf = manager.get_workflow(workflow_id=workflow_id)
        assert wf.get_template().postproc_spec is not None
def test_update_workflow_name(database, tmpdir):
    """Test updating workflow names."""
    # -- Setup ----------------------------------------------------------------
    #
    # Create two workflow templates. Workflow 1 has neither a description nor
    # instructions while workflow 2 has both.
    fs = FileSystemStorage(basedir=tmpdir)
    with database.session() as session:
        manager = WorkflowManager(session=session, fs=fs)
        # Initialize the repository
        wf = manager.create_workflow(name='A', source=BENCHMARK_DIR)
        workflow_1 = wf.workflow_id
        wf = manager.create_workflow(name='My benchmark',
                                     description='desc',
                                     instructions=INSTRUCTION_FILE,
                                     source=BENCHMARK_DIR)
        workflow_2 = wf.workflow_id
    # -- Test update workflow name --------------------------------------------
    with database.session() as session:
        manager = WorkflowManager(session=session, fs=fs)
        wf = manager.update_workflow(workflow_id=workflow_1, name='B')
        assert wf.name == 'B'
        # It is possible to change the name to an existing name only if it is
        # the same workflow.
        wf = manager.update_workflow(workflow_id=workflow_2,
                                     name='My benchmark')
        assert wf.name == 'My benchmark'
    # -- Error cases ----------------------------------------------------------
    with database.session() as session:
        manager = WorkflowManager(session=session, fs=fs)
        # Cannot change name to existing name.
        with pytest.raises(err.ConstraintViolationError):
            manager.update_workflow(workflow_id=workflow_2, name='B')
def test_delete_group(database, tmpdir):
    """Test creating and deleting workflow groups."""
    # -- Setup ----------------------------------------------------------------
    #
    # Create a database with two groups for a single workflow.
    fs = FileSystemStorage(basedir=tmpdir)
    with database.session() as session:
        user_id = model.create_user(session, active=True)
        wf_id = model.create_workflow(session)
        manager = WorkflowGroupManager(session=session, fs=fs)
        group_1 = manager.create_group(workflow_id=wf_id,
                                       name='A',
                                       user_id=user_id,
                                       parameters=ParameterIndex(),
                                       workflow_spec=dict()).group_id
        group_2 = manager.create_group(workflow_id=wf_id,
                                       name='B',
                                       user_id=user_id,
                                       parameters=ParameterIndex(),
                                       workflow_spec=dict()).group_id
    # -- Delete group ---------------------------------------------------------
    with database.session() as session:
        # Ensure that group directories are deleted.
        manager = WorkflowGroupManager(session=session, fs=fs)
        manager.delete_group(group_1)
        # Access to group 1 raises error while group 2 is still accessible.
        with pytest.raises(err.UnknownWorkflowGroupError):
            manager.get_group(group_1)
        assert manager.get_group(group_2) is not None
def test_delete_workflow(database, tmpdir):
    """Test deleting a workflows from the repository."""
    # -- Setup ----------------------------------------------------------------
    #
    # Create two workflows.
    fs = FileSystemStorage(basedir=tmpdir)
    with database.session() as session:
        manager = WorkflowManager(session=session, fs=fs)
        wf = manager.create_workflow(name='A', source=BENCHMARK_DIR)
        workflow_1 = wf.workflow_id
        wf = manager.create_workflow(name='B', source=BENCHMARK_DIR)
        workflow_2 = wf.workflow_id
    # -- Test delete first workflow -------------------------------------------
    with database.session() as session:
        manager = WorkflowManager(session=session, fs=fs)
        manager.delete_workflow(workflow_1)
    with database.session() as session:
        # The second workflow still exists.
        manager = WorkflowManager(session=session, fs=fs)
        assert manager.get_workflow(workflow_2) is not None
    # -- Deleting the same workflow multiple times raises an error -------------
    with database.session() as session:
        manager = WorkflowManager(session=session, fs=fs)
        with pytest.raises(err.UnknownWorkflowError):
            manager.delete_workflow(workflow_id=workflow_1)
def Volume(doc: Dict) -> StorageVolume:
    """Factory pattern to create storage volume instances for the service API.

    Expects a serialization dictionary that contains at least the volume
    ``type``.

    Parameters
    ----------
    doc: dict
        Serialization dictionary that provides access to storage volume type and
        the implementation-specific volume parameters.

    Returns
    -------
    flowserv.volume.base.StorageVolume
    """
    volume_type = doc.get('type', FS_STORE)
    if volume_type == FS_STORE:
        return FileSystemStorage.from_dict(doc)
    elif volume_type == GC_STORE:
        return GCVolume.from_dict(doc)
    elif volume_type == S3_STORE:
        return S3Volume.from_dict(doc)
    elif volume_type == SFTP_STORE:
        return RemoteStorage.from_dict(doc)
    raise err.InvalidConfigurationError('storage volume type', volume_type)
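# A usage sketch for the factory: serialize a volume via to_dict() and then
# recreate it from the resulting dictionary (relies only on the calls shown
# in the examples above):
store = FileSystemStorage(basedir='.', identifier='0000')
volume = Volume(store.to_dict())
assert isinstance(volume, FileSystemStorage)
assert volume.identifier == '0000'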
def test_parameter_value_dir(tmpdir):
    """Test directories as input parameter values."""
    basedir = os.path.join(tmpdir, 's1')
    os.makedirs(basedir)
    f1 = os.path.join(basedir, 'file.txt')
    Path(f1).touch()
    f2 = os.path.join(basedir, 'data.json')
    Path(f2).touch()
    dir = InputDirectory(store=FileSystemStorage(basedir=basedir),
                         source=None,
                         target='runs')
    assert str(dir) == 'runs'
    target = FileSystemStorage(basedir=os.path.join(tmpdir, 's2'))
    assert set(dir.copy(target=target)) == {'runs/file.txt', 'runs/data.json'}
    assert os.path.isfile(os.path.join(tmpdir, 's2', 'runs', 'file.txt'))
    assert os.path.isfile(os.path.join(tmpdir, 's2', 'runs', 'data.json'))
def test_manager_prepare(basedir, filenames_all, data_a, tmpdir):
    """Test the volume manager prepare method."""
    # -- Setup ----------------------------------------------------------------
    s1_dir = os.path.join(tmpdir, 's1')
    s0 = FileSystemStorage(basedir=basedir, identifier=DEFAULT_STORE)
    s1 = FileSystemStorage(basedir=s1_dir, identifier='s1')
    volumes = VolumeManager(
        stores=[s0.to_dict(), s1.to_dict()],
        files={f: [DEFAULT_STORE] for f in filenames_all}
    )
    # Case 1: Empty arguments.
    volumes.prepare(store=s0, inputs=[], outputs=[])
    # Case 2: Inputs are already present at the store; no file copy needed.
    volumes.prepare(store=s0, inputs=['examples/'], outputs=['examples/'])
    assert len(os.listdir(basedir)) == 3
    assert len(os.listdir(s1_dir)) == 0
    for f in filenames_all:
        assert volumes.files[f] == [DEFAULT_STORE]
    # Case 3: Copy files between stores.
    volumes.prepare(store=s1, inputs=['A.json', 'docs/'], outputs=['results/A.json', 'docs/'])
    assert len(os.listdir(basedir)) == 3
    assert len(os.listdir(s1_dir)) == 3
    filename = os.path.join(s1_dir, 'A.json')
    assert os.path.isfile(filename)
    with s1.load('A.json').open() as f:
        assert json.load(f) == data_a
    assert volumes.files == {
        'docs/D.json': [DEFAULT_STORE, 's1'],
        'examples/data/data.json': [DEFAULT_STORE],
        'examples/C.json': [DEFAULT_STORE],
        'A.json': [DEFAULT_STORE, 's1'],
        'examples/B.json': [DEFAULT_STORE]
    }
def test_error_exec(tmpdir):
    """Test error when running a code step."""
    step = CodeStep(identifier='test', func=write_and_add, arg='a')
    r = CodeWorker().exec(step=step,
                          context={'a': -1},
                          store=FileSystemStorage(tmpdir))
    assert r.returncode == 1
    assert r.stdout == ['-1 written', '\n']
    assert r.stderr != []
    assert r.exception is not None
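# write_and_add is a helper defined in the test module. A minimal sketch that
# is consistent with the assertions above (echoing the value before failing on
# negative input is an assumption):
def write_and_add_sketch(a):
    # The worker captures printed output as the step's stdout.
    print('{} written'.format(a))
    if a < 0:
        # Raising yields returncode 1, a non-empty stderr, and a captured
        # exception in the worker result.
        raise ValueError('invalid value {}'.format(a))
    return a + 1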
def test_fs_volume_copy_all(basedir, emptydir, filenames_all, data_a):
    """Test copying the full directory of a storage volume."""
    source = FileSystemStorage(basedir=basedir)
    target = FileSystemStorage(basedir=emptydir)
    source.copy(src=None, dst=None, store=target)
    files = {key: file for key, file in target.walk(src='')}
    assert set(files.keys()) == filenames_all
    with files['A.json'].open() as f:
        assert json.load(f) == data_a
def test_create_workflow(database, tmpdir):
    """Test creating workflows with different levels of detail."""
    # -- Setup ----------------------------------------------------------------
    fs = FileSystemStorage(basedir=tmpdir)
    # -- Add workflow with minimal information --------------------------------
    with database.session() as session:
        manager = WorkflowManager(session=session, fs=fs)
        wf = manager.create_workflow(source=BENCHMARK_DIR, identifier='WF001')
        assert wf.workflow_id == 'WF001'
        assert wf.name == 'Hello World'
        assert wf.description == 'Hello World Demo'
        assert wf.instructions is not None
        template = wf.get_template()
        assert template.result_schema is not None
    # Ensure that the static files were copied to the workflow folder.
    staticfs = fs.get_store_for_folder(
        dirs.workflow_staticdir(workflow_id='WF001'))
    files = {key for key, _ in staticfs.walk(src=None)}
    assert files == {
        'instructions.md', 'data/names.txt', 'code/analyze.py',
        'code/postproc.py', 'code/helloworld.py', 'notebooks/HelloWorld.ipynb'
    }
    # -- Add workflow with user-provided metadata -----------------------------
    with database.session() as session:
        manager = WorkflowManager(session=session, fs=fs)
        wf = manager.create_workflow(
            name='My benchmark',
            description='My description',
            instructions=INSTRUCTION_FILE,
            source=BENCHMARK_DIR,
            engine_config={'workers': {
                'test': {
                    'worker': 'docker'
                }
            }})
        assert wf.name == 'My benchmark'
        assert wf.description == 'My description'
        assert wf.instructions == '# Hello World'
        assert wf.engine_config == {'workers': {'test': {'worker': 'docker'}}}
        template = wf.get_template()
        assert template.result_schema is not None
def test_create_workflow_with_alt_manifest(database, tmpdir):
    """Test creating 'Hello World' workflow with a different manifest file."""
    fs = FileSystemStorage(basedir=tmpdir)
    with database.session() as session:
        manager = WorkflowManager(session=session, fs=fs)
        wf = manager.create_workflow(source=BENCHMARK_DIR,
                                     manifestfile=ALT_MANIFEST)
        assert wf.name == 'Hello World'
        assert wf.description is None
        assert wf.instructions is None
        template = wf.get_template()
        assert template.result_schema is not None