def test_engine_volume_manager(tmpdir):
    """Test creating the volume manager for a workflow run from the engine
    configuration and the default run store.
    """
    runstore = FileSystemStorage(basedir=tmpdir, identifier=DEFAULT_STORE)
    # Minimal arguments.
    volumes = volume_manager(specs=[], runstore=runstore, runfiles=[])
    assert len(volumes._storespecs) == 1
    assert len(volumes.files) == 0
    # Only runstore given.
    volumes = volume_manager(specs=[], runstore=runstore, runfiles=['a', 'b'])
    assert len(volumes._storespecs) == 1
    assert volumes.files['a'] == [DEFAULT_STORE]
    assert volumes.files['b'] == [DEFAULT_STORE]
    # Multiple stores with files.
    doc_ignore = runstore.to_dict()
    doc_ignore['files'] = ['c', 'd']
    doc_fs = FStore(basedir=tmpdir, identifier='s0')
    doc_fs['files'] = ['a', 'c']
    volumes = volume_manager(
        specs=[doc_ignore, doc_fs, FStore(basedir=tmpdir, identifier='s1')],
        runstore=runstore,
        runfiles=['a', 'b']
    )
    assert len(volumes._storespecs) == 3
    assert volumes.files['a'] == [DEFAULT_STORE, 's0']
    assert volumes.files['b'] == [DEFAULT_STORE]
    assert volumes.files['c'] == ['s0']
    assert volumes.files.get('d') is None
def test_manager_update(tmpdir):
    """Test the update method for the volume manager."""
    volumes = VolumeManager(
        stores=[
            FStore(basedir=tmpdir, identifier=DEFAULT_STORE),
            FStore(basedir=tmpdir, identifier='s1')
        ],
        files={'f1': [DEFAULT_STORE]}
    )
    default_store = volumes.get(identifier=DEFAULT_STORE)
    s1 = volumes.get(identifier='s1')
    assert volumes.files == {'f1': [DEFAULT_STORE]}
    volumes.update(files=['f1', 'f2'], store=s1)
    assert volumes.files == {'f1': ['s1'], 'f2': ['s1']}
    volumes.update(files=['f2'], store=default_store)
    assert volumes.files == {'f1': ['s1'], 'f2': [DEFAULT_STORE]}
    volumes.update(files=['f2'], store=s1)
    assert volumes.files == {'f1': ['s1'], 'f2': ['s1']}
def test_base_algorithm_run(dataset, tmpdir):
    """Step through the base algorithm's execute method with an empty workflow."""
    workflow = SerialWorkflow()
    r = run_workflow(
        workflow=workflow,
        arguments={},
        df=dataset,
        worker=Subprocess(),
        volume=FStore(basedir=str(tmpdir))
    )
    assert r.returncode is None
def test_manager_init(tmpdir):
    """Test edge cases for the volume manager initialization."""
    default_store = FStore(basedir=tmpdir, identifier=DEFAULT_STORE)
    # Ensure we can instantiate the volume manager if a default store is given.
    volumes = VolumeManager(stores=[default_store])
    assert volumes.files == dict()
    volumes = VolumeManager(stores=[default_store], files={'f1': [DEFAULT_STORE]})
    assert volumes.files == {'f1': [DEFAULT_STORE]}
    # Error cases when no default store is given.
    with pytest.raises(ValueError):
        VolumeManager(stores=list())
    with pytest.raises(ValueError):
        VolumeManager(stores=[FStore(basedir=tmpdir, identifier='0000')])
    # Error for unknown storage volume.
    with pytest.raises(err.UnknownObjectError):
        VolumeManager(
            stores=[default_store],
            files={'f1': ['unknown']}
        )
def DefaultVolume(basedir: str) -> VolumeManager:
    """Helper method to create a volume manager with a single file system
    store as the default store.

    Parameters
    ----------
    basedir: str
        Base directory for the created file system store.

    Returns
    -------
    flowserv.volume.manager.VolumeManager
    """
    return VolumeManager(
        stores=[FStore(basedir=basedir, identifier=DEFAULT_STORE)]
    )
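# Minimal usage sketch for the DefaultVolume helper (illustrative only, not
# part of the module API). It assumes the manager behaves as exercised in the
# tests above: a fresh manager has no registered files and exposes the single
# file system store under the DEFAULT_STORE identifier.
def _example_default_volume() -> None:
    import tempfile

    volumes = DefaultVolume(basedir=tempfile.mkdtemp())
    # No files have been registered with the newly created manager.
    assert volumes.files == dict()
    # The default store can be looked up by its identifier.
    assert volumes.get(identifier=DEFAULT_STORE) is not None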
""" ENV = [(FLOWSERV_BASEDIR, API_DEFAULTDIR(), None), (FLOWSERV_API_HOST, DEFAULT_HOST, None), (FLOWSERV_API_NAME, DEFAULT_NAME, None), (FLOWSERV_API_PATH, DEFAULT_PATH, None), (FLOWSERV_API_PORT, DEFAULT_PORT, to_int), (FLOWSERV_API_PROTOCOL, DEFAULT_PROTOCOL, None), (FLOWSERV_APP, None, None), (FLOWSERV_AUTH_LOGINTTL, DEFAULT_LOGINTTL, to_int), (FLOWSERV_AUTH, AUTH_DEFAULT, None), (FLOWSERV_BACKEND_CLASS, None, None), (FLOWSERV_BACKEND_MODULE, None, None), (FLOWSERV_POLL_INTERVAL, DEFAULT_POLL_INTERVAL, to_float), (FLOWSERV_ACCESS_TOKEN, None, None), (FLOWSERV_CLIENT, LOCAL_CLIENT, None), (FLOWSERV_DB, None, None), (FLOWSERV_WEBAPP, 'False', to_bool), (FLOWSERV_FILESTORE, FStore(basedir=API_DEFAULTDIR()), read_config_obj)] def env() -> Config: """Get configuration parameters from the environment.""" config = Config() for var, default, cast in ENV: value = os.environ.get(var, default) if value is not None: if cast is not None: value = cast(value) config[var] = value return config
def run_workflow(
    workflow: SerialWorkflow, arguments: Dict, df: pd.DataFrame,
    worker: Optional[Dict] = None, volume: Optional[Dict] = None,
    managers: Optional[Dict] = None, verbose: Optional[bool] = True
) -> RunResult:
    """Run a given workflow representing a Metanome profiling algorithm on
    the given data frame. Returns the run result. If execution of the
    Metanome algorithm fails, a RuntimeError will be raised.

    This implementation assumes that all algorithms operate on a single input
    file that contains a serialization of the data frame and that they all
    produce a single output file in JSON format.

    Parameters
    ----------
    workflow: flowserv.controller.serial.workflow.base.SerialWorkflow
        Serial workflow to run a Metanome profiling algorithm on a given
        data frame.
    arguments: dict
        Dictionary of algorithm-specific input arguments.
    df: pd.DataFrame
        Input data frame.
    worker: dict, default=None
        Optional configuration for the main worker.
    volume: dict, default=None
        Optional configuration for the volume that is associated with the
        main worker.
    managers: dict, default=None
        Mapping of workflow step identifiers to the workers that are used to
        execute them.
    verbose: bool, default=True
        Output run logs if True.

    Returns
    -------
    flowserv.controller.serial.workflow.result.RunResult
    """
    # Create a temporary run directory for input and output files.
    rundir = tempfile.mkdtemp()
    # Create a subfolder for input and output files. This is important when
    # running the workflow in a Docker container since these folders will be
    # mounted automatically as volumes into the container to provide access
    # to the files.
    os.makedirs(os.path.join(rundir, 'data'))
    # Create a copy of the workflow-specific arguments and add the data frame
    # and the input and output files.
    args = dict(arguments)
    args['df'] = df
    args['inputfile'] = DATA_FILE
    args['outputfile'] = RESULT_FILE
    # Create factory objects for storage volumes.
    stores = [FStore(basedir=rundir, identifier=DEFAULT_STORE)]
    if volume:
        stores.append(volume)
    volumes = VolumeManager(stores=stores, files=dict())
    # Create the factory for workers. Include the mapping of workflow steps
    # to the workers that are responsible for their execution.
    workers = WorkerPool(workers=[worker] if worker else [], managers=managers)
    # Run the workflow and return the result. Make sure to clean up the
    # temporary run folder. This assumes that the workflow steps have read
    # any output file into main memory or copied it to a target destination.
    try:
        r = workflow.run(arguments=args, workers=workers, volumes=volumes)
        # Output STDOUT and STDERR before raising a potential error.
        if verbose:
            for line in r.log:
                print(line)
        # Raise error if run execution was not successful.
        r.raise_for_status()
        return r
    finally:
        # Remove the created run directory.
        shutil.rmtree(rundir)
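# Usage sketch for run_workflow (illustrative only; it mirrors the call in
# test_base_algorithm_run above). The data frame contents are hypothetical,
# and Subprocess is assumed to be the worker specification factory that is
# used in the tests.
def _example_run_workflow() -> None:
    df = pd.DataFrame(data=[[1, 'alice'], [2, 'bob']], columns=['id', 'name'])
    r = run_workflow(
        workflow=SerialWorkflow(),
        arguments={},
        df=df,
        worker=Subprocess(),
        volume=FStore(basedir=tempfile.mkdtemp())
    )
    # raise_for_status() has already been called inside run_workflow, so the
    # run is known to have completed successfully at this point.
    print(r.returncode)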
def local_service(database, tmpdir):
    """Create a local API factory for test purposes."""
    env = Config().basedir(tmpdir).volume(FStore(basedir=str(tmpdir))).auth()
    return LocalAPIFactory(env=env, db=database, engine=StateEngine())
def test_run_with_two_steps(tmpdir):
    """Test executing a sequence of two code steps that operate on the same
    file in different storage volumes.
    """
    # -- Setup ----------------------------------------------------------------
    # Create two separate storage volumes.
    vol1_dir = os.path.join(tmpdir, 'v1')
    os.makedirs(vol1_dir)
    vol2_dir = os.path.join(tmpdir, 'v2')
    volumes = VolumeManager(
        stores=[
            FStore(basedir=vol1_dir, identifier=DEFAULT_STORE),
            FStore(basedir=vol2_dir, identifier='v2')
        ],
        files={'data.json': [DEFAULT_STORE]}
    )
    # Create data.json file in v1.
    with open(os.path.join(vol1_dir, 'data.json'), 'w') as f:
        json.dump({"value": 5}, f)
    # Use separate workers for each step.
    workers = WorkerPool(
        workers=[
            Code(identifier='w1', volume=DEFAULT_STORE),
            Code(identifier='w2', volume='v2')
        ],
        managers={'s1': 'w1', 's2': 'w2'}
    )
    # Create workflow steps.
    steps = [
        CodeStep(
            identifier='s1', func=multi_by_x, arg='s1',
            varnames={'x': 'x1'}, inputs=['data.json']
        ),
        CodeStep(
            identifier='s2', func=multi_by_x, arg='s2',
            varnames={'x': 'x2'}, inputs=['data.json']
        )
    ]
    # Initialize the workflow context arguments.
    arguments = {'filename': 'data.json', 'x1': 2, 'x2': 3}
    # -- Test workflow run ----------------------------------------------------
    run_result = exec_workflow(
        steps=steps,
        workers=workers,
        volumes=volumes,
        result=RunResult(arguments=arguments)
    )
    assert len(run_result.steps) == 2
    assert run_result.context == {
        'filename': 'data.json',
        'x1': 2,
        'x2': 3,
        's1': 10,
        's2': 15
    }
    assert os.path.isfile(os.path.join(vol2_dir, 'data.json'))
    # Error case.
    os.unlink(os.path.join(vol1_dir, 'data.json'))
    run_result = exec_workflow(
        steps=steps,
        workers=workers,
        volumes=volumes,
        result=RunResult(arguments=arguments)
    )
    assert len(run_result.steps) == 1
    assert run_result.context == {'filename': 'data.json', 'x1': 2, 'x2': 3}