def test_run_with_invalid_cmd(self, tmpdir):
    """Run the helloworld template variant that contains an invalid shell
    command and verify that the run is reported as failed.
    """
    base_dir = str(tmpdir)
    # Register the template using the specification file that is expected
    # to contain the invalid command
    repo = TemplateRepository(base_dir=os.path.join(base_dir, 'repo'))
    template = repo.add_template(
        src_dir=WORKFLOW_DIR,
        template_spec_file=TEMPLATE_WITH_INVALID_CMD
    )
    names_arg = TemplateArgument(
        parameter=template.get_parameter('names'),
        value=FileHandle(DATA_FILE)
    )
    sleeptime_arg = TemplateArgument(
        parameter=template.get_parameter('sleeptime'),
        value=3
    )
    arguments = {'names': names_arg, 'sleeptime': sleeptime_arg}
    # Run the workflow synchronously (with verbose output enabled)
    engine = MultiProcessWorkflowEngine(
        base_dir=os.path.join(base_dir, 'engine'),
        run_async=False,
        verbose=True
    )
    sync_run_id, returned_state = engine.execute(template, arguments)
    # The state returned by execute() signals an error with messages
    assert returned_state.is_error()
    assert len(returned_state.messages) > 0
    # The state that is maintained by the engine agrees with the result
    stored_state = engine.get_state(sync_run_id)
    assert stored_state.is_error()
    assert len(stored_state.messages) > 0
def test_simple_replace(self):
    """Verify that references to template parameters in a simple workflow
    specification are replaced by the given argument values. The test is
    repeated for the YAML and the JSON version of the template.
    """
    for filename in [TEMPLATE_YAML_FILE, TEMPLATE_JSON_FILE]:
        template = DefaultTemplateLoader().load(filename)
        code_arg = TemplateArgument(
            parameter=template.get_parameter('code'),
            value=FileHandle('code/helloworld.py')
        )
        names_arg = TemplateArgument(
            parameter=template.get_parameter('names'),
            value=FileHandle('data/list-of-names.txt')
        )
        sleeptime_arg = TemplateArgument(
            parameter=template.get_parameter('sleeptime'),
            value=10
        )
        spec = tmpl.replace_args(
            spec=template.workflow_spec,
            arguments={
                'code': code_arg,
                'names': names_arg,
                'sleeptime': sleeptime_arg
            },
            parameters=template.parameters
        )
        # Check the entries in the input files listing
        inputs = spec['inputs']
        assert inputs['files'][0] == 'helloworld.py'
        assert inputs['files'][1] == 'data/names.txt'
        # Check the values in the input parameters section. No argument
        # is given for 'waittime' in this test.
        assert inputs['parameters']['helloworld'] == 'code/helloworld.py'
        assert inputs['parameters']['inputfile'] == 'data/names.txt'
        assert inputs['parameters']['sleeptime'] == 10
        assert inputs['parameters']['waittime'] == 5
def test_expand_parameters(self):
    """Verify the list of commands that is generated from a template for
    (i) a full set of arguments, (ii) a minimal set of arguments, and
    (iii) a workflow specification with a missing input parameter.
    """
    template = DefaultTemplateLoader().load(TEMPLATE_FILE)
    # The final evaluation command is the same for both argument sets
    eval_cmd = 'python "code/eval.py" --inputfile "results/greetings.txt" --outputfile results.json'
    # (i) Arguments for all four template parameters
    code_arg = TemplateArgument(
        parameter=template.get_parameter('code'),
        value=FileHandle(filepath='code/runme.py')
    )
    names_arg = TemplateArgument(
        parameter=template.get_parameter('names'),
        value=FileHandle(filepath='data/myfriends.txt')
    )
    sleeptime_arg = TemplateArgument(
        parameter=template.get_parameter('sleeptime'),
        value=11
    )
    waittime_arg = TemplateArgument(
        parameter=template.get_parameter('waittime'),
        value=22
    )
    arguments = {
        'code': code_arg,
        'names': names_arg,
        'sleeptime': sleeptime_arg,
        'waittime': waittime_arg
    }
    expected = [
        'python "runme.py" --inputfile "data/names.txt" --outputfile "results/greetings.txt" --sleeptime 11',
        'wait 22',
        eval_cmd
    ]
    assert mp.get_commands(template=template, arguments=arguments) == expected
    # (ii) Only the names file is given. The expected commands contain
    # different values for the remaining parameters.
    arguments = {
        'names': TemplateArgument(
            parameter=template.get_parameter('names'),
            value=FileHandle(filepath='data/myfriends.txt')
        )
    }
    expected = [
        'python "code/helloworld.py" --inputfile "data/names.txt" --outputfile "results/greetings.txt" --sleeptime 10',
        'wait 5',
        eval_cmd
    ]
    assert mp.get_commands(template=template, arguments=arguments) == expected
    # (iii) After deleting the 'inputfile' entry from the input parameters
    # of the workflow specification an error is expected
    del template.workflow_spec['inputs']['parameters']['inputfile']
    with pytest.raises(err.InvalidTemplateError):
        mp.get_commands(template=template, arguments=arguments)
def test_run_benchmark(self, tmpdir):
    """Run a benchmark and verify the run information and the run result
    that are stored in the underlying database.
    """
    base_dir = str(tmpdir)
    # Point the BASEDIR environment variable to the temporary directory
    os.environ[config.ENV_BASEDIR] = os.path.abspath(base_dir)
    # Create a fresh database and connect to it
    connect_string = 'sqlite:{}/auth.db'.format(base_dir)
    DatabaseDriver.init_db(connect_string=connect_string)
    con = DatabaseDriver.connect(connect_string=connect_string)
    # Set up the benchmark repository and the benchmark engine
    template_store = TemplateRepository(
        base_dir=config.get_template_dir(),
        loader=BenchmarkTemplateLoader(),
        filenames=['benchmark', 'template', 'workflow']
    )
    repository = BenchmarkRepository(con=con, template_store=template_store)
    engine = BenchmarkEngine(con)
    # Register a benchmark giving only a name and the source directory
    benchmark = repository.add_benchmark(
        name='My benchmark',
        src_dir=TEMPLATE_DIR
    )
    template = benchmark.template
    names_arg = TemplateArgument(
        parameter=template.get_parameter('names'),
        value=FileHandle(DATA_FILE)
    )
    sleeptime_arg = TemplateArgument(
        parameter=template.get_parameter('sleeptime'),
        value=1
    )
    greeting_arg = TemplateArgument(
        parameter=template.get_parameter('greeting'),
        value='Welcome'
    )
    arguments = {
        'names': names_arg,
        'sleeptime': sleeptime_arg,
        'greeting': greeting_arg
    }
    run_id, state = engine.run(benchmark, arguments, 'USERID')
    assert state.is_success()
    # The run is recorded in the benchmark_run table
    run_query = 'SELECT * FROM benchmark_run WHERE run_id = ?'
    run_row = con.execute(run_query, (run_id, )).fetchone()
    assert run_row['benchmark_id'] == benchmark.identifier
    assert run_row['user_id'] == 'USERID'
    assert run_row['state'] == state.type_id
    # The run result is read from the table whose name is derived from
    # the benchmark identifier
    table_name = bm.PREFIX_RESULT_TABLE + benchmark.identifier
    result_query = 'SELECT * FROM {} WHERE run_id = ?'.format(table_name)
    result_row = con.execute(result_query, (run_id, )).fetchone()
    assert result_row['max_line'] == 'Welcome Alice!'
def test_run_with_missing_file(self, tmpdir):
    """Run the helloworld template variant that references a missing file
    and verify that the run is reported as failed. Also ensure that an
    error is raised if the argument file does not exist.
    """
    base_dir = str(tmpdir)
    # The workflow run fails if a file that is referenced by the workflow
    # does not exist in the created workspace
    repo = TemplateRepository(base_dir=os.path.join(base_dir, 'repo'))
    template = repo.add_template(
        src_dir=WORKFLOW_DIR,
        template_spec_file=TEMPLATE_WITH_MISSING_FILE
    )
    names_arg = TemplateArgument(
        parameter=template.get_parameter('names'),
        value=FileHandle(DATA_FILE)
    )
    sleeptime_arg = TemplateArgument(
        parameter=template.get_parameter('sleeptime'),
        value=3
    )
    arguments = {'names': names_arg, 'sleeptime': sleeptime_arg}
    # Run the workflow synchronously
    engine = MultiProcessWorkflowEngine(
        base_dir=os.path.join(base_dir, 'engine'),
        run_async=False
    )
    sync_run_id, returned_state = engine.execute(template, arguments)
    assert returned_state.is_error()
    assert len(returned_state.messages) > 0
    # The state that is maintained by the engine agrees with the result
    stored_state = engine.get_state(sync_run_id)
    assert stored_state.is_error()
    assert len(stored_state.messages) > 0
    # Executing the workflow with an unknown names file raises an IOError
    with pytest.raises(IOError):
        engine.execute(
            template=template,
            arguments={
                'names': TemplateArgument(
                    parameter=template.get_parameter('names'),
                    value=FileHandle(UNKNOWN_FILE)
                ),
                'sleeptime': TemplateArgument(
                    parameter=template.get_parameter('sleeptime'),
                    value=3
                )
            }
        )
def test_prepare_inputs_for_local_run(self, tmpdir):
    """Verify that input files are copied into the run directory of a local
    workflow run, including the expected error cases.
    """
    def input_files(handle):
        """Get the list of input file references from the workflow
        specification of the given template.
        """
        return handle.workflow_spec.get('inputs', {}).get('files', [])

    def read_lines(*path):
        """Read the set of stripped lines from a file in the run
        directory.
        """
        with open(os.path.join(run_dir, *path), 'r') as f:
            return {line.strip() for line in f}

    def reset_run_dir():
        """Replace the run directory with an empty directory."""
        shutil.rmtree(run_dir)
        os.makedirs(run_dir)

    # Load template
    store = TemplateRepository(base_dir=str(tmpdir))
    template = store.add_template(
        src_dir=WORKFLOW_DIR,
        template_spec_file=SPEC_FILE
    )
    # Create run directory
    run_dir = os.path.join(str(tmpdir), 'run')
    os.makedirs(run_dir)
    # Copy input files to run directory
    backend.upload_files(
        template=template,
        files=input_files(template),
        arguments={
            'names': TemplateArgument(
                template.get_parameter('names'),
                value=FileHandle(filepath=DATA_FILE)
            )
        },
        loader=FileCopy(run_dir)
    )
    # Expect the two code files and the two data files in the run
    # directory. The file code/dontcopy.me must not have been copied.
    assert os.path.isfile(os.path.join(run_dir, 'code', 'helloworld.py'))
    assert os.path.isfile(os.path.join(run_dir, 'code', 'script.sh'))
    assert os.path.isfile(os.path.join(run_dir, 'data', 'persons.txt'))
    assert os.path.isfile(os.path.join(run_dir, 'data', 'friends.txt'))
    assert not os.path.isfile(os.path.join(run_dir, 'code', 'dontcopy.me'))
    # data/persons.txt contains exactly Alice and Bob
    assert read_lines('data', 'persons.txt') == {'Alice', 'Bob'}
    # data/friends.txt contains exactly Jane Doe and Joe Bloggs
    assert read_lines('data', 'friends.txt') == {'Jane Doe', 'Joe Bloggs'}
    # Error case: no argument given for the names parameter
    with pytest.raises(err.MissingArgumentError):
        backend.upload_files(
            template=template,
            files=input_files(template),
            arguments={},
            loader=FileCopy(run_dir)
        )
    # Error case: the argument file does not exist
    template = store.add_template(
        src_dir=WORKFLOW_DIR,
        template_spec_file=SPEC_FILE
    )
    reset_run_dir()
    with pytest.raises(IOError):
        backend.upload_files(
            template=template,
            files=input_files(template),
            arguments={
                'names': TemplateArgument(
                    template.get_parameter('names'),
                    value=FileHandle(
                        filepath=os.path.join(str(tmpdir), 'no.file')
                    )
                )
            },
            loader=FileCopy(run_dir)
        )
    assert not os.path.isdir(os.path.join(run_dir, 'data'))
    # Without the constant value for the names parameter the names file is
    # expected as names.txt in the run directory instead of the data folder
    para = template.get_parameter('names')
    para.as_constant = None
    reset_run_dir()
    backend.upload_files(
        template=template,
        files=input_files(template),
        arguments={
            'names': TemplateArgument(
                parameter=para,
                value=FileHandle(filepath=DATA_FILE)
            )
        },
        loader=FileCopy(run_dir)
    )
    assert os.path.isfile(os.path.join(run_dir, 'code', 'helloworld.py'))
    assert os.path.isfile(os.path.join(run_dir, 'names.txt'))
    assert not os.path.isfile(os.path.join(run_dir, 'data', 'persons.txt'))
    assert os.path.isfile(os.path.join(run_dir, 'data', 'friends.txt'))
    # Error case: template with input file parameter that is not of type
    # file
    template = store.add_template(
        src_dir=WORKFLOW_DIR,
        template_spec_file=SPEC_FILE_ERR
    )
    reset_run_dir()
    with pytest.raises(err.InvalidTemplateError):
        backend.upload_files(
            template=template,
            files=input_files(template),
            arguments={
                'sleeptime': TemplateArgument(
                    template.get_parameter('names'),
                    value=FileHandle(filepath=DATA_FILE)
                )
            },
            loader=FileCopy(run_dir)
        )
def test_run_helloworld(self, tmpdir):
    """Execute the helloworld example asynchronously and synchronously.

    The test covers (i) a successful asynchronous run and removal of its
    resources, (ii) canceling an active run, and (iii) a synchronous run
    using a second engine instance on the same base directory.
    """
    repo = TemplateRepository(base_dir=os.path.join(str(tmpdir), 'repo'))
    template = repo.add_template(
        src_dir=WORKFLOW_DIR,
        template_spec_file=TEMPLATE_FILE
    )
    arguments = {
        'names': TemplateArgument(
            parameter=template.get_parameter('names'),
            value=FileHandle(DATA_FILE)
        ),
        'sleeptime': TemplateArgument(
            parameter=template.get_parameter('sleeptime'),
            value=3
        )
    }
    engine_dir = os.path.join(str(tmpdir), 'engine')
    engine = MultiProcessWorkflowEngine(base_dir=engine_dir, run_async=True)
    # Run workflow asynchronously and poll until the run is no longer
    # active
    run_id, _ = engine.execute(template, arguments)
    while engine.get_state(run_id).is_active():
        time.sleep(1)
    state = engine.get_state(run_id)
    self.validate_run_result(state)
    # Remove run resources (can be called multiple times as long as the
    # task is not active)
    engine.remove_run(run_id)
    engine.remove_run(run_id)
    # Getting the state of a removed run raises an error
    with pytest.raises(err.UnknownRunError):
        engine.get_state(run_id)
    # Cancel run. The sleep time is increased so that the run is still
    # active when it is canceled.
    arguments = {
        'names': TemplateArgument(
            parameter=template.get_parameter('names'),
            value=FileHandle(DATA_FILE)
        ),
        'sleeptime': TemplateArgument(
            parameter=template.get_parameter('sleeptime'),
            value=30
        )
    }
    run_id, _ = engine.execute(template, arguments)
    # The original loop ended with an unconditional break, i.e., the body
    # was executed at most once. A conditional makes that explicit.
    if engine.get_state(run_id).is_active():
        # Removing resources of an active run will raise an error
        with pytest.raises(RuntimeError):
            engine.remove_run(run_id)
        # Cancel the run
        engine.cancel_run(run_id)
    with pytest.raises(err.UnknownRunError):
        engine.get_state(run_id)
    # Remove run resources does not raise an error now
    engine.remove_run(run_id)
    # Run workflow synchronously
    arguments = {
        'names': TemplateArgument(
            parameter=template.get_parameter('names'),
            value=FileHandle(DATA_FILE)
        ),
        'sleeptime': TemplateArgument(
            parameter=template.get_parameter('sleeptime'),
            value=1
        )
    }
    engine = MultiProcessWorkflowEngine(base_dir=engine_dir, run_async=False)
    sync_run_id, state = engine.execute(template, arguments)
    assert run_id != sync_run_id
    # Validate the state that is returned by execute() as well as the state
    # that is maintained by the engine. Previously the returned state was
    # validated twice and the fetched state was never checked.
    self.validate_run_result(state)
    state = engine.get_state(sync_run_id)
    self.validate_run_result(state)
    # Remove the resources of the synchronous run. The previous version
    # passed the identifier of the canceled run here, which left the
    # synchronous run behind.
    engine.remove_run(sync_run_id)
def upload_files(template, files, arguments, loader):
    """Upload all references to local files in a given list of file names or
    parameter references. The list of files, for example, corresponds to the
    entries in the 'inputs.files' section of a REANA workflow specification.

    Uses a loader function to allow use of this method in cases where the
    workflow is executed locally or remote using a REANA cluster instance.

    Raises errors if (i) an unknown parameter is referenced, (ii) the type
    of a referenced parameter in the input files section is not of type
    file, or (iii) no argument is given for a referenced parameter that has
    no default value.

    Parameters
    ----------
    template: benchtmpl.workflow.template.base.TemplateHandle
        Workflow template containing the parameterized specification and the
        parameter declarations
    files: list(string)
        List of file references
    arguments: dict(benchtmpl.workflow.parameter.value.TemplateArgument)
        Dictionary of argument values for parameters in the template
    loader: func
        File (up)load function that takes a filepath as the first argument
        and a (remote) target path as the second argument

    Raises
    ------
    benchtmpl.error.InvalidTemplateError
    benchtmpl.error.MissingArgumentError
    benchtmpl.error.UnknownParameterError
    """
    for val in files:
        if not tmpl.is_parameter(val):
            # Plain file reference. The source is resolved relative to the
            # template base directory and the entry itself is the target.
            source = os.path.join(template.base_dir, val)
            target = val
            loader(source, target)
            continue
        # The list entry references a template parameter
        var = tmpl.get_parameter_name(val)
        para = template.get_parameter(var)
        # Raise the documented error for references to unknown parameters
        # instead of failing with an AttributeError below. This check has no
        # effect if get_parameter() never returns None.
        if para is None:
            raise err.UnknownParameterError(var)
        # Raise error if the type of the referenced parameter is not file
        if not para.is_file():
            raise err.InvalidTemplateError('expected file parameter for \'{}\''.format(var))
        arg = arguments.get(var)
        if arg is None:
            if para.default_value is None:
                raise err.MissingArgumentError(var)
            # Set argument to file handle using the default value (assuming
            # that the default points to a file in the template base
            # directory). The target path is the parameter constant, if
            # one is defined and the parameter is not flagged as input, or
            # the default value otherwise.
            if para.has_constant() and not para.as_input():
                target_path = para.get_constant()
            else:
                target_path = para.default_value
            arg = TemplateArgument(
                parameter=para,
                value=InputFile(
                    f_handle=FileHandle(
                        filepath=os.path.join(
                            template.base_dir,
                            para.default_value
                        )
                    ),
                    target_path=target_path
                )
            )
        # Get path to source file and the target path from the input file
        # handle
        source = arg.value.source()
        target = arg.value.target()
        # Upload source file
        loader(source, target)