def test_simple_replace(self):
    """Check substitution of parameter references in a simple template.

    The same checks are run against the YAML and the JSON version of the
    template file.
    """
    for template_file in [TEMPLATE_YAML_FILE, TEMPLATE_JSON_FILE]:
        template = DefaultTemplateLoader().load(template_file)

        def argument(name, value):
            # Bind the value to the parameter declaration of the template
            return TemplateArgument(
                parameter=template.get_parameter(name),
                value=value
            )

        # No argument is given for 'waittime'; the assertions below expect
        # it to come out as 5.
        arguments = {
            'code': argument('code', FileHandle('code/helloworld.py')),
            'names': argument('names', FileHandle('data/list-of-names.txt')),
            'sleeptime': argument('sleeptime', 10)
        }
        spec = tmpl.replace_args(
            spec=template.workflow_spec,
            arguments=arguments,
            parameters=template.parameters
        )
        inputs = spec['inputs']
        # Entries in the list of input files
        assert inputs['files'][0] == 'helloworld.py'
        assert inputs['files'][1] == 'data/names.txt'
        # Entries in the parameters dictionary
        expected_parameters = [
            ('helloworld', 'code/helloworld.py'),
            ('inputfile', 'data/names.txt'),
            ('sleeptime', 10),
            ('waittime', 5)
        ]
        for key, expected in expected_parameters:
            assert inputs['parameters'][key] == expected
def test_run_with_invalid_cmd(self, tmpdir):
    """Run the helloworld template variant that contains an invalid shell
    command and expect the run to end in an error state.
    """
    base_dir = str(tmpdir)
    repo = TemplateRepository(base_dir=os.path.join(base_dir, 'repo'))
    template = repo.add_template(
        src_dir=WORKFLOW_DIR,
        template_spec_file=TEMPLATE_WITH_INVALID_CMD
    )
    arguments = dict()
    arguments['names'] = TemplateArgument(
        parameter=template.get_parameter('names'),
        value=FileHandle(DATA_FILE)
    )
    arguments['sleeptime'] = TemplateArgument(
        parameter=template.get_parameter('sleeptime'),
        value=3
    )

    def assert_failed(run_state):
        # An error state is expected to carry at least one message
        assert run_state.is_error()
        assert len(run_state.messages) > 0

    # Run the workflow synchronously so that execute() returns the final
    # state of the run
    engine = MultiProcessWorkflowEngine(
        base_dir=os.path.join(base_dir, 'engine'),
        run_async=False,
        verbose=True
    )
    sync_run_id, state = engine.execute(template, arguments)
    assert_failed(state)
    # The state that is kept by the engine has to report the error as well
    assert_failed(engine.get_state(sync_run_id))
def upload_stream(self, file, file_name):
    """Store an uploaded file object under a new unique identifier.

    The file is saved as file_name inside the directory that
    get_file_dir() returns for the newly generated identifier.

    Parameters
    ----------
    file: werkzeug.datastructures.FileStorage
        File object (e.g., uploaded via HTTP request)
    file_name: string
        Name of the file

    Returns
    -------
    benchtmpl.io.files.base.FileHandle
    """
    # NOTE(review): file_name is joined into the target path as given;
    # presumably callers pass a sanitized base name -- confirm.
    file_id = util.get_unique_identifier()
    target = os.path.join(self.get_file_dir(file_id, create=True), file_name)
    # Let the file object write itself to the target path
    file.save(target)
    return FileHandle(
        identifier=file_id,
        filepath=target,
        file_name=file_name
    )
def upload_file(self, filename):
    """Copy a file from local disk into a new entry of the file store.

    The copy keeps the base name of the source file and is placed in the
    directory that get_file_dir() returns for a newly generated identifier.

    Parameters
    ----------
    filename: string
        Path to file on disk

    Returns
    -------
    benchtmpl.io.files.base.FileHandle

    Raises
    ------
    ValueError
        If filename does not point to an existing file
    """
    # Reject anything that is not an existing regular file before a new
    # identifier (and directory) is created
    if not os.path.isfile(filename):
        raise ValueError('invalid file path \'' + str(filename) + '\'')
    base_name = os.path.basename(filename)
    file_id = util.get_unique_identifier()
    target = os.path.join(self.get_file_dir(file_id, create=True), base_name)
    shutil.copyfile(filename, target)
    return FileHandle(
        identifier=file_id,
        filepath=target,
        file_name=base_name
    )
def test_expand_parameters(self):
    """Check the commands generated from the template for explicit
    arguments, for default values, and for an incomplete specification.
    """
    template = DefaultTemplateLoader().load(TEMPLATE_FILE)

    def argument(name, value):
        # Bind the value to the parameter declaration of the template
        return TemplateArgument(
            parameter=template.get_parameter(name),
            value=value
        )

    # Explicit values for all four parameters
    arguments = {
        'code': argument('code', FileHandle(filepath='code/runme.py')),
        'names': argument('names', FileHandle(filepath='data/myfriends.txt')),
        'sleeptime': argument('sleeptime', 11),
        'waittime': argument('waittime', 22)
    }
    commands = mp.get_commands(template=template, arguments=arguments)
    expected = [
        'python "runme.py" --inputfile "data/names.txt" --outputfile "results/greetings.txt" --sleeptime 11',
        'wait 22',
        'python "code/eval.py" --inputfile "results/greetings.txt" --outputfile results.json'
    ]
    assert commands == expected
    # Only the names file is given; the remaining parameters are expected
    # to take their default values
    arguments = {
        'names': argument('names', FileHandle(filepath='data/myfriends.txt'))
    }
    commands = mp.get_commands(template=template, arguments=arguments)
    expected = [
        'python "code/helloworld.py" --inputfile "data/names.txt" --outputfile "results/greetings.txt" --sleeptime 10',
        'wait 5',
        'python "code/eval.py" --inputfile "results/greetings.txt" --outputfile results.json'
    ]
    assert commands == expected
    # Error case: generating the commands fails after the 'inputfile' entry
    # is removed from the workflow parameters
    workflow_parameters = template.workflow_spec['inputs']['parameters']
    del workflow_parameters['inputfile']
    with pytest.raises(err.InvalidTemplateError):
        mp.get_commands(template=template, arguments=arguments)
def test_file_args(self):
    """Check how replace_args resolves references to file parameters."""
    spec = {'input': ['$[[codeFile]]']}

    def resolve(parameters, user_arguments=None):
        # Replace the references in spec and return the resulting 'input'
        # list. User arguments (if any) are parsed against the parameter
        # index first.
        if user_arguments is None:
            arguments = dict()
        else:
            arguments = pr.parse_arguments(
                arguments=user_arguments,
                parameters=parameters
            )
        wf = tmpl.replace_args(
            spec=spec,
            parameters=parameters,
            arguments=arguments
        )
        return wf['input']

    # File parameter with a constant ('as') value: the constant is expected
    # in the result with and without a user-provided file
    parameters = pd.create_parameter_index([{
        'id': 'codeFile',
        'datatype': 'file',
        'defaultValue': 'src/helloworld.py',
        'as': 'code/helloworld.py'
    }])
    assert resolve(parameters) == ['code/helloworld.py']
    assert resolve(
        parameters,
        {'codeFile': FileHandle(filepath='/dev/null')}
    ) == ['code/helloworld.py']
    # File parameter without constant value: the default value is expected
    # if no argument is given and the name of the given file otherwise
    parameters = pd.create_parameter_index([{
        'id': 'codeFile',
        'datatype': 'file',
        'defaultValue': 'src/helloworld.py'
    }])
    assert resolve(parameters) == ['src/helloworld.py']
    assert resolve(
        parameters,
        {'codeFile': FileHandle(filepath='/dev/null')}
    ) == ['null']
def test_source_and_target_path(self):
    """Check source() and target() of an input file handle with and without
    an explicit target path.
    """
    source_path = '/home/user/files/myfile.txt'
    fh = FileHandle(filepath=source_path)
    # Without target path the target is expected to be the file name
    plain = InputFile(f_handle=fh)
    assert plain.source() == source_path
    assert plain.target() == 'myfile.txt'
    # An explicit target path is returned as is; the source is unchanged
    relocated = InputFile(f_handle=fh, target_path='data/names.txt')
    assert relocated.source() == source_path
    assert relocated.target() == 'data/names.txt'
def test_run_with_missing_file(self, tmpdir):
    """Run the helloworld template variant that references a missing file
    and expect the run to end in an error state.
    """
    base_dir = str(tmpdir)
    repo = TemplateRepository(base_dir=os.path.join(base_dir, 'repo'))
    template = repo.add_template(
        src_dir=WORKFLOW_DIR,
        template_spec_file=TEMPLATE_WITH_MISSING_FILE
    )

    def make_arguments(names_file):
        # Argument set for the template with the given names file
        return {
            'names': TemplateArgument(
                parameter=template.get_parameter('names'),
                value=FileHandle(names_file)
            ),
            'sleeptime': TemplateArgument(
                parameter=template.get_parameter('sleeptime'),
                value=3
            )
        }

    def assert_failed(run_state):
        # An error state is expected to carry at least one message
        assert run_state.is_error()
        assert len(run_state.messages) > 0

    arguments = make_arguments(DATA_FILE)
    # Run the workflow synchronously. The run is expected to fail since the
    # workflow references a file that is not in the created workspace.
    engine = MultiProcessWorkflowEngine(
        base_dir=os.path.join(base_dir, 'engine'),
        run_async=False
    )
    sync_run_id, state = engine.execute(template, arguments)
    assert_failed(state)
    assert_failed(engine.get_state(sync_run_id))
    # If the given input file itself does not exist, execute() is expected
    # to raise an error instead of returning an error state
    with pytest.raises(IOError):
        engine.execute(
            template=template,
            arguments=make_arguments(UNKNOWN_FILE)
        )
def list_files(self):
    """Get list of file handles for all uploaded files.

    Every sub-directory of the base directory is treated as one uploaded
    file: the directory name is the file identifier and the first entry in
    the directory is the file. Entries in the base directory that are not
    directories are ignored, as are sub-directories that contain no file.

    Returns
    -------
    list(benchtmpl.io.files.base.FileHandle)
    """
    result = list()
    for identifier in os.listdir(self.directory):
        file_dir = os.path.join(self.directory, identifier)
        if not os.path.isdir(file_dir):
            continue
        entries = os.listdir(file_dir)
        if not entries:
            # Empty directory (e.g., left behind by an upload that did not
            # complete). There is no file to create a handle for; skip the
            # entry instead of failing with an IndexError.
            continue
        file_name = entries[0]
        result.append(
            FileHandle(
                identifier=identifier,
                filepath=os.path.join(file_dir, file_name),
                file_name=file_name
            )
        )
    return result
def test_run_benchmark(self, tmpdir):
    """Run a benchmark and check the rows that are written to the database
    for the run.
    """
    base_dir = str(tmpdir)
    # The BASEDIR environment variable has to be set before the template
    # directory is requested from the configuration below
    os.environ[config.ENV_BASEDIR] = os.path.abspath(base_dir)
    # Create a new database and open a connection
    connect_string = 'sqlite:{}/auth.db'.format(base_dir)
    DatabaseDriver.init_db(connect_string=connect_string)
    con = DatabaseDriver.connect(connect_string=connect_string)
    # Create repository and engine instances
    template_store = TemplateRepository(
        base_dir=config.get_template_dir(),
        loader=BenchmarkTemplateLoader(),
        filenames=['benchmark', 'template', 'workflow']
    )
    repository = BenchmarkRepository(con=con, template_store=template_store)
    engine = BenchmarkEngine(con)
    # Add benchmark with minimal information
    benchmark = repository.add_benchmark(
        name='My benchmark',
        src_dir=TEMPLATE_DIR
    )
    template = benchmark.template
    argument_values = [
        ('names', FileHandle(DATA_FILE)),
        ('sleeptime', 1),
        ('greeting', 'Welcome')
    ]
    arguments = {
        name: TemplateArgument(
            parameter=template.get_parameter(name),
            value=value
        )
        for name, value in argument_values
    }
    run_id, state = engine.run(benchmark, arguments, 'USERID')
    assert state.is_success()
    # The run is recorded together with benchmark, user and state
    run_row = con.execute(
        'SELECT * FROM benchmark_run WHERE run_id = ?',
        (run_id, )
    ).fetchone()
    assert run_row['benchmark_id'] == benchmark.identifier
    assert run_row['user_id'] == 'USERID'
    assert run_row['state'] == state.type_id
    # The run result is stored in the result table of the benchmark
    table_name = bm.PREFIX_RESULT_TABLE + benchmark.identifier
    result_row = con.execute(
        'SELECT * FROM {} WHERE run_id = ?'.format(table_name),
        (run_id, )
    ).fetchone()
    assert result_row['max_line'] == 'Welcome Alice!'
def get_file(self, identifier):
    """Get handle for file with given identifier. Returns None if no file
    with given identifier exists.

    A file exists if the directory returned by get_file_dir() for the
    identifier exists and contains at least one entry. The first entry in
    that directory is taken to be the uploaded file.

    Parameters
    ----------
    identifier: string
        Unique file identifier

    Returns
    -------
    benchtmpl.io.files.base.FileHandle
    """
    file_dir = self.get_file_dir(identifier)
    if not os.path.isdir(file_dir):
        return None
    entries = os.listdir(file_dir)
    if not entries:
        # The directory exists but holds no file (e.g., an upload that did
        # not complete). Report this as 'no such file' instead of failing
        # with an IndexError.
        return None
    # The uploaded file is expected to be the only file in the directory
    file_name = entries[0]
    return FileHandle(
        identifier=identifier,
        filepath=os.path.join(file_dir, file_name),
        file_name=file_name
    )
def read_parameter(para, scanner, prompt_prefix=''):
    """Prompt the user for the value of a template parameter.

    Prints the parameter prompt and reads a value of the parameter's type
    from the scanner. If reading raises a ValueError the error is printed
    and the user is prompted again until a value has been read.

    Parameters
    ----------
    para: benchtmpl.workflow.parameter.TemplateParameter
        Workflow template parameter declaration
    scanner: object
        Input reader that provides the methods next_bool, next_file,
        next_float, next_int and next_string
    prompt_prefix: string, optional
        Text that is printed in front of the parameter prompt

    Returns
    -------
    bool or float or int or string or benchtmpl.io.files.base.InputFile
    """
    while True:
        print(prompt_prefix + para.prompt(), end='')
        try:
            if para.is_bool():
                return scanner.next_bool(default_value=para.default_value)
            if para.is_file():
                filename = scanner.next_file(default_value=para.default_value)
                # The user is asked for a target path only if the parameter
                # has a constant and is flagged as input
                target_path = None
                if para.has_constant() and para.as_input():
                    print('Target Path:', end='')
                    target_path = scanner.next_string()
                return InputFile(
                    f_handle=FileHandle(filepath=filename),
                    target_path=target_path
                )
            if para.is_float():
                return scanner.next_float(default_value=para.default_value)
            if para.is_int():
                return scanner.next_int(default_value=para.default_value)
            # Every other parameter type is read as a string
            return scanner.next_string(default_value=para.default_value)
        except ValueError as ex:
            # Show the problem and prompt again
            print(ex)
def test_prepare_inputs_for_local_run(self, tmpdir):
    """Check which files upload_files() copies into the directory of a local
    workflow run, and the errors it raises for invalid input.
    """
    base_dir = str(tmpdir)
    store = TemplateRepository(base_dir=base_dir)
    template = store.add_template(
        src_dir=WORKFLOW_DIR,
        template_spec_file=SPEC_FILE
    )
    run_dir = os.path.join(base_dir, 'run')
    os.makedirs(run_dir)

    def input_files(handle):
        # Entries in the inputs.files section of the workflow specification
        return handle.workflow_spec.get('inputs', {}).get('files', [])

    def in_run_dir(*parts):
        return os.path.join(run_dir, *parts)

    def reset_run_dir():
        # Start over with an empty run directory
        shutil.rmtree(run_dir)
        os.makedirs(run_dir)

    def read_lines(*parts):
        # Set of stripped lines of a file in the run directory
        with open(in_run_dir(*parts), 'r') as f:
            return {line.strip() for line in f}

    # Copy input files to the run directory
    backend.upload_files(
        template=template,
        files=input_files(template),
        arguments={
            'names': TemplateArgument(
                template.get_parameter('names'),
                value=FileHandle(filepath=DATA_FILE)
            )
        },
        loader=FileCopy(run_dir)
    )
    # Files that are expected in the run directory after the copy
    copied = [
        ('code', 'helloworld.py'),
        ('code', 'script.sh'),
        ('data', 'persons.txt'),
        ('data', 'friends.txt')
    ]
    for parts in copied:
        assert os.path.isfile(in_run_dir(*parts))
    assert not os.path.isfile(in_run_dir('code', 'dontcopy.me'))
    # data/persons.txt should contain Alice and Bob
    names = read_lines('data', 'persons.txt')
    assert len(names) == 2
    assert 'Alice' in names
    assert 'Bob' in names
    # data/friends.txt should contain Jane Doe and Joe Bloggs
    friends = read_lines('data', 'friends.txt')
    assert len(friends) == 2
    assert 'Jane Doe' in friends
    assert 'Joe Bloggs' in friends
    # Error case: no argument for the names parameter
    with pytest.raises(err.MissingArgumentError):
        backend.upload_files(
            template=template,
            files=input_files(template),
            arguments={},
            loader=FileCopy(run_dir)
        )
    # Error case: the given names file does not exist
    template = store.add_template(
        src_dir=WORKFLOW_DIR,
        template_spec_file=SPEC_FILE
    )
    reset_run_dir()
    with pytest.raises(IOError):
        backend.upload_files(
            template=template,
            files=input_files(template),
            arguments={
                'names': TemplateArgument(
                    template.get_parameter('names'),
                    value=FileHandle(
                        filepath=os.path.join(str(tmpdir), 'no.file')
                    )
                )
            },
            loader=FileCopy(run_dir)
        )
    assert not os.path.isdir(in_run_dir('data'))
    # Without a constant value for the names parameter the names file is
    # copied into the run directory itself instead of the data folder
    para = template.get_parameter('names')
    para.as_constant = None
    reset_run_dir()
    backend.upload_files(
        template=template,
        files=input_files(template),
        arguments={
            'names': TemplateArgument(
                parameter=para,
                value=FileHandle(filepath=DATA_FILE)
            )
        },
        loader=FileCopy(run_dir)
    )
    assert os.path.isfile(in_run_dir('code', 'helloworld.py'))
    assert os.path.isfile(in_run_dir('names.txt'))
    assert not os.path.isfile(in_run_dir('data', 'persons.txt'))
    assert os.path.isfile(in_run_dir('data', 'friends.txt'))
    # Error case: template that references a parameter in the input files
    # section that is not of type file
    template = store.add_template(
        src_dir=WORKFLOW_DIR,
        template_spec_file=SPEC_FILE_ERR
    )
    reset_run_dir()
    with pytest.raises(err.InvalidTemplateError):
        backend.upload_files(
            template=template,
            files=input_files(template),
            arguments={
                'sleeptime': TemplateArgument(
                    template.get_parameter('names'),
                    value=FileHandle(filepath=DATA_FILE)
                )
            },
            loader=FileCopy(run_dir)
        )
def test_flat_parse(self):
    """Check argument parsing and validation for a flat (un-nested) list of
    parameter declarations.
    """
    # Parameters A-E, one per data type. D and E are declared as not
    # required.
    declarations = [
        ('A', dict(data_type=pd.DT_INTEGER)),
        ('B', dict(data_type=pd.DT_BOOL)),
        ('C', dict(data_type=pd.DT_DECIMAL)),
        ('D', dict(data_type=pd.DT_FILE, required=False)),
        ('E', dict(data_type=pd.DT_STRING, required=False))
    ]
    template = TemplateHandle(
        workflow_spec=dict(),
        parameters=[
            TemplateParameter(pd.parameter_declaration(name, **options))
            for name, options in declarations
        ]
    )
    params = template.parameters
    fh = InputFile(f_handle=FileHandle(filepath=LOCAL_FILE))
    # Valid argument set
    args = values.parse_arguments(
        arguments={'A': 10, 'B': True, 'C': 12.5, 'D': fh, 'E': 'ABC'},
        parameters=params,
        validate=True
    )
    assert len(args) == 5
    for key in params.keys():
        assert key in args
    # The parsed arguments are accepted as input again
    values.parse_arguments(arguments=args, parameters=params, validate=False)
    # Error cases: argument sets that contain an undeclared parameter ('Z')
    # or that omit declared parameters
    for arguments in [{'A': 10, 'Z': 0}, {'A': 10, 'B': True}]:
        with pytest.raises(ValueError):
            values.parse_arguments(arguments=arguments, parameters=params)
    # Validate data type: replace one value at a time in an otherwise valid
    # argument set with a value of the wrong type
    wrong_types = [
        ('A', '10'),
        ('B', 23),
        ('C', '12.3'),
        ('D', 'fh'),
        ('E', 12)
    ]
    for key, wrong_value in wrong_types:
        arguments = {'A': 10, 'B': True, 'C': 12.3, 'D': fh, 'E': 'ABC'}
        arguments[key] = wrong_value
        with pytest.raises(ValueError):
            values.parse_arguments(
                arguments=arguments,
                parameters=params,
                validate=True
            )
def test_run_helloworld(self, tmpdir):
    """Execute the helloworld example.

    Covers an asynchronous run (including removal of the run resources), a
    cancelled run, and a synchronous run.
    """
    repo = TemplateRepository(base_dir=os.path.join(str(tmpdir), 'repo'))
    template = repo.add_template(
        src_dir=WORKFLOW_DIR,
        template_spec_file=TEMPLATE_FILE
    )

    def make_arguments(sleeptime):
        # Argument set for the template with the given sleep time
        return {
            'names': TemplateArgument(
                parameter=template.get_parameter('names'),
                value=FileHandle(DATA_FILE)
            ),
            'sleeptime': TemplateArgument(
                parameter=template.get_parameter('sleeptime'),
                value=sleeptime
            )
        }

    engine_dir = os.path.join(str(tmpdir), 'engine')
    engine = MultiProcessWorkflowEngine(base_dir=engine_dir, run_async=True)
    # Run workflow asynchronously and poll until it is no longer active
    run_id, _ = engine.execute(template, make_arguments(3))
    while engine.get_state(run_id).is_active():
        time.sleep(1)
    state = engine.get_state(run_id)
    self.validate_run_result(state)
    # Remove run resources (can be called multiple times as long as the
    # task is not active)
    engine.remove_run(run_id)
    engine.remove_run(run_id)
    # Getting the state of a removed run raises an error
    with pytest.raises(err.UnknownRunError):
        engine.get_state(run_id)
    # Cancel run. The long sleep time keeps the run active while the test
    # tries to remove and then cancel it.
    run_id, _ = engine.execute(template, make_arguments(30))
    if engine.get_state(run_id).is_active():
        # Removing resources of an active run will raise an error
        with pytest.raises(RuntimeError):
            engine.remove_run(run_id)
        # Cancel the run
        engine.cancel_run(run_id)
    with pytest.raises(err.UnknownRunError):
        engine.get_state(run_id)
    # Remove run resources does not raise an error now
    engine.remove_run(run_id)
    # Run workflow synchronously
    engine = MultiProcessWorkflowEngine(base_dir=engine_dir, run_async=False)
    sync_run_id, state = engine.execute(template, make_arguments(1))
    assert run_id != sync_run_id
    # Validate the state that is returned by execute() as well as the state
    # that is kept by the engine for the run
    self.validate_run_result(state)
    state = engine.get_state(sync_run_id)
    self.validate_run_result(state)
    # Remove the resources of the synchronous run
    engine.remove_run(sync_run_id)
def upload_files(template, files, arguments, loader):
    """Upload the files that are referenced in a list of file names and
    parameter references.

    The list of files corresponds, for example, to the entries in the
    'inputs.files' section of a REANA workflow specification. Each entry is
    either a path relative to the template base directory or a reference to
    a template parameter of type file. The actual upload is delegated to the
    loader, which allows to use this function for workflows that are
    executed locally as well as remote on a REANA cluster instance.

    For a parameter reference without an argument value, the default value
    of the parameter is used as the path of the source file (relative to the
    template base directory).

    Parameters
    ----------
    template: benchtmpl.workflow.template.base.TemplateHandle
        Workflow template containing the parameterized specification and the
        parameter declarations
    files: list(string)
        List of file references
    arguments: dict(benchtmpl.workflow.parameter.value.TemplateArgument)
        Dictionary of argument values for parameters in the template
    loader: func
        File (up)load function that takes a filepath as the first argument
        and a (remote) target path as the second argument

    Raises
    ------
    benchtmpl.error.InvalidTemplateError
        If a referenced parameter is not of type file
    benchtmpl.error.MissingArgumentError
        If a referenced parameter has neither an argument nor a default
        value
    benchtmpl.error.UnknownParameterError
        Presumably raised by template.get_parameter() for unknown
        parameters; not raised in this function itself -- confirm
    """
    for entry in files:
        if not tmpl.is_parameter(entry):
            # Plain file name: the source is located in the template base
            # directory and keeps its relative path as the target
            loader(os.path.join(template.base_dir, entry), entry)
            continue
        name = tmpl.get_parameter_name(entry)
        # Only parameters of type file may be referenced in the list
        para = template.get_parameter(name)
        if not para.is_file():
            raise err.InvalidTemplateError('expected file parameter for \'{}\''.format(name))
        arg = arguments.get(name)
        if arg is None:
            default_path = para.default_value
            if default_path is None:
                raise err.MissingArgumentError(name)
            # Fall back to the default value (assuming that the default
            # points to a file in the template base directory). The
            # constant value of the parameter takes precedence as target
            # path unless the parameter is flagged as input.
            use_constant = para.has_constant() and not para.as_input()
            target_path = para.get_constant() if use_constant else default_path
            default_file = FileHandle(
                filepath=os.path.join(template.base_dir, default_path)
            )
            arg = TemplateArgument(
                parameter=para,
                value=InputFile(f_handle=default_file, target_path=target_path)
            )
        # Source and target path are taken from the input file handle
        loader(arg.value.source(), arg.value.target())