Ejemplo n.º 1
0
 def test_run_with_invalid_cmd(self, tmpdir):
     """Run the helloworld example from the template specification
     TEMPLATE_WITH_INVALID_CMD and expect the run to end in an error state.
     """
     base_dir = str(tmpdir)
     # Register the template in a new repository below the temporary folder
     repo = TemplateRepository(base_dir=os.path.join(base_dir, 'repo'))
     template = repo.add_template(
         src_dir=WORKFLOW_DIR,
         template_spec_file=TEMPLATE_WITH_INVALID_CMD
     )
     # Bind values to the two template parameters
     names_arg = TemplateArgument(
         parameter=template.get_parameter('names'),
         value=FileHandle(DATA_FILE)
     )
     sleep_arg = TemplateArgument(
         parameter=template.get_parameter('sleeptime'),
         value=3
     )
     arguments = {'names': names_arg, 'sleeptime': sleep_arg}
     # Run the workflow synchronously
     engine = MultiProcessWorkflowEngine(
         base_dir=os.path.join(base_dir, 'engine'),
         run_async=False,
         verbose=True
     )
     sync_run_id, state = engine.execute(template, arguments)
     # The state returned by execute() is an error state with messages
     assert state.is_error()
     assert len(state.messages) > 0
     # The state that the engine reports for the run afterwards agrees
     reported = engine.get_state(sync_run_id)
     assert reported.is_error()
     assert len(reported.messages) > 0
 def test_error_for_id_func(self, tmpdir):
     """A RuntimeError is raised when the identifier function cannot return
     a unique folder identifier.
     """
     id_func = DummyIDFunc()
     store = TemplateRepository(base_dir=str(tmpdir), id_func=id_func)
     # Create the folder for the identifier returned by the dummy function
     # before a template is added
     occupied_dir = os.path.join(store.base_dir, id_func())
     os.makedirs(occupied_dir)
     with pytest.raises(RuntimeError):
         store.add_template(src_dir=WORKFLOW_DIR)
     # Expected value of the counter that the dummy function maintains
     # (presumably the number of invocations, including the one above)
     assert id_func.count == 102
 def test_delete_template(self, tmpdir):
     """Check the return value of delete_template() for existing and for
     non-existing templates.
     """
     base_dir = str(tmpdir)
     store = TemplateRepository(base_dir=base_dir)
     template = store.add_template(src_dir=WORKFLOW_DIR)
     # Locations of the template file and the static files folder on disk
     template_dir = os.path.join(store.base_dir, template.identifier)
     spec_file = os.path.join(template_dir, TEMPLATE_FILE)
     static_dir = os.path.join(template_dir, STATIC_FILES_DIR)
     assert os.path.isfile(spec_file)
     assert os.path.isdir(static_dir)
     # The first delete reports success and removes the files from disk
     assert store.delete_template(template.identifier)
     assert not os.path.isfile(spec_file)
     assert not os.path.isdir(static_dir)
     # A second delete for the same identifier reports failure
     assert not store.delete_template(template.identifier)
     # Same results when the template is deleted through a store object that
     # was instantiated after the template had been added
     template = store.add_template(src_dir=WORKFLOW_DIR)
     store = TemplateRepository(base_dir=base_dir)
     assert store.delete_template(template.identifier)
     assert not store.delete_template(template.identifier)
Ejemplo n.º 4
0
    def __init__(self, con, template_store=None):
        """Initialize the database connection and the template store.

        If no template store is given, a template repository is created in
        the directory that is returned by config.get_template_dir().

        Parameters
        ----------
        con: DB-API 2.0 database connection
            Connection to underlying database
        template_store: benchtmpl.workflow.template.repo.TemplateRepository, optional
            Repository for workflow templates
        """
        super(BenchmarkRepository, self).__init__(con)
        if template_store is not None:
            self.template_store = template_store
        else:
            # Default store that uses the benchmark-specific template loader
            self.template_store = TemplateRepository(
                base_dir=config.get_template_dir(),
                loader=BenchmarkTemplateLoader(),
                filenames=['benchmark', 'template', 'workflow']
            )
Ejemplo n.º 5
0
 def test_run_with_missing_file(self, tmpdir):
     """Run the helloworld example with a template that references a missing
     file, and with an input file that does not exist.
     """
     base_dir = str(tmpdir)
     # The workflow run fails if a file that is referenced by the workflow
     # does not exist in the created workspace
     repo = TemplateRepository(base_dir=os.path.join(base_dir, 'repo'))
     template = repo.add_template(
         src_dir=WORKFLOW_DIR,
         template_spec_file=TEMPLATE_WITH_MISSING_FILE
     )
     names_arg = TemplateArgument(
         parameter=template.get_parameter('names'),
         value=FileHandle(DATA_FILE)
     )
     sleep_arg = TemplateArgument(
         parameter=template.get_parameter('sleeptime'),
         value=3
     )
     arguments = {'names': names_arg, 'sleeptime': sleep_arg}
     # Run the workflow synchronously
     engine = MultiProcessWorkflowEngine(
         base_dir=os.path.join(base_dir, 'engine'),
         run_async=False
     )
     sync_run_id, state = engine.execute(template, arguments)
     assert state.is_error()
     assert len(state.messages) > 0
     # The state that the engine reports for the run afterwards agrees
     reported = engine.get_state(sync_run_id)
     assert reported.is_error()
     assert len(reported.messages) > 0
     # An IOError is raised if the input file does not exist. The arguments
     # are created inside the context manager so that the error is caught
     # independently of which call raises it.
     with pytest.raises(IOError):
         bad_arguments = {
             'names': TemplateArgument(
                 parameter=template.get_parameter('names'),
                 value=FileHandle(UNKNOWN_FILE)
             ),
             'sleeptime': TemplateArgument(
                 parameter=template.get_parameter('sleeptime'),
                 value=3
             )
         }
         engine.execute(template=template, arguments=bad_arguments)
Ejemplo n.º 6
0
 def test_run_benchmark(self, tmpdir):
     """Run a benchmark and check the database records for the run."""
     base_dir = str(tmpdir)
     # Let the BASEDIR environment variable point to the temporary folder.
     # NOTE(review): the variable is not reset at the end of the test.
     os.environ[config.ENV_BASEDIR] = os.path.abspath(base_dir)
     # Initialize a new database and connect to it
     connect_string = 'sqlite:{}/auth.db'.format(base_dir)
     DatabaseDriver.init_db(connect_string=connect_string)
     con = DatabaseDriver.connect(connect_string=connect_string)
     # Benchmark repository (with explicit template store) and engine
     template_store = TemplateRepository(
         base_dir=config.get_template_dir(),
         loader=BenchmarkTemplateLoader(),
         filenames=['benchmark', 'template', 'workflow']
     )
     repository = BenchmarkRepository(con=con, template_store=template_store)
     engine = BenchmarkEngine(con)
     # Add benchmark with minimal information
     benchmark = repository.add_benchmark(
         name='My benchmark',
         src_dir=TEMPLATE_DIR
     )
     template = benchmark.template
     # Bind a value to each of the three template parameters
     values = [
         ('names', FileHandle(DATA_FILE)),
         ('sleeptime', 1),
         ('greeting', 'Welcome')
     ]
     arguments = {
         key: TemplateArgument(
             parameter=template.get_parameter(key),
             value=val
         )
         for key, val in values
     }
     run_id, state = engine.run(benchmark, arguments, 'USERID')
     assert state.is_success()
     # The run is recorded in the benchmark_run table
     run_row = con.execute(
         'SELECT * FROM benchmark_run WHERE run_id = ?',
         (run_id, )
     ).fetchone()
     assert run_row['benchmark_id'] == benchmark.identifier
     assert run_row['user_id'] == 'USERID'
     assert run_row['state'] == state.type_id
     # The run result is recorded in the result table of the benchmark
     result_table = bm.PREFIX_RESULT_TABLE + benchmark.identifier
     result_row = con.execute(
         'SELECT * FROM {} WHERE run_id = ?'.format(result_table),
         (run_id, )
     ).fetchone()
     assert result_row['max_line'] == 'Welcome Alice!'
Ejemplo n.º 7
0
 def test_prepare_inputs_for_local_run(self, tmpdir):
     """Copy the input files of a workflow template to a local run
     directory. Checks the copied files as well as the error cases.
     """
     base_dir = str(tmpdir)
     run_dir = os.path.join(base_dir, 'run')

     def spec_files(tmpl):
         # List of input files in the workflow specification of a template
         return tmpl.workflow_spec.get('inputs', {}).get('files', [])

     def reset_run_dir():
         # Replace the run directory with an empty directory
         shutil.rmtree(run_dir)
         os.makedirs(run_dir)

     def in_run_dir(*path):
         # Path to a file or folder inside the run directory
         return os.path.join(run_dir, *path)

     # Load the template and create the run directory
     store = TemplateRepository(base_dir=base_dir)
     template = store.add_template(
         src_dir=WORKFLOW_DIR,
         template_spec_file=SPEC_FILE
     )
     os.makedirs(run_dir)
     # Copy input files to the run directory
     names_arg = TemplateArgument(
         template.get_parameter('names'),
         value=FileHandle(filepath=DATA_FILE)
     )
     backend.upload_files(
         template=template,
         files=spec_files(template),
         arguments={'names': names_arg},
         loader=FileCopy(run_dir)
     )
     # The run directory is expected to contain the files
     #   code/helloworld.py, code/script.sh,
     #   data/persons.txt, data/friends.txt
     # but not code/dontcopy.me
     expected_files = [
         ('code', 'helloworld.py'),
         ('code', 'script.sh'),
         ('data', 'persons.txt'),
         ('data', 'friends.txt')
     ]
     for path in expected_files:
         assert os.path.isfile(in_run_dir(*path))
     assert not os.path.isfile(in_run_dir('code', 'dontcopy.me'))
     # data/persons.txt contains exactly Alice and Bob
     with open(in_run_dir('data', 'persons.txt'), 'r') as f:
         names = {line.strip() for line in f}
     assert names == {'Alice', 'Bob'}
     # data/friends.txt contains exactly Jane Doe and Joe Bloggs
     with open(in_run_dir('data', 'friends.txt'), 'r') as f:
         friends = {line.strip() for line in f}
     assert friends == {'Jane Doe', 'Joe Bloggs'}
     # Error case: no argument is given for the names parameter
     with pytest.raises(err.MissingArgumentError):
         backend.upload_files(
             template=template,
             files=spec_files(template),
             arguments={},
             loader=FileCopy(run_dir)
         )
     # Error case: the file for the names parameter does not exist. The
     # arguments are created inside the context manager so that the error
     # is caught independently of which call raises it.
     template = store.add_template(
         src_dir=WORKFLOW_DIR,
         template_spec_file=SPEC_FILE
     )
     reset_run_dir()
     with pytest.raises(IOError):
         backend.upload_files(
             template=template,
             files=spec_files(template),
             arguments={
                 'names': TemplateArgument(
                     template.get_parameter('names'),
                     value=FileHandle(
                         filepath=os.path.join(base_dir, 'no.file')
                     )
                 )
             },
             loader=FileCopy(run_dir)
         )
     assert not os.path.isdir(in_run_dir('data'))
     # If the constant value for the names parameter is removed the names
     # file is copied to the run directory and not to the data folder
     para = template.get_parameter('names')
     para.as_constant = None
     reset_run_dir()
     backend.upload_files(
         template=template,
         files=spec_files(template),
         arguments={
             'names': TemplateArgument(
                 parameter=para,
                 value=FileHandle(filepath=DATA_FILE)
             )
         },
         loader=FileCopy(run_dir)
     )
     # The run directory is now expected to contain the files
     #   code/helloworld.py, names.txt, data/friends.txt
     # but not data/persons.txt
     assert os.path.isfile(in_run_dir('code', 'helloworld.py'))
     assert os.path.isfile(in_run_dir('names.txt'))
     assert not os.path.isfile(in_run_dir('data', 'persons.txt'))
     assert os.path.isfile(in_run_dir('data', 'friends.txt'))
     # Error case: template with input file parameter that is not of type
     # file
     template = store.add_template(
         src_dir=WORKFLOW_DIR,
         template_spec_file=SPEC_FILE_ERR
     )
     reset_run_dir()
     with pytest.raises(err.InvalidTemplateError):
         backend.upload_files(
             template=template,
             files=spec_files(template),
             arguments={
                 'sleeptime': TemplateArgument(
                     template.get_parameter('names'),
                     value=FileHandle(filepath=DATA_FILE)
                 )
             },
             loader=FileCopy(run_dir)
         )
Ejemplo n.º 8
0
class BenchmarkRepository(Auth):
    """The repository maintains benchmarks as well as the results of benchmark
    runs. The repository is a wrapper around two components:
    (1) the template repository to maintain workflow templates for each
        benchmark, and
    (2) the database to store benchmark information (e.g., name, description)
        and information about result files for workflow runs.
    """
    def __init__(self, con, template_store=None):
        """Initialize the database connection and the template store.

        If no template store is given, a template repository is created in
        the directory that is returned by config.get_template_dir().

        Parameters
        ----------
        con: DB-API 2.0 database connection
            Connection to underlying database
        template_store: benchtmpl.workflow.template.repo.TemplateRepository, optional
            Repository for workflow templates
        """
        super(BenchmarkRepository, self).__init__(con)
        if template_store is not None:
            self.template_store = template_store
        else:
            # Default store that uses the benchmark-specific template loader
            self.template_store = TemplateRepository(
                base_dir=config.get_template_dir(),
                loader=BenchmarkTemplateLoader(),
                filenames=['benchmark', 'template', 'workflow']
            )

    def add_benchmark(
        self, name, description=None, instructions=None, src_dir=None,
        src_repo_url=None, template_spec_file=None
    ):
        """Add a benchmark to the repository. The associated workflow template
        is created in the template repository from either the given source
        directory or Git repository. The template repository will raise an
        error if neither or both arguments are given.

        Raises an error if the given benchmark name is not unique.

        Parameters
        ----------
        name: string
            Unique benchmark headline name
        description: string, optional
            Optional short description for display in benchmark listings
        instructions: string, optional
            Text containing detailed instructions for benchmark participants
        src_dir: string, optional
            Directory containing the benchmark components, i.e., the fixed
            files and the template specification (optional).
        src_repo_url: string, optional
            Git repository that contains the benchmark components
        template_spec_file: string, optional
            Path to the workflow template specification file (absolute or
            relative to the workflow directory)

        Returns
        -------
        benchengine.benchmark.base.BenchmarkHandle

        Raises
        ------
        benchengine.error.ConstraintViolationError
        benchtmpl.error.InvalidParameterError
        benchtmpl.error.InvalidTemplateError
        ValueError
        """
        # Ensure that the benchmark name is not empty, not longer than 255
        # characters and unique. The name is stripped of leading and trailing
        # whitespace before it is validated and stored.
        if name is None:
            raise err.ConstraintViolationError('missing benchmark name')
        name = name.strip()
        if name == '' or len(name) > 255:
            raise err.ConstraintViolationError('invalid benchmark name')
        sql = 'SELECT id FROM benchmark WHERE name = ?'
        if self.con.execute(sql, (name,)).fetchone() is not None:
            raise err.ConstraintViolationError(
                'benchmark \'{}\' exists'.format(name)
            )
        # Create the workflow template in the associated template repository
        template = self.template_store.add_template(
            src_dir=src_dir,
            src_repo_url=src_repo_url,
            template_spec_file=template_spec_file
        )
        # Insert benchmark into database. The template identifier is used as
        # the benchmark identifier.
        # NOTE(review): if the insert fails the template that was created
        # above remains in the template store; consider removing it.
        sql = (
            'INSERT INTO benchmark'
            '(id, name, description, instructions) '
            'VALUES(?, ?, ?, ?)'
        )
        self.con.execute(
            sql,
            (template.identifier, name, description, instructions)
        )
        self.con.commit()
        # Create the result table for the benchmark and return the handle
        handle = BenchmarkHandle(
            con=self.con,
            template=template,
            name=name,
            description=description,
            instructions=instructions
        )
        handle.create_result_table()
        return handle

    def assert_benchmark_exists(self, benchmark_id):
        """Ensure that the benchmark with the given identifier exists. If no
        benchmark exists with that identifier an unknown benchmark error
        is raised.

        Parameters
        ----------
        benchmark_id: string
            Unique benchmark identifier

        Raises
        ------
        benchengine.error.UnknownBenchmarkError
        """
        sql = 'SELECT id FROM benchmark WHERE id = ?'
        if self.con.execute(sql, (benchmark_id,)).fetchone() is None:
            raise err.UnknownBenchmarkError(benchmark_id)

    def delete_benchmark(self, benchmark_id):
        """Delete the benchmark with the given identifier. Raises an exception
        if the given benchmark is unknown.

        Parameters
        ----------
        benchmark_id: string
            Unique benchmark identifier

        Raises
        ------
        benchengine.error.UnknownBenchmarkError
        """
        # Ensure that the benchmark exists. Raises error if it does not exist.
        self.assert_benchmark_exists(benchmark_id)
        # Delete the workflow template (benchmark and template share the
        # same identifier)
        self.template_store.delete_template(benchmark_id)
        # Delete the benchmark record
        # NOTE(review): the result table that add_benchmark() creates is not
        # dropped here; confirm whether that is handled elsewhere.
        sql = 'DELETE FROM benchmark WHERE id = ?'
        self.con.execute(sql, (benchmark_id,))
        self.con.commit()

    def get_benchmark(self, benchmark_id):
        """Get handle for the benchmark with the given identifier. Raises an
        error if no benchmark with the identifier exists.

        Parameters
        ----------
        benchmark_id: string
            Unique benchmark identifier

        Returns
        -------
        benchengine.benchmark.base.BenchmarkHandle

        Raises
        ------
        benchengine.error.UnknownBenchmarkError
        """
        # Get benchmark information from database. If the result is empty an
        # error is raised
        sql = (
            'SELECT id, name, description, instructions '
            'FROM benchmark '
            'WHERE id = ?'
        )
        bmark = self.con.execute(sql, (benchmark_id,)).fetchone()
        if bmark is None:
            raise err.UnknownBenchmarkError(benchmark_id)
        # Get workflow template from template repository
        template = self.template_store.get_template(benchmark_id)
        # Return handle for benchmark
        return BenchmarkHandle(
            con=self.con,
            template=template,
            name=bmark['name'],
            description=bmark['description'],
            instructions=bmark['instructions']
        )

    def list_benchmarks(self):
        """Get a list of descriptors for all benchmarks in the repository.

        Returns
        -------
        list(benchengine.benchmark.base.BenchmarkDescriptor)
        """
        sql = (
            'SELECT id, name, description, instructions '
            'FROM benchmark '
        )
        # One descriptor per row in the benchmark table
        return [
            BenchmarkDescriptor(
                identifier=bmark['id'],
                name=bmark['name'],
                description=bmark['description'],
                instructions=bmark['instructions']
            )
            for bmark in self.con.execute(sql)
        ]
Ejemplo n.º 9
0
 def test_run_helloworld(self, tmpdir):
     """Execute the helloworld example.

     The test has three parts: (1) an asynchronous run that completes and
     whose resources are removed afterwards, (2) an asynchronous run that is
     canceled while it is active, and (3) a synchronous run.
     """
     repo = TemplateRepository(base_dir=os.path.join(str(tmpdir), 'repo'))
     template = repo.add_template(
         src_dir=WORKFLOW_DIR,
         template_spec_file=TEMPLATE_FILE
     )

     def make_arguments(sleeptime):
         # Arguments for a workflow run with the given sleep time
         return {
             'names': TemplateArgument(
                 parameter=template.get_parameter('names'),
                 value=FileHandle(DATA_FILE)
             ),
             'sleeptime': TemplateArgument(
                 parameter=template.get_parameter('sleeptime'),
                 value=sleeptime
             )
         }

     arguments = make_arguments(3)
     engine_dir = os.path.join(str(tmpdir), 'engine')
     engine = MultiProcessWorkflowEngine(
         base_dir=engine_dir,
         run_async=True
     )
     # Run workflow asynchronously and wait until it is no longer active
     run_id, _ = engine.execute(template, arguments)
     while engine.get_state(run_id).is_active():
         time.sleep(1)
     state = engine.get_state(run_id)
     self.validate_run_result(state)
     # Remove run resources (can be called multiple times as long as the
     # task is not active)
     engine.remove_run(run_id)
     engine.remove_run(run_id)
     # Getting the state of a removed run raises an error
     with pytest.raises(err.UnknownRunError):
         engine.get_state(run_id)
     # Cancel run. The longer sleep time is used so that the run is still
     # active when it is canceled.
     arguments = make_arguments(30)
     run_id, _ = engine.execute(template, arguments)
     if engine.get_state(run_id).is_active():
         # Removing resources of an active run will raise an error
         with pytest.raises(RuntimeError):
             engine.remove_run(run_id)
         # Cancel the run
         engine.cancel_run(run_id)
     # The canceled run is unknown to the engine
     with pytest.raises(err.UnknownRunError):
         engine.get_state(run_id)
     # Remove run resources does not raise an error now
     engine.remove_run(run_id)
     # Run workflow synchronously
     arguments = make_arguments(1)
     engine = MultiProcessWorkflowEngine(
         base_dir=engine_dir,
         run_async=False
     )
     sync_run_id, state = engine.execute(template, arguments)
     assert run_id != sync_run_id
     # Validate the state that is returned by execute() as well as the state
     # that the engine reports for the run afterwards
     self.validate_run_result(state)
     state = engine.get_state(sync_run_id)
     self.validate_run_result(state)
     # Remove run resources (can be called multiple times as long as the
     # task is not active)
     # NOTE(review): run_id is the identifier of the canceled run; confirm
     # whether sync_run_id was intended here.
     engine.remove_run(run_id)
 def test_add_template(self, tmpdir):
     """Create templates from different specification files and check the
     error cases for invalid arguments and invalid specifications.
     """
     base_dir = str(tmpdir)
     store = TemplateRepository(base_dir=base_dir)
     template = store.add_template(src_dir=WORKFLOW_DIR)
     self.validate_template_handle(template)
     # The serialized template file can be loaded by the default loader and
     # the folder for static files exists
     template_dir = os.path.join(store.base_dir, template.identifier)
     DefaultTemplateLoader().load(os.path.join(template_dir, TEMPLATE_FILE))
     assert os.path.isdir(os.path.join(template_dir, STATIC_FILES_DIR))
     # Repeat the validation for the template that is returned by the store,
     # first for the current store object and then for a new one
     fetched = store.get_template(template.identifier)
     self.validate_template_handle(fetched)
     store = TemplateRepository(base_dir=base_dir)
     fetched = store.get_template(template.identifier)
     self.validate_template_handle(fetched)
     # Add template with JSON specification file
     template = store.add_template(
         src_dir=WORKFLOW_DIR,
         template_spec_file=JSON_SPEC
     )
     self.validate_template_handle(template)
     # Error for unknown template identifier
     with pytest.raises(err.UnknownTemplateError):
         store.get_template('unknown')
     # Error if neither or both of source directory and repository URL are
     # given
     invalid_sources = (
         {},
         {'src_dir': WORKFLOW_DIR, 'src_repo_url': WORKFLOW_DIR}
     )
     for kwargs in invalid_sources:
         with pytest.raises(ValueError):
             store.add_template(**kwargs)
     # Error for erroneous template specification
     with pytest.raises(err.InvalidTemplateError):
         store.add_template(
             src_dir=WORKFLOW_DIR,
             template_spec_file=ERR_SPEC
         )
     # Error when cloning a repository from GitHub that is not a valid
     # template (presumably requires network access)
     with pytest.raises(err.InvalidTemplateError):
         store.add_template(
             src_repo_url='https://github.com/reanahub/reana-demo-helloworld'
         )