def test_sftp_volume(mock_ssh, tmpdir):
    """Test generating the SFTP storage volume."""
    basedir = str(tmpdir)
    doc = Sftp(remotedir=basedir, hostname='myhost', identifier='SFTPVolume')
    fs = Volume(doc)
    assert isinstance(fs, RemoteStorage)
    assert fs.identifier == 'SFTPVolume'
def __init__(
    self, service: APIFactory, fs: Optional[StorageVolume] = None,
    config: Optional[Dict] = None
):
    """Initialize the workflow engine. The engine configuration that is
    maintained with the service API can be overridden by providing a
    separate configuration object.

    Parameters
    ----------
    service: flowserv.service.api.APIFactory
        API factory for service callbacks during asynchronous workflow
        execution.
    fs: flowserv.volume.base.StorageVolume, default=None
        Storage volume for run files.
    config: dict, default=None
        Configuration settings for the engine. Overrides the engine
        configuration that is contained in the service API object.
    """
    self.service = service
    self.fs = fs if fs else Volume(doc=service.get(FLOWSERV_FILESTORE))
    self.config = config if config else ENGINECONFIG(env=service, validate=True)
    logging.info("config {}".format(self.config))
    # The is_async flag controls the default setting for asynchronous
    # execution. If the flag is False all workflow steps will be executed
    # in a sequential (blocking) manner.
    self.is_async = service.get(FLOWSERV_ASYNC)
    # Directory for temporary run files.
    self.runsdir = RUNSDIR(env=service)
    # Dictionary of all running tasks.
    self.tasks = dict()
    # Lock to manage asynchronous access to the task dictionary.
    self.lock = Lock()
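# A minimal usage sketch for the constructor above (hypothetical: the engine
# class name `SerialWorkflowEngine` and the config keys shown here are
# assumptions, not taken from this module). Passing `config` overrides the
# engine configuration that is stored with the service API:
#
#   service = LocalAPIFactory()
#   engine = SerialWorkflowEngine(service=service, config={'workers': {}})
#   assert engine.config == {'workers': {}}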
def callback_function(result, lock, tasks, service):
    """Callback function for executed tasks. Removes the task from the task
    index and updates the run state in the underlying database.

    Parameters
    ----------
    result: (string, dict, dict)
        Tuple of task identifier, serialized run storage volume, and
        serialized state of the workflow run.
    lock: multiprocessing.Lock
        Lock for concurrency control.
    tasks: dict
        Task index of the backend.
    service: contextlib.contextmanager
        Context manager to create an instance of the service API.
    """
    run_id, runstore, state_dict = result
    logging.info('finished run {} with {}'.format(run_id, state_dict))
    with lock:
        if run_id in tasks:
            # Close the pool and remove the entry from the task index.
            pool, _ = tasks[run_id]
            pool.close()
            del tasks[run_id]
    state = serialize.deserialize_state(state_dict)
    try:
        with service() as api:
            api.runs().update_run(
                run_id=run_id,
                state=state,
                runstore=Volume(doc=runstore)
            )
    except Exception as ex:
        logging.error(ex, exc_info=True)
        logging.debug('\n'.join(util.stacktrace(ex)))
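# A sketch of how `callback_function` is typically bound to a per-run worker
# pool (an illustration under stated assumptions, not the engine's actual
# submission code; `run_workflow` and the helper name are hypothetical). The
# executed function must return the (run_id, runstore, state_dict) tuple
# that the callback unpacks.
from functools import partial
from multiprocessing import Pool


def submit_run_sketch(run_id, args, lock, tasks, service):
    """Hypothetical helper: start a single-process pool for one run and
    register `callback_function` to be invoked when the run finishes.
    """
    pool = Pool(processes=1)
    task_callback = partial(
        callback_function, lock=lock, tasks=tasks, service=service
    )
    with lock:
        # Index the pool so the callback can close and remove it later.
        tasks[run_id] = (pool, args)
    pool.apply_async(run_workflow, args=args, callback=task_callback)
    return pool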
def test_fs_volume(tmpdir):
    """Test instantiating the file system storage volume."""
    basedir = str(tmpdir)
    doc = FStore(basedir=basedir, identifier='FS')
    fs = Volume(doc)
    assert isinstance(fs, FileSystemStorage)
    assert fs.basedir == basedir
    assert fs.identifier == 'FS'
def get(self, identifier: str) -> StorageVolume:
    """Get the instance for the storage volume with the given identifier.

    Parameters
    ----------
    identifier: str
        Unique storage volume identifier.

    Returns
    -------
    flowserv.volume.base.StorageVolume
    """
    # Create the storage volume instance from its specification if it has
    # not been accessed yet.
    if identifier not in self._stores:
        self._stores[identifier] = Volume(self._storespecs[identifier])
    return self._stores[identifier]
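# Usage sketch for `get` (the `VolumeManager` constructor shown here is an
# assumption; only `get`, `_stores`, and `_storespecs` appear in the code
# above). Instances are created lazily and cached, so repeated lookups
# return the same object:
#
#   volumes = VolumeManager(stores=[FStore(basedir='/tmp/runs', identifier='FS')])
#   s1 = volumes.get('FS')
#   s2 = volumes.get('FS')
#   assert s1 is s2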
def __init__(
    self, env: Optional[Dict] = None, db: Optional[DB] = None,
    engine: Optional[WorkflowController] = None,
    user_id: Optional[str] = None
):
    """Initialize the API factory from a given set of configuration
    parameters and their values. If the configuration dictionary is not
    provided, the current values of the respective environment variables
    are used.

    The option to initialize the associated database and workflow engine
    is primarily intended for test purposes.

    Parameters
    ----------
    env: dict, default=None
        Dictionary that provides access to configuration parameter values.
    db: flowserv.model.database.DB, default=None
        Optional default database.
    engine: flowserv.controller.base.WorkflowController, default=None
        Optional workflow controller (for test purposes).
    user_id: string, default=None
        Optional identifier for the authenticated API user.
    """
    # Use the current environment settings if the configuration dictionary
    # is not given.
    env = env if env is not None else config.env()
    super(LocalAPIFactory, self).__init__(env)
    # Ensure that the base directory is set and exists.
    self[BASEDIR] = self.get(BASEDIR, config.API_DEFAULTDIR())
    os.makedirs(self[BASEDIR], exist_ok=True)
    # Initialize the database.
    self._db = db if db is not None else init_db(self)
    # Initialize the workflow engine.
    self._engine = engine if engine is not None else init_backend(self)
    # Initialize the file store.
    self._fs = Volume(doc=self.get(config.FLOWSERV_FILESTORE))
    # Ensure that the authentication policy identifier is set.
    self[AUTH] = self.get(AUTH, config.AUTH_OPEN)
    # Authenticated default user. The initial value depends on the given
    # value for the user_id or the authentication policy.
    self._user_id = (
        config.DEFAULT_USER
        if not user_id and self[AUTH] == config.AUTH_OPEN
        else user_id
    )
def test_s3_volume(mock_boto):
    """Test instantiating the S3 storage volume."""
    doc = S3Bucket(bucket='0000', identifier='S3')
    fs = Volume(doc)
    assert isinstance(fs, S3Volume)
    assert fs.identifier == 'S3'
def test_gc_volume(mock_gcstore):
    """Test instantiating the Google Cloud storage volume."""
    doc = GCBucket(bucket='0000', identifier='GC')
    fs = Volume(doc)
    assert isinstance(fs, GCVolume)
    assert fs.identifier == 'GC'
def test_invalid_storage_type():
    """Test error when providing an invalid storage volume type identifier."""
    with pytest.raises(err.InvalidConfigurationError):
        Volume({'type': 'unknown'})
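# For orientation, a minimal sketch of the dispatch that the tests above
# exercise (an assumption about `Volume`'s structure, not flowserv's actual
# implementation; the 'fs' type constant, the document layout, and the error
# arguments are illustrative only):
def volume_sketch(doc):
    """Hypothetical factory: map a serialized volume document to a storage
    volume instance based on its 'type' key.
    """
    volume_type = doc.get('type')
    if volume_type == 'fs':
        return FileSystemStorage(basedir=doc['basedir'], identifier=doc.get('id'))
    # ... analogous branches for the SFTP, S3, and Google Cloud volumes ...
    raise err.InvalidConfigurationError('storage type', volume_type)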