def config_and_controller_fixture():
    """
    Build the config client and processing controller used by the tests.

    Before returning, a workflow definition and one processing block that
    refers to that workflow are written to the config DB.

    :returns: tuple ``(config, controller)``
    """
    config = ska_sdp_config.Config()
    controller = processing_controller.ProcessingController()

    # Workflow definition
    workflow_definition = {"image": WORKFLOW_IMAGE}

    # Processing block referring to the workflow above
    workflow_ref = dict(
        type=WORKFLOW_TYPE,
        id=WORKFLOW_ID,
        version=WORKFLOW_VERSION,
    )
    processing_block = ska_sdp_config.ProcessingBlock(
        id=PROCESSING_BLOCK_ID,
        sbi_id="test",
        workflow=workflow_ref,
        parameters={},
        dependencies=[],
    )

    for txn in config.txn():
        txn.create_workflow(
            WORKFLOW_TYPE, WORKFLOW_ID, WORKFLOW_VERSION, workflow_definition
        )
        txn.create_processing_block(processing_block)

    return config, controller
def main_loop(self, backend=None):
    """
    Run the main event loop.

    On every watcher iteration the processing block and deployment IDs are
    listed in a transaction, then three actions are applied in turn:
    starting new PB workflows, releasing PBs with finished dependencies and
    deleting deployments without a PB.

    :param backend: config DB backend to use
    """
    LOG.info("Connecting to config DB")
    config = ska_sdp_config.Config(backend=backend)

    LOG.info("Starting main loop")
    for watcher in config.watcher():

        # Read the current IDs inside a transaction
        for txn in watcher.txn():
            block_ids = txn.list_processing_blocks()
            deployment_ids = txn.list_deployments()
        LOG.info("processing block ids %s", block_ids)

        # Act on what was read
        self._start_new_pb_workflows(watcher, block_ids)
        self._release_pbs_with_finished_dependencies(watcher, block_ids)
        self._delete_deployments_without_pb(
            watcher, block_ids, deployment_ids
        )
def main():
    """
    Run the processing block named by the first command-line argument.

    A deployment is created for the processing block, the DIM host is
    resolved from the deployment ID and the block is run against it on
    port 8001. ``cleanup`` is always called for the deployment, even when
    running fails.
    """

    def ignore(_):
        """Callback that does nothing and returns None."""
        return None

    logging.basicConfig(level=logging.INFO)

    config = ska_sdp_config.Config()
    pb = get_pb(config, sys.argv[1])
    deployment = create_deployment(config, pb)

    try:
        dim_host = resolve_dim_host(deployment.deploy_id)
        common.run_processing_block(pb, ignore, host=dim_host, port=8001)
        idle_for_some_obscure_reason(config, pb)
    finally:
        cleanup(config, deployment)
def check_config_db():
    """Check that the config DB has the configured PBs.

    Nothing is checked unless the ``ska_sdp_config`` package is available
    and the ``config_db`` feature is active.
    """
    if ska_sdp_config is None:
        return
    if not SDPSubarray.is_feature_active('config_db'):
        return

    # Expected processing blocks come from the Configure command fixture
    config_path = join(dirname(__file__), 'data', 'command_Configure.json')
    with open(config_path, 'r') as config_file:
        expected = json.load(config_file)

    client = ska_sdp_config.Config()
    for txn in client.txn():
        known_ids = txn.list_processing_blocks()
        for block in expected['processingBlocks']:
            assert block['id'] in known_ids
def main_loop(backend=None):
    """
    Main loop of Helm controller.

    Registers the chart repositories, then on every watcher iteration
    deletes Helm deployments that are no longer listed in the configuration
    and creates listed deployments of type "helm" that Helm does not yet
    report.

    :param backend: for configuration database
    """
    client = ska_sdp_config.Config(backend=backend)

    # Register every configured chart repository
    for repo_name, repo_url in CHART_REPO_LIST:
        helm_invoke("repo", "add", repo_name, repo_url)

    # Initial chart fetch; remember when the next one is due
    update_helm()
    refresh_due = time.time() + CHART_REPO_REFRESH

    for watcher in client.watcher(timeout=CHART_REPO_REFRESH):

        if time.time() > refresh_due:
            update_helm()
            refresh_due = time.time() + CHART_REPO_REFRESH

        # What Helm reports versus what the configuration lists
        existing = list_helm()
        for txn in watcher.txn():
            wanted = txn.list_deployments()

        # Remove deployments that are no longer listed
        for dpl_id in existing:
            if dpl_id not in wanted:
                delete_helm(dpl_id)

        # Add deployments that are listed but not yet present
        for dpl_id in wanted:
            if dpl_id in existing:
                continue

            for txn in watcher.txn():
                deployment = _get_deployment(txn, dpl_id)

            # Skip entries that vanished or are not Helm deployments
            if deployment is not None and deployment.type == "helm":
                create_helm(dpl_id, deployment)
def main():
    """
    Run the processing block named by the first command-line argument.

    The processing block status is set to WAITING, the function waits until
    ``resources_available`` in the block state is truthy, sets the status to
    RUNNING, runs the block against a freshly created deployment (always
    cleaning the deployment up) and finally sets the status to FINISHED.
    """
    logging.basicConfig(level=logging.INFO)

    config = ska_sdp_config.Config()
    pb = get_pb(config, sys.argv[1])

    def write_status(status):
        """Store ``status`` in the processing block state."""
        for txn in config.txn():
            state = txn.get_processing_block_state(pb.id)
            state["status"] = status
            txn.update_processing_block_state(pb.id, state)

    # Workflow is waiting for resources
    logger.info("Setting status to WAITING")
    write_status("WAITING")

    # Block until resources_available becomes true
    logger.info("Waiting for resources to be available")
    for txn in config.txn():
        state = txn.get_processing_block_state(pb.id)
        if state.get("resources_available"):
            break
        txn.loop(wait=True)

    # Workflow is running
    logger.info("Setting status to RUNNING")
    write_status("RUNNING")

    deployment = create_deployment(config, pb)
    try:
        dim_host = resolve_dim_host(deployment.id)
        common.run_processing_block(
            pb, lambda _: None, host=dim_host, port=8001
        )
        idle_for_some_obscure_reason(config, pb)
    finally:
        cleanup(config, deployment)

    # Processing has ended
    logger.info("Setting status to FINISHED")
    write_status("FINISHED")
def init_device(self):
    """Initialise the device.

    The device is put in the INIT state while attributes and instance
    variables are reset, a config DB client is created when the package is
    available and the feature toggle is active, and the device ends in the
    OFF state.
    """
    Device.init_device(self)
    self.set_state(DevState.INIT)
    LOG.info('Initialising SDP Subarray: %s', self.get_name())

    # Attributes
    self._set_obs_state(ObsState.IDLE)
    self._set_admin_mode(AdminMode.ONLINE)
    self._set_health_state(HealthState.OK)
    self._set_receive_addresses(None)

    # Instance variables
    self._sbi_id = None
    self._pb_realtime = []
    self._pb_batch = []
    self._cbf_outlink_address = None
    self._pb_receive_addresses = None

    # Config DB client: needs both the package and the feature toggle.
    # The toggle is only queried when the package could be imported.
    package_found = ska_sdp_config is not None
    if package_found and self.is_feature_active(FeatureToggle.CONFIG_DB):
        self._config_db_client = ska_sdp_config.Config()
        LOG.debug('SDP Config DB enabled')
    else:
        self._config_db_client = None
        if package_found:
            reason = 'by feature toggle'
        else:
            reason = '(ska_sdp_config package not found)'
        LOG.warning('SDP Config DB disabled %s', reason)

    if self.is_feature_active(FeatureToggle.CBF_OUTPUT_LINK):
        LOG.debug('CBF output link enabled')
    else:
        LOG.debug('CBF output link disabled')

    # The subarray device is initialised in the OFF state.
    self.set_state(DevState.OFF)
    LOG.info('SDP Subarray initialised: %s', self.get_name())
def cmd(args, path, value, workflow, parameters):
    """Execute command.

    Dispatches to the ``cmd_*`` function selected by the flags in ``args``
    inside a configuration transaction, then prints feedback unless
    ``--quiet`` is set. A KeyboardInterrupt is swallowed only when the
    ``watch`` flag is set.
    """
    # Imported here, as in the original, rather than at module level
    import ska_sdp_config

    prefix = args['--prefix']
    if prefix is None:
        prefix = ''
    cfg = ska_sdp_config.Config(global_prefix=prefix)

    try:
        for txn in cfg.txn():
            if args['ls'] or args['list']:
                cmd_list(txn, path, args)
            elif args['watch'] or args['get']:
                cmd_get(txn, path, args)
            elif args['create']:
                cmd_create(txn, path, value, args)
            elif args['edit']:
                cmd_edit(txn, path)
            elif args['update']:
                cmd_update(txn, path, value, args)
            elif args['delete']:
                cmd_delete(txn, path, args)
            elif args['process']:
                pb_id = cmd_create_pb(txn, workflow, parameters, args)
            elif args['deploy']:
                cmd_deploy(txn, args['<type>'], args['<name>'], parameters)

            # When watching, wait and run the transaction again
            if args['watch']:
                txn.loop(wait=True)

        # Feedback once the transaction has concluded
        if not args['--quiet']:
            modifying = ('create', 'update', 'delete', 'edit')
            if any(args[flag] for flag in modifying):
                print("OK")
            if args['process']:
                print("OK, pb_id = {}".format(pb_id))

    except KeyboardInterrupt:
        if not args['watch']:
            raise
def main():
    """
    Run the processing block against the DIM, retrying on failure.

    A deployment is created for the processing block and
    ``common.run_processing_block`` is attempted up to 200 times, sleeping
    one second after each failed attempt. Whether or not an attempt
    succeeded, ``idle_for_some_obscure_reason`` is called afterwards, and
    ``cleanup`` is always called for the deployment.
    """
    logging.basicConfig(level=logging.INFO)
    config = ska_sdp_config.Config()
    pb = get_pb(config)
    deployment = create_deployment(config, pb)
    try:
        logger.info("Contacting DIM at %s", DLG_DIM_HOST)
        max_tries = 200
        for attempt in range(1, max_tries + 1):
            try:
                common.run_processing_block(
                    pb, lambda _: None, host=DLG_DIM_HOST, port=DLG_DIM_PORT
                )
                break
            # Exception rather than a bare except, so that
            # KeyboardInterrupt and SystemExit still stop the process
            # instead of being retried.
            except Exception:  # pylint: disable=broad-except
                logger.exception(
                    "Error while running, trying again (%d/%d)",
                    attempt,
                    max_tries,
                )
                time.sleep(1)
        else:
            # All attempts failed; make that visible but carry on as before
            logger.error("Giving up after %d failed attempts", max_tries)
        idle_for_some_obscure_reason(config, pb)
    finally:
        cleanup(config, deployment)
""" Example PSS Receive workflow """ # pylint: disable=C0103 import logging import ska_sdp_config import sys # Initialise logging and configuration logging.basicConfig() log = logging.getLogger('pss_recv') log.setLevel(logging.INFO) config = ska_sdp_config.Config() def main(argv): pb_id = argv[0] for txn in config.txn(): txn.take_processing_block(pb_id, config.client_lease) pb = txn.get_processing_block(pb_id) # Show log.info("Claimed processing block %s", pb) # Deploy PSS Receive with 1 worker. log.info("Deploying PSS Receive...") deploy_id = pb.pb_id + "-pss-receive" deploy = ska_sdp_config.Deployment( deploy_id,
def main(argv):
    """Workflow main function.

    Claims the processing block, waits until ``resources_available`` in its
    state is truthy, deploys a Dask execution engine, runs ``deliver`` on
    it and marks the processing block FINISHED.

    The Dask deployment is removed and the configuration connection is
    closed even when a step raises, so a failed run does not leave them
    behind. The exception still propagates and the status is then not set
    to FINISHED.

    :param argv: argument list; ``argv[0]`` is the processing block ID
    """
    pb_id = argv[0]
    config = ska_sdp_config.Config()
    try:
        for txn in config.txn():
            txn.take_processing_block(pb_id, config.client_lease)
            pb = txn.get_processing_block(pb_id)
        LOG.info("Claimed processing block %s", pb_id)

        # Parse parameters
        n_workers = pb.parameters.get("n_workers", 1)
        buffers = [b.get("name") for b in pb.parameters.get("buffers", [])]
        secrets = [pb.parameters.get("service_account", {}).get("secret")]

        # Set state to indicate workflow is waiting for resources
        LOG.info("Setting status to WAITING")
        for txn in config.txn():
            state = txn.get_processing_block_state(pb_id)
            state["status"] = "WAITING"
            txn.update_processing_block_state(pb_id, state)

        # Wait for resources_available to be true
        LOG.info("Waiting for resources to be available")
        for txn in config.txn():
            state = txn.get_processing_block_state(pb_id)
            if state.get("resources_available"):
                LOG.info("Resources are available")
                break
            txn.loop(wait=True)

        # Set state to indicate workflow is running
        for txn in config.txn():
            state = txn.get_processing_block_state(pb_id)
            state["status"] = "RUNNING"
            txn.update_processing_block_state(pb_id, state)

        # Deploy Dask EE
        LOG.info("Deploying Dask EE")
        image = "artefact.skao.int/ska-sdp-wflow-delivery:{}".format(
            pb.workflow.get("version")
        )
        deploy_id, client = ee_dask_deploy(
            config,
            pb.id,
            image,
            n_workers=n_workers,
            buffers=buffers,
            secrets=secrets,
        )
        try:
            # Run delivery function
            LOG.info("Starting delivery")
            deliver(client, pb.parameters)
            LOG.info("Finished delivery")
        finally:
            # Remove the deployment whether or not delivery succeeded
            LOG.info("Removing Dask EE deployment")
            ee_dask_remove(config, deploy_id)

        # Set state to indicate processing is finished
        for txn in config.txn():
            state = txn.get_processing_block_state(pb_id)
            state["status"] = "FINISHED"
            txn.update_processing_block_state(pb_id, state)
    finally:
        config.close()
name: sleeper type: kubernetes-direct """ # pylint: disable=C0103 import logging import ska_sdp_config import sys logging.basicConfig() log = logging.getLogger('testdeploy') log.setLevel(logging.INFO) # Instantiate configuration client = ska_sdp_config.Config() def make_deployment(dpl_name, dpl_args, pb_id): """Make a deployment given PB parameters.""" return ska_sdp_config.Deployment(pb_id + "-" + dpl_name, **dpl_args) def main(argv): pb_id = argv[0] for txn in client.txn(): pb = txn.get_processing_block(pb_id) txn.take_processing_block(pb_id, client.client_lease) # Show log.info("Claimed processing block %s", pb)
def main():
    """Main loop.

    Repeatedly, inside one configuration transaction: refreshes the
    workflow definitions when due, deletes deployments whose processing
    block is no longer listed, and creates a Helm "workflow" deployment for
    every processing block without a deployment whose realtime workflow is
    known. Batch and unknown workflow types are only logged.
    """
    # Environment variables handed on to workflow containers.
    values_env = get_environment_variables(
        ['SDP_CONFIG_HOST', 'SDP_HELM_NAMESPACE'])

    # Initial workflow definitions and the time of the next refresh.
    workflows_version, workflows_realtime, workflows_batch = \
        update_workflow_definition(WORKFLOWS_URL, WORKFLOWS_SCHEMA)
    next_workflows_refresh = time.time() + WORKFLOWS_REFRESH

    # Connect to configuration database.
    client = ska_sdp_config.Config()

    LOG.debug("Starting main loop...")
    for txn in client.txn():

        # Refresh workflow definitions when the refresh time has passed.
        if time.time() >= next_workflows_refresh:
            LOG.debug('Updating workflow definitions')
            workflows_version, workflows_realtime, workflows_batch = \
                update_workflow_definition(WORKFLOWS_URL, WORKFLOWS_SCHEMA)
            next_workflows_refresh = time.time() + WORKFLOWS_REFRESH

        # Current processing blocks and deployments.
        pb_ids = txn.list_processing_blocks()
        deploy_ids = txn.list_deployments()

        # PBs that have a deployment, inferred from the deployment IDs.
        pbs_with_deployment = list(
            {get_pb_id_from_deploy_id(dpl) for dpl in deploy_ids})

        LOG.debug("Current PBs: {}".format(pb_ids))
        LOG.debug("Current deployments: {}".format(deploy_ids))
        LOG.debug("Current PBs with deployment: {}".format(
            pbs_with_deployment))

        # Delete deployments not associated with processing blocks.
        for deploy_id in deploy_ids:
            if get_pb_id_from_deploy_id(deploy_id) in pb_ids:
                continue
            LOG.info("Deleting deployment {}".format(deploy_id))
            txn.delete_deployment(txn.get_deployment(deploy_id))

        # Deploy workflow for processing blocks without deployments.
        for pb_id in pb_ids:
            if pb_id in pbs_with_deployment:
                continue

            pb = txn.get_processing_block(pb_id)
            wf_type = pb.workflow['type']
            wf_id = pb.workflow['id']
            wf_version = pb.workflow['version']
            LOG.info(
                "PB {} has no deployment (workflow type = {}, ID = {}, version = {})"
                "".format(pb_id, wf_type, wf_id, wf_version))

            if wf_type == "batch":
                LOG.warning("Batch workflows are not supported at present")
                continue
            if wf_type != "realtime":
                LOG.error("Unknown workflow type: {}".format(wf_type))
                continue

            wf_key = (wf_id, wf_version)
            if wf_key not in workflows_realtime:
                # Unknown realtime workflow ID and version.
                LOG.error("Workflow ID = {} version = {} is not supported".
                          format(wf_id, wf_version))
                continue

            LOG.info(
                "Deploying realtime workflow ID = {}, version = {}"
                "".format(wf_id, wf_version))
            wf_image = workflows_realtime[wf_key]
            deploy_id = "{}-workflow".format(pb_id)

            # Helm chart values: a copy of the environment values plus
            # the workflow image and processing block ID.
            values = dict(values_env, wf_image=wf_image, pb_id=pb_id)
            deploy = ska_sdp_config.Deployment(
                deploy_id, 'helm', {'chart': 'workflow', 'values': values})
            LOG.info("Creating deployment {}".format(deploy_id))
            txn.create_deployment(deploy)

        LOG.debug("Waiting...")
        txn.loop(wait=True, timeout=next_workflows_refresh - time.time())
"""
This module contains the "model" layer.

This includes configuration database interactions and building data
structures for display.
"""
import json
import sys
from typing import Dict, List

import ska_sdp_config

DELIMITER = ":"

# The in-memory backend is selected whenever pytest has been imported.
cfg = ska_sdp_config.Config(
    backend="memory" if "pytest" in sys.modules else "etcd3"
)


def _combine_key(parent: str, key: str) -> str:
    """Return ``parent`` and ``key`` joined by DELIMITER."""
    return parent + DELIMITER + key


def _clean_key(key: str) -> str:
    """Drop one leading "/" from ``key`` and replace the rest with DELIMITER."""
    if key.startswith("/"):
        key = key[1:]
    return key.replace("/", DELIMITER)


def _to_node(key: str, parent: str, text: str) -> Dict:
    """Build a node dict with cleaned ``id`` and ``parent`` plus ``text``."""
    node_id = _clean_key(key)
    parent_id = _clean_key(parent)
    return dict(id=node_id, parent=parent_id, text=text)
def cfg():
    """
    Yield a configuration client rooted at PREFIX.

    The host is read from ``SDP_TEST_HOST`` (default ``127.0.0.1``).
    Everything under PREFIX is deleted from the backend both before the
    client is yielded and after the generator is resumed.
    """

    def wipe(client):
        """Recursively delete PREFIX; it does not have to exist."""
        client._backend.delete(PREFIX, must_exist=False, recursive=True)

    test_host = os.getenv("SDP_TEST_HOST", "127.0.0.1")
    with ska_sdp_config.Config(global_prefix=PREFIX, host=test_host) as config:
        wipe(config)
        yield config
        wipe(config)
def main():
    """Main loop of Helm controller.

    Keeps a set of deployment IDs known to Helm and, in every pass of the
    configuration transaction, deletes those no longer listed and creates
    listed deployments of type 'helm' that are not yet in the set. Chart
    repositories are refreshed when the refresh time has passed.
    """
    # Instantiate configuration
    client = ska_sdp_config.Config()

    # TODO: Service lease + leader election

    # Obtain charts
    update_chart_repos()
    next_chart_refresh = time.time() + CHART_REPO_REFRESH

    # Load Helm repository
    helm_invoke("init", "--client-only")
    helm_invoke("repo", "update")

    log.info("Loading helm deployments...")

    # Ask Helm for its deployments; the empty string left over from
    # splitting the output is discarded.
    listing = helm_invoke('list', '-q', '--namespace', NAMESPACE)
    deploys = set(listing.split('\n')) - {''}
    log.info("Found {} existing deployments.".format(len(deploys)))

    # Wait for something to happen
    for txn in client.txn():

        # Refresh charts? A failed refresh is logged and otherwise ignored.
        if time.time() > next_chart_refresh:
            next_chart_refresh = time.time() + CHART_REPO_REFRESH

            try:
                helm_invoke("repo", "update")
            except subprocess.CalledProcessError:
                log.error("Could not refresh global chart repository!")

            try:
                update_chart_repos()
            except subprocess.CalledProcessError:
                log.error("Could not refresh chart repository!")

        # Deployments listed in the configuration
        target_deploys = txn.list_deployments()

        # Delete deployments that are no longer listed. A copy is
        # iterated because the set is modified inside the loop.
        for dpl_id in list(deploys):
            if dpl_id in target_deploys:
                continue
            if delete_helm(txn, dpl_id):
                deploys.remove(dpl_id)

        # Add listed deployments that are not yet present
        for dpl_id in target_deploys:
            if dpl_id in deploys:
                continue

            # Get details
            try:
                deploy = txn.get_deployment(dpl_id)
            except ValueError as err:
                log.warning("Deployment {} failed validation: {}!".format(
                    dpl_id, str(err)))
                continue

            # Only Helm deployments are handled here
            if deploy.type != 'helm':
                continue

            if create_helm(txn, dpl_id, deploy):
                deploys.add(dpl_id)

        # Loop around, waiting at most until the next chart refresh
        txn.loop(wait=True, timeout=next_chart_refresh - time.time())
def new_config_db():
    """Return an SDP configuration client (factory function).

    The "etcd3" backend is used when FEATURE_CONFIG_DB is active, otherwise
    the "memory" backend.
    """
    if FEATURE_CONFIG_DB.is_active():
        backend = "etcd3"
    else:
        backend = "memory"
    LOG.info("Using config DB %s backend", backend)
    return ska_sdp_config.Config(backend=backend)
def main(argv):
    """Main function.

    Claims the processing block, waits until ``resources_available`` in its
    state is truthy, creates the visibility and image buffer reservations,
    deploys a Dask execution engine, runs the simulation and ICAL pipelines
    on it and marks the processing block FINISHED.

    The Dask deployment is removed and the configuration connection is
    closed even when a step raises, so a failed run does not leave them
    behind. The exception still propagates and the status is then not set
    to FINISHED.

    :param argv: argument list; ``argv[0]`` is the processing block ID
    """
    pb_id = argv[0]
    config = ska_sdp_config.Config()
    try:
        for txn in config.txn():
            txn.take_processing_block(pb_id, config.client_lease)
            pb = txn.get_processing_block(pb_id)
        LOG.info("Claimed processing block %s", pb_id)

        # Parse parameters: these ones are needed for deploying the Dask EE
        n_workers = pb.parameters.get("n_workers", 2)

        # Missing buffer names get a default derived from the PB ID; the
        # default is written back into the parameters passed to the
        # pipelines.
        buffer_vis = pb.parameters.get("buffer_vis")
        if buffer_vis is None:
            buffer_vis = "buff-{}-vis".format(pb.id)
            pb.parameters["buffer_vis"] = buffer_vis
        buffer_img = pb.parameters.get("buffer_img")
        if buffer_img is None:
            buffer_img = "buff-{}-img".format(pb.id)
            pb.parameters["buffer_img"] = buffer_img

        # Set state to indicate workflow is waiting for resources
        for txn in config.txn():
            state = txn.get_processing_block_state(pb_id)
            state["status"] = "WAITING"
            txn.update_processing_block_state(pb_id, state)

        # Wait for resources_available to be true
        LOG.info("Waiting for resources to be available")
        for txn in config.txn():
            state = txn.get_processing_block_state(pb_id)
            if state.get("resources_available"):
                LOG.info("Resources are available")
                break
            txn.loop(wait=True)

        # Set state to indicate workflow is running
        for txn in config.txn():
            state = txn.get_processing_block_state(pb_id)
            state["status"] = "RUNNING"
            txn.update_processing_block_state(pb_id, state)

        # Create buffer reservations for visibilities and images.
        LOG.info("Creating buffer reservations")
        buffer_create(config, buffer_vis)
        buffer_create(config, buffer_img)

        # Deploy Dask EE
        LOG.info("Deploying Dask EE")
        image = "artefact.skao.int/ska-sdp-wflow-batch-imaging:{}".format(
            pb.workflow["version"])
        buffers = [buffer_vis, buffer_img]
        deploy_id, client = ee_dask_deploy(
            config, pb.id, image, n_workers=n_workers, buffers=buffers)
        try:
            # Run simulation and ICAL pipelines
            rascil_workflows.set_client(client)
            LOG.info("Running simulation pipeline")
            rascil_workflows.simulate(pb.parameters)
            LOG.info("Running ICAL pipeline")
            rascil_workflows.ical(pb.parameters)
            rascil_workflows.close_client()
            LOG.info("Finished processing")
        finally:
            # Remove the deployment whether or not the pipelines succeeded
            LOG.info("Removing Dask EE deployment")
            ee_dask_remove(config, deploy_id)

        # Set state to indicate processing is finished
        for txn in config.txn():
            state = txn.get_processing_block_state(pb_id)
            state["status"] = "FINISHED"
            txn.update_processing_block_state(pb_id, state)
    finally:
        config.close()