Example 1
def config_and_controller_fixture():
    """
    Fixture to create config and processing controller objects with a workflow
    definition and processing block in the config DB.
    """
    config = ska_sdp_config.Config()
    controller = processing_controller.ProcessingController()

    # Workflow definition
    workflow = {"image": WORKFLOW_IMAGE}

    # Processing block
    pb = ska_sdp_config.ProcessingBlock(
        id=PROCESSING_BLOCK_ID,
        sbi_id="test",
        workflow={
            "type": WORKFLOW_TYPE,
            "id": WORKFLOW_ID,
            "version": WORKFLOW_VERSION,
        },
        parameters={},
        dependencies=[],
    )

    for txn in config.txn():
        txn.create_workflow(WORKFLOW_TYPE, WORKFLOW_ID, WORKFLOW_VERSION,
                            workflow)
        txn.create_processing_block(pb)

    return config, controller
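A minimal sketch of how this fixture might be consumed, assuming it is registered with @pytest.fixture (the test name and assertion below are illustrative, not from the original suite):

def test_processing_block_in_db(config_and_controller_fixture):
    """Illustrative test: the fixture should leave the PB in the config DB."""
    config, _controller = config_and_controller_fixture
    for txn in config.txn():
        pb_ids = txn.list_processing_blocks()
    # PROCESSING_BLOCK_ID is the same constant used by the fixture above
    assert PROCESSING_BLOCK_ID in pb_ids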
Example 2
    def main_loop(self, backend=None):
        """
        Main event loop, executing three processes on a transaction,
        performing actions depending on the transaction state.

        :param backend: config DB backend to use

        """
        # Connect to config DB
        LOG.info("Connecting to config DB")
        config = ska_sdp_config.Config(backend=backend)

        LOG.info("Starting main loop")
        for watcher in config.watcher():

            # List processing blocks and deployments
            for txn in watcher.txn():
                pb_ids = txn.list_processing_blocks()
                deploy_ids = txn.list_deployments()
                LOG.info("processing block ids %s", pb_ids)

            # Perform actions.
            self._start_new_pb_workflows(watcher, pb_ids)
            self._release_pbs_with_finished_dependencies(watcher, pb_ids)
            self._delete_deployments_without_pb(watcher, pb_ids, deploy_ids)
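For orientation, one of the three actions called above might look like the sketch below; the body is illustrative and assumes the deployment-ID naming convention used elsewhere on this page (deployment IDs prefixed with the owning PB ID):

    def _delete_deployments_without_pb(self, watcher, pb_ids, deploy_ids):
        """Illustrative sketch: delete deployments whose PB has gone away."""
        for deploy_id in deploy_ids:
            # Assumed convention: the deployment ID starts with the PB ID
            pb_id = deploy_id.rsplit("-", 1)[0]
            if pb_id not in pb_ids:
                for txn in watcher.txn():
                    deploy = txn.get_deployment(deploy_id)
                    if deploy is not None:
                        txn.delete_deployment(deploy)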
Example 3
def main():
    logging.basicConfig(level=logging.INFO)
    config = ska_sdp_config.Config()
    pb = get_pb(config, sys.argv[1])
    deployment = create_deployment(config, pb)
    try:
        dlg_dim_ip = resolve_dim_host(deployment.deploy_id)
        common.run_processing_block(pb,
                                    lambda _: None,
                                    host=dlg_dim_ip,
                                    port=8001)
        idle_for_some_obscure_reason(config, pb)
    finally:
        cleanup(config, deployment)
Example 4
def check_config_db():
    """Check that the config DB has the configured PBs.

    Only run this step if the config DB is enabled.
    """
    if ska_sdp_config is not None \
            and SDPSubarray.is_feature_active('config_db'):
        filename = join(dirname(__file__), 'data', 'command_Configure.json')
        with open(filename, 'r') as file:
            config = json.load(file)
        config_db_client = ska_sdp_config.Config()
        for txn in config_db_client.txn():
            pb_ids = txn.list_processing_blocks()
        for pb in config['processingBlocks']:
            assert pb['id'] in pb_ids
Example 5
def main_loop(backend=None):
    """
    Main loop of Helm controller.

    :param backend: backend for the configuration database

    """
    # Instantiate configuration
    client = ska_sdp_config.Config(backend=backend)

    # Configure Helm repositories
    for name, url in CHART_REPO_LIST:
        helm_invoke("repo", "add", name, url)

    # Get charts
    update_helm()
    next_chart_refresh = time.time() + CHART_REPO_REFRESH

    # Wait for something to happen
    for watcher in client.watcher(timeout=CHART_REPO_REFRESH):

        # Refresh charts?
        if time.time() > next_chart_refresh:
            update_helm()
            next_chart_refresh = time.time() + CHART_REPO_REFRESH

        # List deployments
        deploys = list_helm()
        for txn in watcher.txn():
            target_deploys = txn.list_deployments()

        # Check for deployments we should delete
        for dpl_id in deploys:
            if dpl_id not in target_deploys:
                # Delete it
                delete_helm(dpl_id)

        # Check for deployments we should add
        for dpl_id in target_deploys:
            if dpl_id not in deploys:
                # Get details
                for txn in watcher.txn():
                    deploy = _get_deployment(txn, dpl_id)
                # If vanished or wrong type, ignore
                if deploy is None or deploy.type != "helm":
                    continue
                # Create it
                create_helm(dpl_id, deploy)
Example 6
def main():
    logging.basicConfig(level=logging.INFO)
    config = ska_sdp_config.Config()
    pb = get_pb(config, sys.argv[1])

    # Set state to indicate workflow is waiting for resources
    logger.info("Setting status to WAITING")
    for txn in config.txn():
        state = txn.get_processing_block_state(pb.id)
        state["status"] = "WAITING"
        txn.update_processing_block_state(pb.id, state)

    # Wait for resources_available to be true
    logger.info("Waiting for resources to be available")
    for txn in config.txn():
        state = txn.get_processing_block_state(pb.id)
        ra = state.get("resources_available")
        if ra is not None and ra:
            break
        txn.loop(wait=True)

    # Set state to indicate workflow is running
    logger.info("Setting status to RUNNING")
    for txn in config.txn():
        state = txn.get_processing_block_state(pb.id)
        state["status"] = "RUNNING"
        txn.update_processing_block_state(pb.id, state)

    deployment = create_deployment(config, pb)
    try:
        dlg_dim_ip = resolve_dim_host(deployment.id)
        common.run_processing_block(pb,
                                    lambda _: None,
                                    host=dlg_dim_ip,
                                    port=8001)
        idle_for_some_obscure_reason(config, pb)
    finally:
        cleanup(config, deployment)

    # Set state to indicate processing has ended
    logger.info("Setting status to FINISHED")
    for txn in config.txn():
        state = txn.get_processing_block_state(pb.id)
        state["status"] = "FINISHED"
        txn.update_processing_block_state(pb.id, state)
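The read-modify-write status updates above follow the same transaction pattern used in several later examples; it could be factored into a small helper along these lines (the helper name is hypothetical):

def set_pb_status(config, pb_id, status):
    """Illustrative helper: update the PB status in a single transaction."""
    for txn in config.txn():
        state = txn.get_processing_block_state(pb_id)
        state["status"] = status
        txn.update_processing_block_state(pb_id, state)

Each status block above would then reduce to a single call such as set_pb_status(config, pb.id, "RUNNING").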
Example 7
    def init_device(self):
        """Initialise the device."""
        # SKASubarray.init_device(self)
        Device.init_device(self)

        self.set_state(DevState.INIT)
        LOG.info('Initialising SDP Subarray: %s', self.get_name())

        # Initialise attributes
        self._set_obs_state(ObsState.IDLE)
        self._set_admin_mode(AdminMode.ONLINE)
        self._set_health_state(HealthState.OK)
        self._set_receive_addresses(None)

        # Initialise instance variables
        self._sbi_id = None
        self._pb_realtime = []
        self._pb_batch = []
        self._cbf_outlink_address = None
        self._pb_receive_addresses = None

        if ska_sdp_config is not None \
                and self.is_feature_active(FeatureToggle.CONFIG_DB):
            self._config_db_client = ska_sdp_config.Config()
            LOG.debug('SDP Config DB enabled')
        else:
            self._config_db_client = None
            LOG.warning(
                'SDP Config DB disabled %s',
                '(ska_sdp_config package not found)'
                if ska_sdp_config is None else 'by feature toggle')

        if self.is_feature_active(FeatureToggle.CBF_OUTPUT_LINK):
            LOG.debug('CBF output link enabled')
        else:
            LOG.debug('CBF output link disabled')

        # The subarray device is initialised in the OFF state.
        self.set_state(DevState.OFF)
        LOG.info('SDP Subarray initialised: %s', self.get_name())
Example 8
def cmd(args, path, value, workflow, parameters):
    """Execute command."""
    # Get configuration client, start transaction
    import ska_sdp_config
    prefix = ('' if args['--prefix'] is None else args['--prefix'])
    cfg = ska_sdp_config.Config(global_prefix=prefix)
    try:
        for txn in cfg.txn():
            if args['ls'] or args['list']:
                cmd_list(txn, path, args)
            elif args['watch'] or args['get']:
                cmd_get(txn, path, args)
            elif args['create']:
                cmd_create(txn, path, value, args)
            elif args['edit']:
                cmd_edit(txn, path)
            elif args['update']:
                cmd_update(txn, path, value, args)
            elif args['delete']:
                cmd_delete(txn, path, args)
            elif args['process']:
                pb_id = cmd_create_pb(txn, workflow, parameters, args)
            elif args['deploy']:
                cmd_deploy(txn, args['<type>'], args['<name>'], parameters)
            if args['watch']:
                txn.loop(wait=True)

        # Possibly give feedback after transaction has concluded
        if not args['--quiet']:
            if args['create'] or args['update'] or args['delete'] or \
               args['edit']:
                print("OK")
            if args['process']:
                print("OK, pb_id = {}".format(pb_id))

    except KeyboardInterrupt:
        if not args['watch']:
            raise
Example 9
def main():
    logging.basicConfig(level=logging.INFO)
    config = ska_sdp_config.Config()
    pb = get_pb(config)
    deployment = create_deployment(config, pb)
    try:
        logger.info("Contacting DIM at %s", DLG_DIM_HOST)
        tries = 1
        max_tries = 200
        for _ in range(max_tries):
            try:
                common.run_processing_block(pb,
                                            lambda _: None,
                                            host=DLG_DIM_HOST,
                                            port=DLG_DIM_PORT)
                break
            except Exception:
                logger.exception("Error while running, trying again (%d/%d)",
                                 tries, max_tries)
                tries += 1
                time.sleep(1)
        idle_for_some_obscure_reason(config, pb)
    finally:
        cleanup(config, deployment)
Example 10
"""
Example PSS Receive workflow
"""

# pylint: disable=C0103

import logging
import sys

import ska_sdp_config

# Initialise logging and configuration
logging.basicConfig()
log = logging.getLogger('pss_recv')
log.setLevel(logging.INFO)
config = ska_sdp_config.Config()


def main(argv):
    pb_id = argv[0]
    for txn in config.txn():
        txn.take_processing_block(pb_id, config.client_lease)
        pb = txn.get_processing_block(pb_id)

    # Show
    log.info("Claimed processing block %s", pb)

    # Deploy PSS Receive with 1 worker.
    log.info("Deploying PSS Receive...")
    deploy_id = pb.pb_id + "-pss-receive"
    deploy = ska_sdp_config.Deployment(
        deploy_id,
        "helm",                    # deployment type and chart values assumed;
        {"chart": "pss-receive"},  # the original example is truncated here
    )
    for txn in config.txn():
        txn.create_deployment(deploy)
Example 11
def main(argv):
    """Workflow main function."""
    pb_id = argv[0]

    config = ska_sdp_config.Config()

    for txn in config.txn():
        txn.take_processing_block(pb_id, config.client_lease)
        pb = txn.get_processing_block(pb_id)
    LOG.info("Claimed processing block %s", pb_id)

    # Parse parameters
    n_workers = pb.parameters.get("n_workers", 1)
    buffers = [b.get("name") for b in pb.parameters.get("buffers", [])]
    secrets = [pb.parameters.get("service_account", {}).get("secret")]

    # Set state to indicate workflow is waiting for resources
    LOG.info("Setting status to WAITING")
    for txn in config.txn():
        state = txn.get_processing_block_state(pb_id)
        state["status"] = "WAITING"
        txn.update_processing_block_state(pb_id, state)

    # Wait for resources_available to be true
    LOG.info("Waiting for resources to be available")
    for txn in config.txn():
        state = txn.get_processing_block_state(pb_id)
        ra = state.get("resources_available")
        if ra is not None and ra:
            LOG.info("Resources are available")
            break
        txn.loop(wait=True)

    # Set state to indicate workflow is running
    for txn in config.txn():
        state = txn.get_processing_block_state(pb_id)
        state["status"] = "RUNNING"
        txn.update_processing_block_state(pb_id, state)

    # Deploy Dask EE
    LOG.info("Deploying Dask EE")
    image = "artefact.skao.int/ska-sdp-wflow-delivery:{}".format(
        pb.workflow.get("version")
    )
    deploy_id, client = ee_dask_deploy(
        config, pb.id, image, n_workers=n_workers, buffers=buffers, secrets=secrets
    )

    # Run delivery function
    LOG.info("Starting delivery")
    deliver(client, pb.parameters)
    LOG.info("Finished delivery")

    # Remove Dask EE deployment
    LOG.info("Removing Dask EE deployment")
    ee_dask_remove(config, deploy_id)

    # Set state to indicate processing is finished
    for txn in config.txn():
        state = txn.get_processing_block_state(pb_id)
        state["status"] = "FINISHED"
        txn.update_processing_block_state(pb_id, state)

    config.close()
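ee_dask_deploy and ee_dask_remove are imported from workflow support code that is not shown here. A rough sketch of what such a pair could do with the Deployment API seen in the other examples (the chart name, values and scheduler address are assumptions, not the real implementation):

import distributed

def ee_dask_deploy_sketch(config, pb_id, image, n_workers=1, **_kwargs):
    """Illustrative only: create a 'helm' Deployment for a Dask cluster and
    return the deployment ID plus a client connected to the scheduler."""
    deploy_id = "{}-dask".format(pb_id)
    deploy = ska_sdp_config.Deployment(
        deploy_id, "helm",
        {"chart": "dask", "values": {"image": image, "workers": n_workers}},
    )
    for txn in config.txn():
        txn.create_deployment(deploy)
    # Hypothetical in-cluster service name for the Dask scheduler
    client = distributed.Client("{}-scheduler:8786".format(deploy_id))
    return deploy_id, client

def ee_dask_remove_sketch(config, deploy_id):
    """Illustrative only: delete the Dask EE deployment again."""
    for txn in config.txn():
        deploy = txn.get_deployment(deploy_id)
        if deploy is not None:
            txn.delete_deployment(deploy)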
Example 12
"""
Test deployment workflow: creates the deployments specified in the
processing block parameters (for example, a deployment named "sleeper"
with type "kubernetes-direct").
"""

# pylint: disable=C0103

import logging
import sys

import ska_sdp_config

logging.basicConfig()
log = logging.getLogger('testdeploy')
log.setLevel(logging.INFO)

# Instantiate configuration
client = ska_sdp_config.Config()


def make_deployment(dpl_name, dpl_args, pb_id):
    """Make a deployment given PB parameters."""
    return ska_sdp_config.Deployment(pb_id + "-" + dpl_name, **dpl_args)


def main(argv):
    pb_id = argv[0]
    for txn in client.txn():
        pb = txn.get_processing_block(pb_id)
        txn.take_processing_block(pb_id, client.client_lease)

    # Show
    log.info("Claimed processing block %s", pb)
Example 13
def main():
    """Main loop."""

    # Get environment variables to pass to workflow containers.
    values_env = get_environment_variables(
        ['SDP_CONFIG_HOST', 'SDP_HELM_NAMESPACE'])

    # Fetch workflow definitions.
    workflows_version, workflows_realtime, workflows_batch = \
        update_workflow_definition(WORKFLOWS_URL, WORKFLOWS_SCHEMA)
    next_workflows_refresh = time.time() + WORKFLOWS_REFRESH

    # Connect to configuration database.
    client = ska_sdp_config.Config()

    LOG.debug("Starting main loop...")
    for txn in client.txn():

        # Update workflow definitions if it is time to do so.

        if time.time() >= next_workflows_refresh:
            LOG.debug('Updating workflow definitions')
            workflows_version, workflows_realtime, workflows_batch = \
                update_workflow_definition(WORKFLOWS_URL, WORKFLOWS_SCHEMA)
            next_workflows_refresh = time.time() + WORKFLOWS_REFRESH

        # Get lists of processing blocks and deployments.

        current_pbs = txn.list_processing_blocks()
        current_deployments = txn.list_deployments()

        # Make list of current PBs with deployments, inferred from the deployment IDs.

        current_pbs_with_deployment = list(
            set(map(get_pb_id_from_deploy_id, current_deployments)))

        LOG.debug("Current PBs: {}".format(current_pbs))
        LOG.debug("Current deployments: {}".format(current_deployments))
        LOG.debug("Current PBs with deployment: {}".format(
            current_pbs_with_deployment))

        # Delete deployments not associated with processing blocks.

        for deploy_id in current_deployments:
            # Get ID of associated processing block by taking prefix of deployment ID.
            pb_id = get_pb_id_from_deploy_id(deploy_id)
            if pb_id not in current_pbs:
                LOG.info("Deleting deployment {}".format(deploy_id))
                deploy = txn.get_deployment(deploy_id)
                txn.delete_deployment(deploy)

        # Deploy workflow for processing blocks without deployments.

        for pb_id in current_pbs:
            if pb_id in current_pbs_with_deployment:
                continue
            pb = txn.get_processing_block(pb_id)
            wf_type = pb.workflow['type']
            wf_id = pb.workflow['id']
            wf_version = pb.workflow['version']
            LOG.info(
                "PB {} has no deployment (workflow type = {}, ID = {}, version = {})"
                "".format(pb_id, wf_type, wf_id, wf_version))
            if wf_type == "realtime":
                if (wf_id, wf_version) in workflows_realtime:
                    LOG.info(
                        "Deploying realtime workflow ID = {}, version = {}"
                        "".format(wf_id, wf_version))
                    wf_image = workflows_realtime[(wf_id, wf_version)]
                    deploy_id = "{}-workflow".format(pb_id)
                    # Values to pass to workflow Helm chart.
                    # Copy environment variable values and add argument values.
                    values = dict(values_env)
                    values['wf_image'] = wf_image
                    values['pb_id'] = pb_id
                    deploy = ska_sdp_config.Deployment(deploy_id, 'helm', {
                        'chart': 'workflow',
                        'values': values
                    })
                    LOG.info("Creating deployment {}".format(deploy_id))
                    txn.create_deployment(deploy)
                else:
                    # Unknown realtime workflow ID and version.
                    LOG.error("Workflow ID = {} version = {} is not supported".
                              format(wf_id, wf_version))
            elif wf_type == "batch":
                LOG.warning("Batch workflows are not supported at present")
            else:
                LOG.error("Unknown workflow type: {}".format(wf_type))

        LOG.debug("Waiting...")
        txn.loop(wait=True, timeout=next_workflows_refresh - time.time())
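get_pb_id_from_deploy_id is not shown in this example; since deployment IDs are built as "<pb_id>-workflow" above, a plausible implementation is simply:

def get_pb_id_from_deploy_id(deploy_id):
    """Illustrative: recover the PB ID prefix from '<pb_id>-workflow'."""
    return deploy_id.rsplit("-", 1)[0]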
Example 14
"""
This module contains the "model" layer.

This includes configuration database interactions and building data structures
for display.
"""
import json
import sys
from typing import Dict, List

import ska_sdp_config

DELIMITER = ":"
cfg = ska_sdp_config.Config(
    backend="memory" if "pytest" in sys.modules else "etcd3"
)


def _combine_key(parent: str, key: str) -> str:
    return DELIMITER.join((parent, key))


def _clean_key(key: str) -> str:
    k = key[1:] if key.startswith("/") else key
    return k.replace("/", DELIMITER)


def _to_node(key: str, parent: str, text: str) -> Dict:
    return {"id": _clean_key(key), "parent": _clean_key(parent), "text": text}
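As a quick illustration of these helpers with a hypothetical key:

# _clean_key("/pb/pb-001")                -> "pb:pb-001"
# _combine_key("pb", "pb-001")            -> "pb:pb-001"
# _to_node("/pb/pb-001", "/pb", "pb-001")
#     -> {"id": "pb:pb-001", "parent": "pb", "text": "pb-001"}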

Example 15
def cfg():
    host = os.getenv("SDP_TEST_HOST", "127.0.0.1")
    with ska_sdp_config.Config(global_prefix=PREFIX, host=host) as cfg:
        cfg._backend.delete(PREFIX, must_exist=False, recursive=True)
        yield cfg
        cfg._backend.delete(PREFIX, must_exist=False, recursive=True)
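Assuming this function is registered as a pytest fixture (the decorator is not shown), a test using it might look like this sketch:

def test_starts_empty(cfg):
    """Illustrative test: the fixture yields a client scoped to PREFIX with
    a clean database before and after each test."""
    for txn in cfg.txn():
        assert txn.list_processing_blocks() == []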
Example 16
def main():
    """Main loop of Helm controller."""

    # Instantiate configuration
    client = ska_sdp_config.Config()

    # TODO: Service lease + leader election

    # Obtain charts
    update_chart_repos()
    next_chart_refresh = time.time() + CHART_REPO_REFRESH

    # Load Helm repository
    helm_invoke("init", "--client-only")
    helm_invoke("repo", "update")

    # Show
    log.info("Loading helm deployments...")

    # Query helm for active deployments. Filter for active ones.
    deploys = helm_invoke('list', '-q', '--namespace', NAMESPACE).split('\n')
    deploys = set(deploys).difference(set(['']))
    log.info("Found {} existing deployments.".format(len(deploys)))

    # Wait for something to happen
    for txn in client.txn():

        # Refresh charts?
        if time.time() > next_chart_refresh:
            next_chart_refresh = time.time() + CHART_REPO_REFRESH

            try:
                helm_invoke("repo", "update")
            except subprocess.CalledProcessError as e:
                log.error("Could not refresh global chart repository: %s", e)

            try:
                update_chart_repos()
            except subprocess.CalledProcessError as e:
                log.error("Could not refresh chart repository: %s", e)

        # List deployments
        target_deploys = txn.list_deployments()

        # Check for deployments that we should delete
        for dpl_id in list(deploys):
            if dpl_id not in target_deploys:
                if delete_helm(txn, dpl_id):
                    deploys.remove(dpl_id)

        # Check for deployments we should add
        for dpl_id in target_deploys:
            if dpl_id not in deploys:

                # Get details
                try:
                    deploy = txn.get_deployment(dpl_id)
                except ValueError as e:
                    log.warning("Deployment {} failed validation: {}!".format(
                        dpl_id, str(e)))
                    continue

                # Right type?
                if deploy.type != 'helm':
                    continue

                # Create it
                if create_helm(txn, dpl_id, deploy):
                    deploys.add(dpl_id)

        # Loop around, wait if we made no change
        txn.loop(wait=True, timeout=next_chart_refresh - time.time())
Example 17
def new_config_db():
    """Return an SDP configuration client (factory function)."""
    backend = "etcd3" if FEATURE_CONFIG_DB.is_active() else "memory"
    LOG.info("Using config DB %s backend", backend)
    config_db = ska_sdp_config.Config(backend=backend)
    return config_db
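Callers would then obtain a client from the factory wherever one is needed, for example (illustrative):

config_db = new_config_db()
for txn in config_db.txn():
    pb_ids = txn.list_processing_blocks()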
Example 18
def main(argv):
    """Main function."""
    pb_id = argv[0]

    config = ska_sdp_config.Config()

    for txn in config.txn():
        txn.take_processing_block(pb_id, config.client_lease)
        pb = txn.get_processing_block(pb_id)
    LOG.info("Claimed processing block %s", pb_id)

    # Parse parameters: these ones are needed for deploying the Dask EE
    n_workers = pb.parameters.get("n_workers", 2)
    buffer_vis = pb.parameters.get("buffer_vis")
    if buffer_vis is None:
        buffer_vis = "buff-{}-vis".format(pb.id)
        pb.parameters["buffer_vis"] = buffer_vis
    buffer_img = pb.parameters.get("buffer_img")
    if buffer_img is None:
        buffer_img = "buff-{}-img".format(pb.id)
        pb.parameters["buffer_img"] = buffer_img

    # Set state to indicate workflow is waiting for resources
    for txn in config.txn():
        state = txn.get_processing_block_state(pb_id)
        state["status"] = "WAITING"
        txn.update_processing_block_state(pb_id, state)

    # Wait for resources_available to be true
    LOG.info("Waiting for resources to be available")
    for txn in config.txn():
        state = txn.get_processing_block_state(pb_id)
        ra = state.get("resources_available")
        if ra is not None and ra:
            LOG.info("Resources are available")
            break
        txn.loop(wait=True)

    # Set state to indicate workflow is running
    for txn in config.txn():
        state = txn.get_processing_block_state(pb_id)
        state["status"] = "RUNNING"
        txn.update_processing_block_state(pb_id, state)

    # Create buffer reservations for visibilities and images.
    LOG.info("Creating buffer reservations")
    buffer_create(config, buffer_vis)
    buffer_create(config, buffer_img)

    # Deploy Dask EE
    LOG.info("Deploying Dask EE")
    image = "artefact.skao.int/ska-sdp-wflow-batch-imaging:{}".format(
        pb.workflow["version"])
    buffers = [buffer_vis, buffer_img]
    deploy_id, client = ee_dask_deploy(config,
                                       pb.id,
                                       image,
                                       n_workers=n_workers,
                                       buffers=buffers)

    # Run simulation and ICAL pipelines
    rascil_workflows.set_client(client)
    LOG.info("Running simulation pipeline")
    rascil_workflows.simulate(pb.parameters)
    LOG.info("Running ICAL pipeline")
    rascil_workflows.ical(pb.parameters)
    rascil_workflows.close_client()
    LOG.info("Finished processing")

    # Remove Dask EE deployment
    LOG.info("Removing Dask EE deployment")
    ee_dask_remove(config, deploy_id)

    # Set state to indicate processing is finished
    for txn in config.txn():
        state = txn.get_processing_block_state(pb_id)
        state["status"] = "FINISHED"
        txn.update_processing_block_state(pb_id, state)

    config.close()