Example #1
def main():
    logger.info("Running abaco health checks. Now: {}".format(time.time()))
    try:
        ttl = Config.get('workers', 'worker_ttl')
    except Exception as e:
        logger.error(
            "Could not get worker_ttl config. Exception: {}".format(e))
    if not container_running(name='spawner*'):
        logger.critical("No spawners running! Launching new spawner..")
        command = 'python3 -u /actors/spawner.py'
        # check logging strategy to determine log file name:
        if get_log_file_strategy() == 'split':
            log_file = 'spawner.log'
        else:
            log_file = 'service.log'
        try:
            run_container_with_docker(AE_IMAGE,
                                      command,
                                      name='abaco_spawner_0',
                                      environment={'AE_IMAGE': AE_IMAGE},
                                      log_file=log_file)
        except Exception as e:
            logger.critical(
                "Could not restart spanwer. Exception: {}".format(e))
    try:
        ttl = int(ttl)
    except Exception as e:
        logger.error("Invalid ttl config: {}. Setting to -1.".format(e))
        ttl = -1
    ids = get_actor_ids()
    logger.info("Found {} actor(s). Now checking status.".format(len(ids)))
    for id in ids:
        check_workers(id, ttl)
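The function above reads the worker_ttl setting, makes sure at least one spawner container is running, and then checks the workers of every registered actor. A minimal runner sketch follows; the scheduling loop and the 300-second interval are assumptions for illustration, not something taken from the abaco source.

# Hypothetical periodic runner (the interval is an assumption, not an abaco setting).
if __name__ == '__main__':
    while True:
        main()
        time.sleep(300)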
Example #2
def run_worker(image, ch_name, worker_id):
    """
    Run an actor executor worker with a given channel and image.
    :return:
    """
    logger.debug("top of run_worker()")
    command = 'python3 -u /actors/worker.py'
    logger.debug(
        "docker_utils running worker. image:{}, command:{}, chan:{}".format(
            image, command, ch_name))

    # determine what log file to use
    if get_log_file_strategy() == 'split':
        log_file = 'worker.log'
    else:
        log_file = 'abaco.log'
    container = run_container_with_docker(image=AE_IMAGE,
                                          command=command,
                                          environment={
                                              'ch_name': ch_name,
                                              'image': image,
                                              'worker_id': worker_id,
                                              '_abaco_secret': os.environ.get('_abaco_secret')
                                          },
                                          log_file=log_file)
    # don't catch errors -- if we get an error trying to run a worker, let it bubble up.
    # TODO - determines worker structure; should be placed in a proper DAO class.
    logger.info(
        "worker container running. worker_id: {}. container: {}".format(
            worker_id, container))
    return {
        'image': image,
        # @todo - location will need to change to support swarm or cluster
        'location': dd,
        'id': worker_id,
        'cid': container.get('Id'),
        'ch_name': ch_name,
        'status': BUSY,
        'host_id': host_id,
        'host_ip': host_ip,
        'last_execution': 0
    }
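A hedged caller sketch follows; the image, channel name, and worker id are placeholders rather than values from the abaco source. It only illustrates reading fields from the worker record that run_worker() returns.

# Hypothetical call; the returned value is a plain dict describing the new worker.
worker = run_worker(image='example/actor_image',
                    ch_name='worker_ch_0',
                    worker_id='worker_0')
logger.info("started worker {} (container {}), status: {}".format(
    worker['id'], worker['cid'], worker['status']))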
Example #3
File: health.py  Project: shresnis000/abaco
def start_spawner(queue, idx='0'):
    """
    Start a spawner on this host listening to a queue, `queue`.
    :param queue: (str) - the queue the spawner should listen to.
    :param idx: (str) - the index to use as a suffix to the spawner container name.
    :return:
    """
    command = 'python3 -u /actors/spawner.py'
    name = 'healthg_{}_spawner_{}'.format(queue, idx)

    try:
        environment = dict(os.environ)
    except Exception as e:
        environment = {}
        logger.error(
            "Unable to convert environment to dict; exception: {}".format(e))

    environment.update({
        'AE_IMAGE': AE_IMAGE.split(':')[0],
        'queue': queue,
    })
    if '_abaco_secret' not in environment:
        msg = 'Error in health process trying to start spawner. Did not find an _abaco_secret. Aborting'
        logger.critical(msg)
        raise Exception(msg)

    # check logging strategy to determine log file name:
    log_file = 'abaco.log'
    if get_log_file_strategy() == 'split':
        log_file = 'spawner.log'
    try:
        run_container_with_docker(AE_IMAGE,
                                  command,
                                  name=name,
                                  environment=environment,
                                  mounts=[],
                                  log_file=log_file)
    except Exception as e:
        logger.critical(
            "Could not restart spawner for queue {}. Exception: {}".format(
                queue, e))
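A usage sketch, with the caveat that the queue names and the loop are illustrative assumptions; health.py may drive start_spawner() differently.

# Hypothetical: start one spawner per queue this host should serve.
for q in ['default', 'special']:
    start_spawner(q, idx='0')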
Example #4
def run_container_with_docker(image,
                              command,
                              name=None,
                              environment={},
                              mounts=[],
                              log_file=None,
                              auto_remove=False,
                              client_id=None,
                              client_access_token=None,
                              client_refresh_token=None,
                              actor_id=None,
                              tenant=None,
                              api_server=None,
                              client_secret=None):
    """
    Run a container with docker mounted in it.
    Note: this function always mounts the abaco conf file so it should not be used by execute_actor().
    """
    logger.debug("top of run_container_with_docker().")
    cli = docker.APIClient(base_url=dd, version="auto")

    # bind the docker socket as r/w since this container gets docker.
    volumes = ['/var/run/docker.sock']
    binds = {
        '/var/run/docker.sock': {
            'bind': '/var/run/docker.sock',
            'ro': False
        }
    }
    # add a bind key and dictionary as well as a volume for each mount
    for m in mounts:
        binds[m.get('host_path')] = {
            'bind': m.get('container_path'),
            'ro': m.get('format') == 'ro'
        }
        volumes.append(m.get('host_path'))

    # mount the abaco conf file. first we look for the environment variable, falling back to the value in Config.
    try:
        abaco_conf_host_path = os.environ.get('abaco_conf_host_path')
        if not abaco_conf_host_path:
            abaco_conf_host_path = Config.get('spawner',
                                              'abaco_conf_host_path')
        logger.debug("docker_utils using abaco_conf_host_path={}".format(
            abaco_conf_host_path))
        # mount config file at the root of the container as r/o
        volumes.append('/service.conf')
        binds[abaco_conf_host_path] = {'bind': '/service.conf', 'ro': True}
    except configparser.NoOptionError as e:
        # if we're here, it's bad. we don't have a config file. better to cut and run.
        msg = "Did not find the abaco_conf_host_path in Config. Exception: {}".format(
            e)
        logger.error(msg)
        raise DockerError(msg)
    # also add it to the environment if not already there
    if 'abaco_conf_host_path' not in environment:
        environment['abaco_conf_host_path'] = abaco_conf_host_path

    if 'client_id' not in environment:
        environment['client_id'] = client_id

    if 'client_access_token' not in environment:
        environment['client_access_token'] = client_access_token

    if 'actor_id' not in environment:
        environment['actor_id'] = actor_id

    if 'tenant' not in environment:
        environment['tenant'] = tenant

    if 'api_server' not in environment:
        environment['api_server'] = api_server

    if 'client_secret' not in environment:
        environment['client_secret'] = client_secret

    if 'client_refresh_token' not in environment:
        environment['client_refresh_token'] = client_refresh_token

    # if not passed, determine what log file to use
    if not log_file:
        if get_log_file_strategy() == 'split':
            log_file = 'worker.log'
        else:
            log_file = 'abaco.log'

    # mount the logs file.
    volumes.append('/var/log/service.log')
    # first check to see if the logs directory config was set:
    try:
        logs_host_dir = Config.get('logs', 'host_dir')
    except (configparser.NoSectionError, configparser.NoOptionError):
        # if the directory is not configured, default it to abaco_conf_host_path
        logs_host_dir = os.path.dirname(abaco_conf_host_path)
    binds['{}/{}'.format(logs_host_dir, log_file)] = {
        'bind': '/var/log/service.log',
        'rw': True
    }

    host_config = cli.create_host_config(binds=binds, auto_remove=auto_remove)
    logger.debug("binds: {}".format(binds))

    # add the container to a specific docker network, if configured
    netconf = None
    try:
        docker_network = Config.get('spawner', 'docker_network')
    except Exception:
        docker_network = None
    if docker_network:
        netconf = cli.create_networking_config(
            {docker_network: cli.create_endpoint_config()})

    # create and start the container
    try:
        container = cli.create_container(image=image,
                                         environment=environment,
                                         volumes=volumes,
                                         host_config=host_config,
                                         command=command,
                                         name=name,
                                         networking_config=netconf)
        cli.start(container=container.get('Id'))
        logger.debug('container successfully started')
    except Exception as e:
        msg = "Got exception trying to run container from image: {}. Exception: {}".format(
            image, e)
        logger.info(msg)
        raise DockerError(msg)
    logger.info("container started successfully: {}".format(container))
    return container
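As the loop over mounts above shows, each mount is expected to be a dict with host_path, container_path, and format keys, where format == 'ro' produces a read-only bind. A sketch of a call follows; the name, paths, and environment values are placeholders, not values from the abaco source.

# Hypothetical invocation; name, paths, and environment are placeholders.
container = run_container_with_docker(
    image=AE_IMAGE,
    command='python3 -u /actors/worker.py',
    name='example_worker_0',
    environment={'worker_id': 'worker_0'},
    mounts=[{'host_path': '/data/abaco/worker_0',
             'container_path': '/data/abaco/worker_0',
             'format': 'rw'}],
    log_file='worker.log')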
Example #5
def run_worker(image, worker_id):
    """
    Run an actor executor worker with a given image and worker id.
    :return:
    """
    logger.debug("top of run_worker()")
    command = 'python3 -u /actors/worker.py'
    logger.debug("docker_utils running worker. image:{}, command:{}".format(
        image, command))

    # determine what log file to use
    if get_log_file_strategy() == 'split':
        log_file = 'worker.log'
    else:
        log_file = 'abaco.log'

    # mount the directory on the host for creating fifos
    try:
        fifo_host_path_dir = Config.get('workers', 'fifo_host_path_dir')
        logger.info("Using fifo_host_path_dir: {}".format(fifo_host_path_dir))
    except (configparser.NoSectionError, configparser.NoOptionError) as e:
        logger.error("Got exception trying to look up fifo_host_path_dir. Setting to None. Exception: {}".format(e))
        fifo_host_path_dir = None
    if fifo_host_path_dir:
        mounts = [{'host_path': os.path.join(fifo_host_path_dir, worker_id),
                   'container_path': os.path.join(fifo_host_path_dir, worker_id),
                   'format': 'rw'}]
    else:
        mounts = []

    # mount the directory on the host for creating result sockets
    try:
        socket_host_path_dir = Config.get('workers', 'socket_host_path_dir')
        logger.info("Using socket_host_path_dir: {}".format(socket_host_path_dir))
    except (configparser.NoSectionError, configparser.NoOptionError) as e:
        logger.error("Got exception trying to look up fifo_host_path_dir. Setting to None. Exception: {}".format(e))
        socket_host_path_dir = None
    if socket_host_path_dir:
        mounts.append({'host_path': os.path.join(socket_host_path_dir, worker_id),
                       'container_path': os.path.join(socket_host_path_dir, worker_id),
                       'format': 'rw'})

    logger.info("Final fifo_host_path_dir: {}; socket_host_path_dir: {}".format(fifo_host_path_dir,
                                                                                socket_host_path_dir))
    try:
        auto_remove = Config.get('workers', 'auto_remove')
    except (configparser.NoSectionError, configparser.NoOptionError) as e:
        logger.debug("no auto_remove in the workers stanza.")
        auto_remove = True
    if hasattr(auto_remove, 'lower'):
        if auto_remove.lower() == 'false':
            auto_remove = False
    elif auto_remove is not True:
        auto_remove = False
    container = run_container_with_docker(image=AE_IMAGE,
                                          command=command,
                                          environment={'image': image,
                                                       'worker_id': worker_id,
                                                       '_abaco_secret': os.environ.get('_abaco_secret')},
                                          mounts=mounts,
                                          log_file=log_file,
                                          auto_remove=auto_remove)
    # don't catch errors -- if we get an error trying to run a worker, let it bubble up.
    # TODO - determines worker structure; should be placed in a proper DAO class.
    logger.info("worker container running. worker_id: {}. container: {}".format(worker_id, container))
    return { 'image': image,
             # @todo - location will need to change to support swarm or cluster
             'location': dd,
             'id': worker_id,
             'cid': container.get('Id'),
             'status': BUSY,
             'host_id': host_id,
             'host_ip': host_ip,
             'last_execution_time': 0,
             'last_health_check_time': get_current_utc_time() }
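This variant pulls fifo_host_path_dir, socket_host_path_dir, and auto_remove from the [workers] config stanza (worker_ttl, used in Example #1, lives there as well). A sketch of the corresponding config entries follows; the paths and values are placeholders, and only the option names come from the code above.

[workers]
fifo_host_path_dir = /data/abaco/fifos
socket_host_path_dir = /data/abaco/sockets
auto_remove = true
worker_ttl = 86400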