Example #1
0
def main():
    """Run one pass of the abaco health checks.

    Steps, in order:

    1. Call ``clean_up_ipc_dirs``; any exception is logged and ignored.
    2. Read the ``worker_ttl`` option from the ``workers`` config section;
       a failure is logged and the ttl is treated as missing.
    3. If ``container_running(name='spawner*')`` is falsy, try to start a
       container named ``abaco_spawner_0`` running the spawner script. A
       failure to start it is logged and does not abort the pass.
    4. Convert the ttl to ``int``, falling back to ``-1`` when it is
       missing or cannot be converted.
    5. Call ``check_workers(actor_id, ttl)`` for every id returned by
       ``get_actor_ids``.
    """
    logger.info("Running abaco health checks. Now: {}".format(time.time()))
    try:
        clean_up_ipc_dirs()
    except Exception as e:
        logger.error("Got exception from clean_up_ipc_dirs: {}".format(e))
    try:
        ttl = Config.get('workers', 'worker_ttl')
    except Exception as e:
        logger.error(
            "Could not get worker_ttl config. Exception: {}".format(e))
        # Bind ttl explicitly: without this the name stays unbound and the
        # int() conversion below only "works" because the resulting
        # UnboundLocalError happens to be caught by its broad except.
        ttl = None
    if not container_running(name='spawner*'):
        logger.critical("No spawners running! Launching new spawner..")
        command = 'python3 -u /actors/spawner.py'
        # No log file is configured for this spawner (log_file=None).
        try:
            run_container_with_docker(
                AE_IMAGE,
                command,
                name='abaco_spawner_0',
                # Only the part of AE_IMAGE before the first ':' is passed on
                # (presumably the image name without its tag -- confirm).
                environment={'AE_IMAGE': AE_IMAGE.split(':')[0]},
                mounts=[],
                log_file=None)
        except Exception as e:
            logger.critical(
                "Could not restart spawner. Exception: {}".format(e))
    try:
        ttl = int(ttl)
    except Exception as e:
        logger.error("Invalid ttl config: {}. Setting to -1.".format(e))
        ttl = -1
    ids = get_actor_ids()
    logger.info("Found {} actor(s). Now checking status.".format(len(ids)))
    # Named actor_id rather than id to avoid shadowing the builtin.
    for actor_id in ids:
        check_workers(actor_id, ttl)
Example #2
0
File: health.py Project: TACC/abaco
def main():
    """Run a single round of abaco health checks.

    Prints a start banner, reads ``worker_ttl`` from the ``workers`` config
    section, launches a container named ``abaco_spawner_0`` when
    ``container_running(name='spawner*')`` is falsy, and then calls
    ``check_workers`` for every id returned by ``get_actor_ids``.

    The ttl handed to ``check_workers`` is the config value converted to
    ``int``, or ``-1`` if that conversion raises.
    """
    started_at = time.time()
    print("Running abaco health checks. Now: {}".format(started_at))
    raw_ttl = Config.get('workers', 'worker_ttl')

    spawner_up = container_running(name='spawner*')
    if not spawner_up:
        print("No spawners running! Launching new spawner..")
        run_container_with_docker(
            AE_IMAGE,
            'python3 -u /actors/spawner.py',
            name='abaco_spawner_0',
            environment={'AE_IMAGE': AE_IMAGE},
        )

    # Any failure to interpret the configured ttl falls back to -1.
    try:
        worker_ttl = int(raw_ttl)
    except Exception:
        worker_ttl = -1

    actor_ids = get_actor_ids()
    print("Found {} actor(s). Now checking status.".format(len(actor_ids)))
    for actor_id in actor_ids:
        check_workers(actor_id, worker_ttl)
Example #3
0
def main():
    """Perform the abaco health checks once.

    Sequence:

    * print a banner containing the current ``time.time()`` value;
    * fetch ``worker_ttl`` from the ``workers`` section of ``Config``;
    * if ``container_running(name='spawner*')`` is falsy, start the spawner
      script in a container named ``abaco_spawner_0``;
    * coerce the ttl to ``int`` (``-1`` when coercion raises);
    * call ``check_workers(actor_id, ttl)`` for each id from
      ``get_actor_ids``.
    """
    banner = "Running abaco health checks. Now: {}"
    print(banner.format(time.time()))
    configured_ttl = Config.get('workers', 'worker_ttl')

    if not container_running(name='spawner*'):
        print("No spawners running! Launching new spawner..")
        spawner_cmd = 'python3 -u /actors/spawner.py'
        spawner_env = {'AE_IMAGE': AE_IMAGE}
        run_container_with_docker(AE_IMAGE,
                                  spawner_cmd,
                                  name='abaco_spawner_0',
                                  environment=spawner_env)

    try:
        ttl_value = int(configured_ttl)
    except Exception:
        # Unusable ttl setting: fall back to -1.
        ttl_value = -1

    found_ids = get_actor_ids()
    summary = "Found {} actor(s). Now checking status."
    print(summary.format(len(found_ids)))
    for current_id in found_ids:
        check_workers(current_id, ttl_value)
Example #4
0
def start_spawner(queue, idx='0'):
    """
    Start a spawner on this host listening to a queue, `queue`.

    The container is named ``healthg_<queue>_spawner_<idx>`` and receives a
    copy of this process's environment plus ``AE_IMAGE`` (the part of the
    module-level AE_IMAGE before the first ':') and ``queue``.

    A failure while starting the container is logged at critical level and
    is not re-raised.

    :param queue: (str) - the queue the spawner should listen to.
    :param idx: (str) - the index to use as a suffix to the spawner container name.
    :return: None
    :raises RuntimeError: if `_abaco_secret` is not present in the environment
        that would be passed to the spawner.
    """
    command = 'python3 -u /actors/spawner.py'
    name = 'healthg_{}_spawner_{}'.format(queue, idx)

    try:
        environment = dict(os.environ)
    except Exception as e:
        environment = {}
        logger.error(
            "Unable to convert environment to dict; exception: {}".format(e))

    environment.update({
        'AE_IMAGE': AE_IMAGE.split(':')[0],
        'queue': queue,
    })
    if '_abaco_secret' not in environment:
        msg = 'Error in health process trying to start spawner. Did not find an _abaco_secret. Aborting'
        logger.critical(msg)
        # A bare `raise` here has no active exception to re-raise and would
        # surface as "RuntimeError: No active exception to reraise". Raise
        # the same exception type explicitly so the real reason is carried.
        raise RuntimeError(msg)

    # check logging strategy to determine log file name:
    log_file = 'abaco.log'
    if get_log_file_strategy() == 'split':
        log_file = 'spawner.log'
    try:
        run_container_with_docker(AE_IMAGE,
                                  command,
                                  name=name,
                                  environment=environment,
                                  mounts=[],
                                  log_file=log_file)
    except Exception as e:
        logger.critical(
            "Could not restart spawner for queue {}. Exception: {}".format(
                queue, e))