Example #1
    def __init__(self):
        # Asynchronous Mongo client (motor) for non-blocking access
        self.async_client = motor.motor_asyncio.AsyncIOMotorClient(
            username=config.DB_USERNAME,
            password=config.get_from_file(config.MONGO_KEY_PATH),
            host=config.MONGO_HOST,
            port=config.get_int("MONGO_PORT", 27016))

        # Synchronous Mongo client (pymongo) for regular, blocking access
        self.reg_client = pymongo.MongoClient(
            username=config.DB_USERNAME,
            password=config.get_from_file(config.MONGO_KEY_PATH),
            host=config.MONGO_HOST,
            port=config.get_int("MONGO_PORT", 27016))

        self.init_db()
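The snippet above relies on helpers such as config.get_int and config.get_from_file whose implementations are not shown. A minimal sketch of what a get_int helper could look like, assuming settings are read from environment variables with a fallback default (the environment-variable lookup is an assumption, not the project's actual implementation):

import os

def get_int(key: str, default: int) -> int:
    # Hypothetical helper: read an integer setting, falling back to the default
    # when the variable is unset or not a valid integer.
    value = os.environ.get(key)
    try:
        return int(value) if value is not None else default
    except ValueError:
        return default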
Example #2
    async def init(cls, docker_client, redis, session, autoscale_worker,
                   autoscale_app, autoheal_worker, autoheal_apps):
        self = cls(docker_client, redis, session, autoscale_worker,
                   autoscale_app, autoheal_worker, autoheal_apps)
        # await redis.flushall()  # TODO: do a more targeted cleanup of redis
        self.app_repo = await AppRepo.create(config.APPS_PATH, session)
        self.running_apps = await self.get_running_apps()
        self.worker = await get_service(self.docker_client,
                                        static.WORKER_SERVICE)
        services = await self.docker_client.services.list()
        self.service_replicas = {
            s["Spec"]["Name"]: (await get_replicas(self.docker_client,
                                                   s["ID"]))
            for s in services
        }
        self.max_workers = config.get_int("MAX_WORKER_REPLICAS", 10)

        try:
            await self.redis.xgroup_create(static.REDIS_WORKFLOW_QUEUE,
                                           static.REDIS_WORKFLOW_GROUP,
                                           mkstream=True)
            logger.info(
                f"Created {static.REDIS_WORKFLOW_QUEUE} stream and {static.REDIS_WORKFLOW_GROUP} group."
            )

        except aioredis.errors.BusyGroupError:
            logger.info(
                f"{static.REDIS_WORKFLOW_QUEUE} stream already exists.")

        if len(self.app_repo.apps) < 1:
            logger.error(
                "Walkoff must be loaded with at least one app. Please check that applications dir exists."
            )
            exit(1)
        return self
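Because __init__ cannot be awaited, this snippet uses an async-factory classmethod: the instance is constructed synchronously and its I/O-bound setup is awaited inside init. A hedged sketch of a call site, assuming the enclosing class is named Umpire (the class name and this wrapper function are not part of the snippet):

async def start_umpire(docker_client, redis, session):
    # Hypothetical call site: the keyword flags mirror the parameters of init() above.
    umpire = await Umpire.init(docker_client, redis, session,
                               autoscale_worker=True, autoscale_app=True,
                               autoheal_worker=True, autoheal_apps=True)
    await umpire.monitor_queues()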
Example #3
    async def monitor_queues(self):
        # count = 0
        while True:
            services = await self.docker_client.services.list()
            self.service_replicas = {
                s["Spec"]["Name"]: (await get_replicas(self.docker_client,
                                                       s["ID"]))
                for s in services
            }

            if self.autoscale_worker:
                await self.scale_worker()
            if self.autoscale_app:
                await self.scale_app()
            if self.autoheal_apps:
                await self.check_pending_actions()

            # Reload the app projects and apis every once in a while
            # if count * config.get_int("UMPIRE_HEARTBEAT", 1) >= config.get_int("APP_REFRESH", 60):
            #     count = 0
            #     logger.info("Refreshing apps.")
            #     # TODO: maybe do this a bit more intelligently? Presently it throws uniqueness errors for db
            #     await self.app_repo.load_apps_and_apis()
            #     await self.app_repo.delete_unused_apps_and_apis()
            #
            await asyncio.sleep(config.get_int("UMPIRE_HEARTBEAT", 1))
Example #4
    def __init__(self):
        self.host = config.get_str("DB", "host")
        self.user = config.get_str("DB", "user")
        self.password = config.get_str("DB", "password")
        self.db = config.get_str("DB", "db")
        self.port = config.get_int("DB", "port")
        self.charset = config.get_str("DB", "charset")
        self.conn = None
        self.cur = None
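Unlike the earlier snippets, this variant calls config.get_str("DB", "host") with a section and a key, which suggests a configparser-backed wrapper. A minimal sketch under that assumption (the class name, file name, and method bodies are hypothetical):

import configparser

class Config:
    # Hypothetical configparser-backed wrapper matching the (section, key) calls above;
    # the settings.ini file name is an assumption.
    def __init__(self, path="settings.ini"):
        self._parser = configparser.ConfigParser()
        self._parser.read(path)

    def get_str(self, section, key):
        return self._parser.get(section, key)

    def get_int(self, section, key):
        return self._parser.getint(section, key)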
Example #5
    async def scale_app(self):
        self.running_apps = await self.get_running_apps()
        logger.debug(
            f"Running apps: {[{s: self.service_replicas.get(s)['running']} for s in self.running_apps.keys()]}")

        streams = [key.split(':') for key in await self.redis.keys(pattern=UUID_GLOB + ":*:*", encoding="utf-8")]

        workloads = {f"{app_name}:{version}": {"total": 0, "queued": 0, "executing": 0}
                     for _, app_name, version in streams}

        if len(streams) > 0:
            for execution_id, app_name, version in streams:
                stream = f"{execution_id}:{app_name}:{version}"
                group = f"{app_name}:{version}"
                try:
                    executing_work = (await self.redis.xpending(stream=stream, group_name=group))[0]
                    total_work = await xlen(self.redis, stream)
                except aioredis.ReplyError:
                    continue  # the group or stream got closed while we were checking other streams

                queued_work = total_work - executing_work

                workloads[group]["executing"] += executing_work
                workloads[group]["queued"] += queued_work
                workloads[group]["total"] += total_work

                service_name = f"{static.APP_PREFIX}_{app_name}"
                curr_replicas = self.service_replicas.get(service_name, {"running": 0, "desired": 0})["desired"]
                max_replicas = config.get_int("MAX_APP_REPLICAS", 10)
                replicas_needed = min(total_work, max_replicas)

                logger.debug(f"Total work: {total_work}")
                logger.debug(f"queued: {total_work}")

                logger.debug(f"Needed replicas: {replicas_needed}")
                logger.debug(f"Current replicas: {curr_replicas}")

                if replicas_needed > curr_replicas:
                    logger.info(f"Launching app {':'.join([service_name, version])}")

                if replicas_needed > curr_replicas > 0:
                    await self.launch_app(service_name, version, replicas_needed)
                elif replicas_needed > curr_replicas == 0:  # scale to 0 and restart
                    await self.launch_app(service_name, version, 0)
                    await self.launch_app(service_name, version, replicas_needed)

            for service_name, workload in workloads.items():
                logger.debug(f"Queued actions for {service_name}: {workload['queued']}")
                logger.debug(f"Executing actions for {service_name}: {workload['executing']}")
Example #6
    async def get_workflow(redis: aioredis.Redis):
        """
            Continuously monitors the workflow queue for new work
        """
        while True:
            logger.info("Waiting for workflows...")
            # if static.CONTAINER_ID is None:
            #     logger.exception("Environment variable 'HOSTNAME' does not exist in worker container.")
            #     sys.exit(-1)

            try:
                message = await redis.xread_group(
                    static.REDIS_WORKFLOW_GROUP,
                    static.CONTAINER_ID,
                    streams=[static.REDIS_WORKFLOW_QUEUE],
                    latest_ids=['>'],
                    timeout=config.get_int("WORKER_TIMEOUT", 30) * 1000,
                    count=1)
            except aioredis.ReplyError as e:
                logger.error(f"Error reading from workflow queue: {e}.")
                sys.exit(-1)

            if len(message) < 1:  # We've timed out with no work. Guess we'll die now...
                sys.exit(1)

            execution_id_workflow, stream, id_ = deref_stream_message(message)
            execution_id, workflow = execution_id_workflow
            try:
                if not (await redis.sismember(static.REDIS_ABORTING_WORKFLOWS,
                                              execution_id)):
                    await redis.sadd(static.REDIS_EXECUTING_WORKFLOWS,
                                     execution_id)
                    yield workflow_loads(workflow)

            except Exception as e:
                logger.exception(e)
            finally:  # Clean up workflow-queue
                await redis.xack(stream=stream,
                                 group_name=static.REDIS_WORKFLOW_GROUP,
                                 id=id_)
                await xdel(redis, stream=stream, id_=id_)
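Since get_workflow yields rather than returns, a caller drains it with async for. A minimal consumer sketch (the function name and log message are illustrative only; logger is assumed to be configured as in the snippet above):

async def consume_workflows(redis):
    # Hypothetical consumer: get_workflow is an async generator, so each
    # deserialized workflow is pulled with `async for` as it becomes available.
    async for workflow in get_workflow(redis):
        logger.info(f"Dispatching workflow {workflow}")  # hand off to the executor here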