Beispiel #1
0
def main():
    """Validate the agent settings, load in-cluster credentials and run the agent.

    Logs an error and returns early when ``EPSAGON_TOKEN`` or ``CLUSTER_NAME``
    is falsy. Otherwise ``run(is_debug)`` is driven to completion on a
    dedicated event loop that has ``_reload_handler`` installed for ``SIGHUP``.
    """
    is_debug = asyncio.run(_is_debug_mode())
    LOGGER_CONFIGURER.configure_logger(is_debug)
    if not EPSAGON_TOKEN:
        logging.error(
            "Missing Epsagon token. "
            "Make sure to configure EPSAGON_TOKEN in cluster_agent_deployment.yaml"
        )
        return

    if not CLUSTER_NAME:
        logging.error(
            "Missing cluster name. "
            "Make sure to configure EPSAGON_CLUSTER_NAME in cluster_agent_deployment.yaml"
        )
        return

    config.load_incluster_config()
    logging.info("Loaded cluster config")
    if is_debug:
        loaded_conf = client.configuration.Configuration.get_default_copy()
        # Only report whether credentials exist; never log their values.
        logging.debug(
            "Loaded cluster configuration:\nHost: %s\n"
            "Using SSL Cert? %s\nUsing API token? %s",
            loaded_conf.host,
            bool(loaded_conf.ssl_ca_cert),
            bool(loaded_conf.api_key)
        )
    loop = asyncio.new_event_loop()
    try:
        loop.add_signal_handler(signal.SIGHUP, _reload_handler)
        loop.run_until_complete(run(is_debug))
    finally:
        # Release the loop's resources even when run() raises.
        loop.close()
Beispiel #2
0
async def _get_deployment_logs(namespace, name, tail_lines=TAIL_LINES_DEFAULT):
    """Return the concatenated log tails of all pods labelled ``release=<name>``.

    Each pod contributes its name on a line of its own followed by the
    result of ``read_namespaced_pod_log``. ``ApiException`` raised while
    listing pods or reading logs is logged; whatever was collected up to
    that point is still returned.
    """
    config.load_incluster_config()
    core_api = client.CoreV1Api()

    # Resolve the pod names through the label selector
    pod_names = []
    try:
        pod_list = await core_api.list_namespaced_pod(
            namespace, label_selector=f'release={name}')
        pod_names = [item.metadata.name for item in pod_list.items]
    except ApiException as e:
        logging.error(
            f"Exception when calling CoreV1Api->list_namespaced_pod: {e}")

    # Collect "<pod name>\n<log tail>" pieces in pod order
    pieces = []
    try:
        for pod_name in pod_names:
            pieces.append(pod_name + "\n")
            pieces.append(await core_api.read_namespaced_pod_log(
                pod_name, namespace, tail_lines=tail_lines))
    except ApiException as e:
        logging.error(
            f"Exception when calling CoreV1Api->read_namespaced_pod_log: {e}")

    return "".join(pieces)
Beispiel #3
0
async def log_stream_websocket(ws):
    """Forward a pod's followed log to a websocket, one line per text message.

    The pod ``name`` and ``namespace`` are read from the websocket's query
    parameters. Streaming stops when the log stream yields an empty line or
    a read times out; the websocket is closed afterwards.
    """
    config.load_incluster_config()
    core_api = client.CoreV1Api()

    params = ws.query_params
    name = params['name']
    namespace = params['namespace']

    await ws.accept()
    stream = await core_api.read_namespaced_pod_log(
        name,
        namespace,
        tail_lines=TAIL_LINES_DEFAULT,
        follow=True,
        _preload_content=False,
    )

    streaming = True
    while streaming:
        try:
            chunk = await stream.content.readline()
        except asyncio.TimeoutError as e:
            logging.error(
                f"""
            Async timeout server side, will recover from client side {e}
            """)
            # Treat a timeout like the end of the stream.
            chunk = b""
        if chunk:
            await ws.send_text(chunk.decode('utf-8'))
        else:
            streaming = False

    await ws.close()
Beispiel #4
0
async def main():
    """Initialise Sentry (optional), load Kubernetes credentials and monitor.

    Sentry is only set up when ``SENTRY_DSN`` is present in the environment.
    After ``helm version`` has been run, one ``monitor_forever`` task is
    started for each of "global", "production" and "staging", and the
    coroutine waits on all of them.
    """
    dsn = os.getenv("SENTRY_DSN")
    if dsn:
        # The release is the first line of the .version file.
        with open(".version") as version_file:
            release = version_file.readline().strip()
        # The environment is the part of HOSTNAME before the first dash.
        environment = os.getenv("HOSTNAME", "dev").split("-")[0]

        sentry_sdk.init(dsn, release=release, environment=environment)
        log.info("Sentry initialized with release='%s' and environment='%s'", release, environment)

    # Prefer in-cluster credentials; fall back to a kubeconfig file.
    try:
        config.load_incluster_config()
    except Exception:
        await config.load_kube_config()
    custom_objects = client.CustomObjectsApi()

    # Give tiller time to start up, if it isn't already
    log.info("Waiting for tiller to be available ..")
    await run_command("helm version", timeout=30)

    tasks = [
        asyncio.ensure_future(monitor_forever(custom_objects, scope))
        for scope in ("global", "production", "staging")
    ]

    signal.signal(signal.SIGTERM, functools.partial(signal_handler, tasks))

    await asyncio.wait(tasks)
Beispiel #5
0
    async def setup(self, app):
        """Create the Kubernetes clients, local state and the cluster informer.

        Runs the parent ``setup`` first, then loads credentials (in-cluster,
        falling back to kubeconfig), starts the "cluster" informer and
        schedules ``sync_clusters_loop`` as a background task.
        """
        await super().setup(app)

        # load_incluster_config() is a plain function here, while
        # load_kube_config() has to be awaited.
        try:
            config.load_incluster_config()
        except config.ConfigException:
            await config.load_kube_config()

        # All API wrappers share one ApiClient.
        self.api_client = client.ApiClient()
        self.core_client = client.CoreV1Api(api_client=self.api_client)
        self.custom_client = client.CustomObjectsApi(api_client=self.api_client)

        # Local bookkeeping
        self.cluster_waiters = defaultdict(Flag)
        self.clusters = {}
        self.username_to_clusters = defaultdict(dict)
        self.queue = WorkQueue()

        list_kwargs = {
            "group": "gateway.dask.org",
            "version": self.crd_version,
            "plural": "daskclusters",
            "label_selector": self.label_selector,
        }
        # Updates and deletions are routed to the same handler.
        self.informer = Informer(
            parent=self,
            name="cluster",
            client=self.custom_client,
            method="list_cluster_custom_object",
            method_kwargs=list_kwargs,
            on_update=self.on_cluster_event,
            on_delete=self.on_cluster_event,
        )
        await self.informer.start()
        self.sync_task = asyncio.ensure_future(self.sync_clusters_loop())
Beispiel #6
0
async def log_job(request, ws, job, pod_id, namespace, container):
    """Send a job's status changes and then its pod logs to a websocket.

    The socket is registered with the ``SocketManager`` kept per job uuid on
    ``request.app.job_logs_ws_managers``. Status changes are relayed until the
    job is running or done; pod logs are only streamed for a running job.
    """
    job_uuid = job.uuid.hex
    managers = request.app.job_logs_ws_managers
    if job_uuid not in managers:
        managers[job_uuid] = SocketManager()
    ws_manager = managers[job_uuid]

    ws_manager.add_socket(ws)

    # Relay status changes until the job runs or reaches a final state
    status = None
    while not (status == JobLifeCycle.RUNNING or JobLifeCycle.is_done(status)):
        job.refresh_from_db()
        latest_status = job.last_status
        if status != latest_status:
            status = latest_status
            await notify_ws(ws=ws, message=get_status_message(status))
            if should_disconnect(ws=ws, ws_manager=ws_manager):
                return
        await asyncio.sleep(SOCKET_SLEEP)

    # A finished job has no live logs to follow
    if JobLifeCycle.is_done(status):
        await notify_ws(ws=ws, message=get_status_message(status))
        return

    config.load_incluster_config()
    await log_job_pod(
        k8s_api=client.CoreV1Api(),
        ws=ws,
        ws_manager=ws_manager,
        pod_id=pod_id,
        container=container,
        namespace=namespace,
    )
Beispiel #7
0
async def main():
    """Parse the command line and wait for a Pod or a Service.

    Positional arguments are ``timeout_seconds`` and ``namespace``, followed
    by a ``Pod`` or ``Service`` sub-command that takes a ``name`` (``Service``
    also accepts ``--port``/``-p``, default 80). The selected wait coroutine
    is awaited together with ``timeout(args.timeout_seconds)``.

    Exits through ``parser.error`` (status 2) when no sub-command is given.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('timeout_seconds', type=int)
    parser.add_argument('namespace', type=str)

    subparsers = parser.add_subparsers(dest='kind')

    pod_parser = subparsers.add_parser('Pod')
    pod_parser.add_argument('name', type=str)

    service_parser = subparsers.add_parser('Service')
    service_parser.add_argument('name', type=str)
    service_parser.add_argument('--port', '-p', type=int, default=80)

    args = parser.parse_args()

    if args.kind == 'Pod':
        if 'USE_KUBE_CONFIG' in os.environ:
            await config.load_kube_config()
        else:
            config.load_incluster_config()
        v1 = client.CoreV1Api()

        t = wait_for_pod_complete(v1, args.namespace, args.name)
    else:
        # Validate explicitly: an ``assert`` is stripped under ``python -O``
        # and would otherwise surface as an AssertionError/AttributeError.
        if args.kind != 'Service':
            parser.error("a resource kind is required: choose 'Pod' or 'Service'")
        t = wait_for_service_alive(args.namespace, args.name, args.port)

    await asyncio.gather(timeout(args.timeout_seconds), t)
Beispiel #8
0
    async def _watch(self):
        """Check existing run pods, then watch pod events indefinitely.

        Pods labelled ``app=run`` in the namespace read from the
        ``runner.namespace`` config file are first passed to ``_check_pod``;
        afterwards every watch event is handed to ``_handle_watch_event``.
        An ``ApiException`` with status 410 restarts the watch; any other
        status is re-raised.
        """
        DBSession = self.connector.DBSession

        k8s_config.load_incluster_config()

        async with k8s_client.ApiClient() as api:
            core_api = k8s_client.CoreV1Api(api)
            namespace_path = os.path.join(self.config_dir, 'runner.namespace')
            with open(namespace_path) as namespace_file:
                namespace = namespace_file.read().strip()

            # The same query serves the initial listing and the watch
            query = {'namespace': namespace, 'label_selector': 'app=run'}

            # Find existing run pods
            existing = await core_api.list_namespaced_pod(**query)
            PROM_RUNS.set(0)
            for pod in existing.items:
                run_id = int(pod.metadata.labels['run'], 10)
                logger.info("Found run pod for %d", run_id)
                PROM_RUNS.inc()
                await self._check_pod(api, run_id, pod)

            # Watch changes
            watcher = k8s_watch.Watch()
            list_pods = core_api.list_namespaced_pod
            while True:
                try:
                    async for event in watcher.stream(list_pods, **query):
                        await self._handle_watch_event(api, DBSession, event)
                except k8s_client.ApiException as e:
                    if e.status == 410:
                        continue
                    raise
Beispiel #9
0
async def main():
    """Load Kubernetes credentials and run ``monitor`` on the custom objects API."""
    # In-cluster credentials first; any failure falls back to kubeconfig.
    try:
        config.load_incluster_config()
    except Exception:
        await config.load_kube_config()

    custom_objects = client.CustomObjectsApi()
    await monitor(custom_objects)
Beispiel #10
0
async def on_startup(app):
    """Store a Kubernetes client and a database pool on the application.

    A kubeconfig file is used when ``BATCH_USE_KUBE_CONFIG`` is present in
    the environment; otherwise the in-cluster configuration is loaded.
    """
    if 'BATCH_USE_KUBE_CONFIG' not in os.environ:
        config.load_incluster_config()
    else:
        await config.load_kube_config()

    app['k8s_client'] = client.CoreV1Api()
    app['dbpool'] = await create_database_pool()
Beispiel #11
0
 async def init(cls):
     """Load Kubernetes credentials, create the API client and start the watcher.

     Uses the in-cluster configuration when ``Config.IN_CLUSTER`` is truthy
     and a kubeconfig file otherwise. ``cls.init_watch_all_pods`` is started
     in a daemon thread.
     """
     logger.info('Init!')
     if Config.IN_CLUSTER:
         config.load_incluster_config()
     else:
         await config.load_kube_config()
     cls.v1 = client.CoreV1Api()
     t = threading.Thread(target=cls.init_watch_all_pods)
     # ``Thread.setDaemon()`` is deprecated; set the attribute instead.
     t.daemon = True
     t.start()
async def login_via_kubernetes_asyncio(
    logger: Union[logging.Logger, logging.LoggerAdapter],
    **kwargs: Any,
) -> ConnectionInfo:
    """
    Log in to the Kubernetes cluster and return the connection details.

    When :attr:`~crate.operator.config.Config.KUBECONFIG` is set, that config
    file is loaded; otherwise the in-cluster configuration is used. The
    resulting default client configuration is then translated into a
    :class:`ConnectionInfo`.
    """
    kubeconfig = config.KUBECONFIG
    if not kubeconfig:
        logger.info("Authenticating with in-cluster config")
        load_incluster_config()
    else:
        logger.info("Authenticating with KUBECONFIG='%s'", kubeconfig)
        await load_kube_config(config_file=kubeconfig)

    # The rest mirrors Kopf's `kopf.utilities.piggybacking.login_via_client`:
    # load the client configuration and extract what we need from it.
    k8s_config = Configuration.get_default_copy()

    # Auth-providers monkey-patch this method with their own, so call it
    # rather than reading api_key to obtain the provider's actual token.
    # Other keys (token, tokenFile) are retrieved through it as well.
    header: Optional[str] = k8s_config.get_api_key_with_prefix("authorization")
    parts: Sequence[str] = header.split(" ", 1) if header else []

    # Split "<scheme> <token>" as per RFC-7235, Appendix C.
    if not parts:
        scheme, token = None, None
    elif len(parts) == 1:
        scheme, token = None, parts[0]
    else:
        scheme, token = parts[0], parts[1]

    # Reduce the k8s_config object to our own minimalistic credentials.
    # Note: kubernetes client has no concept of a "current" context's namespace.
    # ca_path, certificate_path and private_key_path can be temporary files;
    # username and password are empty strings when not defined.
    return ConnectionInfo(
        server=k8s_config.host,
        ca_path=k8s_config.ssl_ca_cert,
        insecure=not k8s_config.verify_ssl,
        username=k8s_config.username or None,
        password=k8s_config.password or None,
        scheme=scheme,
        token=token,
        certificate_path=k8s_config.cert_file,
        private_key_path=k8s_config.key_file,
        # The priorities for `client` and `pykube-ng` are 10 and 20.
        priority=30,
    )
Beispiel #13
0
async def _get_all_namespaces():
    """Return the names of all namespaces reported by ``list_namespace``."""
    config.load_incluster_config()
    core_api = client.CoreV1Api()

    namespace_list = await core_api.list_namespace(watch=False)
    return [item.metadata.name for item in namespace_list.items]
Beispiel #14
0
async def _describe_pod(pod, namespace):
    """Return the ``read_namespaced_pod`` result for ``pod`` in ``namespace``.

    An ``ApiException`` is logged and ``None`` is returned instead of the
    pod description.
    """
    config.load_incluster_config()
    k8s_client = client.CoreV1Api()

    # Pre-bind the result so a failed call cannot hit an unbound local
    # at the return statement.
    ret = None
    try:
        ret = await k8s_client.read_namespaced_pod(
            pod, namespace, pretty='true')
    except ApiException as e:
        logging.error(
            f"Exception when calling CoreV1Api->read_namespaced_pod: {e}")

    return ret
Beispiel #15
0
def main():
    """Parse the arguments and run the three watch coroutines in sequence.

    Uses the in-cluster Kubernetes configuration. The event loop is closed
    even when one of the coroutines raises.
    """
    args = parser.parse_args()
    loop = asyncio.get_event_loop()

    try:
        # Authenticate with the in-cluster configuration. To run outside a
        # cluster, load a kubeconfig file instead:
        #     loop.run_until_complete(config.load_kube_config())
        config.load_incluster_config()
        loop.run_until_complete(simple_watch_clusters())
        loop.run_until_complete(simple_watch_nodepools())
        loop.run_until_complete(simple_watch_nodes(args.preemptible))
    finally:
        loop.close()
Beispiel #16
0
async def _get_pod_logs(pod, namespace, tail_lines=TAIL_LINES_DEFAULT):
    """Return the last ``tail_lines`` log lines of ``pod`` in ``namespace``.

    An ``ApiException`` is logged and an empty string is returned instead
    of the log text.
    """
    config.load_incluster_config()
    k8s_client = client.CoreV1Api()

    # Pre-bind the result so a failed call cannot hit an unbound local
    # at the return statement.
    ret = ""
    try:
        ret = await k8s_client.read_namespaced_pod_log(
            pod, namespace, tail_lines=tail_lines)
    except ApiException as e:
        # Name the call that actually failed (was: read_namespaced_pod).
        logging.error(
            f"Exception when calling CoreV1Api->read_namespaced_pod_log: {e}")

    return ret
Beispiel #17
0
async def main():
    """Load Kubernetes credentials and call ``label_newest_node`` every 10 seconds.

    In-cluster credentials are tried first; if loading them fails, a
    kubeconfig file is used instead.
    """
    logging.basicConfig(format="%(asctime)s %(message)s", level=logging.INFO)
    try:
        config.load_incluster_config()
        logging.debug('Acquired credentials from service account')
    except Exception:
        # Not a bare ``except:`` -- KeyboardInterrupt, SystemExit and task
        # cancellation must not trigger the kubeconfig fallback.
        await config.load_kube_config()
        logging.debug('Acquired credentials from kubeconfig')

    v1 = client.CoreV1Api()
    while True:
        await label_newest_node(v1, namespace, user_node_selector,
                                attractor_label)
        await asyncio.sleep(10)
Beispiel #18
0
    async def get(cls) -> CustomObjectsApi:
        """Return the cached ``CustomObjectsApi``, creating it on first use.

        A kubeconfig file is tried first; ``FileNotFoundError`` falls back to
        the in-cluster configuration. Any failure during initialisation is
        logged with its traceback and re-raised.
        """
        cached = cls.k8s_custom_object_api
        if cached:
            return cached

        try:
            try:
                await config.load_kube_config()
            except FileNotFoundError:
                config.load_incluster_config()

            api = client.CustomObjectsApi(client.ApiClient())
            cls.k8s_custom_object_api = api
        except Exception:
            logger.exception(f'Failed to initialize {cls.__name__}')
            raise
        return api
Beispiel #19
0
    async def setup(self, k8s_config=None):
        """Create the shared ``ApiClient`` and the API wrappers built on it.

        A truthy ``k8s_config`` is passed to the ``ApiClient`` as its
        configuration. Otherwise the in-cluster configuration is loaded when
        ``self.in_cluster`` is truthy, and a kubeconfig file when it is not.
        """
        if k8s_config:
            self.api_client = client.api_client.ApiClient(configuration=k8s_config)
        else:
            if not self.in_cluster:
                await config.load_kube_config()
            else:
                config.load_incluster_config()
            self.api_client = client.api_client.ApiClient()

        shared = self.api_client
        self.k8s_api = client.CoreV1Api(shared)
        self.k8s_batch_api = client.BatchV1Api(shared)
        self.k8s_beta_api = client.ExtensionsV1beta1Api(shared)
        self.k8s_custom_object_api = client.CustomObjectsApi(shared)
        self.k8s_version_api = client.VersionApi(shared)
Beispiel #20
0
async def _get_all_pods(namespace=None):
    """Return a ``{pod name: namespace}`` mapping.

    Pods are listed for ``namespace`` when it is truthy and across all
    namespaces otherwise. If two pods share a name, the one listed later
    wins.
    """
    config.load_incluster_config()
    core_api = client.CoreV1Api()

    if not namespace:
        pod_list = await core_api.list_pod_for_all_namespaces(watch=False)
    else:
        pod_list = await core_api.list_namespaced_pod(namespace, watch=False)

    return {
        item.metadata.name: item.metadata.namespace
        for item in pod_list.items
    }
Beispiel #21
0
    async def init(cls, in_cluster: bool,
                   task_runner_service: TaskRunnerService) -> K8sClient:
        """Build an instance with ``CoreApi`` and ``CoreV1Api`` clients.

        With ``in_cluster`` the in-cluster configuration is loaded; otherwise
        a kubeconfig file is loaded into a fresh ``Configuration``. Both API
        wrappers share one ``ApiClient``.
        """
        if not in_cluster:
            # local auth (from kubectl config)
            k8s_configuration = client.Configuration()
            await config.load_kube_config(client_configuration=k8s_configuration)
        else:
            # auth inside k8s cluster
            config.load_incluster_config()
            # NOTE(review): presumably Configuration() is expected to pick up
            # the defaults set by load_incluster_config() -- confirm for the
            # client version in use.
            k8s_configuration = client.Configuration()

        shared_api_client = client.ApiClient(k8s_configuration)
        core_api = client.CoreApi(shared_api_client)
        core_v1_api = client.CoreV1Api(shared_api_client)

        return cls(
            core_client=core_api,
            v1_client=core_v1_api,
            task_runner_service=task_runner_service,
        )
Beispiel #22
0
async def initialize_kubernetes() -> None:
    """Load the process-wide Kubernetes configuration.

    Call this once per process, during application startup.  Configuration
    is loaded independently of any particular Kubernetes client, so clients
    can be created per request afterwards.

    Notes
    -----
    When ``KUBERNETES_PORT`` is present in the environment,
    ``load_incluster_config`` is used.  Otherwise ``load_kube_config`` is
    awaited.
    """
    if "KUBERNETES_PORT" not in os.environ:
        await config.load_kube_config()
        return
    config.load_incluster_config()
Beispiel #23
0
async def log_experiment(request, ws, experiment, namespace, container):
    """Send an experiment's status changes and then its job logs to a websocket.

    The socket is registered with the ``SocketManager`` kept per experiment
    uuid on ``request.app.experiment_logs_ws_managers``. Status changes are
    relayed until the experiment is running or done; for a running experiment
    one ``log_job_pod`` task per job is started and awaited.
    """
    experiment_uuid = experiment.uuid.hex
    if experiment_uuid in request.app.experiment_logs_ws_managers:
        ws_manager = request.app.experiment_logs_ws_managers[experiment_uuid]
    else:
        ws_manager = SocketManager()
        request.app.experiment_logs_ws_managers[experiment_uuid] = ws_manager

    ws_manager.add_socket(ws)

    # Stream phase changes
    status = None
    while status != ExperimentLifeCycle.RUNNING and not ExperimentLifeCycle.is_done(
            status):
        experiment.refresh_from_db()
        if status != experiment.last_status:
            status = experiment.last_status
            await notify_ws(ws=ws, message=get_status_message(status))
            if should_disconnect(ws=ws, ws_manager=ws_manager):
                return
        await asyncio.sleep(SOCKET_SLEEP)

    if ExperimentLifeCycle.is_done(status):
        await notify_ws(ws=ws, message=get_status_message(status))
        return

    config.load_incluster_config()
    k8s_api = client.CoreV1Api()
    # asyncio.wait() rejects bare coroutines on Python 3.11+ (deprecated
    # since 3.8), so wrap each log stream in a task first.
    log_requests = [
        asyncio.ensure_future(
            log_job_pod(k8s_api=k8s_api,
                        ws=ws,
                        ws_manager=ws_manager,
                        pod_id=job.pod_id,
                        container=container,
                        namespace=namespace,
                        task_type=job.role,
                        task_idx=job.sequence))
        for job in experiment.jobs.all()
    ]
    # asyncio.wait() raises ValueError on an empty collection.
    if log_requests:
        await asyncio.wait(log_requests)
def main():
    """Start the metrics endpoint and run every enabled resource task.

    Each name in ``tasklist`` is looked up in the module globals, called,
    and scheduled; the event loop runs until all of them have finished.
    """
    loop = asyncio.get_event_loop()
    config.load_incluster_config()

    # Start Prometheus HTTP Endpoint for exposing metrics
    start_http_server(8000)

    # Schedule one task per enabled resource
    tasks = [
        asyncio.ensure_future(globals()[resource]())
        for resource in tasklist
    ]

    # Run until all tasks are done
    loop.run_until_complete(asyncio.wait(tasks))
    loop.close()
Beispiel #25
0
async def main():
    """Parse the command line and wait for a Pod or a Service.

    Positional arguments are ``timeout_seconds`` and ``namespace``, followed
    by a ``Pod`` or ``Service`` sub-command that takes a ``name``. ``Service``
    also accepts ``--port``/``-p`` (default 80), ``--endpoint``/``-e``
    (default ``/healthcheck``) and repeatable ``--header NAME VALUE`` pairs.
    The selected wait coroutine is awaited together with
    ``timeout(args.timeout_seconds)``.

    Exits through ``parser.error`` (status 2) when no sub-command is given.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('timeout_seconds', type=int)
    parser.add_argument('namespace', type=str)

    subparsers = parser.add_subparsers(dest='kind')

    pod_parser = subparsers.add_parser('Pod')
    pod_parser.add_argument('name', type=str)

    service_parser = subparsers.add_parser('Service')
    service_parser.add_argument('name', type=str)
    service_parser.add_argument('--port', '-p', type=int, default=80)
    service_parser.add_argument('--endpoint',
                                '-e',
                                type=str,
                                default='/healthcheck')
    service_parser.add_argument('--header', action='append', type=str, nargs=2)

    args = parser.parse_args()

    if args.kind == 'Pod':
        if 'USE_KUBE_CONFIG' in os.environ:
            await config.load_kube_config()
        else:
            config.load_incluster_config()
        v1 = client.CoreV1Api()

        t = wait_for_pod_complete(v1, args.namespace, args.name)
    else:
        # Validate explicitly: an ``assert`` is stripped under ``python -O``
        # and would otherwise surface as an AssertionError/AttributeError.
        if args.kind != 'Service':
            parser.error("a resource kind is required: choose 'Pod' or 'Service'")
        # Each --header occurrence is a [name, value] pair.
        headers = None if args.header is None else {
            flag: val
            for flag, val in args.header
        }
        t = wait_for_service_alive(args.namespace, args.name, args.port,
                                   args.endpoint, headers)

    await asyncio.gather(timeout(args.timeout_seconds), t)
async def main():
    """Print every pod's IP, namespace and name every ten seconds, forever.

    Any exception raised during an iteration is printed to stdout and the
    loop carries on after the usual ten second pause.
    """
    while True:

        try:

            # it works only if this script is run by K8s as a POD
            config.load_incluster_config()

            # A client is created on every iteration; the context manager
            # closes its HTTP session instead of leaking one per pass.
            async with client.ApiClient() as api:
                v1 = client.CoreV1Api(api)
                print("Listing pods with their IPs:")
                ret = await v1.list_pod_for_all_namespaces()

                for i in ret.items:
                    print(i.status.pod_ip, i.metadata.namespace, i.metadata.name)

        except Exception:
            traceback.print_exc(file=sys.stdout)

        finally:
            print("sleep 10s")
            await asyncio.sleep(10)
Beispiel #27
0
async def poll():
    """Poll the pod ``name`` in ``namespace`` once per second until it finishes.

    Exits the process with status 0 when every container has terminated with
    exit code 0, and with status 1 when all have terminated but at least one
    exit code is non-zero. A 404 from the API is reported and polling
    continues; other exceptions are printed to stderr and polling continues.
    Cancellation is reported and re-raised.
    """
    print('info: in poll', file=sys.stderr)
    if 'USE_KUBE_CONFIG' in os.environ:
        await config.load_kube_config()
    else:
        config.load_incluster_config()
    v1 = client.CoreV1Api()
    while True:
        try:
            try:
                pod = await v1.read_namespaced_pod(
                    name,
                    namespace,
                    _request_timeout=5.0)
                if pod and pod.status and pod.status.container_statuses:
                    container_statuses = pod.status.container_statuses
                    if all(cs.state and cs.state.terminated for cs in container_statuses):
                        if all(cs.state.terminated.exit_code == 0 for cs in container_statuses):
                            print('info: success')
                            sys.exit(0)
                        else:
                            print('error: a container failed')
                            sys.exit(1)
            except client.rest.ApiException as exc:
                if exc.status == 404:
                    # The pod does not exist (yet); keep polling.
                    print('info: 404', file=sys.stderr)
                else:
                    raise
        except (asyncio.CancelledError, concurrent.futures.CancelledError):
            # Since Python 3.8 asyncio.CancelledError is a separate class from
            # concurrent.futures.CancelledError, so both are listed.
            print('info: CancelledError', file=sys.stderr)
            raise
        except Exception as e:
            print(f'poll failed due to exception {traceback.format_exc()}{e}', file=sys.stderr)

        await asyncio.sleep(1)
Beispiel #28
0
import os
import urllib.parse

import aiobotocore
from kubernetes_asyncio import client, config

# ============================================================================
# Load the Kubernetes configuration once, at import time.
# NOTE(review): kubernetes_asyncio documents load_kube_config() as a
# coroutine; called without ``await`` as below it may never actually run --
# confirm and, if so, load it from an async startup hook instead.
if not os.environ.get("IN_CLUSTER"):
    config.load_kube_config()
else:
    print("Cluster Init")
    config.load_incluster_config()

# Namespace used by K8SManager unless another one is passed in.
DEFAULT_NAMESPACE = os.environ.get("BROWSER_NAMESPACE") or "browsers"


# ============================================================================
class K8SManager:
    """Holds core and batch API clients bound to a single namespace."""

    def __init__(self, namespace=DEFAULT_NAMESPACE):
        self.namespace = namespace
        self.core_api = client.CoreV1Api()
        self.batch_api = client.BatchV1Api()

    async def get_job(self, name):
        """Return the job ``name`` from this namespace.

        Any exception is printed and ``None`` is returned instead.
        """
        try:
            job = await self.batch_api.read_namespaced_job(
                name=name, namespace=self.namespace)
        except Exception as exc:
            print(exc)
            return None
        return job
Beispiel #29
0
    async def setup(self):
        """Bring up the controller: clients, informers, reconcilers and HTTP API.

        Steps, in order: register SIGTERM/SIGINT handlers, create the
        rate-limited Kubernetes clients, start the "cluster", "pod" and
        "endpoints" informers, spawn ``self.parallelism`` reconciler tasks,
        start the expired-record cleanup loop, and finally serve ``self.app``
        on ``self.address`` (a ``host:port`` string).
        """
        # Register signal handlers
        loop = asyncio.get_event_loop()
        for s in (signal.SIGTERM, signal.SIGINT):
            loop.add_signal_handler(s, self.handle_shutdown_signal, s)

        # Rate limiter for k8s api calls
        self.rate_limiter = RateLimiter(rate=self.k8s_api_rate_limit,
                                        burst=self.k8s_api_rate_limit_burst)

        # Initialize the kubernetes clients
        try:
            config.load_incluster_config()
        except config.ConfigException:
            await config.load_kube_config()
        self.api_client = client.ApiClient()
        self.core_client = RateLimitedClient(
            client.CoreV1Api(api_client=self.api_client), self.rate_limiter)
        self.custom_client = RateLimitedClient(
            client.CustomObjectsApi(api_client=self.api_client),
            self.rate_limiter)

        # Local state
        self.cluster_info = collections.defaultdict(ClusterInfo)
        self.stopped_clusters = {}

        # Initialize queue and informers
        self.queue = WorkQueue(
            backoff=Backoff(base_delay=self.backoff_base_delay,
                            max_delay=self.backoff_max_delay))
        endpoints_selector = (self.label_selector +
                              ",app.kubernetes.io/component=dask-scheduler")
        self.informers = {
            "cluster":
            Informer(
                parent=self,
                name="cluster",
                client=self.custom_client,
                method="list_cluster_custom_object",
                method_kwargs=dict(
                    group="gateway.dask.org",
                    version=self.crd_version,
                    plural="daskclusters",
                    label_selector=self.label_selector,
                ),
                on_update=self.on_cluster_update,
                on_delete=self.on_cluster_delete,
            ),
            "pod":
            Informer(
                parent=self,
                name="pod",
                client=self.core_client,
                method="list_pod_for_all_namespaces",
                method_kwargs=dict(label_selector=self.label_selector),
                on_update=self.on_pod_update,
                on_delete=self.on_pod_delete,
            ),
            "endpoints":
            Informer(
                parent=self,
                name="endpoints",
                client=self.core_client,
                method="list_endpoints_for_all_namespaces",
                method_kwargs=dict(label_selector=endpoints_selector),
                on_update=self.on_endpoints_update,
                on_delete=self.on_endpoints_delete,
            ),
        }
        # asyncio.wait() rejects bare coroutines on Python 3.11+ (deprecated
        # since 3.8), so wrap each informer start in a task first.
        await asyncio.wait([
            asyncio.ensure_future(i.start()) for i in self.informers.values()
        ])
        self.log.debug("All informers started")

        # Initialize reconcilers
        self.reconcilers = [
            asyncio.ensure_future(self.reconciler_loop())
            for _ in range(self.parallelism)
        ]

        # Start background tasks
        self.task_pool = TaskPool()
        self.task_pool.spawn(self.cleanup_expired_cluster_records_loop())

        # Start the aiohttp application
        self.runner = web.AppRunner(
            self.app,
            handle_signals=False,
            access_log_class=AccessLogger,
            access_log=self.log,
        )
        await self.runner.setup()

        host, port = self.address.split(":")
        port = int(port)
        site = web.TCPSite(self.runner,
                           host,
                           port,
                           shutdown_timeout=15.0,
                           backlog=128)
        await site.start()
        self.log.info("%s started!", self.name)
        self.log.info("API listening at http://%s", self.address)
Beispiel #30
0
    async def run_inner(self, run_info):
        """Schedule a runner pod and its proxy service for one run.

        Nothing is executed here: a pod built from the ``runner.pod_spec``
        config file (with the run id appended to the ``runner`` container's
        arguments) and a service on TCP port 5597 are created through the
        Kubernetes API in the namespace read from ``runner.namespace``.
        The pod itself runs the experiment and updates the database directly.
        """
        run_id = run_info['id']
        del run_info

        k8s_config.load_incluster_config()

        name = self._pod_name(run_id)
        run_arg = str(run_id)

        # Read the pod template and target namespace from the configmap volume
        spec_path = os.path.join(self.config_dir, 'runner.pod_spec')
        with open(spec_path) as spec_file:
            pod_spec = yaml.safe_load(spec_file)
        namespace_path = os.path.join(self.config_dir, 'runner.namespace')
        with open(namespace_path) as namespace_file:
            namespace = namespace_file.read().strip()

        # Only the 'runner' container is customised
        for container in pod_spec['containers']:
            if container['name'] != 'runner':
                continue
            container['args'] += [run_arg]

            # This is mostly used by Tilt
            if os.environ.get('OVERRIDE_RUNNER_IMAGE'):
                container['image'] = os.environ['OVERRIDE_RUNNER_IMAGE']

        def run_labels():
            # Fresh dict per use; serves as both labels and service selector.
            return {'app': 'run', 'run': run_arg}

        async with k8s_client.ApiClient() as api:
            core_api = k8s_client.CoreV1Api(api)

            # Create a Kubernetes pod to run
            pod_body = k8s_client.V1Pod(
                api_version='v1',
                kind='Pod',
                metadata=k8s_client.V1ObjectMeta(name=name, labels=run_labels()),
                spec=pod_spec,
            )
            await core_api.create_namespaced_pod(namespace=namespace, body=pod_body)
            logger.info("Pod created: %s", name)
            PROM_RUNS.inc()

            # Create a service for proxy connections
            proxy_port = k8s_client.V1ServicePort(protocol='TCP', port=5597)
            service_body = k8s_client.V1Service(
                api_version='v1',
                kind='Service',
                metadata=k8s_client.V1ObjectMeta(name=name, labels=run_labels()),
                spec=k8s_client.V1ServiceSpec(
                    selector=run_labels(),
                    ports=[proxy_port],
                ),
            )
            await core_api.create_namespaced_service(
                namespace=namespace, body=service_body)
            logger.info("Service created: %s", name)
Beispiel #31
0
async def on_startup(app):
    """Store a Kubernetes client on the application.

    A kubeconfig file is used when ``BATCH_USE_KUBE_CONFIG`` is present in
    the environment; otherwise the in-cluster configuration is loaded.
    """
    if 'BATCH_USE_KUBE_CONFIG' not in os.environ:
        config.load_incluster_config()
    else:
        await config.load_kube_config()

    app['k8s_client'] = client.CoreV1Api()