def get_version(self, app, print_stdout=True):
    """
    Return the version string for an app, falling back to today's date.

    For ``python-paramtools`` apps a container is started from the app's
    local image (``<owner>_<title>_tasks:<self.tag>``) and asked to print
    ``cs_config.functions.get_version()``. The first log line, after being
    passed through ``strip_secrets``, is used as the version. For any other
    tech, or when the container produces no output, the current date is
    returned as a string.

    If ``print_stdout`` is true and a version was read from the container,
    it is also written to ``sys.stdout``.
    """
    owner_slug = clean(app["owner"])
    title_slug = clean(app["title"])
    image = f"{owner_slug}_{title_slug}_tasks"

    # Only paramtools apps expose a version; everything else gets the date.
    if app["tech"] != "python-paramtools":
        return str(datetime.now().date())

    secrets = self.config._list_secrets(app)
    container = docker.from_env().containers.run(
        f"{image}:{self.tag}",
        [
            "python",
            "-c",
            "from cs_config import functions; print(functions.get_version())",
        ],
        environment=secrets,
        detach=True,
        ports=None,
    )

    # Drain the whole log stream; only the first line is used.
    output = [
        strip_secrets(line, secrets) for line in container.logs(stream=True)
    ]
    version = output[0] if output else None

    if not version:
        return str(datetime.now().date())
    if print_stdout:
        sys.stdout.write(version)
    return version
def env(self, owner, title, deployment_name, config):
    """
    Build the list of environment variables for a deployment container.

    The list contains, in order: ``OWNER`` and ``TITLE`` (taken from
    ``config``), one variable per entry returned by ``ModelSecrets(...).list()``
    (each referencing the ``<owner>-<title>-secret`` Kubernetes secret), and
    finally ``URL_BASE_PATHNAME`` set to ``/<owner>/<title>/<deployment_name>/``.
    """
    secret_resource = f"{clean(owner)}-{clean(title)}-secret"

    variables = [
        kclient.V1EnvVar("OWNER", config["owner"]),
        kclient.V1EnvVar("TITLE", config["title"]),
    ]

    # Each secret is exposed under its own key name via a secretKeyRef.
    secret_keys = ModelSecrets(
        owner=owner, title=title, project=self.project
    ).list()
    variables.extend(
        kclient.V1EnvVar(
            name=key,
            value_from=kclient.V1EnvVarSource(
                secret_key_ref=kclient.V1SecretKeySelector(
                    key=key, name=secret_resource
                )
            ),
        )
        for key in secret_keys
    )

    variables.append(
        kclient.V1EnvVar(
            name="URL_BASE_PATHNAME",
            value=f"/{owner}/{title}/{deployment_name}/",
        )
    )
    return variables
def __init__(self, owner=None, title=None, name=None, project=None):
    """
    Identify the app either by ``owner`` and ``title`` or by a combined
    ``name`` of the form ``"owner/title"``.

    ``name`` is only consulted when ``owner`` or ``title`` is missing/falsy.
    Sanitized copies of both parts are stored as ``safe_owner`` and
    ``safe_title`` before the parent initializer is called with ``project``.
    """
    if not (owner and title):
        # Fall back to splitting the combined "owner/title" identifier.
        owner, title = name.split("/")

    self.owner = owner
    self.title = title
    self.project = project
    self.safe_owner = clean(self.owner)
    self.safe_title = clean(self.title)
    super().__init__(project)
def configure(self, owner, title, tag, job_id, callback_url, route_name):
    """
    Build and return the Kubernetes ``V1Job`` object for a single job run.

    The job runs one container, named after ``job_id``, from the app's image
    in the configured registry with the ``cs-jobs`` command. The job is named
    ``job_id`` and its pod is labelled with ``app=<owner>-<title>-job`` and
    ``job-id=<job_id>``. Unless ``self.quiet`` is set, the job is printed as
    YAML.
    """
    job_id = str(job_id)
    config = self.model_config
    owner_slug = clean(owner)
    title_slug = clean(title)
    app_label = f"{owner_slug}-{title_slug}-job"

    image = f"{self.cr}/{self.project}/{owner_slug}_{title_slug}_tasks:{tag}"
    command = [
        "cs-jobs",
        "--callback-url",
        callback_url,
        "--route-name",
        route_name,
    ]
    container = kclient.V1Container(
        name=job_id,
        image=image,
        command=command,
        env=self.env(owner, title, config),
        resources=kclient.V1ResourceRequirements(**config["resources"]),
    )

    # Pod template: a single container, never restarted.
    pod_metadata = kclient.V1ObjectMeta(
        labels={"app": app_label, "job-id": job_id}
    )
    pod_spec = kclient.V1PodSpec(
        restart_policy="Never",
        containers=[container],
        node_selector={"component": "model"},
    )
    template = kclient.V1PodTemplateSpec(metadata=pod_metadata, spec=pod_spec)

    # Job spec wrapping the pod template.
    job_spec = kclient.V1JobSpec(
        template=template,
        backoff_limit=1,
        ttl_seconds_after_finished=0,
    )

    job = kclient.V1Job(
        api_version="batch/v1",
        kind="Job",
        metadata=kclient.V1ObjectMeta(name=job_id),
        spec=job_spec,
    )

    if not self.quiet:
        print(yaml.dump(job.to_dict()))

    return job
def _set_secrets(self, app, config):
    """
    Append one ``secretKeyRef`` environment entry to ``config["env"]`` for
    every secret key listed for ``app``.

    Each entry points at the ``<owner>-<title>-secret`` secret and uses the
    secret key as both the variable name and the key. ``config`` is mutated
    in place; nothing is returned.
    """
    secret_resource = "{}-{}-secret".format(
        clean(app["owner"]), clean(app["title"])
    )
    for secret_key in self.config._list_secrets(app):
        reference = {"name": secret_resource, "key": secret_key}
        entry = {
            "name": secret_key,
            "valueFrom": {"secretKeyRef": reference},
        }
        config["env"].append(entry)
def push_app_image(self, app):
    """
    Publish the app's image, either into the kind cluster or to the registry.

    With ``self.use_kind`` the image is loaded with ``kind load docker-image``;
    otherwise it is pushed with ``docker push``. The tag is
    ``self.get_latest_tag(app)`` when ``self.use_latest_tag`` is set and
    ``self.tag`` otherwise.

    Raises:
        AssertionError: if ``self.cr`` is ``None``.
        Exception: if ``use_latest_tag`` is requested without ``use_kind``.
    """
    assert self.cr is not None

    image = "{}_{}_tasks".format(clean(app["owner"]), clean(app["title"]))

    if self.use_kind:
        publish_cmd = "kind load docker-image --name cs --nodes cs-worker2"
    else:
        if self.use_latest_tag:
            raise Exception(
                "Unable to push latest tag for use outside of kind.")
        publish_cmd = "docker push"

    tag = self.get_latest_tag(app) if self.use_latest_tag else self.tag

    run(f"{publish_cmd} {self.cr}/{self.project}/{image}:{tag}")
def write_secrets(self, app):
    """
    Render the Kubernetes Secret manifest for a single app and emit it.

    A deep copy of ``self.secret_template`` is named
    ``<owner>-<title>-secret`` and its ``stringData`` mapping is filled with
    the key/value pairs returned by ``self.config._list_secrets(app)``.

    When ``self.kubernetes_target`` is ``"-"`` the manifest is written to
    stdout as YAML followed by a ``---`` separator line; otherwise it is
    written to ``<kubernetes_target>/<owner>-<title>-secret.yaml``.

    Returns:
        The populated secret manifest (a dict).
    """
    secret_config = copy.deepcopy(self.secret_template)
    safeowner = clean(app["owner"])
    safetitle = clean(app["title"])
    name = f"{safeowner}-{safetitle}-secret"
    secret_config["metadata"]["name"] = name

    # Guarantee a dict *before* populating it, so a template whose
    # ``stringData`` is empty/null does not fail on the first assignment.
    if not secret_config.get("stringData"):
        secret_config["stringData"] = dict()

    # Distinct loop names on purpose: reusing ``name`` here would clobber the
    # manifest name and the output file would be named after the last secret
    # key instead of ``<owner>-<title>-secret``.
    for secret_key, secret_value in self.config._list_secrets(app).items():
        secret_config["stringData"][secret_key] = secret_value

    if self.kubernetes_target == "-":
        sys.stdout.write(yaml.dump(secret_config))
        sys.stdout.write("---")
        sys.stdout.write("\n")
    else:
        with open(self.kubernetes_target / Path(f"{name}.yaml"), "w") as f:
            f.write(yaml.dump(secret_config))

    return secret_config
def env(self, owner, title, config):
    """
    Build the list of environment variables for a job container.

    The list contains ``OWNER``, ``TITLE`` and ``EXP_TASK_TIME`` (the string
    form of ``config["exp_task_time"]``), followed by one variable per entry
    returned by ``ModelSecrets(...).list()``, each referencing the
    ``<owner>-<title>-secret`` Kubernetes secret.
    """
    secret_resource = f"{clean(owner)}-{clean(title)}-secret"

    variables = [
        kclient.V1EnvVar("OWNER", owner),
        kclient.V1EnvVar("TITLE", title),
        kclient.V1EnvVar("EXP_TASK_TIME", str(config["exp_task_time"])),
    ]

    # NOTE: the original carried a commented-out block that would inject
    # BUCKET, REDIS_HOST, REDIS_PORT and REDIS_EXECUTOR_PW from a
    # "worker-secret" secret; it is disabled, so only model secrets are added.
    secret_keys = ModelSecrets(
        owner=owner, title=title, project=self.project
    ).list()
    variables.extend(
        kclient.V1EnvVar(
            name=key,
            value_from=kclient.V1EnvVarSource(
                secret_key_ref=kclient.V1SecretKeySelector(
                    key=key, name=secret_resource
                )
            ),
        )
        for key in secret_keys
    )
    return variables
def build_app_image(self, app):
    """
    Build the Docker image for a single app and tag it for the registry.

    The ``REPO_TAG`` and ``REPO_URL`` environment variables, when set to a
    non-empty value, take precedence over ``app["repo_tag"]`` and
    ``app["repo_url"]``. The image is built from ``Dockerfile.model`` in
    ``self.dockerfiles_dir`` as ``<owner>_<title>_tasks:<self.tag>`` and then
    tagged as ``<self.cr>/<self.project>/<owner>_<title>_tasks:<self.tag>``.

    Raises:
        AssertionError: if ``self.cr`` is ``None`` (checked after the build).
    """
    owner_slug = clean(app["owner"])
    title_slug = clean(app["title"])
    img_name = f"{owner_slug}_{title_slug}_tasks"

    repo_tag = os.environ.get("REPO_TAG") or app["repo_tag"]
    repo_url = os.environ.get("REPO_URL") or app["repo_url"]
    # Last path segment of the repository URL.
    repo_name = urlparse(repo_url).path.split("/")[-1]
    raw_repo_url = repo_url.replace(
        "https://github.com", "https://raw.githubusercontent.com"
    )

    buildargs = {
        "OWNER": app["owner"],
        "TITLE": app["title"],
        "REPO_TAG": repo_tag,
        "REPO_URL": repo_url,
        "REPO_NAME": repo_name,
        "RAW_REPO_URL": raw_repo_url,
        "CS_APPBASE_TAG": self.cs_appbase_tag,
    }
    buildargs_str = " ".join(
        f"--build-arg {arg}={value}" for arg, value in buildargs.items()
    )

    dockerfile = self.dockerfiles_dir / "Dockerfile.model"
    run(
        f"docker build --no-cache {buildargs_str} -t {img_name}:{self.tag} -f {dockerfile} ./"
    )

    assert self.cr is not None
    run(
        f"docker tag {img_name}:{self.tag} {self.cr}/{self.project}/{img_name}:{self.tag}"
    )
def configure(self):
    """
    Build the Kubernetes objects for a visualization deployment.

    Chooses the container command from ``config["tech"]`` (``dash``,
    ``bokeh`` or ``streamlit``), then creates a ``V1Deployment``, a
    ``V1Service`` and an ingress route, all named
    ``<owner>-<title>-<deployment_name>``. The results are stored on
    ``self.service``, ``self.deployment`` and ``self.ingressroute``. Unless
    ``self.quiet`` is set, the deployment and service are written to stdout
    as YAML.

    Raises:
        ValueError: if ``config["tech"]`` is not one of the supported values.
    """
    config = self.model_config
    owner_slug = clean(self.owner)
    title_slug = clean(self.title)
    name = f"{owner_slug}-{title_slug}-{self.deployment_name}"

    tech = config["tech"]
    # URL path under which the app is served (no trailing slash).
    base_path = f"/{self.owner}/{self.title}/{self.deployment_name}"

    if tech == "dash":
        app_module = config.get("app_location") or "cs_config.functions"
        cpu = math.floor(float(config["resources"]["requests"]["cpu"]))
        workers = 2 * cpu + 1
        print("Starting gunicorn server with workers", workers,
              config["resources"])
        cmd = [
            "gunicorn",
            "--workers",
            str(workers),
            f"{app_module}:{self.callable_name}",
        ]
    elif tech == "bokeh":
        cmd = [
            "bokeh",
            "serve",
            config["app_location"],
            "--address",
            "0.0.0.0",
            "--port",
            str(PORT),
            "--prefix",
            f"{base_path}/",
            f"--allow-websocket-origin={self.viz_host}",
        ]
    elif tech == "streamlit":
        cmd = [
            "streamlit",
            "run",
            config["app_location"],
            "--server.address",
            "0.0.0.0",
            "--server.port",
            str(PORT),
            "--server.baseUrlPath",
            f"{base_path}/",
        ]
    else:
        raise ValueError(
            f"Unknown tech: {config['tech']}. Must be one of: bokeh, dash, streamlit."
        )

    print("got config", config)
    print("running cmd", cmd)

    container = kclient.V1Container(
        name=name,
        image=f"{self.cr}/{self.project}/{owner_slug}_{title_slug}_tasks:{self.tag}",
        command=cmd,
        env=self.env(self.owner, self.title, self.deployment_name, config),
        resources=kclient.V1ResourceRequirements(**config["resources"]),
        ports=[kclient.V1ContainerPort(container_port=PORT)],
    )

    # Pod template: one always-restarting container.
    pod_spec = kclient.V1PodSpec(
        restart_policy="Always",
        containers=[container],
        node_selector={"component": "model"},
    )
    template = kclient.V1PodTemplateSpec(
        metadata=kclient.V1ObjectMeta(labels={"app": name}),
        spec=pod_spec,
    )

    # Deployment with a single replica selected by the "app" label.
    deployment_spec = kclient.V1DeploymentSpec(
        template=template,
        selector=kclient.V1LabelSelector(match_labels={"app": name}),
        replicas=1,
    )
    deployment = kclient.V1Deployment(
        api_version="apps/v1",
        kind="Deployment",
        metadata=kclient.V1ObjectMeta(name=name),
        spec=deployment_spec,
    )

    # Service forwarding port 80 to the container port.
    service_port = kclient.V1ServicePort(
        port=80, target_port=PORT, protocol="TCP"
    )
    service = kclient.V1Service(
        api_version="v1",
        kind="Service",
        metadata=kclient.V1ObjectMeta(name=name),
        spec=kclient.V1ServiceSpec(
            selector={"app": name},
            ports=[service_port],
            type="LoadBalancer",
        ),
    )

    # Ingress rule matching the viz host and this deployment's path prefix.
    rule = {
        "kind": "Rule",
        "match": f"Host(`{self.viz_host}`) && PathPrefix(`{base_path}`)",
        "services": [{"name": name, "port": 80}],
    }
    ingressroute = ingressroute_template(
        namespace=self.namespace, name=name, routes=[rule], tls=True
    )

    if not self.quiet:
        sys.stdout.write(yaml.dump(deployment.to_dict()))
        sys.stdout.write("---\n")
        sys.stdout.write(yaml.dump(service.to_dict()))

    self.service = service
    self.deployment = deployment
    self.ingressroute = ingressroute
def full_name(self):
    """Return ``<owner>-<title>-<deployment_name>`` with owner and title sanitized."""
    return f"{clean(self.owner)}-{clean(self.title)}-{self.deployment_name}"
def test_app_image(self, app):
    """
    Smoke-test the locally built image for a single app.

    A container is started from ``<owner>_<title>_tasks:<self.tag>`` with the
    app's secrets as environment variables:

    * ``python-paramtools`` apps run the ``cs-config`` test suite; the
      container logs are printed and a non-zero exit status is an error.
    * ``dash``, ``bokeh`` and ``streamlit`` apps are started as servers on
      port 8010 and polled over HTTP (up to 10 attempts, one second apart)
      until a 200 response is received. Logs are streamed from a daemon
      thread while polling.

    The container is killed on the way out if it has not already exited.

    Raises:
        ValueError: if ``app["tech"]`` is unknown, or no 200 response was
            received within the allowed attempts.
        RuntimeError: if the container stops running prematurely or the
            test suite exits with a non-zero status.
    """
    safeowner = clean(app["owner"])
    safetitle = clean(app["title"])
    img_name = f"{safeowner}_{safetitle}_tasks"
    viz_ports = {"8010/tcp": ("127.0.0.1", "8010")}

    # NOTE(review): the path repeats the owner ("/<owner>/<owner>/test/")
    # rather than using the title. The server prefix and the probe URL agree,
    # so this is kept as-is — confirm whether the title was intended.
    test_path = f"/{app['owner']}/{app['owner']}/test/"

    if app["tech"] == "python-paramtools":
        cmd = [
            "py.test",
            "./cs-config/cs_config/tests/test_functions.py",
            "-v",
            "-s",
        ]
        ports = None
    elif app["tech"] == "dash":
        app_module = app.get("app_location", None) or "cs_config.functions"
        cmd = ["gunicorn", f"{app_module}:{app['callable_name']}"]
        ports = viz_ports
    elif app["tech"] == "bokeh":
        cmd = [
            "bokeh",
            "serve",
            app["app_location"],
            "--address",
            "0.0.0.0",
            "--port",
            "8010",
            "--prefix",
            test_path,
        ]
        ports = viz_ports
    elif app["tech"] == "streamlit":
        cmd = [
            "streamlit",
            "run",
            app["app_location"],
            "--server.address",
            "0.0.0.0",
            "--server.port",
            "8010",
            "--server.baseUrlPath",
            test_path,
        ]
        ports = viz_ports
    else:
        raise ValueError(f"Unknown tech: {app['tech']}")

    secrets = self.config._list_secrets(app)
    client = docker.from_env()
    container = client.containers.run(
        f"{img_name}:{self.tag}",
        cmd,
        environment=secrets,
        detach=True,
        ports=ports,
    )

    try:

        def stream_logs(container):
            for line in container.logs(stream=True):
                print(strip_secrets(line, secrets))

        # Fail fast if the container is already gone right after start-up.
        container.reload()
        if container.status != "running":
            print(f"Container exited with status: {container.status}")
            for line in container.logs(stream=True):
                print(strip_secrets(line, secrets))
            raise RuntimeError(
                f"Container exited with status: {container.status}")

        if app["tech"] in ("bokeh", "dash", "streamlit"):
            # Run function for showing logs in another thread so
            # test/monitoring can run in main thread.
            thread = threading.Thread(
                target=stream_logs, args=(container,), daemon=True
            )
            thread.start()
            time.sleep(2)

            num_attempts = 10
            for attempt in range(1, num_attempts + 1):
                container.reload()
                if container.status != "running":
                    raise RuntimeError(
                        f"Container exited with status: {container.status}"
                    )
                try:
                    resp = httpx.get(f"http://localhost:8010{test_path}")
                    if resp.status_code == 200:
                        print(f"Received successful response: {resp}")
                        break
                except Exception:
                    # Best-effort polling: show the error and retry.
                    import traceback

                    traceback.print_exc()
                time.sleep(1)
            else:
                # The loop ran out without a ``break``, i.e. no attempt
                # succeeded. Using for/else (instead of comparing ``attempt``
                # with ``num_attempts``) keeps a success on the very last
                # attempt from being reported as a failure. The container is
                # stopped by the outer ``finally``.
                raise ValueError(
                    f"Unable to get 200 response after {attempt}.")

            container.reload()
            print(f"Successful response received after {attempt} attempts.")
            container.kill()
        else:
            # Blocks until the test run's log stream ends.
            for line in container.logs(stream=True):
                print(strip_secrets(line, secrets))
            container.reload()
            exit_status = container.wait()
            if exit_status["StatusCode"] != 0:
                raise RuntimeError(
                    f"Tests failed with exit status {exit_status['StatusCode']}."
                )
    finally:
        # Never leave the test container running behind us.
        container.reload()
        if container.status != "exited":
            print(f"Stopping container: {container}.")
            container.kill()