def load_incluster_config_with_token(token: str):
    """Load the in-cluster kubernetes config using an explicit bearer token.

    ``InClusterConfigLoader`` only accepts a token *file*, so the token
    string is persisted to disk first and the loader is pointed at it.

    Args:
        token: Bearer token used to authenticate against the API server.
    """
    import os
    token_filename = '/tmp/token'
    # SECURITY FIX: the token is a secret — create the file with 0600
    # permissions instead of the umask-derived (often world-readable) mode.
    fd = os.open(token_filename, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, 'w') as token_file:
        token_file.write(token)
    loader = InClusterConfigLoader(
        token_filename=token_filename,
        cert_filename=SERVICE_CERT_FILENAME)
    loader.load_and_set()
def start_job(definition):
    """Create a Kubernetes Job in the configured namespace.

    Args:
        definition: Job manifest (dict or V1Job) passed straight to the
            BatchV1 API.
    """
    # FIX: dropped a no-op ``.format()`` call on a literal with no
    # placeholders — output is unchanged.
    print("deploying build baseimage job!")
    from kubernetes import client, config
    if settings.EXTERNAL_KUBECONF:
        config.load_kube_config('cluster.conf')
    else:
        if 'TELEPRESENCE_ROOT' in os.environ:
            # Inside telepresence the service-account files are mounted
            # under TELEPRESENCE_ROOT instead of at /.
            from kubernetes.config.incluster_config import (
                SERVICE_CERT_FILENAME, SERVICE_TOKEN_FILENAME,
                InClusterConfigLoader)
            token_filename = Path(
                os.getenv('TELEPRESENCE_ROOT',
                          '/')) / Path(SERVICE_TOKEN_FILENAME).relative_to('/')
            cert_filename = Path(
                os.getenv('TELEPRESENCE_ROOT',
                          '/')) / Path(SERVICE_CERT_FILENAME).relative_to('/')
            InClusterConfigLoader(token_filename=token_filename,
                                  cert_filename=cert_filename).load_and_set()
        else:
            config.load_incluster_config()
    api = client.BatchV1Api()
    # create the resource
    api.create_namespaced_job(
        namespace=settings.NAMESPACE,
        body=definition,
    )
    print("Resource created")
def run_job(instance):
    """Create a Job for *instance* and read it back, printing its details.

    Args:
        instance: model object; ``instance.id`` names the Job and
            ``get_instance_from_definition`` renders its manifest.
    """
    print("deploying job with {}!".format(instance))
    from kubernetes import client, config
    if settings.EXTERNAL_KUBECONF:
        config.load_kube_config('cluster.conf')
    else:
        if 'TELEPRESENCE_ROOT' in os.environ:
            # Inside telepresence the service-account files are mounted
            # under TELEPRESENCE_ROOT instead of at /.
            from kubernetes.config.incluster_config import (
                SERVICE_CERT_FILENAME, SERVICE_TOKEN_FILENAME,
                InClusterConfigLoader)
            token_filename = Path(
                os.getenv('TELEPRESENCE_ROOT',
                          '/')) / Path(SERVICE_TOKEN_FILENAME).relative_to('/')
            cert_filename = Path(
                os.getenv('TELEPRESENCE_ROOT',
                          '/')) / Path(SERVICE_CERT_FILENAME).relative_to('/')
            InClusterConfigLoader(token_filename=token_filename,
                                  cert_filename=cert_filename).load_and_set()
        else:
            config.load_incluster_config()
    api = client.BatchV1Api()
    yaml_definition = get_instance_from_definition(instance)
    # create the resource
    api.create_namespaced_job(
        namespace=settings.NAMESPACE,
        body=yaml_definition,
    )
    print("Resource created")
    # get the resource and print out data
    print("getting logs:")
    resource = api.read_namespaced_job(
        name=str(instance.id),
        namespace=settings.NAMESPACE,
    )
    print("got logs?")
    print("Resources details:")
    pprint(resource)
def main():
    """Entry point: configure the k8s client and start the namespace watcher."""
    log('Started NamespaceWatcher')
    import os
    if 'SERVICE_TOKEN_FILENAME' in os.environ:
        # Explicit credential files supplied via the environment take
        # precedence over the default in-cluster locations.
        InClusterConfigLoader(
            token_filename=os.environ.get('SERVICE_TOKEN_FILENAME'),
            cert_filename=os.environ.get(
                'SERVICE_CERT_FILENAME')).load_and_set()
    else:
        config.load_incluster_config()
    # NOTE(review): TLS verification is disabled here — confirm this is
    # intentional for the target cluster.
    client.configuration.verify_ssl = False
    watcher = NamespaceWatcher(watch)
    watcher.start()
def main():
    """Entry point: prune jobs older than DEADLINE_HOURS (default 24h)."""
    log('Started JobPrunner')
    import os
    deadline_hours = int(os.environ.get('DEADLINE_HOURS', 24))
    log('Job deadline {}h'.format(deadline_hours))
    if 'SERVICE_TOKEN_FILENAME' in os.environ:
        # Explicit credential files supplied via the environment take
        # precedence over the default in-cluster locations.
        InClusterConfigLoader(
            token_filename=os.environ.get('SERVICE_TOKEN_FILENAME'),
            cert_filename=os.environ.get(
                'SERVICE_CERT_FILENAME')).load_and_set()
    else:
        config.load_incluster_config()
    # NOTE(review): TLS verification is disabled here — confirm this is
    # intentional for the target cluster.
    client.configuration.verify_ssl = False
    prunner = JobPrunner(watch=watch.Watch())
    prunner.start(deadline_hours * 60 * 60)
def get_logs(experiment):
    """Return the log text of the first pod belonging to *experiment*'s job."""
    from kubernetes import client, config
    if settings.EXTERNAL_KUBECONF:
        config.load_kube_config('cluster.conf')
    elif 'TELEPRESENCE_ROOT' in os.environ:
        # Inside telepresence the service-account files are mounted under
        # TELEPRESENCE_ROOT instead of at /.
        from kubernetes.config.incluster_config import (
            SERVICE_CERT_FILENAME, SERVICE_TOKEN_FILENAME,
            InClusterConfigLoader)
        tele_root = Path(os.getenv('TELEPRESENCE_ROOT', '/'))
        token_filename = tele_root / Path(SERVICE_TOKEN_FILENAME).relative_to('/')
        cert_filename = tele_root / Path(SERVICE_CERT_FILENAME).relative_to('/')
        InClusterConfigLoader(token_filename=token_filename,
                              cert_filename=cert_filename).load_and_set()
    else:
        config.load_incluster_config()
    batch_api = client.BatchV1Api()
    job = batch_api.read_namespaced_job(
        name=str(experiment.id),
        namespace=settings.NAMESPACE,
    )
    print("getting job name:")
    job_name = job.metadata.labels['job-name']
    core_api = client.CoreV1Api()
    pods = core_api.list_namespaced_pod(
        namespace=settings.NAMESPACE,
        label_selector='job-name={}'.format(job_name))
    return core_api.read_namespaced_pod_log(name=pods.items[0].metadata.name,
                                            namespace=settings.NAMESPACE)
def __generate_and_apply_configmaps(project):
    """Recreate the ``<project>-dockerfile`` ConfigMap for *project*.

    Any existing ConfigMap of that name is deleted first (API errors are
    printed and otherwise ignored), then a fresh one holding the project's
    Dockerfile text is created.
    """
    from kubernetes import client, config
    from kubernetes.client.rest import ApiException
    from pprint import pprint
    if settings.EXTERNAL_KUBECONF:
        config.load_kube_config('cluster.conf')
    elif 'TELEPRESENCE_ROOT' in os.environ:
        # adjust k8s service account paths if running inside telepresence
        from kubernetes.config.incluster_config import (
            SERVICE_CERT_FILENAME, SERVICE_TOKEN_FILENAME,
            InClusterConfigLoader)
        tele_root = Path(os.getenv('TELEPRESENCE_ROOT', '/'))
        token_filename = tele_root / Path(SERVICE_TOKEN_FILENAME).relative_to('/')
        cert_filename = tele_root / Path(SERVICE_CERT_FILENAME).relative_to('/')
        InClusterConfigLoader(token_filename=token_filename,
                              cert_filename=cert_filename).load_and_set()
    else:
        config.load_incluster_config()
    api = client.CoreV1Api()
    configmap_name = "{}-dockerfile".format(project.name)
    # Best-effort delete of the previous ConfigMap (e.g. 404 on first run).
    try:
        api_response = api.delete_namespaced_config_map(
            namespace=settings.NAMESPACE,
            name=configmap_name,
        )
        pprint(api_response)
    except ApiException as e:
        print(
            "Exception when calling CoreV1Api->delete_namespaced_config_map: %s\n"
            % e)
    metadata = client.V1ObjectMeta(
        name=configmap_name,
        namespace=settings.NAMESPACE,
    )
    configmap = client.V1ConfigMap(
        api_version="v1",
        kind="ConfigMap",
        data=dict(Dockerfile=str(project.environment.dockerfile)),
        metadata=metadata)
    try:
        # NOTE(review): ``pretty='pretty_example'`` looks like a leftover
        # from generated example code — confirm the intended value.
        api_response = api.create_namespaced_config_map(
            namespace=settings.NAMESPACE,
            body=configmap,
            pretty='pretty_example',
        )
        pprint(api_response)
    except ApiException as e:
        print(
            "Exception when calling CoreV1Api->create_namespaced_config_map: %s\n"
            % e)
def main(token_file=None, cert_file=None, config_file=None):
    """Drive the TensorFlow image/build/job pipeline on OpenShift.

    Loads the in-cluster configuration, reads the BUILD_MAP describing the
    python/OS build matrix, ensures the required ImageStreams exist and then
    runs quota/resource/event worker threads until every BuildConfig in the
    queue has been processed.

    Args:
        token_file: optional path to the service-account token file.
        cert_file: optional path to the service-account CA file.
        config_file: optional JSON file used when the BUILD_MAP env is empty.

    Raises:
        Exception: when no BUILD_MAP can be loaded, or an ImageStream
            cannot be created.
    """
    logging.basicConfig(level=logging.DEBUG,
                        format="(%(threadName)-9s) %(message)s")
    logging.getLogger("urllib3").setLevel(logging.CRITICAL)
    kubernetes_verify_tls = get_param("KUBERNETES_VERIFY_TLS", None, "0")
    kubernetes_verify_tls = bool(int(kubernetes_verify_tls))

    # Load in-cluster configuration that is exposed by OpenShift/k8s.
    InClusterConfigLoader(
        token_filename=_get_incluster_token_file(token_file=token_file),
        cert_filename=_get_incluster_ca_file(ca_file=cert_file),
        environ=os.environ,
    ).load_and_set()

    # We need to explicitly set whether we want to verify SSL/TLS
    # connection to the master.
    configuration = client.Configuration()
    configuration.verify_ssl = kubernetes_verify_tls
    # NOTE(review): ocp_client is constructed but never referenced below —
    # kept because DynamicClient construction may validate the connection.
    ocp_client = DynamicClient(client.ApiClient(configuration=configuration))
    host = client.Configuration().host
    api_key = client.Configuration().api_key
    namespace = get_namespace()
    plugin = TensorflowBuildPlugin()

    # TODO may use config.json or use CRD
    # Load BUILD_MAP: environment variable first, JSON config file fallback.
    build_map = os.getenv(ENV_BUILD_MAP, "{}")
    build_map = json.loads(build_map)
    if build_map == {}:
        build_map = load_json_file(config_file)
    if not build_map:
        raise Exception("No BUILD_MAP loaded.Nothing todo")

    imagestream_list = []
    buildconfig_list = []
    job_list = []
    object_map = {}
    object_map.update(plugin.get_labels_dict())

    # Process BUILD_MAP: one build + job pair per (python, os) combination.
    for py_version, os_details in build_map.items():
        for os_version, image_details in os_details.items():
            try:
                application_build_name = "tf-{}-build-image-{}".format(
                    os_version.lower(), py_version.replace(".", ""))
                application_name = "tf-{}-build-job-{}".format(
                    os_version.lower(), py_version.replace(".", ""))
                builder_imagestream = "{}:{}".format(application_build_name,
                                                     DEFAULT_IMAGE_VERSION)
                nb_python_ver = py_version
                docker_file_path = "Dockerfile.{}".format(os_version.lower())
                logging.debug(
                    "-------------------VARIABLES-------------------------")
                logging.debug("APPLICATION_BUILD_NAME: {}".format(
                    application_build_name))
                logging.debug("APPLICATION_NAME: {}".format(application_name))
                logging.debug(
                    "BUILDER_IMAGESTREAM: {}".format(builder_imagestream))
                logging.debug("PYTHON VERSION: {}".format(nb_python_ver))
                logging.debug("DOCKERFILE: {}".format(docker_file_path))
                for var_key, var_val in image_details.items():
                    logging.debug("{}: {}".format(var_key, var_val))
                logging.debug(
                    "-----------------------------------------------------")
                imagestream_template = plugin.fill_imagestream_template(
                    ims_name=application_build_name)
                imagestream_list.append({
                    "kind": "ImageStream",
                    "object": imagestream_template,
                    "trigger_count": 0,
                    "retrigger": False,
                })
                job_template = plugin.fill_job_template1(
                    application_name=application_name,
                    builder_imagestream=builder_imagestream,
                    nb_python_ver=nb_python_ver,
                    image_details=image_details,
                )
                object_map[application_name] = job_template
                job_list.append(job_template)
                build_template = plugin.fill_buildconfig_template1(
                    build_name=application_build_name,
                    docker_file_path=docker_file_path,
                    nb_python_ver=nb_python_ver,
                    image_details=image_details,
                )
                object_map[application_build_name] = build_template
                buildconfig_list.append({
                    "kind": "BuildConfig",
                    "object": build_template,
                    "trigger_count": 0,
                    "retrigger": False,
                    "application_name": application_name,
                    "builder_imagestream": builder_imagestream,
                    "nb_python_ver": nb_python_ver,
                })
            except Exception as e:
                # BUG FIX: the original passed ``e`` as a lazy-format arg
                # with no placeholder in the format string, so the exception
                # text never appeared in the log record.
                logging.error("Exception: %s", e)
                logging.error(
                    "Error in Tensorflow Build or Job trigger! Please refer the above log, Starting the next "
                    "one in queue!")

    # Make sure every required ImageStream exists before triggering builds.
    for ims in imagestream_list:
        ims_name = ims["object"]["metadata"]["name"]
        ims_exist, ims_response = get_imagestream(
            req_url=host,
            req_headers=get_header(api_key),
            namespace=namespace,
            imagestream_name=ims_name,
        )
        if not ims_exist:
            generated_img = create_imagestream(
                req_url=host,
                req_headers=get_header(api_key),
                namespace=namespace,
                imagestream=ims["object"],
            )
            if not generated_img:
                raise Exception(
                    "Image could not be generated for {}".format(ims_name))

    quota_event = threading.Condition()
    done_event = threading.Event()
    global_count = ResourceCounter()
    task_q = Queue(maxsize=1000)
    bloom = BloomFilter(10000, 0.001)
    # TODO TFBuildConfig OpenBlasBuildConfig, numpy
    for y in buildconfig_list:
        task_q.put(y)
    logging.debug("Q size {}".format(task_q.qsize()))
    quota_name = get_param("QUOTA_NAME", None, DEFAULT_QUOTA_NAME)
    quota_thread = threading.Thread(
        name="quota-thread",
        target=quota_check,
        args=(quota_name, quota_event, done_event, task_q, global_count),
    )
    resource_thread = threading.Thread(
        name="resource-thread",
        target=create_resource,
        args=(quota_event, done_event, task_q, global_count, object_map),
    )
    event_thread = threading.Thread(
        name="event-thread",
        target=event_loop,
        args=("events", bloom, object_map, task_q, global_count),
    )
    event_thread.start()
    # Give the event watcher a head start before producers begin.
    time.sleep(3)
    quota_thread.start()
    resource_thread.start()
    event_thread.join()
    resource_thread.join()
    quota_thread.join()
    logging.debug("END")
SERVICE_TOKEN_FILENAME, InClusterConfigLoader, ) from .. import config # adjust k8s service account paths if running inside telepresence tele_root = Path(os.getenv("TELEPRESENCE_ROOT", "/")) token_filename = tele_root / Path(SERVICE_TOKEN_FILENAME).relative_to("/") cert_filename = tele_root / Path(SERVICE_CERT_FILENAME).relative_to("/") namespace_path = tele_root / Path( "var/run/secrets/kubernetes.io/serviceaccount/namespace") try: InClusterConfigLoader(token_filename=token_filename, cert_filename=cert_filename).load_and_set() v1 = client.CoreV1Api() except ConfigException: v1 = None warnings.warn("Unable to configure the kubernetes client.") try: with open(namespace_path, "rt") as f: kubernetes_namespace = f.read() except FileNotFoundError: kubernetes_namespace = "" warnings.warn( "No k8s service account found - not running inside a kubernetes cluster?" )
def load_k8s_config():
    """Configure the kubernetes client from the pod's service account.

    Uses the client library's default in-cluster token and CA certificate
    paths (``SERVICE_TOKEN_FILENAME`` / ``SERVICE_CERT_FILENAME``).
    """
    InClusterConfigLoader(
        token_filename=SERVICE_TOKEN_FILENAME,
        cert_filename=SERVICE_CERT_FILENAME,
    ).load_and_set()