def __init__(self, run_date="", configpath="/mnt/consumerhub/config/kubernetes/config.ini"):
    """
    Initializes the K8s object.

    :param run_date: run date in ISO format; defaults to today
    :param configpath: path to the k8s config
    """
    config.load_kube_config(configpath)
    self.api_instance = client.BatchV1Api()
    self.batch_instance = client.BatchV1beta1Api()
    if run_date:
        self.run_date = run_date
    else:
        self.run_date = datetime.date.today().isoformat()
    self.logger = loggerfunc(servicename="kubernetes", run_date=self.run_date)
    self.api_pods = client.CoreV1Api()
def getJob():
    config.load_kube_config()
    api_instance = client.BatchV1Api()
    jobs = api_instance.list_namespaced_job(
        namespace='default',
        # async_req=True,
        pretty=True,
        timeout_seconds=60)
    for job in jobs.items:
        if job.status.active == 1:
            now = datetime.datetime.now(timezone.utc)
            duration = int((now - job.status.start_time).total_seconds())
            return job, duration
    return None, 0
def __init__(self, distributedBuildConfig, logName=None, logPath=None):
    if logName is None:
        logName = "DistributedBuild"
    if logPath is None:
        logPath = constants.logPath
    self.logName = logName
    self.logPath = logPath
    self.logger = Logger.getLogger(logName, logPath, constants.logLevel)
    self.distributedBuildConfig = distributedBuildConfig
    self.buildGuid = self.getBuildGuid()
    # load_kube_config() returns None; new_client_from_config() actually
    # returns an ApiClient that can be shared by the API wrappers below.
    self.aApiClient = config.new_client_from_config()
    self.coreV1ApiInstance = client.CoreV1Api(self.aApiClient)
    self.batchV1ApiInstance = client.BatchV1Api(self.aApiClient)
    self.AppsV1ApiInstance = client.AppsV1Api(self.aApiClient)
def main():
    job_id = sys.argv[1]
    parallelism = sys.argv[2]
    job_name = "pocket-datanode-dram-job" + job_id
    yaml_file = "pocket-datanode-dram-job.yaml"

    cmd = ["./update_datanode_yaml.sh", job_name, parallelism, yaml_file]
    Popen(cmd, stdout=PIPE).wait()

    config.load_kube_config()
    with open(path.join(path.dirname(__file__), yaml_file)) as f:
        job = yaml.safe_load(f)
    k8s_batch = client.BatchV1Api()
    resp = k8s_batch.create_namespaced_job(body=job, namespace="default")
    print("Job created. status='%s'" % str(resp.status))
def __init__(self):
    self.logger = logging.getLogger(__name__)
    try:
        config_file = os.path.expanduser(kubeconfig_filepath)
        config.load_kube_config(config_file=config_file)
    except Exception:
        self.logger.warning("unable to load kube-config")

    self.v1 = client.CoreV1Api()
    self.v1Beta1 = client.AppsV1beta1Api()
    self.extensionsV1Beta1 = client.ExtensionsV1beta1Api()
    self.autoscalingV1Api = client.AutoscalingV1Api()
    self.rbacApi = client.RbacAuthorizationV1beta1Api()
    self.batchV1Api = client.BatchV1Api()
    self.batchV2Api = client.BatchV2alpha1Api()
def __init__(self) -> None:
    super().__init__()
    from kubernetes import client, config

    try:
        self.logger.debug("Loading incluster configuration")
        config.load_incluster_config()
    except config.config_exception.ConfigException as exc:
        self.logger.warning(
            "{} Using out of cluster configuration option.".format(exc))
        self.logger.debug("Loading out of cluster configuration")
        config.load_kube_config()

    self.batch_client = client.BatchV1Api()
def __init__(self):
    config.load_incluster_config()
    self.namespace = "default"
    self.nodeport = 32137
    self.core_api = client.CoreV1Api()
    self.apps_api = client.AppsV1Api()
    self.batch_api = client.BatchV1Api()
    self.lobby_ip = constant.LOBBY_URL
    with open("/opt/devxdev/src/game-server.yaml", "r") as manifest:
        self.game_server = yaml.load(manifest, yaml.SafeLoader)
    with open("/opt/devxdev/src/inside-job.yaml", "r") as manifest:
        self.job = yaml.load(manifest, yaml.SafeLoader)
    with open("/opt/devxdev/src/game-server-service.yaml", "r") as manifest:
        self.game_server_service = yaml.load(manifest, yaml.SafeLoader)
def main():
    # Configs can be set in the Configuration class directly or using a helper
    # utility. If no argument is provided, the config will be loaded from the
    # default location.
    config.load_kube_config()
    batch_v1 = client.BatchV1Api()

    # Create a job object with the client-python API. The job created here is
    # the same as the `pi-job.yaml` in the /examples folder.
    job = create_job_object()

    create_job(batch_v1, job)
    update_job(batch_v1, job)
    delete_job(batch_v1)
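# The snippet above relies on helpers that are not shown here. Below is a
# minimal sketch of create_job_object() and create_job(), modeled on the
# upstream client-python job example; the "pi" container, the perl image,
# JOB_NAME, and the "default" namespace are assumptions, not the original code.
JOB_NAME = "pi"


def create_job_object():
    # Configure the Pod template container
    container = client.V1Container(
        name="pi",
        image="perl",
        command=["perl", "-Mbignum=bpi", "-wle", "print bpi(2000)"])
    # Create and configure the pod template spec
    template = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(labels={"app": "pi"}),
        spec=client.V1PodSpec(restart_policy="Never", containers=[container]))
    # Create the specification of the job
    spec = client.V1JobSpec(template=template, backoff_limit=4)
    # Instantiate the job object
    return client.V1Job(
        api_version="batch/v1",
        kind="Job",
        metadata=client.V1ObjectMeta(name=JOB_NAME),
        spec=spec)


def create_job(api_instance, job):
    # Submit the job and report its status
    api_response = api_instance.create_namespaced_job(
        body=job, namespace="default")
    print("Job created. status='%s'" % str(api_response.status))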
def main():
    if os.environ.get('RD_CONFIG_DEBUG') == 'true':
        log.setLevel(logging.DEBUG)
        log.debug("Log level configured for DEBUG")

    common.connect()

    data = {}
    data["api_version"] = os.environ.get('RD_CONFIG_API_VERSION')
    data["name"] = os.environ.get('RD_CONFIG_NAME')
    data["namespace"] = os.environ.get('RD_CONFIG_NAMESPACE')

    try:
        k8s_client = client.BatchV1Api()
        job = k8s_client.read_namespaced_job(
            name=data["name"],
            namespace=data["namespace"]
        )

        # Strip server-populated fields so the job spec can be re-submitted.
        job.metadata.creation_timestamp = None
        job.metadata.uid = None
        job.metadata.resource_version = None
        job.status = None
        job.spec.selector = None
        job.spec.template.metadata = None

        body = client.V1DeleteOptions(
            api_version='v1',
            kind="DeleteOptions",
            propagation_policy="Background"
        )
        api_response = k8s_client.delete_namespaced_job(
            name=data["name"],
            namespace=data["namespace"],
            body=body,
            pretty='true'
        )
        print("Job deleted. status='%s'" % str(api_response.status))

        api_response = k8s_client.create_namespaced_job(
            body=job,
            namespace=data["namespace"]
        )
        print("Job created. status='%s'" % str(api_response.status))
    except ApiException as e:
        log.error("Exception recreating job: %s\n" % e)
        sys.exit(1)
def __init__(
    self,
    service_account_name,
    job_image,
    instance_config_map,
    dagster_home,
    image_pull_policy='Always',
    image_pull_secrets=None,
    load_kubeconfig=False,
    kubeconfig_file=None,
    inst_data=None,
    job_namespace="default",
    env_config_maps=None,
    env_secrets=None,
):
    self._inst_data = check.opt_inst_param(inst_data, 'inst_data', ConfigurableClassData)
    self.job_image = check.str_param(job_image, 'job_image')
    self.instance_config_map = check.str_param(instance_config_map, 'instance_config_map')
    self.dagster_home = check.str_param(dagster_home, 'dagster_home')
    self.image_pull_secrets = check.opt_list_param(image_pull_secrets, 'image_pull_secrets')
    self.image_pull_policy = check.str_param(image_pull_policy, 'image_pull_policy')
    self.service_account_name = check.str_param(service_account_name, 'service_account_name')
    self.job_namespace = check.str_param(job_namespace, 'job_namespace')
    self._env_config_maps = check.opt_list_param(
        env_config_maps, 'env_config_maps', of_type=str)
    self._env_secrets = check.opt_list_param(env_secrets, 'env_secrets', of_type=str)

    check.bool_param(load_kubeconfig, 'load_kubeconfig')
    if load_kubeconfig:
        check.str_param(kubeconfig_file, 'kubeconfig_file')
    else:
        check.invariant(
            kubeconfig_file is None,
            '`kubeconfig_file` is set but `load_kubeconfig` is False.')

    if load_kubeconfig:
        config.load_kube_config(kubeconfig_file)
    else:
        config.load_incluster_config()

    self._kube_api = client.BatchV1Api()
def serve(self):
    # For deployed clusters, we should always be running inside
    # a Rook cluster. For development convenience, also support
    # running outside (reading ~/.kube config).
    if self._rook_env.has_namespace():
        config.load_incluster_config()
        cluster_name = self._rook_env.cluster_name
    else:
        self.log.warning("DEVELOPMENT ONLY: Reading kube config from ~")
        config.load_kube_config()
        cluster_name = "rook-ceph"

        # So that I can do port forwarding from my workstation - jcsp
        from kubernetes.client import configuration
        configuration.verify_ssl = False

    self._k8s_CoreV1_api = client.CoreV1Api()
    self._k8s_BatchV1_api = client.BatchV1Api()

    try:
        # XXX mystery hack -- I need to do an API call from
        # this context, or subsequent API usage from handle_command
        # fails with SSLError('bad handshake'). Suspect some kind of
        # thread context setup in SSL lib?
        self._k8s_CoreV1_api.list_namespaced_pod(cluster_name)
    except ApiException:
        # Ignore here to make self.available() fail with a proper error message
        pass

    self._rook_cluster = RookCluster(
        self._k8s_CoreV1_api,
        self._k8s_BatchV1_api,
        self._rook_env)

    self._initialized.set()

    while not self._shutdown.is_set():
        # XXX hack (or is it?) to kick all completions periodically,
        # in case we had a caller that wait()'ed on them long enough
        # to get persistence but not long enough to get completion
        self.all_progress_references = [
            p for p in self.all_progress_references if not p.effective
        ]
        for p in self.all_progress_references:
            p.update()
        self._shutdown.wait(5)
def __init__(self, host, token, namespace, verify_ssl=True, ssl_ca_cert=None):
    configuration = client.Configuration()
    configuration.host = host
    configuration.api_key = {"authorization": "Bearer " + token}
    configuration.verify_ssl = verify_ssl
    if ssl_ca_cert:
        configuration.ssl_ca_cert = ssl_ca_cert

    self.api_client = client.ApiClient(configuration)
    self.core = client.CoreV1Api(self.api_client)
    self.batch = client.BatchV1Api(self.api_client)
    self.namespace = namespace
def deleteJob(kJobname, podName, kNameSpace):
    try:
        config.load_incluster_config()
    except config.ConfigException:
        config.load_kube_config('.kube/config')

    batchV1 = client.BatchV1Api()
    batchV1.delete_namespaced_job(kJobname, kNameSpace)
    print("Job deleted: " + kJobname)

    coreV1 = client.CoreV1Api()
    coreV1.delete_namespaced_pod(podName, kNameSpace)
    print("Pod deleted: " + podName)
def wait_for_job_success(job_name):
    '''Poll until the launched job appears, then wait for its pod to terminate.'''
    job = None
    while not job:
        # Ensure we found the job that we launched
        jobs = client.BatchV1Api().list_namespaced_job(
            namespace='dagster-test', watch=False)
        job = next((j for j in jobs.items if j.metadata.name == job_name), None)
        if not job:
            print('Job not yet launched, waiting')
            time.sleep(1)

    success, job_pod_name = wait_for_pod(job.metadata.name, wait_for_termination=True)
    raw_logs = retrieve_pod_logs(job_pod_name)
    return success, raw_logs
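# wait_for_pod() and retrieve_pod_logs() are not shown above. A rough sketch of
# what they could look like, assuming the pods carry the standard `job-name`
# label and live in the same 'dagster-test' namespace; this is not the
# project's actual helper code.
def wait_for_pod(job_name, wait_for_termination=False, namespace='dagster-test'):
    core_v1 = client.CoreV1Api()
    while True:
        pods = core_v1.list_namespaced_pod(
            namespace=namespace, label_selector='job-name=%s' % job_name)
        if pods.items:
            pod = pods.items[0]
            phase = pod.status.phase
            if not wait_for_termination and phase in ('Running', 'Succeeded'):
                return True, pod.metadata.name
            if phase == 'Succeeded':
                return True, pod.metadata.name
            if phase == 'Failed':
                return False, pod.metadata.name
        time.sleep(1)


def retrieve_pod_logs(pod_name, namespace='dagster-test'):
    # Read the raw container logs for the pod
    return client.CoreV1Api().read_namespaced_pod_log(
        name=pod_name, namespace=namespace)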
def delete_job(instance):
    from kubernetes import client, config

    if settings.EXTERNAL_KUBECONF:
        config.load_kube_config('cluster.conf')
    else:
        config.load_incluster_config()

    api = client.BatchV1Api()
    api.delete_namespaced_job(
        name=str(instance.id),
        namespace=settings.NAMESPACE,
        body=client.V1DeleteOptions(),
    )
    print("Resource deleted")
def authenticate_platform(self):
    cert_path, host, api_token, api_prefix = self.__parse_identity_json(self.identity)

    self.configuration = client.Configuration()
    self.configuration.api_key["authorization"] = api_token
    self.configuration.api_key_prefix['authorization'] = api_prefix
    self.configuration.host = host
    self.configuration.connection_pool_maxsize = 200
    self.configuration.ssl_ca_cert = cert_path

    # Share one ApiClient (and its connection pool) between the API wrappers.
    api_client = client.ApiClient(self.configuration)
    self.batch_api = client.BatchV1Api(api_client)
    self.core_api = client.CoreV1Api(api_client)

    self.status_manager = KubernetesStatusManager(self.batch_api, self.core_api)
    self.status_manager.start_job_monitoring()
def fake_k8s_client_dict():
    k8s_client_dict = {
        'v1': client.CoreV1Api(),
        'apiregistration.k8s.io/v1': client.ApiregistrationV1Api(),
        'apps/v1': client.AppsV1Api(),
        'authentication.k8s.io/v1': client.AuthenticationV1Api(),
        'authorization.k8s.io/v1': client.AuthorizationV1Api(),
        'autoscaling/v1': client.AutoscalingV1Api(),
        'batch/v1': client.BatchV1Api(),
        'coordination.k8s.io/v1': client.CoordinationV1Api(),
        'networking.k8s.io/v1': client.NetworkingV1Api(),
        'rbac.authorization.k8s.io/v1': client.RbacAuthorizationV1Api(),
        'scheduling.k8s.io/v1': client.SchedulingV1Api(),
        'storage.k8s.io/v1': client.StorageV1Api()
    }
    return k8s_client_dict
def _wait_for_job_completion(
    wait_minutes_for_completion: int,
    name: str,
    options: Dict,
):
    """Waits until the job completes.

    Raises ApiException when a job with the given name no longer exists."""
    for _ in range(wait_minutes_for_completion):
        logging.debug("Waiting for job to complete...")
        job = client.BatchV1Api().read_namespaced_job_status(
            **gamla.add_key_value("name", name)(options))
        if job.status.succeeded:
            return
        time.sleep(60)
    raise Exception(
        f"Job wasn't completed within {wait_minutes_for_completion} min")
def __init__(self, namespace, config_file=None):
    """Init."""
    if not config_file or not os.path.isfile(config_file):
        raise RuntimeError(
            'Cannot find k8s config file: {0}'.format(config_file))

    config.load_kube_config(config_file=config_file)
    self.namespace = namespace if namespace else 'default'
    self.core_v1 = client.CoreV1Api()
    self.batch_v1 = client.BatchV1Api()
    self.apps_v1 = client.AppsV1Api()
    self.delete_v1 = client.V1DeleteOptions(propagation_policy='Background')
def fetch_jobs(self,
               job_definition_name: Optional[str] = None,
               **extra_labels) -> Iterator[client.V1Job]:
    batch_v1_client = client.BatchV1Api()
    response = batch_v1_client.list_namespaced_job(
        namespace=self.namespace,
        label_selector=self.signer.label_selector(
            job_definition_name=job_definition_name, **extra_labels),
    )
    yield from response.items
    while response.metadata._continue:
        response = batch_v1_client.list_namespaced_job(
            namespace=self.namespace,
            _continue=response.metadata._continue)
        yield from response.items
def delete_job(self, jobname, username):
    namespace = utils.email_escape(username)
    api_client = utils.get_user_api_client(username)
    if not jobname:
        return utils.simple_response(500, "must specify jobname")
    # FIXME: options needed: grace_period_seconds, orphan_dependents, preconditions
    # FIXME: cascade deleting
    delete_status = []
    # delete job
    trainer_name = jobname + "-trainer"
    try:
        u_status = client.BatchV1Api(api_client=api_client)\
            .delete_namespaced_job(trainer_name, namespace, {})
    except ApiException as e:
        logging.error("error deleting job: %s, %s", jobname, str(e))
        delete_status.append(str(e))
def delete_job(msg):
    with open(ROOT_DIR + msg['file_name'], 'r') as yaml_file:
        yaml_conf = yaml.safe_load(yaml_file)
    api_instance = client.BatchV1Api(client.ApiClient())
    try:
        api_response = api_instance.delete_namespaced_job(
            msg.name, msg.namespace, yaml_conf)
        logger.info(api_response)
    except ApiException as e:
        extra = {
            'api_instance': 'BatchV1Api',
            'api_func': 'delete_namespaced_job',
            'func_name': 'delete_job'
        }
        logger.error("error_message: %s", e, extra=extra)
        raise
def delete_job(self, job: str):
    """Delete the Kubernetes job with the provided name.

    This will delete the job with the provided name regardless of the status
    of the job. If you want to avoid deleting a job that has not completed,
    make sure to check the status of the job before using this function.

    :param job: The name of the job to delete.
    """
    _load_kube_config2(print_output=self.print_output)
    batch_api = client.BatchV1Api()
    try:
        batch_api.delete_namespaced_job(name=job, namespace=self.namespace)
    except ApiException as error:
        raise APIConnectionError(error)
def delete_job(self, namespace=None, name=None):
    api_instance = client.BatchV1Api()
    delete_options = client.V1DeleteOptions(propagation_policy="Foreground")
    grace_period_seconds = 10
    try:
        api_instance.delete_namespaced_job(
            name=name,
            namespace=namespace,
            body=delete_options,
            grace_period_seconds=grace_period_seconds,
            pretty="true",
        )
    except ApiException as e:
        LOG.error("Exception when calling BatchV1Api: %s", e)
def run_job(instance):
    print("deploying job with {}!".format(instance))
    from kubernetes import client, config

    if settings.EXTERNAL_KUBECONF:
        config.load_kube_config('cluster.conf')
    else:
        if 'TELEPRESENCE_ROOT' in os.environ:
            from kubernetes.config.incluster_config import (
                SERVICE_CERT_FILENAME, SERVICE_TOKEN_FILENAME,
                InClusterConfigLoader)
            telepresence_root = Path(os.getenv('TELEPRESENCE_ROOT', '/'))
            token_filename = telepresence_root / Path(
                SERVICE_TOKEN_FILENAME).relative_to('/')
            cert_filename = telepresence_root / Path(
                SERVICE_CERT_FILENAME).relative_to('/')
            InClusterConfigLoader(
                token_filename=token_filename,
                cert_filename=cert_filename).load_and_set()
        else:
            config.load_incluster_config()

    api = client.BatchV1Api()
    yaml_definition = get_instance_from_definition(instance)

    # create the resource
    api.create_namespaced_job(
        namespace=settings.NAMESPACE,
        body=yaml_definition,
    )
    print("Resource created")

    # read the resource back and print out its details
    resource = api.read_namespaced_job(
        name=str(instance.id),
        namespace=settings.NAMESPACE,
    )
    print("Resource details:")
    pprint(resource)
def execute(  # type: ignore
    self, flow: "Flow", **kwargs: Any
) -> None:
    """
    Create a single Kubernetes job that spins up a dask scheduler, dynamically
    creates worker pods, and runs the flow.

    Args:
        - flow (Flow): the Flow object
        - **kwargs (Any): additional keyword arguments to pass to the runner

    Raises:
        - Exception: if the environment is unable to create the Kubernetes job
    """
    docker_name = get_flow_image(flow)

    from kubernetes import client, config

    # Verify environment is running in cluster
    try:
        config.load_incluster_config()
    except config.config_exception.ConfigException:
        self.logger.error("Environment not currently running inside a cluster")
        raise EnvironmentError("Environment not currently inside a cluster")

    batch_client = client.BatchV1Api()

    if self._scheduler_spec:
        job = self._scheduler_spec
        job = self._populate_scheduler_spec_yaml(
            yaml_obj=job, docker_name=docker_name)
    else:
        with open(path.join(path.dirname(__file__), "job.yaml")) as job_file:
            job = yaml.safe_load(job_file)
        job = self._populate_job_yaml(yaml_obj=job, docker_name=docker_name)

    # Create Job
    try:
        batch_client.create_namespaced_job(
            namespace=prefect.context.get("namespace"), body=job)
    except Exception as exc:
        self.logger.critical("Failed to create Kubernetes job: {}".format(exc))
        raise exc
def main():
    is_skip_all = True
    number_days = int(os.getenv('NUMBER_DAYS', 7))
    namespace = os.getenv('NAMESPACE', 'default')
    # bool() on a non-empty string is always True, so compare the value instead.
    is_cluster = os.getenv('IS_CLUSTER', 'true').lower() == 'true'
    label_selector = os.getenv('LABEL_SELECTOR', 'app=migration')
    propagation_policy = os.getenv('PROPAGATION_POLICY', 'Background')

    if is_cluster:
        config.load_incluster_config()
    else:
        config.load_kube_config()

    client_job = client.BatchV1Api()
    instances = client_job.list_namespaced_job(namespace=namespace,
                                               label_selector=label_selector)
    now = datetime.datetime.utcnow().replace(tzinfo=tzutc())

    for job in instances.items:
        job_name = job.metadata.name
        start_time = job.status.start_time
        if is_lower_one_week(now=now, time=start_time, number_days=number_days):
            continue
        conditions = job.status.conditions
        if conditions:
            check_time = conditions[0].last_probe_time
            if not is_lower_one_week(
                    now=now, time=check_time, number_days=number_days):
                delete_job(job_name=job_name, namespace=namespace,
                           propagation_policy=propagation_policy)
                is_skip_all = False
            continue
        else:
            label_selector_for_pod = f'job-name={job_name}'
            if not is_pod_running(namespace=namespace,
                                  label_selector=label_selector_for_pod):
                delete_job(job_name=job_name, namespace=namespace,
                           propagation_policy=propagation_policy)
                is_skip_all = False
            continue

    if is_skip_all:
        print(f'No jobs older than {number_days} days were found')
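# The delete_job() helper called above is not shown. A minimal sketch of what
# it could look like, assuming it simply issues a namespaced delete with the
# given propagation policy; this is an illustration, not the original helper.
def delete_job(job_name, namespace, propagation_policy):
    # Delete the job and let the propagation policy clean up its pods.
    client.BatchV1Api().delete_namespaced_job(
        name=job_name,
        namespace=namespace,
        body=client.V1DeleteOptions(propagation_policy=propagation_policy),
    )
    print(f'Deleted job {job_name} from namespace {namespace}')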
def main():
    if path.isfile(END_HOOK_FILEPATH):
        log.info("END hook file already created, exiting...")
        return

    config.load_incluster_config()

    batch_wrapper_job_name = getenv("BATCH_WRAPPER_JOB_NAME")
    if batch_wrapper_job_name is None:
        raise RuntimeError("BATCH_WRAPPER_JOB_NAME env var is not defined!")

    with open("/var/run/secrets/kubernetes.io/serviceaccount/namespace", mode='r') as file:
        my_current_namespace = file.read()

    if not my_current_namespace:
        raise RuntimeError(
            f"error reading my current namespace {str(my_current_namespace)}")

    v1 = client.BatchV1Api()

    while True:
        batch_wrapper_job: V1Job = v1.read_namespaced_job(
            name=batch_wrapper_job_name, namespace=my_current_namespace)
        batch_wrapper_job_status: V1JobStatus = batch_wrapper_job.status

        active_pods = batch_wrapper_job_status.active if batch_wrapper_job_status.active is not None else 0
        succeeded_pods = batch_wrapper_job_status.succeeded if batch_wrapper_job_status.succeeded is not None else 0
        # The model server should also be closed when the batch wrapper job fails.
        failed_pods = batch_wrapper_job_status.failed if batch_wrapper_job_status.failed is not None else 0

        if active_pods == 0 and (succeeded_pods > 0 or failed_pods > 0):
            log.info(
                f"active_pods == {active_pods}, succeeded_pods == {succeeded_pods}, "
                f"failed_pods == {failed_pods}, creating END hook")
            open(END_HOOK_FILEPATH, 'a').close()
            log.info("exiting...")
            return

        sleep(1)
def main():
    # Configs can be set in the Configuration class directly or using a helper
    # utility. If no argument is provided, the config will be loaded from the
    # default location.
    kubeconf.load_kube_config()

    # Detect and set the external api url
    set_kubeapi_url()

    # Save the file for future reference if requested
    if SAVE_COPY:
        if WDIR:
            f = WDIR + "/config.yaml-used"
        else:
            f = "config.yaml-used"
        with open(f, 'w') as of:
            yaml.safe_dump(config, of, explicit_start=True,
                           default_flow_style=False)
            # Dump the node stuff as well
            yaml.safe_dump(
                get_dedicated_nodes(num_nodes=config['depl']['nodes']),
                of, explicit_start=False, default_flow_style=False)

    if config['private_registry_enabled']:
        print("Creating image pull secret for %s" % config['private_registry_server'])
        create_pull_secret(
            name="installer-pull-secret",
            namespace="kube-system",
            server=config['private_registry_server'],
            username=config['docker_username'],
            password=config['docker_password']
        )
        print("Done")

    # Make sure we have an appropriate service account
    sa = create_service_account("mcm-deploy", "kube-system")
    rb = create_cluster_role_binding("mcm-deploy", sa, "kube-system")

    batch_v1 = client.BatchV1Api()
    # Create a job object with the client-python API.
    job = create_job_object(
        container_image=config['depl']['installer_image'],
        image_pull_secret="installer-pull-secret",
        service_account_name="mcm-deploy"
    )
    create_job(batch_v1, job)
def update_table_job(table_path, tablejobimageid, kubeconfigpath, dbid, namespace,
                     dbtype, tableid, Region, archeplaydatapath):
    try:
        config.load_kube_config("/home/app/web/kubeconfig")
        batch_v1 = client.BatchV1Api()

        volume2 = client.V1Volume(name="kubeconfig",
                                  host_path={"path": kubeconfigpath})
        volume3 = client.V1Volume(name="archeplaydata",
                                  host_path={"path": archeplaydatapath})
        mount2 = client.V1VolumeMount(name="kubeconfig",
                                      mount_path="/home/app/web/kubeconfig")
        mount3 = client.V1VolumeMount(name="archeplaydata",
                                      mount_path="/home/app/web/archeplay/data")

        container = client.V1Container(
            name="tablejob" + tableid,
            image=tablejobimageid,
            volume_mounts=[mount2, mount3],
            command=["python", "-u", "update.py",
                     table_path, dbid, tableid, Region],
            env=[{"name": "archeplaydatapath", "value": archeplaydatapath}],
            image_pull_policy="Always")

        # Create and configure the pod template spec
        template = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(labels={"tablejob": "tablejob" + tableid}),
            spec=client.V1PodSpec(restart_policy="Never",
                                  containers=[container],
                                  volumes=[volume2, volume3]))
        # Create the specification of the job
        spec = client.V1JobSpec(template=template, backoff_limit=0)
        # Instantiate the job object
        job = client.V1Job(api_version="batch/v1",
                           kind="Job",
                           metadata=client.V1ObjectMeta(name="tablejob" + tableid),
                           spec=spec)

        api_response = batch_v1.create_namespaced_job(body=job, namespace=namespace)
        success_message = tableid + " Deploy Job Initiated"
        return ("success", success_message, str(api_response.status))
    except Exception as Error:
        error_message = tableid + " Failed to Initiate Deploy Job"
        return ("error", error_message, str(Error))