def __init__(self, app, nworkers, **kwargs):
    """Start the Kubernetes job runner.

    Registers the runner's k8s parameters, initialises the parent job
    runner, then builds the pykube HTTP client either from the pod's
    service account or from a kubeconfig file.
    """
    # Check if pykube was importable, fail if not
    assert operator is not None, K8S_IMPORT_MESSAGE
    runner_param_specs = dict(
        k8s_config_path=dict(map=str, default=os_environ.get('KUBECONFIG', None)),
        k8s_use_service_account=dict(map=bool, default=False),
        k8s_persistent_volume_claim_name=dict(map=str),
        k8s_persistent_volume_claim_mount_path=dict(map=str),
        k8s_namespace=dict(map=str, default="default"),
        # BUG FIX: the validator previously read ``lambda x: int > 0``,
        # comparing the builtin ``int`` type (always truthy) instead of
        # the supplied value, so any retrial count was accepted.
        k8s_pod_retrials=dict(map=int, valid=lambda x: x > 0, default=3))
    if 'runner_param_specs' not in kwargs:
        kwargs['runner_param_specs'] = dict()
    kwargs['runner_param_specs'].update(runner_param_specs)
    super(KubernetesJobRunner, self).__init__(app, nworkers, **kwargs)
    # self.cli_interface = CliInterface()
    if "k8s_use_service_account" in self.runner_params and self.runner_params[
            "k8s_use_service_account"]:
        # In-cluster auth via the pod's mounted service-account token.
        self._pykube_api = HTTPClient(KubeConfig.from_service_account())
    else:
        self._pykube_api = HTTPClient(
            KubeConfig.from_file(self.runner_params["k8s_config_path"]))
    self._galaxy_vol_name = "pvc-galaxy"  # TODO this needs to be read from params!!
    self._init_monitor_thread()
    self._init_worker_threads()
def test():
    """Spin up three local ReplicaManagers against a proxied k8s API."""
    count = 3
    first_port = 27020
    ports = range(first_port, first_port + count)

    k8s = HTTPClient(KubeConfig.from_service_account())
    # Talk to a local `kubectl proxy` instead of the real API server.
    k8s.url = 'http://127.0.0.1:8001'
    k8s.session = k8s.build_session()

    def get_mongo_pods():
        # Fabricate one fake Pod object per test port.
        pods = []
        for port in ports:
            pods.append(Pod(None, {
                'metadata': {
                    'labels': {
                        'hostname': 'fb-1.db.waverbase.com:%d' % port
                    }
                },
                'status': {
                    'podIP': '127.0.0.1:%d' % port
                }
            }))
        return pods

    for port in ports:
        manager = ReplicaManager(
            app_name='testapp',
            creator_name='testcreator',
            hostname='fb-1.db.waverbase.com:%d' % port,
            k8s=k8s,
            local_mongo_server_conn='mongodb://127.0.0.1:%d' % port,
            external_ip='127.0.0.1:%d' % port)
        manager.local_pod_ip = '127.0.0.1:%d' % port
        manager.get_mongo_pods = get_mongo_pods
        manager.start()
class KubernetesApi(object):
    """Thin wrapper over the Kubernetes REST API using pykube's HTTPClient."""

    def __init__(self, filename):
        self.config = KubeConfig(filename)
        self.http_client = HTTPClient(self.config)

    def get_pods(self):
        """Returns a list of the pod names"""
        return self._get_components('pods')

    def get_replication_controllers(self):
        """Returns a list of the rc names"""
        return self._get_components('replicationcontrollers')

    def get_services(self):
        """Returns a list of the service names"""
        return self._get_components('services')

    def _create_from_template(self, template_name, url, **render_args):
        # Shared helper for start_pod/start_replicationcontroller:
        # render the Jinja2 JSON template and POST it.  201 Created is
        # the only acceptable response for a create.
        filename = '{}/templates/{}'.format(os.path.dirname(__file__),
                                            template_name)
        with open(filename) as f:
            t = jinja2.Template(f.read())
        json_obj = json.loads(t.render(**render_args))
        rsp = self.http_client.post(url=url, json=json_obj)
        if rsp.status_code != 201:
            raise KubernetesError

    def start_pod(self, *args, **kwargs):
        """Create a pod from the pod.json template.

        Raises KubernetesError if the API does not answer 201.
        """
        self._create_from_template(
            'pod.json', '/pods',
            podname=kwargs.get('podname'),
            containername=kwargs.get('containername'),
            containerimage=kwargs.get('containerimage'),
            imagepullpolicy=kwargs.get('imagepullpolicy'),
            restartpolicy=kwargs.get('restartpolicy'),
            command=kwargs.get('command'))

    def start_replicationcontroller(self, *args, **kwargs):
        """Create a replication controller from the rc.json template.

        Raises KubernetesError if the API does not answer 201.
        """
        self._create_from_template(
            'rc.json', '/replicationcontrollers',
            rcname=kwargs.get('rcname'),
            containername=kwargs.get('containername'),
            containerimage=kwargs.get('containerimage'),
            label=kwargs.get('label'),
            replicas=kwargs.get('replicas'))

    def _get_components(self, component_type):
        """Return the names of all objects of *component_type*.

        Raises KubernetesError on a non-200 response or a payload
        without an ``items`` key.
        """
        rsp = self.http_client.get(url='/{}'.format(component_type))
        if rsp.status_code != 200:
            raise KubernetesError
        json_rsp = json.loads(rsp.text)
        if 'items' in json_rsp:
            return [x['metadata']['name'] for x in json_rsp['items']]
        raise KubernetesError
def _init_kubernetes(self): if self.auth_method == "kubeconfig": self.__kube_api = HTTPClient( KubeConfig.from_file(self.kubeconfig_path)) elif self.auth_method == "service-account": self.__kube_api = HTTPClient(KubeConfig.from_service_account()) else: raise ValueError("Illegal auth_method") self.create_id()
def pykube_client_from_dict(params):
    """Construct a pykube HTTPClient from a parameter dict.

    Prefers in-cluster service-account auth when requested; otherwise
    resolves a kubeconfig path from the params, the KUBECONFIG
    environment variable, or the conventional ~/.kube/config location,
    in that order.
    """
    if params.get("k8s_use_service_account"):
        return HTTPClient(KubeConfig.from_service_account())
    config_path = params.get("k8s_config_path")
    if config_path is None:
        config_path = os.environ.get('KUBECONFIG', None)
    if config_path is None:
        config_path = '~/.kube/config'
    return HTTPClient(KubeConfig.from_file(config_path))
def _init_kubernetes(self):
    """Create the pykube API client and derive a unique job name."""
    self.__logger = logger
    self.__logger.debug("Kubernetes auth method: " + self.auth_method)
    method = self.auth_method
    if method == "kubeconfig":
        kube_config = KubeConfig.from_file(self.kubeconfig_path)
    elif method == "service-account":
        kube_config = KubeConfig.from_service_account()
    else:
        raise ValueError("Illegal auth_method")
    self.__kube_api = HTTPClient(kube_config)
    self.job_uuid = str(uuid.uuid4().hex)
    # Suffix the task name with a uuid so the k8s job name is unique.
    self.uu_name = self.name + "-luigi-" + self.job_uuid
def _init_kubernetes(self):
    """Initialise the pykube API client and a timestamped unique job name."""
    self.__logger = logger
    self.__logger.debug("Kubernetes auth method: " + self.auth_method)
    if self.auth_method == "service-account":
        api_config = KubeConfig.from_service_account()
    elif self.auth_method == "kubeconfig":
        api_config = KubeConfig.from_file(self.kubeconfig_path)
    else:
        raise ValueError("Illegal auth_method")
    self.__kube_api = HTTPClient(api_config)
    self.job_uuid = str(uuid.uuid4().hex)
    # name + timestamp + uuid prefix keeps job names unique and sortable.
    timestamp = datetime.utcnow().strftime('%Y%m%d%H%M%S')
    self.uu_name = "%s-%s-%s" % (self.name, timestamp, self.job_uuid[:16])
def __init__(self, app, nworkers, **kwargs):
    """Start the Kubernetes job runner.

    Registers all k8s runner parameters, initialises the parent job
    runner, builds the pykube client (service account or kubeconfig),
    and caches the pod security / instance-id defaults from the runner
    params before starting the monitor and worker threads.
    """
    # Check if pykube was importable, fail if not
    assert KubeConfig is not None, K8S_IMPORT_MESSAGE
    runner_param_specs = dict(
        k8s_config_path=dict(map=str, default=os.environ.get('KUBECONFIG', None)),
        k8s_use_service_account=dict(map=bool, default=False),
        k8s_persistent_volume_claims=dict(map=str),
        k8s_namespace=dict(map=str, default="default"),
        k8s_galaxy_instance_id=dict(map=str),
        # BUG FIX: both ``valid`` lambdas previously compared the builtin
        # ``int`` type (always truthy) instead of the supplied value, so
        # validation always passed regardless of input.
        k8s_timeout_seconds_job_deletion=dict(
            map=int, valid=lambda x: x > 0, default=30),
        k8s_job_api_version=dict(map=str, default="batch/v1"),
        k8s_supplemental_group_id=dict(map=str),
        k8s_pull_policy=dict(map=str, default="Default"),
        k8s_run_as_user_id=dict(
            map=str, valid=lambda s: s == "$uid" or s.isdigit()),
        k8s_run_as_group_id=dict(
            map=str, valid=lambda s: s == "$gid" or s.isdigit()),
        k8s_fs_group_id=dict(map=int),
        k8s_default_requests_cpu=dict(map=str, default=None),
        k8s_default_requests_memory=dict(map=str, default=None),
        k8s_default_limits_cpu=dict(map=str, default=None),
        k8s_default_limits_memory=dict(map=str, default=None),
        k8s_pod_retrials=dict(map=int, valid=lambda x: x >= 0, default=3))
    if 'runner_param_specs' not in kwargs:
        kwargs['runner_param_specs'] = dict()
    kwargs['runner_param_specs'].update(runner_param_specs)
    super(KubernetesJobRunner, self).__init__(app, nworkers, **kwargs)
    if "k8s_use_service_account" in self.runner_params and self.runner_params[
            "k8s_use_service_account"]:
        # In-cluster auth via the pod's mounted service-account token.
        self._pykube_api = HTTPClient(KubeConfig.from_service_account())
    else:
        self._pykube_api = HTTPClient(
            KubeConfig.from_file(self.runner_params["k8s_config_path"]))
    self._galaxy_instance_id = self.__get_galaxy_instance_id()
    self._run_as_user_id = self.__get_run_as_user_id()
    self._run_as_group_id = self.__get_run_as_group_id()
    self._supplemental_group = self.__get_supplemental_group()
    self._fs_group = self.__get_fs_group()
    self._default_pull_policy = self.__get_pull_policy()
    self._init_monitor_thread()
    self._init_worker_threads()
    self.setup_volumes()
def test_http_do_not_overwrite_auth(monkeypatch):
    """A caller-supplied Authorization header must survive untouched."""
    api = HTTPClient(KubeConfig.from_file(GOOD_CONFIG_FILE_PATH))
    mock_send = MagicMock(side_effect=Exception('MOCK HTTP'))
    monkeypatch.setattr('pykube.http.KubernetesHTTPAdapter._do_send', mock_send)

    with pytest.raises(Exception):
        api.get(url='test', headers={'Authorization': 'Bearer testtoken'})

    mock_send.assert_called_once()
    request = mock_send.call_args[0][0]
    assert request.headers['Authorization'] == 'Bearer testtoken'
def test_http_with_oidc_auth(monkeypatch):
    """OIDC credentials from the kubeconfig become a Bearer token header."""
    api = HTTPClient(KubeConfig.from_file(CONFIG_WITH_OIDC_AUTH))
    mock_send = MagicMock(side_effect=Exception('MOCK HTTP'))
    monkeypatch.setattr('pykube.http.KubernetesHTTPAdapter._do_send', mock_send)

    with pytest.raises(Exception):
        api.get(url='test')

    mock_send.assert_called_once()
    request = mock_send.call_args[0][0]
    assert request.headers['Authorization'] == 'Bearer some-id-token'
def test_http_with_dry_run(monkeypatch):
    """dry_run=True must append ?dryRun=All to every request URL."""
    api = HTTPClient(KubeConfig.from_file(GOOD_CONFIG_FILE_PATH), dry_run=True)
    mock_send = MagicMock(side_effect=Exception('MOCK HTTP'))
    monkeypatch.setattr('pykube.http.KubernetesHTTPAdapter._do_send', mock_send)

    with pytest.raises(Exception):
        api.get(url='test')

    mock_send.assert_called_once()
    # check that dry run http parameters were set
    request = mock_send.call_args[0][0]
    assert request.url == "http://localhost/api/v1/test?dryRun=All"
def test_http_insecure_skip_tls_verify(monkeypatch):
    """insecure-skip-tls-verify in the kubeconfig disables SSL verification."""
    api = HTTPClient(KubeConfig.from_file(CONFIG_WITH_INSECURE_SKIP_TLS_VERIFY))
    mock_send = MagicMock(side_effect=Exception('MOCK HTTP'))
    monkeypatch.setattr('pykube.http.KubernetesHTTPAdapter._do_send', mock_send)

    with pytest.raises(Exception):
        api.get(url='test')

    mock_send.assert_called_once()
    # check that SSL is not verified
    assert not mock_send.call_args[1]['verify']
def test_http_do_not_overwrite_auth(monkeypatch):
    """An explicit Authorization header given by the caller is kept as-is."""
    api = HTTPClient(KubeConfig.from_file(GOOD_CONFIG_FILE_PATH))
    mock_send = mock.MagicMock(side_effect=Exception("MOCK HTTP"))
    monkeypatch.setattr("pykube.http.KubernetesHTTPAdapter._do_send", mock_send)

    with pytest.raises(Exception):
        api.get(url="test", headers={"Authorization": "Bearer testtoken"})

    mock_send.assert_called_once()
    sent_request = mock_send.call_args[0][0]
    assert sent_request.headers["Authorization"] == "Bearer testtoken"
def test_get_kwargs():
    """get_kwargs maps API versions onto the right URL prefixes."""
    api = HTTPClient(KubeConfig.from_file(GOOD_CONFIG_FILE_PATH))
    expected_urls = {
        "v1": "http://localhost/api/v1/",
        "/apis": "http://localhost/apis/",
        "storage.k8s.io/v1": "http://localhost/apis/storage.k8s.io/v1/",
    }
    for version, url in expected_urls.items():
        assert api.get_kwargs(version=version) == {"timeout": 10, "url": url}
def action(self, resource, action_name):
    """Apply *action_name* ('run', 'update' or 'delete') to *resource*.

    Compiles the resource's action file, loads it as YAML, resolves the
    pykube object class from its ``kind``, and creates/updates it in the
    cluster.  'delete' is not implemented yet.
    """
    api = HTTPClient(KubeConfig.from_file("~/.kube/config"))
    log.debug('Executing %s %s', action_name, resource.name)
    # XXX: self._configs is used in _compile_action_file via _make_args. It has to be here
    self._configs = self.prepare_configs(resource)
    action_file = self._compile_action_file(resource, action_name)
    log.debug('action_file: %s', action_file)
    # XXX: seems hacky
    obj = yaml.load(open(action_file).read())
    k8s_class = obj['kind']
    if action_name == 'run':
        k8s_class = getattr(pykube.objects, k8s_class)
        k8s_obj = k8s_class(api, obj)
        k8s_obj.create()
        self._wait_for(k8s_obj)
    elif action_name == 'update':
        k8s_class = getattr(pykube.objects, k8s_class)
        k8s_obj = k8s_class(api, obj)
        k8s_obj.reload()
        # generate new data
        new_data = self._compile_action_file(resource, 'run')
        new_obj = yaml.load(open(new_data).read())
        _update_obj(k8s_obj.obj, new_obj)
        # hacky
        pykube.objects.jsonpatch.make_patch = jsondiff.make
        k8s_obj.update()
        self._wait_for(k8s_obj)
    elif action_name == 'delete':
        # BUG FIX: ``raise NotImplemented(action_name)`` raised a
        # TypeError because NotImplemented is a constant, not an
        # exception class; NotImplementedError is the correct exception.
        raise NotImplementedError(action_name)
    else:
        raise NotImplementedError(action_name)
def test_http(monkeypatch):
    """Basic-auth credentials, User-Agent and the default timeout are applied."""
    api = HTTPClient(KubeConfig.from_file(GOOD_CONFIG_FILE_PATH))
    mock_send = MagicMock(side_effect=Exception('MOCK HTTP'))
    monkeypatch.setattr('pykube.http.KubernetesHTTPAdapter._do_send', mock_send)

    with pytest.raises(Exception):
        api.get(url='test')

    mock_send.assert_called_once()
    request = mock_send.call_args[0][0]
    assert request.headers['Authorization'] == 'Basic YWRtOnNvbWVwYXNzd29yZA=='
    assert request.headers['User-Agent'] == f'pykube-ng/{__version__}'
    # check that the default HTTP timeout was set
    assert mock_send.call_args[1]['timeout'] == DEFAULT_HTTP_TIMEOUT
def load_and_check_config(self):
    """Load the kubeconfig at self.config_path and validate context selection.

    Returns True when a usable config/context was loaded and the HTTP
    client was created; False (after printing a diagnostic) otherwise.
    """
    if not os.path.exists(self.config_path):
        print("Config does not exist at path " + self.config_path + "!")
        return False
    try:
        self.config = KubeConfig.from_file(self.config_path)
    except Exception:
        # BUG FIX: was a bare ``except:`` which also swallowed
        # SystemExit/KeyboardInterrupt.
        print("Config at path " + self.config_path + " failed to validate!")
        return False
    # Check current context (identity comparisons with None, per PEP 8).
    if self.context_override is not None:
        if self.context_override not in self.config.contexts:
            print("Context override " + self.context_override +
                  " not in list of contexts.")
            return False
        self.config.set_current_context(self.context_override)
    elif self.config.current_context is None:
        print("Context not set, not sure which to use.")
        return False
    self.api = HTTPClient(self.config)
    if not self.enable_secure:
        print('[note] we are in insecure mode, disabling warnings')
        requests.packages.urllib3.disable_warnings()
        self.api.session.verify = False
    return True
def __init__(self, task_name=None):
    """Create an in-cluster API client; remember the task name if given."""
    self.kube_api = HTTPClient(KubeConfig.from_service_account())
    # NOTE(review): TLS verification is disabled here — presumably the
    # in-cluster certificate is not trusted; confirm before hardening.
    self.kube_api.session.verify = False
    self.task_name = task_name if task_name else None
    self.namespace = os.environ['NAMESPACE']
def test_http_with_oidc_auth_no_refresh(monkeypatch):
    """A still-valid OIDC id-token is sent as-is, without a token refresh."""
    api = HTTPClient(KubeConfig.from_file(CONFIG_WITH_OIDC_AUTH))
    mock_send = mock.MagicMock(side_effect=Exception("MOCK HTTP"))
    monkeypatch.setattr("pykube.http.KubernetesHTTPAdapter._do_send", mock_send)

    with mock.patch(
        "pykube.http.KubernetesHTTPAdapter._is_valid_jwt", return_value=True
    ) as mock_jwt:
        with pytest.raises(Exception):
            api.get(url="test")
        mock_jwt.assert_called_once_with("some-id-token")

    mock_send.assert_called_once()
    sent_request = mock_send.call_args[0][0]
    assert sent_request.headers["Authorization"] == "Bearer some-id-token"
def test_http(monkeypatch):
    """Requests carry basic auth, the pykube User-Agent and the default timeout."""
    api = HTTPClient(KubeConfig.from_file(GOOD_CONFIG_FILE_PATH))
    mock_send = mock.MagicMock(side_effect=Exception("MOCK HTTP"))
    monkeypatch.setattr("pykube.http.KubernetesHTTPAdapter._do_send", mock_send)

    with pytest.raises(Exception):
        api.get(url="test")

    mock_send.assert_called_once()
    sent_request = mock_send.call_args[0][0]
    assert (sent_request.headers["Authorization"]
            == "Basic YWRtOnNvbWVwYXNzd29yZA==")
    assert sent_request.headers["User-Agent"] == f"pykube-ng/{__version__}"
    # check that the default HTTP timeout was set
    assert mock_send.call_args[1]["timeout"] == DEFAULT_HTTP_TIMEOUT
def test_fail_job(self):
    """A failing task raises, and its Job records more failures than max_retrials."""
    fail = FailJob()
    self.assertRaises(RuntimeError, fail.run)
    # Check for retrials; assumes a local minikube cluster.
    kube_api = HTTPClient(KubeConfig.from_file("~/.kube/config"))
    selector = "luigi_task_id=" + fail.job_uuid
    matching = Job.objects(kube_api).filter(selector=selector).response["items"]
    self.assertEqual(len(matching), 1)
    job = Job(kube_api, matching[0])
    self.assertTrue("failed" in job.obj["status"])
    self.assertTrue(job.obj["status"]["failed"] > fail.max_retrials)
def _get_api_pod(self): """Get the pod resource from the API. :return: Dictionary representation of Pod from k8s API. """ # If kubeconfig was specified, use the pykube library. if self.kubeconfig_path: _log.info("Using kubeconfig at %s", self.kubeconfig_path) try: api = HTTPClient(KubeConfig.from_file(self.kubeconfig_path)) pod = Query(api, Pod, self.namespace).get_by_name(self.pod_name) _log.debug("Found pod: %s: ", pod.obj) except Exception as e: raise PolicyException("Error querying Kubernetes API", details=str(e.message)) else: return pod.obj # Otherwise, use direct HTTP query to get pod. with requests.Session() as session: if self.auth_token: _log.debug('Updating header with Token %s', self.auth_token) session.headers.update( {'Authorization': 'Bearer ' + self.auth_token}) # Generate the API endpoint to query. path = "namespaces/%s/pods/%s" % (self.namespace, self.pod_name) path = os.path.join(self.api_root, path) # Perform the API query and handle the result. try: _log.debug('Querying Kubernetes API for Pod: %s', path) if self.client_certificate and self.client_key: _log.debug( "Using client certificate for Query API. " "cert: %s, key: %s", self.client_certificate, self.client_key) cert = (self.client_certificate, self.client_key) response = session.get(path, cert=cert, verify=self.certificate_authority) else: _log.debug('Using direct connection for query API') response = session.get(path, verify=self.certificate_authority) except BaseException, e: _log.exception("Exception hitting Kubernetes API") raise ApplyProfileError("Error querying Kubernetes API", details=str(e.message)) else:
class Service(object):
    """Minimal helper for creating, inspecting and removing k8s Services."""

    def __init__(self):
        self.client = HTTPClient(kubeconfig)

    def create_service(self, name):
        """Create a NodePort service named *name* exposing ssh/http/https."""
        ports = [
            {"name": "ssh", "port": 22},
            {"name": "http", "port": 80},
            {"name": "https", "port": 443},
        ]
        #for portNum in range(7000,7010):
        #    port_custom = {"name": portNum, "port": portNum}
        #    ports.append(port_custom)
        body = {
            "kind": "Service",
            "apiVersion": "v1",
            "metadata": {"name": name},
            "spec": {
                "selector": {"run": name},
                "type": "NodePort",
                "ports": ports,
            },
        }
        response = self.client.post(url="namespaces/default/services",
                                    data=json.dumps(body))
        return json.loads(response.content)

    def view_service(self, name):
        """Return the service object for *name* in the default namespace."""
        url = "namespaces/{}/services/{}".format("default", name)
        return json.loads(self.client.get(url=url).content)

    def delete_service(self, name):
        """Delete service *name* immediately (zero grace period)."""
        service_path = "namespaces/{}/services/{}".format("default", name)
        data = json.dumps({"gracePeriodSeconds": 0})
        response = self.client.delete(url=service_path, data=data)
        return json.loads(response.content)
class Pod(object):
    """Look up / delete pods in the default namespace by their ``run`` label."""

    def __init__(self):
        self.client = HTTPClient(kubeconfig)

    def _list_pods(self):
        # Single GET for every pod in the default namespace.
        listing = self.client.get(url="namespaces/default/pods").content
        return json.loads(listing)['items']

    def pod_view(self, pod_name):
        """Return the first pod whose ``run`` label equals *pod_name*, else None."""
        for pod in self._list_pods():
            if pod['metadata']['labels']['run'] == pod_name:
                return pod
        return None

    def delete_pod(self, name):
        """Delete the first pod labelled ``run=name``; return the response body, else None."""
        for pod in self._list_pods():
            if pod['metadata']['labels']['run'] == name:
                del_url = "namespaces/default/pods/{}".format(pod['metadata']['name'])
                return self.client.delete(url=del_url).content
        return None
def run():
    """Entry point: start a ReplicaManager wired to the in-cluster API."""
    k8s = HTTPClient(KubeConfig.from_service_account())
    mongo_conn = os.environ.get('MONGO_CONNECTION_STRING', 'mongodb://127.0.0.1')
    logging.info('Mongo server %s', mongo_conn)
    manager = ReplicaManager(
        app_name=os.environ['APP_NAME'],
        creator_name=os.environ['CREATOR_NAME'],
        hostname=os.environ['MONGO_HOSTNAME'],
        k8s=k8s,
        local_mongo_server_conn=mongo_conn,
        external_ip=os.environ['EXTERNAL_IP'])
    manager.start()
def test():
    """Exercise ReplicaManager locally with three fake mongo pods."""
    base_port = 27020
    pod_ports = [base_port + i for i in range(3)]

    k8s = HTTPClient(KubeConfig.from_service_account())
    # Point the client at a local `kubectl proxy`.
    k8s.url = 'http://127.0.0.1:8001'
    k8s.session = k8s.build_session()

    def get_mongo_pods():
        return [
            Pod(None, {
                'metadata': {
                    'labels': {
                        'hostname': 'fb-1.db.waverbase.com:%d' % port
                    }
                },
                'status': {
                    'podIP': '127.0.0.1:%d' % port
                }
            })
            for port in pod_ports
        ]

    for port in pod_ports:
        rm = ReplicaManager(
            app_name='testapp',
            creator_name='testcreator',
            hostname='fb-1.db.waverbase.com:%d' % port,
            k8s=k8s,
            local_mongo_server_conn='mongodb://127.0.0.1:%d' % port,
            external_ip='127.0.0.1:%d' % port)
        rm.local_pod_ip = '127.0.0.1:%d' % port
        rm.get_mongo_pods = get_mongo_pods
        rm.start()
def run_all(self):
    """Build a ConfigMap from all registered paths and create it in-cluster."""
    api = HTTPClient(KubeConfig.from_file('~/.kube/config'))
    datas = [self.make_confimap_data(resource, path, i)
             for i, (resource, path, to) in enumerate(self.paths)]
    # Random suffix keeps the configmap name unique per run.
    self.data_sufix = random_string(52)
    self.configmap_name = 'configmap' + self.data_sufix
    self.configmap_namespace = 'default'
    self.configmap_datas = datas
    obj = self.make_configmap_obj(datas)
    self.configmap_obj = pykube.objects.ConfigMap(api, obj)
    self.configmap_obj.create()
    log.debug("Created ConfigMap: %s", self.configmap_obj.name)
    return
def _get_kubernetes_pod_cidr(self):
    """
    Attempt to get the Kubernetes pod CIDR for this node.

    First check if we've written it to disk.  If so, use that value.
    If not, then query the Kubernetes API for it.
    """
    _log.info("Getting node.spec.podCidr from API, kubeconfig: %s",
              self.kubeconfig_path)
    if not self.kubeconfig_path:
        # For now, kubeconfig is the only supported auth method.
        print_cni_error(
            ERR_CODE_GENERIC, "Missing kubeconfig",
            "usePodCidr requires specification of kubeconfig file")
        sys.exit(ERR_CODE_GENERIC)

    # Query the API for this node.  Default node name to the hostname.
    try:
        api = HTTPClient(KubeConfig.from_file(self.kubeconfig_path))
        node = None
        for n in Node.objects(api):
            _log.debug("Checking node: %s", n.obj["metadata"]["name"])
            if n.obj["metadata"]["name"] == self.k8s_node_name:
                node = n
                break
        if not node:
            # BUG FIX: KeyError was previously given two arguments
            # printf-style, so the node name never appeared in the
            # message; format the message explicitly instead.
            raise KeyError("Unable to find node in API: %s" % self.k8s_node_name)
        _log.debug("Found node %s: %s: ",
                   node.obj["metadata"]["name"], node.obj["spec"])
    except Exception:
        print_cni_error(ERR_CODE_GENERIC,
                        "Error querying Kubernetes API",
                        "Failed to get podCidr from Kubernetes API")
        sys.exit(ERR_CODE_GENERIC)
    else:
        pod_cidr = node.obj["spec"].get("podCIDR")
        if not pod_cidr:
            print_cni_error(ERR_CODE_GENERIC, "Missing podCidr",
                            "No podCidr for node %s" % self.k8s_node_name)
            sys.exit(ERR_CODE_GENERIC)
        _log.debug("Using podCidr: %s", pod_cidr)
        return pod_cidr
class KubernetesJobTask(luigi.Task):
    """Luigi task that runs its payload as a Kubernetes Job and tracks it to completion."""

    __DEFAULT_POLL_INTERVAL = 5  # see __track_job
    __DEFAULT_POD_CREATION_INTERVAL = 5
    _kubernetes_config = None  # Needs to be loaded at runtime

    def _init_kubernetes(self):
        """Create the pykube API client and a unique, timestamped job name."""
        self.__logger = logger
        self.__logger.debug("Kubernetes auth method: " + self.auth_method)
        if self.auth_method == "kubeconfig":
            self.__kube_api = HTTPClient(
                KubeConfig.from_file(self.kubeconfig_path))
        elif self.auth_method == "service-account":
            self.__kube_api = HTTPClient(KubeConfig.from_service_account())
        else:
            raise ValueError("Illegal auth_method")
        self.job_uuid = str(uuid.uuid4().hex)
        now = datetime.utcnow()
        # Job names must be unique: task name + timestamp + uuid prefix.
        self.uu_name = "%s-%s-%s" % (self.name, now.strftime('%Y%m%d%H%M%S'),
                                     self.job_uuid[:16])

    @property
    def auth_method(self):
        """
        This can be set to ``kubeconfig`` or ``service-account``.
        It defaults to ``kubeconfig``.

        For more details, please refer to:

        - kubeconfig: http://kubernetes.io/docs/user-guide/kubeconfig-file
        - service-account: http://kubernetes.io/docs/user-guide/service-accounts
        """
        return self.kubernetes_config.auth_method

    @property
    def kubeconfig_path(self):
        """
        Path to kubeconfig file used for cluster authentication.
        It defaults to "~/.kube/config", which is the default location
        when using minikube (http://kubernetes.io/docs/getting-started-guides/minikube).
        When auth_method is ``service-account`` this property is ignored.

        **WARNING**: For Python versions < 3.5 kubeconfig must point to a
        Kubernetes API hostname, and NOT to an IP address.

        For more details, please refer to:
        http://kubernetes.io/docs/user-guide/kubeconfig-file
        """
        return self.kubernetes_config.kubeconfig_path

    @property
    def kubernetes_namespace(self):
        """
        Namespace in Kubernetes where the job will run.
        It defaults to the default namespace in Kubernetes.

        For more details, please refer to:
        https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
        """
        return self.kubernetes_config.kubernetes_namespace

    @property
    def name(self):
        """
        A name for this job.  This task will automatically append a UUID to
        the name before to submit to Kubernetes.
        """
        raise NotImplementedError("subclass must define name")

    @property
    def labels(self):
        """
        Return custom labels for kubernetes job.

        example::
            ``{"run_dt": datetime.date.today().strftime('%F')}``
        """
        return {}

    @property
    def spec_schema(self):
        """
        Kubernetes Job spec schema in JSON format, an example follows.

        .. code-block:: javascript

            {
                "containers": [{
                    "name": "pi",
                    "image": "perl",
                    "command": ["perl", "-Mbignum=bpi", "-wle", "print bpi(2000)"]
                }],
                "restartPolicy": "Never"
            }

        **restartPolicy**

        - If restartPolicy is not defined, it will be set to "Never" by default.
        - **Warning**: restartPolicy=OnFailure will bypass max_retrials, and
          restart the container until success, with the risk of blocking the
          Luigi task.

        For more informations please refer to:
        http://kubernetes.io/docs/user-guide/pods/multi-container/#the-spec-schema
        """
        raise NotImplementedError("subclass must define spec_schema")

    @property
    def max_retrials(self):
        """
        Maximum number of retrials in case of failure.
        """
        return self.kubernetes_config.max_retrials

    @property
    def backoff_limit(self):
        """
        Maximum number of retries before considering the job as failed.
        See: https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/#pod-backoff-failure-policy
        """
        return 6

    @property
    def delete_on_success(self):
        """
        Delete the Kubernetes workload if the job has ended successfully.
        """
        return True

    @property
    def print_pod_logs_on_exit(self):
        """
        Fetch and print the pod logs once the job is completed.
        """
        return False

    @property
    def active_deadline_seconds(self):
        """
        Time allowed to successfully schedule pods.
        See: https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/#job-termination-and-cleanup
        """
        return None

    @property
    def kubernetes_config(self):
        # Lazily load and cache the luigi [kubernetes] config section.
        if not self._kubernetes_config:
            self._kubernetes_config = kubernetes()
        return self._kubernetes_config

    @property
    def poll_interval(self):
        """How often to poll Kubernetes for job status, in seconds."""
        return self.__DEFAULT_POLL_INTERVAL

    @property
    def pod_creation_wait_interal(self):
        """Delay for initial pod creation for just submitted job in seconds"""
        return self.__DEFAULT_POD_CREATION_INTERVAL

    def __track_job(self):
        """Poll job status while active"""
        while not self.__verify_job_has_started():
            time.sleep(self.poll_interval)
            self.__logger.debug("Waiting for Kubernetes job " + self.uu_name
                                + " to start")
        self.__print_kubectl_hints()

        status = self.__get_job_status()
        while status == "RUNNING":
            self.__logger.debug("Kubernetes job " + self.uu_name
                                + " is running")
            time.sleep(self.poll_interval)
            status = self.__get_job_status()

        assert status != "FAILED", "Kubernetes job " + self.uu_name + " failed"

        # status == "SUCCEEDED"
        self.__logger.info("Kubernetes job " + self.uu_name + " succeeded")
        self.signal_complete()

    def signal_complete(self):
        """Signal job completion for scheduler and dependent tasks.

        Touching a system file is an easy way to signal completion. example::
        .. code-block:: python

        with self.output().open('w') as output_file:
            output_file.write('')
        """
        pass

    def __get_pods(self):
        # Pods spawned for the job are linked to it via the job-name label.
        pod_objs = Pod.objects(self.__kube_api,
                               namespace=self.kubernetes_namespace) \
            .filter(selector="job-name=" + self.uu_name) \
            .response['items']
        return [Pod(self.__kube_api, p) for p in pod_objs]

    def __get_job(self):
        # The job itself is tagged with this task's luigi_task_id label.
        jobs = Job.objects(self.__kube_api,
                           namespace=self.kubernetes_namespace) \
            .filter(selector="luigi_task_id=" + self.job_uuid) \
            .response['items']
        assert len(jobs) == 1, "Kubernetes job " + self.uu_name + " not found"
        return Job(self.__kube_api, jobs[0])

    def __print_pod_logs(self):
        # Fetch the logs of every pod of the job and forward them to luigi's log.
        for pod in self.__get_pods():
            logs = pod.logs(timestamps=True).strip()
            self.__logger.info("Fetching logs from " + pod.name)
            if len(logs) > 0:
                for line in logs.split('\n'):
                    self.__logger.info(line)

    def __print_kubectl_hints(self):
        self.__logger.info("To stream Pod logs, use:")
        for pod in self.__get_pods():
            self.__logger.info("`kubectl logs -f pod/%s -n %s`"
                               % (pod.name, pod.namespace))

    def __verify_job_has_started(self):
        """Asserts that the job has successfully started"""
        # Verify that the job started
        self.__get_job()

        # Verify that the pod started
        pods = self.__get_pods()
        if not pods:
            self.__logger.debug(
                'No pods found for %s, waiting for cluster state to match the job definition' % self.uu_name)
            time.sleep(self.pod_creation_wait_interal)
            pods = self.__get_pods()

        assert len(pods) > 0, "No pod scheduled by " + self.uu_name
        for pod in pods:
            status = pod.obj['status']
            for cont_stats in status.get('containerStatuses', []):
                if 'terminated' in cont_stats['state']:
                    # A non-zero exit of any container fails the task.
                    t = cont_stats['state']['terminated']
                    err_msg = "Pod %s %s (exit code %d). Logs: `kubectl logs pod/%s`" % (
                        pod.name, t['reason'], t['exitCode'], pod.name)
                    assert t['exitCode'] == 0, err_msg

                if 'waiting' in cont_stats['state']:
                    # Only ContainerCreating is an acceptable waiting reason.
                    wr = cont_stats['state']['waiting']['reason']
                    assert wr == 'ContainerCreating', "Pod %s %s. Logs: `kubectl logs pod/%s`" % (
                        pod.name, wr, pod.name)

            for cond in status.get('conditions', []):
                if 'message' in cond:
                    if cond['reason'] == 'ContainersNotReady':
                        return False
                    assert cond['status'] != 'False', \
                        "[ERROR] %s - %s" % (cond['reason'], cond['message'])
        return True

    def __get_job_status(self):
        """Return the Kubernetes job status"""
        # Figure out status and return it
        job = self.__get_job()

        if "succeeded" in job.obj[
                "status"] and job.obj["status"]["succeeded"] > 0:
            job.scale(replicas=0)
            if self.print_pod_logs_on_exit:
                self.__print_pod_logs()
            if self.delete_on_success:
                self.__delete_job_cascade(job)
            return "SUCCEEDED"

        if "failed" in job.obj["status"]:
            failed_cnt = job.obj["status"]["failed"]
            self.__logger.debug("Kubernetes job " + self.uu_name
                                + " status.failed: " + str(failed_cnt))
            if self.print_pod_logs_on_exit:
                self.__print_pod_logs()
            if failed_cnt > self.max_retrials:
                job.scale(replicas=0)  # avoid more retrials
                return "FAILED"
        return "RUNNING"

    def __delete_job_cascade(self, job):
        # Background propagation removes the job's pods together with it.
        delete_options_cascade = {
            "kind": "DeleteOptions",
            "apiVersion": "v1",
            "propagationPolicy": "Background"
        }
        r = self.__kube_api.delete(json=delete_options_cascade,
                                   **job.api_kwargs())
        if r.status_code != 200:
            self.__kube_api.raise_for_status(r)

    def run(self):
        self._init_kubernetes()
        # Render job
        job_json = {
            "apiVersion": "batch/v1",
            "kind": "Job",
            "metadata": {
                "name": self.uu_name,
                "labels": {
                    "spawned_by": "luigi",
                    "luigi_task_id": self.job_uuid
                }
            },
            "spec": {
                "backoffLimit": self.backoff_limit,
                "template": {
                    "metadata": {
                        "name": self.uu_name,
                        "labels": {}
                    },
                    "spec": self.spec_schema
                }
            }
        }
        if self.kubernetes_namespace is not None:
            job_json['metadata']['namespace'] = self.kubernetes_namespace
        if self.active_deadline_seconds is not None:
            job_json['spec']['activeDeadlineSeconds'] = \
                self.active_deadline_seconds
        # Update user labels
        job_json['metadata']['labels'].update(self.labels)
        job_json['spec']['template']['metadata']['labels'].update(self.labels)

        # Add default restartPolicy if not specified
        if "restartPolicy" not in self.spec_schema:
            job_json["spec"]["template"]["spec"]["restartPolicy"] = "Never"

        # Submit job
        self.__logger.info("Submitting Kubernetes Job: " + self.uu_name)
        job = Job(self.__kube_api, job_json)
        job.create()

        # Track the Job (wait while active)
        self.__logger.info("Start tracking Kubernetes Job: " + self.uu_name)
        self.__track_job()

    def output(self):
        """
        An output target is necessary for checking job completion unless an
        alternative complete method is defined.

        Example::

            return luigi.LocalTarget(os.path.join('/tmp', 'example'))

        """
        pass
"service_name": service_name, "is_default_endpoint": is_default_endpoint, "edge_num": i, "edge_location": edge_location, "edge_target": edge_target, "run_id": pod_run_id , "additional" : additional} else: print('Unable to get details of the tool {} from API due to errors. Empty endpoints will be returned'.format(docker_image)) else: print('Unable to get details of the tool {} from API due to errors. Empty endpoints will be returned'.format(docker_image)) else: print('Unable to get details of a RunID {} from API due to errors'.format(pod_run_id)) return service_list kube_api = HTTPClient(KubeConfig.from_service_account()) kube_api.session.verify = False edge_kube_service = Service.objects(kube_api).filter(selector={EDGE_SVC_ROLE_LABEL: EDGE_SVC_ROLE_LABEL_VALUE}) if len(edge_kube_service.response['items']) == 0: print('EDGE service is not found by label: cloud-pipeline/role=EDGE') exit(1) else: edge_kube_service_object = edge_kube_service.response['items'][0] edge_kube_service_object_metadata = edge_kube_service_object['metadata'] if 'labels' in edge_kube_service_object_metadata and EDGE_SVC_HOST_LABEL in edge_kube_service_object_metadata['labels']: edge_service_external_ip = edge_kube_service_object_metadata['labels'][EDGE_SVC_HOST_LABEL] if 'labels' in edge_kube_service_object_metadata and EDGE_SVC_PORT_LABEL in edge_kube_service_object_metadata['labels']: edge_service_port = edge_kube_service_object_metadata['labels'][EDGE_SVC_PORT_LABEL]
def __init__(self):
    """Create a Kubernetes API client authenticated via the pod's service account.

    Only works when running inside a cluster, where the service-account
    token is mounted into the pod (KubeConfig.from_service_account()).
    """
    self.__kube_api = HTTPClient(KubeConfig.from_service_account())
    # Disable TLS certificate verification on every API request.
    # NOTE(review): this accepts any server certificate — presumably the
    # in-cluster CA is not trusted by default here; confirm this is intended.
    self.__kube_api.session.verify = False
class KubernetesJobTask(luigi.Task):
    """Luigi task that submits a Kubernetes Job and polls it to completion.

    Subclasses must define :attr:`name` and :attr:`spec_schema`.
    Authentication is via kubeconfig file or in-cluster service account,
    selected by :attr:`auth_method`.
    """

    __POLL_TIME = 5  # seconds between status polls; see __track_job
    _kubernetes_config = None  # lazily loaded luigi config section (see kubernetes_config)

    def _init_kubernetes(self):
        """Build the pykube API client and a unique job name for this run."""
        self.__logger = logger
        self.__logger.debug("Kubernetes auth method: " + self.auth_method)
        if self.auth_method == "kubeconfig":
            self.__kube_api = HTTPClient(KubeConfig.from_file(self.kubeconfig_path))
        elif self.auth_method == "service-account":
            self.__kube_api = HTTPClient(KubeConfig.from_service_account())
        else:
            raise ValueError("Illegal auth_method")
        self.job_uuid = str(uuid.uuid4().hex)
        now = datetime.utcnow()
        # Timestamp + UUID prefix keeps names unique and sortable.
        self.uu_name = "%s-%s-%s" % (self.name, now.strftime('%Y%m%d%H%M%S'), self.job_uuid[:16])

    @property
    def auth_method(self):
        """
        This can be set to ``kubeconfig`` or ``service-account``.
        It defaults to ``kubeconfig``.

        For more details, please refer to:

        - kubeconfig: http://kubernetes.io/docs/user-guide/kubeconfig-file
        - service-account: http://kubernetes.io/docs/user-guide/service-accounts
        """
        return self.kubernetes_config.auth_method

    @property
    def kubeconfig_path(self):
        """
        Path to kubeconfig file used for cluster authentication.
        It defaults to "~/.kube/config", which is the default location
        when using minikube (http://kubernetes.io/docs/getting-started-guides/minikube).
        When auth_method is ``service-account`` this property is ignored.

        **WARNING**: For Python versions < 3.5 kubeconfig must point to a Kubernetes API
        hostname, and NOT to an IP address.

        For more details, please refer to:
        http://kubernetes.io/docs/user-guide/kubeconfig-file
        """
        return self.kubernetes_config.kubeconfig_path

    @property
    def name(self):
        """
        A name for this job. This task will automatically append a UUID to
        the name before to submit to Kubernetes.
        """
        raise NotImplementedError("subclass must define name")

    @property
    def labels(self):
        """
        Return custom labels for kubernetes job.
        example:: ``{"run_dt": datetime.date.today().strftime('%F')}``
        """
        return {}

    @property
    def spec_schema(self):
        """
        Kubernetes Job spec schema in JSON format, an example follows.

        .. code-block:: javascript

            {
                "containers": [{
                    "name": "pi",
                    "image": "perl",
                    "command": ["perl",  "-Mbignum=bpi", "-wle", "print bpi(2000)"]
                }],
                "restartPolicy": "Never"
            }

        **restartPolicy**

        - If restartPolicy is not defined, it will be set to "Never" by default.
        - **Warning**: restartPolicy=OnFailure will bypass max_retrials, and restart
          the container until success, with the risk of blocking the Luigi task.

        For more informations please refer to:
        http://kubernetes.io/docs/user-guide/pods/multi-container/#the-spec-schema
        """
        raise NotImplementedError("subclass must define spec_schema")

    @property
    def max_retrials(self):
        """
        Maximum number of retrials in case of failure.
        """
        return self.kubernetes_config.max_retrials

    @property
    def delete_on_success(self):
        """
        Delete the Kubernetes workload if the job has ended successfully.
        """
        return True

    @property
    def print_pod_logs_on_exit(self):
        """
        Fetch and print the pod logs once the job is completed.
        """
        return False

    @property
    def active_deadline_seconds(self):
        """
        Time allowed to successfully schedule pods.
        See: https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/#job-termination-and-cleanup
        """
        return 100

    @property
    def kubernetes_config(self):
        # Load the luigi [kubernetes] config section once and cache it.
        if not self._kubernetes_config:
            self._kubernetes_config = kubernetes()
        return self._kubernetes_config

    def __track_job(self):
        """Poll job status while active, then signal success or fail hard."""
        while not self.__verify_job_has_started():
            time.sleep(self.__POLL_TIME)
            self.__logger.debug("Waiting for Kubernetes job " + self.uu_name + " to start")
        self.__print_kubectl_hints()

        status = self.__get_job_status()
        while status == "RUNNING":
            self.__logger.debug("Kubernetes job " + self.uu_name + " is running")
            time.sleep(self.__POLL_TIME)
            status = self.__get_job_status()

        assert status != "FAILED", "Kubernetes job " + self.uu_name + " failed"

        # status == "SUCCEEDED"
        self.__logger.info("Kubernetes job " + self.uu_name + " succeeded")
        self.signal_complete()

    def signal_complete(self):
        """Signal job completion for scheduler and dependent tasks.

        Touching a system file is an easy way to signal completion. example::

        .. code-block:: python

            with self.output().open('w') as output_file:
                output_file.write('')
        """
        pass

    def __get_pods(self):
        # Pods spawned by a Job carry the "job-name" label set by Kubernetes.
        pod_objs = Pod.objects(self.__kube_api) \
            .filter(selector="job-name=" + self.uu_name) \
            .response['items']
        return [Pod(self.__kube_api, p) for p in pod_objs]

    def __get_job(self):
        # The job is looked up by the luigi_task_id label we set at submit time.
        jobs = Job.objects(self.__kube_api) \
            .filter(selector="luigi_task_id=" + self.job_uuid) \
            .response['items']
        assert len(jobs) == 1, "Kubernetes job " + self.uu_name + " not found"
        return Job(self.__kube_api, jobs[0])

    def __print_pod_logs(self):
        # Dump each pod's logs line-by-line through the task logger.
        for pod in self.__get_pods():
            logs = pod.logs(timestamps=True).strip()
            self.__logger.info("Fetching logs from " + pod.name)
            if len(logs) > 0:
                for l in logs.split('\n'):
                    self.__logger.info(l)

    def __print_kubectl_hints(self):
        self.__logger.info("To stream Pod logs, use:")
        for pod in self.__get_pods():
            self.__logger.info("`kubectl logs -f pod/%s`" % pod.name)

    def __verify_job_has_started(self):
        """Asserts that the job has successfully started"""
        # Verify that the job started
        self.__get_job()

        # Verify that the pod started
        pods = self.__get_pods()
        assert len(pods) > 0, "No pod scheduled by " + self.uu_name
        for pod in pods:
            status = pod.obj['status']
            for cont_stats in status.get('containerStatuses', []):
                if 'terminated' in cont_stats['state']:
                    t = cont_stats['state']['terminated']
                    err_msg = "Pod %s %s (exit code %d). Logs: `kubectl logs pod/%s`" % (
                        pod.name, t['reason'], t['exitCode'], pod.name)
                    assert t['exitCode'] == 0, err_msg

                if 'waiting' in cont_stats['state']:
                    wr = cont_stats['state']['waiting']['reason']
                    assert wr == 'ContainerCreating', "Pod %s %s. Logs: `kubectl logs pod/%s`" % (
                        pod.name, wr, pod.name)

            # FIX: a freshly-scheduled pod may not have status.conditions yet;
            # use .get() (like containerStatuses above) instead of raising KeyError.
            for cond in status.get('conditions', []):
                if 'message' in cond:
                    if cond['reason'] == 'ContainersNotReady':
                        return False
                    assert cond['status'] != 'False', \
                        "[ERROR] %s - %s" % (cond['reason'], cond['message'])
        return True

    def __get_job_status(self):
        """Return the Kubernetes job status"""
        # Figure out status and return it
        job = self.__get_job()

        if "succeeded" in job.obj["status"] and job.obj["status"]["succeeded"] > 0:
            job.scale(replicas=0)
            if self.print_pod_logs_on_exit:
                self.__print_pod_logs()
            if self.delete_on_success:
                self.__delete_job_cascade(job)
            return "SUCCEEDED"

        if "failed" in job.obj["status"]:
            failed_cnt = job.obj["status"]["failed"]
            self.__logger.debug("Kubernetes job " + self.uu_name +
                                " status.failed: " + str(failed_cnt))
            if self.print_pod_logs_on_exit:
                self.__print_pod_logs()
            if failed_cnt > self.max_retrials:
                job.scale(replicas=0)  # avoid more retrials
                return "FAILED"
        return "RUNNING"

    def __delete_job_cascade(self, job):
        # Background propagation deletes the Job's pods along with the Job.
        delete_options_cascade = {
            "kind": "DeleteOptions",
            "apiVersion": "v1",
            "propagationPolicy": "Background"
        }
        r = self.__kube_api.delete(json=delete_options_cascade, **job.api_kwargs())
        if r.status_code != 200:
            self.__kube_api.raise_for_status(r)

    def run(self):
        self._init_kubernetes()
        # Render job
        job_json = {
            "apiVersion": "batch/v1",
            "kind": "Job",
            "metadata": {
                "name": self.uu_name,
                "labels": {
                    "spawned_by": "luigi",
                    "luigi_task_id": self.job_uuid
                }
            },
            "spec": {
                "activeDeadlineSeconds": self.active_deadline_seconds,
                "template": {
                    "metadata": {
                        "name": self.uu_name
                    },
                    "spec": self.spec_schema
                }
            }
        }
        # Update user labels
        job_json['metadata']['labels'].update(self.labels)
        # Add default restartPolicy if not specified
        if "restartPolicy" not in self.spec_schema:
            job_json["spec"]["template"]["spec"]["restartPolicy"] = "Never"
        # Submit job
        self.__logger.info("Submitting Kubernetes Job: " + self.uu_name)
        job = Job(self.__kube_api, job_json)
        job.create()
        # Track the Job (wait while active)
        self.__logger.info("Start tracking Kubernetes Job: " + self.uu_name)
        self.__track_job()

    def output(self):
        """
        An output target is necessary for checking job completion unless an
        alternative complete method is defined.

        Example::

            return luigi.LocalTarget(os.path.join('/tmp', 'example'))
        """
        pass