    def execute(self, context):
        try:
            client = kube_client.get_kube_client(
                in_cluster=self.in_cluster,
                cluster_context=self.cluster_context,
                config_file=self.config_file)
            gen = pod_generator.PodGenerator()

            for port in self.ports:
                gen.add_port(port)
            for mount in self.volume_mounts:
                gen.add_mount(mount)
            for volume in self.volumes:
                gen.add_volume(volume)

            pod = gen.make_pod(
                namespace=self.namespace,
                image=self.image,
                pod_id=self.name,
                cmds=self.cmds,
                arguments=self.arguments,
                labels=self.labels,
            )

            pod.service_account_name = self.service_account_name
            pod.secrets = self.secrets
            pod.envs = self.env_vars
            pod.image_pull_policy = self.image_pull_policy
            pod.image_pull_secrets = self.image_pull_secrets
            pod.annotations = self.annotations
            pod.resources = self.resources
            pod.affinity = self.affinity
            pod.node_selectors = self.node_selectors
            pod.hostnetwork = self.hostnetwork
            pod.tolerations = self.tolerations
            pod.configmaps = self.configmaps
            pod.security_context = self.security_context
            pod.pod_runtime_info_envs = self.pod_runtime_info_envs
            pod.dnspolicy = self.dnspolicy

            launcher = pod_launcher.PodLauncher(kube_client=client,
                                                extract_xcom=self.xcom_push)
            try:
                (final_state, result) = launcher.run_pod(
                    pod,
                    startup_timeout=self.startup_timeout_seconds,
                    get_logs=self.get_logs)
            finally:
                if self.is_delete_operator_pod:
                    launcher.delete_pod(pod)

            if final_state != State.SUCCESS:
                raise AirflowException(
                    'Pod returned a failure: {state}'.format(
                        state=final_state))
            if self.xcom_push:
                return result
        except AirflowException as ex:
            raise AirflowException(
                'Pod Launching failed: {error}'.format(error=ex))
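For context, a minimal sketch of how an operator built around an execute() like the one above is typically driven from a DAG, assuming the Airflow 1.10-era contrib import path; the DAG id, image, and command here are illustrative:

from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator

with DAG(dag_id='k8s_pod_example',
         start_date=datetime(2020, 1, 1),
         schedule_interval=None) as dag:
    run_in_pod = KubernetesPodOperator(
        task_id='run_in_pod',
        name='run-in-pod',
        namespace='default',
        image='python:3.7-slim',
        cmds=['python', '-c'],
        arguments=['print("hello from the pod")'],
        get_logs=True,
        is_delete_operator_pod=True,  # triggers the finally-block cleanup above
    )
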
    def execute(self, context):
        try:
            client = kube_client.get_kube_client(in_cluster=self.in_cluster)
            gen = pod_generator.PodGenerator()

            pod = gen.make_pod(
                namespace=self.namespace,
                image=self.image,
                pod_id=self.name,
                cmds=self.cmds,
                arguments=self.arguments,
                labels=self.labels
            )

            pod.secrets = self.secrets
            pod.envs = self.env_vars

            launcher = pod_launcher.PodLauncher(client)
            final_state = launcher.run_pod(
                pod,
                startup_timeout=self.startup_timeout_seconds,
                get_logs=self.get_logs)
            if final_state != State.SUCCESS:
                raise AirflowException('Pod returned a failure')
        except AirflowException as ex:
            raise AirflowException('Pod Launching failed: {error}'.format(error=ex))
    def infrastructure(self):

        kube_client = get_kube_client()
        namespace = configuration.get("kubernetes", "namespace")

        headers = {
            "Authorization":
            kube_client.api_client.configuration.get_api_key_with_prefix(
                'authorization')
        }
        url = "{0}/oapi/v1/namespaces/{1}/deploymentconfigs".format(
            kube_client.api_client.configuration.host, namespace)

        response = requests.get(
            url,
            headers=headers,
            params={"labelSelector": self.AIRFLOW_LABEL},
            verify=kube_client.api_client.configuration.ssl_ca_cert)

        if response.status_code == 200:
            deployment_configs = response.json()
        else:
            return abort(response.status_code)

        try:
            pods = kube_client.list_namespaced_pod(
                namespace=namespace, label_selector=self.AIRFLOW_LABEL)
            pods = pods.to_dict()
        except ApiException:
            return abort(401)

        return jsonify(
            self.to_graph(namespace=namespace,
                          oc_deployment_configs=deployment_configs,
                          oc_pods=pods))
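The hand-rolled REST call above (reusing the kube client's host, bearer token, and CA bundle) recurs in several examples below; a small helper sketch of the shared pattern, with an illustrative name:

import requests

def openshift_get(kube_client, path, **params):
    # Reuse the Kubernetes client's credentials for endpoints the client
    # library doesn't wrap (e.g. OpenShift deploymentconfigs or imagestreams).
    cfg = kube_client.api_client.configuration
    return requests.get(
        "{0}{1}".format(cfg.host, path),
        headers={"Authorization": cfg.get_api_key_with_prefix('authorization')},
        params=params or None,
        verify=cfg.ssl_ca_cert)
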
    def pod(self, pod):
        task_instances = self.get_task_instances(pod=pod)
        if not task_instances:
            abort(404)

        kube_client = get_kube_client()
        namespace = configuration.get("kubernetes", "namespace")
        client = elasticsearch.Elasticsearch([ELASTICSEARCH_HOST])

        count = Search(using=client) \
            .query('match', **{"beat.hostname": pod}) \
            .sort('offset') \
            .count()
        task_instance = task_instances
        try:
            pod = kube_client.read_namespaced_pod(name=pod,
                                                  namespace=namespace)
            pod = self.pod_info(oc_pod=pod.to_dict(),
                                task_instance=task_instance,
                                namespace=namespace,
                                host=self.HOST)
        except ApiException:
            pod = self.pod_info(
                oc_pod=self.get_pod_from_log_info(task_instance=task_instance),
                task_instance=task_instance,
                namespace=namespace,
                host=self.HOST)
        pod['log'] = {"count": count}

        return jsonify(pod)
    def execute(self, context):
        try:

            client = kube_client.get_kube_client(in_cluster=self.in_cluster)
            gen = pod_generator.PodGenerator()

            pod = gen.make_pod(
                namespace=self.namespace,
                image=self.image,
                pod_id=self.name,
                cmds=self.cmds,
                arguments=self.arguments,
                labels=self.labels
            )

            pod.secrets = self.secrets

            launcher = pod_launcher.PodLauncher(client)
            final_state = launcher.run_pod(
                pod,
                startup_timeout=self.startup_timeout_seconds,
                get_logs=self.get_logs)
            if final_state != State.SUCCESS:
                raise AirflowException('Pod returned a failure')
        except AirflowException as ex:
            raise AirflowException('Pod Launching failed: {error}'.format(error=ex))
Example #6
    def on_kill(self):

        self.log.debug("Kill Command is being called")

        if self._should_track_driver_status:
            if self._driver_id:
                self.log.info("Killing driver {} on cluster".format(
                    self._driver_id))

                kill_cmd = self._build_spark_driver_kill_command()
                driver_kill = subprocess.Popen(kill_cmd,
                                               stdout=subprocess.PIPE,
                                               stderr=subprocess.PIPE)

                self.log.info(
                    "Spark driver {} killed with return code: {}".format(
                        self._driver_id, driver_kill.wait()))

        if self._submit_sp and self._submit_sp.poll() is None:
            self.log.info("Sending kill signal to %s",
                          self._connection["spark_binary"])
            self._submit_sp.kill()

            if self._yarn_application_id:
                self.log.info("Killing application {} on YARN".format(
                    self._yarn_application_id))

                kill_cmd = "yarn application -kill {}".format(
                    self._yarn_application_id).split()
                yarn_kill = subprocess.Popen(kill_cmd,
                                             stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE)

                self.log.info("YARN killed with return code: %s",
                              yarn_kill.wait())

            if self._kubernetes_driver_pod:
                self.log.info("Killing pod %s on Kubernetes",
                              self._kubernetes_driver_pod)

                # Currently only instantiate Kubernetes client for killing a spark pod.
                try:
                    import kubernetes

                    client = kube_client.get_kube_client()
                    api_response = client.delete_namespaced_pod(
                        self._kubernetes_driver_pod,
                        self._connection["namespace"],
                        body=kubernetes.client.V1DeleteOptions(),
                        pretty=True,
                    )

                    self.log.info("Spark on K8s killed with response: %s",
                                  api_response)

                except kube_client.ApiException as e:
                    self.log.info(
                        "Exception when attempting to kill Spark on K8s:")
                    self.log.exception(e)
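_build_spark_driver_kill_command is not shown in this listing; against a Spark standalone master it typically assembles a spark-submit --kill invocation for the REST submission port (a sketch; the host and driver id are illustrative):

kill_cmd = ['spark-submit', '--master', 'spark://spark-master:6066',
            '--kill', 'driver-20200101000000-0000']
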
Example #7
    def execute(self, context):
        try:
            client = kube_client.get_kube_client(
                in_cluster=self.in_cluster,
                cluster_context=self.cluster_context,
                config_file=self.config_file)
            gen = pod_generator.PodGenerator()

            for mount in self.volume_mounts:
                gen.add_mount(mount)
            for volume in self.volumes:
                gen.add_volume(volume)

            if self.in_cluster:
                worker_pod_name = os.environ.get('WORKER_POD_NAME')
                worker_pod_uid = os.environ.get('WORKER_POD_UID')
                gen.add_ownerreference(worker_pod_name, "v1", "Pod",
                                       worker_pod_uid)

            pod = gen.make_pod(
                namespace=self.namespace,
                image=self.image,
                pod_id=self.name,
                cmds=self.cmds,
                arguments=self.arguments,
                labels=self.labels,
            )

            pod.service_account_name = self.service_account_name
            pod.secrets = self.secrets
            pod.envs = self.env_vars
            pod.image_pull_policy = self.image_pull_policy
            pod.annotations = self.annotations
            pod.resources = self.resources
            pod.affinity = self.affinity
            pod.node_selectors = self.node_selectors
            pod.image_pull_secrets = self.image_pull_secrets
            pod.hostnetwork = self.hostnetwork

            launcher = pod_launcher.PodLauncher(kube_client=client,
                                                extract_xcom=self.xcom_push)
            (final_state, result) = launcher.run_pod(
                pod,
                startup_timeout=self.startup_timeout_seconds,
                get_logs=self.get_logs)

            # if self.is_delete_operator_pod:
            #     launcher.delete_pod(pod)

            if final_state != State.SUCCESS:
                raise AirflowException(
                    'Pod returned a failure: {state}'.format(
                        state=final_state))
            if self.xcom_push:
                return result
        except AirflowException as ex:
            raise AirflowException(
                'Pod Launching failed: {error}'.format(error=ex))
    def cluster_view(self):
        kube_client = get_kube_client()
        namespace = configuration.get("kubernetes", "namespace")

        pods = kube_client.list_namespaced_pod(
            namespace=namespace, label_selector=self.AIRFLOW_LABEL)
        return self.render("openshift_cluster_view.html",
                           namespace=namespace,
                           pods=pods)
Example #9
    def log_pod_creation(self, pod: Pod, resp, session=None):
        from openshift_plugin.executor.airflow_openshift_scheduler import AirflowOpenShiftScheduler

        execution_date = AirflowOpenShiftScheduler.label_safe_datestring_to_datetime(pod.labels['execution_date'])

        task_instance = session.query(TaskInstance) \
            .filter(TaskInstance.dag_id == pod.labels['dag_id']) \
            .filter(TaskInstance.task_id == pod.labels['task_id']) \
            .filter(TaskInstance.execution_date == execution_date).first()

        if not task_instance:
            self.log.error(
                "Could not find task instance based on the pod labels"
                " ({dag_id} {task_id} {execution_date} {try_number})".format(
                    **pod.labels))
            self.log.error("Log information will be incomplete. This is a BUG please report!!!")

        def default(o):
            if isinstance(o, (datetime.date, datetime.datetime)):
                return o.isoformat()

        kube_client = get_kube_client()

        headers = {"Authorization": kube_client.api_client.configuration.get_api_key_with_prefix('authorization')}
        url = "{0}/apis/image.openshift.io/v1/namespaces/{1}/imagestreamtags/{2}".format(
            kube_client.api_client.configuration.host,
            pod.image.split("/")[-2], quote_plus(pod.image.split("/")[-1]))

        response = requests.get(url,
                                headers=headers,
                                verify=kube_client.api_client.configuration.ssl_ca_cert)

        resp = resp.to_dict()

        if response.status_code == 200:
            image_reference = response.json()
            resp['spec']['containers'][0]['image'] = image_reference["tag"]["from"]["name"]
        else:
            image_reference = None

        log = Log(
            event=OpenShiftPodLauncer.EVENT_POD_CREATION,
            dag_id=task_instance.dag_id,
            task_instance=None,
            task_id=task_instance.task_id,
            execution_date=task_instance.execution_date,
            extra=json.dumps(
                {
                    "request": self.kube_req_factory.create(pod),
                    "response": resp,
                    "image": image_reference
                }, default=default)
        )
        session.add(log)
        session.commit()
    def execute(self, context):
        try:
            client = kube_client.get_kube_client(in_cluster=self.in_cluster,
                                                 cluster_context=self.cluster_context,
                                                 config_file=self.config_file)
            gen = pod_generator.PodGenerator()

            for mount in self.volume_mounts:
                gen.add_mount(mount)
            for volume in self.volumes:
                gen.add_volume(volume)

            pod = gen.make_pod(
                namespace=self.namespace,
                image=self.image,
                pod_id=self.name,
                cmds=self.cmds,
                arguments=self.arguments,
                labels=self.labels,
            )

            pod.service_account_name = self.service_account_name
            pod.secrets = self.secrets
            pod.envs = self.env_vars
            pod.image_pull_policy = self.image_pull_policy
            pod.image_pull_secrets = self.image_pull_secrets
            pod.annotations = self.annotations
            pod.resources = self.resources
            pod.affinity = self.affinity
            pod.node_selectors = self.node_selectors
            pod.hostnetwork = self.hostnetwork
            pod.tolerations = self.tolerations
            pod.configmaps = self.configmaps
            pod.security_context = self.security_context

            launcher = pod_launcher.PodLauncher(kube_client=client,
                                                extract_xcom=self.do_xcom_push)
            try:
                (final_state, result) = launcher.run_pod(
                    pod,
                    startup_timeout=self.startup_timeout_seconds,
                    get_logs=self.get_logs)
            finally:
                if self.is_delete_operator_pod:
                    launcher.delete_pod(pod)

            if final_state != State.SUCCESS:
                raise AirflowException(
                    'Pod returned a failure: {state}'.format(state=final_state)
                )

            return result
        except AirflowException as ex:
            raise AirflowException('Pod Launching failed: {error}'.format(error=ex))
    def run(self):
        kube_client = get_kube_client()
        while True:
            try:
                self.resource_version = self._run(kube_client, self.resource_version,
                                                  self.worker_uuid)
            except Exception:
                self.log.exception('Unknown error in KubernetesJobWatcher. Failing')
                raise
            else:
                self.log.warning('Watch died gracefully, starting back up with: '
                                 'last resource_version: %s', self.resource_version)
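For context, a rough sketch of what a _run() like the one called above does in the stock executor: stream pod events through the kubernetes watch API and hand back the last seen resource_version. The namespace and label selector here are assumptions:

from kubernetes import watch

def _run(kube_client, resource_version, worker_uuid):
    kwargs = {'label_selector': 'airflow-worker={}'.format(worker_uuid)}
    if resource_version:
        kwargs['resource_version'] = resource_version
    last_resource_version = resource_version
    for event in watch.Watch().stream(kube_client.list_namespaced_pod,
                                      namespace='default', **kwargs):
        pod = event['object']
        last_resource_version = pod.metadata.resource_version
        # ...inspect pod.status.phase here and report it back to the executor...
    return last_resource_version
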
    def get_image_url(self, host, orig_namespace, image_reference: str):
        if "kibana" in image_reference:
            return "https://www.docker.elastic.co"

        if "postgresql" in image_reference:
            return None

        kube_client = get_kube_client()

        image_reference_split = image_reference.split("/")

        image_hash = image_reference_split[-1]
        if len(image_reference_split) >= 2:
            namespace = image_reference_split[-2]
        else:
            namespace = orig_namespace

        if "@" in image_hash:
            image_stream, image_sha256 = image_hash.split("@")
            headers = {
                "Authorization":
                kube_client.api_client.configuration.get_api_key_with_prefix(
                    'authorization')
            }
            url = "{0}/oapi/v1/namespaces/{1}/imagestreams/{2}".format(
                kube_client.api_client.configuration.host, namespace,
                image_stream)

            response = requests.get(
                url,
                headers=headers,
                params={"labelSelector": self.AIRFLOW_LABEL},
                verify=kube_client.api_client.configuration.ssl_ca_cert)

            if response.status_code == 200:
                image = response.json()
            else:
                return None

            image_tag_name = None
            for image_tag in image['spec']['tags']:
                if image_tag['from']['name'] == image_hash:
                    image_tag_name = image_tag['name']
                    break
        else:
            image_stream, image_tag_name = image_hash.split(":")

        if image_tag_name:
            return "{0}/console/project/{1}/browse/images/{2}/{3}?tab=body".format(
                host, namespace, image_stream, image_tag_name)
        else:
            return None
    def on_kill(self):

        self.log.debug("Kill Command is being called")

        if self._should_track_driver_status:
            if self._driver_id:
                self.log.info('Killing driver {} on cluster'
                              .format(self._driver_id))

                kill_cmd = self._build_spark_driver_kill_command()
                driver_kill = subprocess.Popen(kill_cmd,
                                               stdout=subprocess.PIPE,
                                               stderr=subprocess.PIPE)

                self.log.info("Spark driver {} killed with return code: {}"
                              .format(self._driver_id, driver_kill.wait()))

        if self._submit_sp and self._submit_sp.poll() is None:
            self.log.info('Sending kill signal to %s', self._connection['spark_binary'])
            self._submit_sp.kill()

            if self._yarn_application_id:
                self.log.info('Killing application {} on YARN'
                              .format(self._yarn_application_id))

                kill_cmd = "yarn application -kill {}" \
                    .format(self._yarn_application_id).split()
                yarn_kill = subprocess.Popen(kill_cmd,
                                             stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE)

                self.log.info("YARN killed with return code: %s", yarn_kill.wait())

            if self._kubernetes_driver_pod:
                self.log.info('Killing pod %s on Kubernetes', self._kubernetes_driver_pod)

                # Currently only instantiate Kubernetes client for killing a spark pod.
                try:
                    import kubernetes
                    client = kube_client.get_kube_client()
                    api_response = client.delete_namespaced_pod(
                        self._kubernetes_driver_pod,
                        self._connection['namespace'],
                        body=kubernetes.client.V1DeleteOptions(),
                        pretty=True)

                    self.log.info("Spark on K8s killed with response: %s", api_response)

                except kube_client.ApiException as e:
                    self.log.info("Exception when attempting to kill Spark on K8s:")
                    self.log.exception(e)
Example #15
    def get_image_dag_info(self):
        client = self.kube_client or get_kube_client()
        launcher = PodLauncher(kube_client=client)
        pod = self.create_sync_pod()
        status, result = launcher.run_pod(pod, get_logs=False)

        logs = client.read_namespaced_pod_log(
            name=pod.name,
            namespace=pod.namespace,
            container='base',
            follow=True,
            _preload_content=False)

        launcher.delete_pod(pod)

        return status, logs.data, pod
    def extract_env_and_secrets(pod, req):
        KubernetesRequestFactory.extract_env_and_secrets(pod, req)

        env = req['spec']['containers'][0]['env'].copy()
        env = [i for i in env if not i["name"].startswith("AIRFLOW")]
        kube_client = get_kube_client()

        if configuration.conf.getboolean("kubernetes", "in_cluster"):
            pod_config = kube_client.read_namespaced_pod(
                name=os.getenv("HOSTNAME"),
                namespace=configuration.conf.get("kubernetes", "namespace"))
        else:
            pods = kube_client.list_namespaced_pod(
                namespace=configuration.conf.get("kubernetes", "namespace"),
                label_selector="component=airflow-scheduler")
            pod_config = pods.items[0]

        self_env = [
            convert_dict_key_case(e.to_dict())
            for e in pod_config.spec.containers[0].env
            if e.name not in ["AIRFLOW_COMMAND", "AIRFLOW_EXECUTOR"]
        ]

        self_env.append({"name": "AIRFLOW_EXECUTOR", "value": "LocalExecutor"})

        if configuration.conf.has_option("core", "worker_logging_level"):
            self_env.append({
                "name":
                "AIRFLOW__CORE__LOGGING_LEVEL",
                "value":
                configuration.conf.get("core", "worker_logging_level")
            })
            self_env.append({
                "name":
                "AIRFLOW__CORE__FAB_LOGGING_LEVEL",
                "value":
                configuration.conf.get("core", "worker_logging_level")
            })

        req['spec']['containers'][0]['env'] = self_env + env

        self_env_from = [
            convert_dict_key_case(e.to_dict())
            for e in pod_config.spec.containers[0].env_from
        ]

        req['spec']['containers'][0]['envFrom'] = self_env_from
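The function above is meant to be swapped in for the request factory's method at runtime; a minimal wiring sketch (the same monkey-patch pattern appears in a later execute() in this listing):

launcher = pod_launcher.PodLauncher(kube_client=client, extract_xcom=False)
launcher.kube_req_factory.extract_env_and_secrets = extract_env_and_secrets
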
def authorize(oauth_app, authorized_response, user_info):
    with open('/run/secrets/kubernetes.io/serviceaccount/namespace',
              'r') as file:
        namespace = file.read()
    kube_client = get_kube_client()

    url = "{0}/apis/rbac.authorization.k8s.io/v1beta1/namespaces/{1}/rolebindings".format(
        kube_client.api_client.configuration.host, namespace)

    response = requests.get(
        url,
        headers={
            "Authorization": "Bearer {0}".format(oauth_app.consumer_secret)
        },
        verify=kube_client.api_client.configuration.ssl_ca_cert
        if kube_client.api_client.configuration.ssl_ca_cert else False)
    if response.status_code != 200:
        LoggingMixin().log.error(
            "The service account providing OAuth is not allowed to list "
            "rolebindings. Denying access to everyone!")
        return False, False

    role_binding_list = response.json()
    allowed_roles = []
    for role in role_binding_list['items']:

        def predicate(subject):
            if subject['kind'] in ['ServiceAccount', 'User']:
                return subject['name'] == user_info['metadata']['name']
            elif subject['kind'] == 'Group':
                return subject['name'] in user_info['groups']

        name = role['roleRef']['name']
        if next((x for x in role['subjects'] if predicate(x)), None):
            allowed_roles.append(name)

    allowed_roles = set(allowed_roles)
    access_roles = set(
        configuration.conf.get('openshift_plugin', 'access_roles').split(','))
    superuser_roles = set(
        configuration.conf.get('openshift_plugin',
                               'superuser_roles').split(','))

    return bool(allowed_roles & access_roles), \
           bool(allowed_roles & superuser_roles)
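Illustrative use of authorize(): it returns a (has_access, is_superuser) pair and expects comma-separated role lists in airflow.cfg under [openshift_plugin], e.g. access_roles = airflow-user and superuser_roles = airflow-admin (values assumed):

has_access, is_superuser = authorize(oauth_app, authorized_response, user_info)
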
    def start(self):
        self.log.info('Start Kubernetes executor')
        self.worker_uuid = KubeWorkerIdentifier.get_or_create_current_kube_worker_uuid()
        self.log.debug('Start with worker_uuid: %s', self.worker_uuid)
        # always need to reset resource version since we don't know
        # when we last started, note for behavior below
        # https://github.com/kubernetes-client/python/blob/master/kubernetes/docs
        # /CoreV1Api.md#list_namespaced_pod
        KubeResourceVersion.reset_resource_version()
        self.task_queue = Queue()
        self.result_queue = Queue()
        self.kube_client = get_kube_client()
        self.kube_scheduler = AirflowKubernetesScheduler(
            self.kube_config, self.task_queue, self.result_queue,
            self.kube_client, self.worker_uuid)
        self._inject_secrets()
        self.clear_not_launched_queued_tasks()
    def execute(self, context):
        try:
            client = kube_client.get_kube_client(
                in_cluster=self.in_cluster,
                cluster_context=self.cluster_context,
                config_file=self.config_file)
            gen = pod_generator.PodGenerator()

            for mount in self.volume_mounts:
                gen.add_mount(mount)
            for volume in self.volumes:
                gen.add_volume(volume)

            pod = gen.make_pod(
                namespace=self.namespace,
                image=self.image,
                pod_id=self.name,
                cmds=self.cmds,
                arguments=self.arguments,
                labels=self.labels,
            )

            pod.secrets = self.secrets
            pod.envs = self.env_vars
            pod.image_pull_policy = self.image_pull_policy
            pod.annotations = self.annotations
            pod.resources = self.resources
            pod.affinity = self.affinity

            launcher = pod_launcher.PodLauncher(kube_client=client,
                                                extract_xcom=self.xcom_push)
            (final_state, result) = launcher.run_pod(
                pod,
                startup_timeout=self.startup_timeout_seconds,
                get_logs=self.get_logs)
            if final_state != State.SUCCESS:
                raise AirflowException(
                    'Pod returned a failure: {state}'.format(
                        state=final_state))
            if self.xcom_push:
                return result
        except AirflowException as ex:
            raise AirflowException(
                'Pod Launching failed: {error}'.format(error=ex))
    def post_sync(self):
        dag_tar, pod = self.get_dag_tag()

        kube_client = get_kube_client()

        headers = {
            "Authorization":
            kube_client.api_client.configuration.get_api_key_with_prefix(
                'authorization')
        }
        url = "{0}/apis/image.openshift.io/v1/namespaces/{1}/imagestreamtags/{2}".format(
            kube_client.api_client.configuration.host,
            pod.image.split("/")[-2], quote_plus(pod.image.split("/")[-1]))

        response = requests.get(
            url,
            headers=headers,
            verify=kube_client.api_client.configuration.ssl_ca_cert)

        if response.status_code == 200:
            image = response.json()
        else:
            image = None

        echo = subprocess.Popen(("echo", dag_tar.decode("utf-8")),
                                stdout=subprocess.PIPE)
        base64 = subprocess.Popen(("base64", "-d"),
                                  stdin=echo.stdout,
                                  stdout=subprocess.PIPE)
        output = subprocess.check_output(("tar", "-tzv"), stdin=base64.stdout)
        echo.wait()
        return self.render(
            "openshift_worker_image_sync_view.html",
            files=self.parse_tar_list(output),
            image=image,
            image_url=
            "{0}/console/project/dsi-test/browse/images/{1}/{2}?tab=body".
            format(OpenshiftClusterView.HOST,
                   image['metadata']['name'].split(":")[0],
                   image['tag']['name']))
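The echo | base64 -d | tar -tzv pipeline above can also be kept in-process; a sketch using only the standard library, assuming dag_tar holds base64-encoded, gzip-compressed tar bytes:

import base64
import io
import tarfile

def list_tar_members(dag_tar):
    raw = base64.b64decode(dag_tar)
    with tarfile.open(fileobj=io.BytesIO(raw), mode='r:gz') as archive:
        return [(member.name, member.size, member.mtime)
                for member in archive.getmembers()]
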
    def execute(self, context):
        try:
            client = kube_client.get_kube_client(in_cluster=self.in_cluster,
                                                 cluster_context=self.cluster_context)
            gen = pod_generator.PodGenerator()

            for mount in self.volume_mounts:
                gen.add_mount(mount)
            for volume in self.volumes:
                gen.add_volume(volume)

            pod = gen.make_pod(
                namespace=self.namespace,
                image=self.image,
                pod_id=self.name,
                cmds=self.cmds,
                arguments=self.arguments,
                labels=self.labels,
            )

            pod.secrets = self.secrets
            pod.envs = self.env_vars
            pod.image_pull_policy = self.image_pull_policy
            pod.annotations = self.annotations
            pod.resources = self.resources
            pod.affinity = self.affinity

            launcher = pod_launcher.PodLauncher(kube_client=client)
            final_state = launcher.run_pod(
                pod,
                startup_timeout=self.startup_timeout_seconds,
                get_logs=self.get_logs)
            if final_state != State.SUCCESS:
                raise AirflowException(
                    'Pod returned a failure: {state}'.format(state=final_state)
                )
        except AirflowException as ex:
            raise AirflowException('Pod Launching failed: {error}'.format(error=ex))
    def run(self):
        """Performs watching"""
        kube_client = get_kube_client()
        while True:
            try:
                self.resource_version = self._run(kube_client,
                                                  self.resource_version,
                                                  self.worker_uuid,
                                                  self.kube_config)
            except ReadTimeoutError:
                self.log.warning(
                    "There was a timeout error accessing the Kube API. "
                    "Retrying request.",
                    exc_info=True)
                time.sleep(1)
            except Exception:
                self.log.exception(
                    'Unknown error in KubernetesJobWatcher. Failing')
                raise
            else:
                self.log.warning(
                    'Watch died gracefully, starting back up with: '
                    'last resource_version: %s', self.resource_version)
Example #24
    def on_kill(self):

        self.log.info("Kill Command is being called")
        if self._is_ssh:
            if self._dataeng_spark:
                SSHOperator(
                    task_id='_kill_task',
                    command=f'kill -TERM $(cat {self.pidfile})',
                    ssh_conn_id=self._ssh_conn_id).execute(context=None)
                self.log.info("on_kill is finished")
            elif self._is_yarn:
                self.log.info('Killing application {} on YARN'.format(
                    self._yarn_application_id))

                kill_cmd = "yarn application -kill {}" \
                    .format(self._yarn_application_id)
                self.log.info('Killing via ssh command: {}'.format(kill_cmd))
                SSHOperator(task_id='_kill_spark',
                            ssh_conn_id=self._ssh_conn_id,
                            command=kill_cmd).execute(None)

                self.log.info("YARN killed")

        if self._should_track_driver_status:
            if self._driver_id:
                self.log.info('Killing driver {} on cluster'.format(
                    self._driver_id))

                kill_cmd = self._build_spark_driver_kill_command()
                if self._is_ssh:
                    ssh_kill_command = " ".join(kill_cmd)
                    self.log.info(
                        'Killing via ssh command: {}'.format(ssh_kill_command))
                    SSHOperator(task_id='_kill_spark',
                                ssh_conn_id=self._ssh_conn_id,
                                command=ssh_kill_command).execute(None)
                    self.log.info("Spark driver {} killed".format(
                        self._driver_id))
                else:
                    driver_kill = subprocess.Popen(kill_cmd,
                                                   stdout=subprocess.PIPE,
                                                   stderr=subprocess.PIPE)

                    self.log.info(
                        "Spark driver {} killed with return code: {}".format(
                            self._driver_id, driver_kill.wait()))

        if self._submit_sp and self._submit_sp.poll() is None:
            self.log.info('Sending kill signal to %s',
                          self._connection['spark_binary'])
            self._submit_sp.kill()

            if self._yarn_application_id:
                self.log.info('Killing application {} on YARN'.format(
                    self._yarn_application_id))

                kill_cmd = "yarn application -kill {}" \
                    .format(self._yarn_application_id).split()
                self.log.info('Killing via ssh command: {}'.format(kill_cmd))

                yarn_kill = subprocess.Popen(kill_cmd,
                                             stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE)

                self.log.info("YARN killed with return code: %s",
                              yarn_kill.wait())

            if self._kubernetes_driver_pod:
                self.log.info('Killing pod %s on Kubernetes',
                              self._kubernetes_driver_pod)

                # Currently only instantiate Kubernetes client for killing a spark pod.
                try:
                    import kubernetes

                    client = kube_client.get_kube_client()
                    api_response = client.delete_namespaced_pod(
                        self._kubernetes_driver_pod,
                        self._connection['namespace'],
                        body=kubernetes.client.V1DeleteOptions(),
                        pretty=True)

                    self.log.info("Spark on K8s killed with response: %s",
                                  api_response)

                except kube_client.ApiException as e:
                    self.log.info(
                        "Exception when attempting to kill Spark on K8s:")
                    self.log.exception(e)
Example #25
    def execute(self, context):

        (
            pod_template,
            deployment,
        ) = get_pod_template_from_deployment_labels_and_namespace(
            namespace=self.deployment_namespace or self.namespace,
            config_file=self.config_file,
            cluster_context=self.cluster_context,
            in_cluster=self.in_cluster,
            fields=self.deployment_fields,
            labels=self.deployment_labels,
        )
        pod_spec: V1PodSpec = pod_template.spec
        container: V1Container = pod_spec.containers[0]
        metadata: V1ObjectMeta = pod_template.metadata

        (
            plain_env_vars,
            container_secrets,
            container_config_maps,
            runtime_info_envs,
        ) = handle_container_environment_variables(container.env)

        self.image = self.image or container.image
        self.cmds = self.cmds or container.command
        self.arguments = self.arguments or container.args or []
        self.labels = self.labels or metadata.labels or {}
        self.name = self._set_name(self.name or deployment.metadata.name)
        self.env_vars = self.env_vars or plain_env_vars
        self.ports = self.ports or convert_ports(container)
        self.volume_mounts = self.volume_mounts or convert_volume_mounts(
            container)
        self.volumes = self.volumes or convert_volumes(pod_spec)
        self.secrets = self.secrets or container_secrets
        self.image_pull_policy = (self.image_pull_policy
                                  or container.image_pull_policy
                                  or "IfNotPresent")
        self.node_selectors = self.node_selectors or pod_spec.node_selector or {}
        self.annotations = self.annotations or metadata.annotations or {}
        self.affinity = self.affinity or convert_affinity(pod_spec)
        self.resources = (self.resources if (self.resources.has_limits()
                                             or self.resources.has_requests())
                          else convert_resources(container))
        self.image_pull_secrets = self.image_pull_secrets or convert_image_pull_secrets(
            pod_spec)
        self.service_account_name = (self.service_account_name
                                     or pod_spec.service_account_name
                                     or pod_spec.service_account or "default")
        self.hostnetwork = (pod_spec.host_network or False
                            if self.hostnetwork is None else self.hostnetwork)

        self.tolerations = self.tolerations or convert_tolerations(pod_spec)
        self.configmaps = self.configmaps or container_config_maps
        self.security_context = self.security_context or convert_security_context(
            pod_spec)
        self.pod_runtime_info_envs = self.pod_runtime_info_envs or runtime_info_envs
        self.dnspolicy = self.dnspolicy or pod_spec.dns_policy

        self.log.info("volumes %s", self.volumes)

        try:
            if self.in_cluster is not None:
                client = kube_client.get_kube_client(
                    in_cluster=self.in_cluster,
                    cluster_context=self.cluster_context,
                    config_file=self.config_file)
            else:
                client = kube_client.get_kube_client(
                    cluster_context=self.cluster_context,
                    config_file=self.config_file)

            # Add the Airflow version as a label, and a label identifying
            # that the pod was launched by KubernetesPodOperator
            self.labels.update({
                'airflow_version':
                airflow_version.replace('+', '-'),
                'kubernetes_pod_operator':
                'True',
            })

            gen = pod_generator.PodGenerator()

            for port in self.ports:
                gen.add_port(port)
            for mount in self.volume_mounts:
                gen.add_mount(mount)
            for volume in self.volumes:
                gen.add_volume(volume)

            pod = gen.make_pod(
                namespace=self.namespace,
                image=self.image,
                pod_id=self.name,
                cmds=self.cmds,
                arguments=self.arguments,
                labels=self.labels,
            )

            pod.service_account_name = self.service_account_name
            pod.secrets = self.secrets
            pod.envs = self.env_vars
            pod.image_pull_policy = self.image_pull_policy
            pod.image_pull_secrets = self.image_pull_secrets
            pod.annotations = self.annotations
            pod.resources = self.resources
            pod.affinity = self.affinity
            pod.node_selectors = self.node_selectors
            pod.hostnetwork = self.hostnetwork
            pod.tolerations = self.tolerations
            pod.configmaps = self.configmaps
            pod.security_context = self.security_context
            pod.pod_runtime_info_envs = self.pod_runtime_info_envs
            pod.dnspolicy = self.dnspolicy

            launcher = pod_launcher.PodLauncher(kube_client=client,
                                                extract_xcom=self.do_xcom_push)
            # monkey patch to avoid https://github.com/apache/airflow/issues/8275
            launcher.kube_req_factory.extract_env_and_secrets = extract_env_and_secrets
            try:
                (final_state, result) = launcher.run_pod(
                    pod,
                    startup_timeout=self.startup_timeout_seconds,
                    get_logs=self.get_logs)
            finally:
                if self.is_delete_operator_pod:
                    launcher.delete_pod(pod)

            if final_state != State.SUCCESS:
                raise AirflowException(
                    'Pod returned a failure: {state}'.format(
                        state=final_state))
            if self.do_xcom_push:
                return result
        except AirflowException as ex:
            raise AirflowException(
                'Pod Launching failed: {error}'.format(error=ex))
    def execute(self, context):
        try:
            from dagster_graphql.client.mutations import (
                handle_start_pipeline_execution_errors,
                handle_start_pipeline_execution_result,
            )

        except ImportError:
            raise AirflowException(
                'To use the DagsterKubernetesPodOperator, dagster and dagster_graphql must be'
                ' installed in your Airflow environment.')
        if 'run_id' in self.params:
            self._run_id = self.params['run_id']
        elif 'dag_run' in context and context['dag_run'] is not None:
            self._run_id = context['dag_run'].run_id

        # return to original execute code:
        try:
            client = kube_client.get_kube_client(
                in_cluster=self.in_cluster,
                cluster_context=self.cluster_context,
                config_file=self.config_file,
            )
            gen = pod_generator.PodGenerator()

            for mount in self.volume_mounts:
                gen.add_mount(mount)
            for volume in self.volumes:
                gen.add_volume(volume)

            pod = gen.make_pod(
                namespace=self.namespace,
                image=self.image,
                pod_id=self.name,
                cmds=self.cmds,
                arguments=self.query,
                labels=self.labels,
            )

            pod.service_account_name = self.service_account_name
            pod.secrets = self.secrets
            pod.envs = self.env_vars
            pod.image_pull_policy = self.image_pull_policy
            pod.image_pull_secrets = self.image_pull_secrets
            pod.annotations = self.annotations
            pod.resources = self.resources
            pod.affinity = self.affinity
            pod.node_selectors = self.node_selectors
            pod.hostnetwork = self.hostnetwork
            pod.tolerations = self.tolerations
            pod.configmaps = self.configmaps
            pod.security_context = self.security_context

            launcher = pod_launcher.PodLauncher(kube_client=client,
                                                extract_xcom=self.xcom_push)
            try:
                # we won't use the "result", which is the pod's xcom json file
                (final_state, _) = launcher.run_pod(
                    pod,
                    startup_timeout=self.startup_timeout_seconds,
                    get_logs=self.get_logs)

                # fetch the last line independently of whether logs were read
                # unbelievably, if you set tail_lines=1, the returned json has its double quotes
                # turned into unparseable single quotes
                # TODO: add retries - k8s log servers are _extremely_ flaky
                raw_res = client.read_namespaced_pod_log(
                    name=pod.name,
                    namespace=pod.namespace,
                    container='base',
                    tail_lines=5)

                # find the relevant line
                # TODO: raise sensible exception on garbage API string responses
                res = parse_raw_res(raw_res)
                handle_start_pipeline_execution_errors(res)
                events = handle_start_pipeline_execution_result(res)

                check_events_for_skips(events)

                return events

            finally:
                self._run_id = None

                if self.is_delete_operator_pod:
                    launcher.delete_pod(pod)

            # note the lack of returning the default xcom
        except AirflowException as ex:
            raise AirflowException(
                'Pod Launching failed: {error}'.format(error=ex))
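parse_raw_res itself is not shown in this listing; a plausible sketch given how the examples call it (pick out the last JSON-parseable line of the pod log), not the actual implementation:

import json

def parse_raw_res(raw_res):
    lines = raw_res if isinstance(raw_res, list) else raw_res.split('\n')
    for line in reversed(lines):
        try:
            return json.loads(line)
        except ValueError:
            continue
    return None
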
    def execute(self, context):
        try:
            conf = context['dag_run'].conf
            self.image = conf.get('docker_image_name')
            if conf.get("commands"):
                self.cmds = conf.get("commands")
            if conf.get("arguments"):
                self.arguments = conf.get("arguments")
            if conf.get("env_vars"):
                self.env_vars = conf.get("env_vars")
        except Exception as e:
            raise XKubernetesPodOperatorException(
                "Could not start off with Dag Run Configuration", e)
        try:
            client = kube_client.get_kube_client(
                in_cluster=self.in_cluster,
                cluster_context=self.cluster_context,
                config_file=self.config_file)
            gen = pod_generator.PodGenerator()

            for mount in self.volume_mounts:
                gen.add_mount(mount)
            for volume in self.volumes:
                gen.add_volume(volume)

            pod = gen.make_pod(
                namespace=self.namespace,
                image=self.image,
                pod_id=self.name,
                cmds=self.cmds,
                arguments=self.arguments,
                labels=self.labels,
            )

            pod.service_account_name = self.service_account_name
            pod.secrets = self.secrets
            pod.envs = self.env_vars
            pod.image_pull_policy = self.image_pull_policy
            pod.annotations = self.annotations
            pod.resources = self.resources
            pod.affinity = self.affinity
            pod.node_selectors = self.node_selectors
            pod.hostnetwork = self.hostnetwork
            pod.tolerations = self.tolerations

            launcher = pod_launcher.PodLauncher(kube_client=client,
                                                extract_xcom=self.xcom_push)
            (final_state, result) = launcher.run_pod(
                pod,
                startup_timeout=self.startup_timeout_seconds,
                get_logs=self.get_logs)

            if self.is_delete_operator_pod:
                launcher.delete_pod(pod)

            if final_state != State.SUCCESS:
                raise AirflowException(
                    'Pod returned a failure: {state}'.format(
                        state=final_state))
            if self.xcom_push:
                return result
        except AirflowException as ex:
            raise AirflowException(
                'Pod Launching failed: {error}'.format(error=ex))
Example #28
    def execute(self, context):
        try:
            from dagster_graphql.implementation.pipeline_execution_manager import (
                build_synthetic_pipeline_error_record, )
            from dagster_graphql.client.mutations import (
                DagsterGraphQLClientError,
                handle_execution_errors,
                handle_execute_plan_result_raw,
            )

        except ImportError:
            raise AirflowException(
                'To use the DagsterKubernetesPodOperator, dagster and dagster_graphql must be'
                ' installed in your Airflow environment.')

        if 'run_id' in self.params:
            self._run_id = self.params['run_id']
        elif 'dag_run' in context and context['dag_run'] is not None:
            self._run_id = context['dag_run'].run_id

        # return to original execute code:
        try:
            client = kube_client.get_kube_client(
                in_cluster=self.in_cluster,
                cluster_context=self.cluster_context,
                config_file=self.config_file,
            )
            gen = pod_generator.PodGenerator()

            for mount in self.volume_mounts:
                gen.add_mount(mount)
            for volume in self.volumes:
                gen.add_volume(volume)

            pod = gen.make_pod(
                namespace=self.namespace,
                image=self.image,
                pod_id=self.name,
                cmds=self.cmds,
                arguments=self.query,
                labels=self.labels,
            )

            pod.service_account_name = self.service_account_name
            pod.secrets = self.secrets
            pod.envs = self.env_vars
            pod.image_pull_policy = self.image_pull_policy
            pod.image_pull_secrets = self.image_pull_secrets
            pod.annotations = self.annotations
            pod.resources = self.resources
            pod.affinity = self.affinity
            pod.node_selectors = self.node_selectors
            pod.hostnetwork = self.hostnetwork
            pod.tolerations = self.tolerations
            pod.configmaps = self.configmaps
            pod.security_context = self.security_context

            launcher = pod_launcher.PodLauncher(kube_client=client,
                                                extract_xcom=self.xcom_push)
            try:
                if self.instance:
                    self.instance.get_or_create_run(
                        PipelineRun(
                            pipeline_name=self.pipeline_name,
                            run_id=self.run_id,
                            environment_dict=self.environment_dict,
                            mode=self.mode,
                            selector=ExecutionSelector(self.pipeline_name),
                            reexecution_config=None,
                            step_keys_to_execute=None,
                            tags=None,
                            status=PipelineRunStatus.MANAGED,
                        ))

                # we won't use the "result", which is the pod's xcom json file
                (final_state, _) = launcher.run_pod(
                    pod,
                    startup_timeout=self.startup_timeout_seconds,
                    get_logs=self.get_logs)

                # fetch the last line independently of whether logs were read
                # unbelievably, if you set tail_lines=1, the returned json has its double quotes
                # turned into unparseable single quotes
                # TODO: add retries - k8s log servers are _extremely_ flaky
                raw_res = client.read_namespaced_pod_log(
                    name=pod.name,
                    namespace=pod.namespace,
                    container='base',
                    tail_lines=5)

                res = parse_raw_res(raw_res.split('\n'))

                try:
                    handle_execution_errors(res, 'executePlan')
                except DagsterGraphQLClientError:
                    event = build_synthetic_pipeline_error_record(
                        self.run_id,
                        serializable_error_info_from_exc_info(sys.exc_info()),
                        self.pipeline_name,
                    )
                    if self.instance:
                        self.instance.handle_new_event(event)
                    raise

                events = handle_execute_plan_result_raw(res)

                if self.instance:
                    for event in events:
                        self.instance.handle_new_event(event)

                check_raw_events_for_skips(events)

                return events

            finally:
                self._run_id = None

                if self.is_delete_operator_pod:
                    launcher.delete_pod(pod)

            # note the lack of returning the default xcom
        except AirflowException as ex:
            raise AirflowException(
                'Pod Launching failed: {error}'.format(error=ex))
Example #29
    def execute(self, context):
        try:
            from dagster_graphql.client.mutations import (
                DagsterGraphQLClientError,
                handle_execution_errors,
                handle_execute_plan_result_raw,
            )

        except ImportError:
            raise AirflowException(
                'To use the DagsterKubernetesPodOperator, dagster and dagster_graphql must be'
                ' installed in your Airflow environment.'
            )

        if 'run_id' in self.params:
            self._run_id = self.params['run_id']
        elif 'dag_run' in context and context['dag_run'] is not None:
            self._run_id = context['dag_run'].run_id

        # return to original execute code:
        try:
            client = kube_client.get_kube_client(
                in_cluster=self.in_cluster,
                cluster_context=self.cluster_context,
                config_file=self.config_file,
            )
            gen = pod_generator.PodGenerator()

            for mount in self.volume_mounts:
                gen.add_mount(mount)
            for volume in self.volumes:
                gen.add_volume(volume)

            pod = gen.make_pod(
                namespace=self.namespace,
                image=self.image,
                pod_id=self.name,
                cmds=self.cmds,
                arguments=self.query(context.get('ts')),
                labels=self.labels,
            )

            pod.service_account_name = self.service_account_name
            pod.secrets = self.secrets
            pod.envs = self.env_vars
            pod.image_pull_policy = self.image_pull_policy
            pod.image_pull_secrets = self.image_pull_secrets
            pod.annotations = self.annotations
            pod.resources = self.resources
            pod.affinity = self.affinity
            pod.node_selectors = self.node_selectors
            pod.hostnetwork = self.hostnetwork
            pod.tolerations = self.tolerations
            pod.configmaps = self.configmaps
            pod.security_context = self.security_context

            launcher = pod_launcher.PodLauncher(kube_client=client, extract_xcom=self.xcom_push)
            try:
                if self.instance:
                    tags = (
                        {AIRFLOW_EXECUTION_DATE_STR: context.get('ts')} if 'ts' in context else {}
                    )

                    run = self.instance.register_managed_run(
                        pipeline_name=self.pipeline_name,
                        run_id=self.run_id,
                        run_config=self.run_config,
                        mode=self.mode,
                        solids_to_execute=None,
                        step_keys_to_execute=None,
                        tags=tags,
                        root_run_id=None,
                        parent_run_id=None,
                        pipeline_snapshot=self.pipeline_snapshot,
                        execution_plan_snapshot=self.execution_plan_snapshot,
                        parent_pipeline_snapshot=self.parent_pipeline_snapshot,
                    )

                # we won't use the "result", which is the pod's xcom json file
                (final_state, _) = launcher.run_pod(
                    pod, startup_timeout=self.startup_timeout_seconds, get_logs=self.get_logs
                )

                # fetch the last line independently of whether logs were read
                # unbelievably, if you set tail_lines=1, the returned json has its double quotes
                # turned into unparseable single quotes
                res = None
                num_attempts = 0
                while not res and num_attempts < LOG_RETRIEVAL_MAX_ATTEMPTS:
                    raw_res = client.read_namespaced_pod_log(
                        name=pod.name, namespace=pod.namespace, container='base'
                    )
                    res = parse_raw_log_lines(raw_res.split('\n'))
                    time.sleep(LOG_RETRIEVAL_WAITS_BETWEEN_ATTEMPTS_SEC)
                    num_attempts += 1

                try:
                    handle_execution_errors(res, 'executePlan')
                except DagsterGraphQLClientError as err:
                    self.instance.report_engine_event(
                        str(err),
                        run,
                        EngineEventData.engine_error(
                            serializable_error_info_from_exc_info(sys.exc_info())
                        ),
                        self.__class__,
                    )
                    raise

                events = handle_execute_plan_result_raw(res)

                if self.instance:
                    for event in events:
                        self.instance.handle_new_event(event)

                events = [e.dagster_event for e in events]
                check_events_for_failures(events)
                check_events_for_skips(events)
                return events

            finally:
                self._run_id = None

                if self.is_delete_operator_pod:
                    launcher.delete_pod(pod)

            # note the lack of returning the default xcom
        except AirflowException as ex:
            raise AirflowException('Pod Launching failed: {error}'.format(error=ex))