def _delete_workflow_engine_pod(workflow):
    """Delete workflow engine pod."""
    try:
        jobs = current_k8s_corev1_api_client.list_namespaced_pod(
            namespace=REANA_RUNTIME_KUBERNETES_NAMESPACE,
        )
        for job in jobs.items:
            if str(workflow.id_) in job.metadata.name:
                workflow_engine_logs = current_k8s_corev1_api_client.read_namespaced_pod_log(
                    namespace=job.metadata.namespace,
                    name=job.metadata.name,
                    container="workflow-engine",
                )
                workflow.logs = (workflow.logs or "") + workflow_engine_logs + "\n"
                current_k8s_batchv1_api_client.delete_namespaced_job(
                    namespace=job.metadata.namespace,
                    propagation_policy="Background",
                    name=job.metadata.labels["job-name"],
                )
                break
    except ApiException as e:
        raise REANAWorkflowControllerError(
            "Workflow engine pod cound not be deleted {}.".format(e)
        )
    except Exception as e:
        logging.error(traceback.format_exc())
        logging.error("Unexpected error: {}".format(e))
def _delete_workflow_engine_pod(workflow):
    """Delete workflow engine pod."""
    try:
        jobs = current_k8s_corev1_api_client.list_namespaced_pod(
            namespace='default',
        )
        for job in jobs.items:
            if str(workflow.id_) in job.metadata.name:
                workflow_engine_logs = \
                    current_k8s_corev1_api_client.read_namespaced_pod_log(
                        namespace=job.metadata.namespace,
                        name=job.metadata.name,
                        container='workflow-engine')
                workflow.logs = \
                    (workflow.logs or '') + workflow_engine_logs + '\n'
                current_k8s_batchv1_api_client.delete_namespaced_job(
                    namespace='default',
                    propagation_policy="Background",
                    name=job.metadata.labels['job-name'])
                break
    except ApiException as e:
        raise REANAWorkflowControllerError(
            "Workflow engine pod cound not be deleted {}.".format(e))
    except Exception as e:
        logging.error(traceback.format_exc())
        logging.error("Unexpected error: {}".format(e))
Example #3
    def get_k8s_jobs_by_status(self, status):
        """Get from k8s API jobs in ``status`` status."""
        pods = current_k8s_corev1_api_client.list_namespaced_pod(
            REANA_RUNTIME_KUBERNETES_NAMESPACE, field_selector=f"status.phase={status}",
        )

        job_pods = [
            pod.metadata.name
            for pod in pods.items
            if pod.metadata.name.startswith(f"{REANA_COMPONENT_PREFIX}-run-job")
        ]

        return job_pods
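A possible usage sketch of this method; the manager instance name and the chosen pod phase are illustrative (valid phases are Pending, Running, Succeeded, Failed and Unknown). Note that the field_selector filters pods server-side by phase, so only the name-prefix check happens client-side.

# Hypothetical caller; assumes `manager` is an instance exposing get_k8s_jobs_by_status().
succeeded_job_pods = manager.get_k8s_jobs_by_status("Succeeded")
for pod_name in succeeded_job_pods:
    print(f"run-job pod finished: {pod_name}")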
def _get_workflow_engine_pod_logs(workflow: Workflow) -> str:
    """Fetch the logs of the workflow engine pod for the given workflow."""
    try:
        pods = current_k8s_corev1_api_client.list_namespaced_pod(
            namespace=REANA_RUNTIME_KUBERNETES_NAMESPACE,
            label_selector=f"reana-run-batch-workflow-uuid={str(workflow.id_)}",
        )
        for pod in pods.items:
            if str(workflow.id_) in pod.metadata.name:
                return current_k8s_corev1_api_client.read_namespaced_pod_log(
                    namespace=pod.metadata.namespace,
                    name=pod.metadata.name,
                    container="workflow-engine",
                )
    except ApiException as e:
        raise REANAWorkflowControllerError(
            f"Workflow engine pod logs could not be fetched. Error: {e}")
Example #5
def k8s_watch_jobs(job_db):
    """Open stream connection to k8s apiserver to watch all jobs status.

    :param job_db: Dictionary which contains all current jobs.
    :param config: configuration to connect to k8s apiserver.
    """
    while True:
        logging.debug('Starting a new stream request to watch Jobs')
        try:
            w = watch.Watch()
            for event in w.stream(current_k8s_batchv1_api_client.
                                  list_job_for_all_namespaces):
                logging.info('New Job event received: {0}'.format(
                    event['type']))
                job = event['object']

                # Taking note of the remaining jobs since deletion might not
                # happen straight away.
                remaining_jobs = [
                    j for j in job_db.keys() if not job_db[j]['deleted']
                ]
                if (not job_db.get(job.metadata.name)
                        or job.metadata.name not in remaining_jobs):
                    # Ignore jobs not created by this specific instance
                    # or already deleted jobs.
                    continue
                elif job.status.succeeded:
                    logging.info('Job {} succeeded.'.format(job.metadata.name))
                    job_db[job.metadata.name]['status'] = 'succeeded'
                elif (job.status.failed
                      and job.status.failed >= config.MAX_JOB_RESTARTS):
                    logging.info('Job {} failed.'.format(job.metadata.name))
                    job_db[job.metadata.name]['status'] = 'failed'
                else:
                    continue
                # Grab logs when job either succeeds or fails.
                logging.info('Getting last spawned pod for job {}'.format(
                    job.metadata.name))
                last_spawned_pod = \
                    current_k8s_corev1_api_client.list_namespaced_pod(
                        job.metadata.namespace,
                        label_selector='job-name={job_name}'.format(
                            job_name=job.metadata.name)).items[-1]
                logging.info('Grabbing pod {} logs...'.format(
                    last_spawned_pod.metadata.name))
                job_db[job.metadata.name]['log'] = \
                    current_k8s_corev1_api_client.read_namespaced_pod_log(
                        namespace=last_spawned_pod.metadata.namespace,
                        name=last_spawned_pod.metadata.name)
                # Store job logs
                try:
                    logging.info('Storing job logs: {}'.format(
                        job_db[job.metadata.name]['log']))
                    Session.query(Job).filter_by(id_=job.metadata.name). \
                        update(dict(logs=job_db[job.metadata.name]['log']))
                    Session.commit()

                except Exception as e:
                    logging.debug(
                        'Could not store'
                        ' logs for object: {}'.format(last_spawned_pod))
                    logging.debug('Exception: {}'.format(str(e)))

                logging.info('Cleaning job {} ...'.format(job.metadata.name))
                k8s_delete_job(job)
                job_db[job.metadata.name]['deleted'] = True
        except client.rest.ApiException as e:
            logging.debug(
                "Error while connecting to Kubernetes API: {}".format(e))
        except Exception as e:
            logging.error(traceback.format_exc())
            logging.debug("Unexpected error: {}".format(e))
Example #6
def watch_jobs_kubernetes(job_db):
    """Open stream connection to k8s apiserver to watch all jobs status.

    :param job_db: Dictionary which contains all current jobs.
    """
    while True:
        logging.debug('Starting a new stream request to watch Jobs')
        try:
            w = watch.Watch()
            for event in w.stream(current_k8s_batchv1_api_client.
                                  list_job_for_all_namespaces):
                logging.info('New Job event received: {0}'.format(
                    event['type']))
                job = event['object']

                # Taking note of the remaining jobs since deletion might not
                # happen straight away.
                remaining_jobs = dict()
                for job_id, job_dict in job_db.items():
                    if not job_db[job_id]['deleted']:
                        remaining_jobs[job_dict['backend_job_id']] = job_id
                if (not job_db.get(remaining_jobs.get(job.metadata.name))
                        or job.metadata.name not in remaining_jobs):
                    # Ignore jobs not created by this specific instance
                    # or already deleted jobs.
                    continue
                job_id = remaining_jobs[job.metadata.name]
                kubernetes_job_id = job.metadata.name
                if job.status.succeeded:
                    logging.info('Job job_id: {}, kubernetes_job_id: {}'
                                 ' succeeded.'.format(job_id,
                                                      kubernetes_job_id))
                    job_db[job_id]['status'] = 'succeeded'
                elif (job.status.failed
                      and job.status.failed >= config.MAX_JOB_RESTARTS):
                    logging.info(
                        'Job job_id: {}, kubernetes_job_id: {} failed.'.format(
                            job_id, kubernetes_job_id))
                    job_db[job_id]['status'] = 'failed'
                else:
                    continue
                # Grab logs when job either succeeds or fails.
                logging.info('Getting last spawned pod for kubernetes'
                             ' job {}'.format(kubernetes_job_id))
                last_spawned_pod = \
                    current_k8s_corev1_api_client.list_namespaced_pod(
                        namespace=job.metadata.namespace,
                        label_selector='job-name={job_name}'.format(
                            job_name=kubernetes_job_id)).items[-1]
                logging.info('Grabbing pod {} logs...'.format(
                    last_spawned_pod.metadata.name))
                job_db[job_id]['log'] = \
                    current_k8s_corev1_api_client.read_namespaced_pod_log(
                        namespace=last_spawned_pod.metadata.namespace,
                        name=last_spawned_pod.metadata.name)
                store_logs(job_id=job_id, logs=job_db[job_id]['log'])

                logging.info(
                    'Cleaning Kubernetes job {} ...'.format(kubernetes_job_id))
                KubernetesJobManager.stop(kubernetes_job_id)
                job_db[job_id]['deleted'] = True
        except client.rest.ApiException as e:
            logging.debug(
                "Error while connecting to Kubernetes API: {}".format(e))
        except Exception as e:
            logging.error(traceback.format_exc())
            logging.debug("Unexpected error: {}".format(e))
Example #7
    def get_pods_by_status(self, status, namespace):
        """Get pod name list by status."""
        pods = current_k8s_corev1_api_client.list_namespaced_pod(
            namespace, field_selector=f"status.phase={status}",
        )
        return [pod.metadata.name for pod in pods.items]
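A hypothetical usage sketch; `manager` stands for an instance of the class this method belongs to, which is not shown in the excerpt:

pending_pods = manager.get_pods_by_status("Pending", "default")
print(f"{len(pending_pods)} pod(s) pending in the default namespace")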