Example #1
0
def event_loop_init(bloom, object_map, task_q, global_count):
    """Seed state from builds and jobs that already exist in the namespace.

    Fetches pre-existing builds and jobs, records every one carrying the
    plugin label in the *bloom* filter, and feeds each through
    process_new_event so failed resources can be re-triggered.
    """
    host = client.Configuration().host
    api_key = client.Configuration().api_key
    namespace = get_namespace()

    past_builds = test_endpoint(
        host=host,
        req_headers=get_header(api_key),
        namespace=namespace,
        resource="builds",
    )
    build_items = past_builds.json()["items"]
    logging.debug("PAST BUILDS : {}".format(len(build_items)))
    for build in build_items:
        # Skip resources not owned by this plugin.
        if not is_value_in_label(
            build["metadata"]["labels"], object_map[PLUGIN_BUILD_CONFIG_LABEL]
        ):
            continue
        key, status, count = add_build_to_map(build=build, map=bloom)
        logging.debug(
            "BUILDS : seen-before: {} {} B:{} G:{}".format(
                status, key, count, global_count
            )
        )
        process_new_event("builds", build, bloom, object_map, task_q, global_count)

    _, past_jobs = get_job(
        req_url=host, req_headers=get_header(api_key), namespace=namespace
    )
    job_items = past_jobs["items"]
    logging.debug("PAST Jobs : {}".format(len(job_items)))
    for job in job_items:
        if not is_value_in_label(
            job["metadata"]["labels"], object_map[PLUGIN_JOB_LABEL]
        ):
            continue
        key, status, count = add_job_to_map(job=job, map=bloom)
        logging.debug(
            "JOBS : seen-before: {} {} B:{} G:{}".format(
                status, key, count, global_count
            )
        )
        process_new_event("jobs", job, bloom, object_map, task_q, global_count)
def do_resource_requests_check(task_q, quota_name):
    """Check whether the namespace quota can satisfy the next queued task.

    Peeks at (without removing) the head of ``task_q``.  If that task's
    object declares CPU/memory requests, ``is_resource_available`` is asked
    whether quota ``quota_name`` has room for them; with an empty queue the
    quota is probed with zero requests.

    Args:
        task_q: Queue of ``{"kind": ..., "object": ...}`` work items.
        quota_name: name of the ResourceQuota to check against.

    Returns:
        Tuple ``(quota_available, avail_cpu, avail_mem)``.  The two
        availability figures may be ``None`` when the head task declares
        no resource requests.
    """
    host = client.Configuration().host
    api_key = client.Configuration().api_key
    namespace = get_namespace()
    if task_q.qsize() == 0:
        quota_available, avail_mem, avail_cpu = is_resource_available(
            req_url=host,
            req_headers=get_header(api_key),
            namespace=namespace,
            quota_name=quota_name,
            resource_mem=0,
            resource_cpu=0,
        )
        logging.debug(
            "[{}] quota_available={}, avail_mem={}, avail_cpu={}".format(
                task_q.qsize(), quota_available, avail_mem, avail_cpu))
        return quota_available, avail_cpu, avail_mem
    else:
        # This is like peek() instead of qq.get()
        item = task_q.queue[0]
        resource = item["object"]
        mem_requested = None
        cpu_requested = None
        avail_mem = None
        avail_cpu = None
        quota_available = True
        spec = None

        if resource and resource != "-1" and resource["kind"] == "Job":
            # TODO a Job can have many containers. Find for each container.
            spec = resource["spec"]["template"]["spec"]["containers"][0]
        elif resource and resource != "-1" and resource[
                "kind"] == "BuildConfig":
            spec = resource.get("spec", None)

        # BUG FIX: the original guard inspected spec.resources.requests but
        # then read the values from spec.resources.limits, which raises
        # KeyError when no limits are set (and reported the wrong figures
        # otherwise).  Read the requests the guard actually checks, and use
        # .get() so a task declaring only one of cpu/memory cannot crash.
        requests = (spec or {}).get("resources", {}).get("requests", {})
        if requests.get("cpu") or requests.get("memory"):
            mem_requested = requests.get("memory")
            cpu_requested = requests.get("cpu")
            logging.debug(
                "[{}] mem_requested={}; cpu_requested={} by {} ".format(
                    task_q.qsize(),
                    mem_requested,
                    cpu_requested,
                    resource["metadata"]["name"],
                ))
        if mem_requested and cpu_requested:
            quota_available, avail_mem, avail_cpu = is_resource_available(
                req_url=host,
                req_headers=get_header(api_key),
                namespace=namespace,
                quota_name=quota_name,
                resource_mem=mem_requested,
                resource_cpu=cpu_requested,
            )
            logging.debug(
                "[{}] quota_available={}, avail_mem={}, avail_cpu={}".format(
                    task_q.qsize(), quota_available, avail_mem, avail_cpu))
        return quota_available, avail_cpu, avail_mem
Example #3
0
def add_event_job_to_map(bloom, event):
    """Record a job-related event in the *bloom* filter.

    Looks up the live Job named by the event.  When it does not exist the
    entry is keyed on the event's own resourceVersion and reason; when it
    does, the entry is keyed on the Job's resourceVersion and status, and
    if that entry is new the event-keyed entry is added and reported too.

    Returns:
        Tuple ``(message, seen_before, count)`` where *message* describes
        the recorded entry and *count* is the filter's element count.
    """
    host = client.Configuration().host
    api_key = client.Configuration().api_key
    namespace = get_namespace()
    involved = event["object"]["involvedObject"]
    job_exist, jresponse = get_job(
        req_url=host,
        req_headers=get_header(api_key),
        namespace=namespace,
        job_name=involved["name"],
    )

    def _record(version, reason):
        # One bloom entry plus its human-readable description.
        entry = [involved["kind"], involved["name"], version, reason]
        msg = "Kind: {0}; Name: {1}; version:{2}; reason:{3}".format(*entry)
        return msg, bloom.add(entry)

    if not job_exist:
        # Job is gone: key purely on the event's data.
        message, seen_before = _record(
            involved["resourceVersion"], event["object"]["reason"]
        )
        return message, seen_before, bloom.count

    job_status = get_job_status(jresponse.get("status"))
    message, seen_before = _record(
        jresponse["metadata"]["resourceVersion"], job_status
    )
    if not seen_before:
        # First sighting of this job state: also record the raw event key.
        message, seen_before = _record(
            involved["resourceVersion"], event["object"]["reason"]
        )
    return message, seen_before, bloom.count
Example #4
0
def add_event_build_to_map(bloom, event):
    """Record a build-related event in the *bloom* filter.

    Looks up the live Build named by the event.  When it does not exist
    the entry is reported as already seen without touching the filter;
    when it does, the entry is keyed on the Build's resourceVersion and
    status, and if that entry is new the event-keyed entry is added and
    reported instead.

    Returns:
        Tuple ``(message, seen_before, count)`` where *message* describes
        the recorded entry and *count* is the filter's element count.
    """
    host = client.Configuration().host
    api_key = client.Configuration().api_key
    namespace = get_namespace()
    involved = event["object"]["involvedObject"]
    build_exist, bresponse = get_build(
        req_url=host,
        req_headers=get_header(api_key),
        namespace=namespace,
        build_name=involved["name"],
    )

    def _record(version, reason):
        # One bloom entry plus its human-readable description.
        entry = [involved["kind"], involved["name"], version, reason]
        msg = "Kind: {0}; Name: {1}; version:{2}; reason:{3}".format(*entry)
        return msg, bloom.add(entry)

    if not build_exist:
        # Build is gone: report it as seen without adding to the filter
        # (the add call was deliberately disabled in the original).
        message = "Kind: {0}; Name: {1}; version:{2}; reason:{3}".format(
            involved["kind"],
            involved["name"],
            involved["resourceVersion"],
            event["object"]["reason"],
        )
        return message, True, bloom.count

    build_status = get_build_status(bresponse.get("status"))
    message, seen_before = _record(
        bresponse["metadata"]["resourceVersion"], build_status
    )
    if not seen_before:
        # First sighting of this build state: also record the raw event key.
        message, seen_before = _record(
            involved["resourceVersion"], event["object"]["reason"]
        )
    return message, seen_before, bloom.count
Example #5
0
def event_loop(resource, bloom, object_map, task_q, global_count):
    """Watch the cluster event stream and dispatch events until work is done.

    Seeds state via event_loop_init, then consumes the streaming endpoint
    for *resource*, handing each event dict to process_events.  The loop
    exits when the task queue is empty and the global counter reaches
    zero; a stream control code of -1 (or a dropped connection) causes a
    reconnect instead of an exit.

    Parameters: *bloom* de-duplicates events, *object_map* maps resource
    names to templates, *task_q* carries work items, and *global_count*
    tracks outstanding resources.
    """
    logging.info("STARTING")
    # control holds the last code yielded by stream(); -1 means the
    # stream terminated and should be re-opened.
    control = 1
    _running = True
    host = client.Configuration().host
    api_key = client.Configuration().api_key
    namespace = get_namespace()
    # ----------------------------------
    #           event loop - init
    # ----------------------------------
    event_loop_init(bloom, object_map, task_q, global_count)
    # ----------------------------------
    #           event loop - start
    # ----------------------------------
    while _running and control != -1:
        try:
            for event, code in stream(
                    host=host,
                    resource=resource,
                    authorization=api_key["authorization"],
                    namespace=namespace,
                    tls_verify=False,
            ):
                control = code
                if type(event) is dict and control == 1:
                    # ----------------------------------
                    #           event loop - process
                    # ----------------------------------
                    process_events(event, resource, bloom, object_map, task_q,
                                   global_count)
                # Stop once no tasks remain and no resources are outstanding.
                if task_q.qsize() == 0 and global_count.get_val() == 0:
                    logging.debug("[{}] break loop-1. G:{}".format(
                        task_q.qsize(), global_count))
                    _running = False
                    break
                if control == -1:
                    # Stream ended; reset the code and keep consuming so the
                    # outer while reconnects.
                    logging.debug("stream terminated 1.......")
                    control = 1
                    continue
        except Exception as e:
            # Dropped connections are expected during long watches and are
            # treated as a reconnect; anything else is re-raised.
            if "Connection" in str(e):
                logging.debug(str(e))
                logging.debug("stream terminated 2.......")
                control = 1
                if task_q.qsize() == 0 and global_count.get_val() == 0:
                    logging.debug("[{}] break loop-2. G:{}".format(
                        task_q.qsize(), global_count))
                    _running = False
                    break
                continue
            else:
                raise e
Example #6
0
def add_job_to_map(job, map):
    """Record *job* in the bloom-filter *map*.

    Looks up the live Job; when it does not exist an empty message is
    returned and the filter is untouched.  Otherwise the entry is keyed
    on the live Job's resourceVersion and status, and if that entry is
    new, the entry derived from the passed-in *job* dict is added and
    reported instead.

    Returns:
        Tuple ``(message, seen_before, count)`` where *message* describes
        the recorded entry and *count* is the filter's element count.
    """
    host = client.Configuration().host
    api_key = client.Configuration().api_key
    namespace = get_namespace()
    job_exists, jresponse = get_job(
        req_url=host,
        req_headers=get_header(api_key),
        namespace=namespace,
        job_name=job["metadata"]["name"],
    )
    if not job_exists:
        # Nothing to record for a job that no longer exists.
        return "", True, map.count

    def _record(version, status):
        # One bloom entry plus its human-readable description.
        entry = ["Job", job["metadata"]["name"], version, status]
        msg = "Kind: {0}; Name: {1}; version:{2}; reason:{3}".format(*entry)
        return msg, map.add(entry)

    live_status = get_job_status(jresponse.get("status"))
    message, seen_before = _record(
        jresponse["metadata"]["resourceVersion"], live_status
    )
    logging.debug(
        "JOBS :*seen-before: {} {} {} ".format(seen_before, message, map.count)
    )
    if not seen_before:
        # First sighting of this job state: also record the caller's copy.
        message, seen_before = _record(
            job["metadata"]["resourceVersion"], get_job_status(job["status"])
        )
    return message, seen_before, map.count
Example #7
0
def main(token_file=None, cert_file=None, config_file=None):
    """Entry point: configure cluster access, build the work queue from
    BUILD_MAP, and run the quota/resource/event worker threads to completion.

    Args:
        token_file: optional path to the in-cluster service-account token.
        cert_file: optional path to the cluster CA certificate.
        config_file: fallback JSON file with the build map when the
            ENV_BUILD_MAP environment variable is empty.

    Raises:
        Exception: when no build map could be loaded, or an ImageStream
            could not be created.
    """
    logging.basicConfig(level=logging.DEBUG,
                        format="(%(threadName)-9s) %(message)s")
    # logging.getLogger("requests").setLevel(logging.CRITICAL)
    logging.getLogger("urllib3").setLevel(logging.CRITICAL)

    kubernetes_verify_tls = get_param("KUBERNETES_VERIFY_TLS", None, "0")
    kubernetes_verify_tls = bool(int(kubernetes_verify_tls))

    # Load in-cluster configuration that is exposed by OpenShift/k8s configuration.
    InClusterConfigLoader(
        token_filename=_get_incluster_token_file(token_file=token_file),
        cert_filename=_get_incluster_ca_file(ca_file=cert_file),
        environ=os.environ,
    ).load_and_set()

    # We need to explicitly set whether we want to verify SSL/TLS connection to the master.
    configuration = client.Configuration()
    configuration.verify_ssl = kubernetes_verify_tls

    # NOTE(review): ocp_client is never used below — kept as-is in case
    # DynamicClient construction has required side effects; confirm and drop.
    ocp_client = DynamicClient(client.ApiClient(configuration=configuration))
    host = client.Configuration().host
    api_key = client.Configuration().api_key
    namespace = get_namespace()

    plugin = TensorflowBuildPlugin()
    # login_checks = [check_none(v) for v in [OCP_URL, DEFAULT_NAMESPACE, ACCESS_TOKEN]]
    # if not all(login_checks):
    #     raise Exception("Release Trigger can't start! OCP credentials are not provided!")

    # TODO may use config.json or use CRD
    # Load BUILD_MAP: prefer the environment variable, fall back to the file.
    build_map = os.getenv(ENV_BUILD_MAP, "{}")
    build_map = json.loads(build_map)
    if str(build_map) == "{}":
        build_map = load_json_file(config_file)

    if not build_map:
        raise Exception("No BUILD_MAP loaded.Nothing todo")

    imagestream_list = []
    buildconfig_list = []
    job_list = []
    object_map = {}
    object_map.update(plugin.get_labels_dict())

    # Process BUILD_MAP: one ImageStream/BuildConfig/Job triple per
    # (python version, OS version) combination.
    for py_version, os_details in build_map.items():
        for os_version, image_details in os_details.items():
            try:
                application_build_name = "tf-{}-build-image-{}".format(
                    os_version.lower(), py_version.replace(".", ""))
                application_name = "tf-{}-build-job-{}".format(
                    os_version.lower(), py_version.replace(".", ""))
                builder_imagestream = "{}:{}".format(application_build_name,
                                                     DEFAULT_IMAGE_VERSION)
                nb_python_ver = py_version
                docker_file_path = "Dockerfile.{}".format(os_version.lower())
                logging.debug(
                    "-------------------VARIABLES-------------------------")
                logging.debug("APPLICATION_BUILD_NAME: {}".format(
                    application_build_name))
                logging.debug("APPLICATION_NAME: {}".format(application_name))
                logging.debug(
                    "BUILDER_IMAGESTREAM: {}".format(builder_imagestream))
                logging.debug("PYTHON VERSION: {}".format(nb_python_ver))
                logging.debug("DOCKERFILE: {}".format(docker_file_path))
                for var_key, var_val in image_details.items():
                    # self.__dict__[var_key] = var_val
                    logging.debug("{}: {}".format(var_key, var_val))
                logging.debug(
                    "-----------------------------------------------------")
                imagestream_template = plugin.fill_imagestream_template(
                    ims_name=application_build_name)
                imagestream_list.append({
                    "kind": "ImageStream",
                    "object": imagestream_template,
                    "trigger_count": 0,
                    "retrigger": False,
                })
                job_template = plugin.fill_job_template1(
                    application_name=application_name,
                    builder_imagestream=builder_imagestream,
                    nb_python_ver=nb_python_ver,
                    image_details=image_details,
                )
                object_map[application_name] = job_template
                job_list.append(job_template)
                build_template = plugin.fill_buildconfig_template1(
                    build_name=application_build_name,
                    docker_file_path=docker_file_path,
                    nb_python_ver=nb_python_ver,
                    image_details=image_details,
                )
                object_map[application_build_name] = build_template

                buildconfig_list.append({
                    "kind": "BuildConfig",
                    "object": build_template,
                    "trigger_count": 0,
                    "retrigger": False,
                    "application_name": application_name,
                    "builder_imagestream": builder_imagestream,
                    "nb_python_ver": nb_python_ver,
                })
            except Exception as e:
                # BUG FIX: the original logging.error("Exception: ", e)
                # passed e as a %-format argument with no placeholder, so
                # the exception text was never rendered.
                logging.error("Exception: %s", e)
                logging.error(
                    "Error in Tensorflow Build or Job trigger! Please refer the above log, Starting the next "
                    "one in queue!")

    # Make sure every ImageStream exists before any build is triggered.
    for ims in imagestream_list:
        ims_name = ims["object"]["metadata"]["name"]

        ims_exist, ims_response = get_imagestream(
            req_url=host,
            req_headers=get_header(api_key),
            namespace=namespace,
            imagestream_name=ims_name,
        )
        if not ims_exist:
            generated_img = create_imagestream(
                req_url=host,
                req_headers=get_header(api_key),
                namespace=namespace,
                imagestream=ims["object"],
            )
            if not generated_img:
                raise Exception(
                    "Image could not be generated for {}".format(ims_name))

    # Shared coordination primitives for the three worker threads.
    quota_event = threading.Condition()
    done_event = threading.Event()
    global_count = ResourceCounter()
    task_q = Queue(maxsize=1000)
    bloom = BloomFilter(10000, 0.001)

    # TODO TFBuildConfig  OpenBlasBuildConfig, numpy
    for y in buildconfig_list:
        task_q.put(y)

    # global_count.set_val(task_q.qsize())
    logging.debug("Q size {}".format(task_q.qsize()))
    quota_name = get_param("QUOTA_NAME", None, DEFAULT_QUOTA_NAME)
    quota_thread = threading.Thread(
        name="quota-thread",
        target=quota_check,
        args=(quota_name, quota_event, done_event, task_q, global_count),
    )

    resource_thread = threading.Thread(
        name="resource-thread",
        target=create_resource,
        args=(quota_event, done_event, task_q, global_count, object_map),
    )

    event_thread = threading.Thread(
        name="event-thread",
        target=event_loop,
        args=("events", bloom, object_map, task_q, global_count),
    )

    # event_thread.daemon = True
    # Start the event watcher first so no cluster event is missed, then
    # the quota and resource workers; join in reverse start order.
    event_thread.start()
    time.sleep(3)
    quota_thread.start()
    resource_thread.start()
    event_thread.join()
    resource_thread.join()
    quota_thread.join()
    logging.debug("END")
Example #8
0
def process_new_event(
    resource_type, event_obj, bloom, object_map, task_q, global_count
):
    host = client.Configuration().host
    api_key = client.Configuration().api_key
    namespace = get_namespace()
    if resource_type == "builds":
        # =========================
        # Process Failed Builds(init)
        # =========================
        if is_build_failed(event_obj["status"]):
            build_config_name = get_value_in_label(
                event_obj["metadata"]["labels"], "appName"
            )
            build_ver = int(event_obj["metadata"]["name"][-1:])
            bc_exist, bc_response = get_buildconfig(
                req_url=host,
                req_headers=get_header(api_key),
                namespace=namespace,
                build_config_name=build_config_name,
            )
            if bc_exist:
                latest_build_version = bc_response["status"]["lastVersion"]
                latest_build_id = int(latest_build_version)
                b_exist, build_resp = get_build(
                    req_url=host,
                    req_headers=get_header(api_key),
                    namespace=namespace,
                    build_name="{}-{}".format(build_config_name, str(latest_build_id)),
                )
                # -----------------------------------------------------
                # build_ver | latest_build_id |    ACTION
                # -----------------------------------------------------
                #   0       |       0         |    not possible(SKIP)
                #   x       |      less than x|    not possible(SKIP)
                #   x       |      more than x|    possible(PROCESS)
                # -----------------------------------------------------
                if not (build_ver < latest_build_id):
                    # If no new builds are Running then trigger
                    name = event_obj["metadata"]["name"]
                    bc_name = name[:-2]
                    ver = int(name[-1:])
                    # Do we know of the Build ?
                    if bc_name in object_map:
                        obj = object_map[bc_name]
                        # print(obj)
                        ver = ver + 1
                        obj["spec"]["output"]["to"]["name"] = bc_name + ":" + str(ver)
                        logging.debug(
                            "Adding new BuildConfig with version {} ".format(
                                obj["spec"]["output"]["to"]["name"]
                            )
                        )
                        task_q.put(
                            {
                                "kind": "BuildConfig",
                                "object": obj,
                                "trigger_count": 1,
                                "retrigger": True,
                            }
                        )
                        global_count.increment()
                        logging.debug(
                            "Adding new BuildConfig {} G:{}".format(
                                obj["spec"]["output"]["to"]["name"], global_count
                            )
                        )
                else:
                    logging.debug(
                        "Ignoring {}-{} since {}-{} found.".format(
                            build_config_name,
                            build_ver,
                            build_config_name,
                            latest_build_id,
                        )
                    )
    elif resource_type == "jobs":
        # =========================
        # Process Failed Jobs(init)
        # =========================
        if is_job_failed(event_obj["status"]):
            logging.debug(
                "Ignoring new Job event {}.Let Job EVENT do processing ".format(
                    event_obj["metadata"]["name"]
                )
            )
    elif resource_type == "events":
        if "type" in event_obj:
            if event_obj["object"]["involvedObject"]["name"]:
                # print("New EVENTS Object {} ".format(event_obj['object']['involvedObject']["kind"]))
                # =========================
                # EVENTS of type Pods
                # =========================
                name = event_obj["object"]["involvedObject"]["name"]
                if event_obj["object"]["involvedObject"]["kind"] == "Pod":
                    name = name[: -len("-build")]
                    ver = int(name.rsplit("-", 1)[1])
                    bc_name = name.rsplit("-", 1)[0]
                    status = event_obj["object"]["reason"]
                    logging.debug(
                        "TODO - processing EVENTS Object of type Pod {} with status {}".format(
                            name, status
                        )
                    )

                # =========================
                # EVENTS of type Build
                # =========================
                elif event_obj["object"]["involvedObject"]["kind"] == "Build":
                    name = name
                    ver = int(name.rsplit("-", 1)[1])
                    bc_name = name.rsplit("-", 1)[0]
                    status = event_obj["object"]["reason"]
                    logging.debug(
                        "processing EVENTS Object of type Build; {} with status {}; BuildConfig {}".format(
                            name, status, bc_name
                        )
                    )
                    bc_exist, bc_response = get_buildconfig(
                        req_url=host,
                        req_headers=get_header(api_key),
                        namespace=namespace,
                        build_config_name=bc_name,
                    )

                    if bc_exist:
                        latest_build_version = bc_response["status"]["lastVersion"]
                        latest_build_id = int(latest_build_version)
                        b_exist, build_resp = get_build(
                            req_url=host,
                            req_headers=get_header(api_key),
                            namespace=namespace,
                            build_name="{}-{}".format(bc_name, str(latest_build_id)),
                        )

                        if b_exist:
                            build_status = build_resp.get("status")
                            # if latest build is failed retrigger
                            if is_build_failed(build_status):
                                seen = bloom.add(
                                    [
                                        build_status["config"]["kind"],
                                        build_status["config"]["name"],
                                        build_status["phase"],
                                    ]
                                )
                                if not seen:
                                    logging.debug(
                                        "Build not seen {} Failed-status is {}".format(
                                            bc_name, build_status["phase"]
                                        )
                                    )
                                    latest_build_id += 1
                                    if bc_name in object_map:
                                        obj = object_map[bc_name]
                                        # print(obj)
                                        obj["spec"]["output"]["to"]["name"] = (
                                            bc_name + ":" + str(latest_build_id)
                                        )
                                        logging.debug(
                                            "Adding new BuildConfig to retrigger {} ".format(
                                                obj["spec"]["output"]["to"]["name"]
                                            )
                                        )
                                        task_q.put(
                                            {
                                                "kind": "BuildConfig",
                                                "object": obj,
                                                "trigger_count": 1,
                                                "retrigger": True,
                                            }
                                        )
                                else:
                                    logging.debug(
                                        "Build seen {} Failed-status is {}".format(
                                            bc_name, build_status["phase"]
                                        )
                                    )
                                    build_pod_name = "{}-{}-build".format(
                                        bc_name, latest_build_id
                                    )
                                    pod_exist, logs = get_build_logs(
                                        req_url=host,
                                        req_headers=get_header(api_key),
                                        namespace=namespace,
                                        build_pod=build_pod_name,
                                    )
                                    if report_issue(
                                        bc_name, build_status["phase"], detail=logs
                                    ):
                                        logging.debug(
                                            "The build {} status is {}. A GitHub Issue has been raised.".format(
                                                bc_name, build_status["phase"]
                                            )
                                        )
                                    else:
                                        logging.debug(
                                            "The build {} status is {}. Failed to raise a GitHub Issue. Please contact the admin".format(
                                                bc_name, build_status["phase"]
                                            )
                                        )
                                    if (
                                        pod_exist
                                        and "gpg: keyserver receive failed: Keyserver error"
                                        in logs
                                    ):
                                        obj = object_map[bc_name]
                                        task_q.put(
                                            {
                                                "kind": "BuildConfig",
                                                "object": obj,
                                                "trigger_count": 1,
                                                "retrigger": True,
                                            }
                                        )
                                    else:
                                        global_count.decrement()
                                        logging.debug(
                                            "Build seen {} Failed-status is {} G:{}".format(
                                                bc_name,
                                                build_status["phase"],
                                                global_count,
                                            )
                                        )

                            else:
                                # Build is COMPLETE
                                seen = bloom.add(
                                    [
                                        build_status["config"]["kind"],
                                        build_status["config"]["name"],
                                        build_status["phase"],
                                    ]
                                )
                                if not seen and bc_name in object_map.keys():
                                    # global_count.decrement()
                                    logging.debug(
                                        "{} The Build {} status is {} global_count={}.".format(
                                            task_q.qsize(),
                                            bc_name,
                                            build_status["phase"],
                                            global_count,
                                        )
                                    )
                                    job_name = bc_name.replace("image", "job")
                                    jexist, jresp = get_job(
                                        req_url=host,
                                        req_headers=get_header(api_key),
                                        namespace=namespace,
                                        job_name=job_name,
                                    )
                                    if not jexist:
                                        if job_name in object_map:
                                            job = object_map[job_name]
                                            task_q.put(
                                                {
                                                    "kind": "Job",
                                                    "object": job,
                                                    "trigger_count": 0,
                                                    "retrigger": False,
                                                }
                                            )
                                            global_count.increment()
                                            logging.debug(
                                                "{} The Build->Job {} does not exist.Adding it. G:{}.".format(
                                                    task_q.qsize(),
                                                    job_name,
                                                    global_count,
                                                )
                                            )
                                    else:
                                        job_status = get_job_status(jresp.get("status"))
                                        logging.debug(
                                            "{} The Build->Job {} status is {}.G:{}.".format(
                                                task_q.qsize(),
                                                job_name,
                                                job_status,
                                                global_count,
                                            )
                                        )

                # =========================
                # EVENTS of type Jobs
                # =========================
                elif event_obj["object"]["involvedObject"]["kind"] == "Job":
                    job_name = event_obj["object"]["involvedObject"]["name"]
                    jbool, jresponse = get_job(
                        req_url=host,
                        req_headers=get_header(api_key),
                        namespace=namespace,
                        job_name=job_name,
                    )
                    job_status = get_job_status(jresponse.get("status"))

                    if job_status == "BackoffLimitExceeded":
                        global_count.decrement()
                        # Raising GitHub Issue
                        _, pods_info = get_all_pods(
                            req_url=host,
                            req_headers=get_header(api_key),
                            namespace=namespace,
                        )
                        job_pod_name = get_job_pod(job_name, pods_info)
                        pod_exist, joblogs = get_job_logs(
                            req_url=host,
                            req_headers=get_header(api_key),
                            namespace=namespace,
                            job_pod=job_pod_name,
                        )
                        detail = "Due to BackoffLimitExceeded"
                        if joblogs:
                            detail = joblogs
                        if report_issue(job_name, job_status, detail=detail):
                            logging.debug(
                                "{} The Job {} status is {} global_count={}. A GitHub Issue has been raised.".format(
                                    task_q.qsize(), job_name, job_status, global_count
                                )
                            )
                        else:
                            logging.debug(
                                "{} The Job {} status is {} global_count={}. Failed to raise a GitHub Issue. Please contact the admin".format(
                                    task_q.qsize(), job_name, job_status, global_count
                                )
                            )

                    elif job_status == "Complete":
                        global_count.decrement()
                        logging.debug(
                            "{} The Job {} status is {}. global_count={}.".format(
                                task_q.qsize(), job_name, job_status, global_count
                            )
                        )
                    else:
                        # if active
                        logging.debug(
                            "{} The Job {} status is {}.TODO".format(
                                task_q.qsize(), job_name, job_status
                            )
                        )
# Exemple #9
# 0
def process_taskq(task_q, global_count, object_map):
    """Pop one item from ``task_q`` and reconcile the resource it describes.

    Supported task kinds:

    * ``BuildConfig`` (retrigger=False) -- create the BuildConfig if it does
      not exist; otherwise inspect its latest Build and either queue the
      follow-up Job, retrigger a Failed build, or log and leave it alone.
    * ``BuildConfig`` (retrigger=True) -- force a new build unless one is
      already active.
    * ``ImageStream`` -- create it if it does not exist.
    * ``Job`` -- create it if it does not exist; otherwise the event thread
      handles follow-up.

    A task item is a dict with keys ``kind``, ``object``, ``trigger_count``
    and ``retrigger``.

    Returns the creation response of a newly created resource, or ``None``.
    Raises ``Exception`` when a create call fails.
    """
    host = client.Configuration().host
    api_key = client.Configuration().api_key
    namespace = get_namespace()
    if task_q.qsize() == 0:
        return

    created = None
    q_item = task_q.get()
    # dict.get with a default replaces the original hand-rolled lambdas
    # (PEP 8 E731: don't assign lambdas to names).
    q_resource = q_item.get("object", "")
    retrigger = q_item.get("retrigger", False)

    # BUGFIX: the original guard read
    #     if not q_resource and q_resource["kind"] and ...
    # which raised TypeError when q_resource was empty ("" is not
    # subscriptable by string) and never rejected malformed items.
    # Reject the item when any required field is missing, and balance
    # the task_q.get() above with task_done() so queue joins don't hang.
    if (not q_resource
            or not q_resource.get("kind")
            or not q_resource.get("metadata", {}).get("name")):
        task_q.task_done()
        return

    logging.debug("{} processing STARTED. Q-size: {} , G:{}.".format(
        task_q.qsize(), task_q.qsize(), global_count))
    q_resource_name = q_resource["metadata"]["name"]
    q_resource_kind = q_resource["kind"]
    logging.debug(
        "{} processing CREATING; name: {}; kind: {}; retrigger:{}".format(
            task_q.qsize(), q_resource_name, q_resource_kind, retrigger))
    # ==========================
    # BuildConfig
    # ==========================
    if q_resource_kind == "BuildConfig" and not retrigger:
        # The scheduler might have restarted, so check whether this
        # BuildConfig was already created in a previous run.
        build_created, build_response = get_buildconfig(
            req_url=host,
            req_headers=get_header(api_key),
            namespace=namespace,
            build_config_name=q_resource_name,
        )
        if not build_created:
            created = create_buildconfig(
                req_url=host,
                req_headers=get_header(api_key),
                namespace=namespace,
                build_config=q_resource,
            )
            if not created:
                raise Exception("Build could not be created for {}".format(
                    q_resource_name))
            global_count.increment()
        else:
            # We have built this resource before; look at its latest build.
            latest_build_version = str(build_response["status"]["lastVersion"])
            build_name = q_resource_name + "-" + latest_build_version
            bexist, bresp = get_build(
                req_url=host,
                req_headers=get_header(api_key),
                namespace=namespace,
                build_name=build_name,
            )
            logging.debug("{} The Build {} exists={} version={}. G:{}".format(
                task_q.qsize(),
                q_resource_name,
                bexist,
                latest_build_version,
                global_count,
            ))
            if bexist:
                phase = bresp["status"]["phase"]
                if phase == "Complete":
                    logging.debug("{} The Build {} status is {}. G:{}".format(
                        task_q.qsize(), build_name, phase, global_count))
                    # Build is done; the follow-up step is its Job.
                    # TODO Ask a service what todo next?
                    # ==========================
                    # Create --> Job
                    # ==========================
                    job_name = q_resource_name.replace("image", "job")
                    job_created, job_response = get_job(
                        req_url=host,
                        req_headers=get_header(api_key),
                        namespace=namespace,
                        job_name=job_name,
                    )
                    # BUGFIX/CONSISTENCY: use .get("status") like the event
                    # thread does -- a missing job has no "status" key and
                    # the original indexed access could raise KeyError.
                    job_status = get_job_status(job_response.get("status"))
                    logging.debug("{} The Job {} status is {}.".format(
                        task_q.qsize(), job_name, job_status))

                    if not job_created:
                        # Queue the follow-up Job task.
                        job_item = {
                            "kind": "Job",
                            "object": object_map[job_name],
                            "trigger_count": 0,
                            "retrigger": False,
                        }
                        task_q.task_done()
                        task_q.put(job_item)
                        global_count.increment()
                        logging.debug(
                            "{} processing DONE; ADDED new task: {}; kind: {} G:{}"
                            .format(task_q.qsize(), job_name, "Job",
                                    global_count))
                        return  # donot remove this.
                    else:
                        # JOB was created before the scheduler started,
                        # and also before this BuildConfig was queued.
                        if job_status == "BackoffLimitExceeded":
                            # Delete the failed Job, then queue a fresh one.
                            logging.debug(
                                "{} processing DONE; deleting existing {}; ".
                                format(task_q.qsize(), job_name))
                            dstate, dresp = delete_job(
                                req_url=host,
                                req_headers=get_header(api_key),
                                namespace=namespace,
                                job_name=job_name,
                            )
                            if dstate:
                                job_item = {
                                    "kind": "Job",
                                    "object": object_map[job_name],
                                    "trigger_count": 0,
                                    "retrigger": False,
                                }
                                task_q.task_done()
                                task_q.put(job_item)
                                global_count.increment()
                                logging.debug(
                                    "{} processing DONE; ADDED kind: {}; new name: {}; G:{}"
                                    .format(task_q.qsize(), "Job", job_name,
                                            global_count))
                                return  # donot remove this.
                        elif job_status == "ACTIVE":
                            global_count.increment()
                            logging.debug(
                                "{} processing DONE; Job is already {}. G:{}; "
                                .format(task_q.qsize(), job_status,
                                        global_count))
                        else:
                            logging.debug(
                                "{} The Job {} status is {}.- Trusting-1 Event Thread to do follow up actions"
                                .format(task_q.qsize(), job_name, job_status))
                elif phase == "Failed":
                    logging.debug(
                        "{} The Build {} retriggered since status={}. G:{}".
                        format(task_q.qsize(), build_name, phase,
                               global_count))
                    trigger_build(
                        req_url=host,
                        req_headers=get_header(api_key),
                        namespace=namespace,
                        build_resource=q_resource,
                    )
                else:
                    logging.debug("{} The Build {} status is {}. TODO".format(
                        task_q.qsize(), q_resource_name, phase))
    elif q_resource_kind == "BuildConfig" and retrigger:
        # Explicit retrigger request (e.g. after a transient failure):
        # start a new build unless the latest one is still active.
        _, build_response = get_buildconfig(
            req_url=host,
            req_headers=get_header(api_key),
            namespace=namespace,
            build_config_name=q_resource_name,
        )
        latest_build_version = str(build_response["status"]["lastVersion"])
        build_name = q_resource_name + "-" + latest_build_version
        bexist, bresp = get_build(
            req_url=host,
            req_headers=get_header(api_key),
            namespace=namespace,
            build_name=build_name,
        )
        logging.debug("{} The Build {} exists={} version={}. G:{}".format(
            task_q.qsize(),
            q_resource_name,
            bexist,
            latest_build_version,
            global_count,
        ))
        # Equivalent to the original nested if/else: trigger unless the
        # existing build is ACTIVE.
        if not bexist or bresp["status"]["phase"] != "ACTIVE":
            trigger_build(
                req_url=host,
                req_headers=get_header(api_key),
                namespace=namespace,
                build_resource=q_resource,
            )
    # ==========================
    # ImageStream
    # ==========================
    elif q_resource_kind == "ImageStream":
        if not get_imagestream(
                req_url=host,
                req_headers=get_header(api_key),
                namespace=namespace,
                imagestream_name=q_resource_name,
        ):
            created = create_imagestream(
                req_url=host,
                req_headers=get_header(api_key),
                namespace=namespace,
                imagestream=q_resource,
            )
            if not created:
                raise Exception(
                    "Image {} could not be created.".format(q_resource_name))
    # ==========================
    # Job
    # ==========================
    elif q_resource_kind == "Job":
        job_created, job_response = get_job(
            req_url=host,
            req_headers=get_header(api_key),
            namespace=namespace,
            job_name=q_resource_name,
        )
        # BUGFIX/CONSISTENCY: .get("status") -- a missing job carries no
        # "status" key and the original indexed access could raise KeyError.
        job_status = get_job_status(job_response.get("status"))
        if not job_created:
            # NOTE(review): the whole resource dict is passed as job_name;
            # presumably create_job accepts the manifest here -- confirm.
            created = create_job(
                req_url=host,
                req_headers=get_header(api_key),
                namespace=namespace,
                job_name=q_resource,
            )
            if not created:
                raise Exception(
                    "Job {} could not be created.".format(q_resource_name))
        else:
            # JOB was created before the scheduler started.
            logging.debug(
                "{} The Job {} status is {}.Trusting Event Thread to do follow up actions."
                .format(task_q.qsize(), q_resource_name, job_status))

    else:
        logging.debug("{} processing unknown resource : {}".format(
            task_q.qsize(), str(q_resource_kind)))
    task_q.task_done()
    logging.debug("{} processing DONE for {}".format(task_q.qsize(),
                                                     q_resource_name))
    return created