def event_loop_init(bloom, object_map, task_q, global_count):
    builds = "builds"
    host = client.Configuration().host
    api_key = client.Configuration().api_key
    namespace = get_namespace()
    past_builds = test_endpoint(
        host=host,
        req_headers=get_header(api_key),
        namespace=namespace,
        resource=builds,
    )
    logging.debug("PAST BUILDS : {}".format(len(past_builds.json()["items"])))
    for pbuild in past_builds.json()["items"]:
        if is_value_in_label(
            pbuild["metadata"]["labels"], object_map[PLUGIN_BUILD_CONFIG_LABEL]
        ):
            mkey, mstatus, mcount = add_build_to_map(build=pbuild, map=bloom)
            logging.debug(
                "BUILDS : seen-before: {} {} B:{} G:{}".format(
                    mstatus, mkey, mcount, global_count
                )
            )
            process_new_event(
                "builds", pbuild, bloom, object_map, task_q, global_count
            )

    jobs = "jobs"
    _, past_jobs = get_job(
        req_url=host, req_headers=get_header(api_key), namespace=namespace
    )
    logging.debug("PAST Jobs : {}".format(len(past_jobs["items"])))
    for pjob in past_jobs["items"]:
        if is_value_in_label(pjob["metadata"]["labels"], object_map[PLUGIN_JOB_LABEL]):
            mkey, mstatus, mcount = add_job_to_map(job=pjob, map=bloom)
            logging.debug(
                "JOBS : seen-before: {} {} B:{} G:{}".format(
                    mstatus, mkey, mcount, global_count
                )
            )
            process_new_event("jobs", pjob, bloom, object_map, task_q, global_count)
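# A minimal sketch of the label-matching contract event_loop_init() relies on.
# is_value_in_label() is defined elsewhere in this repo; the stand-in below is
# an assumption for illustration only, reading it as "does any label on the
# resource carry this value?".
def _is_value_in_label_sketch(labels, wanted_value):
    # Guard against resources that carry no labels at all.
    return wanted_value in (labels or {}).values()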
def do_resource_requests_check(task_q, quota_name):
    """Check whether the quota can accommodate the task at the head of the queue."""
    host = client.Configuration().host
    api_key = client.Configuration().api_key
    namespace = get_namespace()
    if task_q.qsize() == 0:
        quota_available, avail_mem, avail_cpu = is_resource_available(
            req_url=host,
            req_headers=get_header(api_key),
            namespace=namespace,
            quota_name=quota_name,
            resource_mem=0,
            resource_cpu=0,
        )
        logging.debug(
            "[{}] quota_available={}, avail_mem={}, avail_cpu={}".format(
                task_q.qsize(), quota_available, avail_mem, avail_cpu
            )
        )
        return quota_available, avail_cpu, avail_mem
    else:
        # Peek at the head of the queue without consuming it (unlike task_q.get()).
        item = task_q.queue[0]
        resource = item["object"]
        mem_requested = None
        cpu_requested = None
        avail_mem = None
        avail_cpu = None
        quota_available = True
        spec = None
        if resource and resource != "-1" and resource["kind"] == "Job":
            # TODO a Job can have many containers. Find for each container.
            spec = resource["spec"]["template"]["spec"]["containers"][0]
        elif resource and resource != "-1" and resource["kind"] == "BuildConfig":
            spec = resource.get("spec", None)
        if (
            spec
            and "resources" in spec
            and "requests" in spec["resources"]
            and (
                spec["resources"]["requests"].get("cpu")
                or spec["resources"]["requests"].get("memory")
            )
        ):
            # Read the values from the same "requests" block the guard checks.
            mem_requested = spec["resources"]["requests"].get("memory")
            cpu_requested = spec["resources"]["requests"].get("cpu")
            logging.debug(
                "[{}] mem_requested={}; cpu_requested={} by {}".format(
                    task_q.qsize(),
                    mem_requested,
                    cpu_requested,
                    resource["metadata"]["name"],
                )
            )
        if mem_requested and cpu_requested:
            quota_available, avail_mem, avail_cpu = is_resource_available(
                req_url=host,
                req_headers=get_header(api_key),
                namespace=namespace,
                quota_name=quota_name,
                resource_mem=mem_requested,
                resource_cpu=cpu_requested,
            )
        logging.debug(
            "[{}] quota_available={}, avail_mem={}, avail_cpu={}".format(
                task_q.qsize(), quota_available, avail_mem, avail_cpu
            )
        )
        return quota_available, avail_cpu, avail_mem
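# Usage sketch (assumption): because do_resource_requests_check() only peeks
# at the queue head, the quota thread can poll it repeatedly and notify the
# resource thread once capacity exists. The real loop lives in quota_check(),
# defined elsewhere; the shape below is illustrative only.
#
#     while not done_event.is_set():
#         ok, avail_cpu, avail_mem = do_resource_requests_check(task_q, quota_name)
#         if ok:
#             with quota_event:
#                 quota_event.notify()
#         time.sleep(5)  # hypothetical poll interval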
def add_event_job_to_map(bloom, event):
    """Record a Job event in the bloom filter; return (message, seen_before, count)."""
    host = client.Configuration().host
    api_key = client.Configuration().api_key
    namespace = get_namespace()
    involved = event["object"]["involvedObject"]
    job_exist, jresponse = get_job(
        req_url=host,
        req_headers=get_header(api_key),
        namespace=namespace,
        job_name=involved["name"],
    )
    if not job_exist:
        message = "Kind: {0}; Name: {1}; version:{2}; reason:{3}".format(
            involved["kind"],
            involved["name"],
            involved["resourceVersion"],
            event["object"]["reason"],
        )
        seen_before = bloom.add(
            [
                involved["kind"],
                involved["name"],
                involved["resourceVersion"],
                event["object"]["reason"],
            ]
        )
        return message, seen_before, bloom.count

    # The Job exists: key the filter on the live resourceVersion and status.
    job_status = get_job_status(jresponse.get("status"))
    seen_before = bloom.add(
        [
            involved["kind"],
            involved["name"],
            jresponse["metadata"]["resourceVersion"],
            job_status,
        ]
    )
    message = "Kind: {0}; Name: {1}; version:{2}; reason:{3}".format(
        involved["kind"],
        involved["name"],
        jresponse["metadata"]["resourceVersion"],
        job_status,
    )
    if not seen_before:
        # First sighting: also record the event's own resourceVersion/reason.
        message = "Kind: {0}; Name: {1}; version:{2}; reason:{3}".format(
            involved["kind"],
            involved["name"],
            involved["resourceVersion"],
            event["object"]["reason"],
        )
        seen_before = bloom.add(
            [
                involved["kind"],
                involved["name"],
                involved["resourceVersion"],
                event["object"]["reason"],
            ]
        )
    return message, seen_before, bloom.count
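# The bloom filter used above is assumed to expose add(items) -> bool (True if
# the key was seen before) and a .count attribute. A set-backed stand-in with
# the same contract (exact rather than probabilistic) -- a sketch for local
# testing, not the BloomFilter class this module actually imports:
class _ExactFilterSketch:
    def __init__(self):
        self._seen = set()
        self.count = 0

    def add(self, parts):
        # Normalize the key the same way the call sites do: a list of fields.
        key = tuple(str(p) for p in parts)
        seen_before = key in self._seen
        if not seen_before:
            self._seen.add(key)
            self.count += 1
        return seen_before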
def add_event_build_to_map(bloom, event):
    """Record a Build event in the bloom filter; return (message, seen_before, count)."""
    host = client.Configuration().host
    api_key = client.Configuration().api_key
    namespace = get_namespace()
    involved = event["object"]["involvedObject"]
    build_exist, bresponse = get_build(
        req_url=host,
        req_headers=get_header(api_key),
        namespace=namespace,
        build_name=involved["name"],
    )
    if not build_exist:
        message = "Kind: {0}; Name: {1}; version:{2}; reason:{3}".format(
            involved["kind"],
            involved["name"],
            involved["resourceVersion"],
            event["object"]["reason"],
        )
        # seen_before = bloom.add([event['object']['involvedObject']['kind'],
        #                          event['object']['involvedObject']['name'],
        #                          event['object']['involvedObject']['resourceVersion'],
        #                          event['object']['reason']])
        # Treat events for builds that no longer exist as already seen.
        return message, True, bloom.count

    # The Build exists: key the filter on the live resourceVersion and status.
    build_status = get_build_status(bresponse.get("status"))
    seen_before = bloom.add(
        [
            involved["kind"],
            involved["name"],
            bresponse["metadata"]["resourceVersion"],
            build_status,
        ]
    )
    message = "Kind: {0}; Name: {1}; version:{2}; reason:{3}".format(
        involved["kind"],
        involved["name"],
        bresponse["metadata"]["resourceVersion"],
        build_status,
    )
    if not seen_before:
        # First sighting: also record the event's own resourceVersion/reason.
        message = "Kind: {0}; Name: {1}; version:{2}; reason:{3}".format(
            involved["kind"],
            involved["name"],
            involved["resourceVersion"],
            event["object"]["reason"],
        )
        seen_before = bloom.add(
            [
                involved["kind"],
                involved["name"],
                involved["resourceVersion"],
                event["object"]["reason"],
            ]
        )
    return message, seen_before, bloom.count
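# Usage sketch (assumption): the watch loop (event_loop, defined elsewhere)
# presumably dispatches raw events to these helpers by involvedObject kind,
# roughly:
#
#     kind = event["object"]["involvedObject"]["kind"]
#     if kind == "Build":
#         msg, seen, count = add_event_build_to_map(bloom, event)
#     elif kind == "Job":
#         msg, seen, count = add_event_job_to_map(bloom, event)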
def add_job_to_map(job, map):
    """Record an existing Job in the filter map; return (message, seen_before, count)."""
    host = client.Configuration().host
    api_key = client.Configuration().api_key
    namespace = get_namespace()
    job_exists, jresponse = get_job(
        req_url=host,
        req_headers=get_header(api_key),
        namespace=namespace,
        job_name=job["metadata"]["name"],
    )
    if not job_exists:
        return "", True, map.count

    job_status = get_job_status(jresponse.get("status"))
    seen_before = map.add(
        [
            "Job",
            job["metadata"]["name"],
            jresponse["metadata"]["resourceVersion"],
            job_status,
        ]
    )
    message = "Kind: {0}; Name: {1}; version:{2}; reason:{3}".format(
        "Job",
        job["metadata"]["name"],
        jresponse["metadata"]["resourceVersion"],
        job_status,
    )
    logging.debug(
        "JOBS :*seen-before: {} {} {} ".format(seen_before, message, map.count)
    )
    if not seen_before:
        # First sighting: also record the passed-in object's version/status.
        message = "Kind: {0}; Name: {1}; version:{2}; reason:{3}".format(
            "Job",
            job["metadata"]["name"],
            job["metadata"]["resourceVersion"],
            get_job_status(job["status"]),
        )
        seen_before = map.add(
            [
                "Job",
                job["metadata"]["name"],
                job["metadata"]["resourceVersion"],
                get_job_status(job["status"]),
            ]
        )
    return message, seen_before, map.count
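# A minimal sketch of the get_job_status() contract assumed throughout this
# module: collapse a Job's status block into a single string such as
# "Complete", "BackoffLimitExceeded", or "ACTIVE". The real helper is defined
# elsewhere; this stand-in is illustrative only.
def _get_job_status_sketch(status):
    for cond in (status or {}).get("conditions") or []:
        if cond.get("status") == "True":
            # Failed Jobs carry a reason (e.g. "BackoffLimitExceeded");
            # completed Jobs may only carry the condition type ("Complete").
            return cond.get("reason") or cond.get("type")
    if (status or {}).get("active"):
        return "ACTIVE"
    return ""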
def main(token_file=None, cert_file=None, config_file=None):
    logging.basicConfig(level=logging.DEBUG, format="(%(threadName)-9s) %(message)s")
    # logging.getLogger("requests").setLevel(logging.CRITICAL)
    logging.getLogger("urllib3").setLevel(logging.CRITICAL)
    kubernetes_verify_tls = get_param("KUBERNETES_VERIFY_TLS", None, "0")
    kubernetes_verify_tls = bool(int(kubernetes_verify_tls))

    # Load in-cluster configuration that is exposed by OpenShift/k8s configuration.
    InClusterConfigLoader(
        token_filename=_get_incluster_token_file(token_file=token_file),
        cert_filename=_get_incluster_ca_file(ca_file=cert_file),
        environ=os.environ,
    ).load_and_set()

    # We need to explicitly set whether we want to verify the SSL/TLS connection
    # to the master.
    configuration = client.Configuration()
    configuration.verify_ssl = kubernetes_verify_tls
    ocp_client = DynamicClient(client.ApiClient(configuration=configuration))
    host = client.Configuration().host
    api_key = client.Configuration().api_key
    namespace = get_namespace()
    plugin = TensorflowBuildPlugin()
    # login_checks = [check_none(v) for v in [OCP_URL, DEFAULT_NAMESPACE, ACCESS_TOKEN]]
    # if not all(login_checks):
    #     raise Exception("Release Trigger can't start! OCP credentials are not provided!")

    # TODO may use config.json or use CRD
    # Load BUILD_MAP
    build_map = os.getenv(ENV_BUILD_MAP, "{}")
    build_map = json.loads(build_map)
    if str(build_map) == "{}":
        build_map = load_json_file(config_file)
    if not build_map:
        raise Exception("No BUILD_MAP loaded. Nothing to do.")

    imagestream_list = []
    buildconfig_list = []
    job_list = []
    object_map = {}
    object_map.update(plugin.get_labels_dict())

    # Process BUILD_MAP
    for py_version, os_details in build_map.items():
        for os_version, image_details in os_details.items():
            try:
                application_build_name = "tf-{}-build-image-{}".format(
                    os_version.lower(), py_version.replace(".", "")
                )
                application_name = "tf-{}-build-job-{}".format(
                    os_version.lower(), py_version.replace(".", "")
                )
                builder_imagestream = "{}:{}".format(
                    application_build_name, DEFAULT_IMAGE_VERSION
                )
                nb_python_ver = py_version
                docker_file_path = "Dockerfile.{}".format(os_version.lower())
                logging.debug("-------------------VARIABLES-------------------------")
                logging.debug(
                    "APPLICATION_BUILD_NAME: {}".format(application_build_name)
                )
                logging.debug("APPLICATION_NAME: {}".format(application_name))
                logging.debug("BUILDER_IMAGESTREAM: {}".format(builder_imagestream))
                logging.debug("PYTHON VERSION: {}".format(nb_python_ver))
                logging.debug("DOCKERFILE: {}".format(docker_file_path))
                for var_key, var_val in image_details.items():
                    # self.__dict__[var_key] = var_val
                    logging.debug("{}: {}".format(var_key, var_val))
                logging.debug("-----------------------------------------------------")

                imagestream_template = plugin.fill_imagestream_template(
                    ims_name=application_build_name
                )
                imagestream_list.append(
                    {
                        "kind": "ImageStream",
                        "object": imagestream_template,
                        "trigger_count": 0,
                        "retrigger": False,
                    }
                )
                job_template = plugin.fill_job_template1(
                    application_name=application_name,
                    builder_imagestream=builder_imagestream,
                    nb_python_ver=nb_python_ver,
                    image_details=image_details,
                )
                object_map[application_name] = job_template
                job_list.append(job_template)
                build_template = plugin.fill_buildconfig_template1(
                    build_name=application_build_name,
                    docker_file_path=docker_file_path,
                    nb_python_ver=nb_python_ver,
                    image_details=image_details,
                )
                object_map[application_build_name] = build_template
                buildconfig_list.append(
                    {
                        "kind": "BuildConfig",
                        "object": build_template,
                        "trigger_count": 0,
                        "retrigger": False,
                        "application_name": application_name,
                        "builder_imagestream": builder_imagestream,
                        "nb_python_ver": nb_python_ver,
                    }
                )
            except Exception as e:
                logging.error("Exception: %s", e)
                logging.error(
                    "Error in Tensorflow Build or Job trigger! Please refer to the "
                    "log above; starting the next one in the queue!"
                )

    for ims in imagestream_list:
        ims_name = ims["object"]["metadata"]["name"]
        ims_exist, ims_response = get_imagestream(
            req_url=host,
            req_headers=get_header(api_key),
            namespace=namespace,
            imagestream_name=ims_name,
        )
        if not ims_exist:
            generated_img = create_imagestream(
                req_url=host,
                req_headers=get_header(api_key),
                namespace=namespace,
                imagestream=ims["object"],
            )
            if not generated_img:
                raise Exception("Image could not be generated for {}".format(ims_name))

    quota_event = threading.Condition()
    done_event = threading.Event()
    global_count = ResourceCounter()
    task_q = Queue(maxsize=1000)
    bloom = BloomFilter(10000, 0.001)
    # TODO TFBuildConfig OpenBlasBuildConfig, numpy
    for y in buildconfig_list:
        task_q.put(y)
    # global_count.set_val(task_q.qsize())
    logging.debug("Q size {}".format(task_q.qsize()))

    quota_name = get_param("QUOTA_NAME", None, DEFAULT_QUOTA_NAME)
    quota_thread = threading.Thread(
        name="quota-thread",
        target=quota_check,
        args=(quota_name, quota_event, done_event, task_q, global_count),
    )
    resource_thread = threading.Thread(
        name="resource-thread",
        target=create_resource,
        args=(quota_event, done_event, task_q, global_count, object_map),
    )
    event_thread = threading.Thread(
        name="event-thread",
        target=event_loop,
        args=("events", bloom, object_map, task_q, global_count),
    )
    # event_thread.daemon = True
    event_thread.start()
    time.sleep(3)
    quota_thread.start()
    resource_thread.start()
    event_thread.join()
    resource_thread.join()
    quota_thread.join()
    logging.debug("END")
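# ResourceCounter is used above as a thread-safe in-flight work counter shared
# by all three threads (increment/decrement plus a printable value). A minimal
# lock-based stand-in consistent with that usage -- an assumption, since the
# real class is defined elsewhere in this repo:
class _ResourceCounterSketch:
    def __init__(self, value=0):
        self._value = value
        self._lock = threading.Lock()

    def increment(self):
        with self._lock:
            self._value += 1

    def decrement(self):
        with self._lock:
            self._value -= 1

    def __str__(self):
        # Called implicitly by the "G:{}" log formatting above.
        return str(self._value)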
def process_new_event(
    resource_type, event_obj, bloom, object_map, task_q, global_count
):
    """Dispatch a build/job/event object to the appropriate (re)trigger logic."""
    host = client.Configuration().host
    api_key = client.Configuration().api_key
    namespace = get_namespace()
    if resource_type == "builds":
        # =========================
        # Process Failed Builds(init)
        # =========================
        if is_build_failed(event_obj["status"]):
            build_config_name = get_value_in_label(
                event_obj["metadata"]["labels"], "appName"
            )
            # Build names are "<buildconfig>-<version>"; parse from the right
            # so multi-digit versions are handled too.
            build_ver = int(event_obj["metadata"]["name"].rsplit("-", 1)[1])
            bc_exist, bc_response = get_buildconfig(
                req_url=host,
                req_headers=get_header(api_key),
                namespace=namespace,
                build_config_name=build_config_name,
            )
            if bc_exist:
                latest_build_version = bc_response["status"]["lastVersion"]
                latest_build_id = int(latest_build_version)
                b_exist, build_resp = get_build(
                    req_url=host,
                    req_headers=get_header(api_key),
                    namespace=namespace,
                    build_name="{}-{}".format(build_config_name, str(latest_build_id)),
                )
                # -----------------------------------------------------
                # build_ver | latest_build_id | ACTION
                # -----------------------------------------------------
                #     0     |        0        | not possible (SKIP)
                #     x     |   less than x   | not possible (SKIP)
                #     x     |   more than x   | possible (PROCESS)
                # -----------------------------------------------------
                if not (build_ver < latest_build_id):
                    # If no new builds are Running then trigger.
                    name = event_obj["metadata"]["name"]
                    bc_name, ver_str = name.rsplit("-", 1)
                    ver = int(ver_str)
                    # Do we know of the Build ?
                    if bc_name in object_map:
                        obj = object_map[bc_name]
                        # print(obj)
                        ver = ver + 1
                        obj["spec"]["output"]["to"]["name"] = bc_name + ":" + str(ver)
                        logging.debug(
                            "Adding new BuildConfig with version {} ".format(
                                obj["spec"]["output"]["to"]["name"]
                            )
                        )
                        task_q.put(
                            {
                                "kind": "BuildConfig",
                                "object": obj,
                                "trigger_count": 1,
                                "retrigger": True,
                            }
                        )
                        global_count.increment()
                        logging.debug(
                            "Adding new BuildConfig {} G:{}".format(
                                obj["spec"]["output"]["to"]["name"], global_count
                            )
                        )
                else:
                    logging.debug(
                        "Ignoring {}-{} since {}-{} found.".format(
                            build_config_name,
                            build_ver,
                            build_config_name,
                            latest_build_id,
                        )
                    )
    elif resource_type == "jobs":
        # =========================
        # Process Failed Jobs(init)
        # =========================
        if is_job_failed(event_obj["status"]):
            logging.debug(
                "Ignoring new Job event {}. Let the Job EVENT do the processing.".format(
                    event_obj["metadata"]["name"]
                )
            )
    elif resource_type == "events":
        if "type" in event_obj and event_obj["object"]["involvedObject"]["name"]:
            # print("New EVENTS Object {} ".format(event_obj['object']['involvedObject']["kind"]))
            name = event_obj["object"]["involvedObject"]["name"]
            # =========================
            # EVENTS of type Pod
            # =========================
            if event_obj["object"]["involvedObject"]["kind"] == "Pod":
                name = name[: -len("-build")]
                ver = int(name.rsplit("-", 1)[1])
                bc_name = name.rsplit("-", 1)[0]
                status = event_obj["object"]["reason"]
                logging.debug(
                    "TODO - processing EVENTS Object of type Pod {} with status {}".format(
                        name, status
                    )
                )
            # =========================
            # EVENTS of type Build
            # =========================
            elif event_obj["object"]["involvedObject"]["kind"] == "Build":
                ver = int(name.rsplit("-", 1)[1])
                bc_name = name.rsplit("-", 1)[0]
                status = event_obj["object"]["reason"]
                logging.debug(
                    "processing EVENTS Object of type Build; {} with status {}; BuildConfig {}".format(
                        name, status, bc_name
                    )
                )
                bc_exist, bc_response = get_buildconfig(
                    req_url=host,
                    req_headers=get_header(api_key),
                    namespace=namespace,
                    build_config_name=bc_name,
                )
                if bc_exist:
                    latest_build_version = bc_response["status"]["lastVersion"]
                    latest_build_id = int(latest_build_version)
                    b_exist, build_resp = get_build(
                        req_url=host,
                        req_headers=get_header(api_key),
                        namespace=namespace,
                        build_name="{}-{}".format(bc_name, str(latest_build_id)),
                    )
                    if b_exist:
                        build_status = build_resp.get("status")
                        # If the latest build failed, retrigger it.
                        if is_build_failed(build_status):
                            seen = bloom.add(
                                [
                                    build_status["config"]["kind"],
                                    build_status["config"]["name"],
                                    build_status["phase"],
                                ]
                            )
                            if not seen:
                                logging.debug(
                                    "Build not seen {} Failed-status is {}".format(
                                        bc_name, build_status["phase"]
                                    )
                                )
                                latest_build_id += 1
                                if bc_name in object_map:
                                    obj = object_map[bc_name]
                                    # print(obj)
                                    obj["spec"]["output"]["to"]["name"] = (
                                        bc_name + ":" + str(latest_build_id)
                                    )
                                    logging.debug(
                                        "Adding new BuildConfig to retrigger {} ".format(
                                            obj["spec"]["output"]["to"]["name"]
                                        )
                                    )
                                    task_q.put(
                                        {
                                            "kind": "BuildConfig",
                                            "object": obj,
                                            "trigger_count": 1,
                                            "retrigger": True,
                                        }
                                    )
                            else:
                                logging.debug(
                                    "Build seen {} Failed-status is {}".format(
                                        bc_name, build_status["phase"]
                                    )
                                )
                                build_pod_name = "{}-{}-build".format(
                                    bc_name, latest_build_id
                                )
                                pod_exist, logs = get_build_logs(
                                    req_url=host,
                                    req_headers=get_header(api_key),
                                    namespace=namespace,
                                    build_pod=build_pod_name,
                                )
                                if report_issue(
                                    bc_name, build_status["phase"], detail=logs
                                ):
                                    logging.debug(
                                        "The build {} status is {}. A GitHub Issue has been raised.".format(
                                            bc_name, build_status["phase"]
                                        )
                                    )
                                else:
                                    logging.debug(
                                        "The build {} status is {}. Failed to raise a GitHub Issue. Please contact the admin.".format(
                                            bc_name, build_status["phase"]
                                        )
                                    )
                                if (
                                    pod_exist
                                    and "gpg: keyserver receive failed: Keyserver error"
                                    in logs
                                ):
                                    # Known transient keyserver failure: requeue.
                                    obj = object_map[bc_name]
                                    task_q.put(
                                        {
                                            "kind": "BuildConfig",
                                            "object": obj,
                                            "trigger_count": 1,
                                            "retrigger": True,
                                        }
                                    )
                                else:
                                    global_count.decrement()
                                    logging.debug(
                                        "Build seen {} Failed-status is {} G:{}".format(
                                            bc_name,
                                            build_status["phase"],
                                            global_count,
                                        )
                                    )
                        else:
                            # Build is COMPLETE
                            seen = bloom.add(
                                [
                                    build_status["config"]["kind"],
                                    build_status["config"]["name"],
                                    build_status["phase"],
                                ]
                            )
                            if not seen and bc_name in object_map.keys():
                                # global_count.decrement()
                                logging.debug(
                                    "{} The Build {} status is {} global_count={}.".format(
                                        task_q.qsize(),
                                        bc_name,
                                        build_status["phase"],
                                        global_count,
                                    )
                                )
                                job_name = bc_name.replace("image", "job")
                                jexist, jresp = get_job(
                                    req_url=host,
                                    req_headers=get_header(api_key),
                                    namespace=namespace,
                                    job_name=job_name,
                                )
                                if not jexist:
                                    if job_name in object_map:
                                        job = object_map[job_name]
                                        task_q.put(
                                            {
                                                "kind": "Job",
                                                "object": job,
                                                "trigger_count": 0,
                                                "retrigger": False,
                                            }
                                        )
                                        global_count.increment()
                                        logging.debug(
                                            "{} The Build->Job {} does not exist. Adding it. G:{}.".format(
                                                task_q.qsize(), job_name, global_count
                                            )
                                        )
                                else:
                                    job_status = get_job_status(jresp.get("status"))
                                    logging.debug(
                                        "{} The Build->Job {} status is {}. G:{}.".format(
                                            task_q.qsize(),
                                            job_name,
                                            job_status,
                                            global_count,
                                        )
                                    )
            # =========================
            # EVENTS of type Job
            # =========================
            elif event_obj["object"]["involvedObject"]["kind"] == "Job":
                job_name = event_obj["object"]["involvedObject"]["name"]
                jbool, jresponse = get_job(
                    req_url=host,
                    req_headers=get_header(api_key),
                    namespace=namespace,
                    job_name=job_name,
                )
                job_status = get_job_status(jresponse.get("status"))
                if job_status == "BackoffLimitExceeded":
                    global_count.decrement()
                    # Raising a GitHub Issue
                    _, pods_info = get_all_pods(
                        req_url=host,
                        req_headers=get_header(api_key),
                        namespace=namespace,
                    )
                    job_pod_name = get_job_pod(job_name, pods_info)
                    pod_exist, joblogs = get_job_logs(
                        req_url=host,
                        req_headers=get_header(api_key),
                        namespace=namespace,
                        job_pod=job_pod_name,
                    )
                    detail = "Due to BackoffLimitExceeded"
                    if joblogs:
                        detail = joblogs
                    if report_issue(job_name, job_status, detail=detail):
                        logging.debug(
                            "{} The Job {} status is {} global_count={}. A GitHub Issue has been raised.".format(
                                task_q.qsize(), job_name, job_status, global_count
                            )
                        )
                    else:
                        logging.debug(
                            "{} The Job {} status is {} global_count={}. Failed to raise a GitHub Issue. Please contact the admin.".format(
                                task_q.qsize(), job_name, job_status, global_count
                            )
                        )
                elif job_status == "Complete":
                    global_count.decrement()
                    logging.debug(
                        "{} The Job {} status is {}. global_count={}.".format(
                            task_q.qsize(), job_name, job_status, global_count
                        )
                    )
                else:
                    # if active
                    logging.debug(
                        "{} The Job {} status is {}. TODO".format(
                            task_q.qsize(), job_name, job_status
                        )
                    )
def process_taskq(task_q, global_count, object_map):
    """Take one task off the queue and create/retrigger the resource it describes."""
    host = client.Configuration().host
    api_key = client.Configuration().api_key
    namespace = get_namespace()
    if task_q.qsize() == 0:
        return
    created = None
    q_item = task_q.get()
    q_resource = q_item.get("object", "")
    retrigger = q_item.get("retrigger", False)
    # TODO use defaultdict
    if not (q_resource and q_resource.get("kind") and q_resource["metadata"]["name"]):
        return
    logging.debug(
        "{} processing STARTED. Q-size: {}, G:{}.".format(
            task_q.qsize(), task_q.qsize(), global_count
        )
    )
    q_resource_name = q_resource["metadata"]["name"]
    q_resource_kind = q_resource["kind"]
    logging.debug(
        "{} processing CREATING; name: {}; kind: {}; retrigger:{}".format(
            task_q.qsize(), q_resource_name, q_resource_kind, retrigger
        )
    )
    # ==========================
    # BuildConfig
    # ==========================
    if q_resource_kind == "BuildConfig" and not retrigger:
        # The scheduler might have restarted;
        # so, have we built this before ?
        build_created, build_response = get_buildconfig(
            req_url=host,
            req_headers=get_header(api_key),
            namespace=namespace,
            build_config_name=q_resource_name,
        )
        if not build_created:
            created = create_buildconfig(
                req_url=host,
                req_headers=get_header(api_key),
                namespace=namespace,
                build_config=q_resource,
            )
            if not created:
                raise Exception(
                    "Build could not be created for {}".format(q_resource_name)
                )
            global_count.increment()
        else:
            # We have built this (q_resource) resource before.
            latest_build_version = str(build_response["status"]["lastVersion"])
            build_name = q_resource_name + "-" + latest_build_version
            bexist, bresp = get_build(
                req_url=host,
                req_headers=get_header(api_key),
                namespace=namespace,
                build_name=build_name,
            )
            logging.debug(
                "{} The Build {} exists={} version={}. G:{}".format(
                    task_q.qsize(),
                    q_resource_name,
                    bexist,
                    latest_build_version,
                    global_count,
                )
            )
            if bexist:
                phase = bresp["status"]["phase"]
                if phase == "Complete":
                    # global_count.decrement()
                    logging.debug(
                        "{} The Build {} status is {}. G:{}".format(
                            task_q.qsize(), build_name, phase, global_count
                        )
                    )
                    # Let's do the next step associated with this resource.
                    # TODO Ask a service what to do next?
                    # ==========================
                    # Create --> Job
                    # ==========================
                    job_name = q_resource_name.replace("image", "job")
                    builder_imagestream = q_resource_name + ":" + "1"
                    job_created, job_response = get_job(
                        req_url=host,
                        req_headers=get_header(api_key),
                        namespace=namespace,
                        job_name=job_name,
                    )
                    job_status = get_job_status(job_response["status"])
                    logging.debug(
                        "{} The Job {} status is {}.".format(
                            task_q.qsize(), job_name, job_status
                        )
                    )
                    if not job_created:
                        # Add job to the queue.
                        # job_template = fill_job_template(application_name=job_name,
                        #                                  builder_imagesream=builder_imagestream,
                        #                                  nb_python_ver=q_item["nb_python_ver"])
                        job_template = object_map[job_name]
                        job_item = {
                            "kind": "Job",
                            "object": job_template,
                            "trigger_count": 0,
                            "retrigger": False,
                        }
                        task_q.task_done()
                        task_q.put(job_item)
                        global_count.increment()
                        logging.debug(
                            "{} processing DONE; ADDED new task: {}; kind: {} G:{}".format(
                                task_q.qsize(), job_name, "Job", global_count
                            )
                        )
                        return  # do not remove this.
                    else:
                        # The Job was created before the scheduler started,
                        # and also before this BuildConfig entered the task queue.
                        if job_status == "BackoffLimitExceeded":
                            # Delete the Job.
                            logging.debug(
                                "{} processing DONE; deleting existing {};".format(
                                    task_q.qsize(), job_name
                                )
                            )
                            dstate, dresp = delete_job(
                                req_url=host,
                                req_headers=get_header(api_key),
                                namespace=namespace,
                                job_name=job_name,
                            )
                            # If deleted, then add a job task.
                            if dstate:
                                job_template = object_map[job_name]
                                job_item = {
                                    "kind": "Job",
                                    "object": job_template,
                                    "trigger_count": 0,
                                    "retrigger": False,
                                }
                                task_q.task_done()
                                task_q.put(job_item)
                                global_count.increment()
                                logging.debug(
                                    "{} processing DONE; ADDED kind: {}; new name: {}; G:{}".format(
                                        task_q.qsize(), "Job", job_name, global_count
                                    )
                                )
                                return  # do not remove this.
                        elif job_status == "ACTIVE":
                            global_count.increment()
                            logging.debug(
                                "{} processing DONE; Job is already {}. G:{};".format(
                                    task_q.qsize(), job_status, global_count
                                )
                            )
                        else:
                            logging.debug(
                                "{} The Job {} status is {}. Trusting Event Thread to do follow-up actions.".format(
                                    task_q.qsize(), job_name, job_status
                                )
                            )
                elif phase == "Failed":
                    logging.debug(
                        "{} The Build {} retriggered since status={}. G:{}".format(
                            task_q.qsize(), build_name, phase, global_count
                        )
                    )
                    trigger_build(
                        req_url=host,
                        req_headers=get_header(api_key),
                        namespace=namespace,
                        build_resource=q_resource,
                    )
                else:
                    logging.debug(
                        "{} The Build {} status is {}. TODO".format(
                            task_q.qsize(), q_resource_name, phase
                        )
                    )
    elif q_resource_kind == "BuildConfig" and retrigger:
        build_created, build_response = get_buildconfig(
            req_url=host,
            req_headers=get_header(api_key),
            namespace=namespace,
            build_config_name=q_resource_name,
        )
        latest_build_version = str(build_response["status"]["lastVersion"])
        build_name = q_resource_name + "-" + latest_build_version
        bexist, bresp = get_build(
            req_url=host,
            req_headers=get_header(api_key),
            namespace=namespace,
            build_name=build_name,
        )
        logging.debug(
            "{} The Build {} exists={} version={}. G:{}".format(
                task_q.qsize(),
                q_resource_name,
                bexist,
                latest_build_version,
                global_count,
            )
        )
        if bexist:
            phase = bresp["status"]["phase"]
            if phase != "ACTIVE":
                trigger_build(
                    req_url=host,
                    req_headers=get_header(api_key),
                    namespace=namespace,
                    build_resource=q_resource,
                )
        else:
            trigger_build(
                req_url=host,
                req_headers=get_header(api_key),
                namespace=namespace,
                build_resource=q_resource,
            )
    # ==========================
    # ImageStream
    # ==========================
    elif q_resource_kind == "ImageStream":
        if not get_imagestream(
            req_url=host,
            req_headers=get_header(api_key),
            namespace=namespace,
            imagestream_name=q_resource_name,
        ):
            created = create_imagestream(
                req_url=host,
                req_headers=get_header(api_key),
                namespace=namespace,
                imagestream=q_resource,
            )
            if not created:
                raise Exception(
                    "Image {} could not be created.".format(q_resource_name)
                )
    # ==========================
    # Job
    # ==========================
    elif q_resource_kind == "Job":
        job_created, job_response = get_job(
            req_url=host,
            req_headers=get_header(api_key),
            namespace=namespace,
            job_name=q_resource_name,
        )
        job_status = get_job_status(job_response["status"])
        if not job_created:
            created = create_job(
                req_url=host,
                req_headers=get_header(api_key),
                namespace=namespace,
                job_name=q_resource,
            )
            if not created:
                raise Exception(
                    "Job {} could not be created.".format(q_resource_name)
                )
        else:
            # The Job was created before the scheduler started.
            logging.debug(
                "{} The Job {} status is {}. Trusting Event Thread to do follow-up actions.".format(
                    task_q.qsize(), q_resource_name, job_status
                )
            )
    else:
        logging.debug(
            "{} processing unknown resource : {}".format(
                task_q.qsize(), str(q_resource_kind)
            )
        )
    task_q.task_done()
    logging.debug("{} processing DONE for {}".format(task_q.qsize(), q_resource_name))
    return created
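# Usage sketch (assumption): process_taskq() is presumably driven by the
# resource thread (create_resource, defined elsewhere), waiting on the quota
# condition before attempting each task, roughly:
#
#     while not done_event.is_set():
#         with quota_event:
#             quota_event.wait(timeout=30)  # hypothetical timeout
#         process_taskq(task_q, global_count, object_map)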