def process_and_update_elastic_doc(elastic_object, message_type, message_body,
                                   request_id, headers, index_name):
    """Transform a logged message and upsert it into an Elasticsearch doc.

    The transformed content is stored under the ``message_type`` key of the
    document identified by ``request_id``; the document is created if it does
    not yet exist (``doc_as_upsert``).  Returns the Elasticsearch update
    response converted to a string.
    """
    # NOTE(review): despite the message below, processing continues for
    # 'unknown' types -- confirm whether an early return was intended
    if message_type == 'unknown':
        print('UNKNOWN REQUEST TYPE FOR ' + request_id + ' - NOT PROCESSING')
        sys.stdout.flush()

    # apply any transformations needed for this message type
    transformed = process_content(message_type, message_body)

    # attach metadata specific to this part (request or response)
    for header in ('ce-time', 'ce-source'):
        log_helper.field_from_header(content=transformed, header_name=header,
                                     headers=headers)

    upsert_body = {
        "doc_as_upsert": True,
        "doc": {message_type: transformed},
    }
    log_helper.set_metadata(upsert_body["doc"], headers, message_type, request_id)

    response = elastic_object.update(
        index=index_name,
        doc_type=log_helper.DOC_TYPE_NAME,
        id=request_id,
        body=upsert_body,
        retry_on_conflict=3,
        refresh=True,
        timeout="60s",
    )
    print('upserted to doc ' + index_name + "/" + log_helper.DOC_TYPE_NAME + "/" + request_id + ' adding ' + message_type)
    sys.stdout.flush()
    return str(response)
def process_and_update_elastic_doc(elastic_object, message_type, message_body,
                                   request_id, headers, index_name):
    """Transform an inbound message and upsert it into Elasticsearch.

    Batched requests/responses are split into one document per instance;
    outlier-detector payloads are split into one document per ``is_outlier``
    entry.  Returns None.
    """
    if message_type == 'unknown':
        print('UNKNOWN REQUEST TYPE FOR ' + request_id + ' - NOT PROCESSING')
        sys.stdout.flush()
        # the message states we are not processing, so stop here instead of
        # upserting an 'unknown'-typed doc
        return

    # first do any needed transformations
    new_content_part = process_content(message_type, message_body)

    # set metadata to go just in this part (request or response) and not top-level
    log_helper.field_from_header(content=new_content_part, header_name='ce-time', headers=headers)
    log_helper.field_from_header(content=new_content_part, header_name='ce-source', headers=headers)

    doc_body = {message_type: new_content_part}
    log_helper.set_metadata(doc_body, headers, message_type, request_id)

    # req or res might be batches of instances so split out into individual docs
    if "instance" in new_content_part:
        if isinstance(new_content_part["instance"], list):
            # if we've a list then this is a batch; we assume the first
            # dimension is always batch
            no_items_in_batch = len(new_content_part["instance"])
            for index, item in enumerate(new_content_part["instance"]):
                # copy the nested part dict too: doc_body.copy() alone would
                # alias new_content_part, sharing mutations across items
                item_body = dict(doc_body)
                item_body[message_type] = dict(new_content_part)
                item_body[message_type]['instance'] = item
                item_request_id = build_request_id_batched(request_id, no_items_in_batch, index)
                upsert_doc_to_elastic(elastic_object, message_type, item_body,
                                      item_request_id, index_name)
        else:
            item_request_id = build_request_id_batched(request_id, 1, 0)
            upsert_doc_to_elastic(elastic_object, message_type, doc_body,
                                  item_request_id, index_name)
    elif "data" in new_content_part and message_type == 'outlier':
        data = doc_body[message_type]["data"]
        no_items_in_batch = len(data["is_outlier"])
        for index, item in enumerate(data["is_outlier"]):
            # build per-item copies of the nested dicts: the original shallow
            # copy mutated shared state, so items after the first silently
            # re-used item 0's feature/instance scores
            item_data = dict(data)
            item_data["is_outlier"] = item
            # only slice scores when they are per-item (one entry per batch item)
            if data.get("feature_score") is not None and len(data["feature_score"]) == no_items_in_batch:
                item_data["feature_score"] = data["feature_score"][index]
            if data.get("instance_score") is not None and len(data["instance_score"]) == no_items_in_batch:
                item_data["instance_score"] = data["instance_score"][index]
            item_body = dict(doc_body)
            item_body[message_type] = dict(doc_body[message_type])
            item_body[message_type]["data"] = item_data
            item_request_id = build_request_id_batched(request_id, no_items_in_batch, index)
            upsert_doc_to_elastic(elastic_object, message_type, item_body,
                                  item_request_id, index_name)
    else:
        print('unexpected data format')
        print(new_content_part)
    return
def process_and_update_elastic_doc(elastic_object, message_type, message_body,
                                   request_id, headers, index_name):
    """Transform an inbound message and upsert it into Elasticsearch.

    Batched requests/responses are split into one document per instance
    (any per-instance "elements" are split alongside); feedback messages are
    stored whole; outlier-detector payloads are split per ``is_outlier``
    entry.  Returns None.
    """
    if message_type == "unknown":
        print("UNKNOWN REQUEST TYPE FOR " + request_id + " - NOT PROCESSING")
        sys.stdout.flush()
        # the message states we are not processing, so stop here instead of
        # upserting an 'unknown'-typed doc
        return

    # first do any needed transformations
    new_content_part = process_content(message_type, message_body, headers)

    # set metadata to go just in this part (request or response) and not top-level
    log_helper.field_from_header(content=new_content_part, header_name="ce-time", headers=headers)
    log_helper.field_from_header(content=new_content_part, header_name="ce-source", headers=headers)

    doc_body = {message_type: new_content_part}
    log_helper.set_metadata(doc_body, headers, message_type, request_id)

    # req or res might be batches of instances so split out into individual docs
    if "instance" in new_content_part:
        if isinstance(new_content_part["instance"], list):
            # if we've a list then this is a batch; we assume the first
            # dimension is always batch
            no_items_in_batch = len(new_content_part["instance"])
            elements = new_content_part.get("elements")
            for index, item in enumerate(new_content_part["instance"]):
                # copy the nested part dict too: doc_body.copy() alone would
                # alias new_content_part, sharing mutations across items
                item_body = dict(doc_body)
                item_body[message_type] = dict(new_content_part)
                item_body[message_type]["instance"] = item
                if isinstance(elements, list) and len(elements) > index:
                    item_body[message_type]["elements"] = elements[index]
                item_request_id = build_request_id_batched(request_id, no_items_in_batch, index)
                upsert_doc_to_elastic(elastic_object, message_type, item_body,
                                      item_request_id, index_name)
        else:
            # not batch so don't batch elements either
            if isinstance(new_content_part.get("elements"), list):
                new_content_part["elements"] = new_content_part["elements"][0]
            item_request_id = build_request_id_batched(request_id, 1, 0)
            upsert_doc_to_elastic(elastic_object, message_type, doc_body,
                                  item_request_id, index_name)
    elif message_type == "feedback":
        # feedback is stored whole under its own (un-batched) request id
        item_request_id = build_request_id_batched(request_id, 1, 0)
        upsert_doc_to_elastic(elastic_object, message_type, doc_body,
                              item_request_id, index_name)
    elif "data" in new_content_part and message_type == "outlier":
        data = doc_body[message_type]["data"]
        no_items_in_batch = len(data["is_outlier"])
        for index, item in enumerate(data["is_outlier"]):
            # build per-item copies of the nested dicts: the original shallow
            # copy mutated shared state, so items after the first silently
            # re-used item 0's feature/instance scores
            item_data = dict(data)
            item_data["is_outlier"] = item
            # only slice scores when they are per-item (one entry per batch item)
            if data.get("feature_score") is not None and len(data["feature_score"]) == no_items_in_batch:
                item_data["feature_score"] = data["feature_score"][index]
            if data.get("instance_score") is not None and len(data["instance_score"]) == no_items_in_batch:
                item_data["instance_score"] = data["instance_score"][index]
            item_body = dict(doc_body)
            item_body[message_type] = dict(doc_body[message_type])
            item_body[message_type]["data"] = item_data
            item_request_id = build_request_id_batched(request_id, no_items_in_batch, index)
            upsert_doc_to_elastic(elastic_object, message_type, item_body,
                                  item_request_id, index_name)
    else:
        print("unexpected data format")
        print(new_content_part)
    return
def process_and_update_elastic_doc(
    elastic_object, message_type, message_body, request_id, headers, index_name
):
    """Transform an inbound message and upsert it into Elasticsearch.

    Batched (non-json) requests/responses go through the bulk path; single
    instances, feedback, outlier and drift payloads are upserted as
    individual docs.  Returns the list of upsert results; as in the original
    flow, only the single-instance path appends to it.
    """
    added_content = []
    if message_type == "unknown":
        print("UNKNOWN REQUEST TYPE FOR " + request_id + " - NOT PROCESSING")
        sys.stdout.flush()
        # the message states we are not processing, so stop here
        return added_content

    # first do any needed transformations
    new_content_part = process_content(message_type, message_body, headers)

    # set metadata to go just in this part (request or response) and not top-level
    log_helper.field_from_header(content=new_content_part, header_name="ce-time", headers=headers)
    log_helper.field_from_header(content=new_content_part, header_name="ce-source", headers=headers)

    doc_body = {message_type: new_content_part}
    log_helper.set_metadata(doc_body, headers, message_type, request_id)

    # req or res might be batches of instances so split out into individual docs
    if "instance" in new_content_part:
        if log_helper.is_reference_data(headers):
            index_name = log_helper.build_index_name(headers, prefix="reference", suffix=False)
            # Ignore payload for reference data
            doc_body[message_type].pop("payload", None)
        # .get avoids a KeyError when dataType is absent; an absent dataType is
        # treated as non-json and follows the batch path
        if isinstance(new_content_part["instance"], list) and new_content_part.get("dataType") != "json":
            # if we've a list then this is a batch; we assume the first
            # dimension is always batch
            bulk_upsert_doc_to_elastic(elastic_object, message_type, doc_body,
                                       doc_body[message_type].copy(), request_id, index_name)
        else:
            # not batch so don't batch elements either
            if isinstance(new_content_part.get("elements"), list):
                new_content_part["elements"] = new_content_part["elements"][0]
            item_request_id = build_request_id_batched(request_id, 1, 0)
            added_content.append(
                upsert_doc_to_elastic(elastic_object, message_type, doc_body,
                                      item_request_id, index_name)
            )
    elif message_type == "feedback":
        # feedback is stored whole under its own (un-batched) request id
        item_request_id = build_request_id_batched(request_id, 1, 0)
        upsert_doc_to_elastic(elastic_object, message_type, doc_body,
                              item_request_id, index_name)
    elif "data" in new_content_part and message_type == "outlier":
        data = doc_body[message_type]["data"]
        no_items_in_batch = len(data["is_outlier"])
        for index, item in enumerate(data["is_outlier"]):
            # build per-item copies of the nested dicts: the original shallow
            # copy mutated shared state, so items after the first silently
            # re-used item 0's feature/instance scores
            item_data = dict(data)
            item_data["is_outlier"] = item
            # only slice scores when they are per-item (one entry per batch item)
            if data.get("feature_score") is not None and len(data["feature_score"]) == no_items_in_batch:
                item_data["feature_score"] = data["feature_score"][index]
            if data.get("instance_score") is not None and len(data["instance_score"]) == no_items_in_batch:
                item_data["instance_score"] = data["instance_score"][index]
            item_body = dict(doc_body)
            item_body[message_type] = dict(doc_body[message_type])
            item_body[message_type]["data"] = item_data
            item_request_id = build_request_id_batched(request_id, no_items_in_batch, index)
            upsert_doc_to_elastic(elastic_object, message_type, item_body,
                                  item_request_id, index_name)
    elif "data" in new_content_part and message_type == "drift":
        data = doc_body[message_type]["data"]
        namespace = log_helper.get_header(log_helper.NAMESPACE_HEADER_NAME, headers)
        inferenceservice_name = log_helper.get_header(log_helper.INFERENCESERVICE_HEADER_NAME, headers)
        endpoint_name = log_helper.get_header(log_helper.ENDPOINT_HEADER_NAME, headers)
        serving_engine = log_helper.serving_engine(headers)
        # coerce to a plain bool so the value serialises cleanly
        data["is_drift"] = bool(data["is_drift"])
        data["drift_type"] = "batch"
        # per-feature distance / p_val lists become named per-feature elements;
        # both keys get the identical treatment, so drive it from a table
        for src_key, dest_key in (("distance", "feature_distance"), ("p_val", "feature_p_val")):
            values = data.get(src_key)
            if isinstance(values, list):
                data["drift_type"] = "feature"
                # expand to a batch of one so the scores map onto feature names
                x = np.expand_dims(np.array(values), axis=0)
                elements = createElelmentsArray(x, None, namespace, serving_engine,
                                                inferenceservice_name, endpoint_name,
                                                "request", True)
                if isinstance(elements, list):
                    elements = elements[0]
                data[dest_key] = elements
                del data[src_key]
        # derive the detector name from the cloud-event source, if recognised
        detector_name = None
        ce_source = doc_body[message_type]["ce-source"]
        for prefix in ("io.seldon.serving.", "org.kubeflow.serving."):
            if ce_source.startswith(prefix):
                detector_name = ce_source[len(prefix):]
                break
        # use the headers passed in rather than the global flask `request`, so
        # this function does not depend on running inside a request context
        # (every other branch already uses the `headers` parameter)
        index_name = log_helper.build_index_name(headers, message_type, False, detector_name)
        upsert_doc_to_elastic(elastic_object, message_type, doc_body, request_id, index_name)
    else:
        print("unexpected data format")
        print(new_content_part)
    return added_content