Example #1
def index():
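    # Validate the logging headers, enforce the payload size limit, then index the
    # JSON body into Elasticsearch via process_and_update_elastic_doc.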

    request_id = log_helper.extract_request_id(request.headers)
    if not request_id:
        return Response(f"Header {log_helper.REQUEST_ID_HEADER_NAME} not found", 400)
    type_header = request.headers.get(log_helper.TYPE_HEADER_NAME)
    if type_header is None:
        return Response(f"Header {log_helper.TYPE_HEADER_NAME} not found", 400)
    message_type = log_helper.parse_message_type(type_header)
    index_name = log_helper.build_index_name(request.headers)

    body = request.get_json(force=True)

    # max size is configurable with env var or defaults to constant
    max_payload_bytes = log_helper.get_max_payload_bytes(MAX_PAYLOAD_BYTES)

    body_length = request.headers.get(log_helper.LENGTH_HEADER_NAME)
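    # skip indexing when the declared body length exceeds the configured limit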
    if body_length and int(body_length) > int(max_payload_bytes):
        doc_type = log_helper.DOC_TYPE_NAME if log_helper.DOC_TYPE_NAME is not None else "_doc"
        too_large_message = (
            f"body too large for {index_name}/{doc_type}/{request_id} adding {message_type}"
        )
        print(too_large_message)
        sys.stdout.flush()
        return too_large_message

    if not isinstance(body, dict):
        body = json.loads(body)

    # print('RECEIVED MESSAGE.')
    # print(str(request.headers))
    # print(str(body))
    # print('----')
    # sys.stdout.flush()

    try:

        # now process and update the doc
        added_content = process_and_update_elastic_doc(
            es, message_type, body, request_id, request.headers, index_name
        )

        return jsonify(added_content)
    except Exception:
        traceback.print_exc()
    sys.stdout.flush()
    return Response("problem logging request", 500)
Example #2
def index():
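    # Extract the logging metadata from the headers, then index the JSON body into Elasticsearch.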

    request_id = log_helper.extract_request_id(request.headers)
    type_header = request.headers.get(log_helper.TYPE_HEADER_NAME)
    message_type = log_helper.parse_message_type(type_header)
    index_name = log_helper.build_index_name(request.headers)

    body = request.get_json(force=True)

    # max size is configurable with env var or defaults to constant
    max_payload_bytes = log_helper.get_max_payload_bytes(MAX_PAYLOAD_BYTES)

    body_length = request.headers.get(log_helper.LENGTH_HEADER_NAME)
    if body_length and int(body_length) > int(max_payload_bytes):
        too_large_message = (
            f"body too large for {index_name}/{log_helper.DOC_TYPE_NAME}/{request_id}"
            f" adding {message_type}"
        )
        print(too_large_message)
        sys.stdout.flush()
        return too_large_message

    if not isinstance(body, dict):
        body = json.loads(body)

    # print('RECEIVED MESSAGE.')
    # print(str(request.headers))
    # print(str(body))
    # print('----')
    # sys.stdout.flush()

    try:

        # now process and update the doc
        doc = process_and_update_elastic_doc(
            es, message_type, body, request_id, request.headers, index_name
        )

        return str(doc)
    except Exception as ex:
        print(ex)
    sys.stdout.flush()
    return "problem logging request"
Example #3
def process_and_update_elastic_doc(
    elastic_object, message_type, message_body, request_id, headers, index_name
):
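    # Transform the incoming message, attach metadata, and upsert one or more
    # documents into Elasticsearch, splitting batched payloads into per-item docs.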

    added_content = []

    if message_type == "unknown":
        print(f"UNKNOWN REQUEST TYPE FOR {request_id} - NOT PROCESSING")
        sys.stdout.flush()
        # the message is not indexed when its type cannot be determined
        return added_content

    # first do any needed transformations
    new_content_part = process_content(message_type, message_body, headers)

    # set metadata to go just in this part (request or response) and not top-level
    log_helper.field_from_header(
        content=new_content_part, header_name="ce-time", headers=headers
    )
    log_helper.field_from_header(
        content=new_content_part, header_name="ce-source", headers=headers
    )

    doc_body = {message_type: new_content_part}
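    # the document nests the content under its message type, e.g. "request", "outlier" or "drift"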

    log_helper.set_metadata(doc_body, headers, message_type, request_id)

    # req or res might be batches of instances so split out into individual docs
    if "instance" in new_content_part:

        if log_helper.is_reference_data(headers):
            index_name = log_helper.build_index_name(headers, prefix="reference", suffix=False)
            # Ignore payload for reference data
            doc_body[message_type].pop("payload", None)

        if isinstance(new_content_part["instance"], list) and new_content_part["dataType"] != "json":
            # if we've a list then this is batch
            # we assume first dimension is always batch
            bulk_upsert_doc_to_elastic(elastic_object, message_type, doc_body,
                                       doc_body[message_type].copy(), request_id, index_name)
        else:
            # not batch, so don't batch elements either
            if "elements" in new_content_part and isinstance(new_content_part["elements"], list):
                new_content_part["elements"] = new_content_part["elements"][0]

            item_request_id = build_request_id_batched(request_id, 1, 0)
            added_content.append(upsert_doc_to_elastic(
                elastic_object, message_type, doc_body, item_request_id, index_name
            ))
    elif message_type == "feedback":
        item_request_id = build_request_id_batched(request_id, 1, 0)
        upsert_doc_to_elastic(elastic_object, message_type, doc_body, item_request_id, index_name)
    elif "data" in new_content_part and message_type == "outlier":
        no_items_in_batch = len(doc_body[message_type]["data"]["is_outlier"])
        for index, item in enumerate(doc_body[message_type]["data"]["is_outlier"]):
            # deep copy (stdlib "copy" module) so per-item slicing does not mutate the shared nested dicts
            item_body = copy.deepcopy(doc_body)
            item_body[message_type]["data"]["is_outlier"] = item
            if (
                "feature_score" in item_body[message_type]["data"]
                and item_body[message_type]["data"]["feature_score"] is not None
                and len(item_body[message_type]["data"]["feature_score"])
                == no_items_in_batch
            ):
                item_body[message_type]["data"]["feature_score"] = item_body[
                    message_type
                ]["data"]["feature_score"][index]
            if (
                "instance_score" in item_body[message_type]["data"]
                and item_body[message_type]["data"]["instance_score"] is not None
                and len(item_body[message_type]["data"]["instance_score"])
                == no_items_in_batch
            ):
                item_body[message_type]["data"]["instance_score"] = item_body[
                    message_type
                ]["data"]["instance_score"][index]
            item_request_id = build_request_id_batched(
                request_id, no_items_in_batch, index
            )
            upsert_doc_to_elastic(
                elastic_object, message_type, item_body, item_request_id, index_name
            )
    elif "data" in new_content_part and message_type == "drift":
        item_body = doc_body.copy()

        namespace = log_helper.get_header(log_helper.NAMESPACE_HEADER_NAME, headers)
        inferenceservice_name = log_helper.get_header(log_helper.INFERENCESERVICE_HEADER_NAME, headers)
        endpoint_name = log_helper.get_header(log_helper.ENDPOINT_HEADER_NAME, headers)
        serving_engine = log_helper.serving_engine(headers)
        item_body[message_type]["data"]["is_drift"] = bool(item_body[message_type]["data"]["is_drift"])
        item_body[message_type]["data"]["drift_type"] = "batch"
        if (
            "distance" in item_body[message_type]["data"]
            and item_body[message_type]["data"]["distance"] is not None
            and isinstance(item_body[message_type]["data"]["distance"], list)
        ):
            content_dist = np.array(item_body[message_type]["data"]["distance"])
            x = np.expand_dims(content_dist, axis=0)
            item_body[message_type]["data"]["drift_type"] = "feature"
            elements = createElelmentsArray(x, None, namespace, serving_engine, inferenceservice_name, endpoint_name, "request", True)
            if isinstance(elements, list):
                elements = elements[0]
            item_body[message_type]["data"]["feature_distance"] = elements
            del item_body[message_type]["data"]["distance"]
        if (
            "p_val" in item_body[message_type]["data"]
            and item_body[message_type]["data"]["p_val"] is not None
            and isinstance(item_body[message_type]["data"]["p_val"], list)
        ):
            content_dist = np.array(item_body[message_type]["data"]["p_val"])
            x = np.expand_dims(content_dist, axis=0)
            item_body[message_type]["data"]["drift_type"] = "feature"
            elements = createElelmentsArray(x, None, namespace, serving_engine, inferenceservice_name, endpoint_name, "request", True)
            if isinstance(elements, list):
                elements = elements[0]
            item_body[message_type]["data"]["feature_p_val"] = elements
            del item_body[message_type]["data"]["p_val"]
        detectorName = None
        ce_source = item_body[message_type]["ce-source"]
        if ce_source.startswith("io.seldon.serving."):
            detectorName = ce_source[len("io.seldon.serving."):]
        elif ce_source.startswith("org.kubeflow.serving."):
            detectorName = ce_source[len("org.kubeflow.serving."):]
        index_name = log_helper.build_index_name(headers, message_type, False, detectorName)
        upsert_doc_to_elastic(
            elastic_object, message_type, item_body, request_id, index_name
        )
    else:
        print("unexpected data format")
        print(new_content_part)
    return added_content