Example #1
0
def process_and_update_elastic_doc(elastic_object, message_type, message_body,
                                   request_id, headers, index_name):
    """Transform a message part and upsert it into an Elasticsearch document.

    Args:
        elastic_object: Elasticsearch client used for the ``update`` call.
        message_type: kind of message (e.g. 'request'/'response'); used as the
            field name the transformed content is stored under in the doc.
        message_body: raw message payload passed to ``process_content``.
        request_id: id of the Elasticsearch document to upsert into.
        headers: CloudEvents-style headers; 'ce-time' and 'ce-source' are
            copied into the stored part by ``log_helper.field_from_header``.
        index_name: Elasticsearch index to write to.

    Returns:
        ``str`` of the Elasticsearch update response, or ``None`` when the
        message type is unknown.
    """
    if message_type == 'unknown':
        print('UNKNOWN REQUEST TYPE FOR ' + request_id + ' - NOT PROCESSING')
        sys.stdout.flush()
        # Fix: honour the "NOT PROCESSING" message above - the original code
        # fell through and processed the unknown message anyway.
        return None

    # first do any needed transformations
    new_content_part = process_content(message_type, message_body)

    # set metadata specific to this part (request or response)
    log_helper.field_from_header(content=new_content_part,
                                 header_name='ce-time',
                                 headers=headers)
    log_helper.field_from_header(content=new_content_part,
                                 header_name='ce-source',
                                 headers=headers)

    # doc_as_upsert: create the doc if absent, otherwise merge this part in
    upsert_body = {
        "doc_as_upsert": True,
        "doc": {
            message_type: new_content_part
        }
    }

    log_helper.set_metadata(upsert_body['doc'], headers, message_type,
                            request_id)

    new_content = elastic_object.update(index=index_name,
                                        doc_type=log_helper.DOC_TYPE_NAME,
                                        id=request_id,
                                        body=upsert_body,
                                        retry_on_conflict=3,
                                        refresh=True,
                                        timeout="60s")
    print('upserted to doc ' + index_name + "/" + log_helper.DOC_TYPE_NAME +
          "/" + request_id + ' adding ' + message_type)
    sys.stdout.flush()
    return str(new_content)
Example #2
0
def process_and_update_elastic_doc(elastic_object, message_type, message_body,
                                   request_id, headers, index_name):
    """Transform a message part and upsert it, splitting batches into
    one Elasticsearch doc per batch item.

    Args:
        elastic_object: Elasticsearch client passed through to the upsert helper.
        message_type: kind of message ('request'/'response'/'outlier'/...);
            becomes the field name the content is stored under.
        message_body: raw message payload passed to ``process_content``.
        request_id: base document id; batched items get ids derived via
            ``build_request_id_batched``.
        headers: CloudEvents-style headers; 'ce-time' and 'ce-source' are
            copied into the stored part.
        index_name: Elasticsearch index to write to.
    """
    if message_type == 'unknown':
        print('UNKNOWN REQUEST TYPE FOR ' + request_id + ' - NOT PROCESSING')
        sys.stdout.flush()
        # Fix: honour the "NOT PROCESSING" message - the original code fell
        # through and processed the unknown message anyway.
        return

    #first do any needed transformations
    new_content_part = process_content(message_type, message_body)

    #set metadata to go just in this part (request or response) and not top-level
    log_helper.field_from_header(content=new_content_part,
                                 header_name='ce-time',
                                 headers=headers)
    log_helper.field_from_header(content=new_content_part,
                                 header_name='ce-source',
                                 headers=headers)

    doc_body = {message_type: new_content_part}

    log_helper.set_metadata(doc_body, headers, message_type, request_id)

    # req or res might be batches of instances so split out into individual docs
    if "instance" in new_content_part:

        if isinstance(new_content_part["instance"], list):
            #if we've a list then this is batch
            #we assume first dimension is always batch
            no_items_in_batch = len(new_content_part["instance"])
            for index, item in enumerate(new_content_part["instance"]):
                # Fix: doc_body.copy() is shallow, so writing into the nested
                # part dict mutated the shared new_content_part. Copy the
                # nested dict too so each item doc is independent.
                item_body = dict(doc_body)
                item_body[message_type] = dict(new_content_part)
                item_body[message_type]['instance'] = item
                item_request_id = build_request_id_batched(
                    request_id, no_items_in_batch, index)
                upsert_doc_to_elastic(elastic_object, message_type, item_body,
                                      item_request_id, index_name)
        else:
            item_request_id = build_request_id_batched(request_id, 1, 0)
            upsert_doc_to_elastic(elastic_object, message_type, doc_body,
                                  item_request_id, index_name)
    elif "data" in new_content_part and message_type == 'outlier':
        data = doc_body[message_type]["data"]
        no_items_in_batch = len(data["is_outlier"])
        # Fix: capture the batch-level score lists before the loop. The
        # original overwrote them in the shared dict on iteration 0 with a
        # single item's scalar score, which broke the len(...) check (and
        # the per-item indexing) for every later iteration.
        feature_scores = data.get("feature_score")
        instance_scores = data.get("instance_score")
        for index, item in enumerate(data["is_outlier"]):
            item_data = dict(data)
            item_data["is_outlier"] = item
            if feature_scores is not None and \
                    len(feature_scores) == no_items_in_batch:
                item_data["feature_score"] = feature_scores[index]
            if instance_scores is not None and \
                    len(instance_scores) == no_items_in_batch:
                item_data["instance_score"] = instance_scores[index]
            item_body = dict(doc_body)
            item_body[message_type] = dict(doc_body[message_type])
            item_body[message_type]["data"] = item_data
            item_request_id = build_request_id_batched(request_id,
                                                       no_items_in_batch,
                                                       index)
            upsert_doc_to_elastic(elastic_object, message_type, item_body,
                                  item_request_id, index_name)
    else:
        print('unexpected data format')
        print(new_content_part)
    return
Example #3
0
def process_and_update_elastic_doc(elastic_object, message_type, message_body,
                                   request_id, headers, index_name):
    """Transform a message part and upsert it, splitting batches (including
    per-item 'elements') into one Elasticsearch doc per batch item.

    Args:
        elastic_object: Elasticsearch client passed through to the upsert helper.
        message_type: kind of message ('request'/'response'/'feedback'/
            'outlier'/...); becomes the field name the content is stored under.
        message_body: raw message payload passed to ``process_content``.
        request_id: base document id; batched items get ids derived via
            ``build_request_id_batched``.
        headers: CloudEvents-style headers; 'ce-time' and 'ce-source' are
            copied into the stored part.
        index_name: Elasticsearch index to write to.
    """
    if message_type == "unknown":
        print("UNKNOWN REQUEST TYPE FOR " + request_id + " - NOT PROCESSING")
        sys.stdout.flush()
        # Fix: honour the "NOT PROCESSING" message - the original code fell
        # through and processed the unknown message anyway.
        return

    # first do any needed transformations
    new_content_part = process_content(message_type, message_body, headers)

    # set metadata to go just in this part (request or response) and not top-level
    log_helper.field_from_header(content=new_content_part,
                                 header_name="ce-time",
                                 headers=headers)
    log_helper.field_from_header(content=new_content_part,
                                 header_name="ce-source",
                                 headers=headers)

    doc_body = {message_type: new_content_part}

    log_helper.set_metadata(doc_body, headers, message_type, request_id)

    # req or res might be batches of instances so split out into individual docs
    if "instance" in new_content_part:

        if isinstance(new_content_part["instance"], list):
            # if we've a list then this is batch
            # we assume first dimension is always batch
            no_items_in_batch = len(new_content_part["instance"])
            elements = new_content_part.get("elements")

            for index, item in enumerate(new_content_part["instance"]):
                # Fix: doc_body.copy() is shallow, so writing into the nested
                # part dict mutated the shared new_content_part. Copy the
                # nested dict too so each item doc is independent.
                item_body = dict(doc_body)
                item_body[message_type] = dict(new_content_part)
                item_body[message_type]["instance"] = item

                # pair each instance with its own elements entry if present
                if isinstance(elements, list) and len(elements) > index:
                    item_body[message_type]["elements"] = elements[index]

                item_request_id = build_request_id_batched(
                    request_id, no_items_in_batch, index)
                upsert_doc_to_elastic(elastic_object, message_type, item_body,
                                      item_request_id, index_name)
        else:
            # not batch so don't batch elements either
            if isinstance(new_content_part.get("elements"), list):
                new_content_part["elements"] = new_content_part["elements"][0]

            item_request_id = build_request_id_batched(request_id, 1, 0)
            upsert_doc_to_elastic(elastic_object, message_type, doc_body,
                                  item_request_id, index_name)
    elif message_type == "feedback":
        # feedback messages are never batched
        item_request_id = build_request_id_batched(request_id, 1, 0)
        upsert_doc_to_elastic(elastic_object, message_type, doc_body,
                              item_request_id, index_name)
    elif "data" in new_content_part and message_type == "outlier":
        data = doc_body[message_type]["data"]
        no_items_in_batch = len(data["is_outlier"])
        # Fix: capture the batch-level score lists before the loop. The
        # original overwrote them in the shared dict on iteration 0 with a
        # single item's scalar score, which broke the len(...) check (and
        # the per-item indexing) for every later iteration.
        feature_scores = data.get("feature_score")
        instance_scores = data.get("instance_score")
        for index, item in enumerate(data["is_outlier"]):
            item_data = dict(data)
            item_data["is_outlier"] = item
            if feature_scores is not None and \
                    len(feature_scores) == no_items_in_batch:
                item_data["feature_score"] = feature_scores[index]
            if instance_scores is not None and \
                    len(instance_scores) == no_items_in_batch:
                item_data["instance_score"] = instance_scores[index]
            item_body = dict(doc_body)
            item_body[message_type] = dict(doc_body[message_type])
            item_body[message_type]["data"] = item_data
            item_request_id = build_request_id_batched(request_id,
                                                       no_items_in_batch,
                                                       index)
            upsert_doc_to_elastic(elastic_object, message_type, item_body,
                                  item_request_id, index_name)
    else:
        print("unexpected data format")
        print(new_content_part)
    return
def process_and_update_elastic_doc(
    elastic_object, message_type, message_body, request_id, headers, index_name
):
    """Transform a message part and upsert it into Elasticsearch, handling
    batched instances, feedback, outlier and drift message types.

    Args:
        elastic_object: Elasticsearch client passed through to the upsert helpers.
        message_type: kind of message ('request'/'response'/'feedback'/
            'outlier'/'drift'/...); becomes the field name the content is
            stored under.
        message_body: raw message payload passed to ``process_content``.
        request_id: base document id; non-batched items get ids derived via
            ``build_request_id_batched``.
        headers: CloudEvents-style headers, also used to resolve
            namespace/inference-service metadata and index names.
        index_name: default Elasticsearch index; may be overridden for
            reference data and drift docs.

    Returns:
        list of upsert results (populated only on the non-batch instance
        path, matching the original behaviour).
    """
    added_content = []

    if message_type == "unknown":
        print("UNKNOWN REQUEST TYPE FOR " + request_id + " - NOT PROCESSING")
        sys.stdout.flush()
        # Fix: honour the "NOT PROCESSING" message - the original code fell
        # through and processed the unknown message anyway.
        return added_content

    # first do any needed transformations
    new_content_part = process_content(message_type, message_body, headers)

    # set metadata to go just in this part (request or response) and not top-level
    log_helper.field_from_header(
        content=new_content_part, header_name="ce-time", headers=headers
    )
    log_helper.field_from_header(
        content=new_content_part, header_name="ce-source", headers=headers
    )

    doc_body = {message_type: new_content_part}

    log_helper.set_metadata(doc_body, headers, message_type, request_id)

    # req or res might be batches of instances so split out into individual docs
    if "instance" in new_content_part:

        if log_helper.is_reference_data(headers):
            index_name = log_helper.build_index_name(
                headers, prefix="reference", suffix=False
            )
            # Ignore payload for reference data
            doc_body[message_type].pop("payload", None)

        # Fix: use .get() for "dataType" - direct indexing raised KeyError
        # when the transformed content had no dataType field.
        if isinstance(new_content_part["instance"], list) and \
                new_content_part.get("dataType") != "json":
            # if we've a list then this is batch
            # we assume first dimension is always batch
            bulk_upsert_doc_to_elastic(elastic_object, message_type, doc_body,
                                       doc_body[message_type].copy(),
                                       request_id, index_name)
        else:
            # not batch so don't batch elements either
            if isinstance(new_content_part.get("elements"), list):
                new_content_part["elements"] = new_content_part["elements"][0]

            item_request_id = build_request_id_batched(request_id, 1, 0)
            added_content.append(upsert_doc_to_elastic(
                elastic_object, message_type, doc_body, item_request_id,
                index_name
            ))
    elif message_type == "feedback":
        # feedback messages are never batched
        item_request_id = build_request_id_batched(request_id, 1, 0)
        upsert_doc_to_elastic(elastic_object, message_type, doc_body,
                              item_request_id, index_name)
    elif "data" in new_content_part and message_type == "outlier":
        data = doc_body[message_type]["data"]
        no_items_in_batch = len(data["is_outlier"])
        # Fix: capture the batch-level score lists before the loop. The
        # original overwrote them in the shared dict on iteration 0 with a
        # single item's scalar score, which broke the len(...) check (and
        # the per-item indexing) for every later iteration.
        feature_scores = data.get("feature_score")
        instance_scores = data.get("instance_score")
        for index, item in enumerate(data["is_outlier"]):
            item_data = dict(data)
            item_data["is_outlier"] = item
            if feature_scores is not None and \
                    len(feature_scores) == no_items_in_batch:
                item_data["feature_score"] = feature_scores[index]
            if instance_scores is not None and \
                    len(instance_scores) == no_items_in_batch:
                item_data["instance_score"] = instance_scores[index]
            item_body = dict(doc_body)
            item_body[message_type] = dict(doc_body[message_type])
            item_body[message_type]["data"] = item_data
            item_request_id = build_request_id_batched(
                request_id, no_items_in_batch, index
            )
            upsert_doc_to_elastic(
                elastic_object, message_type, item_body, item_request_id,
                index_name
            )
    elif "data" in new_content_part and message_type == "drift":
        item_body = doc_body.copy()

        namespace = log_helper.get_header(
            log_helper.NAMESPACE_HEADER_NAME, headers)
        inferenceservice_name = log_helper.get_header(
            log_helper.INFERENCESERVICE_HEADER_NAME, headers)
        endpoint_name = log_helper.get_header(
            log_helper.ENDPOINT_HEADER_NAME, headers)
        serving_engine = log_helper.serving_engine(headers)
        data = item_body[message_type]["data"]
        data["is_drift"] = bool(data["is_drift"])
        data["drift_type"] = "batch"
        if isinstance(data.get("distance"), list):
            # per-feature distances -> store as an elements structure
            x = np.expand_dims(np.array(data["distance"]), axis=0)
            data["drift_type"] = "feature"
            elements = createElelmentsArray(
                x, None, namespace, serving_engine, inferenceservice_name,
                endpoint_name, "request", True)
            if isinstance(elements, list):
                elements = elements[0]
            data["feature_distance"] = elements
            del data["distance"]
        if isinstance(data.get("p_val"), list):
            # per-feature p-values -> store as an elements structure
            x = np.expand_dims(np.array(data["p_val"]), axis=0)
            data["drift_type"] = "feature"
            elements = createElelmentsArray(
                x, None, namespace, serving_engine, inferenceservice_name,
                endpoint_name, "request", True)
            if isinstance(elements, list):
                elements = elements[0]
            data["feature_p_val"] = elements
            del data["p_val"]
        # derive the detector name from the CloudEvents source prefix
        detectorName = None
        ce_source = item_body[message_type]["ce-source"]
        for prefix in ("io.seldon.serving.", "org.kubeflow.serving."):
            if ce_source.startswith(prefix):
                detectorName = ce_source[len(prefix):]
                break
        # Fix: was request.headers - 'request' is not defined in this scope
        # (a Flask leftover) and raised NameError; use the headers argument.
        index_name = log_helper.build_index_name(
            headers, message_type, False, detectorName)
        upsert_doc_to_elastic(
            elastic_object, message_type, item_body, request_id, index_name
        )
    else:
        print("unexpected data format")
        print(new_content_part)
    return added_content