Esempio n. 1
0
def predict(app_name, api_name):
    """Flask handler: run the cached model on each sample in the JSON payload.

    Expects a JSON body of the form {"samples": [<sample>, ...]}. Returns a
    JSON response containing the predictions (keyed by model type) and the
    API's resource id, or a prediction-failure response on the first
    invalid/failing sample.
    """
    try:
        payload = request.get_json()
    except Exception as e:
        return "Malformed JSON", status.HTTP_400_BAD_REQUEST

    # Model and API metadata are read from the process-wide cache,
    # presumably populated at startup — confirm against the app setup code.
    model = local_cache["model"]
    api = local_cache["api"]

    response = {}

    if not util.is_dict(payload) or "samples" not in payload:
        util.log_pretty(payload, logging_func=logger.error)
        return prediction_failed(payload, "top level `samples` key not found in request")

    logger.info("Predicting " + util.pluralize(len(payload["samples"]), "sample", "samples"))

    predictions = []
    samples = payload["samples"]
    if not util.is_list(samples):
        util.log_pretty(samples, logging_func=logger.error)
        return prediction_failed(
            payload, "expected the value of key `samples` to be a list of json objects"
        )

    for i, sample in enumerate(payload["samples"]):
        util.log_indent("sample {}".format(i + 1), 2)

        is_valid, reason = is_valid_sample(sample)
        if not is_valid:
            return prediction_failed(sample, reason)

        # Upcast each required input to its column's declared type before predicting
        for column in local_cache["required_inputs"]:
            sample[column["name"]] = util.upcast(sample[column["name"]], column["type"])

        try:
            result = run_predict(sample)
        except CortexException as e:
            e.wrap("error", "sample {}".format(i + 1))
            logger.error(str(e))
            logger.exception(
                "An error occurred, see `cx logs api {}` for more details.".format(api["name"])
            )
            return prediction_failed(sample, str(e))
        except Exception as e:
            logger.exception(
                "An error occurred, see `cx logs api {}` for more details.".format(api["name"])
            )
            return prediction_failed(sample, str(e))

        predictions.append(result)

    # Key the response by model type; neither key is set for other model types
    if model["type"] == "regression":
        response["regression_predictions"] = predictions
    if model["type"] == "classification":
        response["classification_predictions"] = predictions

    response["resource_id"] = api["id"]

    return jsonify(response)
Esempio n. 2
0
 def _expand_columns_input_dict(self, input_columns_dict):
     """Expand column-name references into full column configs.

     A string value yields a single column config; a list value yields a
     list of column configs. Values of any other type are silently omitted.
     """
     expanded = {}
     for column_name, ref in input_columns_dict.items():
         if util.is_list(ref):
             expanded[column_name] = [self.column_config(n) for n in ref]
         elif util.is_str(ref):
             expanded[column_name] = self.column_config(ref)
     return expanded
Esempio n. 3
0
 def _expand_feature_inputs_dict(self, input_features_dict):
     """Expand feature-name references into full feature configs.

     A string value yields a single feature config; a list value yields a
     list of feature configs. Values of any other type are silently omitted.
     """
     expanded = {}
     for feature_name, ref in input_features_dict.items():
         if util.is_list(ref):
             expanded[feature_name] = [self.feature_config(n) for n in ref]
         elif util.is_str(ref):
             expanded[feature_name] = self.feature_config(ref)
     return expanded
Esempio n. 4
0
def create_inputs_map(values_map, input_config):
    """Resolve an input config against a map of concrete values.

    String and int entries are looked up directly in values_map; list
    entries are resolved element-wise. Any other entry type is rejected.

    Raises:
        CortexException: If an entry is not a str, int, or list.
    """
    resolved = {}
    for input_name, spec in input_config.items():
        # str and int specs behave identically: a direct lookup
        if util.is_str(spec) or util.is_int(spec):
            resolved[input_name] = values_map[spec]
        elif util.is_list(spec):
            resolved[input_name] = [values_map[ref] for ref in spec]
        else:
            raise CortexException("invalid column inputs")

    return resolved
Esempio n. 5
0
def validate_transformers(spark, ctx, cols_to_transform, raw_df):
    """Sanity-check each transformer by running it on a small sample of raw_df.

    For every transformed column in cols_to_transform (processed in name
    order), this validates the transformer, applies it to a cached sample
    DataFrame, forces execution with collect(), and logs small previews —
    one per alias beyond the first. Resource status is uploaded around each
    transformer; any exception marks the resource failed and is re-raised.

    Args:
        spark: Active SparkSession.
        ctx: Cortex context (provides tf_id_map and resource-status uploads).
        cols_to_transform: Iterable of transformed-column identifiers.
        raw_df: Raw input Spark DataFrame.
    """
    logger.info("Validating Transformers")

    TEST_DF_SIZE = 100

    logger.info(
        "Sanity checking transformers against the first {} samples".format(
            TEST_DF_SIZE))
    # Two cached samples: test_df is reused untouched for validation, while
    # sample_df accumulates the applied transformations for display.
    sample_df = raw_df.limit(TEST_DF_SIZE).cache()
    test_df = raw_df.limit(TEST_DF_SIZE).cache()

    resource_list = sorted([ctx.tf_id_map[f] for f in cols_to_transform],
                           key=lambda r: r["name"])
    for transformed_column in resource_list:
        ctx.upload_resource_status_start(transformed_column)
        try:
            input_columns_dict = transformed_column["inputs"]["columns"]

            input_cols = []

            # Flatten the input column references in a deterministic order
            for k in sorted(input_columns_dict.keys()):
                if util.is_list(input_columns_dict[k]):
                    input_cols += sorted(input_columns_dict[k])
                else:
                    input_cols.append(input_columns_dict[k])

            tf_name = transformed_column["name"]
            logger.info("Transforming {} to {}".format(", ".join(input_cols),
                                                       tf_name))

            spark_util.validate_transformer(tf_name, test_df, ctx, spark)
            sample_df = spark_util.transform_column(transformed_column["name"],
                                                    sample_df, ctx, spark)

            sample_df.select(tf_name).collect()  # run the transformer
            show_df(sample_df.select(*input_cols, tf_name),
                    ctx,
                    n=3,
                    sort=False)

            # Remaining aliases are just renamed views of the same column
            for alias in transformed_column["aliases"][1:]:
                logger.info("Transforming {} to {}".format(
                    ", ".join(input_cols), alias))

                display_transform_df = sample_df.withColumn(
                    alias, F.col(tf_name)).select(*input_cols, alias)
                show_df(display_transform_df, ctx, n=3, sort=False)
        except:
            # Intentionally bare: mark the resource failed on ANY error
            # (including KeyboardInterrupt), then re-raise unchanged.
            ctx.upload_resource_status_failed(transformed_column)
            raise
        ctx.upload_resource_status_success(transformed_column)
Esempio n. 6
0
def create_inputs_from_features_map(features_values_map, feature_input_config):
    """Resolve a feature input config against a map of concrete feature values.

    String and int entries are looked up directly in features_values_map;
    list entries are resolved element-wise. Any other entry type is rejected.

    Raises:
        CortexException: If an entry is not a str, int, or list.
    """
    resolved = {}
    for input_name, spec in feature_input_config.items():
        # str and int specs behave identically: a direct lookup
        if util.is_str(spec) or util.is_int(spec):
            resolved[input_name] = features_values_map[spec]
        elif util.is_list(spec):
            resolved[input_name] = [features_values_map[ref] for ref in spec]
        else:
            raise CortexException("invalid feature inputs")

    return resolved
Esempio n. 7
0
def add_tf_types(config):
    """Recursively add "<key>_tf" companions for string-typed column fields.

    For every (str key, str value) pair whose value is a Cortex column type,
    a sibling entry "<key>_tf" is added mapping to the TensorFlow dtype.
    Nested dicts and list elements are processed recursively, in place.
    Non-dict inputs are ignored.
    """
    if not util.is_dict(config):
        return

    # Collect typed fields first; mutating config happens only after iteration.
    typed_fields = {
        key: value
        for key, value in config.items()
        if util.is_str(key) and util.is_str(value) and value in consts.COLUMN_TYPES
    }

    # Recurse into nested containers (disjoint from the str-valued fields above).
    for value in config.values():
        if util.is_dict(value):
            add_tf_types(value)
        elif util.is_list(value):
            for element in value:
                add_tf_types(element)

    for key, column_type in typed_fields.items():
        config[key + "_tf"] = CORTEX_TYPE_TO_TF_TYPE[column_type]
Esempio n. 8
0
def create_prediction_request(transformed_sample):
    """Build a TF Serving PredictRequest from a transformed sample.

    Tensor dtypes come from the Cortex column metadata; a list value becomes
    a 1-D tensor of its length, any other value a single-element tensor.
    """
    ctx = local_cache["ctx"]
    signature_def = local_cache["metadata"]["signatureDef"]
    req = predict_pb2.PredictRequest()
    req.model_spec.name = "default"
    # Use the first signature key (assumes at least one is present)
    req.model_spec.signature_name = next(iter(signature_def))

    for col_name, col_value in transformed_sample.items():
        dtype = tf_lib.CORTEX_TYPE_TO_TF_TYPE[ctx.columns[col_name]["type"]]
        tensor_shape = [len(col_value)] if util.is_list(col_value) else [1]
        proto = tf.make_tensor_proto([col_value], dtype=dtype, shape=tensor_shape)
        req.inputs[col_name].CopyFrom(proto)

    return req
Esempio n. 9
0
def column_names_to_index(columns_input_config):
    """Build a positional index for all column names referenced by a config.

    Flattens the column references in columns_input_config (each value may be
    a single column name or a list of names), de-duplicates and sorts them,
    and re-resolves the config so each reference points at the column's
    positional index in the sorted list.

    Args:
        columns_input_config: Mapping of input name -> column name or list of
            column names.

    Returns:
        Tuple of (required_input_columns_sorted, columns_input_config_indexed).
    """
    column_list = []
    # Only the values are needed here; keys are irrelevant to flattening
    for v in columns_input_config.values():
        if util.is_list(v):
            column_list += v
        else:
            column_list.append(v)

    required_input_columns_sorted = sorted(set(column_list))

    # Maps column name -> its index in the sorted list (the former local name
    # `index_to_col_map` described this mapping backwards)
    col_name_to_index = {
        column_name: idx
        for idx, column_name in enumerate(required_input_columns_sorted)
    }

    columns_input_config_indexed = create_inputs_map(col_name_to_index,
                                                     columns_input_config)
    return required_input_columns_sorted, columns_input_config_indexed
Esempio n. 10
0
def create_raw_prediction_request(sample):
    """Build a TF Serving PredictRequest directly from a raw sample.

    Tensor dtypes are taken from the model's signature def (rather than
    from Cortex column metadata); a list value becomes a 1-D tensor of its
    length, any other value a single-element tensor.
    """
    signature_def = local_cache["metadata"]["signatureDef"]
    # Use the first signature key (assumes at least one is present)
    signature_key = next(iter(signature_def))
    req = predict_pb2.PredictRequest()
    req.model_spec.name = "default"
    req.model_spec.signature_name = signature_key

    sig_inputs = signature_def[signature_key]["inputs"]
    for column_name, value in sample.items():
        tensor_shape = [len(value)] if util.is_list(value) else [1]
        dtype = DTYPE_TO_TF_TYPE[sig_inputs[column_name]["dtype"]]
        proto = tf.make_tensor_proto([value], dtype=dtype, shape=tensor_shape)
        req.inputs[column_name].CopyFrom(proto)

    return req
Esempio n. 11
0
def create_prediction_request(transformed_features):
    """Build a TF Serving PredictRequest from transformed feature values.

    Tensor dtypes come from the Cortex feature metadata; a list value becomes
    a 1-D tensor of its length, any other value a single-element tensor.
    """
    ctx = local_cache["ctx"]

    req = predict_pb2.PredictRequest()
    req.model_spec.name = "default"
    # Use the first signature key (assumes at least one is present)
    req.model_spec.signature_name = next(
        iter(local_cache["metadata"]["signatureDef"]))

    for name, value in transformed_features.items():
        dtype = tf_lib.CORTEX_TYPE_TO_TF_TYPE[ctx.features[name]["type"]]
        tensor_shape = [len(value)] if util.is_list(value) else [1]
        proto = tf.make_tensor_proto([value], dtype=dtype, shape=tensor_shape)
        req.inputs[name].CopyFrom(proto)

    return req
Esempio n. 12
0
def create_transformer_inputs_from_map(input, col_value_map):
    """Recursively replace column resource references with concrete values.

    Resource-ref strings are resolved through col_value_map; lists and dicts
    are rebuilt with each element (and each dict key AND value) resolved
    recursively. All other values pass through unchanged.
    """
    if util.is_str(input):
        if util.is_resource_ref(input):
            return col_value_map[util.get_resource_ref(input)]
        return input

    if util.is_list(input):
        return [
            create_transformer_inputs_from_map(elem, col_value_map)
            for elem in input
        ]

    if util.is_dict(input):
        return {
            create_transformer_inputs_from_map(k, col_value_map):
                create_transformer_inputs_from_map(v, col_value_map)
            for k, v in input.items()
        }

    return input
Esempio n. 13
0
    def populate_values(self, input, input_schema, preserve_column_refs):
        """Recursively resolve resource references in `input` and validate/cast
        it against `input_schema`.

        Args:
            input: Raw input value: scalar, resource-ref string, list, or dict
                (possibly nested).
            input_schema: Schema dict (with "_type" plus optional
                "_allow_null", "_min_count", "_max_count", and per-key
                "_optional"/"_default" entries), or None to skip validation.
            preserve_column_refs: If True, column references are returned
                unresolved; if False, they are replaced by the column name.

        Returns:
            The resolved and casted value.

        Raises:
            UserException: If the input does not conform to the schema.
            CortexException: Re-raised with added context from nested failures.
        """
        if input is None:
            if input_schema is None:
                return None
            if input_schema.get("_allow_null") == True:
                return None
            raise UserException("Null value is not allowed")

        if util.is_resource_ref(input):
            res_name = util.get_resource_ref(input)
            if res_name in self.constants:
                # NOTE(review): assumes every constant has either "value" or
                # "path" set; otherwise const_val is unbound below and this
                # raises UnboundLocalError — confirm upstream validation.
                if self.constants[res_name].get("value") is not None:
                    const_val = self.constants[res_name]["value"]
                elif self.constants[res_name].get("path") is not None:
                    const_val = self.storage.get_json_external(self.constants[res_name]["path"])
                try:
                    return self.populate_values(const_val, input_schema, preserve_column_refs)
                except CortexException as e:
                    e.wrap("constant " + res_name)
                    raise

            if res_name in self.aggregates:
                agg_val = self.get_obj(self.aggregates[res_name]["key"])
                try:
                    return self.populate_values(agg_val, input_schema, preserve_column_refs)
                except CortexException as e:
                    e.wrap("aggregate " + res_name)
                    raise

            if res_name in self.columns:
                if input_schema is not None:
                    col_type = self.get_inferred_column_type(res_name)
                    if col_type not in input_schema["_type"]:
                        raise UserException(
                            "column {}: unsupported input type (expected type {}, got type {})".format(
                                res_name,
                                util.data_type_str(input_schema["_type"]),
                                util.data_type_str(col_type),
                            )
                        )
                if preserve_column_refs:
                    return input
                else:
                    return res_name

        if util.is_list(input):
            elem_schema = None
            if input_schema is not None:
                if not util.is_list(input_schema["_type"]):
                    raise UserException(
                        "unsupported input type (expected type {}, got {})".format(
                            util.data_type_str(input_schema["_type"]), util.user_obj_str(input)
                        )
                    )
                elem_schema = input_schema["_type"][0]

                min_count = input_schema.get("_min_count")
                if min_count is not None and len(input) < min_count:
                    raise UserException(
                        "list has length {}, but the minimum allowed length is {}".format(
                            len(input), min_count
                        )
                    )

                max_count = input_schema.get("_max_count")
                if max_count is not None and len(input) > max_count:
                    raise UserException(
                        "list has length {}, but the maximum allowed length is {}".format(
                            len(input), max_count
                        )
                    )

            casted = []
            for i, elem in enumerate(input):
                try:
                    casted.append(self.populate_values(elem, elem_schema, preserve_column_refs))
                except CortexException as e:
                    # BUGFIX: `i` is an int; `"index " + i` raised TypeError
                    # and masked the original CortexException.
                    e.wrap("index " + str(i))
                    raise
            return casted

        if util.is_dict(input):
            if input_schema is None:
                casted = {}
                for key, val in input.items():
                    key_casted = self.populate_values(key, None, preserve_column_refs)
                    try:
                        val_casted = self.populate_values(val, None, preserve_column_refs)
                    except CortexException as e:
                        e.wrap(util.user_obj_str(key))
                        raise
                    casted[key_casted] = val_casted
                return casted

            if not util.is_dict(input_schema["_type"]):
                raise UserException(
                    "unsupported input type (expected type {}, got {})".format(
                        util.data_type_str(input_schema["_type"]), util.user_obj_str(input)
                    )
                )

            min_count = input_schema.get("_min_count")
            if min_count is not None and len(input) < min_count:
                raise UserException(
                    "map has length {}, but the minimum allowed length is {}".format(
                        len(input), min_count
                    )
                )

            max_count = input_schema.get("_max_count")
            if max_count is not None and len(input) > max_count:
                raise UserException(
                    "map has length {}, but the maximum allowed length is {}".format(
                        len(input), max_count
                    )
                )

            # A "generic map" schema has a single compound-type key describing
            # all entries; otherwise the schema enumerates fixed keys.
            is_generic_map = False
            if len(input_schema["_type"]) == 1:
                input_type_key = next(iter(input_schema["_type"].keys()))
                if is_compound_type(input_type_key):
                    is_generic_map = True
                    generic_map_key_schema = input_schema_from_type_schema(input_type_key)
                    generic_map_value = input_schema["_type"][input_type_key]

            if is_generic_map:
                casted = {}
                for key, val in input.items():
                    key_casted = self.populate_values(
                        key, generic_map_key_schema, preserve_column_refs
                    )
                    try:
                        val_casted = self.populate_values(
                            val, generic_map_value, preserve_column_refs
                        )
                    except CortexException as e:
                        e.wrap(util.user_obj_str(key))
                        raise
                    casted[key_casted] = val_casted
                return casted

            # fixed map: every schema key must be present unless optional
            casted = {}
            for key, val_schema in input_schema["_type"].items():
                if key in input:
                    val = input[key]
                else:
                    if val_schema.get("_optional") is not True:
                        raise UserException("missing key: " + util.user_obj_str(key))
                    if val_schema.get("_default") is None:
                        continue
                    val = val_schema["_default"]

                try:
                    val_casted = self.populate_values(val, val_schema, preserve_column_refs)
                except CortexException as e:
                    e.wrap(util.user_obj_str(key))
                    raise
                casted[key] = val_casted
            return casted

        # Scalar: cast against the (string) compound type, if a schema is given
        if input_schema is None:
            return input
        if not util.is_str(input_schema["_type"]):
            raise UserException(
                "unsupported input type (expected type {}, got {})".format(
                    util.data_type_str(input_schema["_type"]), util.user_obj_str(input)
                )
            )
        return cast_compound_type(input, input_schema["_type"])
Esempio n. 14
0
def predict(deployment_name, api_name):
    """Flask handler: run predictions for each sample in the JSON payload.

    Expects a JSON body of the form {"samples": [<sample>, ...]}. For
    Cortex-managed models (api["model"] is a resource ref) each sample is
    validated and its inputs upcast before prediction. Returns a JSON
    response containing the predictions and the API's resource id, or a
    prediction-failure response on the first invalid/failing sample.
    """
    try:
        payload = request.get_json()
    except Exception as e:
        return "Malformed JSON", status.HTTP_400_BAD_REQUEST

    ctx = local_cache["ctx"]
    api = local_cache["api"]

    response = {}

    if not util.is_dict(payload) or "samples" not in payload:
        util.log_pretty(payload, logging_func=logger.error)
        return prediction_failed(
            payload, "top level `samples` key not found in request")

    logger.info("Predicting " +
                util.pluralize(len(payload["samples"]), "sample", "samples"))

    predictions = []
    samples = payload["samples"]
    if not util.is_list(samples):
        util.log_pretty(samples, logging_func=logger.error)
        return prediction_failed(
            payload,
            "expected the value of key `samples` to be a list of json objects")

    for i, sample in enumerate(payload["samples"]):
        util.log_indent("sample {}".format(i + 1), 2)

        # Only internal (resource-ref) models carry input metadata to
        # validate and upcast against; external models are passed through.
        if util.is_resource_ref(api["model"]):
            is_valid, reason = is_valid_sample(sample)
            if not is_valid:
                return prediction_failed(sample, reason)

            for column in local_cache["required_inputs"]:
                column_type = ctx.get_inferred_column_type(column["name"])
                sample[column["name"]] = util.upcast(sample[column["name"]],
                                                     column_type)

        try:
            result = run_predict(sample)
        except CortexException as e:
            e.wrap("error", "sample {}".format(i + 1))
            logger.error(str(e))
            logger.exception(
                "An error occurred, see `cortex logs -v api {}` for more details."
                .format(api["name"]))
            return prediction_failed(sample, str(e))
        except Exception as e:
            logger.exception(
                "An error occurred, see `cortex logs -v api {}` for more details."
                .format(api["name"]))

            # Show signature def for external models (since we don't validate input)
            schema_str = ""
            signature_def = local_cache["metadata"]["signatureDef"]
            if (
                not util.is_resource_ref(api["model"])
                and signature_def.get("predict") is not None  # Just to be safe
                and signature_def["predict"].get("inputs") is not None  # Just to be safe
            ):
                # BUGFIX: user-facing message said "Expected shema"
                schema_str = "\n\nExpected schema:\n" + util.pp_str(
                    signature_def["predict"]["inputs"])

            return prediction_failed(sample, str(e) + schema_str)

        predictions.append(result)

    response["predictions"] = predictions
    response["resource_id"] = api["id"]

    return jsonify(response)
Esempio n. 15
0
def predict(app_name, api_name):
    """Flask handler: run ONNX inference for each sample in the JSON payload.

    Expects a JSON body of the form {"samples": [<sample>, ...]}. Each sample
    is optionally passed through the request handler's pre_inference /
    post_inference hooks around the ONNX Runtime session. Returns a JSON
    response with one {"prediction": ...} entry per sample and the API's
    resource id, or a prediction-failure response on the first failing sample.
    """
    try:
        payload = request.get_json()
    except Exception as e:
        return "Malformed JSON", status.HTTP_400_BAD_REQUEST

    # Session, metadata, and the optional user request handler are read from
    # the process-wide cache, presumably populated at startup.
    sess = local_cache["sess"]
    api = local_cache["api"]
    request_handler = local_cache.get("request_handler")
    input_metadata = local_cache["input_metadata"]
    output_metadata = local_cache["output_metadata"]

    response = {}

    if not util.is_dict(payload) or "samples" not in payload:
        util.log_pretty(payload, logging_func=logger.error)
        return prediction_failed(
            payload, "top level `samples` key not found in request")

    logger.info("Predicting " +
                util.pluralize(len(payload["samples"]), "sample", "samples"))

    predictions = []
    samples = payload["samples"]
    if not util.is_list(samples):
        util.log_pretty(samples, logging_func=logger.error)
        return prediction_failed(
            payload,
            "expected the value of key `samples` to be a list of json objects")

    for i, sample in enumerate(payload["samples"]):
        util.log_indent("sample {}".format(i + 1), 2)
        try:
            util.log_indent("Raw sample:", indent=4)
            util.log_pretty(sample, indent=6)

            # Optional user hook to transform the sample before inference
            if request_handler is not None and util.has_function(
                    request_handler, "pre_inference"):
                sample = request_handler.pre_inference(sample, input_metadata)

            inference_input = convert_to_onnx_input(sample, input_metadata)
            model_outputs = sess.run([], inference_input)
            # Convert numpy arrays to plain lists so the result is JSON-serializable
            result = []
            for model_output in model_outputs:
                if type(model_output) is np.ndarray:
                    result.append(model_output.tolist())
                else:
                    result.append(model_output)

            # Optional user hook to transform the raw model output
            if request_handler is not None and util.has_function(
                    request_handler, "post_inference"):
                result = request_handler.post_inference(
                    result, output_metadata)
            util.log_indent("Prediction:", indent=4)
            util.log_pretty(result, indent=6)
            prediction = {"prediction": result}
        except CortexException as e:
            e.wrap("error", "sample {}".format(i + 1))
            logger.error(str(e))
            logger.exception(
                "An error occurred, see `cx logs -v api {}` for more details.".
                format(api["name"]))
            return prediction_failed(sample, str(e))
        except Exception as e:
            logger.exception(
                "An error occurred, see `cx logs -v api {}` for more details.".
                format(api["name"]))
            return prediction_failed(sample, str(e))

        predictions.append(prediction)

    response["predictions"] = predictions
    response["resource_id"] = api["id"]

    return jsonify(response)