def predict(app_name, api_name):
    try:
        payload = request.get_json()
    except Exception as e:
        return "Malformed JSON", status.HTTP_400_BAD_REQUEST

    model = local_cache["model"]
    api = local_cache["api"]
    response = {}

    if not util.is_dict(payload) or "samples" not in payload:
        util.log_pretty(payload, logging_func=logger.error)
        return prediction_failed(payload, "top level `samples` key not found in request")

    logger.info("Predicting " + util.pluralize(len(payload["samples"]), "sample", "samples"))

    predictions = []
    samples = payload["samples"]
    if not util.is_list(samples):
        util.log_pretty(samples, logging_func=logger.error)
        return prediction_failed(
            payload, "expected the value of key `samples` to be a list of json objects"
        )

    for i, sample in enumerate(payload["samples"]):
        util.log_indent("sample {}".format(i + 1), 2)

        is_valid, reason = is_valid_sample(sample)
        if not is_valid:
            return prediction_failed(sample, reason)

        for column in local_cache["required_inputs"]:
            sample[column["name"]] = util.upcast(sample[column["name"]], column["type"])

        try:
            result = run_predict(sample)
        except CortexException as e:
            e.wrap("error", "sample {}".format(i + 1))
            logger.error(str(e))
            logger.exception(
                "An error occurred, see `cx logs api {}` for more details.".format(api["name"])
            )
            return prediction_failed(sample, str(e))
        except Exception as e:
            logger.exception(
                "An error occurred, see `cx logs api {}` for more details.".format(api["name"])
            )
            return prediction_failed(sample, str(e))

        predictions.append(result)

    if model["type"] == "regression":
        response["regression_predictions"] = predictions
    if model["type"] == "classification":
        response["classification_predictions"] = predictions

    response["resource_id"] = api["id"]

    return jsonify(response)
def _expand_columns_input_dict(self, input_columns_dict):
    expanded = {}
    for column_name, value in input_columns_dict.items():
        if util.is_str(value):
            expanded[column_name] = self.column_config(value)
        elif util.is_list(value):
            expanded[column_name] = [self.column_config(name) for name in value]
    return expanded
def _expand_feature_inputs_dict(self, input_features_dict):
    expanded = {}
    for feature_name, value in input_features_dict.items():
        if util.is_str(value):
            expanded[feature_name] = self.feature_config(value)
        elif util.is_list(value):
            expanded[feature_name] = [self.feature_config(name) for name in value]
    return expanded
def create_inputs_map(values_map, input_config):
    inputs = {}
    for input_name, input_config_item in input_config.items():
        if util.is_str(input_config_item):
            inputs[input_name] = values_map[input_config_item]
        elif util.is_int(input_config_item):
            inputs[input_name] = values_map[input_config_item]
        elif util.is_list(input_config_item):
            inputs[input_name] = [values_map[f] for f in input_config_item]
        else:
            raise CortexException("invalid column inputs")
    return inputs
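# Hedged usage sketch for create_inputs_map: the values below are hypothetical,
# not taken from a real deployment. String (or int) config items are looked up
# directly in values_map; list items produce a list of lookups.
#
#   values_map = {"age": 38, "income": 72000.0}
#   input_config = {"num": "age", "nums": ["age", "income"]}
#   create_inputs_map(values_map, input_config)
#   # -> {"num": 38, "nums": [38, 72000.0]}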
def validate_transformers(spark, ctx, cols_to_transform, raw_df):
    logger.info("Validating Transformers")

    TEST_DF_SIZE = 100
    logger.info("Sanity checking transformers against the first {} samples".format(TEST_DF_SIZE))

    sample_df = raw_df.limit(TEST_DF_SIZE).cache()
    test_df = raw_df.limit(TEST_DF_SIZE).cache()

    resource_list = sorted([ctx.tf_id_map[f] for f in cols_to_transform], key=lambda r: r["name"])
    for transformed_column in resource_list:
        ctx.upload_resource_status_start(transformed_column)
        try:
            input_columns_dict = transformed_column["inputs"]["columns"]

            input_cols = []
            for k in sorted(input_columns_dict.keys()):
                if util.is_list(input_columns_dict[k]):
                    input_cols += sorted(input_columns_dict[k])
                else:
                    input_cols.append(input_columns_dict[k])

            tf_name = transformed_column["name"]
            logger.info("Transforming {} to {}".format(", ".join(input_cols), tf_name))

            spark_util.validate_transformer(tf_name, test_df, ctx, spark)
            sample_df = spark_util.transform_column(transformed_column["name"], sample_df, ctx, spark)

            sample_df.select(tf_name).collect()  # run the transformer
            show_df(sample_df.select(*input_cols, tf_name), ctx, n=3, sort=False)

            for alias in transformed_column["aliases"][1:]:
                logger.info("Transforming {} to {}".format(", ".join(input_cols), alias))

                display_transform_df = sample_df.withColumn(alias, F.col(tf_name)).select(
                    *input_cols, alias
                )
                show_df(display_transform_df, ctx, n=3, sort=False)
        except:
            ctx.upload_resource_status_failed(transformed_column)
            raise
        ctx.upload_resource_status_success(transformed_column)
def create_inputs_from_features_map(features_values_map, feature_input_config):
    inputs = {}
    for input_name, input_config_item in feature_input_config.items():
        if util.is_str(input_config_item):
            inputs[input_name] = features_values_map[input_config_item]
        elif util.is_int(input_config_item):
            inputs[input_name] = features_values_map[input_config_item]
        elif util.is_list(input_config_item):
            inputs[input_name] = [features_values_map[f] for f in input_config_item]
        else:
            raise CortexException("invalid feature inputs")
    return inputs
def add_tf_types(config):
    if not util.is_dict(config):
        return

    type_fields = {}
    for k, v in config.items():
        if util.is_str(k) and util.is_str(v) and v in consts.COLUMN_TYPES:
            type_fields[k] = v
        elif util.is_dict(v):
            add_tf_types(v)
        elif util.is_list(v):
            for sub_v in v:
                add_tf_types(sub_v)

    for k, v in type_fields.items():
        config[k + "_tf"] = CORTEX_TYPE_TO_TF_TYPE[v]
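# Hedged sketch of add_tf_types, which mutates `config` in place. It assumes,
# for illustration only, that "FLOAT_COLUMN" is a member of consts.COLUMN_TYPES
# and that CORTEX_TYPE_TO_TF_TYPE maps it to tf.float32.
#
#   config = {"target": "FLOAT_COLUMN", "nested": {"label": "FLOAT_COLUMN"}}
#   add_tf_types(config)
#   # config["target_tf"] is now tf.float32, and the nested dict gains
#   # config["nested"]["label_tf"] via the recursive call.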
def create_prediction_request(transformed_sample):
    ctx = local_cache["ctx"]
    signatureDef = local_cache["metadata"]["signatureDef"]
    signature_key = list(signatureDef.keys())[0]
    prediction_request = predict_pb2.PredictRequest()
    prediction_request.model_spec.name = "default"
    prediction_request.model_spec.signature_name = signature_key

    for column_name, value in transformed_sample.items():
        data_type = tf_lib.CORTEX_TYPE_TO_TF_TYPE[ctx.columns[column_name]["type"]]
        shape = [1]
        if util.is_list(value):
            shape = [len(value)]
        tensor_proto = tf.make_tensor_proto([value], dtype=data_type, shape=shape)
        prediction_request.inputs[column_name].CopyFrom(tensor_proto)

    return prediction_request
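# Minimal sketch of the shape logic above; tf.make_tensor_proto is the real
# TensorFlow API, and the values are hypothetical. A scalar sample value
# becomes a length-1 tensor, while a list value keeps its own length (note the
# value is wrapped in an outer list in both cases, matching the call above).
#
#   tf.make_tensor_proto([2.5], dtype=tf.float32, shape=[1])         # scalar
#   tf.make_tensor_proto([[1.0, 2.0]], dtype=tf.float32, shape=[2])  # list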
def column_names_to_index(columns_input_config):
    column_list = []
    for k, v in columns_input_config.items():
        if util.is_list(v):
            column_list += v
        else:
            column_list.append(v)

    required_input_columns_sorted = sorted(set(column_list))

    index_to_col_map = dict(
        [(column_name, idx) for idx, column_name in enumerate(required_input_columns_sorted)]
    )

    columns_input_config_indexed = create_inputs_map(index_to_col_map, columns_input_config)
    return required_input_columns_sorted, columns_input_config_indexed
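# Hedged usage sketch for column_names_to_index (hypothetical config): column
# names are deduplicated and sorted, then the same config is re-emitted with
# names replaced by their positional indices via create_inputs_map.
#
#   column_names_to_index({"num": "age", "nums": ["income", "age"]})
#   # -> (["age", "income"], {"num": 0, "nums": [1, 0]})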
def create_raw_prediction_request(sample):
    signature_def = local_cache["metadata"]["signatureDef"]
    signature_key = list(signature_def.keys())[0]
    prediction_request = predict_pb2.PredictRequest()
    prediction_request.model_spec.name = "default"
    prediction_request.model_spec.signature_name = signature_key

    for column_name, value in sample.items():
        shape = [1]
        if util.is_list(value):
            shape = [len(value)]
        sig_type = signature_def[signature_key]["inputs"][column_name]["dtype"]
        tensor_proto = tf.make_tensor_proto([value], dtype=DTYPE_TO_TF_TYPE[sig_type], shape=shape)
        prediction_request.inputs[column_name].CopyFrom(tensor_proto)

    return prediction_request
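# Hedged sketch of the metadata shape create_raw_prediction_request reads; the
# field names mirror the lookups above, while the signature key, column name,
# and "DT_FLOAT" dtype string are hypothetical placeholders.
#
#   local_cache["metadata"]["signatureDef"] == {
#       "predict": {"inputs": {"age": {"dtype": "DT_FLOAT"}}}
#   }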
def create_prediction_request(transformed_features):
    ctx = local_cache["ctx"]
    prediction_request = predict_pb2.PredictRequest()
    prediction_request.model_spec.name = "default"
    prediction_request.model_spec.signature_name = list(
        local_cache["metadata"]["signatureDef"].keys()
    )[0]

    for feature_name, feature_value in transformed_features.items():
        data_type = tf_lib.CORTEX_TYPE_TO_TF_TYPE[ctx.features[feature_name]["type"]]
        shape = [1]
        if util.is_list(feature_value):
            shape = [len(feature_value)]
        tensor_proto = tf.make_tensor_proto([feature_value], dtype=data_type, shape=shape)
        prediction_request.inputs[feature_name].CopyFrom(tensor_proto)

    return prediction_request
def create_transformer_inputs_from_map(input, col_value_map):
    if util.is_str(input):
        if util.is_resource_ref(input):
            res_name = util.get_resource_ref(input)
            return col_value_map[res_name]
        return input

    if util.is_list(input):
        replaced = []
        for item in input:
            replaced.append(create_transformer_inputs_from_map(item, col_value_map))
        return replaced

    if util.is_dict(input):
        replaced = {}
        for key, val in input.items():
            key_replaced = create_transformer_inputs_from_map(key, col_value_map)
            val_replaced = create_transformer_inputs_from_map(val, col_value_map)
            replaced[key_replaced] = val_replaced
        return replaced

    return input
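# Hedged usage sketch for create_transformer_inputs_from_map: the exact string
# encoding that util.is_resource_ref / util.get_resource_ref recognize is not
# shown in this file, so AGE_REF below stands for whatever string references
# the "age" column (hypothetical).
#
#   col_value_map = {"age": 38}
#   create_transformer_inputs_from_map({"col": AGE_REF, "scale": 2}, col_value_map)
#   # -> {"col": 38, "scale": 2}   (non-reference values pass through; lists
#   #    and dicts are traversed recursively, keys included)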
def populate_values(self, input, input_schema, preserve_column_refs):
    if input is None:
        if input_schema is None:
            return None
        if input_schema.get("_allow_null") == True:
            return None
        raise UserException("Null value is not allowed")

    if util.is_resource_ref(input):
        res_name = util.get_resource_ref(input)

        if res_name in self.constants:
            if self.constants[res_name].get("value") is not None:
                const_val = self.constants[res_name]["value"]
            elif self.constants[res_name].get("path") is not None:
                const_val = self.storage.get_json_external(self.constants[res_name]["path"])
            try:
                return self.populate_values(const_val, input_schema, preserve_column_refs)
            except CortexException as e:
                e.wrap("constant " + res_name)
                raise

        if res_name in self.aggregates:
            agg_val = self.get_obj(self.aggregates[res_name]["key"])
            try:
                return self.populate_values(agg_val, input_schema, preserve_column_refs)
            except CortexException as e:
                e.wrap("aggregate " + res_name)
                raise

        if res_name in self.columns:
            if input_schema is not None:
                col_type = self.get_inferred_column_type(res_name)
                if col_type not in input_schema["_type"]:
                    raise UserException(
                        "column {}: unsupported input type (expected type {}, got type {})".format(
                            res_name,
                            util.data_type_str(input_schema["_type"]),
                            util.data_type_str(col_type),
                        )
                    )
            if preserve_column_refs:
                return input
            else:
                return res_name

    if util.is_list(input):
        elem_schema = None
        if input_schema is not None:
            if not util.is_list(input_schema["_type"]):
                raise UserException(
                    "unsupported input type (expected type {}, got {})".format(
                        util.data_type_str(input_schema["_type"]), util.user_obj_str(input)
                    )
                )
            elem_schema = input_schema["_type"][0]

            min_count = input_schema.get("_min_count")
            if min_count is not None and len(input) < min_count:
                raise UserException(
                    "list has length {}, but the minimum allowed length is {}".format(
                        len(input), min_count
                    )
                )

            max_count = input_schema.get("_max_count")
            if max_count is not None and len(input) > max_count:
                raise UserException(
                    "list has length {}, but the maximum allowed length is {}".format(
                        len(input), max_count
                    )
                )

        casted = []
        for i, elem in enumerate(input):
            try:
                casted.append(self.populate_values(elem, elem_schema, preserve_column_refs))
            except CortexException as e:
                e.wrap("index " + str(i))  # str() is required: i is an int
                raise
        return casted

    if util.is_dict(input):
        if input_schema is None:
            casted = {}
            for key, val in input.items():
                key_casted = self.populate_values(key, None, preserve_column_refs)
                try:
                    val_casted = self.populate_values(val, None, preserve_column_refs)
                except CortexException as e:
                    e.wrap(util.user_obj_str(key))
                    raise
                casted[key_casted] = val_casted
            return casted

        if not util.is_dict(input_schema["_type"]):
            raise UserException(
                "unsupported input type (expected type {}, got {})".format(
                    util.data_type_str(input_schema["_type"]), util.user_obj_str(input)
                )
            )

        min_count = input_schema.get("_min_count")
        if min_count is not None and len(input) < min_count:
            raise UserException(
                "map has length {}, but the minimum allowed length is {}".format(
                    len(input), min_count
                )
            )

        max_count = input_schema.get("_max_count")
        if max_count is not None and len(input) > max_count:
            raise UserException(
                "map has length {}, but the maximum allowed length is {}".format(
                    len(input), max_count
                )
            )

        is_generic_map = False
        if len(input_schema["_type"]) == 1:
            input_type_key = next(iter(input_schema["_type"].keys()))
            if is_compound_type(input_type_key):
                is_generic_map = True
                generic_map_key_schema = input_schema_from_type_schema(input_type_key)
                generic_map_value = input_schema["_type"][input_type_key]

        if is_generic_map:
            casted = {}
            for key, val in input.items():
                key_casted = self.populate_values(
                    key, generic_map_key_schema, preserve_column_refs
                )
                try:
                    val_casted = self.populate_values(
                        val, generic_map_value, preserve_column_refs
                    )
                except CortexException as e:
                    e.wrap(util.user_obj_str(key))
                    raise
                casted[key_casted] = val_casted
            return casted

        # fixed map
        casted = {}
        for key, val_schema in input_schema["_type"].items():
            if key in input:
                val = input[key]
            else:
                if val_schema.get("_optional") is not True:
                    raise UserException("missing key: " + util.user_obj_str(key))
                if val_schema.get("_default") is None:
                    continue
                val = val_schema["_default"]

            try:
                val_casted = self.populate_values(val, val_schema, preserve_column_refs)
            except CortexException as e:
                e.wrap(util.user_obj_str(key))
                raise
            casted[key] = val_casted
        return casted

    if input_schema is None:
        return input
    if not util.is_str(input_schema["_type"]):
        raise UserException(
            "unsupported input type (expected type {}, got {})".format(
                util.data_type_str(input_schema["_type"]), util.user_obj_str(input)
            )
        )
    return cast_compound_type(input, input_schema["_type"])
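# Hedged sketch of the schema shape populate_values consumes. The "_"-prefixed
# keys are the ones read above; the type strings ("FLOAT", "INT") are
# hypothetical placeholders for whatever compound types cast_compound_type
# accepts.
#
#   input_schema = {
#       "_type": {
#           "lr": {"_type": "FLOAT", "_optional": True, "_default": 0.1},
#           "layers": {"_type": ["INT"], "_min_count": 1},
#       }
#   }
#
# A fixed map like this is walked key by key: a missing optional key falls
# back to its "_default" (or is skipped when no default is set), lists are
# checked against "_min_count"/"_max_count", and scalar leaves are cast at
# the end.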
def predict(deployment_name, api_name):
    try:
        payload = request.get_json()
    except Exception as e:
        return "Malformed JSON", status.HTTP_400_BAD_REQUEST

    ctx = local_cache["ctx"]
    api = local_cache["api"]
    response = {}

    if not util.is_dict(payload) or "samples" not in payload:
        util.log_pretty(payload, logging_func=logger.error)
        return prediction_failed(payload, "top level `samples` key not found in request")

    logger.info("Predicting " + util.pluralize(len(payload["samples"]), "sample", "samples"))

    predictions = []
    samples = payload["samples"]
    if not util.is_list(samples):
        util.log_pretty(samples, logging_func=logger.error)
        return prediction_failed(
            payload, "expected the value of key `samples` to be a list of json objects"
        )

    for i, sample in enumerate(payload["samples"]):
        util.log_indent("sample {}".format(i + 1), 2)

        if util.is_resource_ref(api["model"]):
            is_valid, reason = is_valid_sample(sample)
            if not is_valid:
                return prediction_failed(sample, reason)

            for column in local_cache["required_inputs"]:
                column_type = ctx.get_inferred_column_type(column["name"])
                sample[column["name"]] = util.upcast(sample[column["name"]], column_type)

        try:
            result = run_predict(sample)
        except CortexException as e:
            e.wrap("error", "sample {}".format(i + 1))
            logger.error(str(e))
            logger.exception(
                "An error occurred, see `cortex logs -v api {}` for more details.".format(
                    api["name"]
                )
            )
            return prediction_failed(sample, str(e))
        except Exception as e:
            logger.exception(
                "An error occurred, see `cortex logs -v api {}` for more details.".format(
                    api["name"]
                )
            )

            # Show signature def for external models (since we don't validate input)
            schemaStr = ""
            signature_def = local_cache["metadata"]["signatureDef"]
            if (
                not util.is_resource_ref(api["model"])
                and signature_def.get("predict") is not None  # Just to be safe
                and signature_def["predict"].get("inputs") is not None  # Just to be safe
            ):
                schemaStr = "\n\nExpected schema:\n" + util.pp_str(
                    signature_def["predict"]["inputs"]
                )

            return prediction_failed(sample, str(e) + schemaStr)

        predictions.append(result)

    response["predictions"] = predictions
    response["resource_id"] = api["id"]

    return jsonify(response)
def predict(app_name, api_name):
    try:
        payload = request.get_json()
    except Exception as e:
        return "Malformed JSON", status.HTTP_400_BAD_REQUEST

    sess = local_cache["sess"]
    api = local_cache["api"]
    request_handler = local_cache.get("request_handler")
    input_metadata = local_cache["input_metadata"]
    output_metadata = local_cache["output_metadata"]

    response = {}

    if not util.is_dict(payload) or "samples" not in payload:
        util.log_pretty(payload, logging_func=logger.error)
        return prediction_failed(payload, "top level `samples` key not found in request")

    logger.info("Predicting " + util.pluralize(len(payload["samples"]), "sample", "samples"))

    predictions = []
    samples = payload["samples"]
    if not util.is_list(samples):
        util.log_pretty(samples, logging_func=logger.error)
        return prediction_failed(
            payload, "expected the value of key `samples` to be a list of json objects"
        )

    for i, sample in enumerate(payload["samples"]):
        util.log_indent("sample {}".format(i + 1), 2)
        try:
            util.log_indent("Raw sample:", indent=4)
            util.log_pretty(sample, indent=6)

            if request_handler is not None and util.has_function(request_handler, "pre_inference"):
                sample = request_handler.pre_inference(sample, input_metadata)

            inference_input = convert_to_onnx_input(sample, input_metadata)
            model_outputs = sess.run([], inference_input)

            result = []
            for model_output in model_outputs:
                if type(model_output) is np.ndarray:
                    result.append(model_output.tolist())
                else:
                    result.append(model_output)

            if request_handler is not None and util.has_function(request_handler, "post_inference"):
                result = request_handler.post_inference(result, output_metadata)

            util.log_indent("Prediction:", indent=4)
            util.log_pretty(result, indent=6)
            prediction = {"prediction": result}
        except CortexException as e:
            e.wrap("error", "sample {}".format(i + 1))
            logger.error(str(e))
            logger.exception(
                "An error occurred, see `cx logs -v api {}` for more details.".format(api["name"])
            )
            return prediction_failed(sample, str(e))
        except Exception as e:
            logger.exception(
                "An error occurred, see `cx logs -v api {}` for more details.".format(api["name"])
            )
            return prediction_failed(sample, str(e))

        predictions.append(prediction)

    response["predictions"] = predictions
    response["resource_id"] = api["id"]

    return jsonify(response)