Beispiel #1
0
def get_label_placeholder(model_name, ctx):
    """Create the TensorFlow placeholder that will hold a model's labels.

    The model's target column reference is resolved to a column name, the
    column's declared ``"type"`` is mapped through
    ``tf_lib.CORTEX_TYPE_TO_TF_TYPE``, and a placeholder of that dtype with
    shape ``[None]`` is returned.

    Args:
        model_name: Key into ``ctx.models``.
        ctx: Context object exposing ``models`` and ``columns`` mappings.

    Returns:
        The result of ``tf.placeholder(shape=[None], dtype=...)``.
    """
    target_ref = ctx.models[model_name]["target_column"]
    target_name = util.get_resource_ref(target_ref)

    type_map = tf_lib.CORTEX_TYPE_TO_TF_TYPE
    cortex_type = ctx.columns[target_name]["type"]

    return tf.placeholder(shape=[None], dtype=type_map[cortex_type])
Beispiel #2
0
    def model_config(self, model_name):
        """Build the configuration dict handed to an estimator for a model.

        The model definition is deep-copied, reduced to a whitelist of keys,
        and then its ``target_column``, ``input``, ``training_input`` and
        ``hparams`` entries are replaced with resolved values.

        Args:
            model_name: Key into ``self.models``.

        Returns:
            A new dict with the whitelisted model keys, or ``None`` when the
            stored model entry is ``None``.

        Raises:
            UserException: If the estimator declares accepted
                ``target_column`` types and the inferred type of the model's
                target column is not among them.
        """
        model = self.models[model_name]
        if model is None:
            return None
        estimator = self.estimators[model["estimator"]]
        target_column = self.columns[util.get_resource_ref(model["target_column"])]

        # Validate the target column type only when the estimator restricts it.
        if estimator.get("target_column") is not None:
            target_col_type = self.get_inferred_column_type(target_column["name"])
            if target_col_type not in estimator["target_column"]:
                raise UserException(
                    "model " + model_name,
                    "target_column",
                    target_column["name"],
                    "unsupported type (expected type {}, got type {})".format(
                        util.data_type_str(estimator["target_column"]),
                        util.data_type_str(target_col_type),
                    ),
                )

        model_config = deepcopy(model)
        # Every entry needs its own trailing comma: adjacent string literals
        # are silently concatenated into a single (bogus) key otherwise.
        config_keys = [
            "name",
            "estimator",
            "estimator_path",
            "target_column",
            "input",
            "training_input",
            "hparams",
            "prediction_key",
            "data_partition_ratio",
            "training",
            "evaluation",
            "tags",
        ]
        # The return value is ignored, so this presumably filters the dict
        # in place -- confirm against util.keep_dict_keys.
        util.keep_dict_keys(model_config, config_keys)

        # Replace resource references with concrete values / column names.
        model_config["target_column"] = target_column["name"]
        model_config["input"] = self.populate_values(
            model["input"], estimator["input"], preserve_column_refs=False
        )
        if model.get("training_input") is not None:
            model_config["training_input"] = self.populate_values(
                model["training_input"], estimator["training_input"], preserve_column_refs=False
            )
        if model.get("hparams") is not None:
            model_config["hparams"] = self.populate_values(
                model["hparams"], estimator["hparams"], preserve_column_refs=False
            )

        return model_config
Beispiel #3
0
def create_transformer_inputs_from_map(input, col_value_map):
    """Recursively substitute resource references with values from a map.

    Strings that are resource references are replaced by
    ``col_value_map[<referenced name>]``; other strings are returned as-is.
    Lists and dicts are rebuilt element by element (dict keys are processed
    as well as dict values). Any other value is returned unchanged.

    Args:
        input: Arbitrarily nested structure of strings, lists and dicts.
        col_value_map: Mapping from resource name to its replacement value.

    Returns:
        A structure of the same shape with references substituted.
    """
    if util.is_str(input):
        if not util.is_resource_ref(input):
            return input
        return col_value_map[util.get_resource_ref(input)]

    if util.is_list(input):
        return [
            create_transformer_inputs_from_map(element, col_value_map)
            for element in input
        ]

    if util.is_dict(input):
        substituted = {}
        for raw_key, raw_val in input.items():
            # Resolve the key before the value, as keys may be references too.
            new_key = create_transformer_inputs_from_map(raw_key, col_value_map)
            substituted[new_key] = create_transformer_inputs_from_map(
                raw_val, col_value_map)
        return substituted

    return input
Beispiel #4
0
def read_parquet(ctx, spark):
    """Load the environment's parquet dataset and rename columns to raw names.

    Only schema entries whose raw column is present in ``ctx.raw_columns``
    are selected. Each selected parquet column is aliased to its raw column
    name.

    Args:
        ctx: Context exposing ``environment["data"]`` (with ``"path"`` and
            ``"schema"``) and ``raw_columns``.
        spark: Spark session used to read the parquet data.

    Returns:
        A DataFrame containing the selected columns under their raw names.

    Raises:
        UserException: If a parquet column named in the schema is absent
            from the dataset that was read.
    """
    parquet_config = ctx.environment["data"]
    df = spark.read.parquet(parquet_config["path"])

    # raw column name -> parquet column name
    alias_map = {}
    for parquet_col_config in parquet_config["schema"]:
        col_name = util.get_resource_ref(parquet_col_config["raw_column"])
        if col_name in ctx.raw_columns:
            alias_map[col_name] = parquet_col_config["parquet_column_name"]

    # df.columns holds parquet column names, so compare against the map's
    # values (parquet names), not its keys (raw column names).
    missing_cols = set(alias_map.values()) - set(df.columns)
    if missing_cols:
        logger.error("found schema:")
        log_df_schema(df, logger.error)
        raise UserException("missing column(s) in input dataset",
                            str(missing_cols))

    select_exprs = [
        "{} as {}".format(parq_name, col_name)
        for col_name, parq_name in alias_map.items()
    ]

    return df.selectExpr(*select_exprs)
Beispiel #5
0
def read_csv(ctx, spark):
    """Load the environment's CSV dataset and rename columns per the schema.

    Non-``None`` entries of the optional ``csv_config`` are passed to the
    Spark CSV reader with their names converted via ``util.snake_to_camel``.
    Columns are renamed positionally: the i-th CSV column receives the name
    referenced by the i-th schema entry.

    Args:
        ctx: Context exposing ``environment["data"]`` (with ``"path"``,
            ``"schema"`` and optionally ``"csv_config"``).
        spark: Spark session used to read the CSV data.

    Returns:
        A DataFrame whose columns carry the schema's column names.

    Raises:
        UserException: If the number of CSV columns differs from the number
            of schema entries.
    """
    data_config = ctx.environment["data"]

    csv_config = {
        util.snake_to_camel(param_name): val
        for param_name, val in data_config.get("csv_config", {}).items()
        if val is not None
    }

    df = spark.read.csv(data_config["path"],
                        inferSchema=True,
                        mode="FAILFAST",
                        **csv_config)

    expected_count = len(data_config["schema"])
    actual_count = len(df.columns)
    if expected_count != actual_count:
        # Format the counts: concatenating an int to a str raises TypeError
        # and would hide the intended user-facing error.
        raise UserException(
            "expected {} column(s) but got {}".format(expected_count,
                                                      actual_count))

    col_names = [
        util.get_resource_ref(col_ref) for col_ref in data_config["schema"]
    ]
    renamed_cols = [
        F.col(c).alias(col_names[idx]) for idx, c in enumerate(df.columns)
    ]
    return df.select(*renamed_cols)
Beispiel #6
0
def start(args):
    """Prepare the prediction API process and start serving requests.

    Builds the context, fills ``local_cache`` with everything the request
    path needs (API definition, model, estimator, transformer
    implementations, ...), makes sure the model files are present in
    ``args.model_dir``, waits for the TensorFlow Serving endpoint to answer a
    metadata request, and finally calls ``serve``.

    Any failure during preparation is logged and terminates the process via
    ``sys.exit(1)``.

    Args:
        args: Parsed arguments; this function reads ``context``,
            ``cache_dir``, ``workload_id``, ``api``, ``model_dir``,
            ``tf_serve_port`` and ``port``.
    """
    ctx = Context(s3_path=args.context,
                  cache_dir=args.cache_dir,
                  workload_id=args.workload_id)

    api = ctx.apis_id_map[args.api]
    local_cache["api"] = api
    local_cache["ctx"] = ctx

    try:
        # Optional user-provided request handler.
        if api.get("request_handler_impl_key") is not None:
            local_cache["request_handler"] = ctx.get_request_handler_impl(
                api["name"])

        if not util.is_resource_ref(api["model"]):
            # api["model"] is not a resource reference: it is passed directly
            # to download_and_unzip_external as the model location.
            if api.get("request_handler") is not None:
                package.install_packages(ctx.python_packages, ctx.storage)
            # Download only when the model directory is not already present.
            if not os.path.isdir(args.model_dir):
                ctx.storage.download_and_unzip_external(
                    api["model"], args.model_dir)
        else:
            # api["model"] references a model defined in the context.
            package.install_packages(ctx.python_packages, ctx.storage)
            model_name = util.get_resource_ref(api["model"])
            model = ctx.models[model_name]
            estimator = ctx.estimators[model["estimator"]]

            local_cache["model"] = model
            local_cache["estimator"] = estimator
            local_cache["target_col"] = ctx.columns[util.get_resource_ref(
                model["target_column"])]
            local_cache["target_col_type"] = ctx.get_inferred_column_type(
                util.get_resource_ref(model["target_column"]))

            # TensorFlow log level falls back to "DEBUG" when the environment
            # or its "log_level" / "tensorflow" entry is missing.
            log_level = "DEBUG"
            if ctx.environment is not None and ctx.environment.get(
                    "log_level") is not None:
                log_level = ctx.environment["log_level"].get(
                    "tensorflow", "DEBUG")
            tf_lib.set_logging_verbosity(log_level)

            # Download only when the model directory is not already present.
            if not os.path.isdir(args.model_dir):
                ctx.storage.download_and_unzip(model["key"], args.model_dir)

            # Load the transformer implementation of every transformed column
            # used by the model's input or target column.
            for column_name in ctx.extract_column_names(
                [model["input"], model["target_column"]]):
                if ctx.is_transformed_column(column_name):
                    trans_impl, _ = ctx.get_transformer_impl(column_name)
                    local_cache["trans_impls"][column_name] = trans_impl
                    transformed_column = ctx.transformed_columns[column_name]

                    # cache aggregate values
                    # (the return value is discarded; presumably get_obj
                    # memoizes the fetched object -- confirm)
                    for resource_name in util.extract_resource_refs(
                            transformed_column["input"]):
                        if resource_name in ctx.aggregates:
                            ctx.get_obj(ctx.aggregates[resource_name]["key"])

            local_cache["required_inputs"] = tf_lib.get_base_input_columns(
                model["name"], ctx)

            # Resolve the optional target vocabulary without a schema and
            # without preserving column references.
            if util.is_dict(model["input"]) and model["input"].get(
                    "target_vocab") is not None:
                local_cache["target_vocab_populated"] = ctx.populate_values(
                    model["input"]["target_vocab"], None, False)
    except CortexException as e:
        # Add context to the Cortex error, log it, then abort the process.
        e.wrap("error")
        logger.error(str(e))
        logger.exception(
            "An error occurred, see `cortex logs -v api {}` for more details.".
            format(api["name"]))
        sys.exit(1)
    except Exception as e:
        # Any other failure during preparation also aborts the process.
        logger.exception(
            "An error occurred, see `cortex logs -v api {}` for more details.".
            format(api["name"]))
        sys.exit(1)

    try:
        validate_model_dir(args.model_dir)
    except Exception as e:
        logger.exception(e)
        sys.exit(1)

    # gRPC stub for the TensorFlow Serving process on the local host.
    channel = grpc.insecure_channel("localhost:" + str(args.tf_serve_port))
    local_cache["stub"] = prediction_service_pb2_grpc.PredictionServiceStub(
        channel)

    # wait a bit for tf serving to start before querying metadata
    # (up to `limit` attempts, sleeping one second after each failure)
    limit = 300
    for i in range(limit):
        try:
            local_cache["metadata"] = run_get_model_metadata()
            break
        except Exception as e:
            # Give up only after the final attempt has failed.
            if i == limit - 1:
                logger.exception(
                    "An error occurred, see `cortex logs -v api {}` for more details."
                    .format(api["name"]))
                sys.exit(1)

        time.sleep(1)

    # Listen on all interfaces at the configured port.
    serve(app, listen="*:{}".format(args.port))
Beispiel #7
0
    def populate_values(self, input, input_schema, preserve_column_refs):
        """Resolve resource references in ``input`` and validate it against a schema.

        The value is processed recursively:

        * ``None`` is accepted only without a schema or when the schema sets
          ``_allow_null``.
        * A resource reference to a constant or aggregate is replaced by that
          resource's value (itself processed recursively). A reference to a
          column is type-checked against the schema and returned either
          unchanged or as the bare column name.
        * Lists and dicts are validated (type, ``_min_count``/``_max_count``)
          and rebuilt element by element.
        * Any other value is returned as-is without a schema, or cast with
          ``cast_compound_type`` otherwise.

        Args:
            input: The user-supplied value (scalar, reference string, list or
                dict).
            input_schema: Schema dict using keys such as ``_type``,
                ``_allow_null``, ``_min_count``, ``_max_count``,
                ``_optional`` and ``_default``; ``None`` disables validation.
            preserve_column_refs: When true, column references are returned
                unchanged; otherwise the referenced column name is returned.

        Returns:
            The populated (and, with a schema, validated/cast) value.

        Raises:
            UserException: On null, type, length or missing-key violations.
            CortexException: Errors raised while processing nested values are
                wrapped with the offending constant/aggregate name, list
                index or map key and re-raised.
        """
        if input is None:
            if input_schema is None:
                return None
            if input_schema.get("_allow_null") == True:
                return None
            raise UserException("Null value is not allowed")

        if util.is_resource_ref(input):
            res_name = util.get_resource_ref(input)
            if res_name in self.constants:
                constant = self.constants[res_name]
                # Default to None so a constant with neither "value" nor
                # "path" goes through the regular null handling instead of
                # failing with UnboundLocalError.
                const_val = None
                if constant.get("value") is not None:
                    const_val = constant["value"]
                elif constant.get("path") is not None:
                    const_val = self.storage.get_json_external(constant["path"])
                try:
                    return self.populate_values(const_val, input_schema, preserve_column_refs)
                except CortexException as e:
                    e.wrap("constant " + res_name)
                    raise

            if res_name in self.aggregates:
                agg_val = self.get_obj(self.aggregates[res_name]["key"])
                try:
                    return self.populate_values(agg_val, input_schema, preserve_column_refs)
                except CortexException as e:
                    e.wrap("aggregate " + res_name)
                    raise

            if res_name in self.columns:
                if input_schema is not None:
                    col_type = self.get_inferred_column_type(res_name)
                    if col_type not in input_schema["_type"]:
                        raise UserException(
                            "column {}: unsupported input type (expected type {}, got type {})".format(
                                res_name,
                                util.data_type_str(input_schema["_type"]),
                                util.data_type_str(col_type),
                            )
                        )
                if preserve_column_refs:
                    return input
                else:
                    return res_name
            # A reference that matches no constant, aggregate or column falls
            # through and is handled like any other value below.

        if util.is_list(input):
            elem_schema = None
            if input_schema is not None:
                if not util.is_list(input_schema["_type"]):
                    raise UserException(
                        "unsupported input type (expected type {}, got {})".format(
                            util.data_type_str(input_schema["_type"]), util.user_obj_str(input)
                        )
                    )
                # A list schema carries the element schema as its first item.
                elem_schema = input_schema["_type"][0]

                min_count = input_schema.get("_min_count")
                if min_count is not None and len(input) < min_count:
                    raise UserException(
                        "list has length {}, but the minimum allowed length is {}".format(
                            len(input), min_count
                        )
                    )

                max_count = input_schema.get("_max_count")
                if max_count is not None and len(input) > max_count:
                    raise UserException(
                        "list has length {}, but the maximum allowed length is {}".format(
                            len(input), max_count
                        )
                    )

            casted = []
            for i, elem in enumerate(input):
                try:
                    casted.append(self.populate_values(elem, elem_schema, preserve_column_refs))
                except CortexException as e:
                    # str() is required: "index " + i raises TypeError for an
                    # int index and would mask the original error.
                    e.wrap("index " + str(i))
                    raise
            return casted

        if util.is_dict(input):
            if input_schema is None:
                # No schema: resolve keys and values without validation.
                casted = {}
                for key, val in input.items():
                    key_casted = self.populate_values(key, None, preserve_column_refs)
                    try:
                        val_casted = self.populate_values(val, None, preserve_column_refs)
                    except CortexException as e:
                        e.wrap(util.user_obj_str(key))
                        raise
                    casted[key_casted] = val_casted
                return casted

            if not util.is_dict(input_schema["_type"]):
                raise UserException(
                    "unsupported input type (expected type {}, got {})".format(
                        util.data_type_str(input_schema["_type"]), util.user_obj_str(input)
                    )
                )

            min_count = input_schema.get("_min_count")
            if min_count is not None and len(input) < min_count:
                raise UserException(
                    "map has length {}, but the minimum allowed length is {}".format(
                        len(input), min_count
                    )
                )

            max_count = input_schema.get("_max_count")
            if max_count is not None and len(input) > max_count:
                raise UserException(
                    "map has length {}, but the maximum allowed length is {}".format(
                        len(input), max_count
                    )
                )

            # A schema with a single entry whose key is a compound type
            # describes a generic map: every key/value pair of the input is
            # validated against that one key schema / value schema.
            is_generic_map = False
            if len(input_schema["_type"]) == 1:
                input_type_key = next(iter(input_schema["_type"].keys()))
                if is_compound_type(input_type_key):
                    is_generic_map = True
                    generic_map_key_schema = input_schema_from_type_schema(input_type_key)
                    generic_map_value = input_schema["_type"][input_type_key]

            if is_generic_map:
                casted = {}
                for key, val in input.items():
                    key_casted = self.populate_values(
                        key, generic_map_key_schema, preserve_column_refs
                    )
                    try:
                        val_casted = self.populate_values(
                            val, generic_map_value, preserve_column_refs
                        )
                    except CortexException as e:
                        e.wrap(util.user_obj_str(key))
                        raise
                    casted[key_casted] = val_casted
                return casted

            # fixed map: iterate over the schema's keys, so input keys that
            # the schema does not list are dropped from the result.
            casted = {}
            for key, val_schema in input_schema["_type"].items():
                if key in input:
                    val = input[key]
                else:
                    if val_schema.get("_optional") is not True:
                        raise UserException("missing key: " + util.user_obj_str(key))
                    # Optional key without a default is simply omitted.
                    if val_schema.get("_default") is None:
                        continue
                    val = val_schema["_default"]

                try:
                    val_casted = self.populate_values(val, val_schema, preserve_column_refs)
                except CortexException as e:
                    e.wrap(util.user_obj_str(key))
                    raise
                casted[key] = val_casted
            return casted

        # Scalar value.
        if input_schema is None:
            return input
        if not util.is_str(input_schema["_type"]):
            raise UserException(
                "unsupported input type (expected type {}, got {})".format(
                    util.data_type_str(input_schema["_type"]), util.user_obj_str(input)
                )
            )
        return cast_compound_type(input, input_schema["_type"])
Beispiel #8
0
 def _parse_example(example_proto):
     """Parse one serialized example and split off the target feature.

     ``feature_spec`` and ``model`` are free variables taken from the
     enclosing scope (not visible in this snippet).

     Returns:
         A ``(features, target)`` pair; ``target`` is ``None`` when the
         parsed features do not contain the model's target column.
     """
     parsed = tf.parse_single_example(serialized=example_proto,
                                      features=feature_spec)
     target_key = util.get_resource_ref(model["target_column"])
     # Remove the label from the feature dict so it is returned separately.
     label = parsed.pop(target_key, None)
     return parsed, label
Beispiel #9
0
def train(model_name, estimator_impl, ctx, model_dir):
    """Train and evaluate a model's estimator, exporting it when finished.

    Args:
        model_name: Key into ``ctx.models``.
        estimator_impl: Object providing ``create_estimator(run_config,
            model_config)``; it is also forwarded to the input-function
            generators.
        ctx: Context object giving access to models, environment, dataset
            metadata and column types.
        model_dir: Directory passed to the estimator's run configuration.

    Returns:
        ``model_dir``, unchanged.

    Raises:
        UserRuntimeException: If ``estimator_impl.create_estimator`` raises;
            the original exception is chained as the cause.
    """
    model = ctx.models[model_name]

    # NOTE(review): create-then-remove presumably leaves the parent
    # directories in place while clearing any previous contents of
    # model_dir -- confirm against util.mkdir_p / util.rm_dir.
    util.mkdir_p(model_dir)
    util.rm_dir(model_dir)

    tf_lib.set_logging_verbosity(ctx.environment["log_level"]["tensorflow"])

    training_cfg = model["training"]
    run_config = tf.estimator.RunConfig(
        tf_random_seed=training_cfg["tf_random_seed"],
        save_summary_steps=training_cfg["save_summary_steps"],
        save_checkpoints_secs=training_cfg["save_checkpoints_secs"],
        save_checkpoints_steps=training_cfg["save_checkpoints_steps"],
        log_step_count_steps=training_cfg["log_step_count_steps"],
        keep_checkpoint_max=training_cfg["keep_checkpoint_max"],
        keep_checkpoint_every_n_hours=training_cfg["keep_checkpoint_every_n_hours"],
        model_dir=model_dir,
    )

    train_input_fn = generate_input_fn(
        model_name, ctx, "training", estimator_impl)
    eval_input_fn = generate_input_fn(
        model_name, ctx, "evaluation", estimator_impl)
    serving_input_fn = generate_json_serving_input_fn(
        model_name, ctx, estimator_impl)
    final_exporter = tf.estimator.FinalExporter(
        "estimator", serving_input_fn, as_text=False)

    # An explicit step count is used unless a number of epochs is configured,
    # in which case steps = ceil(dataset size / batch size) * epochs.
    max_train_steps = training_cfg["num_steps"]
    dataset_metadata = ctx.get_metadata(model["dataset"]["id"])
    if training_cfg["num_epochs"]:
        train_batches_per_epoch = math.ceil(
            dataset_metadata["training_size"] / float(training_cfg["batch_size"]))
        max_train_steps = train_batches_per_epoch * training_cfg["num_epochs"]

    train_spec = tf.estimator.TrainSpec(train_input_fn,
                                        max_steps=max_train_steps)

    evaluation_cfg = model["evaluation"]
    eval_steps = evaluation_cfg["num_steps"]
    if evaluation_cfg["num_epochs"]:
        eval_batches_per_epoch = math.ceil(
            dataset_metadata["eval_size"] / float(evaluation_cfg["batch_size"]))
        eval_steps = eval_batches_per_epoch * evaluation_cfg["num_epochs"]

    eval_spec = tf.estimator.EvalSpec(
        eval_input_fn,
        steps=eval_steps,
        exporters=[final_exporter],
        name="estimator-eval",
        start_delay_secs=evaluation_cfg["start_delay_secs"],
        throttle_secs=evaluation_cfg["throttle_secs"],
    )

    resolved_model_config = ctx.model_config(model_name)

    try:
        tf_estimator = estimator_impl.create_estimator(
            run_config, resolved_model_config)
    except Exception as e:
        raise UserRuntimeException("model " + model_name) from e

    # Float targets additionally get the regression evaluation metrics.
    target_col_name = util.get_resource_ref(model["target_column"])
    target_col_type = ctx.get_inferred_column_type(target_col_name)
    if target_col_type == consts.COLUMN_TYPE_FLOAT:
        tf_estimator = tf.contrib.estimator.add_metrics(
            tf_estimator, get_regression_eval_metrics)

    tf.estimator.train_and_evaluate(tf_estimator, train_spec, eval_spec)

    return model_dir