# NOTE: assumed imports for this test module. The module paths follow the
# Microsoft Recommenders package layout and may need adjusting for your
# environment; the `pd_df` and `tmp` pytest fixtures are provided elsewhere
# (a sketch of them appears after the first test below).
import itertools
import os

import tensorflow as tf

from recommenders.utils.constants import (
    DEFAULT_USER_COL,
    DEFAULT_ITEM_COL,
    DEFAULT_RATING_COL,
)
from recommenders.utils.tf_utils import (
    MODEL_DIR,
    MetricsLogger,
    evaluation_log_hook,
    export_model,
    pandas_input_fn,
    pandas_input_fn_for_saved_model,
)
from recommenders.models.wide_deep.wide_deep_utils import (
    build_feature_columns,
    build_model,
)
from recommenders.evaluation.python_evaluation import rmse

ITEM_FEAT_COL = "itemFeat"  # assumed name of the synthetic item-feature column


def test_wide_model(pd_df, tmp):
    data, users, items = pd_df

    # Test if the wide columns have the two original features and one crossed feature
    wide_columns, _ = build_feature_columns(
        users, items, model_type="wide", crossed_feat_dim=10
    )
    assert len(wide_columns) == 3
    # Check crossed feature dimension
    assert wide_columns[2].hash_bucket_size == 10
    # Check model type
    model = build_model(
        os.path.join(tmp, "wide_" + MODEL_DIR), wide_columns=wide_columns
    )
    assert isinstance(model, tf.estimator.LinearRegressor)
    # Test if model train works
    model.train(
        input_fn=pandas_input_fn(
            df=data,
            y_col=DEFAULT_RATING_COL,
            batch_size=1,
            num_epochs=None,
            shuffle=True,
        ),
        steps=1,
    )

    # Close the event file so that the model folder can be cleaned up.
    summary_writer = tf.compat.v1.summary.FileWriterCache.get(model.model_dir)
    summary_writer.close()
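

# --- Hedged sketch: the `pd_df` and `tmp` fixtures used throughout are
# normally defined in conftest.py. This is a minimal, assumed reconstruction;
# column values, fixture scopes, and feature shapes are illustrative only.
import pandas as pd
import pytest


@pytest.fixture(scope="module")
def pd_df():
    df = pd.DataFrame(
        {
            DEFAULT_USER_COL: [1, 1, 1, 2, 2, 2],
            DEFAULT_ITEM_COL: [1, 2, 3, 1, 4, 5],
            DEFAULT_RATING_COL: [5.0, 4.0, 3.0, 5.0, 5.0, 3.0],
            ITEM_FEAT_COL: [[1, 1], [1, 0], [0, 1], [1, 1], [0, 0], [1, 0]],
        }
    )
    users = df.drop_duplicates(DEFAULT_USER_COL)[DEFAULT_USER_COL].values
    items = df.drop_duplicates(DEFAULT_ITEM_COL)[DEFAULT_ITEM_COL].values
    return df, users, items


@pytest.fixture()
def tmp(tmp_path):
    # pytest's built-in tmp_path fixture supplies a per-test temp directory
    return str(tmp_path)
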
def test_wide_deep_model(pd_df, tmp):
    data, users, items = pd_df

    # Test if wide and deep columns have correct features
    wide_columns, deep_columns = build_feature_columns(
        users, items, model_type="wide_deep"
    )
    assert len(wide_columns) == 3
    assert len(deep_columns) == 2
    # Check model type
    model = build_model(
        os.path.join(tmp, "wide_deep_" + MODEL_DIR),
        wide_columns=wide_columns,
        deep_columns=deep_columns,
    )
    assert isinstance(model, tf.estimator.DNNLinearCombinedRegressor)
    # Test if model train works
    model.train(
        input_fn=pandas_input_fn(
            df=data,
            y_col=DEFAULT_RATING_COL,
            batch_size=1,
            num_epochs=None,
            shuffle=True,
        ),
        steps=1,
    )

    # Close the event file so that the model folder can be cleaned up.
    summary_writer = tf.summary.FileWriterCache.get(model.model_dir)
    summary_writer.close()
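

# --- Hedged sketch: approximately what `build_feature_columns` is expected
# to construct for model_type="wide_deep" (the helper's internals may differ).
# It motivates the asserts above: 3 wide columns (user id, item id, and their
# cross) and 2 deep columns (user and item embeddings).
def _sketch_feature_columns(users, items, crossed_feat_dim=1000, user_dim=8, item_dim=8):
    user_ids = tf.feature_column.categorical_column_with_vocabulary_list(
        DEFAULT_USER_COL, users
    )
    item_ids = tf.feature_column.categorical_column_with_vocabulary_list(
        DEFAULT_ITEM_COL, items
    )
    wide = [
        user_ids,
        item_ids,
        # Crossed feature, hashed into `crossed_feat_dim` buckets; this is the
        # column whose hash_bucket_size is asserted in test_wide_model
        tf.feature_column.crossed_column(
            [DEFAULT_USER_COL, DEFAULT_ITEM_COL], hash_bucket_size=crossed_feat_dim
        ),
    ]
    deep = [
        tf.feature_column.embedding_column(user_ids, dimension=user_dim),
        tf.feature_column.embedding_column(item_ids, dimension=item_dim),
    ]
    return wide, deep
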
def test_pandas_input_fn_for_saved_model(pd_df, tmp):
    """Test `export_model` and `pandas_input_fn_for_saved_model`"""
    data, users, items = pd_df
    model_dir = os.path.join(tmp, "model")
    export_dir = os.path.join(tmp, "export")

    _, deep_columns = build_feature_columns(users, items, model_type="deep")

    # Train a model
    model = build_model(
        model_dir,
        deep_columns=deep_columns,
    )
    train_fn = pandas_input_fn(
        df=data, y_col=DEFAULT_RATING_COL, batch_size=1, num_epochs=None, shuffle=True
    )
    model.train(input_fn=train_fn, steps=1)

    # Test export_model function
    exported_path = export_model(
        model=model,
        train_input_fn=train_fn,
        eval_input_fn=pandas_input_fn(df=data, y_col=DEFAULT_RATING_COL),
        tf_feat_cols=deep_columns,
        base_dir=export_dir,
    )
    saved_model = tf.saved_model.load(exported_path, tags="serve")

    # Test pandas_input_fn_for_saved_model with the saved model
    test = data.drop(DEFAULT_RATING_COL, axis=1)
    test.reset_index(drop=True, inplace=True)
    list(
        itertools.islice(
            saved_model.signatures["predict"](
                examples=pandas_input_fn_for_saved_model(
                    df=test,
                    feat_name_type={
                        DEFAULT_USER_COL: int,
                        DEFAULT_ITEM_COL: int,
                        ITEM_FEAT_COL: list,
                    },
                )()["inputs"]
            ),
            len(test),
        )
    )

    # Close the event file so that the model folder can be cleaned up.
    summary_writer = tf.compat.v1.summary.FileWriterCache.get(model.model_dir)
    summary_writer.close()
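

# --- Hedged sketch: the exported "predict" signature parses serialized
# tf.train.Example protos, which `pandas_input_fn_for_saved_model` is assumed
# to build from each DataFrame row. A single row would serialize roughly like
# this (illustrative helper, assuming integer-valued features):
def _sketch_serialize_row(user_id, item_id, item_feat):
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                DEFAULT_USER_COL: tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[user_id])
                ),
                DEFAULT_ITEM_COL: tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[item_id])
                ),
                ITEM_FEAT_COL: tf.train.Feature(
                    int64_list=tf.train.Int64List(value=item_feat)
                ),
            }
        )
    )
    return example.SerializeToString()
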
def test_evaluation_log_hook(pd_df, tmp):
    data, users, items = pd_df

    # Run hook 10 times
    hook_frequency = 10
    train_steps = 10

    _, deep_columns = build_feature_columns(users, items, model_type="deep")

    model = build_model(
        tmp,
        deep_columns=deep_columns,
        save_checkpoints_steps=train_steps // hook_frequency,
    )

    evaluation_logger = MetricsLogger()

    # Train a model w/ the hook
    hooks = [
        evaluation_log_hook(
            model,
            logger=evaluation_logger,
            true_df=data,
            y_col=DEFAULT_RATING_COL,
            eval_df=data.drop(DEFAULT_RATING_COL, axis=1),
            every_n_iter=train_steps // hook_frequency,
            model_dir=tmp,
            eval_fns=[rmse],
        )
    ]
    model.train(
        input_fn=pandas_input_fn(
            df=data,
            y_col=DEFAULT_RATING_COL,
            batch_size=1,
            num_epochs=None,
            shuffle=True,
        ),
        hooks=hooks,
        steps=train_steps,
    )

    # Check if hook logged the given metric
    assert rmse.__name__ in evaluation_logger.get_log()
    assert len(evaluation_logger.get_log()[rmse.__name__]) == hook_frequency

    # Close the event file so that the model folder can be cleaned up.
    summary_writer = tf.compat.v1.summary.FileWriterCache.get(model.model_dir)
    summary_writer.close()
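

# --- Hedged sketch: a minimal `MetricsLogger` consistent with how the test
# uses it (get_log() returns a dict mapping metric name to a list of logged
# values); the real class ships with the Recommenders utilities.
class _SketchMetricsLogger:
    def __init__(self):
        self._log = {}

    def log(self, metric, value):
        # Append each reported value under its metric name
        self._log.setdefault(metric, []).append(value)

    def get_log(self):
        return self._log
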
def test_deep_model(pd_df, tmp):
    data, users, items = pd_df

    # Test if deep columns have user and item features
    _, deep_columns = build_feature_columns(users, items, model_type="deep")
    assert len(deep_columns) == 2
    # Check model type
    model = build_model(
        os.path.join(tmp, "deep_" + MODEL_DIR), deep_columns=deep_columns
    )
    assert isinstance(model, tf.estimator.DNNRegressor)
    # Test if model train works
    model.train(
        input_fn=pandas_input_fn(
            df=data,
            y_col=DEFAULT_RATING_COL,
            batch_size=1,
            num_epochs=1,
            shuffle=False,
        )
    )

    # Close the event file so that the model folder can be cleaned up.
    summary_writer = tf.compat.v1.summary.FileWriterCache.get(model.model_dir)
    summary_writer.close()
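

# --- Usage sketch (assumed, mirroring the patterns above): scoring a trained
# estimator with `pandas_input_fn`. Estimator.predict yields one dict per
# example; TF regressors expose the score under the "predictions" key.
def _sketch_predict(model, data):
    results = model.predict(
        input_fn=pandas_input_fn(
            df=data.drop(DEFAULT_RATING_COL, axis=1),
            batch_size=1,
            num_epochs=1,
            shuffle=False,
        )
    )
    return [r["predictions"][0] for r in results]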