Example #1
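All of the snippets on this page are unit tests for the wide-and-deep recommender utilities in the Microsoft Recommenders code base, written against the TensorFlow 1.x Estimator API (tf.Session, tf.contrib). They share a pytest fixture pd_df and several module-level constants that the page does not show. The block below is a minimal sketch of what that shared setup might look like; every name, value, and import path in it is an assumption inferred from how the tests use them, not the original source.

import itertools
import os
import shutil

import numpy as np
import pandas as pd
import pytest
import tensorflow as tf  # the examples assume TF 1.x

# Assumed import paths; they have moved between versions of the project:
# from reco_utils.common.tf_utils import (
#     pandas_input_fn, pandas_input_fn_for_saved_model, export_model,
#     evaluation_log_hook, MetricsLogger,
# )
# from reco_utils.recommender.wide_deep.wide_deep_utils import (
#     build_model, build_feature_columns,
# )
# `Logger` (Example #8) is likewise assumed to come from the same project.

# Assumed constants; the real values live in the project under test.
DEFAULT_USER_COL = "userID"
DEFAULT_ITEM_COL = "itemID"
DEFAULT_RATING_COL = "rating"
ITEM_FEAT_COL = "itemFeatures"
MODEL_DIR = "model_dir"
SEED = 42


@pytest.fixture(scope="module")
def pd_df():
    """Hypothetical stand-in for the shared fixture: a small ratings frame
    plus the arrays of unique user and item IDs."""
    data = pd.DataFrame(
        {
            DEFAULT_USER_COL: [1, 1, 1, 2, 2, 2],
            DEFAULT_ITEM_COL: [1, 2, 3, 1, 4, 5],
            DEFAULT_RATING_COL: [5.0, 4.0, 3.0, 5.0, 5.0, 3.0],
            ITEM_FEAT_COL: [[1, 1], [2, 2], [3, 3], [1, 1], [4, 4], [5, 5]],
        }
    )
    users = data[DEFAULT_USER_COL].unique()
    items = data[DEFAULT_ITEM_COL].unique()
    return data, users, items


@pytest.fixture
def tmp(tmp_path):
    # The `tmp` fixture used by later examples behaves like a temp directory.
    return str(tmp_path)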
def test_build_model(pd_df):
    data, users, items = pd_df

    # Test wide model
    wide_columns, deep_columns = build_feature_columns(users,
                                                       items,
                                                       model_type='wide')
    model = build_model('wide_' + MODEL_DIR, wide_columns=wide_columns)
    assert isinstance(model, tf.estimator.LinearRegressor)
    model = build_model('wide_' + MODEL_DIR,
                        wide_columns=wide_columns,
                        deep_columns=deep_columns)
    assert isinstance(model, tf.estimator.LinearRegressor)

    # Test if model train works
    model.train(input_fn=pandas_input_fn(df=data,
                                         y_col=DEFAULT_RATING_COL,
                                         batch_size=1,
                                         num_epochs=10,
                                         shuffle=True))
    shutil.rmtree('wide_' + MODEL_DIR, ignore_errors=True)

    # Test deep model
    wide_columns, deep_columns = build_feature_columns(users,
                                                       items,
                                                       model_type='deep')
    model = build_model('deep_' + MODEL_DIR, deep_columns=deep_columns)
    assert isinstance(model, tf.estimator.DNNRegressor)
    model = build_model('deep_' + MODEL_DIR,
                        wide_columns=wide_columns,
                        deep_columns=deep_columns)
    assert isinstance(model, tf.estimator.DNNRegressor)

    # Test if model train works
    model.train(input_fn=pandas_input_fn(df=data,
                                         y_col=DEFAULT_RATING_COL,
                                         batch_size=1,
                                         num_epochs=10,
                                         shuffle=True))
    shutil.rmtree('deep_' + MODEL_DIR, ignore_errors=True)

    # Test wide_deep model
    wide_columns, deep_columns = build_feature_columns(users,
                                                       items,
                                                       model_type='wide_deep')
    model = build_model('wide_deep_' + MODEL_DIR,
                        wide_columns=wide_columns,
                        deep_columns=deep_columns)
    assert isinstance(model, tf.estimator.DNNLinearCombinedRegressor)

    # Test if model train works
    model.train(input_fn=pandas_input_fn(df=data,
                                         y_col=DEFAULT_RATING_COL,
                                         batch_size=1,
                                         num_epochs=10,
                                         shuffle=True))
    shutil.rmtree('wide_deep_' + MODEL_DIR, ignore_errors=True)
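Example #1 only asserts that model construction and training run without error. As a follow-up sketch (assumed usage, not part of the original test), the trained Estimator could also score unlabeled rows, provided this runs before the shutil.rmtree cleanup removes its checkpoints:

preds = model.predict(
    input_fn=pandas_input_fn(df=data.drop(DEFAULT_RATING_COL, axis=1),
                             batch_size=1))
# Each element is a dict keyed by the regressor's output head, e.g.
# {'predictions': array([...], dtype=float32)}.
print(next(iter(preds)))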
Example #2
def test_pandas_input_fn_for_saved_model(pd_df, tmp):
    """Test `export_model` and `pandas_input_fn_for_saved_model`"""
    data, users, items = pd_df
    model_dir = os.path.join(tmp, "model")
    export_dir = os.path.join(tmp, "export")

    _, deep_columns = build_feature_columns(users, items, model_type="deep")

    # Train a model
    model = build_model(
        model_dir,
        deep_columns=deep_columns,
    )
    train_fn = pandas_input_fn(df=data,
                               y_col=DEFAULT_RATING_COL,
                               batch_size=1,
                               num_epochs=None,
                               shuffle=True)
    model.train(input_fn=train_fn, steps=1)

    # Test export_model function
    exported_path = export_model(
        model=model,
        train_input_fn=train_fn,
        eval_input_fn=pandas_input_fn(df=data, y_col=DEFAULT_RATING_COL),
        tf_feat_cols=deep_columns,
        base_dir=export_dir,
    )
    saved_model = tf.contrib.estimator.SavedModelEstimator(exported_path)

    # Test pandas_input_fn_for_saved_model with the saved model
    test = data.drop(DEFAULT_RATING_COL, axis=1)
    test.reset_index(drop=True, inplace=True)
    list(
        itertools.islice(
            saved_model.predict(
                pandas_input_fn_for_saved_model(
                    df=test,
                    feat_name_type={
                        DEFAULT_USER_COL: int,
                        DEFAULT_ITEM_COL: int,
                        ITEM_FEAT_COL: list,
                    },
                )),
            len(test),
        ))

    # Close the event file so that the model folder can be cleaned up.
    summary_writer = tf.summary.FileWriterCache.get(model.model_dir)
    summary_writer.close()
Example #3
def test_wide_model(pd_df, tmp):
    data, users, items = pd_df

    # Test wide model
    # Test if wide column has two original features and one crossed feature
    wide_columns, _ = build_feature_columns(users,
                                            items,
                                            model_type="wide",
                                            crossed_feat_dim=10)
    assert len(wide_columns) == 3
    # Check crossed feature dimension
    assert wide_columns[2].hash_bucket_size == 10
    # Check model type
    model = build_model(os.path.join(tmp, "wide_" + MODEL_DIR),
                        wide_columns=wide_columns)
    assert isinstance(model, tf.estimator.LinearRegressor)
    # Test if model train works
    model.train(
        input_fn=pandas_input_fn(
            df=data,
            y_col=DEFAULT_RATING_COL,
            batch_size=1,
            num_epochs=None,
            shuffle=True,
        ),
        steps=1,
    )

    # Close the event file so that the model folder can be cleaned up.
    summary_writer = tf.summary.FileWriterCache.get(model.model_dir)
    summary_writer.close()
Example #4
def test_wide_deep_model(pd_df, tmp):
    data, users, items = pd_df

    # Test if wide and deep columns have correct features
    wide_columns, deep_columns = build_feature_columns(users,
                                                       items,
                                                       model_type="wide_deep")
    assert len(wide_columns) == 3
    assert len(deep_columns) == 2
    # Check model type
    model = build_model(
        os.path.join(tmp, "wide_deep_" + MODEL_DIR),
        wide_columns=wide_columns,
        deep_columns=deep_columns,
    )
    assert isinstance(model, tf.estimator.DNNLinearCombinedRegressor)
    # Test if model train works
    model.train(
        input_fn=pandas_input_fn(
            df=data,
            y_col=DEFAULT_RATING_COL,
            batch_size=1,
            num_epochs=None,
            shuffle=True,
        ),
        steps=1,
    )

    # Close the event file so that the model folder can be cleaned up.
    summary_writer = tf.summary.FileWriterCache.get(model.model_dir)
    summary_writer.close()
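Note the training pattern in the three tmp-based tests above: pandas_input_fn is created with num_epochs=None, so the input repeats indefinitely, and the run is bounded by steps=1 instead, training on exactly one batch. The MODEL_DIR-based test in Example #1 does the reverse, using a finite num_epochs with no step limit and cleaning up with shutil.rmtree afterwards.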
Example #5
def test_pandas_input_fn(pd_df):
    df, _, _ = pd_df

    # check dataset
    dataset = pandas_input_fn(df)()
    batch = dataset.make_one_shot_iterator().get_next()
    with tf.Session() as sess:
        features = sess.run(batch)

        # check the input function returns all the columns
        assert len(features) == len(df.columns)

        for k, v in features.items():
            assert k in df.columns.values
            # check that a list feature column is converted correctly
            if len(v.shape) == 1:
                assert np.array_equal(v, df[k].values)
            elif len(v.shape) == 2:
                assert v.shape[1] == len(df[k][0])

    # check dataset with shuffle enabled
    dataset = pandas_input_fn(df, shuffle=True, seed=SEED)()
    batch = dataset.make_one_shot_iterator().get_next()
    with tf.Session() as sess:
        features = sess.run(batch)
        print(features)
        # check the input function returns all the columns
        assert len(features) == len(df.columns)

        for k, v in features.items():
            assert k in df.columns.values
            # check that a list feature column is converted correctly
            if len(v.shape) == 1:
                assert not np.array_equal(v, df[k].values)
            elif len(v.shape) == 2:
                assert v.shape[1] == len(df[k][0])

    # check dataset w/ label
    dataset_with_label = pandas_input_fn(df, y_col=DEFAULT_RATING_COL)()
    batch = dataset_with_label.make_one_shot_iterator().get_next()
    with tf.Session() as sess:
        features, label = sess.run(batch)
        assert (
            len(features) == len(df.columns) - 1
        )  # label should not be in the features
Example #6
def test_pandas_input_fn(pd_df):
    df, _, _ = pd_df

    input_fn = pandas_input_fn(df)
    sample = input_fn()

    # check the input function returns all the columns
    assert len(df.columns) == len(sample)
    for k, v in sample.items():
        assert k in df.columns.values
        # check that a list feature column is converted correctly
        if len(v.shape) == 2:
            assert v.shape[1] == len(df[k][0])

    input_fn_with_label = pandas_input_fn(df, y_col=DEFAULT_RATING_COL)
    X, y = input_fn_with_label()
    features = df.copy()
    features.pop(DEFAULT_RATING_COL)
    assert len(X) == len(features.columns)
Example #8
def test_evaluation_log_hook(pd_df):
    data, users, items = pd_df

    # Run hook 10 times
    hook_frequency = 10
    train_steps = 101

    _, deep_columns = build_feature_columns(users, items, model_type='deep')

    model = build_model('deep_' + MODEL_DIR,
                        deep_columns=deep_columns,
                        save_checkpoints_steps=train_steps // hook_frequency)

    class EvaluationLogger(Logger):
        def __init__(self):
            self.eval_log = {}

        def log(self, metric, value):
            if metric not in self.eval_log:
                self.eval_log[metric] = []
            self.eval_log[metric].append(value)

        def get_log(self):
            return self.eval_log

    evaluation_logger = EvaluationLogger()

    hooks = [
        evaluation_log_hook(
            model,
            logger=evaluation_logger,
            true_df=data,
            y_col=DEFAULT_RATING_COL,
            eval_df=data.drop(DEFAULT_RATING_COL, axis=1),
            every_n_iter=train_steps // hook_frequency,
            model_dir='deep_' + MODEL_DIR,
            eval_fns=[rmse],
        )
    ]
    model.train(input_fn=pandas_input_fn(df=data,
                                         y_col=DEFAULT_RATING_COL,
                                         batch_size=1,
                                         num_epochs=None,
                                         shuffle=True),
                hooks=hooks,
                steps=train_steps)
    shutil.rmtree('deep_' + MODEL_DIR, ignore_errors=True)

    # Check if hook logged the given metric
    assert rmse.__name__ in evaluation_logger.get_log()
    assert len(evaluation_logger.get_log()[rmse.__name__]) == hook_frequency
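The two assertions above confirm that the hook logged the metric hook_frequency times. Reading the collected values back out is a one-liner (assumed usage of the same logger):

log = evaluation_logger.get_log()
print(log[rmse.__name__])  # a list of hook_frequency RMSE values, one per evaluation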
Example #9
def test_evaluation_log_hook(pd_df, tmp):
    data, users, items = pd_df

    # Run hook 10 times
    hook_frequency = 10
    train_steps = 10

    _, deep_columns = build_feature_columns(users, items, model_type="deep")

    model = build_model(
        tmp,
        deep_columns=deep_columns,
        save_checkpoints_steps=train_steps // hook_frequency,
    )

    evaluation_logger = MetricsLogger()

    # Train a model w/ the hook
    hooks = [
        evaluation_log_hook(
            model,
            logger=evaluation_logger,
            true_df=data,
            y_col=DEFAULT_RATING_COL,
            eval_df=data.drop(DEFAULT_RATING_COL, axis=1),
            every_n_iter=train_steps // hook_frequency,
            model_dir=tmp,
            eval_fns=[rmse],
        )
    ]
    model.train(
        input_fn=pandas_input_fn(
            df=data,
            y_col=DEFAULT_RATING_COL,
            batch_size=1,
            num_epochs=None,
            shuffle=True,
        ),
        hooks=hooks,
        steps=train_steps,
    )

    # Check if hook logged the given metric
    assert rmse.__name__ in evaluation_logger.get_log()
    assert len(evaluation_logger.get_log()[rmse.__name__]) == hook_frequency

    # Close the event file so that the model folder can be cleaned up.
    summary_writer = tf.summary.FileWriterCache.get(model.model_dir)
    summary_writer.close()
Example #10
def test_deep_model(pd_df, tmp):
    data, users, items = pd_df

    # Test if deep columns have user and item features
    _, deep_columns = build_feature_columns(users, items, model_type="deep")
    assert len(deep_columns) == 2
    # Check model type
    model = build_model(os.path.join(tmp, "deep_" + MODEL_DIR),
                        deep_columns=deep_columns)
    assert isinstance(model, tf.estimator.DNNRegressor)
    # Test if model train works
    model.train(input_fn=pandas_input_fn(df=data,
                                         y_col=DEFAULT_RATING_COL,
                                         batch_size=1,
                                         num_epochs=1,
                                         shuffle=False))

    # Close the event file so that the model folder can be cleaned up.
    summary_writer = tf.summary.FileWriterCache.get(model.model_dir)
    summary_writer.close()
Example #11
def test_wide_deep(pd_df, tmp):
    """Test `build_feature_columns` and `build_model`"""
    data, users, items = pd_df

    # Test wide model
    # Test if wide column has two original features and one crossed feature
    wide_columns, _ = build_feature_columns(users,
                                            items,
                                            model_type='wide',
                                            crossed_feat_dim=10)
    assert len(wide_columns) == 3
    # Check crossed feature dimension
    assert wide_columns[2].hash_bucket_size == 10
    # Check model type
    model = build_model(os.path.join(tmp, 'wide_' + MODEL_DIR),
                        wide_columns=wide_columns)
    assert isinstance(model, tf.estimator.LinearRegressor)
    # Test if model train works
    model.train(
        input_fn=pandas_input_fn(
            df=data,
            y_col=DEFAULT_RATING_COL,
            batch_size=1,
            num_epochs=None,
            shuffle=True,
        ),
        steps=1,
    )

    # Close the event file so that the model folder can be cleaned up.
    summary_writer = tf.summary.FileWriterCache.get(model.model_dir)
    summary_writer.close()

    # Test deep model
    # Test if deep columns have user and item features
    _, deep_columns = build_feature_columns(users, items, model_type='deep')
    assert len(deep_columns) == 2
    # Check model type
    model = build_model(os.path.join(tmp, 'deep_' + MODEL_DIR),
                        deep_columns=deep_columns)
    assert isinstance(model, tf.estimator.DNNRegressor)
    # Test if model train works
    model.train(input_fn=pandas_input_fn(df=data,
                                         y_col=DEFAULT_RATING_COL,
                                         batch_size=1,
                                         num_epochs=1,
                                         shuffle=False))

    # Close the event file so that the model folder can be cleaned up.
    summary_writer = tf.summary.FileWriterCache.get(model.model_dir)
    summary_writer.close()

    # Test wide and deep model
    # Test if wide and deep columns have correct features
    wide_columns, deep_columns = build_feature_columns(users,
                                                       items,
                                                       model_type='wide_deep')
    assert len(wide_columns) == 3
    assert len(deep_columns) == 2
    # Check model type
    model = build_model(
        os.path.join(tmp, 'wide_deep_' + MODEL_DIR),
        wide_columns=wide_columns,
        deep_columns=deep_columns,
    )
    assert isinstance(model, tf.estimator.DNNLinearCombinedRegressor)
    # Test if model train works
    model.train(
        input_fn=pandas_input_fn(
            df=data,
            y_col=DEFAULT_RATING_COL,
            batch_size=1,
            num_epochs=None,
            shuffle=True,
        ),
        steps=1,
    )

    # Close the event file so that the model folder can be cleaned up.
    summary_writer = tf.summary.FileWriterCache.get(model.model_dir)
    summary_writer.close()
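Taken together, the examples exercise a single workflow: build feature columns from the user and item IDs, build an Estimator, train it with a pandas-backed input function, and then export or evaluate it. A condensed sketch of that flow, reusing the same names and assumptions as the setup block at the top of the page:

wide_columns, deep_columns = build_feature_columns(users, items, model_type="wide_deep")
model = build_model(
    os.path.join(tmp, "wide_deep_" + MODEL_DIR),
    wide_columns=wide_columns,
    deep_columns=deep_columns,
)
model.train(
    input_fn=pandas_input_fn(df=data,
                             y_col=DEFAULT_RATING_COL,
                             batch_size=1,
                             num_epochs=None,
                             shuffle=True),
    steps=10,
)
# Score unlabeled rows with the checkpoint that training just wrote.
preds = list(model.predict(
    input_fn=pandas_input_fn(df=data.drop(DEFAULT_RATING_COL, axis=1),
                             batch_size=1)))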