def test_wide_model(pd_df, tmp):
    data, users, items = pd_df

    # Test if the wide columns have two original features and one crossed feature
    wide_columns, _ = build_feature_columns(
        users, items, model_type="wide", crossed_feat_dim=10
    )
    assert len(wide_columns) == 3
    # Check the crossed feature dimension
    assert wide_columns[2].hash_bucket_size == 10

    # Check the model type
    model = build_model(
        os.path.join(tmp, "wide_" + MODEL_DIR), wide_columns=wide_columns
    )
    assert isinstance(model, tf.estimator.LinearRegressor)

    # Test if model training works
    model.train(
        input_fn=pandas_input_fn(
            df=data,
            y_col=DEFAULT_RATING_COL,
            batch_size=1,
            num_epochs=None,
            shuffle=True,
        ),
        steps=1,
    )

    # Close the event file so that the model folder can be cleaned up.
    summary_writer = tf.compat.v1.summary.FileWriterCache.get(model.model_dir)
    summary_writer.close()
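
# A sketch (not part of the test suite) of why the crossed-feature assertion
# above holds: tf.feature_column.crossed_column exposes its bucket count as
# `hash_bucket_size`. The vocabulary values here are hypothetical; the real
# construction lives inside build_feature_columns.
def _sketch_crossed_column():
    user = tf.feature_column.categorical_column_with_vocabulary_list(
        DEFAULT_USER_COL, [1, 2, 3]
    )
    item = tf.feature_column.categorical_column_with_vocabulary_list(
        DEFAULT_ITEM_COL, [1, 2, 3]
    )
    crossed = tf.feature_column.crossed_column([user, item], hash_bucket_size=10)
    assert crossed.hash_bucket_size == 10
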

def test_wide_deep_model(pd_df, tmp):
    data, users, items = pd_df

    # Test if the wide and deep columns have the correct features
    wide_columns, deep_columns = build_feature_columns(
        users, items, model_type="wide_deep"
    )
    assert len(wide_columns) == 3
    assert len(deep_columns) == 2

    # Check the model type
    model = build_model(
        os.path.join(tmp, "wide_deep_" + MODEL_DIR),
        wide_columns=wide_columns,
        deep_columns=deep_columns,
    )
    assert isinstance(model, tf.estimator.DNNLinearCombinedRegressor)

    # Test if model training works
    model.train(
        input_fn=pandas_input_fn(
            df=data,
            y_col=DEFAULT_RATING_COL,
            batch_size=1,
            num_epochs=None,
            shuffle=True,
        ),
        steps=1,
    )

    # Close the event file so that the model folder can be cleaned up.
    summary_writer = tf.compat.v1.summary.FileWriterCache.get(model.model_dir)
    summary_writer.close()
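
# For reference, a sketch of the estimator that build_model presumably wires
# up for "wide_deep": the wide columns feed the linear part and the deep
# columns feed the DNN part. Illustrative only; the hidden-unit sizes are
# hypothetical and the real configuration lives inside build_model.
def _sketch_wide_deep_estimator(model_dir, wide_columns, deep_columns):
    return tf.estimator.DNNLinearCombinedRegressor(
        model_dir=model_dir,
        linear_feature_columns=wide_columns,
        dnn_feature_columns=deep_columns,
        dnn_hidden_units=[16, 8],
    )
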

def test_pandas_input_fn_for_saved_model(pd_df, tmp):
    """Test `export_model` and `pandas_input_fn_for_saved_model`."""
    data, users, items = pd_df
    model_dir = os.path.join(tmp, "model")
    export_dir = os.path.join(tmp, "export")

    _, deep_columns = build_feature_columns(users, items, model_type="deep")

    # Train a model
    model = build_model(model_dir, deep_columns=deep_columns)
    train_fn = pandas_input_fn(
        df=data,
        y_col=DEFAULT_RATING_COL,
        batch_size=1,
        num_epochs=None,
        shuffle=True,
    )
    model.train(input_fn=train_fn, steps=1)

    # Test export_model function
    exported_path = export_model(
        model=model,
        train_input_fn=train_fn,
        eval_input_fn=pandas_input_fn(df=data, y_col=DEFAULT_RATING_COL),
        tf_feat_cols=deep_columns,
        base_dir=export_dir,
    )
    saved_model = tf.saved_model.load(exported_path, tags="serve")

    # Test pandas_input_fn_for_saved_model with the saved model
    test = data.drop(DEFAULT_RATING_COL, axis=1)
    test.reset_index(drop=True, inplace=True)
    list(
        itertools.islice(
            saved_model.signatures["predict"](
                examples=pandas_input_fn_for_saved_model(
                    df=test,
                    feat_name_type={
                        DEFAULT_USER_COL: int,
                        DEFAULT_ITEM_COL: int,
                        ITEM_FEAT_COL: list,
                    },
                )()["inputs"]
            ),
            len(test),
        )
    )

    # Close the event file so that the model folder can be cleaned up.
    summary_writer = tf.compat.v1.summary.FileWriterCache.get(model.model_dir)
    summary_writer.close()
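
# The "predict" signature above consumes what pandas_input_fn_for_saved_model
# returns under the "inputs" key, presumably serialized tf.Example protos (the
# usual payload for a parsing serving-input receiver). A sketch of that
# encoding for a single hypothetical row, with integer ids and a list-valued
# item-feature column; the real serialization is done by the library.
def _sketch_serialized_example():
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                DEFAULT_USER_COL: tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[1])
                ),
                DEFAULT_ITEM_COL: tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[1])
                ),
                ITEM_FEAT_COL: tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[1, 1, 1])
                ),
            }
        )
    )
    return example.SerializeToString()
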

def test_evaluation_log_hook(pd_df, tmp):
    data, users, items = pd_df

    # Run the hook 10 times
    hook_frequency = 10
    train_steps = 10

    _, deep_columns = build_feature_columns(users, items, model_type="deep")

    model = build_model(
        tmp,
        deep_columns=deep_columns,
        save_checkpoints_steps=train_steps // hook_frequency,
    )

    evaluation_logger = MetricsLogger()

    # Train a model with the hook
    hooks = [
        evaluation_log_hook(
            model,
            logger=evaluation_logger,
            true_df=data,
            y_col=DEFAULT_RATING_COL,
            eval_df=data.drop(DEFAULT_RATING_COL, axis=1),
            every_n_iter=train_steps // hook_frequency,
            model_dir=tmp,
            eval_fns=[rmse],
        )
    ]
    model.train(
        input_fn=pandas_input_fn(
            df=data,
            y_col=DEFAULT_RATING_COL,
            batch_size=1,
            num_epochs=None,
            shuffle=True,
        ),
        hooks=hooks,
        steps=train_steps,
    )

    # Check if the hook logged the given metric
    assert rmse.__name__ in evaluation_logger.get_log()
    assert len(evaluation_logger.get_log()[rmse.__name__]) == hook_frequency

    # Close the event file so that the model folder can be cleaned up.
    summary_writer = tf.compat.v1.summary.FileWriterCache.get(model.model_dir)
    summary_writer.close()
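
# A minimal sketch of the logger interface the hook test relies on:
# log(metric, value) plus get_log() returning {metric_name: [values]}.
# The real MetricsLogger is imported from the repo's TF utilities; this
# stand-in is illustrative only and is not used by the tests.
class _SketchMetricsLogger:
    def __init__(self):
        self._log = {}

    def log(self, metric, value):
        # Append each value to the list kept for its metric name.
        self._log.setdefault(metric, []).append(value)

    def get_log(self):
        return self._log
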

def test_deep_model(pd_df, tmp):
    data, users, items = pd_df

    # Test if the deep columns have user and item features
    _, deep_columns = build_feature_columns(users, items, model_type="deep")
    assert len(deep_columns) == 2

    # Check the model type
    model = build_model(
        os.path.join(tmp, "deep_" + MODEL_DIR), deep_columns=deep_columns
    )
    assert isinstance(model, tf.estimator.DNNRegressor)

    # Test if model training works
    model.train(
        input_fn=pandas_input_fn(
            df=data,
            y_col=DEFAULT_RATING_COL,
            batch_size=1,
            num_epochs=1,
            shuffle=False,
        )
    )

    # Close the event file so that the model folder can be cleaned up.
    summary_writer = tf.compat.v1.summary.FileWriterCache.get(model.model_dir)
    summary_writer.close()
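
# The tests above assume module-level fixtures roughly like the sketches below
# (illustrative only; the repo's own `pd_df` and `tmp` fixtures are the source
# of truth, and the sample values here are hypothetical). `pd_df` yields a
# small ratings DataFrame plus the arrays of unique user and item ids; `tmp`
# yields a temporary directory for model folders. Assumes `pytest` and
# `pandas as pd` are imported at the top of the module.
@pytest.fixture(scope="module")
def _pd_df_sketch():
    df = pd.DataFrame(
        {
            DEFAULT_USER_COL: [1, 1, 2, 2, 3],
            DEFAULT_ITEM_COL: [1, 2, 1, 3, 2],
            ITEM_FEAT_COL: [[1, 1, 1]] * 5,
            DEFAULT_RATING_COL: [5, 4, 3, 4, 5],
        }
    )
    users = df.drop_duplicates(DEFAULT_USER_COL)[DEFAULT_USER_COL].values
    items = df.drop_duplicates(DEFAULT_ITEM_COL)[DEFAULT_ITEM_COL].values
    return df, users, items


@pytest.fixture
def _tmp_sketch(tmp_path):
    # pytest's built-in tmp_path fixture already handles cleanup.
    return str(tmp_path)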