def test_build_model(pd_df):
    """Test `build_model` for the wide, deep, and wide_deep estimator types.

    For each model type: build feature columns, construct the estimator,
    check its class, run a short training pass, and remove the model dir.

    Args:
        pd_df: fixture yielding (ratings data, user features, item features).
    """
    data, users, items = pd_df

    # Test wide model
    wide_columns, deep_columns = build_feature_columns(users, items, model_type='wide')
    model = build_model('wide_' + MODEL_DIR, wide_columns=wide_columns)
    assert isinstance(model, tf.estimator.LinearRegressor)
    model = build_model('wide_' + MODEL_DIR, wide_columns=wide_columns, deep_columns=deep_columns)
    assert isinstance(model, tf.estimator.LinearRegressor)
    # Test if model train works
    model.train(
        input_fn=pandas_input_fn(
            df=data, y_col=DEFAULT_RATING_COL, batch_size=1, num_epochs=10, shuffle=True
        )
    )
    # Close the event file before deletion; an open event file prevents the
    # model folder from being removed (notably on Windows).
    tf.summary.FileWriterCache.get(model.model_dir).close()
    shutil.rmtree('wide_' + MODEL_DIR, ignore_errors=True)

    # Test deep model
    wide_columns, deep_columns = build_feature_columns(users, items, model_type='deep')
    model = build_model('deep_' + MODEL_DIR, deep_columns=deep_columns)
    assert isinstance(model, tf.estimator.DNNRegressor)
    model = build_model('deep_' + MODEL_DIR, wide_columns=wide_columns, deep_columns=deep_columns)
    assert isinstance(model, tf.estimator.DNNRegressor)
    # Test if model train works
    model.train(
        input_fn=pandas_input_fn(
            df=data, y_col=DEFAULT_RATING_COL, batch_size=1, num_epochs=10, shuffle=True
        )
    )
    # Close the event file so that the model folder can be cleaned up.
    tf.summary.FileWriterCache.get(model.model_dir).close()
    shutil.rmtree('deep_' + MODEL_DIR, ignore_errors=True)

    # Test wide_deep model
    wide_columns, deep_columns = build_feature_columns(users, items, model_type='wide_deep')
    model = build_model('wide_deep_' + MODEL_DIR, wide_columns=wide_columns, deep_columns=deep_columns)
    assert isinstance(model, tf.estimator.DNNLinearCombinedRegressor)
    # Test if model train works
    model.train(
        input_fn=pandas_input_fn(
            df=data, y_col=DEFAULT_RATING_COL, batch_size=1, num_epochs=10, shuffle=True
        )
    )
    # Close the event file so that the model folder can be cleaned up.
    tf.summary.FileWriterCache.get(model.model_dir).close()
    shutil.rmtree('wide_deep_' + MODEL_DIR, ignore_errors=True)
def test_wide_model(pd_df, tmp):
    """Check wide feature columns and that a linear regressor builds and trains."""
    data, users, items = pd_df

    # Wide columns: the two original features plus one crossed feature.
    wide_columns, _ = build_feature_columns(
        users, items, model_type="wide", crossed_feat_dim=10
    )
    assert len(wide_columns) == 3
    # The crossed feature is hashed into the requested number of buckets.
    assert wide_columns[2].hash_bucket_size == 10

    # Check model type
    model_dir = os.path.join(tmp, "wide_" + MODEL_DIR)
    model = build_model(model_dir, wide_columns=wide_columns)
    assert isinstance(model, tf.estimator.LinearRegressor)

    # A single training step is enough to verify the input pipeline wiring.
    train_fn = pandas_input_fn(
        df=data,
        y_col=DEFAULT_RATING_COL,
        batch_size=1,
        num_epochs=None,
        shuffle=True,
    )
    model.train(input_fn=train_fn, steps=1)

    # Close the event file so that the model folder can be cleaned up.
    tf.summary.FileWriterCache.get(model.model_dir).close()
def test_wide_deep_model(pd_df, tmp):
    """Check wide+deep feature columns and that the combined model builds and trains."""
    data, users, items = pd_df

    # Test if wide and deep columns have correct features
    wide_columns, deep_columns = build_feature_columns(
        users, items, model_type="wide_deep"
    )
    assert len(wide_columns) == 3
    assert len(deep_columns) == 2

    # Check model type
    model_dir = os.path.join(tmp, "wide_deep_" + MODEL_DIR)
    model = build_model(
        model_dir, wide_columns=wide_columns, deep_columns=deep_columns
    )
    assert isinstance(model, tf.estimator.DNNLinearCombinedRegressor)

    # A single training step is enough to verify the input pipeline wiring.
    train_fn = pandas_input_fn(
        df=data,
        y_col=DEFAULT_RATING_COL,
        batch_size=1,
        num_epochs=None,
        shuffle=True,
    )
    model.train(input_fn=train_fn, steps=1)

    # Close the event file so that the model folder can be cleaned up.
    tf.summary.FileWriterCache.get(model.model_dir).close()
def test_build_model(pd_df):
    """Test `build_model` for the wide, deep, and wide_deep estimator types.

    For each model type: build feature columns, construct the estimator,
    check its class, run a short training pass, and remove the model dir.

    Args:
        pd_df: fixture yielding (ratings data, user features, item features).
    """
    data, users, items = pd_df

    # Test wide model
    wide_columns, deep_columns = build_feature_columns(users, items, model_type='wide')
    model = build_model('wide_' + MODEL_DIR, wide_columns=wide_columns)
    assert isinstance(model, tf.estimator.LinearRegressor)
    model = build_model('wide_' + MODEL_DIR, wide_columns=wide_columns, deep_columns=deep_columns)
    assert isinstance(model, tf.estimator.LinearRegressor)
    # Test if model train works
    model.train(
        input_fn=pandas_input_fn(
            df=data, y_col=DEFAULT_RATING_COL, batch_size=1, num_epochs=10, shuffle=True
        )
    )
    # Close the event file before deletion; an open event file prevents the
    # model folder from being removed (notably on Windows).
    tf.summary.FileWriterCache.get(model.model_dir).close()
    shutil.rmtree('wide_' + MODEL_DIR, ignore_errors=True)

    # Test deep model
    wide_columns, deep_columns = build_feature_columns(users, items, model_type='deep')
    model = build_model('deep_' + MODEL_DIR, deep_columns=deep_columns)
    assert isinstance(model, tf.estimator.DNNRegressor)
    model = build_model('deep_' + MODEL_DIR, wide_columns=wide_columns, deep_columns=deep_columns)
    assert isinstance(model, tf.estimator.DNNRegressor)
    # Test if model train works
    model.train(
        input_fn=pandas_input_fn(
            df=data, y_col=DEFAULT_RATING_COL, batch_size=1, num_epochs=10, shuffle=True
        )
    )
    # Close the event file so that the model folder can be cleaned up.
    tf.summary.FileWriterCache.get(model.model_dir).close()
    shutil.rmtree('deep_' + MODEL_DIR, ignore_errors=True)

    # Test wide_deep model
    wide_columns, deep_columns = build_feature_columns(users, items, model_type='wide_deep')
    model = build_model('wide_deep_' + MODEL_DIR, wide_columns=wide_columns, deep_columns=deep_columns)
    assert isinstance(model, tf.estimator.DNNLinearCombinedRegressor)
    # Test if model train works
    model.train(
        input_fn=pandas_input_fn(
            df=data, y_col=DEFAULT_RATING_COL, batch_size=1, num_epochs=10, shuffle=True
        )
    )
    # Close the event file so that the model folder can be cleaned up.
    tf.summary.FileWriterCache.get(model.model_dir).close()
    shutil.rmtree('wide_deep_' + MODEL_DIR, ignore_errors=True)
def test_evaluation_log_hook(pd_df):
    """Test that `evaluation_log_hook` logs the metric once per checkpoint.

    Trains for 101 steps with checkpoints every 10 steps so the hook fires
    `hook_frequency` (10) times, then checks the logger recorded `rmse`
    exactly that many times.

    Args:
        pd_df: fixture yielding (ratings data, user features, item features).
    """
    data, users, items = pd_df

    # Run hook 10 times
    hook_frequency = 10
    train_steps = 101

    _, deep_columns = build_feature_columns(users, items, model_type='deep')
    model = build_model(
        'deep_' + MODEL_DIR,
        deep_columns=deep_columns,
        save_checkpoints_steps=train_steps // hook_frequency,
    )

    class EvaluationLogger(Logger):
        """Minimal Logger that accumulates (metric -> list of values)."""

        def __init__(self):
            self.eval_log = {}

        def log(self, metric, value):
            if metric not in self.eval_log:
                self.eval_log[metric] = []
            self.eval_log[metric].append(value)

        def get_log(self):
            return self.eval_log

    evaluation_logger = EvaluationLogger()

    hooks = [
        evaluation_log_hook(
            model,
            logger=evaluation_logger,
            true_df=data,
            y_col=DEFAULT_RATING_COL,
            eval_df=data.drop(DEFAULT_RATING_COL, axis=1),
            every_n_iter=train_steps // hook_frequency,
            model_dir='deep_' + MODEL_DIR,
            eval_fns=[rmse],
        )
    ]
    model.train(
        input_fn=pandas_input_fn(
            df=data,
            y_col=DEFAULT_RATING_COL,
            batch_size=1,
            num_epochs=None,
            shuffle=True,
        ),
        hooks=hooks,
        steps=train_steps,
    )

    # Close the event file before deletion; an open event file prevents the
    # model folder from being removed (notably on Windows).
    tf.summary.FileWriterCache.get(model.model_dir).close()
    shutil.rmtree('deep_' + MODEL_DIR, ignore_errors=True)

    # Check if hook logged the given metric
    assert rmse.__name__ in evaluation_logger.get_log()
    assert len(evaluation_logger.get_log()[rmse.__name__]) == hook_frequency
def test_pandas_input_fn_for_saved_model(pd_df, tmp):
    """Test `export_model` and `pandas_input_fn_for_saved_model`"""
    data, users, items = pd_df
    model_dir = os.path.join(tmp, "model")
    export_dir = os.path.join(tmp, "export")

    _, deep_columns = build_feature_columns(users, items, model_type="deep")

    # Train one step so there is a checkpoint available to export.
    model = build_model(model_dir, deep_columns=deep_columns)
    train_fn = pandas_input_fn(
        df=data, y_col=DEFAULT_RATING_COL, batch_size=1, num_epochs=None, shuffle=True
    )
    model.train(input_fn=train_fn, steps=1)

    # Test export_model function
    exported_path = export_model(
        model=model,
        train_input_fn=train_fn,
        eval_input_fn=pandas_input_fn(df=data, y_col=DEFAULT_RATING_COL),
        tf_feat_cols=deep_columns,
        base_dir=export_dir,
    )
    saved_model = tf.contrib.estimator.SavedModelEstimator(exported_path)

    # Test pandas_input_fn_for_saved_model with the saved model
    test = data.drop(DEFAULT_RATING_COL, axis=1)
    test.reset_index(drop=True, inplace=True)
    predictions = saved_model.predict(
        pandas_input_fn_for_saved_model(
            df=test,
            feat_name_type={
                DEFAULT_USER_COL: int,
                DEFAULT_ITEM_COL: int,
                ITEM_FEAT_COL: list,
            },
        )
    )
    # Materialize one prediction per row; raises if the input fn is broken.
    list(itertools.islice(predictions, len(test)))

    # Close the event file so that the model folder can be cleaned up.
    tf.summary.FileWriterCache.get(model.model_dir).close()
def test_evaluation_log_hook(pd_df, tmp):
    """Test that `evaluation_log_hook` logs the metric once per checkpoint."""
    data, users, items = pd_df

    # Run hook 10 times
    hook_frequency = 10
    train_steps = 10
    checkpoint_steps = train_steps // hook_frequency

    _, deep_columns = build_feature_columns(users, items, model_type="deep")
    model = build_model(
        tmp,
        deep_columns=deep_columns,
        save_checkpoints_steps=checkpoint_steps,
    )

    evaluation_logger = MetricsLogger()

    # Train a model w/ the hook
    hooks = [
        evaluation_log_hook(
            model,
            logger=evaluation_logger,
            true_df=data,
            y_col=DEFAULT_RATING_COL,
            eval_df=data.drop(DEFAULT_RATING_COL, axis=1),
            every_n_iter=checkpoint_steps,
            model_dir=tmp,
            eval_fns=[rmse],
        )
    ]
    train_fn = pandas_input_fn(
        df=data,
        y_col=DEFAULT_RATING_COL,
        batch_size=1,
        num_epochs=None,
        shuffle=True,
    )
    model.train(input_fn=train_fn, hooks=hooks, steps=train_steps)

    # Check if hook logged the given metric
    logged = evaluation_logger.get_log()
    assert rmse.__name__ in logged
    assert len(logged[rmse.__name__]) == hook_frequency

    # Close the event file so that the model folder can be cleaned up.
    tf.summary.FileWriterCache.get(model.model_dir).close()
def test_deep_model(pd_df, tmp):
    """Check deep feature columns and that a DNN regressor builds and trains."""
    data, users, items = pd_df

    # Test if deep columns have user and item features
    _, deep_columns = build_feature_columns(users, items, model_type="deep")
    assert len(deep_columns) == 2

    # Check model type
    model_dir = os.path.join(tmp, "deep_" + MODEL_DIR)
    model = build_model(model_dir, deep_columns=deep_columns)
    assert isinstance(model, tf.estimator.DNNRegressor)

    # Single deterministic pass over the data verifies training works.
    train_fn = pandas_input_fn(
        df=data, y_col=DEFAULT_RATING_COL, batch_size=1, num_epochs=1, shuffle=False
    )
    model.train(input_fn=train_fn)

    # Close the event file so that the model folder can be cleaned up.
    tf.summary.FileWriterCache.get(model.model_dir).close()
def test_wide_deep(pd_df, tmp):
    """Test `build_feature_columns` and `build_model`"""
    data, users, items = pd_df

    # --- Wide model ---
    # Expect the two original features plus one crossed feature.
    wide_columns, _ = build_feature_columns(
        users, items, model_type='wide', crossed_feat_dim=10
    )
    assert len(wide_columns) == 3
    # The crossed feature is hashed into the requested number of buckets.
    assert wide_columns[2].hash_bucket_size == 10

    model = build_model(os.path.join(tmp, 'wide_' + MODEL_DIR), wide_columns=wide_columns)
    assert isinstance(model, tf.estimator.LinearRegressor)

    # A single training step verifies the pipeline end to end.
    model.train(
        input_fn=pandas_input_fn(
            df=data,
            y_col=DEFAULT_RATING_COL,
            batch_size=1,
            num_epochs=None,
            shuffle=True,
        ),
        steps=1,
    )
    # Close the event file so that the model folder can be cleaned up.
    tf.summary.FileWriterCache.get(model.model_dir).close()

    # --- Deep model ---
    # Test if deep columns have user and item features
    _, deep_columns = build_feature_columns(users, items, model_type='deep')
    assert len(deep_columns) == 2

    model = build_model(os.path.join(tmp, 'deep_' + MODEL_DIR), deep_columns=deep_columns)
    assert isinstance(model, tf.estimator.DNNRegressor)

    # Single deterministic pass over the data.
    model.train(
        input_fn=pandas_input_fn(
            df=data, y_col=DEFAULT_RATING_COL, batch_size=1, num_epochs=1, shuffle=False
        )
    )
    # Close the event file so that the model folder can be cleaned up.
    tf.summary.FileWriterCache.get(model.model_dir).close()

    # --- Wide and deep model ---
    # Test if wide and deep columns have correct features
    wide_columns, deep_columns = build_feature_columns(users, items, model_type='wide_deep')
    assert len(wide_columns) == 3
    assert len(deep_columns) == 2

    model = build_model(
        os.path.join(tmp, 'wide_deep_' + MODEL_DIR),
        wide_columns=wide_columns,
        deep_columns=deep_columns,
    )
    assert isinstance(model, tf.estimator.DNNLinearCombinedRegressor)

    # A single training step verifies the pipeline end to end.
    model.train(
        input_fn=pandas_input_fn(
            df=data,
            y_col=DEFAULT_RATING_COL,
            batch_size=1,
            num_epochs=None,
            shuffle=True,
        ),
        steps=1,
    )
    # Close the event file so that the model folder can be cleaned up.
    tf.summary.FileWriterCache.get(model.model_dir).close()