def train(model: tf.keras.Model, train_data: List[Tuple[int, int, int]],
          test_data: List[Tuple[int, int, int]], topk_data: TopkData = None,
          optimizer=None, epochs=100, batch=512):
    """Generic eager-mode training loop.

    Each epoch trains on ``train_ds`` with binary cross-entropy (plus the
    model's own regularization losses), then reports loss/AUC/precision/recall
    on both splits; when ``topk_data`` is given, a top-k evaluation follows.

    :param model: model to train; ``model(ui)`` returns a score in [0, 1]
    :param train_data: training triples
    :param test_data: test triples
    :param topk_data: optional data for top-k evaluation after each epoch
    :param optimizer: optimizer, defaults to Adam
    :param epochs: number of training epochs
    :param batch: batch size
    """
    if optimizer is None:
        optimizer = tf.keras.optimizers.Adam()
    train_ds, test_ds = prepare_ds(train_data, test_data, batch)

    loss_mean_metric = tf.keras.metrics.Mean()
    auc_metric = tf.keras.metrics.AUC()
    precision_metric = tf.keras.metrics.Precision()
    recall_metric = tf.keras.metrics.Recall()
    all_metrics = (loss_mean_metric, auc_metric, precision_metric, recall_metric)
    loss_object = tf.keras.losses.BinaryCrossentropy()
    if topk_data:
        score_fn = get_score_fn(model)

    def reset_metrics():
        # Reset through tf.py_function so it also works if traced into a graph.
        for metric in all_metrics:
            tf.py_function(metric.reset_states, [], [])

    def update_metrics(loss, label, score):
        loss_mean_metric.update_state(loss)
        auc_metric.update_state(label, score)
        precision_metric.update_state(label, score)
        recall_metric.update_state(label, score)

    def get_metric_results():
        return tuple(metric.result() for metric in all_metrics)

    @tf.function
    def train_batch(ui, label):
        with tf.GradientTape() as tape:
            score = model(ui, training=True)
            loss = loss_object(label, score) + sum(model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        update_metrics(loss, label, score)

    @tf.function
    def test_batch(ui, label):
        score = model(ui)
        loss = loss_object(label, score) + sum(model.losses)
        update_metrics(loss, label, score)

    for epoch in range(epochs):
        epoch_start_time = time.time()

        reset_metrics()
        for ui, label in train_ds:
            train_batch(ui, label)
        train_loss, train_auc, train_precision, train_recall = get_metric_results()

        reset_metrics()
        for ui, label in test_ds:
            test_batch(ui, label)
        test_loss, test_auc, test_precision, test_recall = get_metric_results()

        log(epoch, train_loss, train_auc, train_precision, train_recall,
            test_loss, test_auc, test_precision, test_recall)
        if topk_data:
            topk(topk_data, score_fn)
        print('epoch_time=', time.time() - epoch_start_time, 's', sep='')
def test(model: tf.keras.Model, train_data: List[Tuple[int, int, int]],
         test_data: List[Tuple[int, int, int]], topk_data: TopkData,
         loss_object=None, batch=512) -> None:
    """Generic evaluation flow.

    Evaluates the model on both splits, logs the metrics (epoch index -1
    marks a standalone evaluation), then runs the top-k evaluation.

    :param model: model to evaluate
    :param train_data: training set
    :param test_data: test set
    :param topk_data: data for top-k evaluation
    :param loss_object: loss function, defaults to BinaryCrossentropy
    :param batch: batch size
    """
    if loss_object is None:
        loss_object = tf.keras.losses.BinaryCrossentropy()
    train_ds, test_ds = prepare_ds(train_data, test_data, batch)

    train_metrics = _evaluate(model, train_ds, loss_object)
    test_metrics = _evaluate(model, test_ds, loss_object)
    train_loss, train_auc, train_precision, train_recall = train_metrics
    test_loss, test_auc, test_precision, test_recall = test_metrics

    log(-1, train_loss, train_auc, train_precision, train_recall,
        test_loss, test_auc, test_precision, test_recall)
    topk(topk_data, get_score_fn(model))
def train_model():
    """Epoch loop for a two-headed model: ``model(ui)`` returns
    ``(score, predicted_labels)`` and both outputs are trained against the
    same labels.

    NOTE(review): this closure relies entirely on free variables from the
    enclosing scope — model, optimizer, loss_object, epochs, train_ds,
    test_ds, topk_data, score_fn and the four metric objects — confirm they
    are all defined by the wrapping function. The tf.range loop and the
    tf.py_function wrappers suggest it is meant to be graph-traceable;
    verify whether a @tf.function decorator exists above this view.
    """

    def reset_metrics():
        # tf.py_function keeps the reset executable even inside a traced graph.
        for metric in [
                loss_mean_metric, auc_metric, precision_metric, recall_metric
        ]:
            tf.py_function(metric.reset_states, [], [])

    def update_metrics(loss, label, score):
        # Accumulate running loss and classification metrics for one split.
        loss_mean_metric.update_state(loss)
        auc_metric.update_state(label, score)
        precision_metric.update_state(label, score)
        recall_metric.update_state(label, score)

    def get_metric_results():
        return loss_mean_metric.result(), auc_metric.result(
        ), precision_metric.result(), recall_metric.result()

    @tf.function
    def train_batch(ui, label):
        with tf.GradientTape() as tape:
            # Both heads are penalized against `label`; the model's own
            # regularization losses are added on top.
            score, predicted_labels = model(ui, training=True)
            loss = loss_object(label, score) + loss_object(
                label, predicted_labels) + sum(model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        update_metrics(loss, label, score)

    @tf.function
    def test_batch(ui, label):
        # Inference-mode forward pass; metrics only, no gradient step.
        score, predicted_labels = model(ui)
        loss = loss_object(label, score) + loss_object(
            label, predicted_labels) + sum(model.losses)
        update_metrics(loss, label, score)

    # tf.range (not range) so `epoch` is a tensor if this loop gets traced.
    for epoch in tf.range(epochs):
        epoch_start_time = time.time()
        reset_metrics()
        for ui, label in train_ds:
            train_batch(ui, label)
        train_loss, train_auc, train_precision, train_recall = get_metric_results()
        reset_metrics()
        for ui, label in test_ds:
            test_batch(ui, label)
        test_loss, test_auc, test_precision, test_recall = get_metric_results()
        # log/topk are Python side effects, so they go through tf.py_function
        # to run eagerly even under tracing.
        tf.py_function(log, [
            epoch, train_loss, train_auc, train_precision, train_recall,
            test_loss, test_auc, test_precision, test_recall
        ], [])
        tf.py_function(lambda: topk(topk_data, score_fn), [], [])
        print('epoch_time=', time.time() - epoch_start_time, 's', sep='')
def _train_graph(model, train_ds, test_ds, topk_data, optimizer, loss_object, epochs):
    """Training loop whose per-batch step is compiled with tf.function.

    After each epoch, both splits are re-evaluated via ``_evaluate``, the
    metrics are logged, and a top-k evaluation runs.
    """
    score_fn = get_score_fn(model)

    @tf.function
    def step(ui, label):
        # One forward/backward pass and parameter update.
        with tf.GradientTape() as tape:
            score = tf.squeeze(model(ui, training=True))
            loss = loss_object(label, score) + sum(model.losses)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

    for epoch in range(epochs):
        for ui, label in train_ds:
            step(ui, label)
        train_metrics = _evaluate(model, train_ds, loss_object)
        test_metrics = _evaluate(model, test_ds, loss_object)
        log(epoch, *train_metrics, *test_metrics)
        topk(topk_data, score_fn)
def train(model: SLIM, topk_data: TopkData, l12=0.01, epochs=1000):
    """Train a SLIM model with FTRL, then run a top-k evaluation.

    The same strength ``l12`` is applied to both the L1 and L2
    regularization terms of the FTRL optimizer.

    :param model: SLIM model exposing ``loss(training=...)`` and ``losses``
    :param topk_data: data for the final top-k evaluation
    :param l12: shared L1/L2 regularization strength
    :param epochs: number of full-loss optimization steps
    """
    optimizer = tf.keras.optimizers.Ftrl(l1_regularization_strength=l12,
                                         l2_regularization_strength=l12)

    # PEP 8 (E731): a named def instead of a lambda assigned to a name.
    def score_fn(ui):
        return model({k: tf.constant(v, dtype=tf.int32) for k, v in ui.items()})

    @tf.function
    def train_step():
        with tf.GradientTape() as tape:
            loss = model.loss(training=True) + sum(model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        return loss

    def _should_log(epoch):
        # Log densely at the start, then progressively more sparsely.
        return (epoch == 1 or (epoch < 20 and epoch % 5 == 0)
                or (epoch < 100 and epoch % 20 == 0) or epoch % 100 == 0)

    for epoch in range(1, epochs + 1):
        loss = train_step()
        if _should_log(epoch):
            print('epoch=', epoch, ', loss=', loss.numpy(), sep='')
    topk(topk_data, score_fn)
def on_epoch_end(self, epoch, logs=None):
    """Keras callback hook: forward this epoch's metrics to `log`, then run top-k."""
    train_metrics = (logs['loss'], logs['AUC'], logs['Precision'], logs['Recall'])
    val_metrics = (logs['val_loss'], logs['val_AUC'],
                   logs['val_Precision'], logs['val_Recall'])
    log(epoch, *train_metrics, *val_metrics)
    topk(self.topk_data, self.score_fn)
if __name__ == '__main__':
    from Recommender_System.data import data_loader, data_process
    from Recommender_System.algorithm.ItemCF.tool import item_similarity, user_item_score
    from Recommender_System.algorithm.common import topk

    # MovieLens-100k, packed without negative sampling.
    n_user, n_item, train_data, test_data, topk_data = data_process.pack(
        data_loader.ml100k, negative_sample_ratio=0)

    # Item-item similarity matrix, then per-user item scores derived from it.
    W = item_similarity(train_data, n_user, n_item)
    scores = user_item_score(train_data, n_user, n_item, W, N=10)

    def score_fn(ui):
        # Look up the precomputed score for each (user, item) pair.
        return [scores[u][i] for u, i in zip(ui['user_id'], ui['item_id'])]

    topk(topk_data, score_fn)
def on_epoch_end(self, epoch, logs=None):
    """Keras callback hook: log this epoch's metrics, then run the top-k evaluation."""
    keys = ('loss', 'auc', 'precision', 'recall')
    train_metrics = [logs[k] for k in keys]
    val_metrics = [logs['val_' + k] for k in keys]
    log(epoch, *train_metrics, *val_metrics)
    topk(self.topk_data, self.score_fn)