Example #1
0
def train(model: tf.keras.Model, train_data: List[Tuple[int, int, int]], test_data: List[Tuple[int, int, int]],
          topk_data: TopkData = None, optimizer=None, epochs=100, batch=512):
    """Generic eager-mode training loop with per-epoch evaluation.

    :param model: keras model mapping a feature dict to a score
    :param train_data: training triples (presumably (user, item, label) — matches prepare_ds usage)
    :param test_data: test triples of the same shape
    :param topk_data: optional data for top-k evaluation after every epoch
    :param optimizer: defaults to Adam when None
    :param epochs: number of training epochs
    :param batch: batch size handed to prepare_ds
    """
    if optimizer is None:
        optimizer = tf.keras.optimizers.Adam()

    train_ds, test_ds = prepare_ds(train_data, test_data, batch)

    loss_mean_metric = tf.keras.metrics.Mean()
    auc_metric = tf.keras.metrics.AUC()
    precision_metric = tf.keras.metrics.Precision()
    recall_metric = tf.keras.metrics.Recall()
    loss_object = tf.keras.losses.BinaryCrossentropy()
    # Explicit None check: a valid-but-falsy topk_data would otherwise be skipped.
    if topk_data is not None:
        score_fn = get_score_fn(model)

    def reset_metrics():
        # Called from the eager epoch loop, so the metrics can be reset
        # directly; wrapping in tf.py_function (as elsewhere in this file)
        # is only needed inside a tf.function graph.
        for metric in (loss_mean_metric, auc_metric, precision_metric, recall_metric):
            metric.reset_states()

    def update_metrics(loss, label, score):
        loss_mean_metric.update_state(loss)
        auc_metric.update_state(label, score)
        precision_metric.update_state(label, score)
        recall_metric.update_state(label, score)

    def get_metric_results():
        return loss_mean_metric.result(), auc_metric.result(), precision_metric.result(), recall_metric.result()

    @tf.function
    def train_batch(ui, label):
        with tf.GradientTape() as tape:
            score = model(ui, training=True)
            # model.losses holds regularization terms; sum([]) == 0 so this
            # is safe for unregularized models.
            loss = loss_object(label, score) + sum(model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        update_metrics(loss, label, score)

    @tf.function
    def test_batch(ui, label):
        score = model(ui)
        loss = loss_object(label, score) + sum(model.losses)
        update_metrics(loss, label, score)

    for epoch in range(epochs):
        epoch_start_time = time.time()

        reset_metrics()
        for ui, label in train_ds:
            train_batch(ui, label)
        train_loss, train_auc, train_precision, train_recall = get_metric_results()

        reset_metrics()
        for ui, label in test_ds:
            test_batch(ui, label)
        test_loss, test_auc, test_precision, test_recall = get_metric_results()

        log(epoch, train_loss, train_auc, train_precision, train_recall, test_loss, test_auc, test_precision, test_recall)
        if topk_data is not None:
            topk(topk_data, score_fn)
        print('epoch_time=', time.time() - epoch_start_time, 's', sep='')
Example #2
0
def test(model: tf.keras.Model,
         train_data: List[Tuple[int, int, int]],
         test_data: List[Tuple[int, int, int]],
         topk_data: TopkData,
         loss_object=None,
         batch=512) -> None:
    """
    Generic evaluation flow: compute loss/AUC/precision/recall on both the
    train and test sets, log them, then run the top-k evaluation.

    :param model: the model to evaluate
    :param train_data: training set
    :param test_data: test set
    :param topk_data: data used for the top-k evaluation
    :param loss_object: loss function, defaults to BinaryCrossentropy
    :param batch: batch size
    """
    if loss_object is None:
        loss_object = tf.keras.losses.BinaryCrossentropy()

    train_ds, test_ds = prepare_ds(train_data, test_data, batch)
    train_loss, train_auc, train_precision, train_recall = _evaluate(
        model, train_ds, loss_object)
    test_loss, test_auc, test_precision, test_recall = _evaluate(
        model, test_ds, loss_object)
    # epoch=-1 marks a standalone evaluation rather than a training epoch.
    log(-1, train_loss, train_auc, train_precision, train_recall, test_loss,
        test_auc, test_precision, test_recall)
    topk(topk_data, get_score_fn(model))
Example #3
0
    def train_model():
        # Epoch loop closing over model, optimizer, loss_object, epochs,
        # train_ds, test_ds, topk_data, score_fn and the four metric objects
        # defined in the enclosing scope (not visible here).
        # NOTE(review): the tf.range loop and the tf.py_function wrappers
        # suggest this whole function is meant to run inside a tf.function
        # graph (decorator presumably applied outside this view) — confirm.
        def reset_metrics():
            # tf.py_function embeds the eager reset into the graph; a direct
            # call would not execute per-iteration when traced.
            for metric in [
                    loss_mean_metric, auc_metric, precision_metric,
                    recall_metric
            ]:
                tf.py_function(metric.reset_states, [], [])

        def update_metrics(loss, label, score):
            # Accumulate batch statistics; results are read once per epoch.
            loss_mean_metric.update_state(loss)
            auc_metric.update_state(label, score)
            precision_metric.update_state(label, score)
            recall_metric.update_state(label, score)

        def get_metric_results():
            return loss_mean_metric.result(), auc_metric.result(
            ), precision_metric.result(), recall_metric.result()

        @tf.function
        def train_batch(ui, label):
            with tf.GradientTape() as tape:
                # Model returns two outputs here; the loss penalizes both
                # against the same label, plus regularization terms.
                score, predicted_labels = model(ui, training=True)
                loss = loss_object(label, score) + loss_object(
                    label, predicted_labels) + sum(model.losses)
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients,
                                          model.trainable_variables))
            update_metrics(loss, label, score)

        @tf.function
        def test_batch(ui, label):
            # No training=True: inference-mode forward pass.
            score, predicted_labels = model(ui)
            loss = loss_object(label, score) + loss_object(
                label, predicted_labels) + sum(model.losses)
            update_metrics(loss, label, score)

        for epoch in tf.range(epochs):
            epoch_start_time = time.time()

            reset_metrics()
            for ui, label in train_ds:
                train_batch(ui, label)
            train_loss, train_auc, train_precision, train_recall = get_metric_results(
            )

            reset_metrics()
            for ui, label in test_ds:
                test_batch(ui, label)
            test_loss, test_auc, test_precision, test_recall = get_metric_results(
            )

            # Logging and top-k evaluation are Python side effects, so they
            # are wrapped in tf.py_function to run inside the graph.
            tf.py_function(log, [
                epoch, train_loss, train_auc, train_precision, train_recall,
                test_loss, test_auc, test_precision, test_recall
            ], [])
            tf.py_function(lambda: topk(topk_data, score_fn), [], [])

            print('epoch_time=', time.time() - epoch_start_time, 's', sep='')
Example #4
0
def _train_graph(model, train_ds, test_ds, topk_data, optimizer, loss_object, epochs):
    """Graph-mode training: run gradient steps over ``train_ds`` each epoch,
    then evaluate on both datasets, log the metrics and report top-k."""
    score_fn = get_score_fn(model)

    @tf.function
    def _step(ui, label):
        # One optimizer update on a single batch.
        with tf.GradientTape() as tape:
            score = tf.squeeze(model(ui, training=True))
            loss = loss_object(label, score) + sum(model.losses)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

    for epoch in range(epochs):
        for ui, label in train_ds:
            _step(ui, label)

        # _evaluate returns (loss, auc, precision, recall).
        train_metrics = _evaluate(model, train_ds, loss_object)
        test_metrics = _evaluate(model, test_ds, loss_object)

        log(epoch, *train_metrics, *test_metrics)
        topk(topk_data, score_fn)
def train(model: SLIM, topk_data: TopkData, l12=0.01, epochs=1000):
    """Train a SLIM model with FTRL, printing loss and top-k metrics on a
    sparsening schedule.

    :param model: SLIM model exposing ``loss`` and a callable scoring interface
    :param topk_data: data for the top-k evaluation
    :param l12: strength used for BOTH the L1 and L2 regularization of FTRL
    :param epochs: number of optimization steps
    """
    optimizer = tf.keras.optimizers.Ftrl(l1_regularization_strength=l12,
                                         l2_regularization_strength=l12)

    # Named function instead of a lambda assignment (PEP 8 E731).
    def score_fn(ui):
        return model({k: tf.constant(v, dtype=tf.int32) for k, v in ui.items()})

    @tf.function
    def train_step():
        with tf.GradientTape() as tape:
            loss = model.loss(training=True) + sum(model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        return loss

    for epoch in range(1, epochs + 1):
        loss = train_step()
        # Report densely at first, then less often: epoch 1, every 5 until
        # 20, every 20 until 100, then every 100.
        if epoch == 1 or (epoch < 20 and epoch % 5 == 0) or (
                epoch < 100 and epoch % 20 == 0) or epoch % 100 == 0:
            print('epoch=', epoch, ', loss=', loss.numpy(), sep='')
            topk(topk_data, score_fn)
Example #6
0
    def on_epoch_end(self, epoch, logs=None):
        """Log this epoch's train/validation metrics, then run top-k."""
        keys = ('loss', 'AUC', 'Precision', 'Recall',
                'val_loss', 'val_AUC', 'val_Precision', 'val_Recall')
        log(epoch, *(logs[key] for key in keys))

        topk(self.topk_data, self.score_fn)
Example #7
0
if __name__ == '__main__':
    from Recommender_System.data import data_loader, data_process
    from Recommender_System.algorithm.ItemCF.tool import item_similarity, user_item_score
    from Recommender_System.algorithm.common import topk

    # ml100k with no negative sampling: only observed interactions are used.
    n_user, n_item, train_data, test_data, topk_data = data_process.pack(
        data_loader.ml100k, negative_sample_ratio=0)

    # Item-item similarity matrix, then per-user scores over the top-N
    # most similar items.
    W = item_similarity(train_data, n_user, n_item)
    scores = user_item_score(train_data, n_user, n_item, W, N=10)

    # Named function instead of a lambda assignment (PEP 8 E731).
    def score_fn(ui):
        return [scores[u][i] for u, i in zip(ui['user_id'], ui['item_id'])]

    topk(topk_data, score_fn)
Example #8
0
    def on_epoch_end(self, epoch, logs=None):
        """Log the epoch's train/validation metrics and evaluate top-k."""
        metric_keys = ('loss', 'auc', 'precision', 'recall',
                       'val_loss', 'val_auc', 'val_precision', 'val_recall')
        log(epoch, *(logs[k] for k in metric_keys))

        topk(self.topk_data, self.score_fn)