Example #1
def run_ncf(_):
    """Run NCF training and eval with Keras."""
    # TODO(seemuch): Support different train and eval batch sizes
    if FLAGS.eval_batch_size != FLAGS.batch_size:
        tf.logging.warning(
            "The Keras implementation of NCF currently does not support batch_size "
            "!= eval_batch_size ({} vs. {}). Overriding eval_batch_size to match "
            "batch_size".format(FLAGS.eval_batch_size, FLAGS.batch_size))
        FLAGS.eval_batch_size = FLAGS.batch_size

    params = ncf_common.parse_flags(FLAGS)
    batch_size = params["batch_size"]

    # ncf_common rounds eval_batch_size (this is needed due to a reshape during
    # eval). This carries over that rounding to batch_size as well.
    params['batch_size'] = params['eval_batch_size']

    num_users, num_items, num_train_steps, num_eval_steps, producer = (
        ncf_common.get_inputs(params))

    params["num_users"], params["num_items"] = num_users, num_items
    producer.start()
    model_helpers.apply_clean(flags.FLAGS)

    batches_per_step = params["batches_per_step"]
    train_input_dataset, eval_input_dataset = _get_train_and_eval_data(
        producer, params)
    # For distributed training, the dataset must call batch(). The argument to
    # batch() here is the number of replicas involved, so that each replica
    # evenly gets a slice of the data.
    train_input_dataset = train_input_dataset.batch(batches_per_step)
    eval_input_dataset = eval_input_dataset.batch(batches_per_step)

    strategy = ncf_common.get_distribution_strategy(params)
    with distribution_utils.get_strategy_scope(strategy):
        keras_model = _get_keras_model(params)
        optimizer = ncf_common.get_optimizer(params)
        time_callback = keras_utils.TimeHistory(batch_size, FLAGS.log_steps)

        keras_model.compile(loss=_keras_loss,
                            metrics=[_get_metric_fn(params)],
                            optimizer=optimizer)

        history = keras_model.fit(
            train_input_dataset,
            epochs=FLAGS.train_epochs,
            callbacks=[IncrementEpochCallback(producer), time_callback],
            verbose=2)

        tf.logging.info("Training done. Start evaluating")

        eval_results = keras_model.evaluate(eval_input_dataset,
                                            steps=num_eval_steps,
                                            verbose=2)

    tf.logging.info("Keras evaluation is done.")

    stats = build_stats(history, eval_results, time_callback)
    return stats
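
A minimal sketch of the per-replica batching idea in Example #1, using made-up numbers: the real values come from params["batches_per_step"] and the NCF data producer, which are assumed here and not reproduced. Each element of the training dataset is already a full batch, and the extra batch() call groups one such batch per replica into a single step.

import tensorflow as tf

num_replicas = 2   # hypothetical replica count (stands in for batches_per_step)
per_batch = 4      # hypothetical per-replica batch size

# Each element of this dataset stands in for one already-batched step of data.
dataset = tf.data.Dataset.range(16).batch(per_batch)

# Group num_replicas pre-batched elements into one training step, so the
# distribution strategy can hand one slice to each replica.
dataset = dataset.batch(num_replicas)
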
Example #2
def run_ncf(_):
    """Run NCF training and eval with Keras."""
    # TODO(seemuch): Support different train and eval batch sizes
    if FLAGS.eval_batch_size != FLAGS.batch_size:
        tf.logging.warning(
            "The Keras implementation of NCF currently does not support batch_size "
            "!= eval_batch_size ({} vs. {}). Overriding eval_batch_size to match "
            "batch_size".format(FLAGS.eval_batch_size, FLAGS.batch_size))
        FLAGS.eval_batch_size = FLAGS.batch_size

    params = ncf_common.parse_flags(FLAGS)

    # ncf_common rounds eval_batch_size (this is needed due to a reshape during
    # eval). This carries over that rounding to batch_size as well.
    params['batch_size'] = params['eval_batch_size']

    num_users, num_items, num_train_steps, num_eval_steps, producer = (
        ncf_common.get_inputs(params))

    params["num_users"], params["num_items"] = num_users, num_items
    producer.start()
    model_helpers.apply_clean(flags.FLAGS)

    keras_model = _get_keras_model(params)
    optimizer = ncf_common.get_optimizer(params)

    keras_model.compile(loss=_keras_loss,
                        metrics=[_get_metric_fn(params)],
                        optimizer=optimizer)

    train_input_dataset, eval_input_dataset = _get_train_and_eval_data(
        producer, params)

    keras_model.fit(train_input_dataset,
                    epochs=FLAGS.train_epochs,
                    callbacks=[IncrementEpochCallback(producer)],
                    verbose=2)

    tf.logging.info("Training done. Start evaluating")

    eval_results = keras_model.evaluate(eval_input_dataset,
                                        steps=num_eval_steps,
                                        verbose=2)

    tf.logging.info("Keras evaluation is done.")

    return eval_results
Example #3
def run_ncf(_):
  """Run NCF training and eval with Keras."""
  params = ncf_common.parse_flags(FLAGS)

  num_users, num_items, num_train_steps, num_eval_steps, producer = (
      ncf_common.get_inputs(params))

  params["num_users"], params["num_items"] = num_users, num_items
  producer.start()
  model_helpers.apply_clean(flags.FLAGS)

  keras_model = _get_keras_model(params)
  optimizer = ncf_common.get_optimizer(params)

  keras_model.compile(
      loss=_keras_loss,
      metrics=[_get_metric_fn(params)],
      optimizer=optimizer)

  train_input_dataset, eval_input_dataset = _get_train_and_eval_data(
      producer, params)

  keras_model.fit(
      train_input_dataset,
      epochs=FLAGS.train_epochs,
      callbacks=[IncrementEpochCallback(producer)],
      verbose=2)

  tf.logging.info("Training done. Start evaluating")

  eval_results = keras_model.evaluate(
      eval_input_dataset,
      steps=num_eval_steps,
      verbose=2)

  tf.logging.info("Keras evaluation is done.")

  return eval_results
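
Every run_ncf variant above passes IncrementEpochCallback(producer) to fit() so the data producer knows when a new epoch starts. Below is a hedged sketch of what such a callback could look like; advance_epoch is a placeholder name, since the producer's real epoch-advancing method is not shown in these examples.

import tensorflow as tf

class IncrementEpochCallback(tf.keras.callbacks.Callback):
    """Notify the data producer at the start of each epoch (sketch only)."""

    def __init__(self, producer):
        super(IncrementEpochCallback, self).__init__()
        self._producer = producer

    def on_epoch_begin(self, epoch, logs=None):
        # Hypothetical producer hook; the real NCF producer exposes its own
        # epoch-advancing method.
        self._producer.advance_epoch()
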
Example #4
def neumf_model_fn(features, labels, mode, params):
    """Model Function for NeuMF estimator."""
    if params.get("use_seed"):
        tf.set_random_seed(stat_utils.random_int32())

    users = features[movielens.USER_COLUMN]
    items = features[movielens.ITEM_COLUMN]

    user_input = tf.keras.layers.Input(tensor=users)
    item_input = tf.keras.layers.Input(tensor=items)
    logits = construct_model(user_input, item_input, params).output

    # Softmax with the first column of zeros is equivalent to sigmoid.
    softmax_logits = ncf_common.convert_to_softmax_logits(logits)

    if mode == tf.estimator.ModeKeys.EVAL:
        duplicate_mask = tf.cast(features[rconst.DUPLICATE_MASK], tf.float32)
        return _get_estimator_spec_with_metrics(
            logits,
            softmax_logits,
            duplicate_mask,
            params["num_neg"],
            params["match_mlperf"],
            use_tpu_spec=params["use_xla_for_gpu"])

    elif mode == tf.estimator.ModeKeys.TRAIN:
        labels = tf.cast(labels, tf.int32)
        valid_pt_mask = features[rconst.VALID_POINT_MASK]

        mlperf_helper.ncf_print(key=mlperf_helper.TAGS.OPT_NAME, value="adam")
        mlperf_helper.ncf_print(key=mlperf_helper.TAGS.OPT_LR,
                                value=params["learning_rate"])
        mlperf_helper.ncf_print(key=mlperf_helper.TAGS.OPT_HP_ADAM_BETA1,
                                value=params["beta1"])
        mlperf_helper.ncf_print(key=mlperf_helper.TAGS.OPT_HP_ADAM_BETA2,
                                value=params["beta2"])
        mlperf_helper.ncf_print(key=mlperf_helper.TAGS.OPT_HP_ADAM_EPSILON,
                                value=params["epsilon"])

        optimizer = ncf_common.get_optimizer(params)

        mlperf_helper.ncf_print(key=mlperf_helper.TAGS.MODEL_HP_LOSS_FN,
                                value=mlperf_helper.TAGS.BCE)

        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels,
                                                      logits=softmax_logits,
                                                      weights=tf.cast(
                                                          valid_pt_mask,
                                                          tf.float32))

        # This tensor is used by logging hooks.
        tf.identity(loss, name="cross_entropy")

        global_step = tf.train.get_global_step()
        tvars = tf.trainable_variables()
        gradients = optimizer.compute_gradients(
            loss, tvars, colocate_gradients_with_ops=True)
        gradients = _sparse_to_dense_grads(gradients)
        minimize_op = optimizer.apply_gradients(gradients,
                                                global_step=global_step,
                                                name="train")
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        train_op = tf.group(minimize_op, update_ops)

        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op)

    else:
        raise NotImplementedError
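
The comment "Softmax with the first column of zeros is equivalent to sigmoid" in Examples #4 and #5 is a standard identity: softmax over [0, x] assigns the second column probability e^x / (1 + e^x), which is sigmoid(x). A quick NumPy check, independent of the NCF helpers:

import numpy as np

x = np.array([-2.0, 0.0, 3.0])                 # arbitrary logits
sigmoid = 1.0 / (1.0 + np.exp(-x))

# Softmax over two columns [0, x]; take the probability of the second column.
two_col = np.stack([np.zeros_like(x), x], axis=1)
softmax = np.exp(two_col) / np.exp(two_col).sum(axis=1, keepdims=True)

print(np.allclose(softmax[:, 1], sigmoid))     # True
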
Example #5
def neumf_model_fn(features, labels, mode, params):
  """Model Function for NeuMF estimator."""
  if params.get("use_seed"):
    tf.set_random_seed(stat_utils.random_int32())

  users = features[movielens.USER_COLUMN]
  items = features[movielens.ITEM_COLUMN]

  user_input = tf.keras.layers.Input(tensor=users)
  item_input = tf.keras.layers.Input(tensor=items)
  logits = construct_model(user_input, item_input, params).output

  # Softmax with the first column of zeros is equivalent to sigmoid.
  softmax_logits = ncf_common.convert_to_softmax_logits(logits)

  if mode == tf.estimator.ModeKeys.EVAL:
    duplicate_mask = tf.cast(features[rconst.DUPLICATE_MASK], tf.float32)
    return _get_estimator_spec_with_metrics(
        logits,
        softmax_logits,
        duplicate_mask,
        params["num_neg"],
        params["match_mlperf"],
        use_tpu_spec=params["use_xla_for_gpu"])

  elif mode == tf.estimator.ModeKeys.TRAIN:
    labels = tf.cast(labels, tf.int32)
    valid_pt_mask = features[rconst.VALID_POINT_MASK]

    mlperf_helper.ncf_print(key=mlperf_helper.TAGS.OPT_NAME, value="adam")
    mlperf_helper.ncf_print(key=mlperf_helper.TAGS.OPT_LR,
                            value=params["learning_rate"])
    mlperf_helper.ncf_print(key=mlperf_helper.TAGS.OPT_HP_ADAM_BETA1,
                            value=params["beta1"])
    mlperf_helper.ncf_print(key=mlperf_helper.TAGS.OPT_HP_ADAM_BETA2,
                            value=params["beta2"])
    mlperf_helper.ncf_print(key=mlperf_helper.TAGS.OPT_HP_ADAM_EPSILON,
                            value=params["epsilon"])

    optimizer = ncf_common.get_optimizer(params)

    mlperf_helper.ncf_print(key=mlperf_helper.TAGS.MODEL_HP_LOSS_FN,
                            value=mlperf_helper.TAGS.BCE)

    loss = tf.losses.sparse_softmax_cross_entropy(
        labels=labels,
        logits=softmax_logits,
        weights=tf.cast(valid_pt_mask, tf.float32)
    )

    # This tensor is used by logging hooks.
    tf.identity(loss, name="cross_entropy")

    global_step = tf.train.get_global_step()
    tvars = tf.trainable_variables()
    gradients = optimizer.compute_gradients(
        loss, tvars, colocate_gradients_with_ops=True)
    gradients = _sparse_to_dense_grads(gradients)
    minimize_op = optimizer.apply_gradients(
        gradients, global_step=global_step, name="train")
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    train_op = tf.group(minimize_op, update_ops)

    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

  else:
    raise NotImplementedError
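
Both estimator model functions pass the computed gradients through _sparse_to_dense_grads before apply_gradients. The helper itself is not shown in these examples; below is a hedged sketch of what it typically does in TF 1.x, where embedding gradients arrive as tf.IndexedSlices and tf.convert_to_tensor densifies them. The actual helper in the NCF code may differ.

import tensorflow as tf

def _sparse_to_dense_grads(grads_and_vars):
    """Densify tf.IndexedSlices gradients (sketch; the real helper may differ)."""
    return [(tf.convert_to_tensor(grad), var) if grad is not None else (grad, var)
            for grad, var in grads_and_vars]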