def _test_build_parsing_transforming_serving_input_fn(self, shape):
    basedir = tempfile.mkdtemp()

    raw_metadata = dataset_metadata.DatasetMetadata(
        schema=_make_raw_schema(shape, should_add_unused_feature=True))

    transform_savedmodel_dir = os.path.join(basedir, 'transform-savedmodel')
    _write_transform_savedmodel(
        transform_savedmodel_dir, should_add_unused_feature=True)

    serving_input_fn = (
        input_fn_maker.build_parsing_transforming_serving_input_fn(
            raw_metadata=raw_metadata,
            transform_savedmodel_dir=transform_savedmodel_dir,
            raw_label_keys=['raw_label'],  # Labels are excluded
            raw_feature_keys=['raw_a', 'raw_b'],
            convert_scalars_to_vectors=True))

    examples = [_create_serialized_example(d)
                for d in [
                    {'raw_a': 15, 'raw_b': 6},
                    {'raw_a': 12, 'raw_b': 17}]]

    with tf.Graph().as_default():
      with tf.Session().as_default() as session:
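        # The serving input_fn returns transformed features, labels (always
        # None at serving time), and the raw input placeholders to feed.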
        outputs, labels, inputs = serving_input_fn()

        self.assertItemsEqual(
            set(outputs.keys()),
            {'transformed_a', 'transformed_b', 'transformed_label'})
        self.assertEqual(labels, None)
        self.assertEqual(set(inputs.keys()), {'examples'})

        feed_inputs = {inputs['examples']: examples}
        transformed_a, transformed_b = session.run(
            [outputs['transformed_a'], outputs['transformed_b']],
            feed_dict=feed_inputs)

    batch_shape = (len(examples), 1)
    sparse_batch_shape = batch_shape

    if not shape:
      # transformed_b is sparse, so _convert_scalars_to_vectors did not expand it.
      sparse_batch_shape = sparse_batch_shape[:1]
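      # Pad each rank-1 sparse index with a trailing 0 so both branches below
      # can look values up by (row, 0).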
      transformed_b_dict = dict(zip([tuple(x + [0])
                                     for x in transformed_b.indices.tolist()],
                                    transformed_b.values.tolist()))
    else:
      transformed_b_dict = dict(zip([tuple(x)
                                     for x in transformed_b.indices.tolist()],
                                    transformed_b.values.tolist()))

    self.assertEqual(batch_shape, tuple(transformed_a.shape))
    self.assertEqual(sparse_batch_shape, tuple(transformed_b.dense_shape))

    self.assertEqual(21, transformed_a[0][0])
    self.assertEqual(9, transformed_b_dict[(0, 0)])
    self.assertEqual(29, transformed_a[1][0])
    self.assertEqual(-5, transformed_b_dict[(1, 0)])
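
For reference, the expected values above are consistent with a transform of roughly the following shape (a sketch inferred from the assertions; the actual _write_transform_savedmodel helper is not shown here):

def _implied_transform(raw_a, raw_b, raw_label):
    # Inferred, not from the source: 15 + 6 = 21 and 15 - 6 = 9 above,
    # and Example #4's expected labels suggest raw_label * 1000.
    return {'transformed_a': raw_a + raw_b,
            'transformed_b': raw_a - raw_b,
            'transformed_label': raw_label * 1000}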
Example #2
  def make_experiment(output_dir):
    """Function that creates an experiment http://goo.gl/HcKHlT.

    Args:
      output_dir: The directory where the training output should be written.
    Returns:
      A `tf.contrib.learn.Experiment`.
    """

    estimator = tf.contrib.learn.Estimator(
        model_fn=model_builder(hparams=args),
        model_dir=output_dir)

    train_input_fn = make_input_fn(
        mode=tf.contrib.learn.ModeKeys.TRAIN,
        eval_type=args.eval_type,
        data_file_pattern=args.train_data_paths,
        randomize_input=args.randomize_input,
        batch_size=args.batch_size,
        queue_capacity=4 * args.batch_size)

    eval_input_fn = make_input_fn(
        mode=tf.contrib.learn.ModeKeys.EVAL,
        eval_type=args.eval_type,
        data_file_pattern=args.eval_data_paths,
        batch_size=args.eval_batch_size,
        queue_capacity=4 * args.eval_batch_size)

    raw_metadata = metadata_io.read_metadata(args.raw_metadata_path)
    # Neither ratings nor candidate features are needed for serving.
    raw_label_keys = [LABEL_RATING_SCORE]
    # For serving, we only need query features.
    raw_feature_keys = [QUERY_RATED_MOVIE_IDS,
                        QUERY_RATED_MOVIE_SCORES,
                        QUERY_RATED_GENRE_IDS,
                        QUERY_RATED_GENRE_FREQS,
                        QUERY_RATED_GENRE_AVG_SCORES]
    serving_input_fn = (
        input_fn_maker.build_parsing_transforming_serving_input_fn(
            raw_metadata,
            args.transform_savedmodel,
            raw_label_keys=raw_label_keys,
            raw_feature_keys=raw_feature_keys))

    export_strategy = tf.contrib.learn.utils.make_export_strategy(
        serving_input_fn,
        default_output_alternative_key=DEFAULT_OUTPUT_ALTERNATIVE)
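    # Because the serving input_fn applies the tf.Transform graph, the
    # exported SavedModel accepts raw tf.Examples and transforms them at
    # serving time.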

    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_steps=(args.train_steps or
                     args.num_epochs * args.train_set_size // args.batch_size),
        eval_steps=args.eval_steps,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        eval_metrics=create_evaluation_metrics(args.eval_type),
        export_strategies=[export_strategy],
        # Do not remove; this is needed until b/36498507 is fixed.
        min_eval_frequency=1000)
Example #3
    def make_experiment(output_dir):
        """Function that creates an experiment http://goo.gl/HcKHlT.

    Args:
      output_dir: The directory where the training output should be written.
    Returns:
      A `tf.contrib.learn.Experiment`.
    """

        estimator = tf.contrib.learn.Estimator(
            model_fn=model_builder(hparams=args), model_dir=output_dir)

        train_input_fn = make_input_fn(mode=tf.contrib.learn.ModeKeys.TRAIN,
                                       eval_type=args.eval_type,
                                       data_file_pattern=args.train_data_paths,
                                       randomize_input=args.randomize_input,
                                       batch_size=args.batch_size,
                                       queue_capacity=4 * args.batch_size)

        eval_input_fn = make_input_fn(mode=tf.contrib.learn.ModeKeys.EVAL,
                                      eval_type=args.eval_type,
                                      data_file_pattern=args.eval_data_paths,
                                      batch_size=args.eval_batch_size,
                                      queue_capacity=4 * args.eval_batch_size)

        raw_metadata = metadata_io.read_metadata(args.raw_metadata_path)
        # Neither ratings nor candidate features are needed for serving.
        raw_label_keys = [LABEL_RATING_SCORE]
        # For serving, we only need query features.
        raw_feature_keys = [
            QUERY_RATED_MOVIE_IDS, QUERY_RATED_MOVIE_SCORES,
            QUERY_RATED_GENRE_IDS, QUERY_RATED_GENRE_FREQS,
            QUERY_RATED_GENRE_AVG_SCORES
        ]
        serving_input_fn = (
            input_fn_maker.build_parsing_transforming_serving_input_fn(
                raw_metadata,
                args.transform_savedmodel,
                raw_label_keys=raw_label_keys,
                raw_feature_keys=raw_feature_keys))

        export_strategy = tf.contrib.learn.utils.make_export_strategy(
            serving_input_fn,
            default_output_alternative_key=DEFAULT_OUTPUT_ALTERNATIVE)

        return tf.contrib.learn.Experiment(
            estimator=estimator,
            train_steps=(args.train_steps or args.num_epochs *
                         args.train_set_size // args.batch_size),
            eval_steps=args.eval_steps,
            train_input_fn=train_input_fn,
            eval_input_fn=eval_input_fn,
            eval_metrics=create_evaluation_metrics(args.eval_type),
            export_strategies=[export_strategy],
            # Do not remove; this is needed until b/36498507 is fixed.
            min_eval_frequency=1000)
Example #4
    def _test_build_parsing_transforming_serving_input_fn_with_label(
            self, raw_schema):
        basedir = tempfile.mkdtemp()

        raw_metadata = dataset_metadata.DatasetMetadata(schema=raw_schema)

        transform_savedmodel_dir = os.path.join(basedir,
                                                'transform-savedmodel')
        _write_transform_savedmodel(transform_savedmodel_dir)

        serving_input_fn = (
            input_fn_maker.build_parsing_transforming_serving_input_fn(
                raw_metadata=raw_metadata,
                transform_savedmodel_dir=transform_savedmodel_dir,
                raw_label_keys=[],  # Empty, so labels remain in the outputs
                raw_feature_keys=None))

        examples = [
            _create_serialized_example(d) for d in [{
                'raw_a': 15,
                'raw_b': 6,
                'raw_label': 1
            }, {
                'raw_a': 12,
                'raw_b': 17,
                'raw_label': 2
            }]
        ]

        with tf.Graph().as_default():
            with tf.Session().as_default() as session:
                outputs, labels, inputs = serving_input_fn()
                feed_inputs = {inputs['examples']: examples}
                transformed_a, transformed_b, transformed_label = session.run(
                    [
                        outputs['transformed_a'], outputs['transformed_b'],
                        outputs['transformed_label']
                    ],
                    feed_dict=feed_inputs)

        self.assertEqual(21, transformed_a[0][0])
        self.assertEqual(9, transformed_b[0][0])
        self.assertEqual(1000, transformed_label[0][0])
        self.assertEqual(29, transformed_a[1][0])
        self.assertEqual(-5, transformed_b[1][0])
        self.assertEqual(2000, transformed_label[1][0])
        self.assertEqual(
            set(outputs.keys()),
            {'transformed_a', 'transformed_b', 'transformed_label'})
        self.assertEqual(labels, None)
        self.assertEqual(set(inputs.keys()), {'examples'})
Example #5
def get_serving_input_fn(input_dir):
    """Creates operations to ingest data for inference

    Args:
      input_dir: Directory containing tf.Transform metadata and transform_fn.
    Returns:
      A serving input function.
    """
    raw_metadata = metadata_io.read_metadata(
        posixpath.join(input_dir, constants.RAW_METADATA_DIR))
    transform_fn_path = posixpath.join(input_dir, constants.TRANSFORM_FN_DIR)
    return input_fn_maker.build_parsing_transforming_serving_input_fn(
        raw_metadata=raw_metadata,
        transform_savedmodel_dir=transform_fn_path,
        raw_label_keys=[constants.LABEL_COLUMN])
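
A minimal usage sketch (hypothetical: `input_dir` is an assumed path, and this export wiring is not part of the example above):

# Sketch: hand the serving input_fn to an export strategy.
serving_input_fn = get_serving_input_fn(input_dir)  # input_dir: assumed
export_strategy = tf.contrib.learn.utils.make_export_strategy(
    serving_input_fn, default_output_alternative_key=None)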
Example #6
    def test_build_parsing_transforming_serving_input_fn(self):
        basedir = tempfile.mkdtemp()

        raw_metadata = dataset_metadata.DatasetMetadata(
            schema=_make_raw_schema())

        transform_savedmodel_dir = os.path.join(basedir,
                                                'transform-savedmodel')
        _write_transform_savedmodel(transform_savedmodel_dir)

        serving_input_fn = (
            input_fn_maker.build_parsing_transforming_serving_input_fn(
                raw_metadata=raw_metadata,
                transform_savedmodel_dir=transform_savedmodel_dir,
                raw_label_keys=['raw_label'],
                raw_feature_keys=['raw_a', 'raw_b']))

        examples = [
            _create_serialized_example(d) for d in [{
                'raw_a': 15,
                'raw_b': 5
            }, {
                'raw_a': 12,
                'raw_b': 17
            }]
        ]

        with tf.Graph().as_default():
            with tf.Session().as_default() as session:
                outputs, _, inputs = serving_input_fn()
                feed_inputs = {inputs['examples']: examples}
                transformed_a, transformed_b = session.run(
                    [outputs['transformed_a'], outputs['transformed_b']],
                    feed_dict=feed_inputs)

        self.assertEqual(20, transformed_a[0][0])
        self.assertEqual(10, transformed_b[0][0])
        self.assertEqual(29, transformed_a[1][0])
        self.assertEqual(-5, transformed_b[1][0])
Example #7
    def get_experiment(output_dir):
        """Function that creates an experiment http://goo.gl/HcKHlT.

    Args:
      output_dir: The directory where the training output should be written.
    Returns:
      A `tf.contrib.learn.Experiment`.
    """

        columns = feature_columns(args.model_type, vocab_sizes, use_crosses)

        runconfig = tf.contrib.learn.RunConfig()
        cluster = runconfig.cluster_spec
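        # Heuristic: shard the SDCA optimizer's internal state three ways per
        # parameter server.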
        num_table_shards = max(1, runconfig.num_ps_replicas * 3)
        num_partitions = max(
            1, (1 + cluster.num_tasks('worker'))
            if cluster and 'worker' in cluster.jobs else 0)

        model_dir = os.path.join(output_dir, MODEL_DIR)
        if args.model_type == LINEAR:
            estimator = tf.contrib.learn.LinearRegressor(
                model_dir=model_dir,
                feature_columns=columns,
                optimizer=tf.contrib.linear_optimizer.SDCAOptimizer(
                    example_id_column=KEY_FEATURE_COLUMN,
                    symmetric_l2_regularization=args.l2_regularization,
                    num_loss_partitions=num_partitions,  # workers
                    num_table_shards=num_table_shards))  # ps
        elif args.model_type == DEEP:
            estimator = tf.contrib.learn.DNNRegressor(
                hidden_units=args.hidden_units,
                feature_columns=columns,
                model_dir=model_dir)

        transformed_metadata = metadata_io.read_metadata(
            args.transformed_metadata_path)
        raw_metadata = metadata_io.read_metadata(args.raw_metadata_path)
        serving_input_fn = (
            input_fn_maker.build_parsing_transforming_serving_input_fn(
                raw_metadata,
                args.transform_savedmodel,
                raw_label_keys=[TARGET_FEATURE_COLUMN]))
        export_strategy = tf.contrib.learn.utils.make_export_strategy(
            serving_input_fn,
            exports_to_keep=5,
            default_output_alternative_key=None)

        train_input_fn = get_transformed_reader_input_fn(
            transformed_metadata, args.train_data_paths, args.batch_size,
            tf.contrib.learn.ModeKeys.TRAIN)

        eval_input_fn = get_transformed_reader_input_fn(
            transformed_metadata, args.eval_data_paths, args.batch_size,
            tf.contrib.learn.ModeKeys.EVAL)

        return tf.contrib.learn.Experiment(
            estimator=estimator,
            train_steps=(args.train_steps or args.num_epochs *
                         args.train_set_size // args.batch_size),
            eval_steps=args.eval_steps,
            train_input_fn=train_input_fn,
            eval_input_fn=eval_input_fn,
            export_strategies=export_strategy,
            min_eval_frequency=500)
Example #8
  def get_experiment(output_dir):
    """Function that creates an experiment http://goo.gl/HcKHlT.

    Args:
      output_dir: The directory where the training output should be written.
    Returns:
      A `tf.contrib.learn.Experiment`.
    """

    columns = feature_columns(args.model_type, vocab_sizes, use_crosses)

    runconfig = tf.contrib.learn.RunConfig()
    cluster = runconfig.cluster_spec
    num_table_shards = max(1, runconfig.num_ps_replicas * 3)
    num_partitions = max(1, (1 + cluster.num_tasks('worker'))
                         if cluster and 'worker' in cluster.jobs else 0)

    model_dir = os.path.join(output_dir, MODEL_DIR)
    if args.model_type == LINEAR:
      estimator = tf.contrib.learn.LinearRegressor(
          model_dir=model_dir,
          feature_columns=columns,
          optimizer=tf.contrib.linear_optimizer.SDCAOptimizer(
              example_id_column=KEY_FEATURE_COLUMN,
              symmetric_l2_regularization=args.l2_regularization,
              num_loss_partitions=num_partitions,  # workers
              num_table_shards=num_table_shards))  # ps
    elif args.model_type == DEEP:
      estimator = tf.contrib.learn.DNNRegressor(
          hidden_units=args.hidden_units,
          feature_columns=columns,
          model_dir=model_dir)

    transformed_metadata = metadata_io.read_metadata(
        args.transformed_metadata_path)
    raw_metadata = metadata_io.read_metadata(args.raw_metadata_path)
    serving_input_fn = (
        input_fn_maker.build_parsing_transforming_serving_input_fn(
            raw_metadata,
            args.transform_savedmodel,
            raw_label_keys=[TARGET_FEATURE_COLUMN]))
    export_strategy = tf.contrib.learn.utils.make_export_strategy(
        serving_input_fn, exports_to_keep=5,
        default_output_alternative_key=None)

    train_input_fn = get_transformed_reader_input_fn(
        transformed_metadata, args.train_data_paths, args.batch_size,
        tf.contrib.learn.ModeKeys.TRAIN)

    eval_input_fn = get_transformed_reader_input_fn(
        transformed_metadata, args.eval_data_paths, args.batch_size,
        tf.contrib.learn.ModeKeys.EVAL)

    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_steps=(args.train_steps or
                     args.num_epochs * args.train_set_size // args.batch_size),
        eval_steps=args.eval_steps,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        export_strategies=export_strategy,
        min_eval_frequency=500)
Example #9
    def get_experiment(output_dir):
        """Function that creates an experiment http://goo.gl/HcKHlT.

    Args:
      output_dir: The directory where the training output should be written.
    Returns:
      A `tf.contrib.learn.Experiment`.
    """

        wide_columns, deep_columns = get_feature_columns(
            args.model_type, linear_use_crosses, deep_embedding_size_factor)

        runconfig = tf.contrib.learn.RunConfig()
        cluster = runconfig.cluster_spec
        num_table_shards = max(1, runconfig.num_ps_replicas * 3)
        num_partitions = max(
            1, (1 + cluster.num_tasks('worker'))
            if cluster and 'worker' in cluster.jobs else 0)

        deep_hidden_units = [int(n) for n in args.deep_hidden_units.split(' ')]

        if args.model_type == WIDE:

            estimator = tf.contrib.learn.LinearClassifier(
                model_dir=output_dir,
                feature_columns=wide_columns,
                optimizer=tf.train.FtrlOptimizer(
                    learning_rate=args.linear_learning_rate,
                    l1_regularization_strength=args.linear_l1_regularization,
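                    # Note: the L2 strength below reuses the L1 flag.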
                    l2_regularization_strength=args.linear_l1_regularization))
        elif args.model_type == DEEP:
            estimator = tf.contrib.learn.DNNClassifier(
                hidden_units=deep_hidden_units,
                feature_columns=deep_columns,
                model_dir=output_dir,
                dropout=args.deep_dropout,
                optimizer=tf.train.ProximalAdagradOptimizer(
                    learning_rate=args.deep_learning_rate,
                    initial_accumulator_value=0.1,
                    l1_regularization_strength=args.deep_l1_regularization,
                    l2_regularization_strength=args.deep_l2_regularization,
                    use_locking=False))

        elif args.model_type == WIDE_N_DEEP:
            estimator = tf.contrib.learn.DNNLinearCombinedClassifier(
                model_dir=output_dir,
                linear_feature_columns=wide_columns,
                linear_optimizer=tf.train.FtrlOptimizer(
                    learning_rate=args.linear_learning_rate,
                    l1_regularization_strength=args.linear_l1_regularization,
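                    # Note: the L2 strength below reuses the L1 flag.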
                    l2_regularization_strength=args.linear_l1_regularization),
                dnn_feature_columns=deep_columns,
                dnn_hidden_units=deep_hidden_units,
                dnn_dropout=args.deep_dropout,
                dnn_optimizer=tf.train.ProximalAdagradOptimizer(
                    learning_rate=args.deep_learning_rate,
                    initial_accumulator_value=0.1,
                    l1_regularization_strength=args.deep_l1_regularization,
                    l2_regularization_strength=args.deep_l2_regularization,
                    use_locking=False))

        transformed_metadata = metadata_io.read_metadata(
            args.transformed_metadata_path)
        raw_metadata = metadata_io.read_metadata(args.raw_metadata_path)
        serving_input_fn = (
            input_fn_maker.build_parsing_transforming_serving_input_fn(
                raw_metadata,
                args.transform_savedmodel,
                raw_label_keys=[LABEL_COLUMN]))
        export_strategy = (tf.contrib.learn.utils.make_export_strategy(
            serving_input_fn,
            exports_to_keep=5,
            default_output_alternative_key=None))

        train_input_fn = get_transformed_reader_input_fn(
            transformed_metadata, args.train_data_paths, args.train_batch_size,
            tf.contrib.learn.ModeKeys.TRAIN)

        eval_input_fn = get_transformed_reader_input_fn(
            transformed_metadata, args.eval_data_paths, args.eval_batch_size,
            tf.contrib.learn.ModeKeys.EVAL)

        train_set_size = args.train_set_size

        eval_metrics = {
            'MAP': metric_spec.MetricSpec(
                metric_fn=map_custom_metric,
                prediction_key="logistic",
                weight_key=DISPLAY_ID_COLUMN)
        }
        if args.full_evaluation_after_training:
            eval_steps = int(
                math.ceil(args.eval_set_size / float(args.eval_batch_size)))
            min_eval_frequency = None

            # Add a metric that computes MAP over the predictions,
            # accounting for leaked clicks.
            eval_metrics['MAP_with_Leaked_Clicks'] = metric_spec.MetricSpec(
                metric_fn=map_with_leak_custom_metric,
                prediction_key="logistic",
                weight_key=DISPLAY_ID_AND_IS_LEAK_ENCODED_COLUMN)
        else:
            eval_steps = args.eval_steps
            min_eval_frequency = 2000

        return tf.contrib.learn.Experiment(
            estimator=estimator,
            train_steps=(args.train_steps or args.num_epochs *
                         train_set_size // args.train_batch_size),
            eval_steps=eval_steps,
            train_input_fn=train_input_fn,
            eval_input_fn=eval_input_fn,
            min_eval_frequency=min_eval_frequency,
            export_strategies=export_strategy,
            eval_metrics=eval_metrics)
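
These experiment closures are typically handed to learn_runner, roughly as below (a sketch under the usual tf.contrib API; `args.output_dir` is an assumption):

from tensorflow.contrib.learn.python.learn import learn_runner

# Sketch: learn_runner invokes the experiment closure with the output dir.
learn_runner.run(experiment_fn=get_experiment, output_dir=args.output_dir)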