Example 1
    def _test_build_default_transforming_serving_input_fn(
            self, shape, feed_input_values):
        basedir = tempfile.mkdtemp()

        raw_metadata = dataset_metadata.DatasetMetadata(
            schema=_make_raw_schema(shape))

        transform_savedmodel_dir = os.path.join(basedir,
                                                'transform-savedmodel')
        _write_transform_savedmodel(transform_savedmodel_dir)

        serving_input_fn = (
            input_fn_maker.build_default_transforming_serving_input_fn(
                raw_metadata=raw_metadata,
                raw_label_keys=['raw_label'],
                raw_feature_keys=['raw_a', 'raw_b'],
                transform_savedmodel_dir=transform_savedmodel_dir))

        with tf.Graph().as_default():
            with tf.Session().as_default() as session:
                outputs, _, inputs = serving_input_fn()
                feed_inputs = {
                    inputs['raw_a']: feed_input_values[0],
                    inputs['raw_b']: feed_input_values[1]
                }
                transformed_a, transformed_b = session.run(
                    [outputs['transformed_a'], outputs['transformed_b']],
                    feed_dict=feed_inputs)

        self.assertEqual(21, transformed_a[0][0])
        self.assertEqual(9, transformed_b[0][0])
        self.assertEqual(29, transformed_a[1][0])
        self.assertEqual(-5, transformed_b[1][0])
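
The test depends on a _make_raw_schema helper that the snippet does not show. A minimal sketch of what it might look like, assuming the legacy dataset_schema.from_feature_spec API from tf.Transform 0.x; the feature spec itself is an assumption inferred from the keys used above:

import tensorflow as tf
from tensorflow_transform.tf_metadata import dataset_schema


def _make_raw_schema(shape):
    # Hypothetical reconstruction of the test helper: three int64 columns
    # whose shape is either () for scalars or e.g. (1,) for vectors.
    return dataset_schema.from_feature_spec({
        'raw_a': tf.FixedLenFeature(shape, tf.int64),
        'raw_b': tf.FixedLenFeature(shape, tf.int64),
        'raw_label': tf.FixedLenFeature(shape, tf.int64),
    })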
Example 2
  def _test_build_default_transforming_serving_input_fn(
      self, shape, feed_input_values):
    basedir = tempfile.mkdtemp()

    raw_metadata = dataset_metadata.DatasetMetadata(
        schema=_make_raw_schema(shape, should_add_unused_feature=True))

    transform_savedmodel_dir = os.path.join(basedir, 'transform-savedmodel')
    _write_transform_savedmodel(
        transform_savedmodel_dir, should_add_unused_feature=True)

    serving_input_fn = (
        input_fn_maker.build_default_transforming_serving_input_fn(
            raw_metadata=raw_metadata,
            raw_label_keys=['raw_label'],
            raw_feature_keys=['raw_a', 'raw_b'],
            transform_savedmodel_dir=transform_savedmodel_dir,
            convert_scalars_to_vectors=True))

    with tf.Graph().as_default():
      with tf.Session().as_default() as session:
        outputs, labels, inputs = serving_input_fn()

        self.assertItemsEqual(
            set(outputs.keys()),
            {'transformed_a', 'transformed_b', 'transformed_label'})
        self.assertEqual(labels, None)
        self.assertEqual(set(inputs.keys()), {'raw_a', 'raw_b'})

        feed_inputs = {inputs['raw_a']: feed_input_values[0],
                       inputs['raw_b']: feed_input_values[1]}
        transformed_a, transformed_b = session.run(
            [outputs['transformed_a'], outputs['transformed_b']],
            feed_dict=feed_inputs)

        # raw_label was excluded from the serving inputs, so fetching the
        # transformed label without a feed must fail.
        with self.assertRaises(Exception):
          session.run(outputs['transformed_label'])

    batch_shape = (len(feed_input_values[0]), 1)
    sparse_batch_shape = batch_shape

    if not shape:
      # transformed_b is sparse, so _convert_scalars_to_vectors did not
      # expand it to rank 2.
      sparse_batch_shape = sparse_batch_shape[:1]
      transformed_b_dict = dict(zip([tuple(x + [0])
                                     for x in transformed_b.indices.tolist()],
                                    transformed_b.values.tolist()))
    else:
      transformed_b_dict = dict(zip([tuple(x)
                                     for x in transformed_b.indices.tolist()],
                                    transformed_b.values.tolist()))

    self.assertEqual(batch_shape, tuple(transformed_a.shape))
    self.assertEqual(sparse_batch_shape, tuple(transformed_b.dense_shape))

    self.assertEqual(21, transformed_a[0][0])
    self.assertEqual(9, transformed_b_dict[(0, 0)])
    self.assertEqual(29, transformed_a[1][0])
    self.assertEqual(-5, transformed_b_dict[(1, 0)])
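
Both this test and Example 1 rely on a _write_transform_savedmodel helper that is defined elsewhere. A minimal sketch, assuming the saved_transform_io module from tf.Transform 0.x; the exact transform logic and the should_add_unused_feature handling are assumptions inferred from the assertions (a sum, a difference, and the label scaled by 1000):

import tensorflow as tf
from tensorflow_transform.saved import saved_transform_io


def _write_transform_savedmodel(export_dir, should_add_unused_feature=False):
    # Hypothetical reconstruction of the test helper. (In the real tests
    # transformed_b is a SparseTensor; it is kept dense here for brevity.)
    with tf.Graph().as_default():
        with tf.Session().as_default() as session:
            raw_a = tf.placeholder(tf.int64)
            raw_b = tf.placeholder(tf.int64)
            raw_label = tf.placeholder(tf.int64)
            inputs = {'raw_a': raw_a, 'raw_b': raw_b, 'raw_label': raw_label}
            outputs = {
                'transformed_a': raw_a + raw_b,
                'transformed_b': raw_a - raw_b,
                'transformed_label': raw_label * 1000,
            }
            if should_add_unused_feature:
                inputs['raw_unused'] = tf.placeholder(tf.int64)
            saved_transform_io.write_saved_transform_from_session(
                session, inputs, outputs, export_dir)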
Example 3
    def train_and_evaluate(output_dir):
        review_column = feature_column.sparse_column_with_integerized_feature(
            const.REVIEW_COLUMN, bucket_size=vocab_size + 1, combiner='sum')
        weighted_reviews = feature_column.weighted_sparse_column(
            review_column, const.REVIEW_WEIGHT)

        estimator = learn.LinearClassifier(
            feature_columns=[weighted_reviews],
            n_classes=2,
            model_dir=output_dir,
            config=tf.contrib.learn.RunConfig(save_checkpoints_secs=30))

        transformed_metadata = metadata_io.read_metadata(
            transformed_metadata_dir)
        raw_metadata = metadata_io.read_metadata(raw_metadata_dir)

        train_input_fn = input_fn_maker.build_training_input_fn(
            transformed_metadata,
            transformed_train_file_pattern,
            training_batch_size=train_batch_size,
            label_keys=[const.LABEL_COLUMN])

        eval_input_fn = input_fn_maker.build_training_input_fn(
            transformed_metadata,
            transformed_test_file_pattern,
            training_batch_size=1,
            label_keys=[const.LABEL_COLUMN])

        serving_input_fn = input_fn_maker.build_default_transforming_serving_input_fn(
            raw_metadata=raw_metadata,
            transform_savedmodel_dir=output_dir + '/transform_fn',
            raw_label_keys=[],
            raw_feature_keys=[const.REVIEW_COLUMN])

        export_strategy = saved_model_export_utils.make_export_strategy(
            serving_input_fn,
            exports_to_keep=5,
            default_output_alternative_key=None)

        return tf.contrib.learn.Experiment(
            estimator=estimator,
            train_steps=(train_num_epochs * num_train_instances //
                         train_batch_size),
            eval_steps=num_test_instances,
            train_input_fn=train_input_fn,
            eval_input_fn=eval_input_fn,
            export_strategies=[export_strategy],
            min_eval_frequency=500)
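
Because train_and_evaluate matches the experiment_fn(output_dir) contract, it would normally be handed to the contrib learn_runner. A minimal driver sketch, assuming TF 1.x; the output path is illustrative:

from tensorflow.contrib.learn.python.learn import learn_runner

# Runs the train/eval schedule and triggers the export strategy at the end.
learn_runner.run(experiment_fn=train_and_evaluate,
                 output_dir='/tmp/sentiment_model')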
Example 4
def train_and_evaluate(transformed_train_filepattern,
                       transformed_test_filepattern, transformed_metadata_dir,
                       serving_graph_dir):
    """Train the model on training data and evaluate on test data.

  Args:
    transformed_train_filepattern: File pattern for transformed training data
        shards
    transformed_test_filepattern: File pattern for transformed test data shards
    transformed_metadata_dir: Directory containing transformed data metadata
    serving_graph_dir: Directory to save the serving graph

  Returns:
    The results from the estimator's 'evaluate' method
  """

    # Wrap scalars as real valued columns.
    real_valued_columns = [
        feature_column.real_valued_column(key) for key in NUMERIC_COLUMNS
    ]

    # Wrap categorical columns.  Note the combiner is irrelevant since the input
    # only has one value set per feature per instance.
    one_hot_columns = [
        feature_column.sparse_column_with_integerized_feature(
            key, bucket_size=bucket_size, combiner='sum')
        for key, bucket_size in zip(CATEGORICAL_COLUMNS, BUCKET_SIZES)
    ]

    estimator = learn.LinearClassifier(real_valued_columns + one_hot_columns)

    transformed_metadata = metadata_io.read_metadata(transformed_metadata_dir)
    train_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_train_filepattern,
        training_batch_size=TRAIN_BATCH_SIZE,
        label_keys=[LABEL_COLUMN])

    # Estimate the model using the default optimizer.
    estimator.fit(input_fn=train_input_fn,
                  max_steps=TRAIN_NUM_EPOCHS * NUM_TRAIN_INSTANCES //
                  TRAIN_BATCH_SIZE)

    # Write the serving graph to disk for use in tf.serving
    in_columns = [
        'age', 'workclass', 'education', 'education-num', 'marital-status',
        'occupation', 'relationship', 'race', 'sex', 'capital-gain',
        'capital-loss', 'hours-per-week', 'native-country'
    ]

    if serving_graph_dir is not None:
        serving_input_fn = (
            input_fn_maker.build_default_transforming_serving_input_fn(
                raw_metadata=raw_data_metadata,  # defined elsewhere in the
                                                 # original example
                transform_savedmodel_dir=serving_graph_dir + '/transform_fn',
                raw_label_keys=[],
                raw_feature_keys=in_columns))
        estimator.export_savedmodel(serving_graph_dir, serving_input_fn)

    # Evaluate model on test dataset.
    eval_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_test_filepattern,
        training_batch_size=1,
        label_keys=[LABEL_COLUMN])

    return estimator.evaluate(input_fn=eval_input_fn, steps=NUM_TEST_INSTANCES)
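
To sanity-check the export, the SavedModel can be reloaded and its serving signature listed. A sketch assuming the TF 1.x loader API; export_dir must be the timestamped subdirectory that export_savedmodel created under serving_graph_dir:

import tensorflow as tf
from tensorflow.python.saved_model import tag_constants


def inspect_export(export_dir):
    # Load the SavedModel and print the serving signature's tensor names.
    with tf.Session(graph=tf.Graph()) as session:
        meta_graph = tf.saved_model.loader.load(
            session, [tag_constants.SERVING], export_dir)
        signature = meta_graph.signature_def['serving_default']
        print('inputs:', sorted(signature.inputs.keys()))
        print('outputs:', sorted(signature.outputs.keys()))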
Example 5
    def _test_build_default_transforming_serving_input_fn_with_label(
            self, shape, feed_input_values):
        # TODO(b/123241798): use TEST_TMPDIR
        basedir = tempfile.mkdtemp()

        raw_metadata = dataset_metadata.DatasetMetadata(
            schema=_make_raw_schema(shape))

        transform_savedmodel_dir = os.path.join(basedir,
                                                'transform-savedmodel')
        _write_transform_savedmodel(transform_savedmodel_dir)

        serving_input_fn = (
            input_fn_maker.build_default_transforming_serving_input_fn(
                raw_metadata=raw_metadata,
                raw_label_keys=[],  # Empty, so the transformed label
                                    # appears among the outputs.
                raw_feature_keys=None,
                transform_savedmodel_dir=transform_savedmodel_dir,
                convert_scalars_to_vectors=True))

        with tf.Graph().as_default():
            with tf.compat.v1.Session().as_default() as session:
                outputs, labels, inputs = serving_input_fn()

                self.assertCountEqual(
                    set(outputs.keys()),
                    {'transformed_a', 'transformed_b', 'transformed_label'})
                self.assertEqual(labels, None)
                self.assertEqual(set(inputs.keys()),
                                 {'raw_a', 'raw_b', 'raw_label'})

                feed_inputs = {
                    inputs['raw_a']: feed_input_values[0],
                    inputs['raw_b']: feed_input_values[1],
                    inputs['raw_label']: feed_input_values[2]
                }
                transformed_a, transformed_b, transformed_label = session.run(
                    [
                        outputs['transformed_a'], outputs['transformed_b'],
                        outputs['transformed_label']
                    ],
                    feed_dict=feed_inputs)

        batch_shape = (len(feed_input_values[0]), 1)
        sparse_batch_shape = batch_shape

        if not shape:
            # transformed_b is sparse, so _convert_scalars_to_vectors
            # did not expand it to rank 2.
            sparse_batch_shape = sparse_batch_shape[:1]
            transformed_b_dict = dict(
                zip([tuple(x + [0]) for x in transformed_b.indices.tolist()],
                    transformed_b.values.tolist()))
        else:
            transformed_b_dict = dict(
                zip([tuple(x) for x in transformed_b.indices.tolist()],
                    transformed_b.values.tolist()))

        self.assertEqual(batch_shape, tuple(transformed_a.shape))
        self.assertEqual(sparse_batch_shape, tuple(transformed_b.dense_shape))
        self.assertEqual(batch_shape, tuple(transformed_label.shape))

        self.assertEqual(21, transformed_a[0][0])
        self.assertEqual(9, transformed_b_dict[(0, 0)])
        self.assertEqual(1000, transformed_label[0][0])
        self.assertEqual(29, transformed_a[1][0])
        self.assertEqual(-5, transformed_b_dict[(1, 0)])
        self.assertEqual(2000, transformed_label[1][0])
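
The transformed_b_dict construction turns a fetched SparseTensorValue into a {index_tuple: value} dict so individual entries can be asserted regardless of ordering. The same pattern in isolation, with values mirroring the expected outputs above:

import numpy as np
import tensorflow as tf

sparse = tf.SparseTensorValue(indices=np.array([[0, 0], [1, 0]]),
                              values=np.array([9, -5]),
                              dense_shape=np.array([2, 1]))
as_dict = dict(zip([tuple(x) for x in sparse.indices.tolist()],
                   sparse.values.tolist()))
assert as_dict == {(0, 0): 9, (1, 0): -5}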
Example 6
    def export_fn(estimator,
                  export_dir_base,
                  checkpoint_path=None,
                  eval_result=None):
        with ops.Graph().as_default() as g:
            contrib_variables.create_global_step(g)

            input_ops = input_fn_maker.build_default_transforming_serving_input_fn(
                raw_metadata=raw_metadata,
                transform_savedmodel_dir=os.path.join(args.analysis_output_dir,
                                                      TRANSFORM_FN_DIR),
                raw_label_keys=[target_name],
                raw_feature_keys=csv_header,
                convert_scalars_to_vectors=True)()

            # Build the inference graph; _call_model_fn is a private
            # contrib.learn Estimator API.
            model_fn_ops = estimator._call_model_fn(
                input_ops.features, None, model_fn_lib.ModeKeys.INFER)
            output_fetch_tensors = make_prediction_output_tensors(
                args=args,
                features=features,
                input_ops=input_ops,
                model_fn_ops=model_fn_ops,
                keep_target=keep_target)

            signature_def_map = {
                'serving_default':
                signature_def_utils.predict_signature_def(
                    input_ops.default_inputs, output_fetch_tensors)
            }

            if not checkpoint_path:
                # Locate the latest checkpoint
                checkpoint_path = saver.latest_checkpoint(estimator._model_dir)
            if not checkpoint_path:
                raise ValueError("Couldn't find trained model at %s." %
                                 estimator._model_dir)

            export_dir = saved_model_export_utils.get_timestamped_export_dir(
                export_dir_base)

            with tf_session.Session('') as session:
                saver_for_restore = saver.Saver(variables.global_variables(),
                                                sharded=True)
                saver_for_restore.restore(session, checkpoint_path)

                # init_op is run when the exported model is loaded, via the
                # legacy_init_op argument below.
                init_op = control_flow_ops.group(
                    variables.local_variables_initializer(),
                    data_flow_ops.tables_initializer())

                # Perform the export
                builder = saved_model_builder.SavedModelBuilder(export_dir)
                builder.add_meta_graph_and_variables(
                    session, [tag_constants.SERVING],
                    signature_def_map=signature_def_map,
                    assets_collection=ops.get_collection(
                        ops.GraphKeys.ASSET_FILEPATHS),
                    legacy_init_op=init_op)
                builder.save(as_text=False)

            # Add the extra assets
            if assets_extra:
                assets_extra_path = os.path.join(
                    compat.as_bytes(export_dir),
                    compat.as_bytes('assets.extra'))
                for dest_relative, source in assets_extra.items():
                    dest_absolute = os.path.join(
                        compat.as_bytes(assets_extra_path),
                        compat.as_bytes(dest_relative))
                    dest_path = os.path.dirname(dest_absolute)
                    file_io.recursive_create_dir(dest_path)
                    file_io.copy(source, dest_absolute)

        # Only keep the last 3 exports.
        saved_model_export_utils.garbage_collect_exports(export_dir_base,
                                                         exports_to_keep=3)

        # Save the latest export as the final model;
        # export_dir_base = A/B/intermediate_models/
        if keep_target:
            final_dir = os.path.join(args.job_dir, 'evaluation_model')
        else:
            final_dir = os.path.join(args.job_dir, 'model')
        if file_io.is_directory(final_dir):
            file_io.delete_recursively(final_dir)
        file_io.recursive_create_dir(final_dir)
        recursive_copy(export_dir, final_dir)

        return export_dir
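
recursive_copy is defined elsewhere in the original module. A hedged sketch of such a helper, built on the same file_io API the export function already uses so it also works on GCS paths:

import os

from tensorflow.python.lib.io import file_io


def recursive_copy(src_dir, dest_dir):
    # Hypothetical helper: walk src_dir and mirror every file into dest_dir.
    for name in file_io.list_directory(src_dir):
        src_path = os.path.join(src_dir, name)
        dest_path = os.path.join(dest_dir, name)
        if file_io.is_directory(src_path):
            file_io.recursive_create_dir(dest_path)
            recursive_copy(src_path, dest_path)
        else:
            file_io.copy(src_path, dest_path, overwrite=True)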