Example 1
def train_and_evaluate(transformed_train_filepattern,
                       transformed_test_filepattern, transformed_metadata_dir,
                       num_train_instances=NUM_TRAIN_INSTANCES,
                       num_test_instances=NUM_TEST_INSTANCES):
  """Train the model on training data and evaluate on test data.

  Args:
    transformed_train_filepattern: File pattern for transformed training data
        shards
    transformed_test_filepattern: File pattern for transformed test data shards
    transformed_metadata_dir: Directory containing transformed data metadata
    num_train_instances: Number of instances in train set
    num_test_instances: Number of instances in test set

  Returns:
    The results from the estimator's 'evaluate' method
  """

  # Wrap scalars as real valued columns.
  real_valued_columns = [feature_column.real_valued_column(key)
                         for key in NUMERIC_COLUMNS]

  # Wrap categorical columns.  Note the combiner is irrelevant since the input
  # only has one value set per feature per instance.
  one_hot_columns = [
      feature_column.sparse_column_with_integerized_feature(
          key, bucket_size=bucket_size, combiner='sum')
      for key, bucket_size in zip(CATEGORICAL_COLUMNS, BUCKET_SIZES)]

  estimator = learn.LinearClassifier(real_valued_columns + one_hot_columns)

  transformed_metadata = metadata_io.read_metadata(transformed_metadata_dir)
  train_input_fn = input_fn_maker.build_training_input_fn(
      transformed_metadata,
      transformed_train_filepattern,
      training_batch_size=TRAIN_BATCH_SIZE,
      label_keys=[LABEL_COLUMN])

  # Estimate the model using the default optimizer.
  estimator.fit(
      input_fn=train_input_fn,
      max_steps=TRAIN_NUM_EPOCHS * num_train_instances // TRAIN_BATCH_SIZE)

  # Evaluate model on test dataset.
  eval_input_fn = input_fn_maker.build_training_input_fn(
      transformed_metadata,
      transformed_test_filepattern,
      training_batch_size=1,
      label_keys=[LABEL_COLUMN])

  return estimator.evaluate(input_fn=eval_input_fn, steps=num_test_instances)
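For context, a minimal invocation of the function above might look like the following sketch. All constant values and file patterns here are illustrative assumptions; the original module defines NUMERIC_COLUMNS, CATEGORICAL_COLUMNS, BUCKET_SIZES, LABEL_COLUMN and the batch/epoch constants elsewhere.

# Illustrative sketch only -- the values and paths below are assumptions,
# not taken from the original module.
NUMERIC_COLUMNS = ['age', 'hours-per-week']
CATEGORICAL_COLUMNS = ['workclass', 'education']
BUCKET_SIZES = [9, 17]
LABEL_COLUMN = 'label'
TRAIN_BATCH_SIZE = 128
TRAIN_NUM_EPOCHS = 10
NUM_TRAIN_INSTANCES = 10000
NUM_TEST_INSTANCES = 2000

results = train_and_evaluate('work_dir/train_transformed-*',
                             'work_dir/test_transformed-*',
                             'work_dir/transformed_metadata')
print(results)  # a dict of metrics, e.g. 'accuracy' and 'loss'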
Example 2
def train_and_evaluate(transformed_train_filepattern,
                       transformed_test_filepattern,
                       transformed_metadata_dir,
                       num_train_instances=NUM_TRAIN_INSTANCES,
                       num_test_instances=NUM_TEST_INSTANCES):
    """Train the model on training data and evaluate on evaluation data.

  Args:
    transformed_train_filepattern: Base filename for transformed training data
        shards
    transformed_test_filepattern: Base filename for transformed evaluation data
        shards
    transformed_metadata_dir: Directory containing transformed data metadata
    num_train_instances: Number of instances in train set
    num_test_instances: Number of instances in test set

  Returns:
    The results from the estimator's 'evaluate' method
  """
    # Unrecognized tokens are represented by -1, but
    # sparse_column_with_integerized_feature uses the mod operator to map
    # integers to the range [0, bucket_size).  By choosing
    # bucket_size=VOCAB_SIZE + 1, we represent unrecognized tokens as
    # VOCAB_SIZE.
    review_column = feature_column.sparse_column_with_integerized_feature(
        REVIEW_COLUMN, bucket_size=VOCAB_SIZE + 1, combiner='sum')
    weighted_reviews = feature_column.weighted_sparse_column(
        review_column, REVIEW_WEIGHT)

    estimator = learn.LinearClassifier([weighted_reviews])

    transformed_metadata = metadata_io.read_metadata(transformed_metadata_dir)
    train_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_train_filepattern,
        training_batch_size=TRAIN_BATCH_SIZE,
        label_keys=[LABEL_COLUMN])

    # Estimate the model using the default optimizer.
    estimator.fit(input_fn=train_input_fn,
                  max_steps=TRAIN_NUM_EPOCHS * num_train_instances //
                  TRAIN_BATCH_SIZE)

    # Evaluate model on eval dataset.
    eval_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_test_filepattern,
        training_batch_size=1,
        label_keys=[LABEL_COLUMN])

    return estimator.evaluate(input_fn=eval_input_fn, steps=num_test_instances)
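The comment in this example about bucket_size=VOCAB_SIZE + 1 can be verified with plain Python: the mod operation leaves every in-vocabulary id unchanged and sends the -1 out-of-vocabulary marker to VOCAB_SIZE.

# Plain-Python sketch of the out-of-vocabulary mapping described above.
VOCAB_SIZE = 5
bucket_size = VOCAB_SIZE + 1
for token_id in [0, 3, VOCAB_SIZE - 1, -1]:
    print(token_id, '->', token_id % bucket_size)
# 0 -> 0, 3 -> 3, 4 -> 4, -1 -> 5  (== VOCAB_SIZE)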
Example 3
def train_and_evaluate(transformed_train_data_base, transformed_eval_data_base,
                       transformed_metadata_dir):
    """Train the model on training data and evaluate on evaluation data.

  Args:
    transformed_train_data_base: Base filename for transformed training data
        shards
    transformed_eval_data_base: Base filename for cleaned evaluation data
        shards
    transformed_metadata_dir: Directory containing transformed data metadata.

  Returns:
    The results from the estimator's 'evaluate' method.
  """

    # Wrap scalars as real valued columns.
    real_valued_columns = [
        feature_column.real_valued_column(key) for key in NUMERIC_COLUMNS
    ]

    # Wrap categorical columns.
    one_hot_columns = [
        feature_column.sparse_column_with_integerized_feature(
            key, bucket_size=bucket_size)
        for key, bucket_size in zip(CATEGORICAL_COLUMNS, BUCKET_SIZES)
    ]

    estimator = learn.LinearClassifier(real_valued_columns + one_hot_columns)

    transformed_metadata = metadata_io.read_metadata(transformed_metadata_dir)
    train_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_train_data_base + '*',
        training_batch_size=TRAIN_BATCH_SIZE,
        label_keys=['label'])

    # Estimate the model using the default optimizer.
    estimator.fit(input_fn=train_input_fn,
                  max_steps=TRAIN_NUM_EPOCHS * NUM_TRAIN_INSTANCES //
                  TRAIN_BATCH_SIZE)

    # Evaluate model on eval dataset.
    eval_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_eval_data_base + '*',
        training_batch_size=1,
        label_keys=['label'])

    return estimator.evaluate(input_fn=eval_input_fn, steps=NUM_EVAL_INSTANCES)
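The '*' appended to the base filenames turns them into glob patterns over the shard files written by the preprocessing pipeline. Assuming the common '-SSSSS-of-NNNNN' shard suffix (an assumption about the writer's naming, not stated in the example), the expansion looks like this:

import fnmatch

# Hypothetical shard names following the common '-SSSSS-of-NNNNN' suffix.
shards = ['train_transformed-%05d-of-00004' % i for i in range(4)]
print(fnmatch.filter(shards, 'train_transformed' + '*'))
# ['train_transformed-00000-of-00004', ..., 'train_transformed-00003-of-00004']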
Example 4
    def train_and_evaluate(output_dir):
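        """Build a tf.contrib.learn Experiment for training and evaluation.

        Closes over configuration from the enclosing scope: vocab_size,
        transformed_metadata_dir, raw_metadata_dir,
        transformed_train_file_pattern, transformed_test_file_pattern,
        train_batch_size, train_num_epochs, num_train_instances and
        num_test_instances.
        """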
        review_column = feature_column.sparse_column_with_integerized_feature(
            const.REVIEW_COLUMN, bucket_size=vocab_size + 1, combiner='sum')
        weighted_reviews = feature_column.weighted_sparse_column(
            review_column, const.REVIEW_WEIGHT)

        estimator = learn.LinearClassifier(
            feature_columns=[weighted_reviews],
            n_classes=2,
            model_dir=output_dir,
            config=tf.contrib.learn.RunConfig(save_checkpoints_secs=30))

        transformed_metadata = metadata_io.read_metadata(
            transformed_metadata_dir)
        raw_metadata = metadata_io.read_metadata(raw_metadata_dir)

        train_input_fn = input_fn_maker.build_training_input_fn(
            transformed_metadata,
            transformed_train_file_pattern,
            training_batch_size=train_batch_size,
            label_keys=[const.LABEL_COLUMN])

        eval_input_fn = input_fn_maker.build_training_input_fn(
            transformed_metadata,
            transformed_test_file_pattern,
            training_batch_size=1,
            label_keys=[const.LABEL_COLUMN])

        serving_input_fn = input_fn_maker.build_default_transforming_serving_input_fn(
            raw_metadata=raw_metadata,
            transform_savedmodel_dir=output_dir + '/transform_fn',
            raw_label_keys=[],
            raw_feature_keys=[const.REVIEW_COLUMN])

        export_strategy = saved_model_export_utils.make_export_strategy(
            serving_input_fn,
            exports_to_keep=5,
            default_output_alternative_key=None)

        return tf.contrib.learn.Experiment(
            estimator=estimator,
            train_steps=(train_num_epochs * num_train_instances //
                         train_batch_size),
            eval_steps=num_test_instances,
            train_input_fn=train_input_fn,
            eval_input_fn=eval_input_fn,
            export_strategies=export_strategy,
            min_eval_frequency=500)
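Unlike the other examples, this one returns an Experiment factory rather than running training itself. A factory with this signature is typically handed to learn_runner, which calls it with the output directory and drives the train/eval loop; a minimal sketch (the output path is an illustrative assumption):

from tensorflow.contrib.learn.python.learn import learn_runner

# learn_runner calls train_and_evaluate(output_dir) and then runs the
# returned Experiment.
learn_runner.run(experiment_fn=train_and_evaluate,
                 output_dir='/tmp/reviews_model')  # illustrative path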
Example 5
def train_and_evaluate(transformed_train_filepattern,
                       transformed_test_filepattern, transformed_metadata_dir,
                       serving_graph_dir):
    """Train the model on training data and evaluate on test data.

  Args:
    transformed_train_filepattern: File pattern for transformed training data
        shards
    transformed_test_filepattern: File pattern for transformed test data shards
    transformed_metadata_dir: Directory containing transformed data metadata
    serving_graph_dir: Directory to save the serving graph

  Returns:
    The results from the estimator's 'evaluate' method
  """

    # Wrap scalars as real valued columns.
    real_valued_columns = [
        feature_column.real_valued_column(key) for key in NUMERIC_COLUMNS
    ]

    # Wrap categorical columns.  Note the combiner is irrelevant since the input
    # only has one value set per feature per instance.
    one_hot_columns = [
        feature_column.sparse_column_with_integerized_feature(
            key, bucket_size=bucket_size, combiner='sum')
        for key, bucket_size in zip(CATEGORICAL_COLUMNS, BUCKET_SIZES)
    ]

    estimator = learn.LinearClassifier(real_valued_columns + one_hot_columns)

    transformed_metadata = metadata_io.read_metadata(transformed_metadata_dir)
    train_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_train_filepattern,
        training_batch_size=TRAIN_BATCH_SIZE,
        label_keys=[LABEL_COLUMN])

    # Estimate the model using the default optimizer.
    estimator.fit(input_fn=train_input_fn,
                  max_steps=TRAIN_NUM_EPOCHS * NUM_TRAIN_INSTANCES //
                  TRAIN_BATCH_SIZE)

    # Write the serving graph to disk for use in tf.serving
    in_columns = [
        'age', 'workclass', 'education', 'education-num', 'marital-status',
        'occupation', 'relationship', 'race', 'sex', 'capital-gain',
        'capital-loss', 'hours-per-week', 'native-country'
    ]

    if serving_graph_dir is not None:
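        # raw_data_metadata is assumed to be defined at module scope, e.g.
        # read once with metadata_io.read_metadata from the raw metadata dir.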
        serving_input_fn = input_fn_maker.build_default_transforming_serving_input_fn(
            raw_metadata=raw_data_metadata,
            transform_savedmodel_dir=serving_graph_dir + '/transform_fn',
            raw_label_keys=[],
            raw_feature_keys=in_columns)
        estimator.export_savedmodel(serving_graph_dir, serving_input_fn)

    # Evaluate model on test dataset.
    eval_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_test_filepattern,
        training_batch_size=1,
        label_keys=[LABEL_COLUMN])

    return estimator.evaluate(input_fn=eval_input_fn, steps=NUM_TEST_INSTANCES)
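export_savedmodel writes each export into a timestamped subdirectory of serving_graph_dir, alongside the 'transform_fn' directory. A hedged sketch of loading the newest export for inference with the standard TF 1.x SavedModel loader (the directory name is an illustrative assumption):

import os
import tensorflow as tf

export_base = 'serving_graph'  # illustrative value for serving_graph_dir

# Skip non-export entries such as 'transform_fn'; export names are
# numeric timestamps.
exports = [d for d in os.listdir(export_base) if d.isdigit()]
latest = os.path.join(export_base, max(exports, key=int))

with tf.Session(graph=tf.Graph()) as sess:
    tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING], latest)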