예제 #1
0
def train(pipeline,
          output_dir,
          train_args_dict,
          train_features=None,
          eval_features=None,
          metadata=None):
    """Add training steps to ``pipeline`` and save the model and results.

    Any of ``train_features``, ``eval_features`` or ``metadata`` that is
    left as ``None`` is loaded from ``output_dir`` (from 'features_train*',
    'features_eval*' and ``METADATA_FILE_NAME`` respectively).

    Args:
        pipeline: Object the read transforms are applied to with ``|``.
        output_dir: Directory the inputs are read from and under which
            'saved_model' and 'train_results' are written.
        train_args_dict: Keyword arguments forwarded to ``ml.Train``.
        train_features: Optional training features; loaded when ``None``.
        eval_features: Optional evaluation features; loaded when ``None``.
        metadata: Optional metadata; loaded when ``None``.

    Returns:
        The ``(trained_model, results)`` pair produced by the 'Train' step.
    """
    # Compare against None explicitly: only an omitted argument should
    # trigger a load, regardless of how the supplied object evaluates in a
    # boolean context.
    if train_features is None:
        train_features = (
            pipeline
            | 'ReadTrain' >> io.LoadFeatures(
                os.path.join(output_dir, 'features_train*')))
    if eval_features is None:
        eval_features = (
            pipeline
            | 'ReadEval' >> io.LoadFeatures(
                os.path.join(output_dir, 'features_eval*')))
    if metadata is None:
        # NOTE(review): metadata is not consumed further down in this
        # function; the read step is kept so the pipeline graph is
        # unchanged -- confirm whether it is still needed.
        metadata = (
            pipeline
            | 'ReadMetadata' >> io.LoadMetadata(
                os.path.join(output_dir, METADATA_FILE_NAME)))

    trained_model, results = (
        (train_features, eval_features)
        | 'Train' >> ml.Train(**train_args_dict))

    # Persist both training outputs under output_dir.
    trained_model | 'SaveModel' >> io.SaveModel(
        os.path.join(output_dir, 'saved_model'))
    results | io.SaveTrainingJobResult(
        os.path.join(output_dir, 'train_results'))

    return trained_model, results
예제 #2
0
def evaluate(pipeline, output_dir, trained_model=None, eval_features=None):
    """Add evaluation steps to ``pipeline`` and write the results as CSV.

    ``eval_features`` and ``trained_model`` are loaded from ``output_dir``
    ('features_eval*' and 'saved_model') when they are left as ``None``.

    Args:
        pipeline: Object the read transforms are applied to with ``|``.
        output_dir: Directory the inputs are read from and under which the
            'model_evaluations' CSV output is written.
        trained_model: Optional trained model; loaded when ``None``.
        eval_features: Optional evaluation features; loaded when ``None``.

    Returns:
        The evaluations produced by the 'CreateEvaluations' step.
    """
    # Compare against None explicitly: only an omitted argument should
    # trigger a load, regardless of how the supplied object evaluates in a
    # boolean context.
    if eval_features is None:
        eval_features = (
            pipeline
            | 'ReadEval' >> io.LoadFeatures(
                os.path.join(output_dir, 'features_eval*')))
    if trained_model is None:
        trained_model = (
            pipeline
            | 'LoadModel' >> io.LoadModel(
                os.path.join(output_dir, 'saved_model')))

    # The vocab loader is handed to make_evaluation_dict as an extra
    # argument of the Map step below.
    vocab_loader = LazyVocabLoader(
        os.path.join(output_dir, METADATA_FILE_NAME))

    # Run our evaluation data through a Batch Evaluation, then pull out just
    # the expected and predicted target values.
    evaluations = (
        eval_features
        | 'Evaluate' >> ml.Evaluate(trained_model)
        | 'CreateEvaluations' >> beam.Map(make_evaluation_dict, vocab_loader))

    coder = io.CsvCoder(
        column_names=[
            'key', 'target', 'predicted', 'score', 'target_label',
            'predicted_label', 'all_scores'
        ],
        numeric_column_names=['target', 'predicted', 'score'])

    # Write one CSV record per evaluation under output_dir.
    (evaluations
     | 'WriteEvaluation' >> beam.io.textio.WriteToText(
         os.path.join(output_dir, 'model_evaluations'),
         file_name_suffix='.csv',
         coder=coder))
    return evaluations
예제 #3
0
def train(pipeline, train_features=None, eval_features=None, metadata=None):
  """Add training steps to ``pipeline`` and save the model and results.

  ``train_features`` and ``eval_features`` are loaded from
  ``args.output_dir`` ('features_train*' and 'features_eval*') when they are
  left as ``None``. ``metadata`` is forwarded to ``get_train_parameters``
  as given; it is not loaded here when missing.

  Args:
    pipeline: Object the read transforms are applied to with ``|``.
    train_features: Optional training features; loaded when ``None``.
    eval_features: Optional evaluation features; loaded when ``None``.
    metadata: Value passed to ``get_train_parameters`` to build the
      keyword arguments for ``ml.Train``.

  Returns:
    The ``(trained_model, results)`` pair produced by ``ml.Train``.
  """
  # Compare against None explicitly: only an omitted argument should trigger
  # a load, regardless of how the supplied object evaluates in a boolean
  # context.
  if train_features is None:
    train_features = (
        pipeline
        | 'ReadTrain'
        >> io.LoadFeatures(os.path.join(args.output_dir, 'features_train*')))
  if eval_features is None:
    eval_features = (
        pipeline
        | 'ReadEval'
        >> io.LoadFeatures(os.path.join(args.output_dir, 'features_eval*')))

  trained_model, results = (
      (train_features, eval_features)
      | ml.Train(**get_train_parameters(metadata)))

  # Persist both training outputs under the module-level args.output_dir.
  trained_model | 'SaveModel' >> io.SaveModel(
      os.path.join(args.output_dir, 'saved_model'))
  results | io.SaveTrainingJobResult(
      os.path.join(args.output_dir, 'train_results'))

  return trained_model, results
예제 #4
0
def evaluate(pipeline, trained_model=None, eval_features=None):
  """Add evaluation steps to ``pipeline`` and write the results as text.

  ``eval_features`` and ``trained_model`` are loaded from
  ``args.output_dir`` ('features_eval*' and 'saved_model') when they are
  left as ``None``.

  Args:
    pipeline: Object the read transforms are applied to with ``|``.
    trained_model: Optional trained model; loaded when ``None``.
    eval_features: Optional evaluation features; loaded when ``None``.

  Returns:
    The evaluations produced by the 'CreateEvaluations' step.
  """
  # Compare against None explicitly: only an omitted argument should trigger
  # a load, regardless of how the supplied object evaluates in a boolean
  # context.
  if eval_features is None:
    eval_features = (
        pipeline
        | 'ReadEval'
        >> io.LoadFeatures(os.path.join(args.output_dir, 'features_eval*')))
  if trained_model is None:
    trained_model = (
        pipeline
        | 'LoadModel'
        >> io.LoadModel(os.path.join(args.output_dir, 'saved_model')))

  # Run our evaluation data through a Batch Evaluation, then pull out just
  # the expected and predicted target values.
  # The step label is attached with `'label' >> transform`, matching the
  # 'Evaluate' step, instead of being passed as Map's first positional
  # argument (where it would occupy the slot of the mapped callable).
  evaluations = (
      eval_features
      | 'Evaluate' >> ml.Evaluate(trained_model)
      | 'CreateEvaluations' >> beam.Map(make_evaluation_dict))

  # First list: all CSV columns; second list: the columns passed as the
  # coder's second positional argument.
  # NOTE(review): presumably the numeric column names -- confirm against
  # io.CsvCoder's signature.
  coder = io.CsvCoder(['key', 'target', 'predicted', 'score'],
                      ['target', 'predicted', 'score'])
  write_text_file(evaluations, 'WriteEvaluation', 'model_evaluations', coder)
  return evaluations