Ejemplo n.º 1
0
def train(pipeline,
          output_dir,
          train_args_dict,
          train_features=None,
          eval_features=None,
          metadata=None):
    """Attach the training step to ``pipeline`` and save what it produces.

    Each of ``train_features``, ``eval_features`` and ``metadata`` that is
    falsy is replaced by a load transform applied to ``pipeline`` and
    reading from ``output_dir``.

    Args:
        pipeline: Object the load transforms are applied to with ``|``
            (presumably a Beam pipeline -- confirm against the caller).
        output_dir: Directory searched for ``features_train*`` and
            ``features_eval*`` and used as the parent of the
            ``saved_model`` and ``train_results`` outputs.
        train_args_dict: Mapping expanded as keyword arguments into
            ``ml.Train``.
        train_features: Optional training features; loaded when falsy.
        eval_features: Optional evaluation features; loaded when falsy.
        metadata: Optional metadata; loaded when falsy.

    Returns:
        The ``(trained_model, results)`` pair yielded by the ``Train`` step.
    """
    def in_output_dir(name):
        # Every path used by this step lives directly under output_dir.
        return os.path.join(output_dir, name)

    if not train_features:
        load_train = 'ReadTrain' >> io.LoadFeatures(
            in_output_dir('features_train*'))
        train_features = pipeline | load_train

    if not eval_features:
        load_eval = 'ReadEval' >> io.LoadFeatures(
            in_output_dir('features_eval*'))
        eval_features = pipeline | load_eval

    if not metadata:
        # NOTE(review): metadata is not referenced again in this function;
        # the load is kept because it still adds the 'ReadMetadata' step to
        # the pipeline. Confirm whether ml.Train was meant to receive it.
        load_metadata = 'ReadMetadata' >> io.LoadMetadata(
            in_output_dir(METADATA_FILE_NAME))
        metadata = pipeline | load_metadata

    trainer = 'Train' >> ml.Train(**train_args_dict)
    trained_model, results = (train_features, eval_features) | trainer

    model_sink = 'SaveModel' >> io.SaveModel(in_output_dir('saved_model'))
    trained_model | model_sink

    results_sink = io.SaveTrainingJobResult(in_output_dir('train_results'))
    results | results_sink

    return trained_model, results
Ejemplo n.º 2
0
def model_analysis(pipeline, output_dir, evaluation_data=None, metadata=None):
    """Attach the model-analysis step to ``pipeline`` and write its outputs.

    Falsy ``metadata`` is loaded from ``METADATA_FILE_NAME`` under
    ``output_dir``; falsy ``evaluation_data`` is read as CSV text from
    files matching ``model_evaluations*`` under ``output_dir``.

    Args:
        pipeline: Object the read transforms are applied to with ``|``
            (presumably a Beam pipeline -- confirm against the caller).
        output_dir: Directory used for both the inputs read here and the
            ``analyzer_*`` outputs.
        evaluation_data: Optional evaluation records; read when falsy.
        metadata: Optional metadata handed to ``ml.AnalyzeModel``; loaded
            when falsy.

    Returns:
        The ``(confusion_matrix, precision_recall, logloss)`` triple yielded
        by the ``AnalyzeModel`` step.
    """
    if not metadata:
        metadata_path = os.path.join(output_dir, METADATA_FILE_NAME)
        load_metadata = (
            'LoadMetadataForAnalysis' >> io.LoadMetadata(metadata_path))
        metadata = pipeline | load_metadata

    if not evaluation_data:
        columns = [
            'key', 'target', 'predicted', 'score', 'target_label',
            'predicted_label', 'all_scores',
        ]
        numeric_columns = ['target', 'predicted', 'score']
        csv_coder = io.CsvCoder(
            column_names=columns,
            numeric_column_names=numeric_columns)
        evaluations_glob = os.path.join(output_dir, 'model_evaluations*')
        read_evaluations = 'ReadEvaluation' >> beam.io.ReadFromText(
            evaluations_glob, coder=csv_coder)
        evaluation_data = pipeline | read_evaluations

    analyzer = 'AnalyzeModel' >> ml.AnalyzeModel(metadata)
    confusion_matrix, precision_recall, logloss = evaluation_data | analyzer

    cm_path = os.path.join(output_dir, 'analyzer_cm.csv')
    confusion_matrix | io.SaveConfusionMatrixCsv(cm_path)

    pr_path = os.path.join(output_dir, 'analyzer_pr.csv')
    precision_recall | io.SavePrecisionRecallCsv(pr_path)

    # The '.csv' extension is supplied through file_name_suffix, so the
    # path prefix itself carries no extension.
    logloss_prefix = os.path.join(output_dir, 'analyzer_logloss')
    logloss_sink = 'WriteLogLoss' >> beam.io.WriteToText(
        logloss_prefix, file_name_suffix='.csv')
    logloss | logloss_sink

    return confusion_matrix, precision_recall, logloss
Ejemplo n.º 3
0
def model_analysis(pipeline, evaluation_data=None, metadata=None):
  """Attach the model-analysis step to ``pipeline`` and write its outputs.

  Output locations come from ``args.output_dir``; ``args``,
  ``read_text_file`` and ``write_text_file`` are names this function
  expects to find in the enclosing module.

  Args:
    pipeline: Object the read transforms are applied to (presumably a Beam
      pipeline -- confirm against the caller).
    evaluation_data: Optional evaluation records; when falsy they are
      obtained through ``read_text_file`` with a CSV coder.
    metadata: Optional metadata handed to ``ml.AnalyzeModel``; when falsy
      it is loaded from ``metadata.yaml`` under ``args.output_dir``.

  Returns:
    The ``(confusion_matrix, precision_recall, logloss)`` triple yielded by
    the ``AnalyzeModel`` step.
  """
  if not metadata:
    metadata_path = os.path.join(args.output_dir, "metadata.yaml")
    metadata = pipeline | io.LoadMetadata(metadata_path)

  if not evaluation_data:
    columns = ['key', 'target', 'predicted', 'score']
    numeric_columns = ['target', 'predicted', 'score']
    coder = io.CsvCoder(columns, numeric_columns)
    evaluation_data = read_text_file(
        pipeline, 'ReadEvaluation', 'model_evaluations', coder=coder)

  analyzer = 'AnalyzeModel' >> ml.AnalyzeModel(metadata)
  confusion_matrix, precision_recall, logloss = evaluation_data | analyzer

  cm_path = os.path.join(args.output_dir, 'analyzer_cm.csv')
  confusion_matrix | io.SaveConfusionMatrixCsv(cm_path)

  pr_path = os.path.join(args.output_dir, 'analyzer_pr.csv')
  precision_recall | io.SavePrecisionRecallCsv(pr_path)

  write_text_file(logloss, 'Write Log Loss', 'analyzer_logloss.csv')

  return confusion_matrix, precision_recall, logloss