Beispiel #1
0
def model_analysis(pipeline, output_dir, evaluation_data=None, metadata=None):
    """Attach the model-analysis steps to ``pipeline`` and write their results.

    Args:
        pipeline: Pipeline object used as the source for any input that has
            to be loaded here.
        output_dir: Directory that holds the metadata and evaluation files
            and receives the analysis output.
        evaluation_data: Evaluation records to analyze. When falsy, they are
            read from files matching ``model_evaluations*`` in
            ``output_dir`` using a CSV coder.
        metadata: Metadata handed to ``ml.AnalyzeModel``. When falsy, it is
            loaded from ``METADATA_FILE_NAME`` in ``output_dir``.

    Returns:
        The ``(confusion_matrix, precision_recall, logloss)`` triple produced
        by ``ml.AnalyzeModel``.
    """
    if not metadata:
        metadata_path = os.path.join(output_dir, METADATA_FILE_NAME)
        load_metadata = io.LoadMetadata(metadata_path)
        metadata = pipeline | 'LoadMetadataForAnalysis' >> load_metadata

    if not evaluation_data:
        csv_coder = io.CsvCoder(
            column_names=[
                'key',
                'target',
                'predicted',
                'score',
                'target_label',
                'predicted_label',
                'all_scores',
            ],
            numeric_column_names=['target', 'predicted', 'score'],
        )
        evaluation_glob = os.path.join(output_dir, 'model_evaluations*')
        read_evaluations = beam.io.ReadFromText(evaluation_glob, coder=csv_coder)
        evaluation_data = pipeline | 'ReadEvaluation' >> read_evaluations

    analysis = evaluation_data | 'AnalyzeModel' >> ml.AnalyzeModel(metadata)
    confusion_matrix, precision_recall, logloss = analysis

    # Persist each analysis output next to the inputs in output_dir.
    cm_path = os.path.join(output_dir, 'analyzer_cm.csv')
    confusion_matrix | io.SaveConfusionMatrixCsv(cm_path)

    pr_path = os.path.join(output_dir, 'analyzer_pr.csv')
    precision_recall | io.SavePrecisionRecallCsv(pr_path)

    logloss_prefix = os.path.join(output_dir, 'analyzer_logloss')
    write_logloss = beam.io.WriteToText(logloss_prefix, file_name_suffix='.csv')
    logloss | 'WriteLogLoss' >> write_logloss

    return confusion_matrix, precision_recall, logloss
Beispiel #2
0
def model_analysis(pipeline, evaluation_data=None, metadata=None):
  """Attach the model-analysis steps to ``pipeline`` and write their results.

  NOTE(review): relies on a module-level ``args`` object (for
  ``args.output_dir``) and on the ``read_text_file`` / ``write_text_file``
  helpers, none of which are defined in this block; how those helpers
  resolve the bare file names passed to them should be confirmed there.

  Args:
    pipeline: Pipeline object used as the source for any input that has to
      be loaded here.
    evaluation_data: Evaluation records to analyze. When falsy, they are
      obtained via ``read_text_file`` for ``'model_evaluations'`` with a
      CSV coder.
    metadata: Metadata handed to ``ml.AnalyzeModel``. When falsy, it is
      loaded from ``metadata.yaml`` in ``args.output_dir``.

  Returns:
    The ``(confusion_matrix, precision_recall, logloss)`` triple produced
    by ``ml.AnalyzeModel``.
  """
  if not metadata:
    metadata_path = os.path.join(args.output_dir, "metadata.yaml")
    metadata = pipeline | io.LoadMetadata(metadata_path)

  if not evaluation_data:
    all_columns = ['key', 'target', 'predicted', 'score']
    numeric_columns = ['target', 'predicted', 'score']
    csv_coder = io.CsvCoder(all_columns, numeric_columns)
    evaluation_data = read_text_file(
        pipeline, 'ReadEvaluation', 'model_evaluations', coder=csv_coder)

  analysis = evaluation_data | 'AnalyzeModel' >> ml.AnalyzeModel(metadata)
  confusion_matrix, precision_recall, logloss = analysis

  # Persist each analysis output.
  cm_path = os.path.join(args.output_dir, 'analyzer_cm.csv')
  confusion_matrix | io.SaveConfusionMatrixCsv(cm_path)

  pr_path = os.path.join(args.output_dir, 'analyzer_pr.csv')
  precision_recall | io.SavePrecisionRecallCsv(pr_path)

  write_text_file(logloss, 'Write Log Loss', 'analyzer_logloss.csv')

  return confusion_matrix, precision_recall, logloss