import os

import apache_beam as beam

# Assumed imports for the Cloud ML SDK modules (ml, io) this sample relies on.
import google.cloud.ml as ml
import google.cloud.ml.io as io

# Assumed constant; the earlier variant below hard-codes the same file name.
METADATA_FILE_NAME = 'metadata.yaml'


def model_analysis(pipeline, output_dir, evaluation_data=None, metadata=None):
  """Analyzes model evaluation results and writes the analysis to output_dir."""
  if not metadata:
    metadata = (
        pipeline
        | 'LoadMetadataForAnalysis' >> io.LoadMetadata(
            os.path.join(output_dir, METADATA_FILE_NAME)))
  if not evaluation_data:
    # Evaluation rows are CSV records; the listed numeric columns are decoded
    # as numbers rather than strings.
    coder = io.CsvCoder(
        column_names=[
            'key', 'target', 'predicted', 'score', 'target_label',
            'predicted_label', 'all_scores'
        ],
        numeric_column_names=['target', 'predicted', 'score'])
    evaluation_data = (
        pipeline
        | 'ReadEvaluation' >> beam.io.ReadFromText(
            os.path.join(output_dir, 'model_evaluations*'), coder=coder))
  confusion_matrix, precision_recall, logloss = (
      evaluation_data | 'AnalyzeModel' >> ml.AnalyzeModel(metadata))
  confusion_matrix | io.SaveConfusionMatrixCsv(
      os.path.join(output_dir, 'analyzer_cm.csv'))
  precision_recall | io.SavePrecisionRecallCsv(
      os.path.join(output_dir, 'analyzer_pr.csv'))
  (logloss
   | 'WriteLogLoss' >> beam.io.WriteToText(
       os.path.join(output_dir, 'analyzer_logloss'), file_name_suffix='.csv'))
  return confusion_matrix, precision_recall, logloss
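# A minimal usage sketch, not part of the original sample: wiring
# model_analysis into a pipeline. The function name run_analysis is
# hypothetical, and it assumes output_dir already holds the metadata file and
# 'model_evaluations*' shards written by an earlier evaluation step.
def run_analysis(output_dir):
  pipeline = beam.Pipeline()
  model_analysis(pipeline, output_dir)
  pipeline.run()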
# Earlier variant of the same function. It reads the output directory from a
# module-level `args` namespace (parsed from the command line elsewhere in the
# sample) instead of taking it as a parameter, and delegates file I/O to the
# read_text_file/write_text_file helpers sketched below.
def model_analysis(pipeline, evaluation_data=None, metadata=None):
  if not metadata:
    metadata = pipeline | io.LoadMetadata(
        os.path.join(args.output_dir, 'metadata.yaml'))
  if not evaluation_data:
    coder = io.CsvCoder(['key', 'target', 'predicted', 'score'],
                        ['target', 'predicted', 'score'])
    evaluation_data = read_text_file(pipeline, 'ReadEvaluation',
                                     'model_evaluations', coder=coder)
  confusion_matrix, precision_recall, logloss = (
      evaluation_data | 'AnalyzeModel' >> ml.AnalyzeModel(metadata))
  confusion_matrix | io.SaveConfusionMatrixCsv(
      os.path.join(args.output_dir, 'analyzer_cm.csv'))
  precision_recall | io.SavePrecisionRecallCsv(
      os.path.join(args.output_dir, 'analyzer_pr.csv'))
  write_text_file(logloss, 'Write Log Loss', 'analyzer_logloss.csv')
  return confusion_matrix, precision_recall, logloss
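# Hedged sketch of the read_text_file/write_text_file helpers used above; the
# original definitions are not shown in this section. Based on the inlined
# calls in the first variant, they plausibly wrap beam.io.ReadFromText and
# beam.io.WriteToText and resolve paths against args.output_dir. The
# signatures and behavior here are assumptions, not the sample's actual
# implementation.
def read_text_file(pipeline, label, file_name, coder):
  # Read sharded text files matching '<file_name>*' under the output dir.
  return pipeline | label >> beam.io.ReadFromText(
      os.path.join(args.output_dir, file_name + '*'), coder=coder)


def write_text_file(pcollection, label, file_name):
  # Write a PCollection as text under the output dir.
  return pcollection | label >> beam.io.WriteToText(
      os.path.join(args.output_dir, file_name))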