# NOTE: assumes the Cloud ML SDK transforms and readers are in scope as `ml`
# and `io`, along with `os`, Apache Beam as `beam`, and the sample's own
# helpers/constants (METADATA_FILE_NAME, LazyVocabLoader,
# make_evaluation_dict); the actual import lines live elsewhere in the sample.


def model_analysis(pipeline, output_dir, evaluation_data=None, metadata=None):
  """Computes confusion matrix, precision/recall, and log loss from evaluations."""
  if not metadata:
    metadata = (pipeline
                | 'LoadMetadataForAnalysis' >> io.LoadMetadata(
                    os.path.join(output_dir, METADATA_FILE_NAME)))
  if not evaluation_data:
    # Re-read the evaluation CSVs written by evaluate() if the PCollection
    # was not handed in directly.
    coder = io.CsvCoder(
        column_names=[
            'key', 'target', 'predicted', 'score', 'target_label',
            'predicted_label', 'all_scores'
        ],
        numeric_column_names=['target', 'predicted', 'score'])
    evaluation_data = (
        pipeline
        | 'ReadEvaluation' >> beam.io.ReadFromText(
            os.path.join(output_dir, 'model_evaluations*'), coder=coder))

  confusion_matrix, precision_recall, logloss = (
      evaluation_data | 'AnalyzeModel' >> ml.AnalyzeModel(metadata))

  confusion_matrix | io.SaveConfusionMatrixCsv(
      os.path.join(output_dir, 'analyzer_cm.csv'))
  precision_recall | io.SavePrecisionRecallCsv(
      os.path.join(output_dir, 'analyzer_pr.csv'))
  (logloss
   | 'WriteLogLoss' >> beam.io.WriteToText(
       os.path.join(output_dir, 'analyzer_logloss'), file_name_suffix='.csv'))
  return confusion_matrix, precision_recall, logloss
def evaluate(pipeline, output_dir, trained_model=None, eval_features=None):
  if not eval_features:
    eval_features = (pipeline
                     | 'ReadEval' >> io.LoadFeatures(
                         os.path.join(output_dir, 'features_eval*')))
  if not trained_model:
    trained_model = (pipeline
                     | 'LoadModel' >> io.LoadModel(
                         os.path.join(output_dir, 'saved_model')))

  # Run our evaluation data through a Batch Evaluation, then pull out just
  # the expected and predicted target values.
  vocab_loader = LazyVocabLoader(os.path.join(output_dir, METADATA_FILE_NAME))
  evaluations = (
      eval_features
      | 'Evaluate' >> ml.Evaluate(trained_model)
      | 'CreateEvaluations' >> beam.Map(make_evaluation_dict, vocab_loader))

  coder = io.CsvCoder(
      column_names=[
          'key', 'target', 'predicted', 'score', 'target_label',
          'predicted_label', 'all_scores'
      ],
      numeric_column_names=['target', 'predicted', 'score'])
  (evaluations
   | 'WriteEvaluation' >> beam.io.textio.WriteToText(
       os.path.join(output_dir, 'model_evaluations'),
       file_name_suffix='.csv',
       coder=coder))
  return evaluations
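# Hypothetical driver (not part of the sample): a minimal sketch of how the
# evaluate() and model_analysis() functions above could be chained in a single
# Beam pipeline. The helper name, runner name, and the way output_dir is
# supplied are illustrative assumptions; because evaluate() returns the
# evaluations PCollection, it can be passed straight to model_analysis()
# instead of being re-read from the model_evaluations*.csv files.
import apache_beam as beam


def run_eval_and_analysis(output_dir):  # hypothetical helper
  pipeline = beam.Pipeline('DirectRunner')
  evaluations = evaluate(pipeline, output_dir)
  model_analysis(pipeline, output_dir, evaluation_data=evaluations)
  pipeline.run()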
def model_analysis(pipeline, evaluation_data=None, metadata=None):
  if not metadata:
    metadata = pipeline | io.LoadMetadata(
        os.path.join(args.output_dir, 'metadata.yaml'))
  if not evaluation_data:
    coder = io.CsvCoder(['key', 'target', 'predicted', 'score'],
                        ['target', 'predicted', 'score'])
    evaluation_data = read_text_file(pipeline, 'ReadEvaluation',
                                     'model_evaluations', coder=coder)

  confusion_matrix, precision_recall, logloss = (
      evaluation_data | 'AnalyzeModel' >> ml.AnalyzeModel(metadata))

  confusion_matrix | io.SaveConfusionMatrixCsv(
      os.path.join(args.output_dir, 'analyzer_cm.csv'))
  precision_recall | io.SavePrecisionRecallCsv(
      os.path.join(args.output_dir, 'analyzer_pr.csv'))
  write_text_file(logloss, 'Write Log Loss', 'analyzer_logloss.csv')
  return confusion_matrix, precision_recall, logloss
def evaluate(pipeline, trained_model=None, eval_features=None):
  if not eval_features:
    eval_features = (
        pipeline
        | 'ReadEval' >> io.LoadFeatures(
            os.path.join(args.output_dir, 'features_eval*')))
  if not trained_model:
    trained_model = (
        pipeline
        | 'LoadModel' >> io.LoadModel(
            os.path.join(args.output_dir, 'saved_model')))

  # Run our evaluation data through a Batch Evaluation, then pull out just
  # the expected and predicted target values.
  evaluations = (
      eval_features
      | 'Evaluate' >> ml.Evaluate(trained_model)
      | 'CreateEvaluations' >> beam.Map(make_evaluation_dict))

  coder = io.CsvCoder(['key', 'target', 'predicted', 'score'],
                      ['target', 'predicted', 'score'])
  write_text_file(evaluations, 'WriteEvaluation', 'model_evaluations', coder)
  return evaluations
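# Hypothetical driver for the variant above (not part of the sample): these
# versions of evaluate() and model_analysis() read the output directory from a
# module-level `args` object and delegate file I/O to the read_text_file /
# write_text_file helpers defined elsewhere in the sample. The entry-point
# name, argparse flag, and runner below are illustrative assumptions only.
import argparse

import apache_beam as beam


def run_analysis_from_flags(argv=None):  # hypothetical entry point
  global args
  parser = argparse.ArgumentParser()
  parser.add_argument('--output_dir', required=True)
  args = parser.parse_args(argv)

  pipeline = beam.Pipeline('DirectRunner')
  evaluations = evaluate(pipeline)
  model_analysis(pipeline, evaluation_data=evaluations)
  pipeline.run()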