def train(pipeline, output_dir, train_args_dict, train_features=None,
          eval_features=None, metadata=None):
    """Add the training stage to the pipeline and persist its outputs.

    Any of ``train_features``, ``eval_features`` or ``metadata`` that is
    falsy is replaced by a read step applied to ``pipeline`` that loads it
    from ``output_dir``.

    Args:
        pipeline: Object the read transforms are applied to with ``|``
            (presumably a Beam pipeline -- confirm against the caller).
        output_dir: Directory that inputs are read from and outputs are
            written under.
        train_args_dict: Keyword arguments expanded into ``ml.Train``.
        train_features: Optional training features; loaded from
            ``features_train*`` when not supplied.
        eval_features: Optional evaluation features; loaded from
            ``features_eval*`` when not supplied.
        metadata: Optional metadata; loaded from ``METADATA_FILE_NAME``
            when not supplied.

    Returns:
        The ``(trained_model, results)`` pair produced by ``ml.Train``.
    """

    def _under_output_dir(name):
        # Every path used by this stage lives directly under output_dir.
        return os.path.join(output_dir, name)

    if not train_features:
        load_train = io.LoadFeatures(_under_output_dir('features_train*'))
        train_features = pipeline | 'ReadTrain' >> load_train

    if not eval_features:
        load_eval = io.LoadFeatures(_under_output_dir('features_eval*'))
        eval_features = pipeline | 'ReadEval' >> load_eval

    if not metadata:
        # NOTE: `metadata` is not referenced again in this function; the
        # read step is still applied to the pipeline here.
        load_metadata = io.LoadMetadata(_under_output_dir(METADATA_FILE_NAME))
        metadata = pipeline | 'ReadMetadata' >> load_metadata

    feature_inputs = (train_features, eval_features)
    trained_model, results = (
        feature_inputs | 'Train' >> ml.Train(**train_args_dict))

    save_model = io.SaveModel(_under_output_dir('saved_model'))
    trained_model | 'SaveModel' >> save_model

    save_results = io.SaveTrainingJobResult(_under_output_dir('train_results'))
    results | save_results

    return trained_model, results
def model_analysis(pipeline, output_dir, evaluation_data=None, metadata=None):
    """Add the model-analysis stage to the pipeline and write its reports.

    Falsy ``metadata`` / ``evaluation_data`` arguments are replaced by read
    steps that load them from ``output_dir``. The evaluation data is then
    passed through ``ml.AnalyzeModel`` and the three results are written
    under ``output_dir``.

    Args:
        pipeline: Object the read transforms are applied to with ``|``.
        output_dir: Directory that inputs are read from and reports are
            written under.
        evaluation_data: Optional evaluation records; read from files
            matching ``model_evaluations*`` when not supplied.
        metadata: Optional metadata; loaded from ``METADATA_FILE_NAME``
            when not supplied.

    Returns:
        The ``(confusion_matrix, precision_recall, logloss)`` triple
        produced by ``ml.AnalyzeModel``.
    """

    def _under_output_dir(name):
        # Every path used by this stage lives directly under output_dir.
        return os.path.join(output_dir, name)

    if not metadata:
        load_metadata = io.LoadMetadata(_under_output_dir(METADATA_FILE_NAME))
        metadata = pipeline | 'LoadMetadataForAnalysis' >> load_metadata

    if not evaluation_data:
        all_columns = [
            'key',
            'target',
            'predicted',
            'score',
            'target_label',
            'predicted_label',
            'all_scores',
        ]
        numeric_columns = ['target', 'predicted', 'score']
        coder = io.CsvCoder(
            column_names=all_columns,
            numeric_column_names=numeric_columns)
        read_evaluations = beam.io.ReadFromText(
            _under_output_dir('model_evaluations*'), coder=coder)
        evaluation_data = pipeline | 'ReadEvaluation' >> read_evaluations

    analysis = evaluation_data | 'AnalyzeModel' >> ml.AnalyzeModel(metadata)
    confusion_matrix, precision_recall, logloss = analysis

    save_cm = io.SaveConfusionMatrixCsv(_under_output_dir('analyzer_cm.csv'))
    confusion_matrix | save_cm

    save_pr = io.SavePrecisionRecallCsv(_under_output_dir('analyzer_pr.csv'))
    precision_recall | save_pr

    write_logloss = beam.io.WriteToText(
        _under_output_dir('analyzer_logloss'), file_name_suffix='.csv')
    logloss | 'WriteLogLoss' >> write_logloss

    return confusion_matrix, precision_recall, logloss
def model_analysis(pipeline, evaluation_data=None, metadata=None):
    """Add the model-analysis stage to the pipeline and write its reports.

    Falsy ``metadata`` / ``evaluation_data`` arguments are replaced by read
    steps. Output locations are derived from the module-level
    ``args.output_dir``; the evaluation read and log-loss write are
    delegated to ``read_text_file`` / ``write_text_file``.

    Args:
        pipeline: Object the read transforms are applied to.
        evaluation_data: Optional evaluation records; obtained through
            ``read_text_file`` when not supplied.
        metadata: Optional metadata; loaded from ``metadata.yaml`` under
            ``args.output_dir`` when not supplied.

    Returns:
        The ``(confusion_matrix, precision_recall, logloss)`` triple
        produced by ``ml.AnalyzeModel``.
    """

    def _under_output_dir(name):
        # Looked up on each call so `args` is only touched when a path is
        # actually needed, as in the inline os.path.join calls it replaces.
        return os.path.join(args.output_dir, name)

    if not metadata:
        load_metadata = io.LoadMetadata(_under_output_dir("metadata.yaml"))
        metadata = pipeline | load_metadata

    if not evaluation_data:
        all_columns = ['key', 'target', 'predicted', 'score']
        numeric_columns = ['target', 'predicted', 'score']
        coder = io.CsvCoder(all_columns, numeric_columns)
        evaluation_data = read_text_file(
            pipeline, 'ReadEvaluation', 'model_evaluations', coder=coder)

    analysis = evaluation_data | 'AnalyzeModel' >> ml.AnalyzeModel(metadata)
    confusion_matrix, precision_recall, logloss = analysis

    save_cm = io.SaveConfusionMatrixCsv(_under_output_dir('analyzer_cm.csv'))
    confusion_matrix | save_cm

    save_pr = io.SavePrecisionRecallCsv(_under_output_dir('analyzer_pr.csv'))
    precision_recall | save_pr

    write_text_file(logloss, 'Write Log Loss', 'analyzer_logloss.csv')

    return confusion_matrix, precision_recall, logloss