def train(pipeline,
          output_dir,
          train_args_dict,
          train_features=None,
          eval_features=None,
          metadata=None):
  """Trains a model on preprocessed features and saves it under output_dir."""
  if not train_features:
    train_features = (
        pipeline
        | 'ReadTrain' >> io.LoadFeatures(
            os.path.join(output_dir, 'features_train*')))
  if not eval_features:
    eval_features = (
        pipeline
        | 'ReadEval' >> io.LoadFeatures(
            os.path.join(output_dir, 'features_eval*')))
  if not metadata:
    metadata = (
        pipeline
        | 'ReadMetadata' >> io.LoadMetadata(
            os.path.join(output_dir, METADATA_FILE_NAME)))

  trained_model, results = ((train_features, eval_features)
                            | 'Train' >> ml.Train(**train_args_dict))
  trained_model | 'SaveModel' >> io.SaveModel(
      os.path.join(output_dir, 'saved_model'))
  results | io.SaveTrainingJobResult(
      os.path.join(output_dir, 'train_results'))
  return trained_model, results
def evaluate(pipeline, output_dir, trained_model=None, eval_features=None):
  """Runs batch evaluation and writes per-example results as CSV."""
  if not eval_features:
    eval_features = (
        pipeline
        | 'ReadEval' >> io.LoadFeatures(
            os.path.join(output_dir, 'features_eval*')))
  if not trained_model:
    trained_model = (
        pipeline
        | 'LoadModel' >> io.LoadModel(
            os.path.join(output_dir, 'saved_model')))

  # Run our evaluation data through a Batch Evaluation, then pull out just
  # the expected and predicted target values.
  vocab_loader = LazyVocabLoader(os.path.join(output_dir, METADATA_FILE_NAME))
  evaluations = (
      eval_features
      | 'Evaluate' >> ml.Evaluate(trained_model)
      | 'CreateEvaluations' >> beam.Map(make_evaluation_dict, vocab_loader))

  coder = io.CsvCoder(
      column_names=[
          'key', 'target', 'predicted', 'score', 'target_label',
          'predicted_label', 'all_scores'
      ],
      numeric_column_names=['target', 'predicted', 'score'])
  (evaluations
   | 'WriteEvaluation' >> beam.io.textio.WriteToText(
       os.path.join(output_dir, 'model_evaluations'),
       file_name_suffix='.csv',
       coder=coder))
  return evaluations
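# A minimal sketch of how the two stages above might be wired into a single
# Beam pipeline run. The runner name, the output path argument, and the keys
# inside the hypothetical train_args_dict are assumptions for illustration,
# not part of the sample itself.
def run_training_and_eval(output_dir, train_args_dict):
  pipeline = beam.Pipeline('DirectRunner')  # assumed runner for local testing
  # train() falls back to reading 'features_train*' / 'features_eval*' from
  # output_dir when feature PCollections are not passed in explicitly.
  trained_model, _ = train(pipeline, output_dir, train_args_dict)
  # Feed the freshly trained model straight into batch evaluation.
  evaluate(pipeline, output_dir, trained_model=trained_model)
  pipeline.run()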
def train(pipeline, train_features=None, eval_features=None, metadata=None):
  """Trains a model, reading the output directory from the global args."""
  if not train_features:
    train_features = (
        pipeline
        | 'ReadTrain' >> io.LoadFeatures(
            os.path.join(args.output_dir, 'features_train*')))
  if not eval_features:
    eval_features = (
        pipeline
        | 'ReadEval' >> io.LoadFeatures(
            os.path.join(args.output_dir, 'features_eval*')))

  trained_model, results = ((train_features, eval_features)
                            | ml.Train(**get_train_parameters(metadata)))
  trained_model | 'SaveModel' >> io.SaveModel(
      os.path.join(args.output_dir, 'saved_model'))
  results | io.SaveTrainingJobResult(
      os.path.join(args.output_dir, 'train_results'))
  return trained_model, results
def evaluate(pipeline, trained_model=None, eval_features=None):
  """Evaluates the trained model and writes the results as CSV."""
  if not eval_features:
    eval_features = (
        pipeline
        | 'ReadEval' >> io.LoadFeatures(
            os.path.join(args.output_dir, 'features_eval*')))
  if not trained_model:
    trained_model = (
        pipeline
        | 'LoadModel' >> io.LoadModel(
            os.path.join(args.output_dir, 'saved_model')))

  # Run our evaluation data through a Batch Evaluation, then pull out just
  # the expected and predicted target values.
  evaluations = (
      eval_features
      | 'Evaluate' >> ml.Evaluate(trained_model)
      | 'CreateEvaluations' >> beam.Map(make_evaluation_dict))

  coder = io.CsvCoder(['key', 'target', 'predicted', 'score'],
                      ['target', 'predicted', 'score'])
  write_text_file(evaluations, 'WriteEvaluation', 'model_evaluations', coder)
  return evaluations
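# A similar sketch for the variant above, which reads the output directory
# from a module-level `args` namespace. The argparse flag name, the runner,
# and the explicit metadata load are assumptions for illustration only.
import argparse

def main(argv=None):
  global args
  parser = argparse.ArgumentParser()
  parser.add_argument('--output_dir', required=True)
  args, _ = parser.parse_known_args(argv)

  pipeline = beam.Pipeline('DirectRunner')  # assumed runner
  # Assumed: load the preprocessing metadata so get_train_parameters() can
  # derive its settings, mirroring the ReadMetadata step in the first variant.
  metadata = pipeline | 'ReadMetadata' >> io.LoadMetadata(
      os.path.join(args.output_dir, METADATA_FILE_NAME))
  trained_model, _ = train(pipeline, metadata=metadata)
  evaluate(pipeline, trained_model=trained_model)
  pipeline.run()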