def benchmarkMiniPipelineUnbatched(self):
    """Time an unbatched "mini" TFMA run: predict, slice, compute metrics.

    Initializes the model via ``self._init_model()``, then assembles a Beam
    pipeline on ``FnApiRunner`` that feeds up to ``MAX_NUM_EXAMPLES`` raw
    (non-deserialized) examples through the input extractor, the V2 predict
    extractor, the slice key extractor and the V2 metrics-and-plots
    evaluator.

    Only ``pipeline.run()`` through ``wait_until_finish()`` is timed;
    pipeline construction is excluded. The elapsed wall time is reported
    through ``self.report_benchmark`` together with the number of examples.
    """
    self._init_model()
    beam_pipeline = beam.Pipeline(runner=fn_api_runner.FnApiRunner())

    # Stage 1: raw examples wrapped as extracts.
    serialized_examples = beam_pipeline | "Examples" >> beam.Create(
        self._dataset.read_raw_dataset(
            deserialize=False, limit=MAX_NUM_EXAMPLES))
    extracts = (
        serialized_examples | "InputsToExtracts" >> tfma.InputsToExtracts())

    # Stage 2: extractors followed by the evaluator. Each transform is
    # constructed right before it is applied, one stage at a time.
    with_inputs = extracts | "InputExtractor" >> (
        input_extractor.InputExtractor(
            eval_config=self._eval_config).ptransform)
    with_predictions = with_inputs | "V2PredictExtractor" >> (
        predict_extractor_v2.PredictExtractor(
            eval_config=self._eval_config,
            eval_shared_model=self._eval_shared_model).ptransform)
    with_slice_keys = with_predictions | "SliceKeyExtractor" >> (
        tfma.extractors.SliceKeyExtractor().ptransform)
    _ = with_slice_keys | "V2ComputeMetricsAndPlots" >> (
        metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
            eval_config=self._eval_config,
            eval_shared_model=self._eval_shared_model).ptransform)

    # Measure execution only (run + wait for completion).
    started_at = time.time()
    beam_pipeline.run().wait_until_finish()
    elapsed_seconds = time.time() - started_at

    self.report_benchmark(
        iters=1,
        wall_time=elapsed_seconds,
        extras={
            "num_examples":
                self._dataset.num_examples(limit=MAX_NUM_EXAMPLES),
        })
def benchmarkMiniPipeline(self):
    """Time a "mini" TFMA run: predict, slice and compute metrics.

    Assembles a Beam pipeline (obtained from ``self._create_beam_pipeline()``)
    that feeds up to ``MAX_NUM_EXAMPLES`` raw (non-deserialized) examples
    through the predict extractor, the slice key extractor and the V1
    metrics-and-plots evaluator, using an eval shared model loaded from the
    dataset's TFMA saved model path.

    Only ``pipeline.run()`` through ``wait_until_finish()`` is timed;
    pipeline construction is excluded. The elapsed wall time is reported
    through ``self.report_benchmark`` together with the number of examples.
    """
    beam_pipeline = self._create_beam_pipeline()

    # Stage 1: raw examples wrapped as extracts.
    serialized_examples = beam_pipeline | "Examples" >> beam.Create(
        self._dataset.read_raw_dataset(
            deserialize=False, limit=MAX_NUM_EXAMPLES))
    extracts = (
        serialized_examples | "InputsToExtracts" >> tfma.InputsToExtracts())

    shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=self._dataset.tfma_saved_model_path())

    # Stage 2: extractors followed by the evaluator, applied one at a time.
    with_predictions = extracts | "PredictExtractor" >> (
        tfma.extractors.PredictExtractor(
            eval_shared_model=shared_model).ptransform)
    with_slice_keys = with_predictions | "SliceKeyExtractor" >> (
        tfma.extractors.SliceKeyExtractor().ptransform)
    _ = with_slice_keys | "ComputeMetricsAndPlots" >> (
        tfma.evaluators.MetricsAndPlotsEvaluatorV1(
            eval_shared_model=shared_model).ptransform)

    # Measure execution only (run + wait for completion).
    started_at = time.time()
    beam_pipeline.run().wait_until_finish()
    elapsed_seconds = time.time() - started_at

    self.report_benchmark(
        iters=1,
        wall_time=elapsed_seconds,
        extras={
            "num_examples":
                self._dataset.num_examples(limit=MAX_NUM_EXAMPLES),
        })
def process_tfma(schema_file,
                 big_query_table=None,
                 eval_model_dir=None,
                 max_eval_rows=None,
                 pipeline_args=None,
                 publish_to_bq=False,
                 project=None,
                 metrics_table=None,
                 metrics_dataset=None):
    """Runs a batch job to evaluate the eval model against BigQuery input.

    Rows are read from BigQuery, cleaned, encoded as serialized tf.Examples
    and passed through TFMA's default extractors and evaluators. The read
    and the final evaluation output are each followed by a ``MeasureTime``
    step registered under the ``metrics_table`` namespace.

    Args:
      schema_file: A file containing a text-serialized Schema that describes
        the eval data. Passed to ``taxi.read_schema``.
      big_query_table: A BigQuery table name specified as DATASET.TABLE which
        is the input for evaluation. Required; must be non-empty.
      eval_model_dir: A directory where the eval model is located. Passed as
        ``eval_saved_model_path`` to ``tfma.default_eval_shared_model``.
      max_eval_rows: Number of rows to query from BigQuery. Passed to
        ``taxi.make_sql``.
      pipeline_args: Additional DataflowRunner or DirectRunner args passed to
        the beam pipeline as ``argv``.
      publish_to_bq: When truthy, a ``MetricsReader`` is created and
        ``publish_metrics`` is called on it with the pipeline result once
        the pipeline has finished.
      project: Project passed to ``ReadFromBigQuery`` and, when publishing,
        to ``MetricsReader`` as ``project_name``.
      metrics_table: Used as the metrics namespace for the ``MeasureTime``
        steps and the ``MetricsFilter``, and passed to ``MetricsReader`` as
        ``bq_table`` when publishing.
      metrics_dataset: Passed to ``MetricsReader`` as ``bq_dataset`` when
        publishing.

    Raises:
      ValueError: if ``big_query_table`` is missing or empty.
    """
    # Fail fast on a missing *or empty* table name: an empty string would
    # otherwise only surface as a query error after the eval model has been
    # loaded and the pipeline constructed.
    if not big_query_table:
        raise ValueError(
            '--big_query_table should be provided.')

    slice_spec = [
        tfma.slicer.SingleSliceSpec(),
        tfma.slicer.SingleSliceSpec(columns=['trip_start_hour'])
    ]
    # The metrics table name doubles as the namespace for timing metrics.
    metrics_namespace = metrics_table

    schema = taxi.read_schema(schema_file)

    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=eval_model_dir,
        add_metrics_callbacks=[
            tfma.post_export_metrics.calibration_plot_and_prediction_histogram(),
            tfma.post_export_metrics.auc_plots()
        ])

    metrics_monitor = None
    if publish_to_bq:
        metrics_monitor = MetricsReader(
            publish_to_bq=publish_to_bq,
            project_name=project,
            bq_table=metrics_table,
            bq_dataset=metrics_dataset,
            filters=MetricsFilter().with_namespace(metrics_namespace)
        )

    pipeline = beam.Pipeline(argv=pipeline_args)

    query = taxi.make_sql(big_query_table, max_eval_rows, for_eval=True)
    raw_feature_spec = taxi.get_raw_feature_spec(schema)
    raw_data = (
        pipeline
        | 'ReadBigQuery' >> ReadFromBigQuery(
            query=query, project=project, use_standard_sql=True)
        | 'Measure time: Start' >> beam.ParDo(MeasureTime(metrics_namespace))
        | 'CleanData' >> beam.Map(
            lambda x: (taxi.clean_raw_data_dict(x, raw_feature_spec))))

    # Examples must be in clean tf-example format.
    coder = taxi.make_proto_coder(schema)

    # Prepare arguments for Extract, Evaluate and Write steps
    extractors = tfma.default_extractors(
        eval_shared_model=eval_shared_model,
        slice_spec=slice_spec,
        desired_batch_size=None,
        materialize=False)

    evaluators = tfma.default_evaluators(
        eval_shared_model=eval_shared_model,
        desired_batch_size=None,
        num_bootstrap_samples=1)

    _ = (
        raw_data
        | 'ToSerializedTFExample' >> beam.Map(coder.encode)
        | 'Extract Results' >> tfma.InputsToExtracts()
        | 'Extract and evaluate' >> tfma.ExtractAndEvaluate(
            extractors=extractors, evaluators=evaluators)
        | 'Map Evaluations to PCollection' >> MapEvalToPCollection()
        | 'Measure time: End' >> beam.ParDo(
            MeasureTime(metrics_namespace))
    )

    result = pipeline.run()
    result.wait_until_finish()
    # Metrics can only be published after the pipeline has finished.
    if metrics_monitor:
        metrics_monitor.publish_metrics(result)