def run_fn(fn_args: TrainerFnArgs):
  BATCH_SIZE = 65536
  tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

  train_dataset = _input_fn(fn_args.train_files, tf_transform_output, BATCH_SIZE)
  eval_dataset = _input_fn(fn_args.eval_files, tf_transform_output, BATCH_SIZE)

  mirrored_strategy = tf.distribute.MirroredStrategy()
  with mirrored_strategy.scope():
    model = _build_keras()

  log_dir = os.path.join(os.path.dirname(fn_args.serving_model_dir), 'logs')
  tensorboard_callback = tf.keras.callbacks.TensorBoard(
      log_dir=log_dir, update_freq='batch')

  model.fit(train_dataset,
            steps_per_epoch=fn_args.train_steps,
            validation_data=eval_dataset,
            validation_steps=fn_args.eval_steps,
            # callbacks=[tensorboard_callback])
            callbacks=[])

  signatures = {
      'serving_default':
          _get_serve_tf_examples_fn(
              model, tf_transform_output).get_concrete_function(
                  tf.TensorSpec(shape=[None], dtype=tf.string, name='examples')),
  }
  model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)

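# The run_fn above (and several snippets below) rely on an _input_fn helper
# that is not shown. A minimal sketch of the usual TFX/Keras version, assuming
# gzipped transformed tf.Example records and a transformed label key named
# 'label' (both assumptions):
def _input_fn(file_pattern, tf_transform_output, batch_size=200):
  """Builds a tf.data.Dataset of (features, label) batches from TFT output."""
  feature_spec = tf_transform_output.transformed_feature_spec().copy()
  return tf.data.experimental.make_batched_features_dataset(
      file_pattern=file_pattern,
      batch_size=batch_size,
      features=feature_spec,
      reader=lambda files: tf.data.TFRecordDataset(
          files, compression_type='GZIP'),
      label_key='label')  # hypothetical label key
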
def trainer_fn(hparams, schema):
  train_batch_size = 32
  eval_batch_size = 32

  tf_transform_output = tft.TFTransformOutput(hparams.transform_output)

  train_input_fn = lambda: _input_fn(
      hparams.train_files, tf_transform_output, batch_size=train_batch_size)
  eval_input_fn = lambda: _input_fn(
      hparams.eval_files, tf_transform_output, batch_size=eval_batch_size)

  train_spec = tf.estimator.TrainSpec(
      train_input_fn, max_steps=hparams.train_steps)

  serving_receiver_fn = lambda: _example_serving_receiver_fn(
      tf_transform_output, schema)

  exporter = tf.estimator.FinalExporter('cifar-10', serving_receiver_fn)
  eval_spec = tf.estimator.EvalSpec(eval_input_fn,
                                    steps=hparams.eval_steps,
                                    exporters=[exporter],
                                    name='cifar-10')

  estimator = _build_estimator()

  receiver_fn = lambda: _eval_input_receiver_fn(tf_transform_output, schema)

  return {
      'estimator': estimator,
      'train_spec': train_spec,
      'eval_spec': eval_spec,
      'eval_input_receiver_fn': receiver_fn
  }

def export_serving_model(classifier, serving_model_dir, raw_schema_location,
                         tft_output_dir):
  raw_schema = tfdv.load_schema_text(raw_schema_location)
  raw_feature_spec = schema_utils.schema_as_feature_spec(raw_schema).feature_spec

  tft_output = tft.TFTransformOutput(tft_output_dir)

  features_input_signature = {
      feature_name: tf.TensorSpec(
          shape=(None, 1), dtype=spec.dtype, name=feature_name)
      for feature_name, spec in raw_feature_spec.items()
      if feature_name in features.FEATURE_NAMES
  }

  signatures = {
      "serving_default":
          _get_serve_features_fn(
              classifier, tft_output).get_concrete_function(
                  features_input_signature),
      "serving_tf_example":
          _get_serve_tf_examples_fn(
              classifier, tft_output, raw_feature_spec).get_concrete_function(
                  tf.TensorSpec(shape=[None], dtype=tf.string, name="examples")),
  }

  logging.info("Model export started...")
  classifier.save(serving_model_dir, signatures=signatures)
  logging.info("Model export completed.")

def tuner_fn(fn_args: FnArgs) -> TunerFnResult:
  train_files = fn_args.train_files
  eval_files = fn_args.eval_files

  tf_transform_output = tft.TFTransformOutput(fn_args.transform_graph_path)

  hparams = _get_hyperparameters()
  tuner = kerastuner.Hyperband(
      hypermodel=_build_keras_model,
      hyperparameters=hparams,
      objective=kerastuner.Objective('binary_accuracy', 'max'),
      factor=3,
      max_epochs=2,
      directory=fn_args.working_dir,
      project_name='ftfx:simple_e2e')

  train_dataset = _input_fn(train_files, tf_transform_output)
  eval_dataset = _input_fn(eval_files, tf_transform_output)

  return TunerFnResult(
      tuner=tuner,
      fit_kwargs={
          'x': train_dataset,
          'validation_data': eval_dataset,
          'steps_per_epoch': fn_args.train_steps,
          'validation_steps': fn_args.eval_steps
      })

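# Note: the TFX Tuner component's executor calls tuner_fn and then invokes
# tuner.search(**fit_kwargs) on the returned TunerFnResult, which is why the
# fit_kwargs keys above mirror the arguments of keras.Model.fit.
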
def run_fn(fn_args):
  tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)
  print(tf_transform_output)
  # Placeholder: fail loudly until training is implemented.
  raise NotImplementedError('run_fn is not implemented yet.')

def trainer_fn(hparams, schema):
  """Build the estimator using the high level API.

  Args:
    hparams: Holds hyperparameters used to train the model as name/value
      pairs.
    schema: Holds the schema of the training examples.

  Returns:
    A dict of the following:
      - estimator: The estimator that will be used for training and eval.
      - train_spec: Spec for training.
      - eval_spec: Spec for eval.
      - eval_input_receiver_fn: Input function for eval.
  """
  train_batch_size = 32
  eval_batch_size = 32

  tf_transform_output = tft.TFTransformOutput(hparams.transform_output)

  train_input_fn = lambda: _input_fn(  # pylint: disable=g-long-lambda
      hparams.train_files, tf_transform_output, batch_size=train_batch_size)
  eval_input_fn = lambda: _input_fn(  # pylint: disable=g-long-lambda
      hparams.eval_files, tf_transform_output, batch_size=eval_batch_size)

  train_spec = tf.estimator.TrainSpec(
      train_input_fn, max_steps=hparams.train_steps)

  serving_receiver_fn = lambda: _example_serving_receiver_fn(  # pylint: disable=g-long-lambda
      tf_transform_output, schema)

  exporter = tf.estimator.FinalExporter('cifar-10', serving_receiver_fn)
  eval_spec = tf.estimator.EvalSpec(eval_input_fn,
                                    steps=hparams.eval_steps,
                                    exporters=[exporter],
                                    name='cifar-10')

  run_config = tf.estimator.RunConfig(save_checkpoints_steps=999,
                                      keep_checkpoint_max=1)
  run_config = run_config.replace(model_dir=hparams.serving_model_dir)

  estimator = tf.keras.estimator.model_to_estimator(
      keras_model=_keras_model_builder(), config=run_config)

  # Create an input receiver for TFMA processing.
  receiver_fn = lambda: _eval_input_receiver_fn(  # pylint: disable=g-long-lambda
      tf_transform_output, schema)

  return {
      'estimator': estimator,
      'train_spec': train_spec,
      'eval_spec': eval_spec,
      'eval_input_receiver_fn': receiver_fn
  }

def testWriteTransformFn(self):
  transform_output_dir = os.path.join(self.get_temp_dir(), 'output')

  with beam.Pipeline() as pipeline:
    # Create an empty directory for the source saved model dir.
    saved_model_dir = os.path.join(self.get_temp_dir(), 'source')
    file_io.recursive_create_dir(saved_model_dir)
    saved_model_dir_pcoll = (
        pipeline | 'CreateSavedModelDir' >> beam.Create([saved_model_dir]))
    metadata = beam_metadata_io.BeamDatasetMetadata(
        _TEST_METADATA_WITH_FUTURES, {
            'a': pipeline | 'CreateA' >> beam.Create([3]),
        })

    _ = ((saved_model_dir_pcoll, metadata)
         | transform_fn_io.WriteTransformFn(transform_output_dir))

  # Test reading with TFTransformOutput.
  tf_transform_output = tft.TFTransformOutput(transform_output_dir)
  metadata = tf_transform_output.transformed_metadata
  self.assertEqual(metadata, _TEST_METADATA)

  transform_fn_dir = tf_transform_output.transform_savedmodel_dir
  self.assertTrue(file_io.file_exists(transform_fn_dir))
  self.assertTrue(file_io.is_directory(transform_fn_dir))

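# For context (per the tf.Transform sources; worth verifying against your
# version): WriteTransformFn materializes two subdirectories under the output
# dir -- 'transform_fn' (the SavedModel) and 'transformed_metadata' -- which
# is exactly the layout tft.TFTransformOutput reads back above.
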
def run_fn(fn_args):
  """Train the model based on given args.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """
  tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

  train_dataset = _input_fn(fn_args.train_files, tf_transform_output,
                            constants.TRAIN_BATCH_SIZE)
  eval_dataset = _input_fn(fn_args.eval_files, tf_transform_output,
                           constants.EVAL_BATCH_SIZE)

  mirrored_strategy = tf.distribute.MirroredStrategy()
  with mirrored_strategy.scope():
    model = _build_keras_model(hidden_units=constants.HIDDEN_UNITS,
                               learning_rate=constants.LEARNING_RATE)

  model.fit(train_dataset,
            steps_per_epoch=fn_args.train_steps,
            validation_data=eval_dataset,
            validation_steps=fn_args.eval_steps)

  signatures = {
      'serving_default':
          _get_serve_tf_examples_fn(
              model, tf_transform_output).get_concrete_function(
                  tf.TensorSpec(shape=[None], dtype=tf.string, name='examples')),
  }
  model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)

def example_serving_receiver_fn(tf_transform_dir, raw_feature_spec,
                                target_feature, feature_id):
  """Creates the serving function that is used during inference.

  Args:
    tf_transform_dir: A directory in which the tf.Transform model was written
      during the preprocessing step.
    raw_feature_spec: A dictionary of raw feature spec for input data.
    target_feature: Key for target feature.
    feature_id: Key for id field in input data.

  Returns:
    An instance of tf.estimator.export.ServingInputReceiver that parses input
    data by applying the transformation from the saved tf.Transform graph.
  """
  if target_feature in raw_feature_spec:
    raw_feature_spec.pop(target_feature)

  raw_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
      raw_feature_spec, default_batch_size=None)
  serving_input_receiver = raw_input_fn()
  features = serving_input_receiver.features

  transform_output = tft.TFTransformOutput(tf_transform_dir)
  transformed_features = transform_output.transform_raw_features(features)
  transformed_features[feature_id] = convert_sparse_to_dense(
      transformed_features[feature_id])

  return tf.estimator.export.ServingInputReceiver(
      transformed_features, serving_input_receiver.receiver_tensors)

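# convert_sparse_to_dense is assumed above but not shown. A minimal sketch,
# assuming the transformed id feature is a SparseTensor with at most one
# numeric value per example (both assumptions):
def convert_sparse_to_dense(sparse_tensor):
  # Densify [batch, 1] -> [batch].
  return tf.squeeze(tf.sparse.to_dense(sparse_tensor), axis=1)
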
def run_fn(fn_args: tfx.components.FnArgs):
  """Train the model based on given args.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """
  tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

  train_dataset = input_fn(fn_args.train_files, fn_args.data_accessor,
                           tf_transform_output, base.TRAIN_BATCH_SIZE)
  eval_dataset = input_fn(fn_args.eval_files, fn_args.data_accessor,
                          tf_transform_output, base.EVAL_BATCH_SIZE)

  if fn_args.hyperparameters:
    hparams = kt.HyperParameters.from_config(fn_args.hyperparameters)
  else:
    # This is the case when the hyperparameters have already been decided and
    # the Tuner component is removed from the pipeline. Users can also inline
    # the hyperparameters directly in _make_keras_model.
    hparams = _get_hyperparameters()

  model = _make_keras_model(hparams)

  # Write logs to path.
  tensorboard_callback = tf.keras.callbacks.TensorBoard(
      log_dir=fn_args.model_run_dir, update_freq='batch')

  model.fit(
      train_dataset,
      validation_data=eval_dataset,
      callbacks=[tensorboard_callback])

  signatures = base.make_serving_signatures(model, tf_transform_output)
  model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)

def testWriteTransformFn(self):
  transform_output_dir = os.path.join(self.get_temp_dir(), 'output')

  with beam.Pipeline() as pipeline:
    # Create an empty directory for the source saved model dir.
    saved_model_dir = os.path.join(self.get_temp_dir(), 'source')
    file_io.recursive_create_dir(saved_model_dir)
    saved_model_dir_pcoll = (
        pipeline | 'CreateSavedModelDir' >> beam.Create([saved_model_dir]))
    # Combine test metadata with a dict of PCollections resolving futures.
    deferred_metadata = pipeline | 'CreateDeferredMetadata' >> beam.Create(
        [_TEST_METADATA_COMPLETE])
    metadata = beam_metadata_io.BeamDatasetMetadata(
        _TEST_METADATA, deferred_metadata)

    _ = ((saved_model_dir_pcoll, metadata)
         | transform_fn_io.WriteTransformFn(transform_output_dir))

  # Test reading with TFTransformOutput.
  tf_transform_output = tft.TFTransformOutput(transform_output_dir)
  metadata = tf_transform_output.transformed_metadata
  self.assertEqual(metadata, _TEST_METADATA_COMPLETE)

  transform_fn_dir = tf_transform_output.transform_savedmodel_dir
  self.assertTrue(file_io.file_exists(transform_fn_dir))
  self.assertTrue(file_io.is_directory(transform_fn_dir))

def run_fn(fn_args: TrainerFnArgs):
  """Train the model based on given args.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """
  tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

  train_dataset = _input_fn(fn_args.train_files, tf_transform_output, 40)
  eval_dataset = _input_fn(fn_args.eval_files, tf_transform_output, 40)

  model = _build_keras_model()

  model.fit(
      train_dataset,
      steps_per_epoch=fn_args.train_steps,
      validation_data=eval_dataset,
      validation_steps=fn_args.eval_steps)

  signatures = {
      'serving_default':
          _get_serve_tf_examples_fn(
              model, tf_transform_output).get_concrete_function(
                  tf.TensorSpec(shape=[None], dtype=tf.string, name='examples')),
  }
  model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)

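# _get_serve_tf_examples_fn is referenced throughout these run_fn snippets but
# not shown. A minimal sketch of the common TFX pattern, assuming the raw
# label key is named 'label' (an assumption) and is not fed at serving time:
def _get_serve_tf_examples_fn(model, tf_transform_output):
  """Returns a fn that parses serialized tf.Examples and applies TFT."""
  # Keep a reference so the TFT layer is tracked by the SavedModel.
  model.tft_layer = tf_transform_output.transform_features_layer()

  @tf.function
  def serve_tf_examples_fn(serialized_tf_examples):
    feature_spec = tf_transform_output.raw_feature_spec()
    feature_spec.pop('label', None)  # hypothetical label key
    parsed_features = tf.io.parse_example(serialized_tf_examples, feature_spec)
    transformed_features = model.tft_layer(parsed_features)
    return model(transformed_features)

  return serve_tf_examples_fn
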
def train(self):
  # The graph preprocessed by TFT preprocessing.
  tf_transform_output = tft.TFTransformOutput(cfg.TARGET_DIR)

  # Generate all `input_fn`s for the tf estimator.
  train_input_fn = self.model.make_training_input_fn(
      tf_transform_output, cfg.exp_log_data_file_train_tfrecord + '*',
      cfg.TRAIN_BATCH_SIZE)
  eval_input_fn = self.model.make_training_input_fn(
      tf_transform_output, cfg.exp_log_data_file_eval_tfrecord + '*',
      cfg.EVAL_BATCH_SIZE)
  make_serving_input_fn = self.model.make_serving_input_fn(tf_transform_output)

  estimator = tf.estimator.LinearClassifier(
      feature_columns=self.model.create_feature_columns(tf_transform_output))
  estimator.train(train_input_fn, steps=cfg.TRAIN_MAX_STEPS)

  eval_evalset_result = estimator.evaluate(eval_input_fn,
                                           steps=cfg.EVAL_STEPS,
                                           name='eval')
  print(eval_evalset_result)

  estimator.export_savedmodel(cfg.TARGET_DIR, make_serving_input_fn,
                              strip_default_attrs=True)

def trainer_fn(trainer_fn_args, schema):
  """Build the estimator using the high level API.

  Args:
    trainer_fn_args: Holds args used to train the model as name/value pairs.
    schema: Holds the schema of the training examples.

  Returns:
    A dict of the following:
      - estimator: The estimator that will be used for training and eval.
      - train_spec: Spec for training.
      - eval_spec: Spec for eval.
      - eval_input_receiver_fn: Input function for eval.
  """
  tf_transform_output = tft.TFTransformOutput(trainer_fn_args.transform_output)

  train_input_fn = lambda: _input_fn(  # pylint: disable=g-long-lambda
      trainer_fn_args.train_files,
      tf_transform_output,
      batch_size=hparams.TRAIN_BATCH_SIZE)
  eval_input_fn = lambda: _input_fn(  # pylint: disable=g-long-lambda
      trainer_fn_args.eval_files,
      tf_transform_output,
      batch_size=hparams.EVAL_BATCH_SIZE)

  train_spec = tf.estimator.TrainSpec(
      train_input_fn, max_steps=trainer_fn_args.train_steps)

  serving_receiver_fn = lambda: _example_serving_receiver_fn(  # pylint: disable=g-long-lambda
      tf_transform_output, schema)

  exporter = tf.estimator.FinalExporter('chicago-taxi', serving_receiver_fn)
  eval_spec = tf.estimator.EvalSpec(eval_input_fn,
                                    steps=trainer_fn_args.eval_steps,
                                    exporters=[exporter],
                                    name='chicago-taxi-eval')

  run_config = tf.estimator.RunConfig(save_checkpoints_steps=999,
                                      keep_checkpoint_max=1)
  run_config = run_config.replace(model_dir=trainer_fn_args.serving_model_dir)

  estimator = _build_estimator(hidden_units=hparams.HIDDEN_UNITS,
                               config=run_config)

  # Create an input receiver for TFMA processing.
  receiver_fn = lambda: _eval_input_receiver_fn(  # pylint: disable=g-long-lambda
      tf_transform_output, schema)

  return {
      'estimator': estimator,
      'train_spec': train_spec,
      'eval_spec': eval_spec,
      'eval_input_receiver_fn': receiver_fn
  }

def write_projector_metadata(metadata_dir, tft_dir):
  """Write a metadata file to use in tensorboard to visualize embeddings.

  Tensorboard expects a .tsv (tab-separated values) file encoding information
  about each sample. A header is required if there is more than one column.

  Args:
    metadata_dir: the directory where the projector config protobuf is
      written.
    tft_dir: the directory where tft outputs are written.

  Returns:
    A tuple of user and item indices:
      user_indices: indices of users that were sampled.
      item_indices: indices of items that were sampled.
  """
  tft_output = tft.TFTransformOutput(tft_dir)
  user_indices, user_metadata = _sample_vocab(tft_output,
                                              constants.USER_VOCAB_NAME,
                                              "user",
                                              constants.NUM_PROJECTOR_USERS)
  item_indices, item_metadata = _sample_vocab(tft_output,
                                              constants.ITEM_VOCAB_NAME,
                                              "item",
                                              constants.NUM_PROJECTOR_ITEMS)
  metadata = user_metadata + item_metadata
  metadata_path = os.path.join(metadata_dir, constants.PROJECTOR_PATH)
  tf.gfile.MakeDirs(metadata_dir)
  with tf.gfile.GFile(metadata_path, "w+") as f:
    f.write("label\tname\n")
    f.write("\n".join(["\t".join(sample) for sample in metadata]))
  return user_indices, item_indices

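# _sample_vocab is assumed above but not shown. A minimal sketch, assuming it
# takes the first `num_samples` entries of the named TFT vocabulary and
# returns (indices, [[label, name], ...]) rows for the .tsv metadata file:
def _sample_vocab(tft_output, vocab_name, label, num_samples):
  vocab_path = tft_output.vocabulary_file_by_name(vocab_name)
  with tf.io.gfile.GFile(vocab_path) as f:
    vocab = [line.strip() for line in f]
  indices = list(range(min(num_samples, len(vocab))))
  return indices, [[label, vocab[i]] for i in indices]
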
def run_fn(fn_args: tfx.components.FnArgs):
  """Train the model based on given args.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """
  tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

  train_dataset = base.input_fn(fn_args.train_files, fn_args.data_accessor,
                                tf_transform_output, base.TRAIN_BATCH_SIZE)
  eval_dataset = base.input_fn(fn_args.eval_files, fn_args.data_accessor,
                               tf_transform_output, base.EVAL_BATCH_SIZE)

  model = _make_trained_model(train_dataset,
                              eval_dataset,
                              num_epochs=1,
                              steps_per_epoch=fn_args.train_steps,
                              eval_steps_per_epoch=fn_args.eval_steps,
                              tensorboard_log_dir=fn_args.model_run_dir)

  # TODO(b/180721874): batch polymorphic model not yet supported.
  signatures = base.make_serving_signatures(model, tf_transform_output,
                                            serving_batch_size=1)
  tf.saved_model.save(model, fn_args.serving_model_dir, signatures=signatures)

def evaluate(classifier, data_accessor, eval_data_dir, tft_output_dir,
             hyperparameters):
  """Evaluates the trained classifier on the eval dataset.

  Args:
    classifier: The trained Keras model to evaluate.
    data_accessor: DataAccessor used to read the evaluation examples.
    eval_data_dir: File pattern of the evaluation data.
    tft_output_dir: Directory containing the tf.Transform output.
    hyperparameters: Dict of hyperparameters; only "batch_size" is used here.

  Returns:
    evaluation_metrics: The metrics returned by classifier.evaluate().
  """
  logging.info("Loading tft output from %s", tft_output_dir)
  tft_output = tft.TFTransformOutput(tft_output_dir)
  schema = tft_output.transformed_metadata.schema

  logging.info("Model evaluation started...")
  eval_dataset = model_input.get_dataset(
      file_pattern=eval_data_dir,
      data_accessor=data_accessor,
      schema=schema,
      batch_size=hyperparameters["batch_size"],
  )
  evaluation_metrics = classifier.evaluate(eval_dataset)
  logging.info("Model evaluation completed.")
  return evaluation_metrics

def run_fn(fn_args):
  """Train the model based on given args.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """
  tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

  train_dataset = _input_fn(fn_args.train_files, tf_transform_output, 40)
  eval_dataset = _input_fn(fn_args.eval_files, tf_transform_output, 40)

  model = get_model()

  log_dir = os.path.join(os.path.dirname(fn_args.serving_model_dir), 'logs')
  tensorboard_callback = tf.keras.callbacks.TensorBoard(
      log_dir=log_dir, update_freq='batch')

  model.fit(
      train_dataset,
      steps_per_epoch=fn_args.train_steps,
      validation_data=eval_dataset,
      validation_steps=fn_args.eval_steps,
      callbacks=[tensorboard_callback])

  signatures = {
      'serving_default':
          _get_serve_tf_examples_fn(
              model, tf_transform_output).get_concrete_function(
                  tf.TensorSpec(shape=[None], dtype=tf.string, name='examples')),
  }
  model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)

def run_fn(self):
  tf_transform_output = tft.TFTransformOutput(self.transform_output)

  train_dataset = self.input_fn(self.train_files, tf_transform_output)
  eval_dataset = self.input_fn(self.eval_files, tf_transform_output)

  model = self.model_fn(train_dataset=train_dataset, eval_dataset=eval_dataset)

  signatures = {
      'serving_default':
          self._get_serve_tf_examples_fn(
              model, tf_transform_output).get_concrete_function(
                  tf.TensorSpec(shape=[None], dtype=tf.string, name='examples')),
      'zen_eval':
          self._get_zen_eval_tf_examples_fn(
              model, tf_transform_output).get_concrete_function(
                  tf.TensorSpec(shape=[None], dtype=tf.string, name='examples'))
  }
  model.save(self.serving_model_dir, save_format='tf', signatures=signatures)

def run_fn(fn_args: tfx.components.FnArgs):
  """Train the model based on given args.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """
  if fn_args.transform_output is None:  # Transform is not used.
    tf_transform_output = None
    schema = tfx.utils.parse_pbtxt_file(fn_args.schema_file,
                                        schema_pb2.Schema())
    feature_list = features.FEATURE_KEYS
    label_key = features.LABEL_KEY
  else:
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)
    schema = tf_transform_output.transformed_metadata.schema
    feature_list = [
        features.transformed_name(f) for f in features.FEATURE_KEYS
    ]
    label_key = features.transformed_name(features.LABEL_KEY)

  mirrored_strategy = tf.distribute.MirroredStrategy()
  train_batch_size = (constants.TRAIN_BATCH_SIZE *
                      mirrored_strategy.num_replicas_in_sync)
  eval_batch_size = (constants.EVAL_BATCH_SIZE *
                     mirrored_strategy.num_replicas_in_sync)

  train_dataset = _input_fn(fn_args.train_files,
                            fn_args.data_accessor,
                            schema,
                            label_key,
                            batch_size=train_batch_size)
  eval_dataset = _input_fn(fn_args.eval_files,
                           fn_args.data_accessor,
                           schema,
                           label_key,
                           batch_size=eval_batch_size)

  with mirrored_strategy.scope():
    model = _build_keras_model(feature_list)

  # Write logs to path.
  tensorboard_callback = tf.keras.callbacks.TensorBoard(
      log_dir=fn_args.model_run_dir, update_freq='batch')

  model.fit(train_dataset,
            steps_per_epoch=fn_args.train_steps,
            validation_data=eval_dataset,
            validation_steps=fn_args.eval_steps,
            callbacks=[tensorboard_callback])

  signatures = {
      'serving_default':
          _get_tf_examples_serving_signature(model, schema,
                                             tf_transform_output),
      'transform_features':
          _get_transform_features_signature(model, schema,
                                            tf_transform_output),
  }
  model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)

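# _get_transform_features_signature is assumed above but not shown. A sketch
# of the usual 'transform_features' signature for the Transform-enabled
# branch (the schema-only branch, where tf_transform_output is None, is
# omitted here):
def _get_transform_features_signature(model, schema, tf_transform_output):
  model.tft_layer = tf_transform_output.transform_features_layer()

  @tf.function(input_signature=[
      tf.TensorSpec(shape=[None], dtype=tf.string, name='examples')
  ])
  def transform_features_fn(serialized_tf_example):
    raw_feature_spec = tf_transform_output.raw_feature_spec()
    raw_features = tf.io.parse_example(serialized_tf_example, raw_feature_spec)
    return model.tft_layer(raw_features)

  return transform_features_fn
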
def run_fn(fn_args: TrainerFnArgs):
  """Train the model based on given args.

  Args:
    fn_args: Holds args used to train and tune the model as name/value pairs.
      See
      https://www.tensorflow.org/tfx/api_docs/python/tfx/components/trainer/fn_args_utils/FnArgs.
  """
  tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

  train_dataset = _input_fn(fn_args.train_files, fn_args.data_accessor,
                            tf_transform_output, TRAIN_BATCH_SIZE)
  eval_dataset = _input_fn(fn_args.eval_files, fn_args.data_accessor,
                           tf_transform_output, EVAL_BATCH_SIZE)

  if fn_args.hyperparameters:
    hparams = kerastuner.HyperParameters.from_config(fn_args.hyperparameters)
  else:
    # This is the case when the hyperparameters have already been decided and
    # the Tuner component is removed from the pipeline. Users can also inline
    # the hyperparameters directly in _build_keras_model.
    hparams = _get_hyperparameters()
  absl.logging.info('HyperParameters for training: %s' % hparams.get_config())

  # Distribute training over multiple replicas on the same machine.
  mirrored_strategy = tf.distribute.MirroredStrategy()
  with mirrored_strategy.scope():
    model = _build_keras_model(hparams=hparams,
                               tf_transform_output=tf_transform_output)

  tensorboard_callback = tf.keras.callbacks.TensorBoard(
      log_dir=LOCAL_LOG_DIR, update_freq='batch')

  model.fit(train_dataset,
            epochs=EPOCHS,
            steps_per_epoch=fn_args.train_steps,
            validation_data=eval_dataset,
            validation_steps=fn_args.eval_steps,
            verbose=2,
            callbacks=[tensorboard_callback])

  signatures = {
      'serving_default':
          _get_serve_tf_examples_fn(
              model, tf_transform_output).get_concrete_function(
                  tf.TensorSpec(shape=[None], dtype=tf.string, name='examples')),
  }
  model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)

  if fn_args.serving_model_dir.startswith('gs://'):
    _copy_tensorboard_logs(LOCAL_LOG_DIR, fn_args.serving_model_dir + '/logs')

def run_fn(fn_args: tfx.components.FnArgs):
  """Train the model based on given args.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """
  tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

  train_dataset = _input_fn(fn_args.train_files, fn_args.data_accessor,
                            tf_transform_output, batch_size=_TRAIN_BATCH_SIZE)
  eval_dataset = _input_fn(fn_args.eval_files, fn_args.data_accessor,
                           tf_transform_output, batch_size=_EVAL_BATCH_SIZE)

  mirrored_strategy = tf.distribute.MirroredStrategy()
  with mirrored_strategy.scope():
    model = _build_keras_model()

  model.fit(train_dataset,
            steps_per_epoch=fn_args.train_steps,
            validation_data=eval_dataset,
            validation_steps=fn_args.eval_steps,
            verbose=2)

  signatures = {
      'serving_default':
          _get_inference_fn(model, tf_transform_output).get_concrete_function(
              tf.TensorSpec(shape=[None],
                            dtype=tf.int64,
                            name=_CUR_PAGE_FEATURE_KEY),
              tf.TensorSpec(shape=[None],
                            dtype=tf.int64,
                            name=_SESSION_INDEX_FEATURE_KEY)),
  }

  # Create the saved_model in a temporary directory.
  temp_saving_model_dir = os.path.join(fn_args.serving_model_dir, 'temp')
  model.save(temp_saving_model_dir, save_format='tf', signatures=signatures)

  # Convert the saved_model to a tfjs model and store it in the final
  # directory.
  tfrw = rewriter_factory.create_rewriter(rewriter_factory.TFJS_REWRITER,
                                          name='tfjs_rewriter')
  converters.rewrite_saved_model(temp_saving_model_dir,
                                 fn_args.serving_model_dir, tfrw,
                                 rewriter.ModelType.TFJS_MODEL)

  # Copy the vocabulary computed by transform to the final directory.
  # The vocabulary is not included in the original savedmodel because vocab
  # lookups are currently not supported in TFJS and are expected to be done
  # independently by client code.
  fileio.copy(tf_transform_output.vocabulary_file_by_name(_VOCAB_FILENAME),
              os.path.join(fn_args.serving_model_dir, _VOCAB_FILENAME))

  fileio.rmtree(temp_saving_model_dir)

def tuner_fn(fn_args: FnArgs) -> TunerFnResult:
  """Build the tuner using the CloudTuner API.

  Args:
    fn_args: Holds args as name/value pairs. See
      https://www.tensorflow.org/tfx/api_docs/python/tfx/components/trainer/fn_args_utils/FnArgs.
      - transform_graph_path: optional transform graph produced by TFT.
      - custom_config: An optional dictionary passed to the component. In this
        example, it contains the dict ai_platform_tuning_args.
      - working_dir: working dir for tuning.
      - train_files: List of file paths containing training tf.Example data.
      - eval_files: List of file paths containing eval tf.Example data.
      - train_steps: number of train steps.
      - eval_steps: number of eval steps.

  Returns:
    A namedtuple containing the following:
      - tuner: A BaseTuner that will be used for tuning.
      - fit_kwargs: Args to pass to tuner's run_trial function for fitting
        the model, e.g., the training and validation dataset. Required args
        depend on the above tuner's implementation.
  """
  transform_graph = tft.TFTransformOutput(fn_args.transform_graph_path)

  # CloudTuner is a subclass of kerastuner.Tuner which inherits from
  # BaseTuner.
  tuner = CloudTuner(
      _build_keras_model,
      # The project/region configurations for the Cloud Vizier service and its
      # trial executions. Note: this example uses the same configuration as
      # the CAIP Training service for distributed tuning flock management to
      # view all of the pipeline's jobs and resources in the same project. It
      # can also be configured separately.
      project_id=fn_args.custom_config['ai_platform_tuning_args']['project'],
      region=fn_args.custom_config['ai_platform_tuning_args']['region'],
      objective=kerastuner.Objective('val_sparse_categorical_accuracy', 'max'),
      hyperparameters=_get_hyperparameters(),
      max_trials=8,  # Optional.
      directory=fn_args.working_dir)

  train_dataset = _input_fn(fn_args.train_files,
                            fn_args.data_accessor,
                            transform_graph,
                            batch_size=_TRAIN_BATCH_SIZE)
  eval_dataset = _input_fn(fn_args.eval_files,
                           fn_args.data_accessor,
                           transform_graph,
                           batch_size=_EVAL_BATCH_SIZE)

  return TunerFnResult(
      tuner=tuner,
      fit_kwargs={
          'x': train_dataset,
          'validation_data': eval_dataset,
          'steps_per_epoch': fn_args.train_steps,
          'validation_steps': fn_args.eval_steps
      })

def train_and_evaluate(model_dir,
                       input_feature_spec,
                       target,
                       train_files_pattern,
                       eval_files_pattern,
                       batch_size=64,
                       train_max_steps=1000):
  """Trains and evaluates the estimator given.

  The input functions are generated by the preprocessing function.
  """
  # Specify where the model is stored.
  if tf.io.gfile.exists(model_dir):
    tf.io.gfile.rmtree(model_dir)

  run_config = tf.estimator.RunConfig()
  run_config = run_config.replace(model_dir=model_dir)
  # This will give us a more granular visualization of the training.
  run_config = run_config.replace(save_summary_steps=1)

  # There is no built-in RNN estimator in TF yet, so wrap a Keras model.
  model = make_simple_rnn()
  model.compile(loss=tf.losses.MeanSquaredError(),
                optimizer=tf.optimizers.Adam(),
                metrics=[tf.metrics.MeanAbsoluteError()])
  estimator = tf.keras.estimator.model_to_estimator(keras_model=model,
                                                    config=run_config)

  # Wrapper around the output of tf.Transform.
  tft_output = tft.TFTransformOutput(os.path.split(train_files_pattern)[0])
  feature_spec = tft_output.transformed_feature_spec()

  # Create the training and evaluation specifications.
  train_spec = tf.estimator.TrainSpec(
      input_fn=make_input_fn(
          tfrecord_pattern=train_files_pattern,
          feature_spec=feature_spec,
          target=target,
          batch_size=batch_size,
          mode=tf.estimator.ModeKeys.TRAIN),
      max_steps=train_max_steps)

  eval_spec = tf.estimator.EvalSpec(
      input_fn=make_input_fn(
          tfrecord_pattern=eval_files_pattern,
          feature_spec=feature_spec,
          target=target,
          batch_size=batch_size,
          mode=tf.estimator.ModeKeys.EVAL))

  # Train and evaluate the model.
  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

  # Export the saved model.
  estimator.export_saved_model(
      model_dir,
      serving_input_receiver_fn=make_serving_input_fn(
          tft_output, input_feature_spec, target))

def predict(self):
  tf_transform_output = tft.TFTransformOutput(TARGET_DIR)
  eval_sample_input_fn = self.model.make_training_input_fn(
      tf_transform_output, exp_log_data_file_train_tfrecord + '*', 1)

  predict_fn = predictor.from_saved_model(TARGET_DIR)
  # `xx`: batch of inputs for the predictor (construction not shown).
  predictions = predict_fn(xx)
  print(predictions['scores'])

def train_and_evaluate(working_dir,
                       num_train_instances=NUM_TRAIN_INSTANCES,
                       num_test_instances=NUM_TEST_INSTANCES):
  """Train the model on training data and evaluate on test data.

  Args:
    working_dir: Directory to read transformed data and metadata from and to
      write exported model to.
    num_train_instances: Number of instances in train set
    num_test_instances: Number of instances in test set

  Returns:
    The results from the estimator's 'evaluate' method
  """
  tf_transform_output = tft.TFTransformOutput(working_dir)

  # Wrap scalars as real valued columns.
  real_valued_columns = [
      tf.feature_column.numeric_column(key, shape=())
      for key in NUMERIC_FEATURE_KEYS
  ]

  # Wrap categorical columns.
  one_hot_columns = [
      tf.feature_column.categorical_column_with_vocabulary_file(
          key=key,
          vocabulary_file=tf_transform_output.vocabulary_file_by_name(
              vocab_filename=key)) for key in CATEGORICAL_FEATURE_KEYS
  ]

  run_config = tf.estimator.RunConfig()

  estimator = tf.estimator.LinearClassifier(
      feature_columns=real_valued_columns + one_hot_columns,
      config=run_config)

  # Fit the model using the default optimizer. Use integer division so
  # max_steps is an int.
  train_input_fn = _make_training_input_fn(
      tf_transform_output,
      os.path.join(working_dir, TRANSFORMED_TRAIN_DATA_FILEBASE + '*'),
      batch_size=TRAIN_BATCH_SIZE)
  estimator.train(input_fn=train_input_fn,
                  max_steps=TRAIN_NUM_EPOCHS * num_train_instances //
                  TRAIN_BATCH_SIZE)

  # Evaluate model on test dataset.
  eval_input_fn = _make_training_input_fn(
      tf_transform_output,
      os.path.join(working_dir, TRANSFORMED_TEST_DATA_FILEBASE + '*'),
      batch_size=1)

  # Export the model.
  serving_input_fn = _make_serving_input_fn(tf_transform_output)
  exported_model_dir = os.path.join(working_dir, EXPORTED_MODEL_DIR)
  estimator.export_savedmodel(exported_model_dir, serving_input_fn)

  return estimator.evaluate(input_fn=eval_input_fn, steps=num_test_instances)

def train_and_maybe_evaluate(hparams):
  """Run the training and evaluate using the high level API.

  Args:
    hparams: Holds hyperparameters used to train the model as name/value
      pairs.

  Returns:
    The estimator that was used for training (and maybe eval)
  """
  schema = taxi.read_schema(hparams.schema_file)
  tf_transform_output = tft.TFTransformOutput(hparams.tf_transform_dir)

  train_input = lambda: model.input_fn(
      hparams.train_files,
      tf_transform_output,
      batch_size=TRAIN_BATCH_SIZE)

  eval_input = lambda: model.input_fn(
      hparams.eval_files,
      tf_transform_output,
      batch_size=EVAL_BATCH_SIZE)

  train_spec = tf.estimator.TrainSpec(
      train_input, max_steps=hparams.train_steps)

  serving_receiver_fn = lambda: model.example_serving_receiver_fn(
      tf_transform_output, schema)

  exporter = tf.estimator.FinalExporter('chicago-taxi', serving_receiver_fn)
  eval_spec = tf.estimator.EvalSpec(
      eval_input,
      steps=hparams.eval_steps,
      exporters=[exporter],
      name='chicago-taxi-eval')

  run_config = tf.estimator.RunConfig(
      save_checkpoints_steps=999, keep_checkpoint_max=1)

  serving_model_dir = os.path.join(hparams.output_dir, SERVING_MODEL_DIR)
  run_config = run_config.replace(model_dir=serving_model_dir)

  estimator = model.build_estimator(
      tf_transform_output,
      # Construct layer sizes with exponential decay.
      hidden_units=[
          max(2, int(FIRST_DNN_LAYER_SIZE * DNN_DECAY_FACTOR**i))
          for i in range(NUM_DNN_LAYERS)
      ],
      config=run_config)

  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
  return estimator

def train_and_evaluate(work_dir,
                       input_feature_spec,
                       labels,
                       train_files_pattern,
                       eval_files_pattern,
                       batch_size=64,
                       train_max_steps=1000):
  """Trains and evaluates the estimator given.

  The input functions are generated by the preprocessing function.
  """
  model_dir = os.path.join(work_dir, 'model')
  if tf.gfile.Exists(model_dir):
    tf.gfile.DeleteRecursively(model_dir)

  # Specify where to store our model.
  run_config = tf.estimator.RunConfig()
  run_config = run_config.replace(model_dir=model_dir)
  # This will give us a more granular visualization of the training.
  run_config = run_config.replace(save_summary_steps=10)

  # Create a Deep Neural Network Regressor estimator.
  estimator = tf.estimator.DNNRegressor(
      feature_columns=[
          tf.feature_column.numeric_column('NormalizedC', dtype=tf.float32),
          tf.feature_column.numeric_column('NormalizedH', dtype=tf.float32),
          tf.feature_column.numeric_column('NormalizedO', dtype=tf.float32),
          tf.feature_column.numeric_column('NormalizedN', dtype=tf.float32),
      ],
      hidden_units=[128, 64],
      dropout=0.5,
      config=run_config)

  # Get the transformed feature_spec.
  tft_output = tft.TFTransformOutput(work_dir)
  feature_spec = tft_output.transformed_feature_spec()

  # Create the training and evaluation specifications.
  train_spec = tf.estimator.TrainSpec(
      input_fn=make_train_input_fn(
          feature_spec, labels, train_files_pattern, batch_size),
      max_steps=train_max_steps)

  exporter = tf.estimator.FinalExporter(
      'final', make_serving_input_fn(tft_output, input_feature_spec, labels))

  eval_spec = tf.estimator.EvalSpec(
      input_fn=make_eval_input_fn(
          feature_spec, labels, eval_files_pattern, batch_size),
      exporters=[exporter])

  # Train and evaluate the model.
  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

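# make_serving_input_fn (used by the two estimator pipelines above) is
# assumed but not shown. One possible sketch, using the parsing-receiver
# pattern and assuming the label features are simply absent at serving time
# (both assumptions):
def make_serving_input_fn(tft_output, input_feature_spec, labels):
  """Returns a fn that parses raw tf.Examples and applies the TFT graph."""
  raw_feature_spec = {
      name: spec for name, spec in input_feature_spec.items()
      if name not in labels
  }

  def serving_input_fn():
    raw_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
        raw_feature_spec, default_batch_size=None)
    receiver = raw_input_fn()
    transformed_features = tft_output.transform_raw_features(receiver.features)
    return tf.estimator.export.ServingInputReceiver(
        transformed_features, receiver.receiver_tensors)

  return serving_input_fn
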
def run_fn(fn_args: TrainerFnArgs):
  """Train the model based on given args.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """
  tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

  train_dataset = _input_fn(
      fn_args.train_files,
      fn_args.data_accessor,
      tf_transform_output,
      batch_size=_TRAIN_BATCH_SIZE)
  eval_dataset = _input_fn(
      fn_args.eval_files,
      fn_args.data_accessor,
      tf_transform_output,
      batch_size=_EVAL_BATCH_SIZE)

  if fn_args.hyperparameters:
    hparams = kerastuner.HyperParameters.from_config(fn_args.hyperparameters)
  else:
    # This is the case when the hyperparameters have already been decided and
    # the Tuner component is removed from the pipeline. Users can also inline
    # the hyperparameters directly in _build_keras_model.
    hparams = _get_hyperparameters()
  absl.logging.info('HyperParameters for training: %s' % hparams.get_config())

  mirrored_strategy = tf.distribute.MirroredStrategy()
  with mirrored_strategy.scope():
    model = _build_keras_model(hparams)

  # Use integer division so steps_per_epoch is a valid argument to fit().
  steps_per_epoch = _TRAIN_DATA_SIZE // _TRAIN_BATCH_SIZE

  # Write logs to path.
  tensorboard_callback = tf.keras.callbacks.TensorBoard(
      log_dir=fn_args.model_run_dir, update_freq='batch')

  model.fit(
      train_dataset,
      epochs=int(fn_args.train_steps / steps_per_epoch),
      steps_per_epoch=steps_per_epoch,
      validation_data=eval_dataset,
      validation_steps=fn_args.eval_steps,
      callbacks=[tensorboard_callback])

  signatures = {
      'serving_default':
          _get_serve_tf_examples_fn(
              model, tf_transform_output).get_concrete_function(
                  tf.TensorSpec(shape=[None], dtype=tf.string, name='examples')),
  }
  model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)

def tuner_fn(fn_args: TrainerFnArgs) -> TunerFnResult:
  """Build the tuner using the KerasTuner API.

  Args:
    fn_args: Holds args as name/value pairs.
      - working_dir: working dir for tuning.
      - train_files: List of file paths containing training tf.Example data.
      - eval_files: List of file paths containing eval tf.Example data.
      - train_steps: number of train steps.
      - eval_steps: number of eval steps.
      - schema_path: optional schema of the input data.
      - transform_graph_path: optional transform graph produced by TFT.

  Returns:
    A namedtuple containing the following:
      - tuner: A BaseTuner that will be used for tuning.
      - fit_kwargs: Args to pass to tuner's run_trial function for fitting
        the model, e.g., the training and validation dataset. Required args
        depend on the above tuner's implementation.
  """
  transform_graph = tft.TFTransformOutput(fn_args.transform_graph_path)

  # Construct a build_keras_model_fn that just takes hyperparams from
  # get_hyperparameters as input.
  build_keras_model_fn = functools.partial(
      _build_keras_model, tf_transform_output=transform_graph)

  # BayesianOptimization is a subclass of kerastuner.Tuner which inherits
  # from BaseTuner.
  tuner = kerastuner.BayesianOptimization(
      build_keras_model_fn,
      max_trials=10,
      hyperparameters=_get_hyperparameters(),
      # New entries allowed for n_units hyperparameter construction
      # conditional on n_layers selected.
      # allow_new_entries=True,
      # tune_new_entries=True,
      objective=kerastuner.Objective('val_sparse_categorical_accuracy', 'max'),
      directory=fn_args.working_dir,
      project_name='covertype_tuning')

  train_dataset = _input_fn(fn_args.train_files,
                            fn_args.data_accessor,
                            transform_graph,
                            batch_size=TRAIN_BATCH_SIZE)
  eval_dataset = _input_fn(fn_args.eval_files,
                           fn_args.data_accessor,
                           transform_graph,
                           batch_size=EVAL_BATCH_SIZE)

  return TunerFnResult(
      tuner=tuner,
      fit_kwargs={
          'x': train_dataset,
          'validation_data': eval_dataset,
          'steps_per_epoch': fn_args.train_steps,
          'validation_steps': fn_args.eval_steps
      })