def __init__(self, saved_model_dir, input_schema, output_schema, tf_config):
    """Load the transform SavedModel and keep the schema-selected tensors.

    Args:
        saved_model_dir: Directory of the SavedModel to load.
        input_schema: Object whose `column_schemas` keys select the graph
            inputs stored on `self.inputs`.
        output_schema: Object whose `column_schemas` keys select the graph
            outputs stored on `self.outputs`.
        tf_config: Config passed to the `tf.Session` used while loading;
            also kept on `self.tf_config`.

    Raises:
        ValueError: If either schema names a key that is missing from the
            loaded graph.
    """
    self.saved_model_dir = saved_model_dir
    self.graph = tf.Graph()
    self.tf_config = tf_config
    with self.graph.as_default():
        with tf.Session(config=tf_config):
            inputs, outputs = (
                saved_transform_io.partially_apply_saved_transform(
                    saved_model_dir, {}))

    input_schema_keys = input_schema.column_schemas.keys()
    output_schema_keys = output_schema.column_schemas.keys()

    extra_input_keys = set(input_schema_keys).difference(inputs.keys())
    if extra_input_keys:
        # Report only the offending keys (previously every schema key was
        # printed, which hid which ones were actually missing).
        raise ValueError(
            'Input schema contained keys not in graph: %s' % extra_input_keys)

    extra_output_keys = set(output_schema_keys).difference(outputs.keys())
    if extra_output_keys:
        raise ValueError(
            'Output schema contained keys not in graph: %s' % extra_output_keys)

    self.inputs = {key: inputs[key] for key in input_schema_keys}
    self.outputs = {key: outputs[key] for key in output_schema_keys}
def _example_serving_receiver_fn(transform_output, schema):
    """Build the serving input receiver for serialized tf.Examples.

    Args:
        transform_output: directory in which the tf-transform model was
            written during the preprocessing step.
        schema: the schema of the input data.

    Returns:
        A `tf.estimator.export.ServingInputReceiver` whose features are the
        tf-transform outputs computed from the parsed examples.
    """
    feature_spec = _get_raw_feature_spec(schema)
    # Drop the label column from the parsing spec (presumably because the
    # label is not supplied at serving time -- confirm with callers).
    feature_spec.pop(_LABEL_KEY)

    build_receiver = (
        tf.estimator.export.build_parsing_serving_input_receiver_fn(
            feature_spec, default_batch_size=None))
    receiver = build_receiver()

    transform_fn_path = os.path.join(
        transform_output, transform_fn_io.TRANSFORM_FN_DIR)
    _, transformed = saved_transform_io.partially_apply_saved_transform(
        transform_fn_path, receiver.features)

    return tf.estimator.export.ServingInputReceiver(
        transformed, receiver.receiver_tensors)
def eval_input_receiver_fn(tf_transform_dir, schema, target):
    """Build the receiver tf-model-analysis uses to run the model.

    Args:
        tf_transform_dir: directory in which the tf-transform model was
            written during the preprocessing step.
        schema: the raw data schema.
        target: name of the target column; looked up in the transformed
            features to obtain the labels.

    Returns:
        A `tfma.export.EvalInputReceiver` holding:
          - the transformed features (parsed raw examples passed through
            the tf-transform graph),
          - a receiver dict keyed by 'examples' for the serialized input,
          - the transformed `target` column as labels.
    """
    feature_spec = make_tft_input_metadata(schema).schema.as_feature_spec()

    examples = tf.placeholder(
        dtype=tf.string, shape=[None], name='input_example_tensor')
    parsed = tf.parse_example(examples, feature_spec)

    transform_fn_path = os.path.join(
        tf_transform_dir, transform_fn_io.TRANSFORM_FN_DIR)
    _, transformed = saved_transform_io.partially_apply_saved_transform(
        transform_fn_path, parsed)

    return tfma.export.EvalInputReceiver(
        features=transformed,
        receiver_tensors={'examples': examples},
        labels=transformed[target])
def replace_tensors_with_constant_values(saved_model_dir,
                                         bound_saved_model_dir,
                                         input_value_mapping):
    """Write a copy of a SavedModel with some inputs bound to constants.

    Each entry of `input_value_mapping` becomes a `tf.constant` that is fed
    to the SavedModel in place of the corresponding input; the resulting
    graph is written to `bound_saved_model_dir`.

    Args:
        saved_model_dir: The directory of a SavedModel.
        bound_saved_model_dir: The directory to which to write the
            SavedModel with some inputs bound to constants.
        input_value_mapping: A map from inputs to `ConstantTensorValue`s
            (objects exposing `.value` and `.dtype`).
    """
    with tf.Graph().as_default():
        # Materialize every bound input as a constant in the fresh graph.
        constants = {}
        for name, bound in six.iteritems(input_value_mapping):
            constants[name] = tf.constant(bound.value, bound.dtype)

        with tf.Session() as session:
            remaining_inputs, outputs = (
                saved_transform_io.partially_apply_saved_transform(
                    saved_model_dir, constants))
            saved_transform_io.write_saved_transform_from_session(
                session, remaining_inputs, outputs, bound_saved_model_dir)
def __init__(self, saved_model_dir, input_schema, exclude_outputs, tf_config):
    """Load the transform SavedModel into a long-lived session.

    Args:
        saved_model_dir: Directory of the SavedModel to load.
        input_schema: Object whose `column_schemas` keys select the graph
            inputs stored on `self.inputs`.
        exclude_outputs: Output keys to leave out of `self.outputs`.
        tf_config: Config passed to the `tf.Session` kept on `self.session`.

    Raises:
        ValueError: If `input_schema` or `exclude_outputs` names a key that
            is missing from the loaded graph.
    """
    self.saved_model_dir = saved_model_dir
    graph = tf.Graph()
    self.session = tf.Session(graph=graph, config=tf_config)
    try:
        with graph.as_default():
            with self.session.as_default():
                inputs, outputs = (
                    saved_transform_io.partially_apply_saved_transform(
                        saved_model_dir, {}))
            self.session.run(tf.global_variables_initializer())
            self.session.run(tf.tables_initializer())

        input_schema_keys = input_schema.column_schemas.keys()

        extra_input_keys = set(input_schema_keys).difference(inputs.keys())
        if extra_input_keys:
            # Report only the offending keys, not the whole schema.
            raise ValueError(
                'Input schema contained keys not in graph: %s'
                % extra_input_keys)

        extra_output_keys = set(exclude_outputs).difference(outputs.keys())
        if extra_output_keys:
            # Same here: name the unknown exclusions, not every exclusion.
            raise ValueError(
                'Excluded outputs contained keys not in graph: %s'
                % extra_output_keys)
    except Exception:
        # Construction failed, so nobody will ever own this session;
        # release it instead of leaking it.
        self.session.close()
        raise

    non_excluded_output_keys = set(outputs.keys()).difference(exclude_outputs)
    self.inputs = {key: inputs[key] for key in input_schema_keys}
    self.outputs = {key: outputs[key] for key in non_excluded_output_keys}
def default_transforming_serving_input_receiver_fn():
    """Serving Input Receiver that applies transforms to raw data in Tensors.

    Reads `raw_metadata`, `include_raw_keys`, `transform_savedmodel_dir` and
    `convert_scalars_to_vectors` from the enclosing scope.

    Returns:
        A `tf.estimator.export.ServingInputReceiver` whose features are the
        transformed tensors and whose receiver tensors are the raw
        placeholders.

    Raises:
        ValueError: If any selected raw placeholder is a `tf.SparseTensor`.
    """
    raw_serving_features = {
        k: v
        for k, v in six.iteritems(
            raw_metadata.schema.as_batched_placeholders())
        if k in include_raw_keys
    }

    # Inspect the placeholder *values*: iterating the dict itself yields the
    # string keys, which made this check unable to ever detect a sparse input.
    if any(isinstance(t, tf.SparseTensor)
           for t in raw_serving_features.values()):
        raise ValueError(
            "Feeding sparse tensors directly at serving time is not "
            "supported.")

    _, transformed_features = (
        saved_transform_io.partially_apply_saved_transform(
            transform_savedmodel_dir, raw_serving_features))

    if convert_scalars_to_vectors:
        transformed_features = _convert_scalars_to_vectors(
            transformed_features)

    return tf.estimator.export.ServingInputReceiver(
        transformed_features, raw_serving_features)
def default_transforming_serving_input_receiver_fn():
    """Serving input_fn that parses CSV rows and applies the transforms.

    Reads `raw_keys`, `column_schemas`, `field_delim`,
    `transform_savedmodel_dir` and `convert_scalars_to_vectors` from the
    enclosing scope.

    Returns:
        A `tf.estimator.export.ServingInputReceiver` whose features are the
        transformed tensors and whose single receiver tensor, keyed
        "csv_example", is the string placeholder holding the CSV rows.
    """
    record_defaults = []
    for key in raw_keys:
        column = column_schemas[key]
        default = column.representation.default_value
        # Compare against None explicitly: 0 and '' are valid defaults.
        # A column without a default gets an empty constant instead.
        default_values = [] if default is None else [default]
        record_defaults.append(
            tf.constant(default_values, dtype=column.domain.dtype))

    csv_rows = tf.placeholder(
        dtype=tf.string, shape=(None, ), name="csv_input_placeholder")
    columns = tf.decode_csv(csv_rows, record_defaults, field_delim=field_delim)
    raw_serving_features = dict(zip(raw_keys, columns))

    _, transformed = saved_transform_io.partially_apply_saved_transform(
        transform_savedmodel_dir, raw_serving_features)
    if convert_scalars_to_vectors:
        transformed = _convert_scalars_to_vectors(transformed)

    return tf.estimator.export.ServingInputReceiver(
        transformed, {"csv_example": csv_rows})
def parsing_transforming_serving_input_fn():
    """Serving input_fn that parses tf.Examples and applies the transforms.

    Reads `raw_serving_feature_spec` and `transform_savedmodel_dir` from the
    enclosing scope.

    Returns:
        An `input_fn_utils.InputFnOps` with the transformed features, no
        labels, and the default inputs of the parsing input_fn.
    """
    build_raw_ops = input_fn_utils.build_parsing_serving_input_fn(
        raw_serving_feature_spec, default_batch_size=None)
    parsed_features, _, default_inputs = build_raw_ops()

    _, transformed = saved_transform_io.partially_apply_saved_transform(
        transform_savedmodel_dir, parsed_features)

    return input_fn_utils.InputFnOps(transformed, None, default_inputs)
def csv_serving_input_fn():
    """Build the serving inputs for raw CSV rows.

    Reads `working_dir` from the enclosing scope.

    Returns:
        A `tf.estimator.export.ServingInputReceiver` whose features are the
        transformed outputs of `parse_csv` and whose receiver tensor, keyed
        'csv_row', is the string placeholder.
    """
    row_placeholder = tf.placeholder(shape=[None], dtype=tf.string)
    parsed = parse_csv(row_placeholder)

    transform_fn_path = os.path.join(
        working_dir, transform_fn_io.TRANSFORM_FN_DIR)
    _, transformed = saved_transform_io.partially_apply_saved_transform(
        transform_fn_path, parsed)

    receiver_tensors = {'csv_row': row_placeholder}
    return tf.estimator.export.ServingInputReceiver(
        transformed, receiver_tensors)
def replace_tensors_with_constant_values(
        saved_model_dir, tensor_value_mapping, serialized_tf_config):
    """Write a copy of a SavedModel with the given tensors replaced.

    Reads `base_temp_dir` from the enclosing scope.

    Args:
        saved_model_dir: Directory of the SavedModel to load.
        tensor_value_mapping: Replacement mapping passed through to
            `partially_apply_saved_transform` as its third argument.
        serialized_tf_config: Passed to `_maybe_deserialize_tf_config` to
            obtain the session config.

    Returns:
        The freshly created temp directory holding the rewritten SavedModel.
    """
    session_config = _maybe_deserialize_tf_config(serialized_tf_config)
    with tf.Session(config=session_config) as session:
        output_dir = _make_unique_temp_dir(base_temp_dir)
        unbound_inputs, outputs = (
            saved_transform_io.partially_apply_saved_transform(
                saved_model_dir, {}, tensor_value_mapping))
        saved_transform_io.write_saved_transform_from_session(
            session, unbound_inputs, outputs, output_dir)
    return output_dir
def replace_tensors_with_constant_values(saved_model_dir,
                                         tensor_value_mapping):
    """Replaces specified `Tensor`s with constant values.

    Constants are accepted as Python values; these are automatically wrapped
    in `tf.constant()`.

    This method creates its own temp dir, and is therefore idempotent since
    any retry will use a different temp dir.

    Args:
        saved_model_dir: A SavedModel directory providing a transform graph.
            The MetaGraphDef and signature are selected from the SavedModel
            using keys defined in `../constants.py` ('transform' and
            'transform_signature', respectively).
        tensor_value_mapping: a dict of tensor names to `(value, is_asset)`
            pairs; `value` is used in place of the named tensor.

    Returns:
        The directory name containing the updated SavedModel.

    Raises:
        RuntimeError: if there is no default graph available to which to
            apply the transform.
    """
    default_graph = tf.get_default_graph()
    if default_graph is None:
        raise RuntimeError('replace_tensors_with_constant_values() '
                           'requires a default graph.')

    replacements = {}
    for tensor_name, value_and_flag in six.iteritems(tensor_value_mapping):
        value, is_asset = value_and_flag
        constant = tf.constant(value)
        if is_asset:
            # Newly frozen constants that hold filenames must be registered
            # in the ASSET_FILEPATHS collection.
            default_graph.add_to_collection(
                tf.GraphKeys.ASSET_FILEPATHS, constant)
        replacements[tensor_name] = constant

    with tf.Session() as session:
        # `self` comes from the enclosing scope (not a parameter here).
        output_dir = _make_unique_temp_dir(self._base_temp_dir)
        unbound_inputs, outputs = (
            saved_transform_io.partially_apply_saved_transform(
                saved_model_dir, {}, replacements))
        saved_transform_io.write_saved_transform_from_session(
            session, unbound_inputs, outputs, output_dir)
    return output_dir
def parsing_transforming_serving_input_receiver_fn():
    """Serving input_fn that parses tf.Examples and applies the transforms.

    Reads `raw_serving_feature_spec`, `transform_savedmodel_dir` and
    `convert_scalars_to_vectors` from the enclosing scope.

    Returns:
        A `tf.estimator.export.ServingInputReceiver` built from the
        transformed features and the parsing input_fn's default inputs.
    """
    build_raw_ops = input_fn_utils.build_parsing_serving_input_fn(
        raw_serving_feature_spec, default_batch_size=None)
    parsed_features, _, default_inputs = build_raw_ops()

    _, transformed = saved_transform_io.partially_apply_saved_transform(
        transform_savedmodel_dir, parsed_features)
    if convert_scalars_to_vectors:
        transformed = _convert_scalars_to_vectors(transformed)

    return tf.estimator.export.ServingInputReceiver(
        transformed, default_inputs)
def _input_fn():
    """Serving input receiver fed by one placeholder per raw column.

    Reads `transform_savedmodel_dir` from the enclosing scope.

    Returns:
        A `tf.estimator.export.ServingInputReceiver` whose features are the
        transformed tensors and whose receiver tensors are the raw
        placeholders.
    """
    # One placeholder per raw input column, created in a fixed order.
    raw_placeholders = {}
    for name in 'pickuplon,pickuplat,dropofflat,dropofflon'.split(','):
        raw_placeholders[name] = tf.placeholder(tf.float32, [None])
    remaining_columns = (
        ('passengers', tf.int64),
        ('dayofweek', tf.string),
        ('hourofday', tf.int64),
        ('key', tf.string),
    )
    for name, dtype in remaining_columns:
        raw_placeholders[name] = tf.placeholder(dtype, [None])

    # Run the raw inputs through the saved transform_fn graph.
    _, transformed = saved_transform_io.partially_apply_saved_transform(
        transform_savedmodel_dir, raw_placeholders)

    return tf.estimator.export.ServingInputReceiver(
        transformed, raw_placeholders)
def _input_fn():
    """Serving input receiver fed by one placeholder per raw column.

    Reads `transform_savedmodel_dir` from the enclosing scope.

    Returns:
        A `tf.estimator.export.ServingInputReceiver` whose features are the
        transformed tensors and whose receiver tensors are the raw
        placeholders.
    """
    # One placeholder per raw input column, created in a fixed order.
    raw_placeholders = {}
    for name in 'pickuplon,pickuplat,dropofflat,dropofflon'.split(','):
        raw_placeholders[name] = tf.placeholder(tf.float32, [None])
    remaining_columns = (
        ('passengers', tf.int64),
        ('dayofweek', tf.string),
        ('hourofday', tf.int64),
        ('key', tf.string),
    )
    for name, dtype in remaining_columns:
        raw_placeholders[name] = tf.placeholder(dtype, [None])

    # Run the raw inputs through the saved transform_fn graph.
    _, transformed = saved_transform_io.partially_apply_saved_transform(
        transform_savedmodel_dir, raw_placeholders)

    return tf.estimator.export.ServingInputReceiver(
        transformed, raw_placeholders)
def _make_training_input_fn(working_dir, csv_file, batch_size):
    """Read, shuffle and batch CSV lines, then apply the saved transform.

    Args:
        working_dir: Directory containing the transform_fn output
            (`transform_fn_io.TRANSFORM_FN_DIR` is joined onto it).
        csv_file: File(s) handed to `tf.data.TextLineDataset`.
        batch_size: Batch size used by `map_and_batch`.

    Returns:
        A `(transformed_features, raw_label)` pair; note the label is
        returned as decoded, without passing through the transform.
    """
    lines = tf.data.TextLineDataset(csv_file, buffer_size=8 * 1048576)
    shuffled = lines.shuffle(NUM_TRAIN_INSTANCES)
    decode_and_batch = tf.contrib.data.map_and_batch(
        file_decode_csv, batch_size, num_parallel_batches=4)
    batches = shuffled.apply(decode_and_batch).prefetch(4)

    raw_features, raw_label = batches.make_one_shot_iterator().get_next()

    transform_fn_path = os.path.join(
        working_dir, transform_fn_io.TRANSFORM_FN_DIR)
    _, transformed = saved_transform_io.partially_apply_saved_transform(
        transform_fn_path, raw_features)
    return transformed, raw_label
def load_transform_fn_def(saved_model_dir):
    """Loads a TransformFnDef into a graph.

    Similar to apply_transform_fn_def except it loads input placeholders and
    returns a column to tensor mapping for inputs.

    Args:
        saved_model_dir: The location of the SavedModel.

    Returns:
        A pair of dicts, for inputs and outputs, whose keys are column names
        and whose values are `Tensor`s or `SparseTensor`s representing these
        columns.
    """
    # No inputs are bound (empty dict), so every input stays a placeholder.
    with tf.Session():
        inputs_and_outputs = (
            saved_transform_io.partially_apply_saved_transform(
                saved_model_dir, {}))
    return inputs_and_outputs
def _serving_fn():
    """Serving input receiver: raw placeholders -> transform -> processing.

    Reads `raw_placeholder_spec` from the enclosing scope.

    Returns:
        A `tf.estimator.export.ServingInputReceiver` whose features are the
        processed, transformed tensors and whose receiver tensors are the
        raw features.
    """
    build_receiver = tf.estimator.export.build_raw_serving_input_receiver_fn(
        raw_placeholder_spec)
    # Only the features are used below; the receiver's other two fields are
    # ignored and the raw features double as the receiver tensors.
    raw_features, _, _ = build_receiver()

    # Apply the saved transform_fn to the raw features.
    transform_fn_path = os.path.join(
        params.TRANSFORM_ARTIFACTS_DIR, transform_fn_io.TRANSFORM_FN_DIR)
    _, transformed = saved_transform_io.partially_apply_saved_transform(
        transform_fn_path, raw_features)

    # Post-process the transformed features with process_features.
    processed = input.process_features(transformed)

    return tf.estimator.export.ServingInputReceiver(processed, raw_features)
def _eval_input_receiver_fn(transform_output, schema):
    """Build everything needed for the tf-model-analysis to run the model.

    Args:
        transform_output: directory in which the tf-transform model was
            written during the preprocessing step.
        schema: the schema of the input data.

    Returns:
        EvalInputReceiver function, which contains:
          - Tensorflow graph which parses raw untransformed features,
            applies the tf-transform preprocessing operators.
          - Set of raw, untransformed features (merged with the transformed
            ones).
          - Labels against which predictions will be compared, as a dict
            with one entry per label tensor.
    """
    # Notice that the inputs are raw features, not transformed features here.
    raw_feature_spec = _get_raw_feature_spec(schema)

    serialized_tf_example = tf.placeholder(
        dtype=tf.string, shape=[None], name='input_example_tensor')

    # Add a parse_example operator to the tensorflow graph, which will parse
    # raw, untransformed, tf examples.
    features = tf.parse_example(serialized_tf_example, raw_feature_spec)

    # Now that we have our raw examples, process them through the
    # tf-transform function computed during the preprocessing step.
    _, transformed_features = (
        saved_transform_io.partially_apply_saved_transform(
            os.path.join(transform_output, transform_fn_io.TRANSFORM_FN_DIR),
            features))

    # The key name MUST be 'examples'.
    receiver_tensors = {'examples': serialized_tf_example}

    # NOTE: Model is driven by transformed features (since training works on
    # the materialized output of TFT), but slicing will happen on raw
    # features.
    features.update(transformed_features)

    # Look each label up individually. Subscripting the dict with all three
    # names at once builds a tuple key, which is not present and raises
    # KeyError.
    # NOTE(review): the label dict is keyed by the transformed names --
    # confirm these are the keys the model expects.
    label_keys = [
        _transformed_name("training_masks"),
        _transformed_name("geo_maps"),
        _transformed_name("score_maps"),
    ]
    labels = {key: transformed_features[key] for key in label_keys}

    return tfma.export.EvalInputReceiver(
        features=features,
        receiver_tensors=receiver_tensors,
        labels=labels)
def serving_input_fn():
    """Serving input receiver with one placeholder per non-label feature.

    Reads `input_feature_spec`, `labels` and `transform_fn_dir` from the
    enclosing scope.

    Returns:
        A `tf.estimator.export.ServingInputReceiver`. Its features are the
        transformed tensors when `transform_fn_dir` is set, otherwise the
        raw placeholders themselves.
    """
    raw_placeholders = {
        name: tf.placeholder(
            input_feature_spec[name].dtype, shape=[None], name=name)
        for name in input_feature_spec
        if name not in labels
    }

    if not transform_fn_dir:
        # No transform graph configured: serve the raw placeholders as-is.
        return tf.estimator.export.ServingInputReceiver(
            raw_placeholders, raw_placeholders)

    _, transformed = saved_transform_io.partially_apply_saved_transform(
        transform_fn_dir, raw_placeholders)
    return tf.estimator.export.ServingInputReceiver(
        transformed, raw_placeholders)
def transform_raw_features(self, raw_features):
    """Apply the saved tf.Transform graph to a dict of raw feature tensors.

    Args:
        raw_features: A dict whose keys are feature names and values are
            `Tensor`s or `SparseTensor`s.

    Returns:
        A dict whose keys are feature names and values are `Tensor`s or
        `SparseTensor`s representing transformed features.
    """
    applied = saved_transform_io.partially_apply_saved_transform(
        self.transform_savedmodel_dir, raw_features)
    # The first element (inputs left unbound) is not needed here.
    _unbound_inputs, transformed = applied
    return transformed
def _serving_input_fn():
    """Serving input_fn for JSON-encoded examples.

    Reads `raw_serving_feature_spec`, `transform_savedmodel_dir` and
    `convert_scalars_to_vectors` from the enclosing scope.

    Returns:
        An `input_fn_utils.InputFnOps` with the transformed features, no
        labels, and the JSON string placeholder under "json_example".
    """
    json_placeholder = tf.placeholder(tf.string, shape=[None])
    # JSON strings -> example strings -> parsed raw features.
    serialized_examples = tf.decode_json_example(json_placeholder)
    parsed = tf.parse_example(serialized_examples, raw_serving_feature_spec)

    default_inputs = {"json_example": json_placeholder}

    _, transformed = saved_transform_io.partially_apply_saved_transform(
        transform_savedmodel_dir, parsed)
    if convert_scalars_to_vectors:
        transformed = _convert_scalars_to_vectors(transformed)

    return input_fn_utils.InputFnOps(transformed, None, default_inputs)
def serving_input_fn():
    """Input function for serving.

    Reads `raw_feature_spec` and `working_dir` from the enclosing scope.

    Returns:
        An `input_fn_utils.InputFnOps` with the transformed features, no
        labels, and the parsing input_fn's default inputs.
    """
    # Build the basic parsing input_fn and call it to obtain the raw
    # features parsed from the examples fed at serving time.
    build_raw_ops = input_fn_utils.build_parsing_serving_input_fn(
        raw_feature_spec)
    parsed_features, _, default_inputs = build_raw_ops()

    # Apply the same transform function that produced the materialized data.
    transform_fn_path = os.path.join(
        working_dir, transform_fn_io.TRANSFORM_FN_DIR)
    _, transformed = saved_transform_io.partially_apply_saved_transform(
        transform_fn_path, parsed_features)

    return input_fn_utils.InputFnOps(transformed, None, default_inputs)
def _eval_input_receiver_fn(transform_output, schema):
    """Build the receiver tf-model-analysis uses to run the model.

    Args:
        transform_output: directory in which the tf-transform model was
            written during the preprocessing step.
        schema: the schema of the input data.

    Returns:
        A `tfma.export.EvalInputReceiver` holding:
          - features: the parsed raw features merged with the transformed
            features,
          - receiver tensors: the serialized-example placeholder under
            'examples',
          - labels: the transformed label column.
    """
    # The receiver is fed raw (untransformed) serialized examples.
    feature_spec = _get_raw_feature_spec(schema)
    examples = tf.placeholder(
        dtype=tf.string, shape=[None], name='input_example_tensor')

    # Parse the raw examples inside the graph ...
    merged_features = tf.parse_example(examples, feature_spec)

    # ... and push them through the transform computed in preprocessing.
    transform_fn_path = os.path.join(
        transform_output, transform_fn_io.TRANSFORM_FN_DIR)
    _, transformed = saved_transform_io.partially_apply_saved_transform(
        transform_fn_path, merged_features)

    # The model consumes transformed features while slicing uses raw ones,
    # so expose both in a single dict.
    merged_features.update(transformed)

    return tfma.export.EvalInputReceiver(
        features=merged_features,
        # The key name MUST be 'examples'.
        receiver_tensors={'examples': examples},
        labels=transformed[_transformed_name(_LABEL_KEY)])
def default_transforming_serving_input_fn():
    """Serving input_fn that applies transforms to raw data in Tensors.

    Reads `raw_metadata`, `raw_feature_keys` and `transform_savedmodel_dir`
    from the enclosing scope.

    Returns:
        An `input_fn_utils.InputFnOps` with the transformed features, no
        labels, and the raw placeholders as default inputs.

    Raises:
        ValueError: If any selected raw placeholder is a `tf.SparseTensor`.
    """
    raw_serving_features = {
        k: v
        for k, v in raw_metadata.schema.as_batched_placeholders().items()
        if k in raw_feature_keys
    }

    # Inspect the placeholder *values*: iterating the dict itself yields the
    # string keys, which made this check unable to ever detect a sparse input.
    if any(isinstance(t, tf.SparseTensor)
           for t in raw_serving_features.values()):
        raise ValueError(
            "Feeding sparse tensors directly at serving time is not "
            "supported.")

    _, transformed_features = (
        saved_transform_io.partially_apply_saved_transform(
            transform_savedmodel_dir, raw_serving_features))

    return input_fn_utils.InputFnOps(
        transformed_features, None, raw_serving_features)
def raw_training_input_fn():
    """Training input function that reads raw data and applies transforms.

    Reads its configuration (`key_feature_name`, `raw_data_file_pattern`,
    `training_batch_size`, `raw_feature_spec`, `reader`,
    `read_batch_features_args`, `transform_savedmodel_dir`,
    `transformed_feature_keys`, `transformed_label_keys`,
    `convert_scalars_to_vectors`) from the enclosing scope.

    Returns:
        A `(features, labels)` pair. `labels` is None when no label keys
        match, the single tensor when exactly one matches, and a dict
        otherwise.
    """
    if key_feature_name is None:
        raw_data = tf.contrib.learn.io.read_batch_features(
            raw_data_file_pattern, training_batch_size, raw_feature_spec,
            reader, **read_batch_features_args)
    else:
        keys, raw_data = tf.contrib.learn.io.read_keyed_batch_features(
            raw_data_file_pattern, training_batch_size, raw_feature_spec,
            reader, **read_batch_features_args)

    _, transformed_data = saved_transform_io.partially_apply_saved_transform(
        transform_savedmodel_dir, raw_data)

    # Split the transformed columns into features and labels.
    features = {}
    labels = {}
    for name, tensor in six.iteritems(transformed_data):
        if name in transformed_feature_keys:
            features[name] = tensor
        if name in transformed_label_keys:
            labels[name] = tensor

    if convert_scalars_to_vectors:
        features = _convert_scalars_to_vectors(features)
        labels = _convert_scalars_to_vectors(labels)

    if key_feature_name is not None:
        features[key_feature_name] = keys

    if not labels:
        labels = None
    elif len(labels) == 1:
        # Unwrap a single label so callers get the tensor, not a dict.
        [labels] = labels.values()

    return features, labels
def eval_input_receiver_fn(tf_transform_dir):
    """Build the receiver tf-model-analysis uses to run the model.

    Args:
        tf_transform_dir: directory in which the tf-transform model was
            written during the preprocessing step.

    Returns:
        A `tfma.export.EvalInputReceiver` holding:
          - features: the transformed features computed from the parsed raw
            examples,
          - receiver tensors: the serialized-example placeholder under
            'examples',
          - labels: the `taxi.LABEL_KEY` entry of the transformed features.
    """
    # The receiver is fed raw (untransformed) serialized examples.
    feature_spec = taxi.get_raw_feature_spec()
    examples = tf.placeholder(
        dtype=tf.string, shape=[None], name='input_example_tensor')

    # Parse the raw examples inside the graph ...
    parsed = tf.parse_example(examples, feature_spec)

    # ... and push them through the transform computed in preprocessing.
    transform_fn_path = os.path.join(
        tf_transform_dir, transform_fn_io.TRANSFORM_FN_DIR)
    _, transformed = saved_transform_io.partially_apply_saved_transform(
        transform_fn_path, parsed)

    return tfma.export.EvalInputReceiver(
        features=transformed,
        # The key name MUST be 'examples'.
        receiver_tensors={'examples': examples},
        labels=transformed[taxi.LABEL_KEY])
def analysis_input_fn():
    """Input receiver for analysis: parse raw examples, then transform them.

    Reads `tf_transform_dir` from the enclosing scope.

    Returns:
        A `tf.estimator.export.ServingInputReceiver` whose features are the
        transformed tensors and whose receiver tensor, keyed by
        `SignatureKeys.INPUT`, is the serialized-example placeholder.
    """
    # Raw feature spec used to parse the analysis input.
    feature_spec = data_formatter.RAW_DATA_METADATA.schema.as_feature_spec()
    examples = tf.placeholder(dtype=tf.string, shape=[None])

    # Parse the serialized examples according to the raw feature spec.
    parsed = tf.parse_example(examples, feature_spec)

    # Process the raw examples through the tf-transform function computed
    # during the preprocessing step.
    transform_fn_path = os.path.join(
        tf_transform_dir, transform_fn_io.TRANSFORM_FN_DIR)
    _, transformed = saved_transform_io.partially_apply_saved_transform(
        transform_fn_path, parsed)

    # TODO: target keys are not removed from the features; it is unclear how
    # to filter them (enabled_target_keys? data_formatter.TARGET_KEYS?).

    # NOTE(review): the previous comment said tfma requires the key
    # `SignatureKeys.EXAMPLES`, but the code uses `SignatureKeys.INPUT` --
    # confirm which one is intended.
    receiver_tensors = {SignatureKeys.INPUT: examples}
    return tf.estimator.export.ServingInputReceiver(
        transformed, receiver_tensors)
def _input_fn():
    """Serving input receiver built from string and numeric placeholders.

    Reads `transform_savedmodel_dir` from the enclosing scope.

    Returns:
        A `tf.estimator.export.ServingInputReceiver` whose features are the
        transformed tensors plus the passthrough columns, and whose receiver
        tensors are the raw placeholders (label column removed).
    """
    raw_placeholders = {}
    for name in STRING_COLS:
        raw_placeholders[name] = tf.placeholder(tf.string, [None])
    for name in NUMERIC_COLS:
        raw_placeholders[name] = tf.placeholder(tf.float32, [None])
    # The label column is removed from the serving inputs.
    del raw_placeholders[LABEL_COL]

    _, transformed = saved_transform_io.partially_apply_saved_transform(
        transform_savedmodel_dir, raw_placeholders)

    # Copy passthrough columns straight from the inputs, adding a trailing
    # axis so that outputs are consistently in lists.
    for name in PASSTHROUGH_COLS:
        passthrough = tf.identity(raw_placeholders[name])
        transformed[name] = tf.expand_dims(passthrough, axis=1)

    return tf.estimator.export.ServingInputReceiver(
        transformed, raw_placeholders)
def input_fn():
    """Serving input function that reads raw data and applies transforms.

    Reads `tft_working_dir` from the enclosing scope.

    Returns:
        A `tf.estimator.export.ServingInputReceiver` built from the
        transformed features and the raw receiver tensors.
    """
    placeholder_spec = RAW_DATA_METADATA.schema.as_batched_placeholders()
    # Remove the label key, which is not going to be available at serving.
    placeholder_spec.pop(LABEL_KEY)

    # Build the raw receiver: `raw_features` feed the transform graph and
    # `receiver_tensors` are what the serving request fills in.
    build_receiver = tf.estimator.export.build_raw_serving_input_receiver_fn(
        placeholder_spec)
    raw_features, receiver_tensors, _ = build_receiver()

    # Transform the raw features with the graph that preprocess.py wrote to
    # transform_fn_io.TRANSFORM_FN_DIR -- the same graph used to write the
    # tf records -- which helps avoid training/serving skew.
    transform_fn_path = os.path.join(
        tft_working_dir, transform_fn_io.TRANSFORM_FN_DIR)
    _, transformed = saved_transform_io.partially_apply_saved_transform(
        transform_fn_path, raw_features)

    return tf.estimator.export.ServingInputReceiver(
        transformed, receiver_tensors)