def test_init_noop_preprocessor_raises(self, spec_or_tensors):
  spec_or_tensors_fn = lambda _: spec_or_tensors
  with self.assertRaises(ValueError):
    noop_preprocessor.NoOpPreprocessor(spec_or_tensors_fn, mock_labels_fn)
  with self.assertRaises(ValueError):
    noop_preprocessor.NoOpPreprocessor(mock_features_fn, spec_or_tensors_fn)
def test_remove_optional(self):
  preprocessor = noop_preprocessor.NoOpPreprocessor(
      model_feature_specification_fn=lambda mode: _FEATURE_SPEC_NO_CAST,
      model_label_specification_fn=lambda mode: _LABEL_SPEC_NO_CAST)
  tpu_preprocessor = tpu_preprocessor_wrapper.TPUPreprocessorWrapper(
      preprocessor=preprocessor)
  self.assertDictEqual(
      tpu_preprocessor.get_in_feature_specification(_MODE_TRAIN),
      preprocessor.get_in_feature_specification(_MODE_TRAIN))
  self.assertDictEqual(
      tpu_preprocessor.get_in_label_specification(_MODE_TRAIN),
      preprocessor.get_in_label_specification(_MODE_TRAIN))
  out_feature_spec = tensorspec_utils.replace_dtype(
      preprocessor.get_out_feature_specification(_MODE_TRAIN),
      from_dtype=tf.float32,
      to_dtype=tf.bfloat16)
  del out_feature_spec['optional_value']
  self.assertDictEqual(
      tpu_preprocessor.get_out_feature_specification(_MODE_TRAIN),
      out_feature_spec)
  out_label_spec = tensorspec_utils.replace_dtype(
      preprocessor.get_out_label_specification(_MODE_TRAIN),
      from_dtype=tf.float32,
      to_dtype=tf.bfloat16)
  del out_label_spec['optional_value']
  self.assertDictEqual(
      tpu_preprocessor.get_out_label_specification(_MODE_TRAIN),
      out_label_spec)
def test_cast_bfloat16_success(self):
  preprocessor = noop_preprocessor.NoOpPreprocessor(
      model_feature_specification_fn=lambda mode: _FEATURE_SPEC_CAST,
      model_label_specification_fn=lambda mode: _LABEL_SPEC_CAST)
  tpu_preprocessor = tpu_preprocessor_wrapper.TPUPreprocessorWrapper(
      preprocessor=preprocessor)
  # The spec structure elements with bfloat16 are converted to float32 within
  # the TPUPreprocessorWrapper so that we can create a proper parser and
  # perform CPU preprocessing.
  feature_spec = preprocessor.get_in_feature_specification(_MODE_TRAIN)
  feature_spec.data_bfloat16 = tensorspec_utils.ExtendedTensorSpec.from_spec(
      spec=feature_spec.data_bfloat16, dtype=tf.float32)
  label_spec = preprocessor.get_in_label_specification(_MODE_TRAIN)
  label_spec.optional_value = tensorspec_utils.ExtendedTensorSpec.from_spec(
      spec=label_spec.optional_value, dtype=tf.float32)
  self.assertDictEqual(
      tpu_preprocessor.get_in_feature_specification(_MODE_TRAIN),
      feature_spec)
  self.assertDictEqual(
      tpu_preprocessor.get_in_label_specification(_MODE_TRAIN), label_spec)
  out_feature_spec = preprocessor.get_out_feature_specification(_MODE_TRAIN)
  del out_feature_spec['optional_value']
  out_label_spec = preprocessor.get_out_label_specification(_MODE_TRAIN)
  del out_label_spec['optional_value']
  self.assertDictEqual(
      tpu_preprocessor.get_out_feature_specification(_MODE_TRAIN),
      out_feature_spec)
  self.assertDictEqual(
      tpu_preprocessor.get_out_label_specification(_MODE_TRAIN),
      out_label_spec)
  features = tensorspec_utils.make_placeholders(
      tpu_preprocessor.get_in_feature_specification(_MODE_TRAIN),
      batch_size=2)
  labels = tensorspec_utils.make_placeholders(
      tpu_preprocessor.get_in_label_specification(_MODE_TRAIN), batch_size=2)

  # Make sure features and labels are transformed correctly: float32 is
  # replaced with bfloat16 for the specs which ask for bfloat16. Note that we
  # compare against the out specs, not the outputs themselves, so the check
  # is meaningful.
  out_features, out_labels = tpu_preprocessor.preprocess(
      features=features, labels=labels, mode=_MODE_TRAIN)
  for ref_key, ref_value in out_feature_spec.items():
    self.assertEqual(out_features[ref_key].dtype, ref_value.dtype)
  for ref_key, ref_value in out_label_spec.items():
    self.assertEqual(out_labels[ref_key].dtype, ref_value.dtype)

  # Make sure features without labels are transformed correctly: float32 is
  # replaced with bfloat16 for the specs which ask for bfloat16.
  out_features, out_labels = tpu_preprocessor.preprocess(
      features=features, labels=None, mode=_MODE_TRAIN)
  self.assertIsNone(out_labels)
  for ref_key, ref_value in out_feature_spec.items():
    self.assertEqual(out_features[ref_key].dtype, ref_value.dtype)
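# For context on the cast test above: the module-level fixtures it references
# (_FEATURE_SPEC_CAST, _LABEL_SPEC_CAST) are assumed to pair a bfloat16 tensor
# with an optional tensor. The sketch below is hypothetical, for illustration
# only; the actual fixtures are defined elsewhere in this file and the exact
# shapes, names, and dtypes here are assumptions:
#   _FEATURE_SPEC_CAST = tensorspec_utils.TensorSpecStruct(
#       data_bfloat16=tensorspec_utils.ExtendedTensorSpec(
#           shape=(8,), dtype=tf.bfloat16, name='data_bfloat16'),
#       optional_value=tensorspec_utils.ExtendedTensorSpec(
#           shape=(1,), dtype=tf.float32, name='optional_value',
#           is_optional=True))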
def test_set_preprocess_fn(self):
  mock_input_generator = mocks.MockInputGenerator(batch_size=BATCH_SIZE)
  preprocessor = noop_preprocessor.NoOpPreprocessor()
  with self.assertRaises(ValueError):
    # This should raise since we pass a function whose `mode` argument has not
    # already been filled in, either by a closure or by functools.partial.
    mock_input_generator.set_preprocess_fn(preprocessor.preprocess)
  preprocess_fn = functools.partial(preprocessor.preprocess, labels=None)
  with self.assertRaises(ValueError):
    # This should raise since we pass a partial function, but `mode`
    # is still not abstracted away.
    mock_input_generator.set_preprocess_fn(preprocess_fn)
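# For reference, based on the comments in the test above, a call that is not
# expected to raise would bind `labels` and `mode` ahead of time. This is a
# hypothetical usage sketch, not part of the test:
#   preprocess_fn = functools.partial(
#       preprocessor.preprocess,
#       labels=None,
#       mode=tf.estimator.ModeKeys.TRAIN)
#   mock_input_generator.set_preprocess_fn(preprocess_fn)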
def test_init_noop_preprocessor(self):
  noop_preprocessor.NoOpPreprocessor(mock_features_fn, mock_labels_fn)
def test_noop_preprocessor_preprocess_fn(self):

  def preprocess(preprocessor, feature_spec, label_spec, flatten=False):
    with tf.Session() as sess:
      feature_placeholders = tensorspec_utils.make_placeholders(
          feature_spec, batch_size=1)
      label_placeholders = None
      if label_spec is not None:
        label_placeholders = tensorspec_utils.make_placeholders(
            label_spec, batch_size=1)
      # Normally we want our features and labels to be flattened.
      # However, we support non-flattened hierarchies as well.
      if flatten:
        feature_placeholders = tensorspec_utils.flatten_spec_structure(
            feature_placeholders)
        if label_spec is not None:
          label_placeholders = tensorspec_utils.flatten_spec_structure(
              label_placeholders)
      (features_preprocessed, labels_preprocessed) = preprocessor.preprocess(
          features=feature_placeholders,
          labels=label_placeholders,
          mode=tf.estimator.ModeKeys.TRAIN)
      # We create a mapping of {key: np.array} or a namedtuple spec structure.
      np_feature_spec = tensorspec_utils.make_random_numpy(
          feature_spec, batch_size=1)
      if label_placeholders is not None:
        np_label_spec = tensorspec_utils.make_random_numpy(
            label_spec, batch_size=1)
      # We create our feed_dict, which basically consists of
      # {placeholder: np.array}.
      feed_dict = tensorspec_utils.map_feed_dict(
          feature_placeholders, np_feature_spec, ignore_batch=True)
      if label_placeholders is not None:
        feed_dict = tensorspec_utils.map_feed_dict(
            label_placeholders, np_label_spec, feed_dict, ignore_batch=True)
      fetch_results = [features_preprocessed]
      if label_placeholders is not None:
        fetch_results.append(labels_preprocessed)
      np_preprocessed = sess.run(fetch_results, feed_dict=feed_dict)
      np_features_preprocessed = np_preprocessed[0]
      if label_placeholders is not None:
        np_labels_preprocessed = np_preprocessed[1]
      np_feature_spec = tensorspec_utils.flatten_spec_structure(
          np_feature_spec)
      if label_placeholders is not None:
        np_label_spec = tensorspec_utils.flatten_spec_structure(np_label_spec)
      for key, value in np_feature_spec.items():
        np.testing.assert_allclose(value, np_features_preprocessed[key])
      if label_placeholders is not None:
        for key, value in np_label_spec.items():
          np.testing.assert_allclose(value, np_labels_preprocessed[key])

  preprocessor = noop_preprocessor.NoOpPreprocessor(mock_features_fn,
                                                    mock_labels_fn)
  # Here we test that we can pass through our features, flattened and
  # unflattened.
  preprocess(preprocessor, mock_features, mock_labels, flatten=True)
  preprocess(preprocessor, mock_features, mock_labels, flatten=False)
  # Now we test that we can pass through the required subset.
  # Note, this really means the optional values are not provided and our
  # preprocessor does not complain.
  preprocess(preprocessor, mock_features_required, mock_labels, flatten=False)
  preprocess(preprocessor, mock_features_required, mock_labels, flatten=True)
  # Labels are not required.
  preprocess(preprocessor, mock_features_required, None, flatten=True)
  # Now we create a new preprocessor with additional requirements, which
  # should fail since none of our features fulfill those requirements.
  preprocessor = noop_preprocessor.NoOpPreprocessor(mock_features_broken_fn,
                                                    mock_labels_fn)
  with self.assertRaises(ValueError):
    preprocess(
        preprocessor, mock_features_required, mock_labels, flatten=False)
  with self.assertRaises(ValueError):
    preprocess(
        preprocessor, mock_features_required, mock_labels, flatten=True)