def _preprocessing_fn_with_two_phases(inputs):
    """Standardize feature ``'x'`` with two chained ``tft.mean`` analyzers.

    The second analyzer consumes a tensor derived from the first analyzer's
    output, so the two analyzers depend on each other.

    Args:
        inputs: Mapping that provides the feature under key ``'x'``.

    Returns:
        A dict with the single key ``'x_normalized'``.
    """
    feature = inputs['x']
    mean = tft.mean(feature, name='x')
    squared_deviations = tf.square(feature - mean)
    variance = tft.mean(squared_deviations, name='x_square_deviations')
    return {'x_normalized': (feature - mean) / tf.sqrt(variance)}
def _preprocessing_fn_with_packable_analyzer_single_phase(inputs):
    """Center features ``'x'`` and ``'y'`` around their ``tft.mean`` values.

    The two analyzers are independent of each other: neither consumes the
    other's output.

    Args:
        inputs: Mapping that provides the features ``'x'`` and ``'y'``.

    Returns:
        A dict with keys ``'x_centered'`` and ``'y_centered'``.
    """
    feature_x = inputs['x']
    feature_y = inputs['y']
    outputs = {}
    outputs['x_centered'] = feature_x - tft.mean(feature_x, name='x')
    outputs['y_centered'] = feature_y - tft.mean(feature_y, name='y')
    return outputs
def _preprocessing_fn_for_common_optimize_traversal(inputs):
    """Request a vocabulary on ``'s'`` and standardize ``'x'``.

    The vocabulary result is deliberately discarded; only the standardized
    ``'x'`` feature is returned.

    Args:
        inputs: Mapping that provides the features ``'s'`` and ``'x'``.

    Returns:
        A dict with the single key ``'x_normalized'``.
    """
    # Result unused: the call is made only for the analyzer it registers.
    tft.vocabulary(inputs['s'])

    feature = inputs['x']
    mean = tft.mean(feature, name='x')
    squared_deviations = tf.square(feature - mean)
    variance = tft.mean(squared_deviations, name='x_square_deviations')
    return {'x_normalized': (feature - mean) / tf.sqrt(variance)}
def _preprocessing_fn_with_packable_analyzer_two_phases(inputs):
    """Standardize ``'x'`` and ``'y'``, each with two chained mean analyzers.

    Args:
        inputs: Mapping that provides the features ``'x'`` and ``'y'``.

    Returns:
        A dict with keys ``'x_normalized'`` and ``'y_normalized'``.
    """

    def _standardize(feature, mean_name, deviations_name):
        # Same op sequence for each feature: mean, squared deviations,
        # mean of the squared deviations, then the scaled difference.
        mean = tft.mean(feature, name=mean_name)
        squared_deviations = tf.square(feature - mean)
        variance = tft.mean(squared_deviations, name=deviations_name)
        return (feature - mean) / tf.sqrt(variance)

    feature_x, feature_y = inputs['x'], inputs['y']
    x_result = _standardize(feature_x, 'x', 'x_square_deviations')
    y_result = _standardize(feature_y, 'y', 'y_square_deviations')
    return {'x_normalized': x_result, 'y_normalized': y_result}
def preprocessing_fn(inputs):
    """Register vocabulary/bucketize calls and emit min/mean of x and y.

    Each min/mean value is added to ``tf.zeros_like`` of its feature before
    being returned.

    Args:
        inputs: Mapping that provides the features ``'s'``, ``'x'``, ``'y'``.

    Returns:
        A dict with keys ``'x_min'``, ``'x_mean'``, ``'y_min'``, ``'y_mean'``.
    """
    # Results unused: the calls are kept for the analyzers they register.
    tft.vocabulary(inputs['s'])
    tft.bucketize(inputs['x'], 2, name='bucketize')

    outputs = {}
    outputs['x_min'] = (
        tft.min(inputs['x'], name='x') + tf.zeros_like(inputs['x']))
    outputs['x_mean'] = (
        tft.mean(inputs['x'], name='x') + tf.zeros_like(inputs['x']))
    outputs['y_min'] = (
        tft.min(inputs['y'], name='y') + tf.zeros_like(inputs['y']))
    outputs['y_mean'] = (
        tft.mean(inputs['y'], name='y') + tf.zeros_like(inputs['y']))
    return outputs
def __call__(self, pred, target, weight=None):
    """Compute a negative-log overlap-ratio loss between ``pred`` and ``target``.

    Columns 0-3 of both arguments are read as left, top, right and bottom.
    Areas are formed as ``(left + right) * (top + bottom)``.

    Args:
        pred: Predicted values, indexed as ``pred[:, 0..3]``.
        target: Target values, indexed as ``target[:, 0..3]``.
        weight: Optional per-element weights. When given and their sum is
            positive, the weighted average of the loss is returned.

    Returns:
        The weighted average loss, or the plain mean when no usable weight
        is supplied.
    """
    # NOTE(review): reductions go through `tft.sum` / `tft.mean`; confirm
    # that `tft` is the intended module for these reductions.
    p_left, p_top, p_right, p_bottom = (pred[:, i] for i in range(4))
    t_left, t_top, t_right, t_bottom = (target[:, i] for i in range(4))

    area_target = (t_left + t_right) * (t_top + t_bottom)
    area_pred = (p_left + p_right) * (p_top + p_bottom)

    overlap_w = tf.minimum(p_left, t_left) + tf.minimum(p_right, t_right)
    overlap_h = tf.minimum(p_bottom, t_bottom) + tf.minimum(p_top, t_top)

    overlap = overlap_w * overlap_h
    union = area_target + area_pred - overlap

    # The +1.0 on both sides keeps the ratio defined when the union is zero.
    loss = -tf.log((overlap + 1.0) / (union + 1.0))

    if weight is not None and tft.sum(weight) > 0:
        return tft.sum(loss * weight) / tft.sum(weight)

    assert tf.size(loss) != 0
    return tft.mean(loss)
def preprocessing_fn(inputs):
    """Normalize the ``no2``, ``so2``, ``pm10`` and ``soot`` columns.

    ``no2`` and ``so2`` are centered by subtracting ``tft.mean``; ``pm10``
    goes through ``tft.scale_to_0_1`` and ``soot`` through
    ``tft.scale_by_min_max``.

    Args:
        inputs: Mapping that provides the columns ``"no2"``, ``"pm10"``,
            ``"so2"`` and ``"soot"``.

    Returns:
        A dict with keys ``"no2_normalized"``, ``"so2_normalized"``,
        ``"pm10_normalized"`` and ``"soot_normalized"``.
    """
    no2 = inputs["no2"]
    pm10 = inputs["pm10"]
    so2 = inputs["so2"]
    soot = inputs["soot"]

    no2_normalized = no2 - tft.mean(no2)
    so2_normalized = so2 - tft.mean(so2)
    pm10_normalized = tft.scale_to_0_1(pm10)
    soot_normalized = tft.scale_by_min_max(soot)

    return {
        "no2_normalized": no2_normalized,
        "so2_normalized": so2_normalized,
        "pm10_normalized": pm10_normalized,
        # Key was misspelled "sott_normalized"; it now follows the
        # "<column>_normalized" pattern of the other outputs.
        "soot_normalized": soot_normalized,
    }
def preprocess(inputs):
    """Transform a batch of input features.

    Args:
        inputs: A batch of input features providing
            ``"housing_median_age"`` and ``"ocean_proximity"``.

    Returns:
        A dict with keys ``"standardized_median_age"`` and
        ``"ocean_proximity_id"``.
    """
    ages = inputs["housing_median_age"]
    proximity = inputs["ocean_proximity"]

    # The age is centered on its mean before being passed to the z-score
    # scaler.
    centered_ages = ages - tft.mean(ages)
    outputs = {}
    outputs["standardized_median_age"] = tft.scale_to_z_score(centered_ages)
    outputs["ocean_proximity_id"] = tft.compute_and_apply_vocabulary(proximity)
    return outputs
def _preprocessing_fn_with_one_analyzer(inputs):
    """Add one to ``'x'`` inside a ``tf.function`` and center the result.

    Args:
        inputs: Mapping that provides the feature under key ``'x'``.

    Returns:
        A dict with the single key ``'x_centered'``.
    """

    @tf.function
    def _plus_one(x):
        return x + 1

    shifted = _plus_one(inputs['x'])
    shifted_mean = tft.mean(shifted, name='x')
    return {'x_centered': shifted - shifted_mean}
def preprocessing_fn(inputs):
    """Integerize ``'s'`` and emit min/mean of ``'x'`` and ``'y'``.

    A ``tft.bucketize`` call on ``'x'`` is also made; its result is
    discarded.

    Args:
        inputs: Mapping that provides the features ``'s'``, ``'x'``, ``'y'``.

    Returns:
        A dict with keys ``'integerized_s'``, ``'x_min'``, ``'x_mean'``,
        ``'y_min'`` and ``'y_mean'``.
    """
    s_ids = tft.compute_and_apply_vocabulary(inputs['s'])
    # Result unused: the call is kept for the analyzer it registers.
    tft.bucketize(inputs['x'], 2, name='bucketize')

    outputs = {'integerized_s': s_ids}
    outputs['x_min'] = (
        tft.min(inputs['x'], name='x') + tf.zeros_like(inputs['x']))
    outputs['x_mean'] = (
        tft.mean(inputs['x'], name='x') + tf.zeros_like(inputs['x']))
    outputs['y_min'] = (
        tft.min(inputs['y'], name='y') + tf.zeros_like(inputs['y']))
    outputs['y_mean'] = (
        tft.mean(inputs['y'], name='y') + tf.zeros_like(inputs['y']))
    return outputs
def _preprocessing_fn_with_packable_analyzer_single_phase(inputs):
    """Center ``'x'`` and ``'y'`` and integerize ``'z'`` via a lookup table.

    The table is initialized from the result of ``tft.vocabulary`` on
    ``'z'``: whole lines are keys, line numbers are values, and keys that
    are not found map to ``-1``.

    Args:
        inputs: Mapping that provides the features ``'x'``, ``'y'``, ``'z'``.

    Returns:
        A dict with keys ``'x_centered'``, ``'y_centered'`` and
        ``'z_integerized'``.
    """
    feature_x, feature_y = inputs['x'], inputs['y']
    centered_x = feature_x - tft.mean(feature_x, name='x')
    centered_y = feature_y - tft.mean(feature_y, name='y')

    feature_z = inputs['z']
    vocab_file = tft.vocabulary(feature_z, name='z')
    lookup_table = tf.lookup.StaticHashTable(
        tf.lookup.TextFileInitializer(
            vocab_file,
            key_dtype=tf.string,
            key_index=tf.lookup.TextFileIndex.WHOLE_LINE,
            value_dtype=tf.int64,
            value_index=tf.lookup.TextFileIndex.LINE_NUMBER,
        ),
        default_value=-1,
    )

    return {
        'x_centered': centered_x,
        'y_centered': centered_y,
        'z_integerized': lookup_table.lookup(feature_z),
    }
def preprocessing_fn(inputs):
    """Normalize the ``no2``, ``so2``, ``pm10`` and ``soot`` columns.

    Args:
        inputs: Mapping that provides the columns ``'no2'``, ``'pm10'``,
            ``'so2'`` and ``'soot'``.

    Returns:
        A dict with keys ``"no2_normalized"``, ``"so2_normalized"``,
        ``"pm10_normalized"`` and ``"soot_normalized"``.
    """
    # Pull out each column explicitly.
    no2_column = inputs['no2']
    pm10_column = inputs['pm10']
    so2_column = inputs['so2']
    soot_column = inputs['soot']

    # Mean-center no2/so2; range-scale pm10/soot.
    outputs = {}
    outputs["no2_normalized"] = no2_column - tft.mean(no2_column)
    outputs["so2_normalized"] = so2_column - tft.mean(so2_column)
    outputs["pm10_normalized"] = tft.scale_to_0_1(pm10_column)
    outputs["soot_normalized"] = tft.scale_by_min_max(soot_column)
    return outputs
def preprocessing_fn(inputs):
    """Emit min/mean of ``'x'``/``'y'`` and a label-aware integerized ``'s'``.

    A named vocabulary (``vocab_filename='vocab1'``) and a bucketize call are
    also made; their results are discarded.

    Args:
        inputs: Mapping that provides the features ``'s'``, ``'x'``, ``'y'``
            and ``'label'``.

    Returns:
        A dict with keys ``'x_min'``, ``'x_mean'``, ``'y_min'``, ``'y_mean'``
        and ``'s_integerized'``.
    """
    # Results unused: the calls are kept for the analyzers they register.
    tft.vocabulary(inputs['s'], vocab_filename='vocab1')
    tft.bucketize(inputs['x'], 2, name='bucketize')

    outputs = {}
    outputs['x_min'] = (
        tft.min(inputs['x'], name='x') + tf.zeros_like(inputs['x']))
    outputs['x_mean'] = (
        tft.mean(inputs['x'], name='x') + tf.zeros_like(inputs['x']))
    outputs['y_min'] = (
        tft.min(inputs['y'], name='y') + tf.zeros_like(inputs['y']))
    outputs['y_mean'] = (
        tft.mean(inputs['y'], name='y') + tf.zeros_like(inputs['y']))
    outputs['s_integerized'] = tft.compute_and_apply_vocabulary(
        inputs['s'],
        labels=inputs['label'],
        use_adjusted_mutual_info=True,
    )
    return outputs
def preprocessing_fn(inputs):
    """Apply min, max, sum, size and mean to column ``'a'`` and map each
    result across the batch.

    Args:
        inputs: Mapping that provides the column ``'a'``.

    Returns:
        A dict with keys ``'min'``, ``'max'``, ``'sum'``, ``'size'`` and
        ``'mean'``; each value is ``tft.map(repeat, inputs['a'], <analyzer>)``.
    """

    def repeat(in_tensor, value):
        # Build a vector with one entry per row of `in_tensor` (its first
        # dimension), every entry equal to `value`.
        batch_size = tf.shape(in_tensor)[0]
        return tf.ones([batch_size], value.dtype) * value

    analyzers = (
        ('min', tft.min),
        ('max', tft.max),
        ('sum', tft.sum),
        ('size', tft.size),
        ('mean', tft.mean),
    )
    return {
        key: tft.map(repeat, inputs['a'], analyzer(inputs['a']))
        for key, analyzer in analyzers
    }
def preprocessing_fn(inputs):
    """Center ``'x'``, scale ``'y'``, integerize ``'s'`` and cross x with y.

    Args:
        inputs: Mapping that provides the columns ``'x'``, ``'y'``, ``'s'``.

    Returns:
        A dict with keys ``'x_centered'``, ``'y_normalized'``,
        ``'x_centered_times_y_normalized'`` and ``'s_integerized'``.
    """
    col_x, col_y, col_s = inputs['x'], inputs['y'], inputs['s']

    centered = col_x - tft.mean(col_x)
    scaled = tft.scale_to_0_1(col_y)
    token_ids = tft.compute_and_apply_vocabulary(col_s)

    return {
        'x_centered': centered,
        'y_normalized': scaled,
        'x_centered_times_y_normalized': centered * scaled,
        's_integerized': token_ids,
    }
def preprocessing_fn(inputs):
    """Turn the raw ``'x'``, ``'y'`` and ``'s'`` columns into transformed ones.

    Args:
        inputs: Mapping that provides the columns ``'x'``, ``'y'``, ``'s'``.

    Returns:
        A dict with keys ``'x_centered'``, ``'y_normalized'``,
        ``'x_centered_times_y_normalized'`` and ``'s_integerized'``.
    """
    col_x, col_y, col_s = inputs['x'], inputs['y'], inputs['s']

    centered = col_x - tft.mean(col_x)
    scaled = tft.scale_to_0_1(col_y)
    token_ids = tft.string_to_int(col_s)
    crossed = centered * scaled

    outputs = {}
    outputs['x_centered'] = centered
    outputs['y_normalized'] = scaled
    outputs['x_centered_times_y_normalized'] = crossed
    outputs['s_integerized'] = token_ids
    return outputs
def preprocessing_fn(inputs):
    """Turn the raw ``'x'``, ``'y'`` and ``'s'`` columns into transformed ones.

    Element-wise arithmetic is routed through ``tft.map`` rather than being
    written as plain operators.

    Args:
        inputs: Mapping that provides the columns ``'x'``, ``'y'``, ``'s'``.

    Returns:
        A dict with keys ``'x_centered'``, ``'y_normalized'``,
        ``'x_centered_times_y_normalized'`` and ``'s_integerized'``.
    """
    col_x, col_y, col_s = inputs['x'], inputs['y'], inputs['s']

    centered = tft.map(lambda x, mean: x - mean, col_x, tft.mean(col_x))
    scaled = tft.scale_to_0_1(col_y)
    token_ids = tft.string_to_int(col_s)
    crossed = tft.map(lambda x, y: x * y, centered, scaled)

    return {
        'x_centered': centered,
        'y_normalized': scaled,
        'x_centered_times_y_normalized': crossed,
        's_integerized': token_ids,
    }
def preprocessing_fn(inputs):
    """Normalize the ``no2``, ``so2``, ``pm10`` and ``soot`` columns.

    ``no2`` and ``so2`` are centered by subtracting ``tft.mean``; ``pm10``
    goes through ``tft.scale_to_0_1`` and ``soot`` through
    ``tft.scale_by_min_max``.

    Args:
        inputs: Mapping that provides the columns ``'no2'``, ``'pm10'``,
            ``'so2'`` and ``'soot'``.

    Returns:
        A dict with keys ``'no2_normalized'``, ``'so2_normalized'``,
        ``'pm10_normalized'`` and ``'soot_normalized'``.
    """
    no2 = inputs['no2']
    pm10 = inputs['pm10']
    so2 = inputs['so2']
    soot = inputs['soot']

    # Was `tftmean(no2)`, an undefined name; the call is `tft.mean`.
    no2_normalized = no2 - tft.mean(no2)
    so2_normalized = so2 - tft.mean(so2)
    pm10_normalized = tft.scale_to_0_1(pm10)
    soot_normalized = tft.scale_by_min_max(soot)

    return {
        'no2_normalized': no2_normalized,
        'so2_normalized': so2_normalized,
        # Was `pm10__normalized` (double underscore), an undefined name.
        'pm10_normalized': pm10_normalized,
        'soot_normalized': soot_normalized,
    }
def _preprocessing_fn_with_one_analyzer(inputs):
    """Center feature ``'x'`` around its ``tft.mean``.

    Args:
        inputs: Mapping that provides the feature under key ``'x'``.

    Returns:
        A dict with the single key ``'x_centered'``.
    """
    feature = inputs['x']
    return {'x_centered': feature - tft.mean(feature, name='x')}
def apply(x):
    """Cast ``x`` to float32 and pass it, with its mean, to ``_impute``.

    Args:
        x: Input tensor. The mean is taken on the original (uncast) tensor.

    Returns:
        Whatever ``_impute`` returns for the cast tensor and the mean.
    """
    # The mean is computed before the cast, on the tensor as received.
    mean_value = tft.mean(x)
    as_float = tf.cast(x, dtype=tf.float32)
    return _impute(as_float, mean_value)
def mean_fn(inputs):
    """Map ``repeat`` over column ``'a'`` together with its ``tft.mean``.

    Args:
        inputs: Mapping that provides the column ``'a'``.

    Returns:
        A dict with the single key ``'mean'``.
    """
    mapped = tft.map(repeat, inputs['a'], tft.mean(inputs['a']))
    return {'mean': mapped}
def preprocessing_fn(inputs):
    """Center ``'x'``, scale ``'y'`` to [0, 1] and integerize ``'s'``.

    Args:
        inputs: Mapping that provides the columns ``'x'``, ``'y'``, ``'s'``.

    Returns:
        A dict with keys ``'x_centered'``, ``'y_normalized'`` and
        ``'s_integerized'``.
    """
    # `x` was referenced without being bound in this function; bind it from
    # the same column the mean is computed on.
    x = inputs['x']
    return {
        'x_centered': x - tft.mean(x),
        'y_normalized': tft.scale_to_0_1(inputs['y']),
        's_integerized': tft.compute_and_apply_vocabulary(inputs['s']),
    }
# Feed a small hand-written batch through a DenseFeatures layer built from
# two of the feature columns.
some_columns = [ocean_proximity_embed, bucketized_income]
dense_features = keras.layers.DenseFeatures(some_columns)
dense_features(
    {
        "ocean_proximity": [["NEAR OCEAN"], ["INLAND"], ["INLAND"]],
        "median_income": [[3.], [7.2], [1.]],
    }
)

# TF Transform (optional dependency: `preprocess` is only defined when the
# import succeeds).
try:
    import tensorflow_transform as tft

    def preprocess(inputs):
        """Transform a batch of input features.

        Args:
            inputs: A batch of input features providing
                ``"housing_median_age"`` and ``"ocean_proximity"``.

        Returns:
            A dict with keys ``"standardized_median_age"`` and
            ``"ocean_proximity_id"``.
        """
        ages = inputs["housing_median_age"]
        proximity = inputs["ocean_proximity"]
        centered_ages = ages - tft.mean(ages)
        outputs = {}
        outputs["standardized_median_age"] = tft.scale_to_z_score(
            centered_ages)
        outputs["ocean_proximity_id"] = tft.compute_and_apply_vocabulary(
            proximity)
        return outputs
except ImportError:
    print("TF Transform is not installed. Try running: pip3 install -U tensorflow-transform")

# TensorFlow Datasets
import tensorflow_datasets as tfds

datasets = tfds.load(name="mnist")
mnist_train, mnist_test = datasets["train"], datasets["test"]
def center(x):
    """Subtract ``tft.mean(x)`` from ``x``.

    Args:
        x: Input tensor.

    Returns:
        ``x`` minus its ``tft.mean``.
    """
    mean_value = tft.mean(x)
    return x - mean_value
def compute_mean_ignore_nan(values):
    """Return the ``tft.mean`` of the finite entries of ``values``.

    Entries are kept when ``tf.math.is_finite`` is true for them, so the
    filter is "finite", not merely "not NaN".

    Args:
        values: Input tensor.

    Returns:
        The result of ``tft.mean`` on the masked values.
    """
    keep_mask = tf.math.is_finite(values)
    return tft.mean(tf.boolean_mask(values, keep_mask))