def analyzer_fn(inputs):
    """Analyzer graph: 3-bucket quantile boundaries for feature 'x'."""
    # Cast first so the analyzer sees the dtype under test.
    values = tf.cast(inputs['x'], input_dtype)
    boundaries = tft.quantiles(values, num_buckets=3, epsilon=0.00001)
    return {'q_b': boundaries}
def preprocessing_fn(inputs):
    """Bucketizes 'x' column-by-column into len(expected_boundaries)+1 buckets.

    Three paths, selected by module-level flags:
      * should_apply and is_manual_boundaries: use hand-written boundaries.
      * should_apply only: compute per-column boundaries via tft.quantiles
        with reduce_instance_dims=False, then apply them column-wise.
      * otherwise: delegate everything to tft.bucketize(elementwise=True).
    """
    x = tf.cast(inputs['x'], input_dtype)
    num_buckets = len(expected_boundaries) + 1

    if not should_apply:
        # Single-call path: bucketize performs analysis and mapping itself.
        return {
            'q_b': tft.bucketize(
                x, num_buckets=num_buckets, epsilon=epsilon, elementwise=True)
        }

    if is_manual_boundaries:
        # One boundary list per column; second column uses doubled boundaries.
        bucket_boundaries = [
            expected_boundaries,
            [2 * b for b in expected_boundaries],
        ]
    else:
        per_column = tft.quantiles(
            x, num_buckets, epsilon, reduce_instance_dims=False)
        bucket_boundaries = tf.unstack(per_column, axis=0)

    # Apply each column's boundaries to the matching column of x.
    bucketized_columns = []
    for col, boundaries in enumerate(bucket_boundaries):
        boundaries = tf.cast(boundaries, tf.float32)
        bucketized_columns.append(
            tft.apply_buckets(x[:, col], tf.expand_dims(boundaries, axis=0)))
    return {'q_b': tf.stack(bucketized_columns, axis=1)}
def preprocessing_fn(input_features):
    """Transforms raw features according to the schema.

    Per feature: target/weight pass through (via _prep), type-1 features are
    vocabulary-integerized, and all others are z-score normalized. Finally
    'age' is additionally quantile-bucketized into 5 buckets.
    """
    processed_features = {}
    for feature in raw_schema.feature:
        name = feature.name
        if name in (TARGET_FEATURE_NAME, WEIGHT_FEATURE_NAME):
            # Target and weight columns are passed along unchanged.
            processed_features[name] = _prep(input_features[name])
        elif feature.type == 1:
            # NOTE(review): type == 1 presumably means BYTES/string in the
            # schema proto — confirm against raw_schema's FeatureType enum.
            processed_features[name + "_integerized"] = _prep(
                tft.compute_and_apply_vocabulary(
                    input_features[name], vocab_filename=name))
        else:
            # Normalize numeric features to zero mean / unit variance.
            processed_features[name + "_scaled"] = _prep(
                tft.scale_to_z_score(input_features[name]))

    # Bucketize age using quantiles.
    quantiles = tft.quantiles(
        input_features["age"], num_buckets=5, epsilon=0.01)
    processed_features["age_bucketized"] = _prep(
        tft.apply_buckets(input_features["age"], bucket_boundaries=quantiles))
    return processed_features
def preprocess(input_features):
    """TFT preprocessing function.

    - Target feature passes through unchanged.
    - Numeric features: z-score scaled in place, plus a "<name>_bucketized"
      quantile-bucket copy.
    - Categorical features: vocabulary file emitted as an analyzer side
      effect; raw value passed through.
    - Vocab features: integerized as "<name>_integerized" with top-k / OOV
      handling.

    Returns:
      Dict of transformed feature tensors.
    """
    output_features = {}
    output_features[metadata.TARGET_FEATURE_NAME] = (
        input_features[metadata.TARGET_FEATURE_NAME])

    for feature_name in metadata.NUMERIC_FEATURE_NAMES:
        output_features[feature_name] = tft.scale_to_z_score(
            input_features[feature_name])
        quantiles = tft.quantiles(
            input_features[feature_name], num_buckets=NUM_BUCKETS,
            epsilon=0.01)
        output_features[feature_name + "_bucketized"] = tft.apply_buckets(
            input_features[feature_name], bucket_boundaries=quantiles)

    for feature_name in metadata.CATEGORICAL_FEATURE_NAMES:
        # Analyzer side effect only: writes the vocabulary file for serving;
        # the return value is deliberately unused.
        tft.uniques(input_features[feature_name], vocab_filename=feature_name)
        output_features[feature_name] = input_features[feature_name]

    for feature_name in metadata.VOCAB_FEATURE_NAMES:
        # NOTE(review): tft.uniques / tft.string_to_int are deprecated aliases
        # (tft.vocabulary / tft.compute_and_apply_vocabulary in newer TFT) —
        # confirm the pinned tensorflow-transform version before migrating.
        output_features[feature_name + "_integerized"] = tft.string_to_int(
            input_features[feature_name], top_k=metadata.VOCAB_SIZE,
            num_oov_buckets=metadata.OOV_SIZE, vocab_filename=feature_name)

    return output_features
def preprocessing_fn(inputs):
    """Computes quantile boundaries for 'x' and pins their static shape."""
    values = tf.cast(inputs['x'], input_dtype)
    boundaries = tft.quantiles(
        values, num_buckets, epsilon=0.0001,
        always_return_num_quantiles=False)
    # With always_return_num_quantiles=False the boundary count is dynamic;
    # assert the expected static shape so downstream shape checks hold.
    boundaries.set_shape([1, num_expected_buckets - 1])
    return {'q_b': boundaries}
def analyzer_fn(inputs):
    """Weighted per-column quantile boundaries (3 buckets) for feature 'x'."""
    values = tf.cast(inputs['x'], input_dtype)
    # reduce_instance_dims=False keeps one boundary set per column.
    boundaries = tft.quantiles(
        values, num_buckets=3, epsilon=0.00001,
        weights=inputs['weights'], reduce_instance_dims=False)
    return {'q_b': boundaries}
def analyzer_fn(inputs):
    """Weighted quantile boundaries for 'x', forcing exactly 3 buckets."""
    values = tf.cast(inputs['x'], input_dtype)
    # always_return_num_quantiles=True guarantees the full boundary count
    # even when the data has few distinct values.
    boundaries = tft.quantiles(
        values, num_buckets=3, epsilon=0.00001,
        weights=inputs['weights'], always_return_num_quantiles=True)
    return {'q_b': boundaries}
def preprocessing_fn(inputs):
    """Bucketizes 'x' into len(expected_boundaries)+1 buckets.

    Either applies boundaries (manual or from tft.quantiles) via
    tft.apply_buckets, or lets tft.bucketize do the whole job.
    """
    x = tf.cast(inputs['x'], input_dtype)
    num_buckets = len(expected_boundaries) + 1

    if not should_apply:
        # Combined analyze-and-map path.
        return {'q_b': tft.bucketize(x, num_buckets=num_buckets,
                                     epsilon=epsilon)}

    if is_manual_boundaries:
        bucket_boundaries = [expected_boundaries]
    else:
        # NOTE(review): quantiles is computed on the uncast inputs['x']
        # while apply_buckets receives the cast x — presumably intentional
        # in this test; confirm the dtypes agree.
        bucket_boundaries = tft.quantiles(inputs['x'], num_buckets, epsilon)

    return {'q_b': tft.apply_buckets(x, bucket_boundaries)}