def analyzer_fn(inputs):
    """Return the quantile boundaries of ``inputs['x']`` under key ``'q_b'``.

    ``inputs['x']`` is cast to ``input_dtype`` (defined outside this block)
    and passed to ``tft.quantiles`` with 3 buckets and an epsilon of 1e-5.
    """
    casted = tf.cast(inputs['x'], input_dtype)
    boundaries = tft.quantiles(casted, num_buckets=3, epsilon=0.00001)
    return {'q_b': boundaries}
        def preprocessing_fn(inputs):
            """Bucketize ``inputs['x']`` element-wise and return it as ``'q_b'``.

            Relies on names from the enclosing scope: ``input_dtype``,
            ``expected_boundaries``, ``should_apply``, ``is_manual_boundaries``
            and ``epsilon``.

            * ``should_apply`` false: delegate to ``tft.bucketize`` with
              ``elementwise=True``.
            * ``should_apply`` true: obtain one boundary list per column
              (either the manual pair, or the unstacked result of
              ``tft.quantiles`` with ``reduce_instance_dims=False``), bucketize
              each column with ``tft.apply_buckets`` and stack the columns
              back along axis 1.
            """
            values = tf.cast(inputs['x'], input_dtype)
            bucket_count = len(expected_boundaries) + 1

            if not should_apply:
                return {
                    'q_b': tft.bucketize(values,
                                         num_buckets=bucket_count,
                                         epsilon=epsilon,
                                         elementwise=True)
                }

            if is_manual_boundaries:
                # Second column uses the expected boundaries scaled by two.
                doubled = [2 * b for b in expected_boundaries]
                per_column_boundaries = [expected_boundaries, doubled]
            else:
                stacked = tft.quantiles(
                    values, bucket_count, epsilon, reduce_instance_dims=False)
                per_column_boundaries = tf.unstack(stacked, axis=0)

            # apply_buckets is given the boundaries as a single-row tensor,
            # hence the expand_dims on axis 0.
            bucketized_columns = [
                tft.apply_buckets(
                    values[:, column],
                    tf.expand_dims(tf.cast(column_boundaries, tf.float32),
                                   axis=0))
                for column, column_boundaries in enumerate(
                    per_column_boundaries)
            ]
            return {'q_b': tf.stack(bucketized_columns, axis=1)}
Esempio n. 3
0
def preprocessing_fn(input_features):
    """Build the transformed feature dict for every feature in ``raw_schema``.

    For each feature listed in ``raw_schema.feature``:

    * the target and weight features are passed through ``_prep`` as is;
    * features whose ``type`` equals 1 are vocabulary-encoded with
      ``tft.compute_and_apply_vocabulary`` and stored as
      ``<name>_integerized``;
    * every other feature is z-score scaled and stored as ``<name>_scaled``.

    In addition, ``input_features["age"]`` is bucketized into 5 quantile
    buckets and stored as ``"age_bucketized"``.

    Args:
        input_features: mapping from feature name to the raw input tensor.

    Returns:
        dict mapping output feature names to ``_prep``-wrapped tensors.
    """
    processed_features = {}
    passthrough_names = (TARGET_FEATURE_NAME, WEIGHT_FEATURE_NAME)
    transformed_any = False

    for feature in raw_schema.feature:
        raw = input_features[feature.name]

        # Pass the target and weight features as is.
        if feature.name in passthrough_names:
            processed_features[feature.name] = _prep(raw)
            continue

        transformed_any = True
        # NOTE(review): type 1 is presumably the schema's BYTES/string type;
        # confirm against the schema proto.
        if feature.type == 1:
            # Extract vocabulary and integerize categorical features.
            processed_features[feature.name + "_integerized"] = _prep(
                tft.compute_and_apply_vocabulary(raw,
                                                 vocab_filename=feature.name))
        else:
            # Normalize numeric features.
            processed_features[feature.name + "_scaled"] = _prep(
                tft.scale_to_z_score(raw))

    # Bucketize age using quantiles. This used to sit inside the loop, which
    # rebuilt the same quantiles/apply_buckets ops once per transformed
    # feature and overwrote the same key each time. It is loop-invariant, so
    # build it once. The guard keeps the original behaviour of emitting the
    # feature only when at least one non-passthrough feature was processed.
    if transformed_any:
        age = input_features["age"]
        age_boundaries = tft.quantiles(age, num_buckets=5, epsilon=0.01)
        processed_features["age_bucketized"] = _prep(
            tft.apply_buckets(age, bucket_boundaries=age_boundaries))

    return processed_features
Esempio n. 4
0
def preprocess(input_features):
    """Assemble the transformed features from ``input_features``.

    The output contains, in this order:

    * the target feature, copied through unchanged;
    * for each name in ``metadata.NUMERIC_FEATURE_NAMES``: the z-score scaled
      value under the same name, plus ``<name>_bucketized`` built from
      ``NUM_BUCKETS`` quantile buckets;
    * for each name in ``metadata.CATEGORICAL_FEATURE_NAMES``: the raw value,
      copied through unchanged;
    * for each name in ``metadata.VOCAB_FEATURE_NAMES``:
      ``<name>_integerized`` produced by ``tft.string_to_int``.

    Args:
        input_features: mapping from feature name to raw input tensor.

    Returns:
        dict mapping output feature names to tensors.
    """
    target = metadata.TARGET_FEATURE_NAME
    output_features = {target: input_features[target]}

    for name in metadata.NUMERIC_FEATURE_NAMES:
        raw = input_features[name]
        output_features[name] = tft.scale_to_z_score(raw)

        boundaries = tft.quantiles(raw, num_buckets=NUM_BUCKETS, epsilon=0.01)
        output_features[name + "_bucketized"] = tft.apply_buckets(
            raw, bucket_boundaries=boundaries)

    for name in metadata.CATEGORICAL_FEATURE_NAMES:
        # The return value is deliberately discarded; presumably the call is
        # kept for the vocabulary file it produces under `vocab_filename`.
        tft.uniques(input_features[name], vocab_filename=name)
        output_features[name] = input_features[name]

    for name in metadata.VOCAB_FEATURE_NAMES:
        output_features[name + "_integerized"] = tft.string_to_int(
            input_features[name],
            top_k=metadata.VOCAB_SIZE,
            num_oov_buckets=metadata.OOV_SIZE,
            vocab_filename=name)

    return output_features
 def preprocessing_fn(inputs):
     """Return the quantile boundaries of ``inputs['x']`` under ``'q_b'``.

     ``num_buckets``, ``num_expected_buckets`` and ``input_dtype`` come from
     the enclosing scope. Quantiles are requested with
     ``always_return_num_quantiles=False``, and the static shape of the
     result is then set to ``[1, num_expected_buckets - 1]``.
     """
     boundaries = tft.quantiles(
         tf.cast(inputs['x'], input_dtype),
         num_buckets,
         epsilon=0.0001,
         always_return_num_quantiles=False)
     boundaries.set_shape([1, num_expected_buckets - 1])
     return {'q_b': boundaries}
 def analyzer_fn(inputs):
     """Return weighted, per-dimension quantile boundaries as ``'q_b'``.

     ``inputs['x']`` is cast to ``input_dtype`` (from the enclosing scope)
     and ``inputs['weights']`` is passed as the weights. Quantiles use
     3 buckets, epsilon 1e-5 and ``reduce_instance_dims=False``.
     """
     casted = tf.cast(inputs['x'], input_dtype)
     boundaries = tft.quantiles(casted,
                                num_buckets=3,
                                epsilon=0.00001,
                                weights=inputs['weights'],
                                reduce_instance_dims=False)
     return {'q_b': boundaries}
 def analyzer_fn(inputs):
     """Return weighted quantile boundaries of ``inputs['x']`` as ``'q_b'``.

     ``inputs['x']`` is cast to ``input_dtype`` (from the enclosing scope)
     and ``inputs['weights']`` is passed as the weights. Quantiles use
     3 buckets, epsilon 1e-5 and ``always_return_num_quantiles=True``.
     """
     casted = tf.cast(inputs['x'], input_dtype)
     boundaries = tft.quantiles(casted,
                                num_buckets=3,
                                epsilon=0.00001,
                                weights=inputs['weights'],
                                always_return_num_quantiles=True)
     return {'q_b': boundaries}
 def preprocessing_fn(inputs):
   """Bucketize ``inputs['x']`` and return the result under ``'q_b'``.

   Relies on ``input_dtype``, ``expected_boundaries``, ``should_apply``,
   ``is_manual_boundaries`` and ``epsilon`` from the enclosing scope.
   When ``should_apply`` is false the work is delegated to
   ``tft.bucketize``; otherwise the boundaries (manual, or computed with
   ``tft.quantiles``) are applied with ``tft.apply_buckets``.
   """
   values = tf.cast(inputs['x'], input_dtype)
   bucket_count = len(expected_boundaries) + 1

   if not should_apply:
     return {
         'q_b': tft.bucketize(values, num_buckets=bucket_count,
                              epsilon=epsilon)
     }

   if is_manual_boundaries:
     boundaries = [expected_boundaries]
   else:
     # Quantiles are computed on the uncast input; only the bucketized
     # values use the cast tensor.
     boundaries = tft.quantiles(inputs['x'], bucket_count, epsilon)
   return {'q_b': tft.apply_buckets(values, boundaries)}