import tensorflow as tf
import tensorflow_transform as tft


def preprocessing_fn(inputs):
  # `input_dtype`, `expected_boundaries`, `should_apply`, `is_manual_boundaries`
  # and `epsilon` are bound in the enclosing scope (see the sketch below).
  x = tf.cast(inputs['x'], input_dtype)
  num_buckets = len(expected_boundaries) + 1
  if should_apply:
    if is_manual_boundaries:
      # One boundary list per column of `x`.
      bucket_boundaries = [
          expected_boundaries, [2 * b for b in expected_boundaries]
      ]
    else:
      # Compute per-column quantile boundaries, one row per column.
      bucket_boundaries = tft.quantiles(
          x, num_buckets, epsilon, reduce_instance_dims=False)
      bucket_boundaries = tf.unstack(bucket_boundaries, axis=0)
    # Bucketize each column separately against its own boundaries.
    result = []
    for i, boundaries in enumerate(bucket_boundaries):
      boundaries = tf.cast(boundaries, tf.float32)
      result.append(
          tft.apply_buckets(x[:, i], tf.expand_dims(boundaries, axis=0)))
    result = tf.stack(result, axis=1)
  else:
    result = tft.bucketize(
        x, num_buckets=num_buckets, epsilon=epsilon, elementwise=True)
  return {'q_b': result}
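# The free names used above come from an enclosing (test-style) scope. The
# bindings below are hypothetical, shown only so the snippet can be exercised
# on its own; the original code may use different values.
input_dtype = tf.float32
epsilon = 0.01
should_apply = True
is_manual_boundaries = True
expected_boundaries = [1.0, 2.0, 3.0]  # assumed example boundaries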
def preprocess(input_features):
  output_features = {}

  # Pass the target through unchanged.
  output_features[metadata.TARGET_FEATURE_NAME] = input_features[
      metadata.TARGET_FEATURE_NAME]

  for feature_name in metadata.NUMERIC_FEATURE_NAMES:
    # output_features[feature_name + "_scaled"] = tft.scale_to_z_score(
    #     input_features[feature_name])
    output_features[feature_name] = tft.scale_to_z_score(
        input_features[feature_name])
    # Bucketize into NUM_BUCKETS quantile buckets.
    quantiles = tft.quantiles(
        input_features[feature_name], num_buckets=NUM_BUCKETS, epsilon=0.01)
    output_features[feature_name + "_bucketized"] = tft.apply_buckets(
        input_features[feature_name], bucket_boundaries=quantiles)

  for feature_name in metadata.CATEGORICAL_FEATURE_NAMES:
    # Emit a vocabulary file, but keep the raw strings as the feature value.
    tft.uniques(input_features[feature_name], vocab_filename=feature_name)
    output_features[feature_name] = input_features[feature_name]  # sba added this
    # output_features[feature_name + "_integerized"] = tft.string_to_int(
    #     input_features[feature_name], vocab_filename=feature_name)

  for feature_name in metadata.VOCAB_FEATURE_NAMES:
    output_features[feature_name + "_integerized"] = tft.string_to_int(
        input_features[feature_name],
        top_k=metadata.VOCAB_SIZE,
        num_oov_buckets=metadata.OOV_SIZE,
        vocab_filename=feature_name)

  return output_features
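# `tft.uniques` and `tft.string_to_int` are legacy tf.Transform names; current
# releases expose the same analyzers as `tft.vocabulary` and
# `tft.compute_and_apply_vocabulary`. A minimal sketch of the last loop above
# rewritten with the current names (the `metadata.*` constants are the same
# assumptions as above; this is an illustrative helper, not the original code).
def preprocess_vocab_features_current_api(input_features, output_features):
  for feature_name in metadata.VOCAB_FEATURE_NAMES:
    output_features[feature_name + "_integerized"] = (
        tft.compute_and_apply_vocabulary(
            input_features[feature_name],
            top_k=metadata.VOCAB_SIZE,
            num_oov_buckets=metadata.OOV_SIZE,
            vocab_filename=feature_name))
  return output_features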
def preprocessing_fn(input_features):
  processed_features = {}

  for feature in raw_schema.feature:
    # Pass the target and weight features through as-is.
    if feature.name in [TARGET_FEATURE_NAME, WEIGHT_FEATURE_NAME]:
      processed_features[feature.name] = _prep(input_features[feature.name])
      continue

    if feature.type == 1:  # schema_pb2.FeatureType.BYTES
      # Extract a vocabulary and integerize categorical features.
      processed_features[feature.name + "_integerized"] = _prep(
          tft.compute_and_apply_vocabulary(
              input_features[feature.name], vocab_filename=feature.name))
    else:
      # Normalize numeric features.
      processed_features[feature.name + "_scaled"] = _prep(
          tft.scale_to_z_score(input_features[feature.name]))

  # Bucketize age using quantiles.
  quantiles = tft.quantiles(input_features["age"], num_buckets=5, epsilon=0.01)
  processed_features["age_bucketized"] = _prep(
      tft.apply_buckets(input_features["age"], bucket_boundaries=quantiles))

  return processed_features
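# A preprocessing_fn like the ones above is executed by tf.Transform's Beam
# implementation. Below is a minimal sketch modeled on tf.Transform's
# simple_example; `_example_preprocessing_fn`, `run_example`, the feature spec
# and the raw data are illustrative assumptions, not part of the snippets above.
import tempfile

import tensorflow as tf
import tensorflow_transform as tft
import tensorflow_transform.beam as tft_beam
from tensorflow_transform.tf_metadata import dataset_metadata
from tensorflow_transform.tf_metadata import schema_utils


def _example_preprocessing_fn(inputs):
  # Illustrative only: quantile-bucketize 'age' into 5 buckets.
  quantiles = tft.quantiles(inputs['age'], num_buckets=5, epsilon=0.01)
  return {'age_bucketized': tft.apply_buckets(inputs['age'], quantiles)}


def run_example():
  raw_data = [{'age': 22.0}, {'age': 35.0}, {'age': 58.0}, {'age': 71.0}]
  raw_metadata = dataset_metadata.DatasetMetadata(
      schema_utils.schema_from_feature_spec(
          {'age': tf.io.FixedLenFeature([], tf.float32)}))
  with tft_beam.Context(temp_dir=tempfile.mkdtemp()):
    # Analyze computes the quantile boundaries; Transform applies them.
    (transformed_data, _), _ = (
        (raw_data, raw_metadata)
        | tft_beam.AnalyzeAndTransformDataset(_example_preprocessing_fn))
  return transformed_data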
def preprocessing_fn(inputs):
  # Same free names as in the first snippet: `input_dtype`,
  # `expected_boundaries`, `should_apply`, `is_manual_boundaries`, `epsilon`.
  x = tf.cast(inputs['x'], input_dtype)
  num_buckets = len(expected_boundaries) + 1
  if should_apply:
    if is_manual_boundaries:
      bucket_boundaries = [expected_boundaries]
    else:
      bucket_boundaries = tft.quantiles(inputs['x'], num_buckets, epsilon)
    result = tft.apply_buckets(x, bucket_boundaries)
  else:
    result = tft.bucketize(x, num_buckets=num_buckets, epsilon=epsilon)
  return {'q_b': result}
def preprocessing_fn(inputs):
  """Preprocesses the Titanic dataset."""
  outputs = {}

  # Scale numerical features, imputing missing values with the mean.
  for key in features.NUMERIC_FEATURE_KEYS:
    mean_value = compute_mean_ignore_nan(inputs[key].values)
    absl.logging.info(
        f'TFT preprocessing. Mean value for {key} = {mean_value}')
    outputs[features.transformed_name(key)] = tft.scale_to_z_score(
        _fill_in_missing_with_impute(inputs[key], mean_value))

  # Build a vocabulary for each vocab feature and map it to integer ids.
  for key in features.VOCAB_FEATURE_KEYS:
    outputs[features.transformed_name(key)] = tft.compute_and_apply_vocabulary(
        _fill_in_missing(inputs[key]),
        top_k=features.VOCAB_SIZE_MAP.get(key, features.VOCAB_SIZE),
        num_oov_buckets=features.OOV_SIZE)

  # Bucketize features, using manual boundaries where provided and quantile
  # boundaries otherwise.
  for key in features.BUCKET_FEATURE_KEYS:
    if key in features.FEATURE_BUCKET_BOUNDARIES:
      bucket_boundaries = tf.constant(
          features.FEATURE_BUCKET_BOUNDARIES.get(key))
      # tf.print("bucket_boundaries:", bucket_boundaries,
      #          output_stream=absl.logging.info)
      outputs[features.transformed_name(key)] = tft.apply_buckets(
          _fill_in_missing(inputs[key]), bucket_boundaries)
    else:
      outputs[features.transformed_name(key)] = tft.bucketize(
          _fill_in_missing(inputs[key]),
          features.FEATURE_BUCKET_COUNT_MAP.get(
              key, features.FEATURE_BUCKET_COUNT))

  # Generate vocabularies for and integerize the categorical features.
  for key in features.CATEGORICAL_FEATURE_KEYS:
    outputs[features.transformed_name(key)] = tft.compute_and_apply_vocabulary(
        x=_fill_in_missing(inputs[key]),
        num_oov_buckets=1,
        vocab_filename=key)

  # Convert the label to a dense tensor.
  outputs[features.transformed_name(features.LABEL_KEY)] = _fill_in_missing(
      inputs[features.LABEL_KEY])

  return outputs
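# The snippet above relies on helpers that are not shown. Below is a minimal
# sketch of what `_fill_in_missing` typically looks like in the TFX examples
# this code resembles, plus an analogous `_fill_in_missing_with_impute` that
# takes a caller-supplied fill value. Both are assumptions about the missing
# helpers, not the original definitions.
def _fill_in_missing(x):
  """Densifies a 1-column SparseTensor, filling missing values with '' or 0."""
  if not isinstance(x, tf.sparse.SparseTensor):
    return x
  default_value = '' if x.dtype == tf.string else 0
  return tf.squeeze(
      tf.sparse.to_dense(
          tf.SparseTensor(x.indices, x.values, [x.dense_shape[0], 1]),
          default_value),
      axis=1)


def _fill_in_missing_with_impute(x, impute_value):
  """Like _fill_in_missing, but fills missing entries with `impute_value`."""
  if not isinstance(x, tf.sparse.SparseTensor):
    return x
  return tf.squeeze(
      tf.sparse.to_dense(
          tf.SparseTensor(x.indices, x.values, [x.dense_shape[0], 1]),
          default_value=impute_value),
      axis=1)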