Esempi in Python per identify_type

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: reagent.preprocessing.identify_types

Metodo/funzione: identify_type

Esempi su hotexamples.com: 2

identify_type in Python: 2 esempi trovati. Questi sono i migliori esempi reali in Python per reagent.preprocessing.identify_types.identify_type, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

File: test_type_identification.py Progetto: zwcdp/ReAgent

    def test_identification(self):
        feature_value_map = read_data()

        types = {}
        for name, values in feature_value_map.items():
            types[name] = identify_types.identify_type(values)

        # Examples through manual inspection
        self.assertEqual(types[BINARY_FEATURE_ID], identify_types.BINARY)
        self.assertEqual(types[CONTINUOUS_FEATURE_ID], identify_types.CONTINUOUS)

        # We don't yet know the boxcox type
        self.assertEqual(types[BOXCOX_FEATURE_ID], identify_types.CONTINUOUS)

        # We don't yet know the quantile type
        self.assertEqual(types[QUANTILE_FEATURE_ID], identify_types.CONTINUOUS)
        self.assertEqual(types[ENUM_FEATURE_ID], identify_types.ENUM)
        self.assertEqual(types[PROBABILITY_FEATURE_ID], identify_types.PROBABILITY)

Esempio n. 2

Mostra file

File: normalization.py Progetto: zachkeer/ReAgent

def identify_parameter(
    feature_name,
    values,
    max_unique_enum_values=DEFAULT_MAX_UNIQUE_ENUM,
    quantile_size=DEFAULT_MAX_QUANTILE_SIZE,
    quantile_k2_threshold=DEFAULT_QUANTILE_K2_THRESHOLD,
    skip_box_cox=False,
    skip_quantiles=False,
    feature_type=None,
):
    if feature_type is None:
        feature_type = identify_types.identify_type(values, max_unique_enum_values)

    boxcox_lambda = None
    boxcox_shift = 0.0
    mean = 0.0
    stddev = 1.0
    possible_values = None
    quantiles = None
    assert feature_type in [
        identify_types.CONTINUOUS,
        identify_types.PROBABILITY,
        identify_types.BINARY,
        identify_types.ENUM,
        identify_types.CONTINUOUS_ACTION,
        identify_types.DO_NOT_PREPROCESS,
    ], "unknown type {}".format(feature_type)
    assert (
        len(values) >= MINIMUM_SAMPLES_TO_IDENTIFY
    ), "insufficient information to identify parameter"

    min_value = float(np.min(values))
    max_value = float(np.max(values))

    if feature_type == identify_types.DO_NOT_PREPROCESS:
        mean = float(np.mean(values))
        values = values - mean
        stddev = max(float(np.std(values, ddof=1)), 1.0)
    if feature_type == identify_types.CONTINUOUS:
        if min_value == max_value:
            return no_op_feature()
        k2_original, p_original = stats.normaltest(values)

        # shift can be estimated but not in scipy
        boxcox_shift = float(min_value * -1)
        candidate_values, lambda_ = stats.boxcox(
            np.maximum(values + boxcox_shift, BOX_COX_MARGIN)
        )
        k2_boxcox, p_boxcox = stats.normaltest(candidate_values)
        logger.info(
            "Feature stats.  Original K2: {} P: {} Boxcox K2: {} P: {}".format(
                k2_original, p_original, k2_boxcox, p_boxcox
            )
        )
        if lambda_ < 0.9 or lambda_ > 1.1:
            # Lambda is far enough from 1.0 to be worth doing boxcox
            if k2_original > k2_boxcox * 10 and k2_boxcox <= quantile_k2_threshold:
                # The boxcox output is significantly more normally distributed
                # than the original data and is normal enough to apply
                # effectively.

                stddev = float(np.std(candidate_values, ddof=1))
                # Unclear whether this happens in practice or not
                if (
                    np.isfinite(stddev)
                    and stddev < BOX_COX_MAX_STDDEV
                    and not np.isclose(stddev, 0)
                ):
                    values = candidate_values
                    boxcox_lambda = float(lambda_)
        if boxcox_lambda is None or skip_box_cox:
            boxcox_shift = None
            boxcox_lambda = None
        if boxcox_lambda is not None:
            feature_type = identify_types.BOXCOX
        if (
            boxcox_lambda is None
            and k2_original > quantile_k2_threshold
            and (not skip_quantiles)
        ):
            feature_type = identify_types.QUANTILE
            quantiles = (
                np.unique(
                    mquantiles(
                        values,
                        np.arange(quantile_size + 1, dtype=np.float64)
                        / float(quantile_size),
                        alphap=0.0,
                        betap=1.0,
                    )
                )
                .astype(float)
                .tolist()
            )
            logger.info("Feature is non-normal, using quantiles: {}".format(quantiles))

    if (
        feature_type == identify_types.CONTINUOUS
        or feature_type == identify_types.BOXCOX
        or feature_type == identify_types.CONTINUOUS_ACTION
    ):
        mean = float(np.mean(values))
        values = values - mean
        stddev = max(float(np.std(values, ddof=1)), 1.0)
        if not np.isfinite(stddev):
            logger.info("Std. dev not finite for feature {}".format(feature_name))
            return None
        values /= stddev

    if feature_type == identify_types.ENUM:
        possible_values = np.unique(values.astype(int)).astype(int).tolist()

    return NormalizationParameters(
        feature_type,
        boxcox_lambda,
        boxcox_shift,
        mean,
        stddev,
        possible_values,
        quantiles,
        min_value,
        max_value,
    )