Example #1
def describe_1d(series: pd.Series) -> dict:
    """Describe a series (infer the variable type, then calculate type-specific values).

    Args:
        series: The Series to describe.

    Returns:
        A dict containing calculated series description values.
    """

    # Replace infinite values with NaNs to avoid issues with histograms later.
    series.replace(to_replace=[np.inf, -np.inf],
                   value=np.nan,
                   inplace=True)

    # Infer variable types
    series_description = base.get_var_type(series)

    # Run type specific analysis
    if series_description["type"] == Variable.S_TYPE_UNSUPPORTED:
        series_description.update(
            describe_unsupported(series, series_description))
    else:
        series_description.update(
            describe_supported(series, series_description))

        type_to_func = {
            Variable.S_TYPE_CONST: describe_constant_1d,
            Variable.TYPE_BOOL: describe_boolean_1d,
            Variable.TYPE_NUM: describe_numeric_1d,
            Variable.TYPE_DATE: describe_date_1d,
            Variable.S_TYPE_UNIQUE: describe_unique_1d,
            Variable.TYPE_CAT: describe_categorical_1d,
            Variable.TYPE_URL: describe_url_1d,
            Variable.TYPE_PATH: describe_path_1d,
        }

        if series_description["type"] in type_to_func:
            series_description.update(type_to_func[series_description["type"]](
                series, series_description))
        else:
            raise ValueError("Unexpected type")

    # Return the description obtained
    return series_description
def test_numeric_with_inf():
    s = pd.Series([1, 2, 3, 6, np.inf])
    assert get_var_type(s)["type"] == Variable.TYPE_NUM
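
A minimal usage sketch for this variant (the column data is made up, and it assumes describe_1d, Variable and base from the surrounding module are importable):

import numpy as np
import pandas as pd

# Hypothetical numeric column with a missing value and an infinity.
ages = pd.Series([23, 35, 41, np.inf, np.nan], name="age")

description = describe_1d(ages)
print(description["type"])       # expected to be Variable.TYPE_NUM
print(description["n_missing"])  # count of NaN observations (inf is replaced first)
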
Example #3
def describe_1d(series: pd.Series) -> dict:
    """Describe a series (infer the variable type, then calculate type-specific values).

    Args:
        series: The Series to describe.

    Returns:
        A dict containing calculated series description values.
    """
    def describe_supported(series: pd.Series,
                           series_description: dict) -> dict:
        """Describe a supported series.
        Args:
            series: The Series to describe.
            series_description: The dict containing the series description so far.
        Returns:
            A dict containing calculated series description values.
        """

        # number of observations in the Series
        length = len(series)

        # number of non-NaN observations in the Series
        count = series.count()

        distinct_count = series_description["distinct_count_without_nan"]
        value_counts = series_description["value_counts_without_nan"]
        unique_count = value_counts.where(value_counts == 1).count()

        stats = {
            "n": length,
            "count": count,
            "n_distinct": distinct_count,
            "p_distinct": distinct_count / count,
            "p_missing": 1 - (count / length),
            "n_missing": length - count,
            "is_unique": unique_count == count,
            "n_unique": unique_count,
            "p_unique": unique_count / count,
            "memory_size":
            series.memory_usage(deep=config["memory_deep"].get(bool)),
        }

        return stats

    def describe_unsupported(series: pd.Series, series_description: dict):
        """Describe an unsupported series.
        Args:
            series: The Series to describe.
            series_description: The dict containing the series description so far.
        Returns:
            A dict containing calculated series description values.
        """

        # number of observations in the Series
        length = len(series)

        # number of non-NaN observations in the Series
        count = series.count()

        results_data = {
            "n": length,
            "count": count,
            "p_missing": 1 - count / length,
            "n_missing": length - count,
            "memory_size": series.memory_usage(
                deep=config["memory_deep"].get(bool)),
        }

        return results_data

    def histogram_compute(finite_values, n_unique, name="histogram"):
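        """Compute a histogram of the finite values.

        The bin count comes from plot.histogram.bins (0 means numpy's "auto"
        rule); it is never larger than the number of unique values, and
        "auto" results are capped at plot.histogram.max_bins bins.
        """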
        stats = {}
        bins = config["plot"]["histogram"]["bins"].get(int)
        bins = "auto" if bins == 0 else min(bins, n_unique)
        stats[name] = np.histogram(finite_values, bins)

        max_bins = config["plot"]["histogram"]["max_bins"].get(int)
        if bins == "auto" and len(stats[name][1]) > max_bins:
            stats[name] = np.histogram(finite_values, max_bins)

        return stats

    def numeric_stats_pandas(series: pd.Series):
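        """Summary statistics computed with pandas methods (NaN values are skipped)."""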
        return {
            "mean": series.mean(),
            "std": series.std(),
            "variance": series.var(),
            "min": series.min(),
            "max": series.max(),
            # Unbiased kurtosis obtained using Fisher's definition (kurtosis of normal == 0.0). Normalized by N-1.
            "kurtosis": series.kurt(),
            # Unbiased skew normalized by N-1
            "skewness": series.skew(),
            "sum": series.sum(),
        }

    def numeric_stats_numpy(present_values):
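        """Summary statistics computed with numpy on the non-NaN values.

        Note: "kurtosis", "skewness" and "n_zeros" still read the enclosing
        describe_1d scope (series, series_description) through the closure.
        """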
        return {
            "mean": np.mean(present_values),
            "std": np.std(present_values, ddof=1),
            "variance": np.var(present_values, ddof=1),
            "min": np.min(present_values),
            "max": np.max(present_values),
            # Unbiased kurtosis obtained using Fisher's definition (kurtosis of normal == 0.0). Normalized by N-1.
            "kurtosis": series.kurt(),
            # Unbiased skew normalized by N-1
            "skewness": series.skew(),
            "sum": np.sum(present_values),
            "n_zeros":
            (series_description["count"] - np.count_nonzero(present_values)),
        }

    def describe_numeric_1d(series: pd.Series,
                            series_description: dict) -> dict:
        """Describe a numeric series.
        Args:
            series: The Series to describe.
            series_description: The dict containing the series description so far.
        Returns:
            A dict containing calculated series description values.
        Notes:
            When 'bins_type' is set to 'bayesian_blocks', astropy.stats.bayesian_blocks is used to determine the number of
            bins. Read the docs:
            https://docs.astropy.org/en/stable/visualization/histogram.html
            https://docs.astropy.org/en/stable/api/astropy.stats.bayesian_blocks.html
            This method might print warnings, which we suppress.
            https://github.com/astropy/astropy/issues/4927
        """
        def mad(arr):
            """Median Absolute Deviation: a "Robust" version of standard deviation.
            Indices variability of the sample.
            https://en.wikipedia.org/wiki/Median_absolute_deviation
            """
            return np.median(np.abs(arr - np.median(arr)))

        quantiles = config["vars"]["num"]["quantiles"].get(list)

        n_infinite = ((series == np.inf) | (series == -np.inf)).sum()

        if isinstance(series.dtype, _IntegerDtype):
            stats = numeric_stats_pandas(series)
            present_values = series.loc[series.notnull()].astype(
                str(series.dtype).lower())
            stats["n_zeros"] = series_description["count"] - np.count_nonzero(
                present_values)
            stats["histogram_data"] = present_values
            finite_values = present_values
        else:
            values = series.values
            present_values = values[~np.isnan(values)]
            finite_values = values[np.isfinite(values)]
            stats = numeric_stats_numpy(present_values)
            stats["histogram_data"] = finite_values

        stats.update({
            "mad": mad(present_values),
            "scatter_data": series,  # For complex
            "p_infinite": n_infinite / series_description["n"],
            "n_infinite": n_infinite,
        })

        chi_squared_threshold = config["vars"]["num"][
            "chi_squared_threshold"].get(float)
        if chi_squared_threshold > 0.0:
            histogram, _ = np.histogram(finite_values, bins="auto")
            stats["chi_squared"] = chisquare(histogram)

        stats["range"] = stats["max"] - stats["min"]
        stats.update({
            f"{percentile:.0%}": value
            for percentile, value in series.quantile(
                quantiles).to_dict().items()
        })
        stats["iqr"] = stats["75%"] - stats["25%"]
        stats["cv"] = stats["std"] / stats["mean"] if stats["mean"] else np.NaN
        stats["p_zeros"] = stats["n_zeros"] / series_description["n"]

        stats["monotonic_increase"] = series.is_monotonic_increasing
        stats["monotonic_decrease"] = series.is_monotonic_decreasing

        stats["monotonic_increase_strict"] = (stats["monotonic_increase"]
                                              and series.is_unique)
        stats["monotonic_decrease_strict"] = (stats["monotonic_decrease"]
                                              and series.is_unique)

        stats.update(
            histogram_compute(finite_values, series_description["n_distinct"]))

        return stats

    def describe_date_1d(series: pd.Series, series_description: dict) -> dict:
        """Describe a date series.

        Args:
            series: The Series to describe.
            series_description: The dict containing the series description so far.

        Returns:
            A dict containing calculated series description values.
        """
        stats = {
            "min": pd.Timestamp.to_pydatetime(series.min()),
            "max": pd.Timestamp.to_pydatetime(series.max()),
        }

        stats["range"] = stats["max"] - stats["min"]

        values = series[series.notnull()].values.astype(np.int64) // 10**9

        chi_squared_threshold = config["vars"]["num"][
            "chi_squared_threshold"].get(float)
        if chi_squared_threshold > 0.0:
            histogram, _ = np.histogram(values, bins="auto")
            stats["chi_squared"] = chisquare(histogram)

        stats.update(
            histogram_compute(values, series_description["n_distinct"]))
        return stats

    def describe_categorical_1d(series: pd.Series,
                                series_description: dict) -> dict:
        """Describe a categorical series.

        Args:
            series: The Series to describe.
            series_description: The dict containing the series description so far.

        Returns:
            A dict containing calculated series description values.
        """
        # Make sure we deal with strings (Issue #100)
        series = series.astype(str)

        # Only run if at least 1 non-missing value
        value_counts = series_description["value_counts_without_nan"]

        stats = {"top": value_counts.index[0], "freq": value_counts.iloc[0]}

        redact = config["vars"]["cat"]["redact"].get(float)
        if not redact:
            stats.update({"first_rows": series.head(5)})

        stats.update(
            histogram_compute(value_counts,
                              len(value_counts),
                              name="histogram_frequencies"))

        chi_squared_threshold = config["vars"]["num"][
            "chi_squared_threshold"].get(float)
        if chi_squared_threshold > 0.0:
            stats["chi_squared"] = list(chisquare(value_counts.values))

        check_length = config["vars"]["cat"]["length"].get(bool)
        if check_length:
            stats.update(length_summary(series))
            stats.update(
                histogram_compute(stats["length"],
                                  stats["length"].nunique(),
                                  name="histogram_length"))

        check_unicode = config["vars"]["cat"]["characters"].get(bool)
        if check_unicode:
            stats.update(unicode_summary(series))
            stats["n_characters_distinct"] = stats["n_characters"]
            stats["n_characters"] = stats["character_counts"].values.sum()

            stats["category_alias_counts"].index = stats[
                "category_alias_counts"].index.str.replace("_", " ")

        words = config["vars"]["cat"]["words"].get(bool)
        if words:
            stats.update(word_summary(series))

        coerce_str_to_date = config["vars"]["cat"]["coerce_str_to_date"].get(
            bool)
        if coerce_str_to_date:
            stats["date_warning"] = warning_type_date(series)

        return stats

    def describe_url_1d(series: pd.Series, series_description: dict) -> dict:
        """Describe a url series.

        Args:
            series: The Series to describe.
            series_description: The dict containing the series description so far.

        Returns:
            A dict containing calculated series description values.
        """
        # Make sure we deal with strings (Issue #100)
        series = series[~series.isnull()].astype(str)
        series = series.apply(urlsplit)

        stats = url_summary(series)

        # Only run if at least 1 non-missing value
        value_counts = series_description["value_counts_without_nan"]

        stats["top"] = value_counts.index[0]
        stats["freq"] = value_counts.iloc[0]

        return stats

    def describe_file_1d(series: pd.Series, series_description: dict) -> dict:
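        """Describe a series of file paths: file_summary plus path statistics.

        The Path-converted series is cached in series_description["p_series"]
        so the nested describe_path_1d call can reuse it.
        """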
        if "p_series" not in series_description:
            series = series[~series.isnull()].astype(str)
            series = series.map(Path)
            series_description["p_series"] = series
        else:
            series = series_description["p_series"]

        stats = file_summary(series)

        series_description.update(describe_path_1d(series, series_description))
        stats.update(
            histogram_compute(
                stats["file_size"],
                stats["file_size"].nunique(),
                name="histogram_file_size",
            ))

        return stats

    def describe_path_1d(series: pd.Series, series_description: dict) -> dict:
        """Describe a path series.

        Args:
            series: The Series to describe.
            series_description: The dict containing the series description so far.

        Returns:
            A dict containing calculated series description values.
        """
        series_description.update(
            describe_categorical_1d(series, series_description))

        # Make sure we deal with strings (Issue #100)
        if "p_series" not in series_description:
            series = series[~series.isnull()].astype(str)
            series = series.map(Path)
        else:
            series = series_description["p_series"]
            del series_description["p_series"]

        stats = path_summary(series)

        # Only run if at least 1 non-missing value
        value_counts = series_description["value_counts_without_nan"]

        stats["top"] = value_counts.index[0]
        stats["freq"] = value_counts.iloc[0]

        return stats

    def describe_image_1d(series: pd.Series, series_description: dict):
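        """Describe a series of image paths: image_summary (optionally with
        EXIF data) plus the file-level statistics from describe_file_1d."""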
        if "p_series" not in series_description:
            series = series[~series.isnull()].astype(str)
            series = series.map(Path)
            series_description["p_series"] = series
        else:
            series = series_description["p_series"]

        extract_exif = config["vars"]["image"]["exif"].get(bool)

        stats = image_summary(series, extract_exif)

        series_description.update(describe_file_1d(series, series_description))

        return stats

    def describe_boolean_1d(series: pd.Series,
                            series_description: dict) -> dict:
        """Describe a boolean series.

        Args:
            series: The Series to describe.
            series_description: The dict containing the series description so far.

        Returns:
            A dict containing calculated series description values.
        """
        value_counts = series_description["value_counts_without_nan"]

        stats = {"top": value_counts.index[0], "freq": value_counts.iloc[0]}

        return stats

    # Make sure pd.NA is not in the series
    series = series.fillna(np.nan)

    # Infer variable types
    # TODO: use visions for type inference
    # https://github.com/dylan-profiler/visions
    series_description = base.get_var_type(series)

    # Run type specific analysis
    if series_description["type"] == Variable.S_TYPE_UNSUPPORTED:
        series_description.update(
            describe_unsupported(series, series_description))
    else:
        series_description.update(
            describe_supported(series, series_description))

        type_to_func = {
            Variable.TYPE_BOOL: describe_boolean_1d,
            Variable.TYPE_NUM: describe_numeric_1d,
            Variable.TYPE_DATE: describe_date_1d,
            Variable.TYPE_CAT: describe_categorical_1d,
            Variable.TYPE_URL: describe_url_1d,
            Variable.TYPE_PATH: describe_path_1d,
            Variable.TYPE_IMAGE: describe_image_1d,
            Variable.TYPE_FILE: describe_file_1d,
        }

        if series_description["type"] in type_to_func:
            series_description.update(type_to_func[series_description["type"]](
                series, series_description))
        else:
            raise ValueError("Unexpected type")

    # Drop the raw value counts to keep series_description lightweight
    if "value_counts_with_nan" in series_description:
        del series_description["value_counts_with_nan"]
    if "value_counts_without_nan" in series_description:
        del series_description["value_counts_without_nan"]

    # Return the description obtained
    return series_description
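
A rough usage sketch for this variant (data and printed keys are illustrative; it assumes the module-level config, base and summary helpers used above are available):

import numpy as np
import pandas as pd

prices = pd.Series([9.99, 12.50, 12.50, 0.0, np.nan], name="price")

desc = describe_1d(prices)

# A few of the keys describe_supported / describe_numeric_1d should produce
# for a numeric column (not an exhaustive list):
for key in ("n", "count", "p_missing", "mean", "std", "iqr", "histogram"):
    print(key, desc.get(key))
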
Example #4
def describe_1d(series: pd.Series) -> dict:
    """Describe a series (infer the variable type, then calculate type-specific values).

    Args:
        series: The Series to describe.

    Returns:
        A dict containing calculated series description values.
    """
    def describe_supported(series: pd.Series,
                           series_description: dict) -> dict:
        """Describe a supported series.
        Args:
            series: The Series to describe.
            series_description: The dict containing the series description so far.
        Returns:
            A dict containing calculated series description values.
        """

        # number of observations in the Series
        length = len(series)

        # number of non-NaN observations in the Series
        count = series.count()

        distinct_count = series_description["distinct_count_without_nan"]

        stats = {
            "n": length,
            "count": count,
            "distinct_count": distinct_count,
            "n_unique": distinct_count,
            "p_missing": 1 - (count / length),
            "n_missing": length - count,
            "is_unique": distinct_count == count,
            # Use positional indexing for the fallback so a non-default index
            # cannot raise a KeyError.
            "mode": series.mode().iloc[0]
            if count > distinct_count > 1 else series.iloc[0],
            "p_unique": distinct_count / count,
            "memory_size": series.memory_usage(),
        }

        return stats

    def describe_unsupported(series: pd.Series, series_description: dict):
        """Describe an unsupported series.
        Args:
            series: The Series to describe.
            series_description: The dict containing the series description so far.
        Returns:
            A dict containing calculated series description values.
        """

        # number of observations in the Series
        length = len(series)

        # number of non-NaN observations in the Series
        count = series.count()

        results_data = {
            "n": length,
            "count": count,
            "p_missing": 1 - count / length,
            "n_missing": length - count,
            "memory_size": series.memory_usage(),
        }

        return results_data

    def describe_numeric_1d(series: pd.Series,
                            series_description: dict) -> dict:
        """Describe a numeric series.
        Args:
            series: The Series to describe.
            series_description: The dict containing the series description so far.
        Returns:
            A dict containing calculated series description values.
        Notes:
            When 'bins_type' is set to 'bayesian_blocks', astropy.stats.bayesian_blocks is used to determine the number of
            bins. Read the docs:
            https://docs.astropy.org/en/stable/visualization/histogram.html
            https://docs.astropy.org/en/stable/api/astropy.stats.bayesian_blocks.html
            This method might print warnings, which we suppress.
            https://github.com/astropy/astropy/issues/4927
        """
        def mad(arr):
            """ Median Absolute Deviation: a "Robust" version of standard deviation.
                Indices variability of the sample.
                https://en.wikipedia.org/wiki/Median_absolute_deviation
            """
            return np.median(np.abs(arr - np.median(arr)))

        quantiles = config["vars"]["num"]["quantiles"].get(list)

        n_infinite = ((series == np.inf) | (series == -np.inf)).sum()

        values = series.values
        present_values = values[~np.isnan(values)]
        finite_values = values[np.isfinite(values)]

        stats = {
            "mean": np.mean(present_values),
            "std": np.std(present_values, ddof=1),
            "variance": np.var(present_values, ddof=1),
            "min": np.min(present_values),
            "max": np.max(present_values),
            # Unbiased kurtosis obtained using Fisher's definition (kurtosis of normal == 0.0). Normalized by N-1.
            "kurtosis": series.kurt(),
            # Unbiased skew normalized by N-1
            "skewness": series.skew(),
            "sum": np.sum(present_values),
            "mad": mad(present_values),
            "n_zeros":
            (series_description["count"] - np.count_nonzero(present_values)),
            "histogram_data": finite_values,
            "scatter_data": series,  # For complex
            "p_infinite": n_infinite / series_description["n"],
            "n_infinite": n_infinite,
        }

        chi_squared_threshold = config["vars"]["num"][
            "chi_squared_threshold"].get(float)
        if chi_squared_threshold > 0.0:
            histogram, _ = np.histogram(finite_values, bins="auto")
            stats["chi_squared"] = chisquare(histogram)

        stats["range"] = stats["max"] - stats["min"]
        stats.update({
            f"{percentile:.0%}": value
            for percentile, value in series.quantile(
                quantiles).to_dict().items()
        })
        stats["iqr"] = stats["75%"] - stats["25%"]
        stats["cv"] = stats["std"] / stats["mean"] if stats["mean"] else np.NaN
        stats["p_zeros"] = stats["n_zeros"] / series_description["n"]

        bins = config["plot"]["histogram"]["bins"].get(int)
        # Bins should never be larger than the number of distinct values
        bins = min(series_description["distinct_count_with_nan"], bins)
        stats["histogram_bins"] = bins

        bayesian_blocks_bins = config["plot"]["histogram"][
            "bayesian_blocks_bins"].get(bool)
        if bayesian_blocks_bins:
            from astropy.stats import bayesian_blocks

            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                ret = bayesian_blocks(stats["histogram_data"])

                # Sanity check
                if not np.isnan(ret).any() and ret.size > 1:
                    stats["histogram_bins_bayesian_blocks"] = ret

        return stats

    def describe_date_1d(series: pd.Series, series_description: dict) -> dict:
        """Describe a date series.

        Args:
            series: The Series to describe.
            series_description: The dict containing the series description so far.

        Returns:
            A dict containing calculated series description values.
        """
        stats = {
            "min": pd.Timestamp.to_pydatetime(series.min()),
            "max": pd.Timestamp.to_pydatetime(series.max()),
            "histogram_data": series,
        }

        bins = config["plot"]["histogram"]["bins"].get(int)
        # Bins should never be larger than the number of distinct values
        bins = min(series_description["distinct_count_with_nan"], bins)
        stats["histogram_bins"] = bins

        stats["range"] = stats["max"] - stats["min"]

        chi_squared_threshold = config["vars"]["num"][
            "chi_squared_threshold"].get(float)
        if chi_squared_threshold > 0.0:
            histogram = np.histogram(
                series[series.notna()].astype("int64").values, bins="auto")[0]
            stats["chi_squared"] = chisquare(histogram)

        return stats

    def describe_categorical_1d(series: pd.Series,
                                series_description: dict) -> dict:
        """Describe a categorical series.

        Args:
            series: The Series to describe.
            series_description: The dict containing the series description so far.

        Returns:
            A dict containing calculated series description values.
        """
        # Make sure we deal with strings (Issue #100)
        series = series.astype(str)

        # Only run if at least 1 non-missing value
        value_counts = series_description["value_counts_without_nan"]

        stats = {"top": value_counts.index[0], "freq": value_counts.iloc[0]}

        chi_squared_threshold = config["vars"]["num"][
            "chi_squared_threshold"].get(float)
        if chi_squared_threshold > 0.0:
            stats["chi_squared"] = list(chisquare(value_counts.values))

        check_composition = config["vars"]["cat"]["check_composition"].get(
            bool)
        if check_composition:
            stats["max_length"] = series.str.len().max()
            stats["mean_length"] = series.str.len().mean()
            stats["min_length"] = series.str.len().min()

            from visions.application.summaries.series.text_summary import text_summary

            stats.update(text_summary(series))
            stats["length"] = series.str.len()

        coerce_str_to_date = config["vars"]["cat"]["coerce_str_to_date"].get(
            bool)
        if coerce_str_to_date:
            stats["date_warning"] = warning_type_date(series)

        return stats

    def describe_url_1d(series: pd.Series, series_description: dict) -> dict:
        """Describe a url series.

        Args:
            series: The Series to describe.
            series_description: The dict containing the series description so far.

        Returns:
            A dict containing calculated series description values.
        """
        # Make sure we deal with strings (Issue #100)
        series = series[~series.isnull()].astype(str)

        stats = {}

        # Create separate columns for each URL part
        keys = ["scheme", "netloc", "path", "query", "fragment"]
        url_parts = dict(zip(keys, zip(*series.map(urlsplit))))
        for name, part in url_parts.items():
            stats[f"{name.lower()}_counts"] = pd.Series(
                part, name=name).value_counts()

        # Only run if at least 1 non-missing value
        value_counts = series_description["value_counts_without_nan"]

        stats["top"] = value_counts.index[0]
        stats["freq"] = value_counts.iloc[0]

        return stats

    def describe_path_1d(series: pd.Series, series_description: dict) -> dict:
        """Describe a path series.

        Args:
            series: The Series to describe.
            series_description: The dict containing the series description so far.

        Returns:
            A dict containing calculated series description values.
        """
        series_description.update(
            describe_categorical_1d(series, series_description))

        # Make sure we deal with strings (Issue #100)
        series = series[~series.isnull()].astype(str)
        series = series.map(Path)

        common_prefix = os.path.commonprefix(list(series))
        if common_prefix == "":
            common_prefix = "No common prefix"

        stats = {"common_prefix": common_prefix}

        # Create separate columns for each path part
        keys = ["stem", "suffix", "name", "parent"]
        path_parts = dict(
            zip(
                keys,
                zip(*series.map(
                    lambda x: [x.stem, x.suffix, x.name, x.parent]))))
        for name, part in path_parts.items():
            stats[f"{name.lower()}_counts"] = pd.Series(
                part, name=name).value_counts()

        # Only run if at least 1 non-missing value
        value_counts = series_description["value_counts_without_nan"]

        stats["top"] = value_counts.index[0]
        stats["freq"] = value_counts.iloc[0]

        return stats

    def describe_boolean_1d(series: pd.Series,
                            series_description: dict) -> dict:
        """Describe a boolean series.

        Args:
            series: The Series to describe.
            series_description: The dict containing the series description so far.

        Returns:
            A dict containing calculated series description values.
        """
        value_counts = series_description["value_counts_without_nan"]

        stats = {"top": value_counts.index[0], "freq": value_counts.iloc[0]}

        return stats

    # Make sure pd.NA is not in the series
    series.fillna(np.nan, inplace=True)

    # Infer variable types
    # TODO: use visions for type inference
    # https://github.com/dylan-profiler/visions
    series_description = base.get_var_type(series)

    # Run type specific analysis
    if series_description["type"] == Variable.S_TYPE_UNSUPPORTED:
        series_description.update(
            describe_unsupported(series, series_description))
    else:
        series_description.update(
            describe_supported(series, series_description))

        type_to_func = {
            Variable.TYPE_BOOL: describe_boolean_1d,
            Variable.TYPE_NUM: describe_numeric_1d,
            Variable.TYPE_DATE: describe_date_1d,
            Variable.TYPE_CAT: describe_categorical_1d,
            Variable.TYPE_URL: describe_url_1d,
            Variable.TYPE_PATH: describe_path_1d,
        }

        if series_description["type"] in type_to_func:
            series_description.update(type_to_func[series_description["type"]](
                series, series_description))
        else:
            raise ValueError("Unexpected type")

    # Drop the raw value counts to keep series_description lightweight
    if "value_counts_with_nan" in series_description:
        del series_description["value_counts_with_nan"]
    if "value_counts_without_nan" in series_description:
        del series_description["value_counts_without_nan"]

    # Return the description obtained
    return series_description
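
The URL branch in this variant splits each address into its parts and counts them; the same pattern works standalone (the sample URLs are made up):

from urllib.parse import urlsplit

import pandas as pd

urls = pd.Series([
    "https://example.com/a?x=1",
    "https://example.com/b",
    "http://other.org/a",
])

keys = ["scheme", "netloc", "path", "query", "fragment"]
url_parts = dict(zip(keys, zip(*urls.map(urlsplit))))
for name, part in url_parts.items():
    print(pd.Series(part, name=name).value_counts())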