Esempio n. 1
0
    def profile_point(self, sample, reference):
        """Profile one sample as a one-hot histogram over the reference buckets.

        Parameters
        ----------
        sample : object
            The value to profile. ``None`` never matches a bucket.
        reference : object
            An object whose ``_buckets`` sequence supplies the bucket labels
            (presumably a DiscreteHistogram; confirm against the caller).

        Returns
        -------
        DiscreteHistogram
            A histogram over the reference buckets with every count zero,
            except a one at the position of ``sample`` when it is not ``None``
            and occurs among the buckets.
        """
        labels = reference._buckets
        counts = [0] * len(labels)

        # A missing or unseen sample contributes to no bucket.
        if sample is None or sample not in labels:
            return DiscreteHistogram(labels, counts)

        counts[labels.index(sample)] = 1
        return DiscreteHistogram(labels, counts)
Esempio n. 2
0
    def _profile_column(self, df, column):
        """Count present versus missing entries in one data frame column.

        Parameters
        ----------
        df : pandas.core.frame.DataFrame
            The data frame to profile.
        column : str
            The column in the data frame to profile.

        Returns
        -------
        tuple
            ``(name, histogram)`` where ``name`` is ``column + "_missing"`` and
            ``histogram`` is a DiscreteHistogram over ``["present", "missing"]``
            holding the corresponding row counts. A column that does not exist
            in ``df`` is reported as missing in every row.
        """
        total = df.shape[0]
        try:
            # Only the column lookup is expected to raise KeyError.
            series = df[column]
        except KeyError:  # pandas raises this if the column doesn't exist
            missing = total
        else:
            # Vectorized null count instead of a Python-level sum over the
            # Series; int() keeps the count a plain Python integer.
            missing = int(series.isnull().sum())

        return (
            column + "_missing",
            DiscreteHistogram(["present", "missing"], [total - missing, missing]),
        )
Esempio n. 3
0
    def profile_column(self, df, column):
        """Profile a data frame column, returning a name and DiscreteHistogram

        Parameters
        ----------
        df : pandas.core.frame.DataFrame
            A data frame to profile.
        column : str
            The column in the data frame to profile.

        Returns
        -------
        tuple
            ``(name, histogram)`` where ``name`` is ``column + "_histogram"``
            and ``histogram`` is a DiscreteHistogram whose buckets are the
            distinct non-null values of the column and whose counts are how
            often each value occurs.
        """
        # Null entries are excluded from the histogram (dropna=True).
        counts = df[column].value_counts(dropna=True)
        labels = list(counts.index)
        # tolist() yields plain Python integers in the same order as the
        # labels, without one label lookup per bucket and without assuming
        # each count is a NumPy scalar exposing .item().
        frequencies = counts.tolist()
        return (column + "_histogram", DiscreteHistogram(labels, frequencies))
Esempio n. 4
0
    def profile_column(self, df, column):
        """Profile a data frame column, returning a name and DiscreteHistogram

        Parameters
        ----------
        df : pandas.core.frame.DataFrame
            A data frame to profile.
        column : str
            The column in the data frame to profile.

        Returns
        -------
        tuple
            ``(name, histogram)`` where ``name`` is ``column + "_missing"`` and
            ``histogram`` is a DiscreteHistogram over ``["present", "missing"]``
            holding the corresponding row counts. A column that does not exist
            in ``df`` is reported as missing in every row.
        """
        total = df.shape[0]
        try:
            # Only the column lookup is expected to raise KeyError.
            series = df[column]
        except KeyError:  # pandas raises this if the column doesn't exist
            missing = total
        else:
            # Vectorized null count instead of a Python-level sum over the
            # Series; int() keeps the count a plain Python integer.
            missing = int(series.isnull().sum())

        return (column + "_missing",
                DiscreteHistogram(["present", "missing"],
                                  [total - missing, missing]))
Esempio n. 5
0
 def profile_point(self, sample, reference):
     """Profile one sample as present or missing.

     Parameters
     ----------
     sample : object
         The value to profile; only ``None`` counts as missing.
     reference : object
         An object whose ``_buckets`` supplies the bucket labels
         (presumably ``["present", "missing"]``; confirm against the caller).

     Returns
     -------
     DiscreteHistogram
         A histogram over the reference buckets with counts ``[0, 1]`` for a
         missing sample and ``[1, 0]`` otherwise.
     """
     counts = [0, 1] if sample is None else [1, 0]
     return DiscreteHistogram(reference._buckets, counts)
Esempio n. 6
0
 def _profile_column(self, df, column):
     """Build a histogram of the distinct values in one data frame column.

     Parameters
     ----------
     df : pandas.core.frame.DataFrame
         The data frame to profile.
     column : str
         The column in the data frame to profile.

     Returns
     -------
     tuple
         ``(name, histogram)`` where ``name`` is ``column + "_histogram"``
         and ``histogram`` is a DiscreteHistogram whose buckets are the
         distinct values reported by ``value_counts()`` and whose counts are
         how often each value occurs.
     """
     counts = df[column].value_counts()
     labels = list(counts.index)
     # tolist() yields plain Python integers in the same order as the
     # labels, without one label lookup per bucket and without assuming
     # each count is a NumPy scalar exposing .item().
     frequencies = counts.tolist()
     return (column + "_histogram", DiscreteHistogram(labels, frequencies))