def profile_point(self, sample, reference):
    """Profile a single sample as a one-hot DiscreteHistogram.

    Parameters
    ----------
    sample : object
        The value to profile; may be None.
    reference : object
        Object whose ``_buckets`` attribute supplies the bucket labels
        (presumably a reference DiscreteHistogram -- confirm with callers).

    Returns
    -------
    DiscreteHistogram
        Histogram over the reference buckets holding a count of 1 in the
        sample's bucket, or all zeros when the sample is None or is not
        one of the buckets.
    """
    labels = reference._buckets
    counts = [0] * len(labels)
    # None and values outside the reference buckets leave every count at 0.
    if sample is None or sample not in labels:
        return DiscreteHistogram(labels, counts)
    counts[labels.index(sample)] = 1
    return DiscreteHistogram(labels, counts)
def _profile_column(self, df, column):
    """Count present versus missing values in a data frame column.

    Parameters
    ----------
    df : pandas.core.frame.DataFrame
        A data frame to profile.
    column : str
        The column in the data frame to profile.

    Returns
    -------
    tuple
        ``(column + "_missing", DiscreteHistogram)`` where the histogram
        has the buckets ``["present", "missing"]``. If the column does not
        exist in ``df``, every row is counted as missing.
    """
    total = df.shape[0]
    try:
        series = df[column]
    except KeyError:
        # pandas raises this if the column doesn't exist
        missing = total
    else:
        # Vectorized null count instead of iterating the Series in Python;
        # int() keeps the count a native Python integer.
        missing = int(series.isnull().sum())
    return (
        column + "_missing",
        DiscreteHistogram(["present", "missing"], [total - missing, missing]),
    )
def profile_column(self, df, column):
    """Build a value-frequency histogram for one data frame column.

    Parameters
    ----------
    df : pandas.core.frame.DataFrame
        A data frame to profile.
    column : str
        The column in the data frame to profile.

    Returns
    -------
    tuple
        ``(column + "_histogram", DiscreteHistogram)`` where the histogram
        has one bucket per distinct value in the column. Missing values
        are excluded (``dropna=True``).
    """
    frequencies = df[column].value_counts(dropna=True)
    labels = list(frequencies.keys())
    # .item() converts each count into a native Python number.
    counts = [frequencies[label].item() for label in labels]
    return (column + "_histogram", DiscreteHistogram(labels, counts))
def profile_column(self, df, column):
    """Profile how many values of a data frame column are missing.

    Parameters
    ----------
    df : pandas.core.frame.DataFrame
        A data frame to profile.
    column : str
        The column in the data frame to profile.

    Returns
    -------
    tuple
        ``(column + "_missing", DiscreteHistogram)`` where the histogram
        has the buckets ``["present", "missing"]``. If the column does not
        exist in ``df``, every row is counted as missing.
    """
    total = df.shape[0]
    try:
        series = df[column]
    except KeyError:
        # pandas raises this if the column doesn't exist
        missing = total
    else:
        # Vectorized null count instead of iterating the Series in Python;
        # int() keeps the count a native Python integer.
        missing = int(series.isnull().sum())
    return (
        column + "_missing",
        DiscreteHistogram(["present", "missing"], [total - missing, missing]),
    )
def profile_point(self, sample, reference):
    """Profile a single sample as either present or missing.

    Parameters
    ----------
    sample : object
        The value to profile. ``None`` and floating-point NaN are treated
        as missing; anything else is treated as present.
    reference : object
        Object whose ``_buckets`` attribute supplies the bucket labels.
        Assumed to be ordered ``["present", "missing"]``, as produced by
        the missing-value column profiler -- confirm with callers.

    Returns
    -------
    DiscreteHistogram
        Histogram over the reference buckets with counts ``[0, 1]`` for a
        missing sample and ``[1, 0]`` otherwise.
    """
    # The column-level profile counts missing values with isnull(), which
    # flags NaN as well as None. NaN is the only float that is not equal
    # to itself, so test for it here to keep point profiles consistent.
    is_missing = sample is None or (
        isinstance(sample, float) and sample != sample
    )
    counts = [0, 1] if is_missing else [1, 0]
    return DiscreteHistogram(reference._buckets, counts)
def _profile_column(self, df, column):
    """Build a value-frequency histogram for one data frame column.

    Parameters
    ----------
    df : pandas.core.frame.DataFrame
        A data frame to profile.
    column : str
        The column in the data frame to profile.

    Returns
    -------
    tuple
        ``(column + "_histogram", DiscreteHistogram)`` where the histogram
        has one bucket per distinct value reported by ``value_counts()``
        called with its default arguments.
    """
    frequencies = df[column].value_counts()
    labels = list(frequencies.keys())
    # .item() converts each count into a native Python number.
    counts = [frequencies[label].item() for label in labels]
    return (column + "_histogram", DiscreteHistogram(labels, counts))