def _operation(self, group_df):
    """Add a standardized ``<var>_Std`` column for every variable in ``self.vars``.

    The columns are added to a deep copy of ``group_df``, each one placed
    right after its source column. Depending on the instance flags, values
    are centered (the column average is subtracted), reduced (divided by
    the column standard deviation), or both. Entries that are not
    ``Number`` instances become ``None`` whenever a transformation is
    applied.

    When reducing, a standard deviation of zero (e.g. a constant column)
    makes the result undefined; every value of that column is then set to
    ``None`` instead of raising ``ZeroDivisionError``.

    Args:
        group_df: the data to standardize; must contain every variable
            listed in ``self.vars``.

    Returns:
        The deep copy of ``group_df`` with the ``<var>_Std`` columns added.
    """
    # Function-scope import kept from the original code
    # (presumably avoids a circular import with Summarize -- TODO confirm).
    from Summarize import Average, Variance

    result = deepcopy(group_df)
    average_df = Average(*self.vars).apply(group_df)
    sd_df = Variance(*self.vars, get_var=False).apply(group_df)

    for var in self.vars:
        new_var = group_df[var]

        if self.__center:
            average = average_df[var + "_Average", 0]
            new_var = [
                (x - average) if isinstance(x, Number) else None
                for x in new_var
            ]

        if self.__reduce:
            sd = sd_df[var + "_SD", 0]
            if sd == 0:
                # No spread: x / sd is undefined, so mark every value as
                # missing rather than crash on the division.
                new_var = [None for _ in new_var]
            else:
                new_var = [
                    (x / sd) if isinstance(x, Number) else None
                    for x in new_var
                ]

        result.add_column(var + "_Std", new_var, after=var)

    return result
def test_sd_var2_without_variance_group_by(self):
    """Check the per-group SD of ``Var2`` when the variance output is disabled."""
    # Run the grouped pipeline once; both assertions read the same column.
    pipeline = Pipeline(GroupBy("Cat"), Variance("Var2", get_var=False))
    sd_by_group = pipeline.apply(self.df)["Var2_SD"]

    # The SD is a computed float, so compare it with a tolerance instead of
    # relying on exact floating-point equality.
    self.assertAlmostEqual(sd_by_group[0], 0.50)
    self.assertEqual(sd_by_group[1], 0)
def test_variance_var2_without_sd(self):
    """Check that ``Var2_Var`` equals ``var`` of the raw column when SD output is off."""
    summary = Variance("Var2", get_sd=False).apply(self.df)
    observed = summary["Var2_Var"]
    # The summary column holds a single value, hence the one-element list.
    expected = [var(self.df["Var2"])]
    self.assertEqual(observed, expected)
def test_sd_var2_without_variance(self):
    """Check that ``Var2_SD`` equals the square root of ``var`` of the raw column."""
    summary = Variance("Var2", get_var=False).apply(self.df)
    observed = summary["Var2_SD"]
    # The summary column holds a single value, hence the one-element list.
    expected = [sqrt(var(self.df["Var2"]))]
    self.assertEqual(observed, expected)
def test_variance_var1_without_sd_group_by(self):
    """Check that the per-group variance of ``Var1`` is 0 when SD output is disabled."""
    # Run the grouped pipeline once; both assertions read the same column.
    pipeline = Pipeline(GroupBy("Cat"), Variance("Var1", get_sd=False))
    variance_by_group = pipeline.apply(self.df)["Var1_Var"]

    self.assertEqual(variance_by_group[0], 0)
    self.assertEqual(variance_by_group[1], 0)