Beispiel #1
0
 def test_pivot_no_val_var(self):
     # Pivot built from two variables only (no value variable is passed);
     # the expected table lists just the "Count" aggregate.
     src_domain = self.table.domain
     result = Pivot(
         self.table, Pivot.Functions, src_domain[0], src_domain[1]
     ).pivot_table

     expected_vars = src_domain[:1] + (
         Dv("Aggregate", ["Count"]),
         Cv("c"),
         Cv("d"),
         Cv("e"),
     )
     expected_x = np.array([
         [0, 0, 2, 1, 0],
         [1, 0, 2, 1, 1],
     ])
     self.assert_table_equal(
         result, Table(Domain(expected_vars), expected_x))
Beispiel #2
0
    def test_group_table_metas(self):
        # Group a table whose last two source attributes were moved to metas
        # (with the metas array converted to object dtype) and compare the
        # group table against hand-computed aggregates.
        nan = np.nan
        source_domain = Domain([
            Dv("d1", ("a", "b")),
            Cv("c1"),
            Dv("d2", ("a", "b")),
            Cv("c2"),
        ])
        source_x = np.array([
            [0, 1, 0, 2],
            [1, 2, nan, 3],
            [0, 3, 1, nan],
        ])
        source_table = Table(source_domain, source_x)
        metas_domain = Domain(source_domain.attributes[:2],
                              metas=source_domain.attributes[2:])
        table = source_table.transform(metas_domain, copy=True)
        with table.unlocked():
            table.metas = table.metas.astype(object)

        grouped = Pivot(table, Pivot.Functions, table.domain[-1]).group_table

        expected_vars = (
            table.domain[-1],
            Cv("(count)"),
            Cv("d1 (count defined)"),
            Dv("d1 (majority)", ["a", "b"]),
            Cv("c1 (count defined)"),
            Cv("c1 (sum)"),
            Cv("c1 (mean)"),
            Cv("c1 (min)"),
            Cv("c1 (max)"),
            Cv("c1 (mode)"),
            Cv("c1 (median)"),
            Cv("c1 (var)"),
            Cv("d2 (count defined)"),
            Dv("d2 (majority)", ["a", "b"]),
            Cv("c2 (count defined)"),
            Cv("c2 (sum)"),
            Cv("c2 (mean)"),
            Cv("c2 (min)"),
            Cv("c2 (max)"),
            Cv("c2 (mode)"),
            Cv("c2 (median)"),
            Cv("c2 (var)"),
        )
        # Each row is laid out in the same order as ``expected_vars``.
        expected_x = np.array([
            [0, 1,                                  # group value, (count)
             1, 0,                                  # d1
             1, 1, 1, 1, 1, 1, 1, 0,                # c1
             1, 0,                                  # d2
             1, 2, 2, 2, 2, 2, 2, 0],               # c2
            [1, 1,
             1, 0,
             1, 3, 3, 3, 3, 3, 3, 0,
             1, 1,
             0, 0, nan, nan, nan, nan, nan, nan],
        ], dtype=float)
        self.assert_table_equal(
            grouped, Table(Domain(expected_vars), expected_x))
Beispiel #3
0
 def test_pivot_no_col_var(self):
     # Pivot with ``None`` passed in the column-variable position, using
     # every aggregation in ``Pivot.Functions``.
     nan = np.nan
     src_domain = self.table.domain
     result = Pivot(
         self.table, Pivot.Functions, src_domain[0], None, src_domain[2]
     ).pivot_table

     aggregate_var = Dv("Aggregate", [
         "Count", "Count defined", "Sum", "Mean", "Min",
         "Max", "Mode", "Median", "Var",
     ])
     expected_vars = src_domain[:1] + (aggregate_var, Cv("a"), Cv("b"))
     expected_x = np.array([
         [0, 0, 4, 0], [0, 1, 4, 0], [0, 2, 10, 0],
         [0, 3, 2.5, nan], [0, 4, 1, nan], [0, 5, 4, nan],
         [0, 6, 1, nan], [0, 7, 2.5, nan], [0, 8, 1.25, nan],
         [1, 0, 0, 4], [1, 1, 0, 3], [1, 2, 0, 18],
         [1, 3, nan, 6], [1, 4, nan, 5], [1, 5, nan, 7],
         [1, 6, nan, 5], [1, 7, nan, 6], [1, 8, nan, 2 / 3],
     ])
     self.assert_table_equal(
         result, Table(Domain(expected_vars), expected_x))
Beispiel #4
0
    def test_group_table_metas(self):
        # Group a copied table whose last two source attributes were moved to
        # metas (with the metas array converted to object dtype) and compare
        # the group table against hand-computed aggregates.
        nan = np.nan
        source_domain = Domain([
            Dv("d1", ("a", "b")),
            Cv("c1"),
            Dv("d2", ("a", "b")),
            Cv("c2"),
        ])
        source_x = np.array([
            [0, 1, 0, 2],
            [1, 2, nan, 3],
            [0, 3, 1, nan],
        ])
        source_table = Table(source_domain, source_x)
        metas_domain = Domain(source_domain.attributes[:2],
                              metas=source_domain.attributes[2:])
        table = source_table.transform(metas_domain).copy()
        with table.unlocked():
            table.metas = table.metas.astype(object)

        grouped = Pivot(table, Pivot.Functions, table.domain[-1]).group_table

        expected_vars = (
            table.domain[-1],
            Cv("(数目)"),
            Cv("d1 (非缺失数目)"),
            Dv("d1 (最常见)", ["a", "b"]),
            Cv("c1 (非缺失数目)"),
            Cv("c1 (总和)"),
            Cv("c1 (平均值)"),
            Cv("c1 (最小值)"),
            Cv("c1 (最大值)"),
            Cv("c1 (众数)"),
            Cv("c1 (中位数)"),
            Cv("c1 (方差)"),
            Cv("d2 (非缺失数目)"),
            Dv("d2 (最常见)", ["a", "b"]),
            Cv("c2 (非缺失数目)"),
            Cv("c2 (总和)"),
            Cv("c2 (平均值)"),
            Cv("c2 (最小值)"),
            Cv("c2 (最大值)"),
            Cv("c2 (众数)"),
            Cv("c2 (中位数)"),
            Cv("c2 (方差)"),
        )
        # Each row is laid out in the same order as ``expected_vars``.
        expected_x = np.array([
            [0, 1,                                  # group value, count
             1, 0,                                  # d1
             1, 1, 1, 1, 1, 1, 1, 0,                # c1
             1, 0,                                  # d2
             1, 2, 2, 2, 2, 2, 2, 0],               # c2
            [1, 1,
             1, 0,
             1, 3, 3, 3, 3, 3, 3, 0,
             1, 1,
             0, 0, nan, nan, nan, nan, nan, nan],
        ], dtype=float)
        self.assert_table_equal(
            grouped, Table(Domain(expected_vars), expected_x))
Beispiel #5
0
    def test_group_table_no_col_var_metas(self):
        # Part 1: the meta variables of ``table1`` must be rejected.
        # The aggregation functions are passed explicitly so that ``var``
        # occupies the same position as the grouping variable in the other
        # Pivot calls; previously ``var`` was passed in the functions slot,
        # so a TypeError could be raised for an unrelated reason.
        # NOTE(review): assumes the signature is
        # Pivot(table, functions, row_var, ...) -- confirm against Pivot.
        for var in self.table1.domain.metas:
            with self.assertRaises(TypeError):
                Pivot(self.table1, Pivot.Functions, var)

        # Part 2: group a table whose last two source attributes were moved
        # to metas and compare against hand-computed aggregates.
        nan = np.nan
        source_domain = Domain([
            Dv("d1", ("a", "b")),
            Cv("c1"),
            Dv("d2", ("a", "b")),
            Cv("c2"),
        ])
        source_x = np.array([
            [0, 1, 0, 2],
            [1, 2, nan, 3],
            [0, 3, 1, nan],
        ])
        table = Table(source_domain, source_x).transform(
            Domain(source_domain.attributes[:2],
                   metas=source_domain.attributes[2:]))

        grouped = Pivot(table, Pivot.Functions, table.domain[-1]).group_table

        expected_vars = (
            table.domain[-1],
            Cv("(数目)"),
            Cv("d1 (非缺失数目)"),
            Dv("d1 (最常见)", ["a", "b"]),
            Cv("c1 (非缺失数目)"),
            Cv("c1 (总和)"),
            Cv("c1 (平均值)"),
            Cv("c1 (最小值)"),
            Cv("c1 (最大值)"),
            Cv("c1 (众数)"),
            Cv("c1 (中位数)"),
            Cv("c1 (方差)"),
            Cv("d2 (非缺失数目)"),
            Dv("d2 (最常见)", ["a", "b"]),
            Cv("c2 (非缺失数目)"),
            Cv("c2 (总和)"),
            Cv("c2 (平均值)"),
            Cv("c2 (最小值)"),
            Cv("c2 (最大值)"),
            Cv("c2 (众数)"),
            Cv("c2 (中位数)"),
            Cv("c2 (方差)"),
        )
        # Each row is laid out in the same order as ``expected_vars``.
        expected_x = np.array([
            [0, 1,                                  # group value, count
             1, 0,                                  # d1
             1, 1, 1, 1, 1, 1, 1, 0,                # c1
             1, 0,                                  # d2
             1, 2, 2, 2, 2, 2, 2, 0],               # c2
            [1, 1,
             1, 0,
             1, 3, 3, 3, 3, 3, 3, 0,
             1, 1,
             0, 0, nan, nan, nan, nan, nan, nan],
        ], dtype=float)
        self.assert_table_equal(
            grouped, Table(Domain(expected_vars), expected_x))
Beispiel #6
0
 def test_pivot_disc_val_var(self):
     # Aggregates with "Count defined" and "Majority" only; the expected
     # value columns "a" and "b" are discrete variables.
     nan = np.nan
     src_domain = self.table.domain
     result = Pivot(
         self.table, [Pivot.Count_defined, Pivot.Majority],
         src_domain[2], src_domain[0], src_domain[1]
     ).pivot_table

     expected_vars = (
         src_domain[2],
         Dv("Aggregate", ["Count defined", "Majority"]),
         Dv("a", ["0.0", "1.0", "c", "d"]),
         Dv("b", ["0.0", "1.0", "c", "e"]),
     )
     # Two rows (one per aggregate) for each value in the first column.
     expected_x = np.array([
         [1, 0, 1, 0], [1, 1, 2, nan],
         [2, 0, 1, 0], [2, 1, 3, nan],
         [3, 0, 0, 0], [3, 1, nan, nan],
         [4, 0, 1, 0], [4, 1, 2, nan],
         [5, 0, 0, 1], [5, 1, nan, 2],
         [6, 0, 0, 1], [6, 1, nan, 2],
         [7, 0, 0, 1], [7, 1, nan, 3],
         [8, 0, 0, 0], [8, 1, nan, nan],
     ])
     self.assert_table_equal(
         result, Table(Domain(expected_vars), expected_x))
Beispiel #7
0
    def test_group_table_no_col_var_metas(self):
        # Part 1: the meta variables of ``table1`` must be rejected.
        # The aggregation functions are passed explicitly so that ``var``
        # occupies the same position as the grouping variable in the other
        # Pivot calls; previously ``var`` was passed in the functions slot,
        # so a TypeError could be raised for an unrelated reason.
        # NOTE(review): assumes the signature is
        # Pivot(table, functions, row_var, ...) -- confirm against Pivot.
        for var in self.table1.domain.metas:
            with self.assertRaises(TypeError):
                Pivot(self.table1, Pivot.Functions, var)

        # Part 2: group a table whose last two source attributes were moved
        # to metas and compare against hand-computed aggregates.
        nan = np.nan
        source_domain = Domain([
            Dv("d1", ("a", "b")),
            Cv("c1"),
            Dv("d2", ("a", "b")),
            Cv("c2"),
        ])
        source_x = np.array([
            [0, 1, 0, 2],
            [1, 2, nan, 3],
            [0, 3, 1, nan],
        ])
        table = Table(source_domain, source_x).transform(
            Domain(source_domain.attributes[:2],
                   metas=source_domain.attributes[2:]))

        grouped = Pivot(table, Pivot.Functions, table.domain[-1]).group_table

        expected_vars = (
            table.domain[-1],
            Cv("(count)"),
            Cv("d1 (count defined)"),
            Dv("d1 (majority)", ["a", "b"]),
            Cv("c1 (count defined)"),
            Cv("c1 (sum)"),
            Cv("c1 (mean)"),
            Cv("c1 (min)"),
            Cv("c1 (max)"),
            Cv("c1 (mode)"),
            Cv("c1 (median)"),
            Cv("c1 (var)"),
            Cv("d2 (count defined)"),
            Dv("d2 (majority)", ["a", "b"]),
            Cv("c2 (count defined)"),
            Cv("c2 (sum)"),
            Cv("c2 (mean)"),
            Cv("c2 (min)"),
            Cv("c2 (max)"),
            Cv("c2 (mode)"),
            Cv("c2 (median)"),
            Cv("c2 (var)"),
        )
        # Each row is laid out in the same order as ``expected_vars``.
        expected_x = np.array([
            [0, 1,                                  # group value, (count)
             1, 0,                                  # d1
             1, 1, 1, 1, 1, 1, 1, 0,                # c1
             1, 0,                                  # d2
             1, 2, 2, 2, 2, 2, 2, 0],               # c2
            [1, 1,
             1, 0,
             1, 3, 3, 3, 3, 3, 3, 0,
             1, 1,
             0, 0, nan, nan, nan, nan, nan, nan],
        ], dtype=float)
        self.assert_table_equal(
            grouped, Table(Domain(expected_vars), expected_x))
Beispiel #8
0
 def test_pivot(self):
     # Full pivot using the first three domain variables and every
     # aggregation in ``Pivot.Functions``.
     nan = np.nan
     src_domain = self.table.domain
     result = Pivot(
         self.table, Pivot.Functions,
         src_domain[0], src_domain[1], src_domain[2]
     ).pivot_table

     aggregate_var = Dv("Aggregate", [
         "Count", "Count defined", "Sum", "Mean", "Min",
         "Max", "Mode", "Median", "Var",
     ])
     expected_vars = src_domain[:1] + (
         aggregate_var, Cv("c"), Cv("d"), Cv("e"))
     expected_x = np.array([
         [0, 0, 2, 1, 0], [0, 1, 2, 1, 0], [0, 2, 5, 2, 0],
         [0, 3, 2.5, 2, nan], [0, 4, 1, 2, nan], [0, 5, 4, 2, nan],
         [0, 6, 1, 2, nan], [0, 7, 2.5, 2, nan], [0, 8, 2.25, 0, nan],
         [1, 0, 2, 1, 1], [1, 1, 2, 0, 1], [1, 2, 11, 0, 7],
         [1, 3, 5.5, nan, 7], [1, 4, 5, nan, 7], [1, 5, 6, nan, 7],
         [1, 6, 5, nan, 7], [1, 7, 5.5, nan, 7], [1, 8, 0.25, nan, 0],
     ])
     self.assert_table_equal(
         result, Table(Domain(expected_vars), expected_x))
Beispiel #9
0
    def test_pivot_time_val_var(self):
        # Pivot over a time variable. The expected type of the value columns
        # changes with the requested aggregations: time for Min/Max,
        # continuous for Count defined + Sum, discrete for Count defined + Max.
        nan = np.nan
        domain = Domain([
            Dv("d1", ("a", "b")),
            Dv("d2", ("c", "d")),
            Tv("t1", have_date=1),
        ])
        table = Table(domain, np.array([
            [0, 1, 1e9],
            [0, 0, 1e8],
            [1, 0, 2e8],
            [1, 1, nan],
        ]))

        # Min
        pivot = Pivot(table, [Pivot.Min], domain[0], domain[1], domain[2])
        expected_vars = (
            domain[0],
            Dv("Aggregate", ["Min"]),
            Tv("c", have_date=1),
            Tv("d", have_date=1),
        )
        expected_x = np.array([
            [0, 0, 1e8, 1e9],
            [1, 0, 2e8, nan],
        ])
        self.assert_table_equal(
            pivot.pivot_table, Table(Domain(expected_vars), expected_x))

        # Min, Max
        pivot = Pivot(table, [Pivot.Min, Pivot.Max],
                      domain[0], domain[1], domain[2])
        expected_vars = (
            domain[0],
            Dv("Aggregate", ["Min", "Max"]),
            Tv("c", have_date=1),
            Tv("d", have_date=1),
        )
        expected_x = np.array([
            [0, 0, 1e8, 1e9],
            [0, 1, 1e8, 1e9],
            [1, 0, 2e8, nan],
            [1, 1, 2e8, nan],
        ])
        self.assert_table_equal(
            pivot.pivot_table, Table(Domain(expected_vars), expected_x))

        # Count defined, Sum
        pivot = Pivot(table, [Pivot.Count_defined, Pivot.Sum],
                      domain[0], domain[1], domain[2])
        expected_vars = (
            domain[0],
            Dv("Aggregate", ["Count defined", "Sum"]),
            Cv("c"),
            Cv("d"),
        )
        expected_x = np.array([
            [0, 0, 1, 1],
            [0, 1, 1e8, 1e9],
            [1, 0, 1, 0],
            [1, 1, 2e8, 0],
        ])
        self.assert_table_equal(
            pivot.pivot_table, Table(Domain(expected_vars), expected_x))

        # Count defined, Max
        pivot = Pivot(table, [Pivot.Count_defined, Pivot.Max],
                      domain[0], domain[1], domain[2])
        expected_vars = (
            domain[0],
            Dv("Aggregate", ["Count defined", "Max"]),
            Dv("c", ["1.0", "1973-03-03", "1976-05-03"]),
            Dv("d", ["0.0", "1.0", "2001-09-09"]),
        )
        expected_x = np.array([
            [0, 0, 0, 1],
            [0, 1, 1, 2],
            [1, 0, 0, 0],
            [1, 1, 2, nan],
        ])
        self.assert_table_equal(
            pivot.pivot_table, Table(Domain(expected_vars), expected_x))
Beispiel #10
0
 def test_pivot_data_subset(self):
     # Pivot the first 100 iris rows with the class variable passed twice
     # (third and fifth argument) and check only the resulting domain.
     iris = Table("iris")
     class_var = iris.domain.class_var
     pivot = Pivot(iris[:100], Pivot.Functions, class_var, None, class_var)
     expected_domain = Domain((
         class_var,
         Dv("Aggregate", ["Count", "Count defined", "Majority"]),
         Dv("Iris-setosa", ["0.0", "50.0", "Iris-setosa"]),
         Dv("Iris-versicolor", ["0.0", "50.0", "Iris-versicolor"]),
     ))
     self.assert_domain_equal(expected_domain, pivot.pivot_table.domain)
Beispiel #11
0
    def test_group_table_no_col_var(self):
        # Group by the first variable only, then check that passing the same
        # variable twice produces an identical group table.
        table_domain = self.table.domain
        grouped = Pivot(
            self.table, Pivot.Functions, table_domain[0]).group_table

        stat_vars = (
            Cv("(数目)"),
            Cv("d1 (非缺失数目)"),
            Dv("d1 (最常见)", ["a", "b"]),
            Cv("d2 (非缺失数目)"),
            Dv("d2 (最常见)", ["c", "d", "e"]),
            Cv("c1 (非缺失数目)"),
            Cv("c1 (总和)"),
            Cv("c1 (平均值)"),
            Cv("c1 (最小值)"),
            Cv("c1 (最大值)"),
            Cv("c1 (众数)"),
            Cv("c1 (中位数)"),
            Cv("c1 (方差)"),
        )
        # The grouping variable is re-read from this rebuilt domain below.
        rebuilt_domain = Domain(table_domain[:1] + stat_vars)
        expected_x = np.array([
            [0, 4, 4, 0, 3, 0, 4, 10, 2.5, 1, 4, 1, 2.5, 1.25],
            [1, 4, 4, 1, 4, 0, 3, 18, 6, 5, 7, 5, 6, 2 / 3],
        ], dtype=float)
        expected = Table(Domain(rebuilt_domain[:1] + stat_vars), expected_x)
        self.assert_table_equal(grouped, expected)

        grouped_same_vars = Pivot(
            self.table, Pivot.Functions,
            rebuilt_domain[0], rebuilt_domain[0]).group_table
        self.assert_table_equal(grouped, grouped_same_vars)
Beispiel #12
0
    def test_pivot_renaming_domain(self):
        # The class variable is renamed to 'Aggregate' before pivoting; the
        # pivot table's domain is then expected to contain copies of it
        # named 'Aggregate (1)' and 'Aggregate (2)'.
        data = Table("iris")
        cls_var = data.domain.class_var.copy(name='Aggregate')
        data.domain = Domain(data.domain.attributes, (cls_var, ))
        pivot = Pivot(data, [Pivot.Functions.Sum], cls_var, None, None)

        pivot_domain = pivot.pivot_table.domain
        for expected_name in ('Aggregate (1)', 'Aggregate (2)'):
            renamed_var = data.domain.class_var.copy(name=expected_name)
            # assertIn reports the missing variable and the searched domain
            # on failure, whereas assertTrue only says "False is not true".
            self.assertIn(renamed_var, pivot_domain)
Beispiel #13
0
    def test_group_table_no_col_var(self):
        # Group by the first variable only, then check that passing the same
        # variable twice produces an identical group table.
        table_domain = self.table.domain
        grouped = Pivot(
            self.table, Pivot.Functions, table_domain[0]).group_table

        stat_vars = (
            Cv("(count)"),
            Cv("d1 (count defined)"),
            Dv("d1 (majority)", ["a", "b"]),
            Cv("d2 (count defined)"),
            Dv("d2 (majority)", ["c", "d", "e"]),
            Cv("c1 (count defined)"),
            Cv("c1 (sum)"),
            Cv("c1 (mean)"),
            Cv("c1 (min)"),
            Cv("c1 (max)"),
            Cv("c1 (mode)"),
            Cv("c1 (median)"),
            Cv("c1 (var)"),
        )
        # The grouping variable is re-read from this rebuilt domain below.
        rebuilt_domain = Domain(table_domain[:1] + stat_vars)
        expected_x = np.array([
            [0, 4, 4, 0, 3, 0, 4, 10, 2.5, 1, 4, 1, 2.5, 1.25],
            [1, 4, 4, 1, 4, 0, 3, 18, 6, 5, 7, 5, 6, 2 / 3],
        ], dtype=float)
        expected = Table(Domain(rebuilt_domain[:1] + stat_vars), expected_x)
        self.assert_table_equal(grouped, expected)

        grouped_same_vars = Pivot(
            self.table, Pivot.Functions,
            rebuilt_domain[0], rebuilt_domain[0]).group_table
        self.assert_table_equal(grouped, grouped_same_vars)
Beispiel #14
0
 def test_group_table_time_var(self):
     # Group a table with a time variable and compare the string form of
     # the group table with the expected rendering.
     time_domain = Domain([Dv("d1", ("a", "b")), Tv("t1", have_date=1)])
     values = np.array([
         [0, 1e9],
         [0, 1e8],
         [1, 2e8],
         [1, np.nan],
     ])
     pivot = Pivot(
         Table(time_domain, values), Pivot.Functions,
         time_domain[0], val_var=time_domain[1])
     expected_repr = (
         "[[a, 2, 2, a, 2, 1.1e+09, 1987-06-06, 1973-03-03, "
         "2001-09-09, 1973-03-03, 1987-06-06, 2.025e+17],\n "
         "[b, 2, 2, b, 1, 2e+08, 1976-05-03, 1976-05-03, "
         "1976-05-03, 1976-05-03, 1976-05-03, 0]]"
     )
     self.assertEqual(str(pivot.group_table), expected_repr)
Beispiel #15
0
    def test_group_table_update(self):
        # Build the pivot with two aggregations, widen it to all of
        # ``Pivot.Functions`` via ``update_group_table``, then narrow it back
        # and expect the initial group table again.
        nan = np.nan
        src_domain = self.table.domain
        stat_vars = (
            Cv("(count)"),
            Cv("d1 (count defined)"),
            Dv("d1 (majority)", ["a", "b"]),
            Cv("d2 (count defined)"),
            Dv("d2 (majority)", ["c", "d", "e"]),
            Cv("c1 (count defined)"),
            Cv("c1 (sum)"),
            Cv("c1 (mean)"),
            Cv("c1 (min)"),
            Cv("c1 (max)"),
            Cv("c1 (mode)"),
            Cv("c1 (median)"),
            Cv("c1 (var)"),
        )
        full_x = np.array([
            [0, 0, 2, 2, 0, 2, 0, 2, 5, 2.5, 1, 4, 1, 2.5, 2.25],
            [0, 1, 1, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0],
            [0, 2, 0, 0, nan, 0, nan, 0, 0, nan, nan, nan, nan, nan, nan],
            [1, 0, 2, 2, 1, 2, 0, 2, 11, 5.5, 5, 6, 5, 5.5, 0.25],
            [1, 1, 1, 1, 1, 1, 1, 0, 0, nan, nan, nan, nan, nan, nan],
            [1, 2, 1, 1, 1, 1, 2, 1, 7, 7, 7, 7, 7, 7, 0],
        ])
        full_table = Table(Domain(src_domain[:2] + stat_vars), full_x)

        two_functions = [Pivot.Functions.Count, Pivot.Functions.Sum]
        pivot = Pivot(self.table, two_functions, src_domain[0], src_domain[1])
        initial_group_tab = pivot.group_table

        pivot.update_group_table(Pivot.Functions)
        self.assert_table_equal(pivot.group_table, full_table)

        pivot.update_group_table(two_functions)
        self.assert_table_equal(initial_group_tab, pivot.group_table)
Beispiel #16
0
    def test_group_table_use_cached(self, count_func, sum_func):
        # ``count_func`` and ``sum_func`` are mock objects supplied by the
        # caller (the patching itself is outside this view). After the first
        # group table is built, neither mock may be called again when the
        # set of aggregations is widened or narrowed.
        nan = np.nan
        src_domain = self.table.domain
        pivot = Pivot(self.table, [Pivot.Count, Pivot.Sum],
                      src_domain[0], src_domain[1])
        initial_group_tab = pivot.group_table
        for mocked in (count_func, sum_func):
            mocked.reset_mock()

        pivot.update_group_table(Pivot.Functions)
        count_func.assert_not_called()
        sum_func.assert_not_called()

        stat_vars = (
            Cv("(count)"),
            Cv("d1 (count defined)"),
            Dv("d1 (majority)", ["a", "b"]),
            Cv("d2 (count defined)"),
            Dv("d2 (majority)", ["c", "d", "e"]),
            Cv("c1 (count defined)"),
            Cv("c1 (sum)"),
            Cv("c1 (mean)"),
            Cv("c1 (min)"),
            Cv("c1 (max)"),
            Cv("c1 (mode)"),
            Cv("c1 (median)"),
            Cv("c1 (var)"),
        )
        full_x = np.array([
            [0, 0, 2, 2, 0, 2, 0, 2, 5, 2.5, 1, 4, 1, 2.5, 2.25],
            [0, 1, 1, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0],
            [0, 2, 0, 0, nan, 0, nan, 0, 0, nan, nan, nan, nan, nan, nan],
            [1, 0, 2, 2, 1, 2, 0, 2, 11, 5.5, 5, 6, 5, 5.5, 0.25],
            [1, 1, 1, 1, 1, 1, 1, 0, 0, nan, nan, nan, nan, nan, nan],
            [1, 2, 1, 1, 1, 1, 2, 1, 7, 7, 7, 7, 7, 7, 0],
        ])
        self.assert_table_equal(
            pivot.group_table,
            Table(Domain(src_domain[:2] + stat_vars), full_x))

        pivot.update_group_table([Pivot.Count, Pivot.Sum])
        count_func.assert_not_called()
        sum_func.assert_not_called()
        self.assert_table_equal(pivot.group_table, initial_group_tab)
Beispiel #17
0
    def test_group_table_use_cached(self, count_func, sum_func):
        # ``count_func`` and ``sum_func`` are mock objects supplied by the
        # caller (the patching itself is outside this view). After the first
        # group table is built, neither mock may be called again when the
        # set of aggregations is widened or narrowed.
        nan = np.nan
        src_domain = self.table.domain
        pivot = Pivot(self.table, [Pivot.Count, Pivot.Sum],
                      src_domain[0], src_domain[1])
        initial_group_tab = pivot.group_table
        for mocked in (count_func, sum_func):
            mocked.reset_mock()

        pivot.update_group_table(Pivot.Functions)
        count_func.assert_not_called()
        sum_func.assert_not_called()

        stat_vars = (
            Cv("(数目)"),
            Cv("d1 (非缺失数目)"),
            Dv("d1 (最常见)", ["a", "b"]),
            Cv("d2 (非缺失数目)"),
            Dv("d2 (最常见)", ["c", "d", "e"]),
            Cv("c1 (非缺失数目)"),
            Cv("c1 (总和)"),
            Cv("c1 (平均值)"),
            Cv("c1 (最小值)"),
            Cv("c1 (最大值)"),
            Cv("c1 (众数)"),
            Cv("c1 (中位数)"),
            Cv("c1 (方差)"),
        )
        full_x = np.array([
            [0, 0, 2, 2, 0, 2, 0, 2, 5, 2.5, 1, 4, 1, 2.5, 2.25],
            [0, 1, 1, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0],
            [0, 2, 0, 0, nan, 0, nan, 0, 0, nan, nan, nan, nan, nan, nan],
            [1, 0, 2, 2, 1, 2, 0, 2, 11, 5.5, 5, 6, 5, 5.5, 0.25],
            [1, 1, 1, 1, 1, 1, 1, 0, 0, nan, nan, nan, nan, nan, nan],
            [1, 2, 1, 1, 1, 1, 2, 1, 7, 7, 7, 7, 7, 7, 0],
        ])
        self.assert_table_equal(
            pivot.group_table,
            Table(Domain(src_domain[:2] + stat_vars), full_x))

        pivot.update_group_table([Pivot.Count, Pivot.Sum])
        count_func.assert_not_called()
        sum_func.assert_not_called()
        self.assert_table_equal(pivot.group_table, initial_group_tab)
Beispiel #18
0
    def test_group_table_update(self):
        # Build the pivot with two aggregations, widen it to all of
        # ``Pivot.Functions`` via ``update_group_table``, then narrow it back
        # and expect the initial group table again.
        nan = np.nan
        src_domain = self.table.domain
        stat_vars = (
            Cv("(数目)"),
            Cv("d1 (非缺失数目)"),
            Dv("d1 (最常见)", ["a", "b"]),
            Cv("d2 (非缺失数目)"),
            Dv("d2 (最常见)", ["c", "d", "e"]),
            Cv("c1 (非缺失数目)"),
            Cv("c1 (总和)"),
            Cv("c1 (平均值)"),
            Cv("c1 (最小值)"),
            Cv("c1 (最大值)"),
            Cv("c1 (众数)"),
            Cv("c1 (中位数)"),
            Cv("c1 (方差)"),
        )
        full_x = np.array([
            [0, 0, 2, 2, 0, 2, 0, 2, 5, 2.5, 1, 4, 1, 2.5, 2.25],
            [0, 1, 1, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0],
            [0, 2, 0, 0, nan, 0, nan, 0, 0, nan, nan, nan, nan, nan, nan],
            [1, 0, 2, 2, 1, 2, 0, 2, 11, 5.5, 5, 6, 5, 5.5, 0.25],
            [1, 1, 1, 1, 1, 1, 1, 0, 0, nan, nan, nan, nan, nan, nan],
            [1, 2, 1, 1, 1, 1, 2, 1, 7, 7, 7, 7, 7, 7, 0],
        ])
        full_table = Table(Domain(src_domain[:2] + stat_vars), full_x)

        two_functions = [Pivot.Functions.Count, Pivot.Functions.Sum]
        pivot = Pivot(self.table, two_functions, src_domain[0], src_domain[1])
        initial_group_tab = pivot.group_table

        pivot.update_group_table(Pivot.Functions)
        self.assert_table_equal(pivot.group_table, full_table)

        pivot.update_group_table(two_functions)
        self.assert_table_equal(initial_group_tab, pivot.group_table)
Beispiel #19
0
 def test_group_table_1(self):
     # Group ``table1`` by its second variable and compare every aggregate
     # column of the resulting group table.
     nan = np.nan
     var = self.table1.domain.variables[1]

     def eight_stats(name):
         # The eight aggregate columns expected for c0, c1 and c2.
         labels = ("count defined", "sum", "mean", "min",
                   "max", "mode", "median", "var")
         return [Cv("{} ({})".format(name, label)) for label in labels]

     def two_stats(name):
         # The two aggregate columns expected for d1, d2 and cls.
         return [Cv("{} (count defined)".format(name)),
                 Dv("{} (majority)".format(name), ["a", "b"])]

     domain = Domain(
         [var, Cv("(count)")]
         + eight_stats("c0") + two_stats("d1")
         + eight_stats("c1") + two_stats("d2")
         + eight_stats("c2") + two_stats("cls")
         + [Cv("m1 (count defined)"), Cv("m2 (count defined)")]
     )
     # Each row is laid out in the same order as the domain above.
     X = np.array([
         [0, 2,                                    # group value, (count)
          0, 0, nan, nan, nan, nan, nan, nan,      # c0
          2, 0,                                    # d1
          2, 4, 2, 1, 3, 1, 2, 1,                  # c1
          2, 0,                                    # d2
          1, 2, 2, 2, 2, 2, 2, 0,                  # c2
          2, 0,                                    # cls
          2, 1],                                   # m1, m2
         [1, 1,
          0, 0, nan, nan, nan, nan, nan, nan,
          1, 1,
          1, 2, 2, 2, 2, 2, 2, 0,
          0, nan,
          1, 3, 3, 3, 3, 3, 3, 0,
          1, 0,
          1, 1],
     ])
     grouped = Pivot(self.table1, Pivot.Functions, var).group_table
     self.assert_table_equal(grouped, Table(domain, X))
Beispiel #20
0
 def test_group_table_1(self):
     # Group ``table1`` by its second variable and compare every aggregate
     # column of the resulting group table.
     nan = np.nan
     var = self.table1.domain.variables[1]

     def eight_stats(name):
         # The eight aggregate columns expected for c0, c1 and c2.
         labels = ("非缺失数目", "总和", "平均值", "最小值",
                   "最大值", "众数", "中位数", "方差")
         return [Cv("{} ({})".format(name, label)) for label in labels]

     def two_stats(name):
         # The two aggregate columns expected for d1, d2 and cls.
         return [Cv("{} (非缺失数目)".format(name)),
                 Dv("{} (最常见)".format(name), ["a", "b"])]

     domain = Domain(
         [var, Cv("(数目)")]
         + eight_stats("c0") + two_stats("d1")
         + eight_stats("c1") + two_stats("d2")
         + eight_stats("c2") + two_stats("cls")
         + [Cv("m1 (非缺失数目)"), Cv("m2 (非缺失数目)")]
     )
     # Each row is laid out in the same order as the domain above.
     X = np.array([
         [0, 2,                                    # group value, count
          0, 0, nan, nan, nan, nan, nan, nan,      # c0
          2, 0,                                    # d1
          2, 4, 2, 1, 3, 1, 2, 1,                  # c1
          2, 0,                                    # d2
          1, 2, 2, 2, 2, 2, 2, 0,                  # c2
          2, 0,                                    # cls
          2, 1],                                   # m1, m2
         [1, 1,
          0, 0, nan, nan, nan, nan, nan, nan,
          1, 1,
          1, 2, 2, 2, 2, 2, 2, 0,
          0, nan,
          1, 3, 3, 3, 3, 3, 3, 0,
          1, 0,
          1, 1],
     ])
     grouped = Pivot(self.table1, Pivot.Functions, var).group_table
     self.assert_table_equal(grouped, Table(domain, X))
Beispiel #21
0
    def test_pivot_total(self):
        # Checks the three totals tables exposed by the pivot:
        # ``pivot_total_h``, ``pivot_total_v`` and ``pivot_total``.
        src_domain = self.table.domain
        functions = [Pivot.Functions.Count, Pivot.Functions.Sum]
        pivot = Pivot(self.table, functions,
                      src_domain[0], src_domain[1], src_domain[2])

        total_h_vars = (
            Dv(src_domain[0].name, ["Total"]),
            Dv("Aggregate", ["Count", "Sum"]),
            Cv("c"),
            Cv("d"),
            Cv("e"),
        )
        total_h_x = np.array([
            [0, 0, 4, 2, 1],
            [0, 1, 16, 2, 7],
        ])
        expected_h = Table(Domain(total_h_vars), total_h_x)
        self.assert_table_equal(pivot.pivot_total_h, expected_h)

        expected_v = Table(Domain((Cv("Total"),)),
                           np.array([[3], [7], [4], [18]]))
        self.assert_table_equal(pivot.pivot_total_v, expected_v)

        expected_total = Table(Domain((Cv("Total"),)),
                               np.array([[7], [25]]))
        self.assert_table_equal(pivot.pivot_total, expected_total)
Beispiel #22
0
 def test_pivot_attr_combinations(self):
     # Accepted variable combinations must yield a pivot table with at
     # least 4 rows and 4 columns; rejected ones must raise TypeError.
     # The trailing comments keep the original shorthand for the kinds of
     # the variables in each combination.
     domain = self.table1.domain
     accepted = (
         (domain[1], domain[3], domain[5]),  # d d d
         (domain[1], domain[3], domain[4]),  # d d c
         (domain[1], domain[3], domain[-1]),  # d d s
         (domain[2], domain[3], domain[5]),  # c d d
         (domain[2], domain[3], domain[4]),  # c d c
         (domain[2], domain[3], domain[-1]),  # c d s
     )
     for var1, var2, var3 in accepted:
         pivot = Pivot(self.table1, Pivot.Functions, var1, var2, var3)
         pivot_tab = pivot.pivot_table
         self.assertGreaterEqual(pivot_tab.X.shape[0], 4)
         self.assertGreaterEqual(pivot_tab.X.shape[1], 4)

     rejected = (
         (domain[1], domain[2]),  # d c
         (domain[1], domain[-2]),  # d s
         (domain[2], domain[4]),  # c
         (domain[-1], domain[1]),  # s
     )
     for var1, var2 in rejected:
         # Pass the aggregation functions explicitly so var1/var2 occupy
         # the same positions as in the accepted calls above; previously
         # var1 was passed in the functions slot, so a TypeError could be
         # raised for an unrelated reason.
         # NOTE(review): assumes the signature is
         # Pivot(table, functions, row_var, col_var, ...) -- confirm.
         with self.assertRaises(TypeError):
             Pivot(self.table1, Pivot.Functions, var1, var2)
Beispiel #23
0
 def test_pivot_no_col_var(self):
     # Pivot with ``None`` passed in the column-variable position, using
     # every aggregation in ``Pivot.Functions``.
     nan = np.nan
     src_domain = self.table.domain
     result = Pivot(
         self.table, Pivot.Functions, src_domain[0], None, src_domain[2]
     ).pivot_table

     aggregate_labels = [
         "数目", "非缺失数目", "总和", "平均值", "最小值",
         "最大值", "众数", "中位数", "方差",
     ]
     expected_vars = src_domain[:1] + (
         Dv("Aggregate", aggregate_labels), Cv("a"), Cv("b"))
     expected_x = np.array([
         [0, 0, 4, 0],
         [0, 1, 4, 0],
         [0, 2, 10, 0],
         [0, 3, 2.5, nan],
         [0, 4, 1, nan],
         [0, 5, 4, nan],
         [0, 6, 1, nan],
         [0, 7, 2.5, nan],
         [0, 8, 1.25, nan],
         [1, 0, 0, 4],
         [1, 1, 0, 3],
         [1, 2, 0, 18],
         [1, 3, nan, 6],
         [1, 4, nan, 5],
         [1, 5, nan, 7],
         [1, 6, nan, 5],
         [1, 7, nan, 6],
         [1, 8, nan, 2 / 3],
     ])
     self.assert_table_equal(
         result, Table(Domain(expected_vars), expected_x))
Beispiel #24
0
 def test_pivot(self):
     # Full pivot using the first three domain variables and every
     # aggregation in ``Pivot.Functions``.
     nan = np.nan
     src_domain = self.table.domain
     result = Pivot(
         self.table, Pivot.Functions,
         src_domain[0], src_domain[1], src_domain[2]
     ).pivot_table

     aggregate_labels = [
         "数目", "非缺失数目", "总和", "平均值", "最小值",
         "最大值", "众数", "中位数", "方差",
     ]
     expected_vars = src_domain[:1] + (
         Dv("Aggregate", aggregate_labels), Cv("c"), Cv("d"), Cv("e"))
     expected_x = np.array([
         [0, 0, 2, 1, 0],
         [0, 1, 2, 1, 0],
         [0, 2, 5, 2, 0],
         [0, 3, 2.5, 2, nan],
         [0, 4, 1, 2, nan],
         [0, 5, 4, 2, nan],
         [0, 6, 1, 2, nan],
         [0, 7, 2.5, 2, nan],
         [0, 8, 2.25, 0, nan],
         [1, 0, 2, 1, 1],
         [1, 1, 2, 0, 1],
         [1, 2, 11, 0, 7],
         [1, 3, 5.5, nan, 7],
         [1, 4, 5, nan, 7],
         [1, 5, 6, nan, 7],
         [1, 6, 5, nan, 7],
         [1, 7, 5.5, nan, 7],
         [1, 8, 0.25, nan, 0],
     ])
     self.assert_table_equal(
         result, Table(Domain(expected_vars), expected_x))
Beispiel #25
0
 def test_group_table(self):
     # Group by the first two domain variables with every aggregation and
     # compare the group table against hand-computed values.
     nan = np.nan
     src_domain = self.table.domain
     grouped = Pivot(
         self.table, Pivot.Functions, src_domain[0], src_domain[1]
     ).group_table

     stat_vars = (
         Cv("(数目)"),
         Cv("d1 (非缺失数目)"),
         Dv("d1 (最常见)", ["a", "b"]),
         Cv("d2 (非缺失数目)"),
         Dv("d2 (最常见)", ["c", "d", "e"]),
         Cv("c1 (非缺失数目)"),
         Cv("c1 (总和)"),
         Cv("c1 (平均值)"),
         Cv("c1 (最小值)"),
         Cv("c1 (最大值)"),
         Cv("c1 (众数)"),
         Cv("c1 (中位数)"),
         Cv("c1 (方差)"),
     )
     expected_x = np.array([
         [0, 0, 2, 2, 0, 2, 0, 2, 5, 2.5, 1, 4, 1, 2.5, 2.25],
         [0, 1, 1, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0],
         [0, 2, 0, 0, nan, 0, nan, 0, 0, nan, nan, nan, nan, nan, nan],
         [1, 0, 2, 2, 1, 2, 0, 2, 11, 5.5, 5, 6, 5, 5.5, 0.25],
         [1, 1, 1, 1, 1, 1, 1, 0, 0, nan, nan, nan, nan, nan, nan],
         [1, 2, 1, 1, 1, 1, 2, 1, 7, 7, 7, 7, 7, 7, 0],
     ])
     self.assert_table_equal(
         grouped, Table(Domain(src_domain[:2] + stat_vars), expected_x))