def test_seriesStatByIndex(self):
    """
    Exercise seriesStatByIndex and the per-statistic convenience methods.

    A single record holding the values 0..11 is first grouped by a flat
    index with three labels of four entries each, then by the first two
    levels of a three-level index.
    """
    def firstValues(series):
        # Every Series built here holds exactly one record.
        return series.values().first()

    def check(actual, expected):
        assert_true(array_equal(actual, array(expected)))

    records = [((1,), arange(12))]
    flatIndex = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
    data = Series(self.sc.parallelize(records), index=flatIndex)

    # (statistic name, convenience method name, expected value per label)
    cases = [
        ('sum', 'seriesSumByIndex', [6, 22, 38]),
        ('mean', 'seriesMeanByIndex', [1.5, 5.5, 9.5]),
        ('min', 'seriesMinByIndex', [0, 4, 8]),
        ('max', 'seriesMaxByIndex', [3, 7, 11]),
        ('count', 'seriesCountByIndex', [4, 4, 4]),
        ('median', 'seriesMedianByIndex', [1.5, 5.5, 9.5]),
    ]

    # Generic entry point, statistic selected by name.
    for stat, _, expected in cases:
        check(firstValues(data.seriesStatByIndex(stat)), expected)

    # Convenience methods are checked against the same expected values.
    for _, methodName, expected in cases:
        check(firstValues(getattr(data, methodName)()), expected)

    # Multi-level index: one row per level, transposed so that each of the
    # twelve entries carries a three-component label.
    levels = [
        [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
        [0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1],
        [0, 1, 0, 1, 2, 3, 0, 1, 0, 1, 2, 3],
    ]
    data.index = array(levels).T

    def checkGrouped(result):
        # Grouping on levels 0 and 1 leaves four distinct label pairs.
        check(firstValues(result), [1, 14, 13, 38])
        check(result.index, [[0, 0], [0, 1], [1, 0], [1, 1]])

    checkGrouped(data.seriesStatByIndex('sum', level=[0, 1]))
    checkGrouped(data.seriesSumByIndex(level=[0, 1]))