예제 #1
0
    def test_seriesStatByIndex(self):
        dataLocal = [((1,), arange(12))]
        index = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
        data = Series(self.sc.parallelize(dataLocal), index=index)

        assert_true(array_equal(data.seriesStatByIndex('sum').values().first(), array([6, 22, 38])))
        assert_true(array_equal(data.seriesStatByIndex('mean').values().first(), array([1.5, 5.5, 9.5])))
        assert_true(array_equal(data.seriesStatByIndex('min').values().first(), array([0, 4, 8])))
        assert_true(array_equal(data.seriesStatByIndex('max').values().first(), array([3, 7, 11])))
        assert_true(array_equal(data.seriesStatByIndex('count').values().first(), array([4, 4, 4])))
        assert_true(array_equal(data.seriesStatByIndex('median').values().first(), array([1.5, 5.5, 9.5])))

        assert_true(array_equal(data.seriesSumByIndex().values().first(), array([6, 22, 38])))
        assert_true(array_equal(data.seriesMeanByIndex().values().first(), array([1.5, 5.5, 9.5])))
        assert_true(array_equal(data.seriesMinByIndex().values().first(), array([0, 4, 8])))
        assert_true(array_equal(data.seriesMaxByIndex().values().first(), array([3, 7, 11])))
        assert_true(array_equal(data.seriesCountByIndex().values().first(), array([4, 4, 4])))
        assert_true(array_equal(data.seriesMedianByIndex().values().first(), array([1.5, 5.5, 9.5])))

        index = [
            [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
            [0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1],
            [0, 1, 0, 1, 2, 3, 0, 1, 0, 1, 2, 3]
        ]
        data.index = array(index).T

        result = data.seriesStatByIndex('sum', level=[0, 1])
        assert_true(array_equal(result.values().first(), array([1, 14, 13, 38])))
        assert_true(array_equal(result.index, array([[0, 0], [0, 1], [1, 0], [1, 1]])))

        result = data.seriesSumByIndex(level=[0, 1])
        assert_true(array_equal(result.values().first(), array([1, 14, 13, 38])))
        assert_true(array_equal(result.index, array([[0, 0], [0, 1], [1, 0], [1, 1]])))
예제 #2
0
    def test_index_setter_getter(self):
        dataLocal = [
            ((1,), array([1.0, 2.0, 3.0])),
            ((2,), array([2.0, 2.0, 4.0])),
            ((3,), array([4.0, 2.0, 1.0]))
        ]
        data = Series(self.sc.parallelize(dataLocal))

        assert_true(array_equal(data.index, array([0, 1, 2])))
        data.index = [3, 2, 1]
        assert_true(data.index == [3, 2, 1])

        def setIndex(data, idx):
            data.index = idx

        assert_raises(ValueError, setIndex, data, 5)
        assert_raises(ValueError, setIndex, data, [1, 2])
예제 #3
0
    def test_seriesAggregateByIndex(self):
        dataLocal = [((1,), arange(12))]
        index = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
        data = Series(self.sc.parallelize(dataLocal), index=index)

        result = data.seriesAggregateByIndex(sum)
        print result.values().first()
        assert_true(array_equal(result.values().first(), array([6, 22, 38])))
        assert_true(array_equal(result.index, array([0, 1, 2])))

        index = [
            [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
            [0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1],
            [0, 1, 0, 1, 2, 3, 0, 1, 0, 1, 2, 3]
        ]
        data.index = array(index).T
        
        result = data.seriesAggregateByIndex(sum, level=[0, 1])
        assert_true(array_equal(result.values().first(), array([1, 14, 13, 38])))
        assert_true(array_equal(result.index, array([[0, 0], [0, 1], [1, 0], [1, 1]])))
예제 #4
0
    def test_selectByIndex(self):
        dataLocal = [((1,), arange(12))]
        index = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
        data = Series(self.sc.parallelize(dataLocal), index=index)

        result = data.selectByIndex(1)
        assert_true(array_equal(result.values().first(), array([4, 5, 6, 7])))
        assert_true(array_equal(result.index, array([1, 1, 1, 1])))

        result = data.selectByIndex(1, squeeze=True)
        assert_true(array_equal(result.index, array([0, 1, 2, 3])))

        index = [
            [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
            [0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1],
            [0, 1, 0, 1, 2, 3, 0, 1, 0, 1, 2, 3]
        ]
        data.index = array(index).T

        result, mask = data.selectByIndex(0, level=2, returnMask=True)
        assert_true(array_equal(result.values().first(), array([0, 2, 6, 8])))
        assert_true(array_equal(result.index, array([[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0]])))
        assert_true(array_equal(mask, array([1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0])))

        result = data.selectByIndex(0, level=2, squeeze=True)
        assert_true(array_equal(result.values().first(), array([0, 2, 6, 8])))
        assert_true(array_equal(result.index, array([[0, 0], [0, 1], [1, 0], [1, 1]])))

        result = data.selectByIndex([1, 0], level=[0, 1])
        assert_true(array_equal(result.values().first(), array([6, 7])))
        assert_true(array_equal(result.index, array([[1, 0, 0], [1, 0, 1]])))

        result = data.selectByIndex(val=[0, [2,3]], level=[0, 2])
        assert_true(array_equal(result.values().first(), array([4, 5])))
        assert_true(array_equal(result.index, array([[0, 1, 2], [0, 1, 3]])))

        result = data.selectByIndex(1, level=1, filter=True)
        assert_true(array_equal(result.values().first(), array([0, 1, 6, 7])))
        assert_true(array_equal(result.index, array([[0, 0, 0], [0, 0, 1], [1, 0, 0], [1, 0, 1]])))