Beispiel #1
0
    def test_combineByKey(self):
        """combineByKey should collapse each key's values into a single set.

        Besides the final result, the test checks that both the value-merge
        and the combiner-merge callbacks were actually invoked, so that a
        defect in either callback path cannot go unnoticed.
        """
        pairs = [('a', 1), ('b', 3), ('a', 5), ('b', 3), ('a', 1)]

        # Names of the merge callbacks that have run at least once.  A result
        # could be produced through value merging alone, which would leave the
        # combiner-merge path untested.
        invoked = set()

        def create_combiner(first):
            return {first}

        def merge_value(acc, value):
            invoked.add('merge_value')
            acc.add(value)
            return acc

        def merge_combiners(left, right):
            invoked.add('merge_combiners')
            left.update(right)
            return left

        combined = list(
            MockRDD.from_seq(pairs).combineByKey(
                create_combiner, merge_value, merge_combiners))

        self.assertEqual(len(combined), 2)
        self.assertEqual(dict(combined), {'a': {1, 5}, 'b': {3}})
        self.assertTrue('merge_value' in invoked)
        self.assertTrue('merge_combiners' in invoked)
Beispiel #2
0
 def test_distinct(self):
     """distinct() should yield every unique element exactly once."""
     values = [1, 2, 5, 1, 6, 8, 2, 90]
     expected = set(values)
     collected = list(MockRDD.from_seq(values).distinct())
     # Equal length plus equal set contents rules out duplicates.
     self.assertEqual(len(collected), len(expected))
     self.assertEqual(set(collected), expected)
Beispiel #3
0
    def test_mapPartitions(self):
        """mapPartitions should hand the partition's elements to the callback.

        The callback verifies the sequence it receives and yields a single
        marker value, which must be the only element of the resulting RDD.
        """
        x = [3, 's', None]

        def func(seq):
            # Use a unittest assertion rather than a bare ``assert``: bare
            # asserts are removed when Python runs with -O, which would
            # silently turn this check into a no-op.
            self.assertEqual(list(seq), x)
            yield 'result'

        rdd = MockRDD(identity, x).mapPartitions(func)
        self.assertEqual(list(rdd), ['result'])
Beispiel #4
0
    def test_mapPartitionsWithIndex(self):
        """mapPartitionsWithIndex should pass an index and the elements.

        The callback verifies the sequence it receives and yields a single
        marker value, which must be the only element of the resulting RDD.
        """
        x = [1, 2, 3]

        def func(index, seq):
            # NOTE(review): ``index`` is accepted but not checked here; its
            # expected value is not visible from this test -- confirm against
            # MockRDD before asserting on it.
            # Use a unittest assertion rather than a bare ``assert``: bare
            # asserts are removed when Python runs with -O, which would
            # silently turn this check into a no-op.
            self.assertEqual(list(seq), x)
            yield 'value'

        rdd = MockRDD(identity, x).mapPartitionsWithIndex(func)
        self.assertEqual(list(rdd), ['value'])
Beispiel #5
0
    def test_groupByKey(self):
        """groupByKey should gather all values that share a key."""
        pairs = [('a', 1), ('b', 3), ('a', 5), ('b', 3)]

        grouped_pairs = list(MockRDD.from_seq(pairs).groupByKey())
        self.assertEqual(len(grouped_pairs), 2)

        grouped = dict(grouped_pairs)
        self.assertEqual(set(grouped), {'a', 'b'})
        # Values for 'a' are compared as a set (order is not checked) ...
        self.assertEqual(set(grouped['a']), {1, 5})
        # ... while 'b' must keep its duplicate value in a list.
        self.assertEqual(grouped['b'], [3, 3])
Beispiel #6
0
    def test_flatMap(self):
        """flatMap should concatenate the iterables produced per element.

        Each element ``el`` is expanded to ``0 .. el - 1``; the expansions
        must appear flattened in the resulting RDD, in input order.
        """
        x = [1, 2, 3]

        def func(el):
            # Use a unittest assertion rather than a bare ``assert``: bare
            # asserts are removed when Python runs with -O, which would
            # silently turn this check into a no-op.
            self.assertIn(el, x)
            yield from range(el)

        rdd = MockRDD.from_seq(x).flatMap(func)
        self.assertEqual(list(rdd), [0, 0, 1, 0, 1, 2])
Beispiel #7
0
    def test_flatMapValues(self):
        """flatMapValues should expand each value and keep its key.

        A value ``v`` is expanded to ``1 .. v - 1``, so the pair with value 1
        contributes nothing and the pair with value 3 contributes two items.
        """
        pairs = [('a', 1), ('b', 3)]

        def expand(value):
            # Only the values should reach the callback, never the keys.
            self.assertIn(value, (1, 3))
            yield from range(1, value)

        expanded = MockRDD.from_seq(pairs).flatMapValues(expand)
        self.assertEqual(list(expanded), [('b', 1), ('b', 2)])
Beispiel #8
0
    def test_aggregate(self):
        """aggregate should fold all elements into the zero value.

        The test additionally requires that the combine operation was called
        at least once.
        """
        elements = [1, 2, 3, 5, 2, 8, 3]
        initial = set()

        # One entry is appended per combine call; non-empty means "was used".
        combine_calls = []

        def add_element(acc, item):
            acc.add(item)
            return acc

        def combine(left, right):
            combine_calls.append(True)
            left.update(right)
            return left

        aggregated = MockRDD.from_seq(elements).aggregate(
            initial, add_element, combine)

        self.assertEqual(aggregated, set(elements))
        self.assertTrue(combine_calls)
Beispiel #9
0
 def test_union(self):
     """union() should yield the first RDD's elements, then the second's."""
     first = [1, 2]
     second = [7, 8]
     left_rdd = MockRDD.from_seq(first)
     combined = left_rdd.union(MockRDD.from_seq(second))
     self.assertEqual(list(combined), first + second)
Beispiel #10
0
 def test_reduce(self):
     """reduce() with addition should equal the sum of the elements."""
     numbers = [1, 2, 3, 5]

     def add(a, b):
         return a + b

     total = MockRDD.from_seq(numbers).reduce(add)
     self.assertEqual(total, sum(numbers))
Beispiel #11
0
 def test_fold(self):
     """fold() with addition should equal the initial value plus the sum."""
     numbers = [1, 2, 3, 5]
     initial = 7

     def add(a, b):
         return a + b

     folded = MockRDD.from_seq(numbers).fold(initial, add)
     self.assertEqual(folded, initial + sum(numbers))
Beispiel #12
0
 def test_only_one_pass(self):
     """An RDD built on a one-shot iterator can only be collected once.

     The first collect() returns all elements; the second must return an
     empty list and emit a Warning.
     """
     one_shot = iter(range(5))
     rdd = MockRDD(identity, one_shot)

     first_pass = rdd.collect()
     self.assertEqual(first_pass, list(range(5)))

     with self.assertWarns(Warning):
         second_pass = rdd.collect()
         self.assertEqual(second_pass, [])
Beispiel #13
0
 def test_collectAsMap(self):
     """collectAsMap() should turn key/value pairs into a dict."""
     pairs = [(1, 2)]
     as_map = MockRDD(identity, pairs).collectAsMap()
     self.assertEqual(as_map, {1: 2})
Beispiel #14
0
 def test_collect(self):
     """collect() should return the RDD's elements as a list."""
     rdd = MockRDD(identity, [1])
     collected = rdd.collect()
     self.assertEqual(collected, [1])
Beispiel #15
0
 def test_persist(self):
     """A persisted RDD should return the same data on repeated collects.

     The source is a one-shot iterator, so the repeated collects below only
     succeed if persist() keeps the data available.
     """
     expected = list(range(5))
     persisted = MockRDD(identity, iter(range(5))).persist()
     attempts = 10
     for _ in range(attempts):
         self.assertEqual(persisted.collect(), expected)
Beispiel #16
0
 def test_sum(self):
     """sum() should add up all elements."""
     numbers = [1, 5, 2]
     total = MockRDD.from_seq(numbers).sum()
     self.assertEqual(total, 8)
Beispiel #17
0
 def test_countByValue(self):
     """countByValue() should map each distinct element to its count."""
     values = [1, 3, 1]
     counts = MockRDD.from_seq(values).countByValue()
     self.assertEqual(counts, {1: 2, 3: 1})
Beispiel #18
0
 def test_cogroup_match(self):
     """cogroup() of two RDDs sharing a key should pair up their values.

     Both call orders are checked; the receiver's values come first in the
     resulting tuple.
     """
     # Persisted because each RDD is used in two cogroup calls below.
     rdd_zero = MockRDD.of(('k', 0)).persist()
     rdd_one = MockRDD.of(('k', 1)).persist()

     zero_then_one = rdd_zero.cogroup(rdd_one).collect()
     self.assertEqual(zero_then_one, [('k', ([0], [1]))])

     one_then_zero = rdd_one.cogroup(rdd_zero).collect()
     self.assertEqual(one_then_zero, [('k', ([1], [0]))])
Beispiel #19
0
 def test_cogroup_empty(self):
     """cogroup() of two empty RDDs should contain no elements."""
     left = MockRDD.empty()
     cogrouped = left.cogroup(MockRDD.empty())
     self.assertEqual(cogrouped.count(), 0)
Beispiel #20
0
    def test_mapValues(self):
        """mapValues should transform each value and leave keys untouched."""
        pairs = [('a', 1), ('b', 3)]

        def increment(value):
            return value + 1

        mapped = MockRDD.from_seq(pairs).mapValues(increment)
        self.assertEqual(list(mapped), [('a', 2), ('b', 4)])
Beispiel #21
0
    def test_map(self):
        """map should apply the function to every element, in order."""
        numbers = [1, 2, 3, 5, 8]

        def double(n):
            return 2 * n

        doubled = MockRDD(identity, numbers).map(double)
        self.assertEqual(list(doubled), [2, 4, 6, 10, 16])
Beispiel #22
0
    def test_filter(self):
        """filter should keep only the elements matching the predicate."""
        numbers = [1, 2, 3, 4, 5]

        def is_even(n):
            return n % 2 == 0

        evens = MockRDD(identity, numbers).filter(is_even)
        self.assertEqual(list(evens), [2, 4])
Beispiel #23
0
 def test_cogroup_only_one(self):
     """cogroup() with an empty RDD should give an empty list on that side.

     Both call orders are checked.
     """
     # Persisted because each RDD is used in two cogroup calls below.
     nothing = MockRDD.empty().persist()
     single = MockRDD.of(('k', 1)).persist()

     empty_first = nothing.cogroup(single).collect()
     self.assertEqual(empty_first, [('k', ([], [1]))])

     empty_second = single.cogroup(nothing).collect()
     self.assertEqual(empty_second, [('k', ([1], []))])
Beispiel #24
0
 def test_keys(self):
     """keys() should yield the first item of every pair, in order."""
     pairs = [(1, 2), (3, 4)]
     key_rdd = MockRDD.from_seq(pairs).keys()
     self.assertEqual(list(key_rdd), [1, 3])
Beispiel #25
0
 def test_min(self):
     """min() should return the smallest element."""
     numbers = [1, 2, 3, 5, 2, 8, 3]
     smallest = MockRDD.from_seq(numbers).min()
     self.assertEqual(smallest, 1)
Beispiel #26
0
 def test_values(self):
     """values() should yield the second item of every pair, in order."""
     pairs = [(1, 2), (3, 4)]
     value_rdd = MockRDD.from_seq(pairs).values()
     self.assertEqual(list(value_rdd), [2, 4])
Beispiel #27
0
 def test_count(self):
     """count() should return the number of elements, duplicates included."""
     elements = [1, 2, 1]
     how_many = MockRDD.from_seq(elements).count()
     self.assertEqual(how_many, 3)
Beispiel #28
0
 def test_keyBy(self):
     """keyBy() should pair each element with the key computed from it."""
     numbers = [1, 2]

     def parity(n):
         return n % 2

     keyed = MockRDD.from_seq(numbers).keyBy(parity)
     self.assertEqual(list(keyed), [(1, 1), (0, 2)])
Beispiel #29
0
 def test_countByKey(self):
     """countByKey() should map each key to the number of pairs using it."""
     pairs = [(1, 'a'), (3, 'a'), (1, 'b')]
     per_key = MockRDD.from_seq(pairs).countByKey()
     self.assertEqual(per_key, {1: 2, 3: 1})
Beispiel #30
0
 def test_multiple(self):
     """Iterating an RDD should yield mixed-type elements unchanged."""
     mixed = [1, 3, '5', (), 102]
     rdd = MockRDD(identity, mixed)
     self.assertEqual(list(rdd), mixed)