Esempio n. 1
0
 def test_distinct(self):
     """Check that ``distinct()`` collapses repeated elements into one.

     For each ``(start, stop, step)`` in ``TEST_RANGES`` every element is
     first mapped through ``RDDTests.return_one``; a non-empty input is
     then expected to collect to ``[1]`` and an empty input to ``[]``.
     """
     for start, stop, step in self.TEST_RANGES:
         values = list(range(start, stop, step))
         rdd = RDD(values, self.SPARK_CONTEXT)
         rdd = rdd.map(RDDTests.return_one).distinct()
         # A non-empty input must collapse to a single element; an empty
         # input must stay empty.
         expected = [1] if values else []
         # assertEqual replaces the deprecated assertEquals alias, which
         # was removed from unittest in Python 3.12.
         self.assertEqual(rdd.collect(), expected)
Esempio n. 2
0
 def test_distinct(self):
     """Check that ``distinct()`` collapses repeated elements into one.

     For each ``(start, stop, step)`` in ``TEST_RANGES`` every element is
     first mapped through ``RDDTests.return_one``; a non-empty input is
     then expected to collect to ``[1]`` and an empty input to ``[]``.
     """
     for start, stop, step in self.TEST_RANGES:
         values = list(range(start, stop, step))
         rdd = RDD(values, self.SPARK_CONTEXT)
         rdd = rdd.map(RDDTests.return_one).distinct()
         # A non-empty input must collapse to a single element; an empty
         # input must stay empty.
         expected = [1] if values else []
         # assertEqual replaces the deprecated assertEquals alias, which
         # was removed from unittest in Python 3.12.
         self.assertEqual(rdd.collect(), expected)
Esempio n. 3
0
 def test_filter(self):
     """Check that ``RDD.filter`` agrees with the built-in ``filter``.

     For each ``(start, stop, step)`` in ``TEST_RANGES`` the elements kept
     by ``RDDTests.is_square`` must be collected in the same order as the
     built-in ``filter`` produces with the same predicate.
     """
     for start, stop, step in self.TEST_RANGES:
         values = list(range(start, stop, step))
         # Reference result computed with the built-in filter.
         expected = list(filter(RDDTests.is_square, values))
         rdd = RDD(values, self.SPARK_CONTEXT)
         rdd = rdd.filter(RDDTests.is_square)
         # assertEqual replaces the deprecated assertEquals alias, which
         # was removed from unittest in Python 3.12.
         self.assertEqual(rdd.collect(), expected)
Esempio n. 4
0
    def test_init(self):
        """Check which collection types ``RDD`` accepts at construction.

        A list must come back from ``collect()`` unchanged, a set must come
        back with the same elements (order ignored), and constructing an
        ``RDD`` from a tuple or a string is expected to raise
        ``AttributeError``.
        """
        for start, stop, step in self.TEST_RANGES:
            values = list(range(start, stop, step))
            rdd = RDD(values, self.SPARK_CONTEXT)
            # assertEqual replaces the deprecated assertEquals alias, which
            # was removed from unittest in Python 3.12.
            self.assertEqual(values, rdd.collect())

        # The set check does not depend on the range parameters, so it runs
        # once here instead of once per loop iteration.
        unique_values = set(range(100))
        rdd = RDD(unique_values, self.SPARK_CONTEXT)
        # Sets carry no order, so compare sorted contents.
        self.assertEqual(sorted(unique_values), sorted(rdd.collect()))

        with self.assertRaises(AttributeError):
            RDD((1, 2, 3), self.SPARK_CONTEXT)

        with self.assertRaises(AttributeError):
            RDD('', self.SPARK_CONTEXT)
Esempio n. 5
0
    def test_init(self):
        """Check which collection types ``RDD`` accepts at construction.

        A list must come back from ``collect()`` unchanged, a set must come
        back with the same elements (order ignored), and constructing an
        ``RDD`` from a tuple or a string is expected to raise
        ``AttributeError``.
        """
        for start, stop, step in self.TEST_RANGES:
            values = list(range(start, stop, step))
            rdd = RDD(values, self.SPARK_CONTEXT)
            # assertEqual replaces the deprecated assertEquals alias, which
            # was removed from unittest in Python 3.12.
            self.assertEqual(values, rdd.collect())

        # The set check does not depend on the range parameters, so it runs
        # once here instead of once per loop iteration.
        unique_values = set(range(100))
        rdd = RDD(unique_values, self.SPARK_CONTEXT)
        # Sets carry no order, so compare sorted contents.
        self.assertEqual(sorted(unique_values), sorted(rdd.collect()))

        with self.assertRaises(AttributeError):
            RDD((1, 2, 3), self.SPARK_CONTEXT)

        with self.assertRaises(AttributeError):
            RDD('', self.SPARK_CONTEXT)
Esempio n. 6
0
 def test_filter(self):
     """Check that ``RDD.filter`` agrees with the built-in ``filter``.

     For each ``(start, stop, step)`` in ``TEST_RANGES`` the elements kept
     by ``RDDTests.is_square`` must be collected in the same order as the
     built-in ``filter`` produces with the same predicate.
     """
     for start, stop, step in self.TEST_RANGES:
         values = list(range(start, stop, step))
         # Reference result computed with the built-in filter.
         expected = list(filter(RDDTests.is_square, values))
         rdd = RDD(values, self.SPARK_CONTEXT)
         rdd = rdd.filter(RDDTests.is_square)
         # assertEqual replaces the deprecated assertEquals alias, which
         # was removed from unittest in Python 3.12.
         self.assertEqual(rdd.collect(), expected)
Esempio n. 7
0
 def test_group_by_key(self):
     """Check that ``groupByKey()`` gathers all values sharing a key.

     The input holds one value for key 1, two for key 2 and three for
     key 3; each key must end up paired with exactly its own values.
     """
     pairs = [(1, 1), (2, 1), (2, 2), (3, 1), (3, 2), (3, 3)]
     rdd = RDD(pairs, self.SPARK_CONTEXT)
     grouped = rdd.groupByKey().collect()
     # Materialise each group's values as a list so groups compare by
     # content against the expected literal.
     actual = [(kv[0], list(kv[1])) for kv in grouped]
     expected = [(1, [1]), (2, [1, 2]), (3, [1, 2, 3])]
     # Both sides are sorted so the order of keys in the result is not
     # part of the assertion. assertEqual replaces the deprecated
     # assertEquals alias, which was removed from unittest in Python 3.12.
     self.assertEqual(sorted(actual), sorted(expected))
Esempio n. 8
0
 def test_group_by_key(self):
     """Check that ``groupByKey()`` gathers all values sharing a key.

     The input holds one value for key 1, two for key 2 and three for
     key 3; each key must end up paired with exactly its own values.
     """
     pairs = [(1, 1), (2, 1), (2, 2), (3, 1), (3, 2), (3, 3)]
     rdd = RDD(pairs, self.SPARK_CONTEXT)
     grouped = rdd.groupByKey().collect()
     # Materialise each group's values as a list so groups compare by
     # content against the expected literal.
     actual = [(kv[0], list(kv[1])) for kv in grouped]
     expected = [(1, [1]), (2, [1, 2]), (3, [1, 2, 3])]
     # Both sides are sorted so the order of keys in the result is not
     # part of the assertion. assertEqual replaces the deprecated
     # assertEquals alias, which was removed from unittest in Python 3.12.
     self.assertEqual(sorted(actual), sorted(expected))
Esempio n. 9
0
    def test_reduce_by_key(self):
        """Check that ``reduceByKey()`` folds the values of each key.

        With addition as the reducer, the values of keys 1, 2 and 3 must
        sum to 1, 3 and 6 respectively.
        """
        pairs = [(1, 1), (2, 1), (2, 2), (3, 1), (3, 2), (3, 3)]
        rdd = RDD(pairs, self.SPARK_CONTEXT)
        rdd = rdd.reduceByKey(lambda a, b: a + b)

        expected = [(1, 1), (2, 3), (3, 6)]
        # Both sides are sorted so the order of keys in the result is not
        # part of the assertion. assertEqual replaces the deprecated
        # assertEquals alias, which was removed from unittest in Python 3.12.
        self.assertEqual(sorted(rdd.collect()), sorted(expected))
Esempio n. 10
0
 def test_flat_map(self):
     """Check that ``flatMap()`` flattens the per-element results in order.

     For each ``(start, stop, step)`` in ``TEST_RANGES`` the collected
     result must equal the concatenation of ``RDDTests.triplicate(x)``
     over the input elements, in input order.
     """
     for start, stop, step in self.TEST_RANGES:
         values = list(range(start, stop, step))
         # Reference result: concatenate each element's expansion.
         expected = []
         for value in values:
             expected.extend(RDDTests.triplicate(value))
         rdd = RDD(values, self.SPARK_CONTEXT)
         rdd = rdd.flatMap(RDDTests.triplicate)
         # assertEqual replaces the deprecated assertEquals alias, which
         # was removed from unittest in Python 3.12.
         self.assertEqual(rdd.collect(), expected)
Esempio n. 11
0
 def test_flat_map(self):
     """Check that ``flatMap()`` flattens the per-element results in order.

     For each ``(start, stop, step)`` in ``TEST_RANGES`` the collected
     result must equal the concatenation of ``RDDTests.triplicate(x)``
     over the input elements, in input order.
     """
     for start, stop, step in self.TEST_RANGES:
         values = list(range(start, stop, step))
         # Reference result: concatenate each element's expansion.
         expected = []
         for value in values:
             expected.extend(RDDTests.triplicate(value))
         rdd = RDD(values, self.SPARK_CONTEXT)
         rdd = rdd.flatMap(RDDTests.triplicate)
         # assertEqual replaces the deprecated assertEquals alias, which
         # was removed from unittest in Python 3.12.
         self.assertEqual(rdd.collect(), expected)
Esempio n. 12
0
 def test_reduce_by_key(self):
     """Check that ``reduceByKey()`` folds the values of each key.

     With addition as the reducer, the values of keys 1, 2 and 3 must
     sum to 1, 3 and 6 respectively.
     """
     pairs = [(1, 1), (2, 1), (2, 2), (3, 1), (3, 2), (3, 3)]
     rdd = RDD(pairs, self.SPARK_CONTEXT)
     rdd = rdd.reduceByKey(lambda a, b: a + b)
     expected = [(1, 1), (2, 3), (3, 6)]
     # Both sides are sorted so the order of keys in the result is not
     # part of the assertion. assertEqual replaces the deprecated
     # assertEquals alias, which was removed from unittest in Python 3.12.
     self.assertEqual(sorted(rdd.collect()), sorted(expected))