def test_return_none(self):
    """Expect an empty RDD when ``answer`` is called with ``3`` on one line.

    The single input line is "hello hello world"; the result is compared
    against an empty RDD with ``assertRDDEqualsWithOrder``.
    """
    lines = self.sc.parallelize(["hello hello world"])
    # timeout(answer, 5) returns the callable that is actually invoked;
    # presumably 5 is a time limit -- confirm against timeout's definition.
    guarded_answer = timeout(answer, 5)
    actual = guarded_answer(lines, 3)
    empty = self.sc.parallelize([])
    self.assertTrue(self.assertRDDEqualsWithOrder(empty, actual))
def test_same_value(self):
    """Expect both words with a count of 2 when they occur equally often.

    The input line is "hello hello world world" and the expected RDD holds
    ``('hello', 2)`` and ``('world', 2)``; the comparison is done with
    ``assertRDDEquals``.
    """
    lines = ["hello hello world world"]
    rdd = self.sc.parallelize(lines)
    # timeout(answer, 5) returns the callable that is actually invoked;
    # presumably 5 is a time limit -- confirm against timeout's definition.
    timeout_ = timeout(answer, 5)
    result = timeout_(rdd)
    expected = self.sc.parallelize([('hello', 2), ('world', 2)])
    # Use the unittest assertion (as the sibling tests do) rather than a bare
    # ``assert``, which is stripped when Python runs with -O.
    self.assertTrue(self.assertRDDEquals(expected, result))
def test_select_filter_by_count_distinct(self):
    """Expect "hello" and "world" when ``answer`` is called with ``2``.

    The input line is "hello hello world world"; the result is compared
    against ``["hello", "world"]`` with ``assertRDDEqualsWithOrder``.
    """
    source_rdd = self.sc.parallelize(["hello hello world world"])
    # Build and invoke the timeout-wrapped callable in one expression.
    actual = timeout(answer, 5)(source_rdd, 2)
    wanted = self.sc.parallelize(["hello", "world"])
    matches = self.assertRDDEqualsWithOrder(wanted, actual)
    self.assertTrue(matches)
def test_order_by_value(self):
    """Expect ``('world', 2)`` before ``('hello', 1)`` for "hello world world".

    The comparison uses ``assertRDDEqualsWithOrder``.
    """
    lines = self.sc.parallelize(["hello world world"])
    guarded_answer = timeout(answer, 5)
    actual = guarded_answer(lines)
    wanted = self.sc.parallelize([('world', 2), ('hello', 1)])
    in_expected_order = self.assertRDDEqualsWithOrder(wanted, actual)
    self.assertTrue(in_expected_order)
def test_order_by_key(self):
    """Expect ``('world', 1)`` and ``('hello', 1)`` for the line "hello world".

    The comparison uses ``assertRDDEquals`` (not the ``WithOrder`` variant).
    NOTE(review): the test name suggests ordering matters -- confirm whether
    ``assertRDDEqualsWithOrder`` was intended here.
    """
    lines = ["hello world"]
    rdd = self.sc.parallelize(lines)
    # timeout(answer, 5) returns the callable that is actually invoked;
    # presumably 5 is a time limit -- confirm against timeout's definition.
    timeout_ = timeout(answer, 5)
    result = timeout_(rdd)
    expected = self.sc.parallelize([('world', 1), ('hello', 1)])
    # Use the unittest assertion (as the sibling tests do) rather than a bare
    # ``assert``, which is stripped when Python runs with -O.
    self.assertTrue(self.assertRDDEquals(expected, result))
def test_select_filter_number(self):
    """Expect only 'banana' when ``answer`` is called with ``5``.

    The input is a list of (name, number) pairs in which 'banana' is the only
    entry paired with 5; the comparison uses ``assertRDDEquals``.
    """
    pairs = [
        ('apple', 1),
        ('banana', 5),
        ('mac', 2),
        ('ipad', 3),
    ]
    pairs_rdd = self.sc.parallelize(pairs)
    actual = timeout(answer, 5)(pairs_rdd, 5)
    wanted = self.sc.parallelize(['banana'])
    self.assertTrue(self.assertRDDEquals(wanted, actual))
def test_select_without_dup(self):
    """Expect 'apple' and 'banana' when ``answer`` is called with 'fruit'.

    The input is a list of (name, category) pairs; the comparison uses
    ``assertRDDEquals``.
    """
    catalog = [
        ('apple', 'fruit'),
        ('banana', 'fruit'),
        ('mac', '3c'),
        ('ipad', '3c'),
    ]
    catalog_rdd = self.sc.parallelize(catalog)
    guarded_answer = timeout(answer, 5)
    actual = guarded_answer(catalog_rdd, 'fruit')
    wanted = self.sc.parallelize(['apple', 'banana'])
    same = self.assertRDDEquals(wanted, actual)
    self.assertTrue(same)
def test_select_filter_by_string_3(self):
    """Expect the single 'XYZ' record when ``answer`` is given that keyword.

    Each input record is ``(id, (code, number))``; only ``u'Some2'`` carries
    the code ``u'XYZ'``. The comparison uses ``assertRDDEqualsWithOrder``.
    """
    records = [
        (u'Some1', (u'ABC', 9989)),
        (u'Some2', (u'XYZ', 235)),
        (u'Some3', (u'BBB', 5379)),
        (u'Some4', (u'ABC', 5379)),
    ]
    needle = 'XYZ'
    records_rdd = self.sc.parallelize(records)
    actual = timeout(answer, 5)(records_rdd, needle)
    wanted = self.sc.parallelize([(u'Some2', (u'XYZ', 235))])
    self.assertTrue(self.assertRDDEqualsWithOrder(wanted, actual))
def test_select_filter_by_string_2(self):
    """Expect ``None`` when ``answer`` is given a keyword no record carries.

    Each input record is ``(id, (code, number))``; none of them has the code
    'QQ', and the test requires the wrapped call to return ``None``.
    """
    records = [
        (u'Some1', (u'ABC', 9989)),
        (u'Some2', (u'XYZ', 235)),
        (u'Some3', (u'BBB', 5379)),
        (u'Some4', (u'ABC', 5379)),
    ]
    keyword = 'QQ'
    rdd = self.sc.parallelize(records)
    # timeout(answer, 5) returns the callable that is actually invoked;
    # presumably 5 is a time limit -- confirm against timeout's definition.
    timeout_ = timeout(answer, 5)
    result = timeout_(rdd, keyword)
    # assertIsNone replaces the deprecated assertEquals alias (removed in
    # Python 3.12) and states the intent directly.
    self.assertIsNone(result)
def test_count_by_category(self):
    """Expect (category, name, count) triples for repeated (name, category) pairs.

    The input holds 'apple' twice, 'banana' once, 'mac' once and 'ipad' three
    times; the expected triples are listed with the '3c' entries first and
    compared using ``assertRDDEqualsWithOrder``.
    """
    purchases = [
        ('apple', 'fruit'),
        ('apple', 'fruit'),
        ('banana', 'fruit'),
        ('mac', '3c'),
        ('ipad', '3c'),
        ('ipad', '3c'),
        ('ipad', '3c'),
    ]
    purchases_rdd = self.sc.parallelize(purchases)
    guarded_answer = timeout(answer, 5)
    actual = guarded_answer(purchases_rdd)
    wanted = self.sc.parallelize([
        ('3c', 'ipad', 3),
        ('3c', 'mac', 1),
        ('fruit', 'apple', 2),
        ('fruit', 'banana', 1),
    ])
    self.assertTrue(self.assertRDDEqualsWithOrder(wanted, actual))
def test_basic_join(self):
    """Expect (name, (category, number)) rows for names present in both RDDs.

    The first RDD holds (category, name) pairs and the second holds
    (name, number) pairs. 'mac' and 'kiwi' each appear in only one input and
    are absent from the expected rows; the duplicated 'apple' row is expected
    twice. The comparison uses ``assertRDDEquals``.
    """
    categories = [
        ('fruit', 'apple'),
        ('fruit', 'apple'),
        ('fruit', 'banana'),
        ('3c', 'mac'),
    ]
    numbers = [('apple', 5), ('banana', 3), ('kiwi', 10)]
    left_rdd = self.sc.parallelize(categories)
    right_rdd = self.sc.parallelize(numbers)
    actual = timeout(answer, 5)(left_rdd, right_rdd)
    wanted = self.sc.parallelize([
        ('apple', ('fruit', 5)),
        ('apple', ('fruit', 5)),
        ('banana', ('fruit', 3)),
    ])
    joined_ok = self.assertRDDEquals(wanted, actual)
    self.assertTrue(joined_ok)