class SaveTestCase(TestCase):
    def serve(self, path):
        return '{0}\n'.format(path)

    def test_save_map(self):
        input = range(10)
        self.job = SaveMapJob().run(input=self.test_server.urls(input))
        results = sorted(self.results(self.job))
        self.tag = self.disco.results(self.job.name)[1][0]
        # Previously, each map would save one blob into DDFS.  Now, the
        # pipeline termination does it, using the output of the shuffle
        # stage.  The number of blobs in the tag therefore depends on the
        # grouping used for shuffle and on the number of nodes used, so we
        # can no longer assert on the number of blobs in the tag.
        # self.assertEquals(len(list(self.ddfs.blobs(self.tag))), len(input))
        self.assertEquals(results,
                          [(str_to_bytes(str(e) + '!'), '') for e in input])

    def test_save(self):
        ducks = ['dewey', 'huey', 'louie']
        a, b = SaveJob1(), SaveJob2()
        self.job = JobChain({a: self.test_server.urls(ducks), b: a})
        self.job.wait()
        self.tag = self.disco.results(b)[1][0]
        self.assertAllEqual(sorted(self.results(b)),
                            [(str_to_bytes('{0}!?!?'.format(d)), '') for d in ducks])

    def tearDown(self):
        super(SaveTestCase, self).tearDown()
        if hasattr(self, 'tag'):
            self.ddfs.delete(self.tag)
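# A hedged sketch of the kind of job SaveTestCase exercises.  SaveMapJob,
# SaveJob1 and SaveJob2 are defined elsewhere in the test suite; the base
# class, the save attribute and the map body below are assumptions inferred
# from the assertions above (each input line comes back with '!' appended and
# the results end up in a DDFS tag), not the real definitions.
class SaveMapJobSketch(TestJob):
    save = True  # assumption: this flag is what routes the results into DDFS

    @staticmethod
    def map(e, params):
        # serve() returns '{0}\n'.format(path) per input, so strip the
        # newline and append '!' to each line, as test_save_map expects.
        yield e.strip() + '!', ''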
class DavinChainTestCase(TestCase):
    def runTest(self):
        a, b, c = DavinChainJobA(), DavinChainJobA(), DavinChainJobC()
        self.job = JobChain({a: ['raw://0', 'raw://1', 'raw://2'],
                             b: ['raw://3', 'raw://4', 'raw://5'],
                             c: [a, b]})
        self.job.wait()
        self.assertAllEqual(sorted(self.results(c)),
                            ((str(x), '') for x in range(6)))
class SchemesTestCase(TestCase):
    animals = ['horse', 'sheep', 'whale', 'tiger']

    def serve(self, path):
        return '\n'.join(self.animals)

    def test_scheme_disco(self):
        a, b = SchemesJobA(), SchemesJobB()
        self.job = JobChain({a: self.test_server.urls([''] * 10), b: a})
        self.job.wait()
        for key, value in self.results(b):
            self.assert_(key in self.animals)
            self.assertEquals(value, None)
class AsyncTestCase(TestCase):
    def sample(self, n):
        from random import sample
        return sample(range(n * 10), n * 2)

    def serve(self, path):
        return '\n'.join([path] * 10)

    def runTest(self):
        N = self.num_workers
        self.job = JobChain((AsyncJob(), self.test_server.urls(self.sample(N)))
                            for x in range(5))
        self.job.wait()
        for job in self.job:
            self.assertEquals(sum(1 for result in self.results(job)), N * 20)
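# Why the assertion expects N * 20 results per job: sample(N) picks N * 2
# inputs, serve() repeats each path 10 times, so a job whose map emits one
# pair per served line (the assumed behaviour of AsyncJob, which is defined
# elsewhere) produces N * 2 * 10 = N * 20 results.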
class ChainTestCase(TestCase):
    animals = [b'horse', b'sheep', b'whale', b'tiger']

    def serve(self, path):
        return b'\n'.join(self.animals)

    def runTest(self):
        a, b = ChainJobA(), ChainJobB()
        self.job = JobChain({a: self.test_server.urls([''] * 100), b: a})
        self.job.wait()
        for key, value in self.results(b):
            self.assert_(key[:5] in self.animals)
            self.assertEquals(key[5:], b'0-1-')
            self.assertEquals(value, 1)
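# A rough sketch, inferred only from the assertions above, of the per-stage
# transformation ChainJobA and ChainJobB (defined elsewhere) are expected to
# perform between them: each stage appends '<stage>-' to the key, and the
# value records the last stage seen, which is why the final keys end in
# b'0-1-' and the final value is 1.  The function name and signature below
# are hypothetical.
def chain_stage_sketch(pairs, stage):
    # 'pairs' is assumed to be an iterable of (key, value) tuples from the
    # previous stage (or (line, 0) pairs for the initial input).
    for key, value in pairs:
        yield key + '{0}-'.format(stage).encode('ascii'), stage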
def runTest(self):
    input = self.test_server.urls([''] * 5)
    a, b, c = WaitJob1(), WaitJob1(), WaitJob2()
    self.job = JobChain({a: input, b: input, c: input})
    self.assertRaises(JobError, self.job.wait)
    valid = JobChain({a: input, b: input})
    self.assertEquals(valid.wait(), valid)
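# The failure path above hinges on WaitJob2 failing while WaitJob1 succeeds;
# both are defined elsewhere, so the sketch below only captures that assumed
# behaviour (a map that raises, so the first JobChain's wait() surfaces a
# JobError), not the real WaitJob2.
class FailingJobSketch(TestJob):
    @staticmethod
    def map(e, params):
        raise Exception('deliberate failure so JobChain.wait() raises JobError')
        yield  # unreachable; present only so this function is a generator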
class InputTestCase(TestCase):
    def serve(self, path):
        return b'smoothies'

    def test_empty_map(self):
        self.job = MapJob().run(input=[])
        self.assertResults(self.job, [])

    def test_empty_reduce(self):
        self.job = ReduceJob().run(input=[])
        self.assertResults(self.job, [])

    def test_empty_mapreduce(self):
        self.job = MapReduceJob().run(input=[])
        self.assertResults(self.job, [])

    def test_partitioned_map(self):
        self.job = MapJob().run(input=['raw://organic_vodka'], partitions=2)
        self.assertResults(self.job, [('organic_vodka', 'against_me')])

    def test_nonpartitioned_map(self):
        self.job = MapJob().run(input=['raw://organic_vodka'], partitions=None)
        self.assertResults(self.job, [('organic_vodka', 'against_me')])

    def test_nonpartitioned_reduce(self):
        self.job = ReduceJob().run(input=self.test_server.urls(['test']),
                                   partitions=None,
                                   reduce_reader=None)
        self.assertResults(self.job, [(b'smoothies', 'mmm')])

    def test_partitioned_mapreduce(self):
        self.job = MapReduceJob().run(input=self.test_server.urls(['test']),
                                      partitions=8,
                                      reduce_reader=task_io.chain_reader)
        self.assertResults(self.job, [((b'smoothies', 'against_me'), 'mmm')])

    def test_partitioned_reduce(self):
        beers = ['sam_adams', 'trader_jose', 'boont_esb']
        input = ['raw://{0}'.format(beer) for beer in beers]
        a, b, c, d = MapJob(), MapJob(), ReduceJob(), MergeReduceJob()
        self.job = JobChain({a: input, b: input, c: [a, b], d: [a, b]})
        self.job.wait()
        self.assertAllEqual(sorted(self.results(c)), sorted(self.results(d)))
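# A hedged sketch of the map and reduce shapes the InputTestCase assertions
# imply.  MapJob, ReduceJob, MapReduceJob and MergeReduceJob are defined
# elsewhere; the constants 'against_me' and 'mmm' are read off the expected
# results above, so treat these as illustrations of the expected input/output
# shape rather than the real implementations.
def map_sketch(e, params):
    # e.g. 'organic_vodka' -> ('organic_vodka', 'against_me')
    yield e, 'against_me'

def reduce_sketch(input_iter, params):
    # e.g. b'smoothies' -> (b'smoothies', 'mmm')
    for key in input_iter:
        yield key, 'mmm'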