コード例 #1
0
ファイル: test_save.py プロジェクト: JohnEmhoff/disco
class SaveTestCase(TestCase):
    """Check that job results saved into DDFS match the expected records."""

    def serve(self, path):
        """Echo the requested path back to the client, newline-terminated."""
        return '{0}\n'.format(path)

    def test_save_map(self):
        # 'inputs' rather than 'input' to avoid shadowing the builtin.
        inputs = range(10)
        self.job = SaveMapJob().run(input=self.test_server.urls(inputs))
        results = sorted(self.results(self.job))
        # Remember the DDFS tag so tearDown can clean it up.
        self.tag = self.disco.results(self.job.name)[1][0]

        # Previously, each map would save one blob into DDFS.  Now,
        # the pipeline termination does it, using the output of the
        # shuffle stage.  So now, the number of blobs in the tag
        # depends on the grouping used for shuffle, and also the
        # number of nodes used.  Hence, we cannot anymore assert on
        # the number of blobs in the tag.

        # self.assertEquals(len(list(self.ddfs.blobs(self.tag))), len(inputs))

        # assertEqual: the assertEquals alias was deprecated and removed
        # in Python 3.12.
        self.assertEqual(results,
                         [(str_to_bytes(str(e) + '!'), '') for e in inputs])

    def test_save(self):
        ducks = ['dewey', 'huey', 'louie']
        a, b = SaveJob1(), SaveJob2()
        self.job = JobChain({a: self.test_server.urls(ducks),
                             b: a})
        self.job.wait()
        self.tag = self.disco.results(b)[1][0]
        self.assertAllEqual(sorted(self.results(b)),
                            [(str_to_bytes('{0}!?!?'.format(d)), '') for d in ducks])

    def tearDown(self):
        # Delete the DDFS tag a test created (if any) so runs don't leak tags.
        super(SaveTestCase, self).tearDown()
        if hasattr(self, 'tag'):
            self.ddfs.delete(self.tag)
コード例 #2
0
class DavinChainTestCase(TestCase):
    def runTest(self):
        """Chain two independent jobs into a third and check the merged output."""
        first = DavinChainJobA()
        second = DavinChainJobA()
        combined = DavinChainJobC()
        self.job = JobChain({first: ['raw://0', 'raw://1', 'raw://2'],
                             second: ['raw://3', 'raw://4', 'raw://5'],
                             combined: [first, second]})
        self.job.wait()
        # The combined job should see all six raw inputs exactly once.
        expected = ((str(x), '') for x in range(6))
        self.assertAllEqual(sorted(self.results(combined)), expected)
コード例 #3
0
class SchemesTestCase(TestCase):
    """Check that disco:// result URLs can be read back through a chained job."""

    # Fixed payload served for every request.
    animals = ['horse', 'sheep', 'whale', 'tiger']

    def serve(self, path):
        """Serve the animal list, one name per line, regardless of path."""
        return '\n'.join(self.animals)

    def test_scheme_disco(self):
        a, b = SchemesJobA(), SchemesJobB()
        self.job = JobChain({a: self.test_server.urls([''] * 10), b: a})
        self.job.wait()
        for key, value in self.results(b):
            # assertIn: self.assert_ was a deprecated alias removed in
            # Python 3.12, and assertIn gives a clearer failure message.
            self.assertIn(key, self.animals)
            # assertIsNone: the assertEquals alias was likewise removed.
            self.assertIsNone(value)
コード例 #4
0
class AsyncTestCase(TestCase):
    """Run several AsyncJobs concurrently and verify each job's result count."""

    def sample(self, n):
        """Return 2*n distinct integers drawn from range(10*n)."""
        from random import sample
        return sample(range(n * 10), n * 2)

    def serve(self, path):
        # Each requested path is echoed back ten times, one per line.
        return '\n'.join([path] * 10)

    def runTest(self):
        N = self.num_workers
        # Five independent jobs, each over 2*N inputs of 10 lines each.
        self.job = JobChain((AsyncJob(), self.test_server.urls(self.sample(N)))
                            for x in range(5))
        self.job.wait()
        for job in self.job:
            # assertEqual: the assertEquals alias was deprecated and removed
            # in Python 3.12.  2*N inputs * 10 lines = N * 20 results.
            self.assertEqual(sum(1 for result in self.results(job)), N * 20)
コード例 #5
0
class ChainTestCase(TestCase):
    """Chain two jobs and verify keys/values are transformed by both stages."""

    # Fixed byte payload served for every request.
    animals = [b'horse', b'sheep', b'whale', b'tiger']

    def serve(self, path):
        return b'\n'.join(self.animals)

    def runTest(self):
        a, b = ChainJobA(), ChainJobB()
        self.job = JobChain({a: self.test_server.urls([''] * 100),
                             b: a})
        self.job.wait()
        for key, value in self.results(b):
            # assertIn/assertEqual: self.assert_ and assertEquals were
            # deprecated aliases removed in Python 3.12.
            self.assertIn(key[:5], self.animals)
            # Each stage appends its suffix to the key.
            self.assertEqual(key[5:], b'0-1-')
            self.assertEqual(value, 1)
コード例 #6
0
ファイル: test_async.py プロジェクト: nicolasramy/disco
class AsyncTestCase(TestCase):
    """Run several AsyncJobs concurrently and verify each job's result count."""

    def sample(self, n):
        """Return 2*n distinct integers drawn from range(10*n)."""
        from random import sample

        return sample(range(n * 10), n * 2)

    def serve(self, path):
        # Each requested path is echoed back ten times, one per line.
        return "\n".join([path] * 10)

    def runTest(self):
        N = self.num_workers
        # Five independent jobs, each over 2*N inputs of 10 lines each.
        self.job = JobChain((AsyncJob(), self.test_server.urls(self.sample(N))) for x in range(5))
        self.job.wait()
        for job in self.job:
            # assertEqual: the assertEquals alias was deprecated and removed
            # in Python 3.12.  2*N inputs * 10 lines = N * 20 results.
            self.assertEqual(sum(1 for result in self.results(job)), N * 20)
コード例 #7
0
 def runTest(self):
     """A chain containing a failing job must raise; an all-good chain must not."""
     # 'urls' rather than 'input' to avoid shadowing the builtin.
     urls = self.test_server.urls([''] * 5)
     a, b, c = WaitJob1(), WaitJob1(), WaitJob2()
     # WaitJob2 is expected to fail, so waiting on the chain raises JobError.
     self.job = JobChain({a: urls, b: urls, c: urls})
     self.assertRaises(JobError, self.job.wait)
     # A chain of only the succeeding jobs completes and returns itself.
     valid = JobChain({a: urls, b: urls})
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(valid.wait(), valid)
コード例 #8
0
class InputTestCase(TestCase):
    """Exercise the input/partition configurations accepted by Job.run()."""

    def serve(self, path):
        # Every request receives the same byte payload.
        return b'smoothies'

    def test_empty_map(self):
        # A map over no inputs produces no results.
        self.job = MapJob().run(input=[])
        self.assertResults(self.job, [])

    def test_empty_reduce(self):
        self.job = ReduceJob().run(input=[])
        self.assertResults(self.job, [])

    def test_empty_mapreduce(self):
        self.job = MapReduceJob().run(input=[])
        self.assertResults(self.job, [])

    def test_partitioned_map(self):
        self.job = MapJob().run(input=['raw://organic_vodka'], partitions=2)
        self.assertResults(self.job, [('organic_vodka', 'against_me')])

    def test_nonpartitioned_map(self):
        # partitions=None must give the same result as a partitioned map.
        self.job = MapJob().run(input=['raw://organic_vodka'],
                                partitions=None)
        self.assertResults(self.job, [('organic_vodka', 'against_me')])

    def test_nonpartitioned_reduce(self):
        self.job = ReduceJob().run(input=self.test_server.urls(['test']),
                                   partitions=None,
                                   reduce_reader=None)
        self.assertResults(self.job, [(b'smoothies', 'mmm')])

    def test_partitioned_mapreduce(self):
        self.job = MapReduceJob().run(input=self.test_server.urls(['test']),
                                      partitions=8,
                                      reduce_reader=task_io.chain_reader)
        self.assertResults(self.job, [((b'smoothies', 'against_me'), 'mmm')])

    def test_partitioned_reduce(self):
        # Two identical maps feed both a partitioned reduce and a merge
        # reduce; the two strategies must agree on the final result.
        beers = ['sam_adams', 'trader_jose', 'boont_esb']
        raw_inputs = ['raw://{0}'.format(beer) for beer in beers]
        map_a, map_b = MapJob(), MapJob()
        part_reduce, merge_reduce = ReduceJob(), MergeReduceJob()
        self.job = JobChain({map_a: raw_inputs,
                             map_b: raw_inputs,
                             part_reduce: [map_a, map_b],
                             merge_reduce: [map_a, map_b]})
        self.job.wait()
        self.assertAllEqual(sorted(self.results(part_reduce)),
                            sorted(self.results(merge_reduce)))
コード例 #9
0
ファイル: test_waitmany.py プロジェクト: Dieterbe/disco
 def runTest(self):
     """A chain containing a failing job must raise; an all-good chain must not."""
     # 'urls' rather than 'input' to avoid shadowing the builtin.
     urls = self.test_server.urls([''] * 5)
     a, b, c = WaitJob1(), WaitJob1(), WaitJob2()
     # WaitJob2 is expected to fail, so waiting on the chain raises JobError.
     self.job = JobChain({a: urls,
                          b: urls,
                          c: urls})
     self.assertRaises(JobError, self.job.wait)
     # A chain of only the succeeding jobs completes and returns itself.
     valid = JobChain({a: urls, b: urls})
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(valid.wait(), valid)
コード例 #10
0
ファイル: test_save.py プロジェクト: sajal/disco
class SaveTestCase(TestCase):
    """Check that job results saved into DDFS match the expected records."""

    def serve(self, path):
        """Echo the requested path back to the client, newline-terminated."""
        return '{0}\n'.format(path)

    def test_save_map(self):
        # 'inputs' rather than 'input' to avoid shadowing the builtin.
        inputs = range(10)
        self.job = SaveMapJob().run(input=self.test_server.urls(inputs))
        results = sorted(self.results(self.job))
        # Remember the DDFS tag so tearDown can clean it up.
        self.tag = self.disco.results(self.job.name)[1][0]

        # Previously, each map would save one blob into DDFS.  Now,
        # the pipeline termination does it, using the output of the
        # shuffle stage.  So now, the number of blobs in the tag
        # depends on the grouping used for shuffle, and also the
        # number of nodes used.  Hence, we cannot anymore assert on
        # the number of blobs in the tag.

        # self.assertEquals(len(list(self.ddfs.blobs(self.tag))), len(inputs))

        # assertEqual: the assertEquals alias was deprecated and removed
        # in Python 3.12.
        self.assertEqual(results,
                         [(str_to_bytes(str(e) + '!'), '') for e in inputs])

    def test_save(self):
        ducks = ['dewey', 'huey', 'louie']
        a, b = SaveJob1(), SaveJob2()
        self.job = JobChain({a: self.test_server.urls(ducks), b: a})
        self.job.wait()
        self.tag = self.disco.results(b)[1][0]
        self.assertAllEqual(sorted(self.results(b)),
                            [(str_to_bytes('{0}!?!?'.format(d)), '')
                             for d in ducks])

    def tearDown(self):
        # Delete the DDFS tag a test created (if any) so runs don't leak tags.
        super(SaveTestCase, self).tearDown()
        if hasattr(self, 'tag'):
            self.ddfs.delete(self.tag)