Example #1
def setUp(self):
    # Create a fresh BatchBackend for each test.
    self.backend = BatchBackend('test')
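The fragments in Examples #1 and #2 come from a unittest suite; a minimal sketch of the surrounding module is shown below. The import path is an assumption based on Hail's `hailtop.pipeline` package and may need adjusting for your installation:

    import unittest

    from hailtop.pipeline import BatchBackend  # assumed import path

    class BatchTests(unittest.TestCase):
        def setUp(self):
            # Create a fresh BatchBackend for each test.
            self.backend = BatchBackend('test')

        def tearDown(self):
            # Release the backend's resources even if a test fails.
            self.backend.close()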
Example #2
def setUp(self):
    # Take the Batch service URL from the environment rather than hard-coding it.
    self.backend = BatchBackend(os.environ.get('BATCH_URL'))
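Unlike Example #1, this variant reads the service address from the `BATCH_URL` environment variable, so the same suite can target any deployed Batch instance without code changes.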
Example #3
class BatchTests(unittest.TestCase):
    def setUp(self):
        self.backend = BatchBackend('test')

    def tearDown(self):
        self.backend.close()

    def pipeline(self):
        return Pipeline(backend=self.backend,
                        default_image='google/cloud-sdk:237.0.0-alpine',
                        attributes={
                            'foo': 'a',
                            'bar': 'b'
                        })

    def test_single_task_no_io(self):
        p = self.pipeline()
        t = p.new_task()
        t.command('echo hello')
        assert p.run().status()['state'] == 'success'

    def test_single_task_input(self):
        p = self.pipeline()
        input = p.read_input(f'{gcs_input_dir}/hello.txt')
        t = p.new_task()
        t.command(f'cat {input}')
        assert p.run().status()['state'] == 'success'

    def test_single_task_input_resource_group(self):
        p = self.pipeline()
        input = p.read_input_group(foo=f'{gcs_input_dir}/hello.txt')
        t = p.new_task()
        t.storage('0.25Gi')
        t.command(f'cat {input.foo}')
        assert p.run().status()['state'] == 'success'

    def test_single_task_output(self):
        p = self.pipeline()
        t = p.new_task(attributes={'a': 'bar', 'b': 'foo'})
        t.command(f'echo hello > {t.ofile}')
        assert p.run().status()['state'] == 'success'

    def test_single_task_write_output(self):
        p = self.pipeline()
        t = p.new_task()
        t.command(f'echo hello > {t.ofile}')
        p.write_output(t.ofile,
                       f'{gcs_output_dir}/test_single_task_output.txt')
        assert p.run().status()['state'] == 'success'

    def test_single_task_resource_group(self):
        p = self.pipeline()
        t = p.new_task()
        t.declare_resource_group(output={'foo': '{root}.foo'})
        t.command(f'echo "hello" > {t.output.foo}')
        assert p.run().status()['state'] == 'success'

    def test_single_task_write_resource_group(self):
        p = self.pipeline()
        t = p.new_task()
        t.declare_resource_group(output={'foo': '{root}.foo'})
        t.command(f'echo "hello" > {t.output.foo}')
        p.write_output(
            t.output,
            f'{gcs_output_dir}/test_single_task_write_resource_group')
        p.write_output(
            t.output.foo,
            f'{gcs_output_dir}/test_single_task_write_resource_group_file.txt')
        assert p.run().status()['state'] == 'success'

    def test_multiple_dependent_tasks(self):
        output_file = f'{gcs_output_dir}/test_multiple_dependent_tasks.txt'
        p = self.pipeline()
        t = p.new_task()
        t.command(f'echo "0" >> {t.ofile}')

        for i in range(1, 3):
            t2 = p.new_task()
            t2.command(f'echo "{i}" > {t2.tmp1}')
            t2.command(f'cat {t.ofile} {t2.tmp1} > {t2.ofile}')
            t = t2

        p.write_output(t.ofile, output_file)
        assert p.run().status()['state'] == 'success'

    def test_specify_cpu(self):
        p = self.pipeline()
        t = p.new_task()
        t.cpu('0.5')
        t.command(f'echo "hello" > {t.ofile}')
        assert p.run().status()['state'] == 'success'

    def test_specify_memory(self):
        p = self.pipeline()
        t = p.new_task()
        t.memory('100M')
        t.command(f'echo "hello" > {t.ofile}')
        assert p.run().status()['state'] == 'success'

    def test_scatter_gather(self):
        p = self.pipeline()

        for i in range(3):
            t = p.new_task(name=f'foo{i}')
            t.command(f'echo "{i}" > {t.ofile}')

        merger = p.new_task()
        merger.command('cat {files} > {ofile}'.format(
            files=' '.join(str(t.ofile)
                           for t in sorted(p.select_tasks('foo'),
                                           key=lambda x: x.name,
                                           reverse=True)),
            ofile=merger.ofile))

        assert p.run().status()['state'] == 'success'

    def test_file_name_space(self):
        p = self.pipeline()
        input = p.read_input(f'{gcs_input_dir}/hello (foo) spaces.txt')
        t = p.new_task()
        t.command(f'cat {input} > {t.ofile}')
        p.write_output(t.ofile, f'{gcs_output_dir}/hello (foo) spaces.txt')
        assert p.run().status()['state'] == 'success'

    def test_dry_run(self):
        p = self.pipeline()
        t = p.new_task()
        t.command(f'echo hello > {t.ofile}')
        p.write_output(t.ofile,
                       f'{gcs_output_dir}/test_single_task_output.txt')
        p.run(dry_run=True)

    def test_verbose(self):
        p = self.pipeline()
        input = p.read_input(f'{gcs_input_dir}/hello.txt')
        t = p.new_task()
        t.command(f'cat {input}')
        p.write_output(input, f'{gcs_output_dir}/hello.txt')
        assert p.run(verbose=True).status()['state'] == 'success'

    def test_benchmark_lookalike_workflow(self):
        p = self.pipeline()

        setup_tasks = []
        for i in range(10):
            t = p.new_task(f'setup_{i}').cpu(0.1)
            t.command(f'echo "foo" > {t.ofile}')
            setup_tasks.append(t)

        tasks = []
        for i in range(500):
            t = p.new_task(f'create_file_{i}').cpu(0.1)
            t.command(
                f'echo {setup_tasks[i % len(setup_tasks)].ofile} > {t.ofile}')
            t.command(f'echo "bar" >> {t.ofile}')
            tasks.append(t)

        combine = p.new_task('combine_output').cpu(0.1)
        for task_group in grouped(arg_max(), tasks):
            combine.command(
                f'cat {" ".join(shq(str(t.ofile)) for t in task_group)} >> {combine.ofile}'
            )
        p.write_output(combine.ofile,
                       f'{gcs_output_dir}/pipeline_benchmark_test.txt')
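The benchmark test above uses three helpers the snippet does not define: `shq` (shell quoting, presumably `shlex.quote`), `arg_max` (presumably the maximum number of files to concatenate per command), and `grouped` (chunking an iterable). A minimal sketch of `grouped`, assuming it yields fixed-size chunks as its use above suggests:

    from typing import Iterable, Iterator, List, TypeVar

    T = TypeVar('T')

    def grouped(n: int, items: Iterable[T]) -> Iterator[List[T]]:
        # Yield successive chunks of at most n items, matching how the
        # benchmark test above appears to consume it.
        chunk: List[T] = []
        for item in items:
            chunk.append(item)
            if len(chunk) == n:
                yield chunk
                chunk = []
        if chunk:
            yield chunk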
Example #4
class BatchTests(unittest.TestCase):
    def setUp(self):
        self.backend = BatchBackend(os.environ.get('BATCH_URL'))

    def tearDown(self):
        self.backend.close()

    def pipeline(self):
        return Pipeline(backend=self.backend,
                        default_image='google/cloud-sdk:237.0.0-alpine',
                        attributes={'foo': 'a', 'bar': 'b'})

    def test_single_task_no_io(self):
        p = self.pipeline()
        t = p.new_task()
        t.command('echo hello')
        p.run()

    def test_single_task_input(self):
        p = self.pipeline()
        input = p.read_input(f'{gcs_input_dir}/hello.txt')
        t = p.new_task()
        t.command(f'cat {input}')
        p.run()

    def test_single_task_input_resource_group(self):
        p = self.pipeline()
        input = p.read_input_group(foo=f'{gcs_input_dir}/hello.txt')
        t = p.new_task()
        t.storage('0.25Gi')
        t.command(f'cat {input.foo}')
        p.run()

    def test_single_task_output(self):
        p = self.pipeline()
        t = p.new_task(attributes={'a': 'bar', 'b': 'foo'})
        t.command(f'echo hello > {t.ofile}')
        p.run()

    def test_single_task_write_output(self):
        p = self.pipeline()
        t = p.new_task()
        t.command(f'echo hello > {t.ofile}')
        p.write_output(t.ofile, f'{gcs_output_dir}/test_single_task_output.txt')
        p.run()

    def test_single_task_resource_group(self):
        p = self.pipeline()
        t = p.new_task()
        t.declare_resource_group(output={'foo': '{root}.foo'})
        t.command(f'echo "hello" > {t.output.foo}')
        p.run()

    def test_single_task_write_resource_group(self):
        p = self.pipeline()
        t = p.new_task()
        t.declare_resource_group(output={'foo': '{root}.foo'})
        t.command(f'echo "hello" > {t.output.foo}')
        p.write_output(t.output, f'{gcs_output_dir}/test_single_task_write_resource_group')
        p.write_output(t.output.foo, f'{gcs_output_dir}/test_single_task_write_resource_group_file.txt')
        p.run()

    def test_multiple_dependent_tasks(self):
        output_file = f'{gcs_output_dir}/test_multiple_dependent_tasks.txt'
        p = self.pipeline()
        t = p.new_task()
        t.command(f'echo "0" >> {t.ofile}')

        for i in range(1, 3):
            t2 = p.new_task()
            t2.command(f'echo "{i}" > {t2.tmp1}')
            t2.command(f'cat {t.ofile} {t2.tmp1} > {t2.ofile}')
            t = t2

        p.write_output(t.ofile, output_file)
        p.run()

    def test_specify_cpu(self):
        p = self.pipeline()
        t = p.new_task()
        t.cpu('0.5')
        t.command(f'echo "hello" > {t.ofile}')
        p.run()

    def test_specify_memory(self):
        p = self.pipeline()
        t = p.new_task()
        t.memory('100M')
        t.command(f'echo "hello" > {t.ofile}')
        p.run()

    def test_scatter_gather(self):
        p = self.pipeline()

        for i in range(3):
            t = p.new_task(name=f'foo{i}')
            t.command(f'echo "{i}" > {t.ofile}')

        merger = p.new_task()
        merger.command('cat {files} > {ofile}'.format(
            files=' '.join(str(t.ofile)
                           for t in sorted(p.select_tasks('foo'),
                                           key=lambda x: x.name,
                                           reverse=True)),
            ofile=merger.ofile))

        p.run()

    def test_file_name_space(self):
        p = self.pipeline()
        input = p.read_input(f'{gcs_input_dir}/hello (foo) spaces.txt')
        t = p.new_task()
        t.command(f'cat {input} > {t.ofile}')
        p.write_output(t.ofile, f'{gcs_output_dir}/hello (foo) spaces.txt')
        p.run()

    def test_dry_run(self):
        p = self.pipeline()
        t = p.new_task()
        t.command(f'echo hello > {t.ofile}')
        p.write_output(t.ofile, f'{gcs_output_dir}/test_single_task_output.txt')
        p.run(dry_run=True)

    def test_verbose(self):
        p = self.pipeline()
        input = p.read_input(f'{gcs_input_dir}/hello.txt')
        t = p.new_task()
        t.command(f'cat {input}')
        p.write_output(input, f'{gcs_output_dir}/hello.txt')
        p.run(verbose=True)

    def test_failed_job_error_msg(self):
        with self.assertRaises(PipelineException):
            p = self.pipeline()
            t = p.new_task()
            t.command('false')
            p.run()
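If either test class above is saved as a standalone module, the standard unittest entry point runs it:

    if __name__ == '__main__':
        unittest.main()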