Beispiel #1
0
    def test_batch(self):
        """Cancel a three-job batch part-way through and check the final job states.

        The batch holds a job running ``false``, a one-second sleep and a
        thirty-second sleep. The first two are waited on before the batch is
        cancelled. Afterwards at most one job may be ``Cancelled``, every
        other job must have completed, and exactly one ``Failed``/``Error``
        job must report a positive exit code for its ``main`` step.
        """
        image = 'ubuntu:18.04'
        builder = self.client.create_batch()
        failing_job = builder.create_job(image, ['false'])
        short_job = builder.create_job(image, ['sleep', '1'])
        # Third job sleeps for 30s; presumably the one that gets cancelled.
        builder.create_job(image, ['sleep', '30'])
        batch = builder.submit()

        # Let the first two jobs finish before cancelling the batch.
        for job in (failing_job, short_job):
            job.wait()
        batch.cancel()
        status = batch.wait()

        jobs = status['jobs']
        assert len(jobs) == 3, status

        states = collections.Counter(job['state'] for job in jobs)
        cancelled = states['Cancelled']
        completed = sum(states[name] for name in ('Error', 'Failed', 'Success'))
        assert cancelled <= 1, status
        assert cancelled + completed == 3, status

        # Count the unsuccessful jobs whose main step exited non-zero.
        failures = 0
        for job in jobs:
            if job['state'] not in ('Failed', 'Error'):
                continue
            if Job._get_exit_code(job, 'main') > 0:
                failures += 1
        assert failures == 1, status
Beispiel #2
0
def test_missing_parent_is_400(client):
    """Check that a job whose parent has an invalid job id is rejected.

    A parent job with id 10000 is fabricated directly on the batch's async
    builder instead of being created through ``create_job``. Using it as a
    parent must raise a ``ValueError`` whose message mentions
    ``parents with invalid job ids``, either when the child job is created
    or when the batch is submitted.

    :param client: batch client used to create the batch.
    """
    batch = client.create_batch()
    fake_job = aioclient.Job.unsubmitted_job(batch._async_builder, 10000)
    fake_job = Job.from_async_job(fake_job)

    # Only the calls under test live in the ``try`` so that a ValueError
    # raised while building the fixture is not mistaken for the expected one.
    try:
        batch.create_job('ubuntu:18.04', command=['echo', 'head'], parents=[fake_job])
        batch.submit()
    except ValueError as err:
        assert re.search('parents with invalid job ids', str(err))
        return

    # Raise explicitly instead of ``assert False``: asserts are removed under
    # ``python -O``, which would let this test pass without any error.
    raise AssertionError('expected a ValueError for a parent with an invalid job id')
Beispiel #3
0
    def _run(self, pipeline, dry_run, verbose, delete_scratch_on_exit):  # pylint: disable-msg=R0915
        """Build a batch of jobs from ``pipeline``, submit it and wait for it to finish.

        Every task becomes one job that runs its commands through
        ``/bin/bash -c``. Two helper jobs may be added around the task jobs:
        ``write_external_inputs`` copies input resources to their requested
        output paths with ``gsutil``, and ``remove_tmpdir`` deletes the remote
        scratch directory after all other jobs.

        :param pipeline: pipeline to run; its ``attributes``, ``name``,
            ``_input_resources`` and ``_tasks`` are read.
        :param dry_run: if true, print the shell commands that would be run
            (separated by blank lines) and return; no job is created and
            nothing is submitted.
        :param verbose: if true, run the shell commands with ``set -ex``
            instead of ``set -e``, report when the default image is used, and
            print every submitted job id together with its command.
        :param delete_scratch_on_exit: if true and at least one task has
            internal outputs, add the ``remove_tmpdir`` job.
        :raises PipelineException: if the finished batch's state is not
            ``'success'``; the message describes every job that reported a
            non-zero exit code.
        """
        start = time.time()

        bucket = self._batch_client.bucket
        subdir_name = 'pipeline-{}'.format(uuid.uuid4().hex[:12])

        remote_tmpdir = f'gs://{bucket}/pipeline/{subdir_name}'
        local_tmpdir = f'/io/pipeline/{subdir_name}'

        default_image = 'ubuntu:latest'

        # Add the name on a copy so the pipeline's own attributes dict is not
        # modified as a side effect of running it (and a missing dict is
        # tolerated when a name has to be added).
        batch_attributes = pipeline.attributes
        if pipeline.name is not None:
            batch_attributes = {**(batch_attributes or {}), 'name': pipeline.name}

        batch = self._batch_client.create_batch(attributes=batch_attributes)

        n_jobs_submitted = 0
        used_remote_tmpdir = False

        task_to_job_mapping = {}
        jobs_to_command = {}
        commands = []

        bash_flags = 'set -e' + ('x' if verbose else '') + '; '

        activate_service_account = 'gcloud -q auth activate-service-account ' \
                                   '--key-file=/gsa-key/privateKeyData'

        # The helpers below return lists of (source, destination) path pairs.
        def copy_input(r):
            if isinstance(r, InputResourceFile):
                return [(r._input_path, r._get_path(local_tmpdir))]
            assert isinstance(r, TaskResourceFile)
            return [(r._get_path(remote_tmpdir), r._get_path(local_tmpdir))]

        def copy_internal_output(r):
            assert isinstance(r, TaskResourceFile)
            return [(r._get_path(local_tmpdir), r._get_path(remote_tmpdir))]

        def copy_external_output(r):
            if isinstance(r, InputResourceFile):
                return [(r._input_path, dest) for dest in r._output_paths]
            assert isinstance(r, TaskResourceFile)
            return [(r._get_path(local_tmpdir), dest) for dest in r._output_paths]

        # Input resources that were also requested as outputs are copied by a
        # dedicated gsutil job rather than by any task.
        write_external_inputs = [x for r in pipeline._input_resources for x in copy_external_output(r)]
        if write_external_inputs:
            def _cp(src, dst):
                return f'gsutil -m cp -R {src} {dst}'

            write_cmd = bash_flags + activate_service_account + ' && ' + \
                ' && '.join([_cp(*files) for files in write_external_inputs])

            if dry_run:
                commands.append(write_cmd)
            else:
                j = batch.create_job(image='google/cloud-sdk:237.0.0-alpine',
                                     command=['/bin/bash', '-c', write_cmd],
                                     attributes={'name': 'write_external_inputs'})
                jobs_to_command[j] = write_cmd
                n_jobs_submitted += 1

        for task in pipeline._tasks:
            inputs = [x for r in task._inputs for x in copy_input(r)]

            outputs = [x for r in task._internal_outputs for x in copy_internal_output(r)]
            if outputs:
                # Internal outputs land in the remote tmpdir, so it will need cleaning up.
                used_remote_tmpdir = True
            outputs += [x for r in task._external_outputs for x in copy_external_output(r)]

            resource_defs = [r._declare(directory=local_tmpdir) for r in task._mentioned]

            if task._image is None and verbose:
                print(f"Using image '{default_image}' since no image was specified.")

            make_local_tmpdir = f'mkdir -p {local_tmpdir}/{task._uid}/; '
            defs = '; '.join(resource_defs) + '; ' if resource_defs else ''
            task_command = [cmd.strip() for cmd in task._command]

            cmd = bash_flags + make_local_tmpdir + defs + " && ".join(task_command)
            if dry_run:
                commands.append(cmd)
                continue

            parents = [task_to_job_mapping[t] for t in task._dependencies]

            job_attributes = {'task_uid': task._uid}
            if task.name:
                job_attributes['name'] = task.name
            job_attributes.update(task.attributes)

            resources = {'requests': {}}
            if task._cpu:
                resources['requests']['cpu'] = task._cpu
            if task._memory:
                resources['requests']['memory'] = task._memory

            j = batch.create_job(image=task._image or default_image,
                                 command=['/bin/bash', '-c', cmd],
                                 parents=parents,
                                 attributes=job_attributes,
                                 resources=resources,
                                 input_files=inputs or None,
                                 output_files=outputs or None,
                                 pvc_size=task._storage)
            n_jobs_submitted += 1

            task_to_job_mapping[task] = j
            jobs_to_command[j] = cmd

        if dry_run:
            print("\n\n".join(commands))
            return

        if delete_scratch_on_exit and used_remote_tmpdir:
            # The cleanup job depends on every job created so far and is
            # created with always_run=True.
            parents = list(jobs_to_command.keys())
            rm_cmd = f'gsutil -m rm -r {remote_tmpdir}'
            cmd = bash_flags + f'{activate_service_account} && {rm_cmd}'
            j = batch.create_job(
                image='google/cloud-sdk:237.0.0-alpine',
                command=['/bin/bash', '-c', cmd],
                parents=parents,
                attributes={'name': 'remove_tmpdir'},
                always_run=True)
            jobs_to_command[j] = cmd
            n_jobs_submitted += 1

        print(f'Built DAG with {n_jobs_submitted} jobs in {round(time.time() - start, 3)} seconds:')
        start = time.time()
        batch = batch.submit()
        print(f'Submitted batch {batch.id} with {n_jobs_submitted} jobs in {round(time.time() - start, 3)} seconds:')

        # Re-key by job id so the commands can be looked up from the batch status.
        jobs_to_command = {j.id: cmd for j, cmd in jobs_to_command.items()}

        if verbose:
            for jid, cmd in jobs_to_command.items():
                print(f'{jid}: {cmd}')

        status = batch.wait()

        if status['state'] == 'success':
            print('Pipeline completed successfully!')
            return

        # A job counts as failed when any of its recorded exit codes is non-zero.
        failed_jobs = [
            ((j['batch_id'], j['job_id']), j['exit_code'])
            for j in status['jobs']
            if 'exit_code' in j and any(ec != 0 for ec in j['exit_code'].values())
        ]

        failure_reports = []
        for jid, exit_codes in failed_jobs:
            ec = Job.exit_code(exit_codes)
            job = self._batch_client.get_job(*jid)
            log = job.log()
            name = job.status()['attributes'].get('name', None)
            failure_reports.append(
                f"Job {jid} failed with exit code {ec}:\n"
                f"  Task name:\t{name}\n"
                f"  Command:\t{jobs_to_command[jid]}\n"
                f"  Log:\t{log}\n")

        raise PipelineException(''.join(failure_reports))
Beispiel #4
0
def batch_status_exit_codes(batch_status):
    """Return the exit codes of every job in ``batch_status``.

    Applies ``Job._get_exit_codes`` to each entry of ``batch_status['jobs']``
    and returns the results as a list in the same order.
    """
    exit_codes = []
    for job_status in batch_status['jobs']:
        exit_codes.append(Job._get_exit_codes(job_status))
    return exit_codes