Code Example #1
 async def create(billing_project: str,
                  deploy_config: Optional[DeployConfig] = None,
                  session: Optional[httpx.ClientSession] = None,
                  headers: Optional[Dict[str, str]] = None,
                  _token: Optional[str] = None,
                  token_file: Optional[str] = None):
     if not deploy_config:
         deploy_config = get_deploy_config()
     url = deploy_config.base_url('batch')
     if session is None:
         session = httpx.client_session()
     if headers is None:
         headers = {}
     if _token:
         headers['Authorization'] = f'Bearer {_token}'
     else:
         headers.update(
             service_auth_headers(deploy_config,
                                  'batch',
                                  token_file=token_file))
     return BatchClient(billing_project=billing_project,
                        url=url,
                        session=session,
                        headers=headers)
Code Example #2
File: dbuf.py  Project: nawatts/hail
 def __init__(self, name, binding_host, port, leader, dbuf, aiofiles):
     self.deploy_config = get_deploy_config()
     self.app = web.Application(client_max_size=50 * 1024 * 1024)
     self.routes = web.RouteTableDef()
     self.workers = set()
     self.name = name
     self.binding_host = binding_host
     self.port = port
     self.dbuf = dbuf
     self.leader = leader
     self.leader_url = self.deploy_config.base_url(leader)
     self.aiofiles = aiofiles
     self.shuffle_create_lock = asyncio.Lock()
     self.app.add_routes([
         web.post('/s', self.create),
         web.post('/s/{session}', self.post),
         web.post('/s/{session}/get', self.get),
         web.post('/s/{session}/getmany', self.getmany),
         web.delete('/s/{session}', self.delete),
         web.post('/w', self.register_worker),
         web.get('/w', self.list_workers),
         web.get('/healthcheck', self.healthcheck),
     ])
     self.app.on_cleanup.append(self.cleanup)
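For orientation, a sketch of how a server built this way could be started with aiohttp. The class name DBufServer, the port, and the dbuf/aiofiles arguments are placeholders inferred from the constructor signature above, not names confirmed by the source:

from aiohttp import web

# All values below are illustrative; dbuf and aiofiles stand in for whatever
# buffer and async-file helpers the real application wires in.
server = DBufServer(name='dbuf-0', binding_host='0.0.0.0', port=5000,
                    leader='dbuf-0', dbuf=dbuf, aiofiles=aiofiles)
web.run_app(server.app, host=server.binding_host, port=server.port)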
Code Example #3
    def __init__(self,
                 gcs_project=None,
                 fs=None,
                 deploy_config=None,
                 session=None,
                 headers=None,
                 _token=None):
        if not deploy_config:
            self._deploy_config = get_deploy_config()
        else:
            self._deploy_config = deploy_config

        self.url = self._deploy_config.base_url('memory')
        self.objects_url = f'{self.url}/api/v1alpha/objects'
        self._session = session
        if fs is None:
            fs = GCS(blocking_pool=concurrent.futures.ThreadPoolExecutor(),
                     project=gcs_project)
        self._fs = fs
        self._headers = {}
        if headers:
            self._headers.update(headers)
        if _token:
            self._headers['Authorization'] = f'Bearer {_token}'
Code Example #4
    rest_authenticated_developers_only, web_authenticated_developers_only, \
    check_csrf_token
from web_common import setup_aiohttp_jinja2, setup_common_static_routes, render_template, \
    set_message

from .constants import BUCKET
from .github import Repo, FQBranch, WatchedBranch, UnwatchedBranch

with open(os.environ.get('HAIL_CI_OAUTH_TOKEN', 'oauth-token/oauth-token'), 'r') as f:
    oauth_token = f.read().strip()

log = logging.getLogger('ci')

uvloop.install()

deploy_config = get_deploy_config()

watched_branches = [
    WatchedBranch(index, FQBranch.from_short_str(bss), deployable)
    for (index, [bss, deployable]) in enumerate(json.loads(os.environ.get('HAIL_WATCHED_BRANCHES', '[]')))
]

routes = web.RouteTableDef()

start_time = datetime.datetime.now()


@routes.get('')
@routes.get('/')
@web_authenticated_developers_only()
async def index(request, userdata):  # pylint: disable=unused-argument
Code Example #5
 def __init__(self, deploy_config=None):
     if not deploy_config:
         deploy_config = get_deploy_config()
     self._deploy_config = deploy_config
     self._session = None
Code Example #6
File: service_backend.py  Project: TileDB-Inc/hail
 def __init__(self, *, deploy_config: Optional[DeployConfig] = None):
     if not deploy_config:
         deploy_config = get_deploy_config()
     self.deploy_config = deploy_config
     self.url = deploy_config.base_url('query')
     self._session: Optional[aiohttp.ClientSession] = None
Code Example #7
 def get_tokens_file():
     deploy_config = get_deploy_config()
     location = deploy_config.location()
     if location == 'external':
         return os.path.expanduser('~/.hail/tokens.json')
     return '/user-tokens/tokens.json'
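A short sketch of reading the token store that get_tokens_file points at; it assumes the file already exists and contains a JSON object mapping namespaces to tokens:

import json

tokens_path = get_tokens_file()  # ~/.hail/tokens.json externally, /user-tokens/tokens.json in-cluster
with open(tokens_path) as f:
    tokens = json.load(f)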
Code Example #8
 def __init__(self):
     self._deploy_config = get_deploy_config()
     self._session = None
Code Example #9
    def _run(self,
             batch: 'batch.Batch',
             dry_run: bool,
             verbose: bool,
             delete_scratch_on_exit: bool,
             wait: bool = True,
             open: bool = False,
             disable_progress_bar: bool = False,
             callback: Optional[str] = None,
             token: Optional[str] = None,
             **backend_kwargs):  # pylint: disable-msg=too-many-statements
        """Execute a batch.

        Warning
        -------
        This method should not be called directly. Instead, use :meth:`.batch.Batch.run`
        and pass :class:`.ServiceBackend` specific arguments as key-word arguments.

        Parameters
        ----------
        batch:
            Batch to execute.
        dry_run:
            If `True`, don't execute code.
        verbose:
            If `True`, print debugging output.
        delete_scratch_on_exit:
            If `True`, delete temporary directories with intermediate files.
        wait:
            If `True`, wait for the batch to finish executing before returning.
        open:
            If `True`, open the UI page for the batch.
        disable_progress_bar:
            If `True`, disable the progress bar.
        callback:
            If not `None`, a URL that will receive at most one POST request
            after the entire batch completes.
        token:
            If not `None`, a string used for idempotency of batch submission.
        """

        if backend_kwargs:
            raise ValueError(f'ServiceBackend does not support any of these keywords: {backend_kwargs}')

        build_dag_start = time.time()

        uid = uuid.uuid4().hex[:6]
        remote_tmpdir = f'gs://{self._bucket_name}/batch/{uid}'
        local_tmpdir = f'/io/batch/{uid}'

        default_image = 'ubuntu:18.04'

        attributes = copy.deepcopy(batch.attributes)
        if batch.name is not None:
            attributes['name'] = batch.name

        bc_batch = self._batch_client.create_batch(attributes=attributes, callback=callback,
                                                   token=token, cancel_after_n_failures=batch._cancel_after_n_failures)

        n_jobs_submitted = 0
        used_remote_tmpdir = False

        job_to_client_job_mapping: Dict[_job.Job, bc.Job] = {}
        jobs_to_command = {}
        commands = []

        bash_flags = 'set -e' + ('x' if verbose else '')

        activate_service_account = 'gcloud -q auth activate-service-account ' \
                                   '--key-file=/gsa-key/key.json'

        def copy_input(r):
            if isinstance(r, resource.InputResourceFile):
                return [(r._input_path, r._get_path(local_tmpdir))]
            assert isinstance(r, (resource.JobResourceFile, resource.PythonResult))
            return [(r._get_path(remote_tmpdir), r._get_path(local_tmpdir))]

        def copy_internal_output(r):
            assert isinstance(r, (resource.JobResourceFile, resource.PythonResult))
            return [(r._get_path(local_tmpdir), r._get_path(remote_tmpdir))]

        def copy_external_output(r):
            if isinstance(r, resource.InputResourceFile):
                return [(r._input_path, dest) for dest in r._output_paths]
            assert isinstance(r, (resource.JobResourceFile, resource.PythonResult))
            return [(r._get_path(local_tmpdir), dest) for dest in r._output_paths]

        def symlink_input_resource_group(r):
            symlinks = []
            if isinstance(r, resource.ResourceGroup) and r._source is None:
                for name, irf in r._resources.items():
                    src = irf._get_path(local_tmpdir)
                    dest = f'{r._get_path(local_tmpdir)}.{name}'
                    symlinks.append(f'ln -sf {shq(src)} {shq(dest)}')
            return symlinks

        write_external_inputs = [x for r in batch._input_resources for x in copy_external_output(r)]
        if write_external_inputs:
            def _cp(src, dst):
                return f'gsutil -m cp -R {shq(src)} {shq(dst)}'

            write_cmd = f'''
{bash_flags}
{activate_service_account}
{' && '.join([_cp(*files) for files in write_external_inputs])}
'''

            if dry_run:
                commands.append(write_cmd)
            else:
                j = bc_batch.create_job(image='gcr.io/google.com/cloudsdktool/cloud-sdk:310.0.0-alpine',
                                        command=['/bin/bash', '-c', write_cmd],
                                        attributes={'name': 'write_external_inputs'})
                jobs_to_command[j] = write_cmd
                n_jobs_submitted += 1

        for job in batch._jobs:
            if isinstance(job, _job.PythonJob):
                if job._image is None:
                    version = sys.version_info
                    if version.major != 3 or version.minor not in (6, 7, 8):
                        raise BatchException(
                            f"You must specify 'image' for Python jobs if you are using a Python version other than 3.6, 3.7, or 3.8 (you are using {version})")
                    job._image = f'hailgenetics/python-dill:{version.major}.{version.minor}-slim'
                job._compile(local_tmpdir, remote_tmpdir)

            inputs = [x for r in job._inputs for x in copy_input(r)]

            outputs = [x for r in job._internal_outputs for x in copy_internal_output(r)]
            if outputs:
                used_remote_tmpdir = True
            outputs += [x for r in job._external_outputs for x in copy_external_output(r)]

            symlinks = [x for r in job._mentioned for x in symlink_input_resource_group(r)]

            env_vars = {
                **job._env,
                **{r._uid: r._get_path(local_tmpdir) for r in job._mentioned}}

            if job._image is None:
                if verbose:
                    print(f"Using image '{default_image}' since no image was specified.")

            make_local_tmpdir = f'mkdir -p {local_tmpdir}/{job._job_id}'

            job_command = [cmd.strip() for cmd in job._command]

            prepared_job_command = (f'{{\n{x}\n}}' for x in job_command)
            cmd = f'''
{bash_flags}
{make_local_tmpdir}
{"; ".join(symlinks)}
{" && ".join(prepared_job_command)}
'''

            if dry_run:
                commands.append(cmd)
                continue

            parents = [job_to_client_job_mapping[j] for j in job._dependencies]

            attributes = copy.deepcopy(job.attributes) if job.attributes else dict()
            if job.name:
                attributes['name'] = job.name

            resources: Dict[str, Any] = {}
            if job._cpu:
                resources['cpu'] = job._cpu
            if job._memory:
                resources['memory'] = job._memory
            if job._storage:
                resources['storage'] = job._storage
            if job._machine_type:
                resources['machine_type'] = job._machine_type
            if job._preemptible is not None:
                resources['preemptible'] = job._preemptible

            image = job._image if job._image else default_image
            image_ref = parse_docker_image_reference(image)
            if not is_google_registry_domain(image_ref.domain) and image_ref.name() not in HAIL_GENETICS_IMAGES:
                warnings.warn(f'Using an image {image} not in GCR. '
                              f'Jobs may fail due to Docker Hub rate limits.')

            j = bc_batch.create_job(image=image,
                                    command=[job._shell if job._shell else self._DEFAULT_SHELL, '-c', cmd],
                                    parents=parents,
                                    attributes=attributes,
                                    resources=resources,
                                    input_files=inputs if len(inputs) > 0 else None,
                                    output_files=outputs if len(outputs) > 0 else None,
                                    always_run=job._always_run,
                                    timeout=job._timeout,
                                    gcsfuse=job._gcsfuse if len(job._gcsfuse) > 0 else None,
                                    env=env_vars,
                                    requester_pays_project=batch.requester_pays_project,
                                    mount_tokens=True)

            n_jobs_submitted += 1

            job_to_client_job_mapping[job] = j
            jobs_to_command[j] = cmd

        if dry_run:
            print("\n\n".join(commands))
            return None

        if delete_scratch_on_exit and used_remote_tmpdir:
            parents = list(jobs_to_command.keys())
            rm_cmd = f'gsutil -m rm -r {remote_tmpdir}'
            cmd = f'''
{bash_flags}
{activate_service_account}
{rm_cmd}
'''
            j = bc_batch.create_job(
                image='gcr.io/google.com/cloudsdktool/cloud-sdk:310.0.0-alpine',
                command=['/bin/bash', '-c', cmd],
                parents=parents,
                attributes={'name': 'remove_tmpdir'},
                always_run=True)
            jobs_to_command[j] = cmd
            n_jobs_submitted += 1

        if verbose:
            print(f'Built DAG with {n_jobs_submitted} jobs in {round(time.time() - build_dag_start, 3)} seconds.')

        submit_batch_start = time.time()
        bc_batch = bc_batch.submit(disable_progress_bar=disable_progress_bar)

        jobs_to_command = {j.id: cmd for j, cmd in jobs_to_command.items()}

        if verbose:
            print(f'Submitted batch {bc_batch.id} with {n_jobs_submitted} jobs in {round(time.time() - submit_batch_start, 3)} seconds:')
            for jid, cmd in jobs_to_command.items():
                print(f'{jid}: {cmd}')

            print('')

        deploy_config = get_deploy_config()
        url = deploy_config.url('batch', f'/batches/{bc_batch.id}')
        print(f'Submitted batch {bc_batch.id}, see {url}')

        if open:
            webbrowser.open(url)
        if wait:
            print(f'Waiting for batch {bc_batch.id}...')
            status = bc_batch.wait()
            print(f'batch {bc_batch.id} complete: {status["state"]}')
        return bc_batch
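As the docstring notes, _run is reached through Batch.run, which forwards backend-specific keyword arguments. The following hedged end-to-end sketch shows that path; the billing project and bucket names are placeholders, and the exact ServiceBackend constructor arguments can differ between Hail versions:

import hailtop.batch as hb

backend = hb.ServiceBackend(billing_project='my-project', bucket='my-bucket')
b = hb.Batch(backend=backend, name='example')
j = b.new_job(name='hello')
j.command('echo hello world')
# wait, open and disable_progress_bar are passed through to ServiceBackend._run
b.run(wait=True, open=False, disable_progress_bar=True)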
Code Example #10
File: backend.py  Project: nawatts/hail
    def _run(
            self,
            pipeline,
            dry_run,
            verbose,
            delete_scratch_on_exit,
            wait=True,
            open=False,
            batch_submit_args=None):  # pylint: disable-msg=too-many-statements
        build_dag_start = time.time()

        bucket = self._batch_client.bucket
        subdir_name = 'pipeline-{}'.format(uuid.uuid4().hex[:12])

        remote_tmpdir = f'gs://{bucket}/pipeline/{subdir_name}'
        local_tmpdir = f'/io/pipeline/{subdir_name}'

        default_image = 'ubuntu:latest'

        attributes = pipeline.attributes
        if pipeline.name is not None:
            attributes['name'] = pipeline.name

        batch = self._batch_client.create_batch(attributes=attributes)

        n_jobs_submitted = 0
        used_remote_tmpdir = False

        task_to_job_mapping = {}
        jobs_to_command = {}
        commands = []

        bash_flags = 'set -e' + ('x' if verbose else '') + '; '

        activate_service_account = 'gcloud -q auth activate-service-account ' \
                                   '--key-file=/gsa-key/key.json'

        def copy_input(r):
            if isinstance(r, InputResourceFile):
                return [(r._input_path, r._get_path(local_tmpdir))]
            assert isinstance(r, TaskResourceFile)
            return [(r._get_path(remote_tmpdir), r._get_path(local_tmpdir))]

        def copy_internal_output(r):
            assert isinstance(r, TaskResourceFile)
            return [(r._get_path(local_tmpdir), r._get_path(remote_tmpdir))]

        def copy_external_output(r):
            if isinstance(r, InputResourceFile):
                return [(r._input_path, dest) for dest in r._output_paths]
            assert isinstance(r, TaskResourceFile)
            return [(r._get_path(local_tmpdir), dest)
                    for dest in r._output_paths]

        write_external_inputs = [
            x for r in pipeline._input_resources
            for x in copy_external_output(r)
        ]
        if write_external_inputs:

            def _cp(src, dst):
                return f'gsutil -m cp -R {src} {dst}'

            write_cmd = bash_flags + activate_service_account + ' && ' + \
                ' && '.join([_cp(*files) for files in write_external_inputs])

            if dry_run:
                commands.append(write_cmd)
            else:
                j = batch.create_job(
                    image='google/cloud-sdk:237.0.0-alpine',
                    command=['/bin/bash', '-c', write_cmd],
                    attributes={'name': 'write_external_inputs'})
                jobs_to_command[j] = write_cmd
                n_jobs_submitted += 1

        for task in pipeline._tasks:
            inputs = [x for r in task._inputs for x in copy_input(r)]

            outputs = [
                x for r in task._internal_outputs
                for x in copy_internal_output(r)
            ]
            if outputs:
                used_remote_tmpdir = True
            outputs += [
                x for r in task._external_outputs
                for x in copy_external_output(r)
            ]

            resource_defs = [
                r._declare(directory=local_tmpdir) for r in task._mentioned
            ]

            if task._image is None:
                if verbose:
                    print(
                        f"Using image '{default_image}' since no image was specified."
                    )

            make_local_tmpdir = f'mkdir -p {local_tmpdir}/{task._uid}/; '
            defs = '; '.join(resource_defs) + '; ' if resource_defs else ''
            task_command = [cmd.strip() for cmd in task._command]

            cmd = bash_flags + make_local_tmpdir + defs + " && ".join(
                task_command)
            if dry_run:
                commands.append(cmd)
                continue

            parents = [task_to_job_mapping[t] for t in task._dependencies]

            attributes = task.attributes
            if task.name:
                attributes['name'] = task.name

            resources = {}
            if task._cpu:
                resources['cpu'] = task._cpu
            if task._memory:
                resources['memory'] = task._memory

            j = batch.create_job(
                image=task._image if task._image else default_image,
                command=['/bin/bash', '-c', cmd],
                parents=parents,
                attributes=attributes,
                resources=resources,
                input_files=inputs if len(inputs) > 0 else None,
                output_files=outputs if len(outputs) > 0 else None,
                pvc_size=task._storage,
                always_run=task._always_run)
            n_jobs_submitted += 1

            task_to_job_mapping[task] = j
            jobs_to_command[j] = cmd

        if dry_run:
            print("\n\n".join(commands))
            return None

        if delete_scratch_on_exit and used_remote_tmpdir:
            parents = list(jobs_to_command.keys())
            rm_cmd = f'gsutil -m rm -r {remote_tmpdir}'
            cmd = bash_flags + f'{activate_service_account} && {rm_cmd}'
            j = batch.create_job(image='google/cloud-sdk:237.0.0-alpine',
                                 command=['/bin/bash', '-c', cmd],
                                 parents=parents,
                                 attributes={'name': 'remove_tmpdir'},
                                 always_run=True)
            jobs_to_command[j] = cmd
            n_jobs_submitted += 1

        if verbose:
            print(
                f'Built DAG with {n_jobs_submitted} jobs in {round(time.time() - build_dag_start, 3)} seconds.'
            )

        submit_batch_start = time.time()
        batch = batch.submit(**(batch_submit_args or {}))

        jobs_to_command = {j.id: cmd for j, cmd in jobs_to_command.items()}

        if verbose:
            print(
                f'Submitted batch {batch.id} with {n_jobs_submitted} jobs in {round(time.time() - submit_batch_start, 3)} seconds:'
            )
            for jid, cmd in jobs_to_command.items():
                print(f'{jid}: {cmd}')

            print('')

        deploy_config = get_deploy_config()
        url = deploy_config.url('batch', f'/batches/{batch.id}')
        print(f'Submitted batch {batch.id}, see {url}')

        if open:
            webbrowser.open(url)
        if wait:
            print(f'Waiting for batch {batch.id}...')
            status = batch.wait()
            print(f'Batch {batch.id} complete: {status["state"]}')
        return batch
Code Example #11
File: backend.py  Project: MikeyRupert/hail
    def _run(self,
             batch,
             dry_run,
             verbose,
             delete_scratch_on_exit,
             wait=True,
             open=False,
             disable_progress_bar=False
             ):  # pylint: disable-msg=too-many-statements
        """
        Execute a batch.

        Warning
        -------
        This method should not be called directly. Instead, use :meth:`.Batch.run`
        and pass :class:`.ServiceBackend` specific arguments as key-word arguments.

        Parameters
        ----------
        batch: :class:`.Batch`
            Batch to execute.
        dry_run: :obj:`bool`
            If `True`, don't execute code.
        verbose: :obj:`bool`
            If `True`, print debugging output.
        delete_scratch_on_exit: :obj:`bool`
            If `True`, delete temporary directories with intermediate files.
        wait: :obj:`bool`, optional
            If `True`, wait for the batch to finish executing before returning.
        open: :obj:`bool`, optional
            If `True`, open the UI page for the batch.
        disable_progress_bar: :obj:`bool`, optional
            If `True`, disable the progress bar.
        """
        build_dag_start = time.time()

        bucket = self._batch_client.bucket
        subdir_name = 'batch-{}'.format(uuid.uuid4().hex[:12])

        remote_tmpdir = f'gs://{bucket}/batch/{subdir_name}'
        local_tmpdir = f'/io/batch/{subdir_name}'

        default_image = 'ubuntu:latest'

        attributes = copy.deepcopy(batch.attributes)
        if batch.name is not None:
            attributes['name'] = batch.name

        bc_batch = self._batch_client.create_batch(attributes=attributes)

        n_jobs_submitted = 0
        used_remote_tmpdir = False

        job_to_client_job_mapping = {}
        jobs_to_command = {}
        commands = []

        bash_flags = 'set -e' + ('x' if verbose else '') + '; '

        activate_service_account = 'gcloud -q auth activate-service-account ' \
                                   '--key-file=/gsa-key/key.json'

        def copy_input(r):
            if isinstance(r, InputResourceFile):
                return [(r._input_path, r._get_path(local_tmpdir))]
            assert isinstance(r, JobResourceFile)
            return [(r._get_path(remote_tmpdir), r._get_path(local_tmpdir))]

        def copy_internal_output(r):
            assert isinstance(r, JobResourceFile)
            return [(r._get_path(local_tmpdir), r._get_path(remote_tmpdir))]

        def copy_external_output(r):
            if isinstance(r, InputResourceFile):
                return [(r._input_path, dest) for dest in r._output_paths]
            assert isinstance(r, JobResourceFile)
            return [(r._get_path(local_tmpdir), dest)
                    for dest in r._output_paths]

        write_external_inputs = [
            x for r in batch._input_resources for x in copy_external_output(r)
        ]
        if write_external_inputs:

            def _cp(src, dst):
                return f'gsutil -m cp -R {src} {dst}'

            write_cmd = bash_flags + activate_service_account + ' && ' + \
                ' && '.join([_cp(*files) for files in write_external_inputs])

            if dry_run:
                commands.append(write_cmd)
            else:
                j = bc_batch.create_job(
                    image='google/cloud-sdk:237.0.0-alpine',
                    command=['/bin/bash', '-c', write_cmd],
                    attributes={'name': 'write_external_inputs'})
                jobs_to_command[j] = write_cmd
                n_jobs_submitted += 1

        for job in batch._jobs:
            inputs = [x for r in job._inputs for x in copy_input(r)]

            outputs = [
                x for r in job._internal_outputs
                for x in copy_internal_output(r)
            ]
            if outputs:
                used_remote_tmpdir = True
            outputs += [
                x for r in job._external_outputs
                for x in copy_external_output(r)
            ]

            env_vars = {
                r._uid: r._get_path(local_tmpdir)
                for r in job._mentioned
            }

            if job._image is None:
                if verbose:
                    print(
                        f"Using image '{default_image}' since no image was specified."
                    )

            make_local_tmpdir = f'mkdir -p {local_tmpdir}/{job._uid}/; '
            job_command = [cmd.strip() for cmd in job._command]

            cmd = bash_flags + make_local_tmpdir + " && ".join(job_command)

            if dry_run:
                commands.append(cmd)
                continue

            parents = [job_to_client_job_mapping[j] for j in job._dependencies]

            attributes = copy.deepcopy(job.attributes)
            if job.name:
                attributes['name'] = job.name

            resources = {}
            if job._cpu:
                resources['cpu'] = job._cpu
            if job._memory:
                resources['memory'] = job._memory

            j = bc_batch.create_job(
                image=job._image if job._image else default_image,
                command=['/bin/bash', '-c', cmd],
                parents=parents,
                attributes=attributes,
                resources=resources,
                input_files=inputs if len(inputs) > 0 else None,
                output_files=outputs if len(outputs) > 0 else None,
                pvc_size=job._storage,
                always_run=job._always_run,
                timeout=job._timeout,
                env=env_vars)

            n_jobs_submitted += 1

            job_to_client_job_mapping[job] = j
            jobs_to_command[j] = cmd

        if dry_run:
            print("\n\n".join(commands))
            return None

        if delete_scratch_on_exit and used_remote_tmpdir:
            parents = list(jobs_to_command.keys())
            rm_cmd = f'gsutil -m rm -r {remote_tmpdir}'
            cmd = bash_flags + f'{activate_service_account} && {rm_cmd}'
            j = bc_batch.create_job(image='google/cloud-sdk:237.0.0-alpine',
                                    command=['/bin/bash', '-c', cmd],
                                    parents=parents,
                                    attributes={'name': 'remove_tmpdir'},
                                    always_run=True)
            jobs_to_command[j] = cmd
            n_jobs_submitted += 1

        if verbose:
            print(
                f'Built DAG with {n_jobs_submitted} jobs in {round(time.time() - build_dag_start, 3)} seconds.'
            )

        submit_batch_start = time.time()
        bc_batch = bc_batch.submit(disable_progress_bar=disable_progress_bar)

        jobs_to_command = {j.id: cmd for j, cmd in jobs_to_command.items()}

        if verbose:
            print(
                f'Submitted batch {bc_batch.id} with {n_jobs_submitted} jobs in {round(time.time() - submit_batch_start, 3)} seconds:'
            )
            for jid, cmd in jobs_to_command.items():
                print(f'{jid}: {cmd}')

            print('')

        deploy_config = get_deploy_config()
        url = deploy_config.url('batch', f'/batches/{bc_batch.id}')
        print(f'Submitted batch {bc_batch.id}, see {url}')

        if open:
            webbrowser.open(url)
        if wait:
            print(f'Waiting for batch {bc_batch.id}...')
            status = bc_batch.wait()
            print(f'batch {bc_batch.id} complete: {status["state"]}')
        return bc_batch
Code Example #12
File: backend.py  Project: tuyanglin/hail
    def _run(self,
             batch: 'Batch',
             dry_run: bool,
             verbose: bool,
             delete_scratch_on_exit: bool,
             wait: bool = True,
             open: bool = False,
             disable_progress_bar: bool = False,
             callback: Optional[str] = None,
             **backend_kwargs):  # pylint: disable-msg=too-many-statements
        """Execute a batch.

        Warning
        -------
        This method should not be called directly. Instead, use :meth:`.Batch.run`
        and pass :class:`.ServiceBackend` specific arguments as key-word arguments.

        Parameters
        ----------
        batch: :class:`.Batch`
            Batch to execute.
        dry_run: :obj:`bool`
            If `True`, don't execute code.
        verbose: :obj:`bool`
            If `True`, print debugging output.
        delete_scratch_on_exit: :obj:`bool`
            If `True`, delete temporary directories with intermediate files.
        wait: :obj:`bool`, optional
            If `True`, wait for the batch to finish executing before returning.
        open: :obj:`bool`, optional
            If `True`, open the UI page for the batch.
        disable_progress_bar: :obj:`bool`, optional
            If `True`, disable the progress bar.
        callback: :obj:`str`, optional
            If not `None`, a URL that will receive at most one POST request
            after the entire batch completes.
        """

        if backend_kwargs:
            raise ValueError(
                f'ServiceBackend does not support any of these keywords: {backend_kwargs}'
            )

        build_dag_start = time.time()

        token = uuid.uuid4().hex[:6]
        remote_tmpdir = f'gs://{self._bucket_name}/batch/{token}'
        local_tmpdir = f'/io/batch/{token}'

        default_image = 'ubuntu:latest'

        attributes = copy.deepcopy(batch.attributes)
        if batch.name is not None:
            attributes['name'] = batch.name

        bc_batch = self._batch_client.create_batch(attributes=attributes,
                                                   callback=callback)

        n_jobs_submitted = 0
        used_remote_tmpdir = False

        job_to_client_job_mapping: Dict['Job', bc.Job] = {}
        jobs_to_command = {}
        commands = []

        bash_flags = 'set -e' + ('x' if verbose else '')

        activate_service_account = 'gcloud -q auth activate-service-account ' \
                                   '--key-file=/gsa-key/key.json'

        def copy_input(r):
            if isinstance(r, InputResourceFile):
                return [(r._input_path, r._get_path(local_tmpdir))]
            assert isinstance(r, JobResourceFile)
            return [(r._get_path(remote_tmpdir), r._get_path(local_tmpdir))]

        def copy_internal_output(r):
            assert isinstance(r, JobResourceFile)
            return [(r._get_path(local_tmpdir), r._get_path(remote_tmpdir))]

        def copy_external_output(r):
            if isinstance(r, InputResourceFile):
                return [(r._input_path, dest) for dest in r._output_paths]
            assert isinstance(r, JobResourceFile)
            return [(r._get_path(local_tmpdir), dest)
                    for dest in r._output_paths]

        def symlink_input_resource_group(r):
            symlinks = []
            if isinstance(r, ResourceGroup) and r._source is None:
                for name, irf in r._resources.items():
                    src = irf._get_path(local_tmpdir)
                    dest = f'{r._get_path(local_tmpdir)}.{name}'
                    symlinks.append(f'ln -sf {shq(src)} {shq(dest)}')
            return symlinks

        write_external_inputs = [
            x for r in batch._input_resources for x in copy_external_output(r)
        ]
        if write_external_inputs:

            def _cp(src, dst):
                return f'gsutil -m cp -R {shq(src)} {shq(dst)}'

            write_cmd = f'''
{bash_flags}
{activate_service_account}
{' && '.join([_cp(*files) for files in write_external_inputs])}
'''

            if dry_run:
                commands.append(write_cmd)
            else:
                j = bc_batch.create_job(
                    image='google/cloud-sdk:237.0.0-alpine',
                    command=['/bin/bash', '-c', write_cmd],
                    attributes={'name': 'write_external_inputs'})
                jobs_to_command[j] = write_cmd
                n_jobs_submitted += 1

        for job in batch._jobs:
            inputs = [x for r in job._inputs for x in copy_input(r)]

            outputs = [
                x for r in job._internal_outputs
                for x in copy_internal_output(r)
            ]
            if outputs:
                used_remote_tmpdir = True
            outputs += [
                x for r in job._external_outputs
                for x in copy_external_output(r)
            ]

            symlinks = [
                x for r in job._mentioned
                for x in symlink_input_resource_group(r)
            ]

            env_vars = {
                **job._env,
                **{r._uid: r._get_path(local_tmpdir)
                   for r in job._mentioned}
            }

            if job._image is None:
                if verbose:
                    print(
                        f"Using image '{default_image}' since no image was specified."
                    )

            make_local_tmpdir = f'mkdir -p {local_tmpdir}/{job._job_id}'
            job_command = [cmd.strip() for cmd in job._command]

            prepared_job_command = (f'{{\n{x}\n}}' for x in job_command)
            cmd = f'''
{bash_flags}
{make_local_tmpdir}
{"; ".join(symlinks)}
{" && ".join(prepared_job_command)}
'''

            if dry_run:
                commands.append(cmd)
                continue

            parents = [job_to_client_job_mapping[j] for j in job._dependencies]

            attributes = copy.deepcopy(
                job.attributes) if job.attributes else dict()
            if job.name:
                attributes['name'] = job.name

            resources = {}
            if job._cpu:
                resources['cpu'] = job._cpu
            if job._memory:
                resources['memory'] = job._memory
            if job._storage:
                resources['storage'] = job._storage

            j = bc_batch.create_job(
                image=job._image if job._image else default_image,
                command=['/bin/bash', '-c', cmd],
                parents=parents,
                attributes=attributes,
                resources=resources,
                input_files=inputs if len(inputs) > 0 else None,
                output_files=outputs if len(outputs) > 0 else None,
                always_run=job._always_run,
                timeout=job._timeout,
                gcsfuse=job._gcsfuse if len(job._gcsfuse) > 0 else None,
                env=env_vars,
                requester_pays_project=batch.requester_pays_project)

            n_jobs_submitted += 1

            job_to_client_job_mapping[job] = j
            jobs_to_command[j] = cmd

        if dry_run:
            print("\n\n".join(commands))
            return None

        if delete_scratch_on_exit and used_remote_tmpdir:
            parents = list(jobs_to_command.keys())
            rm_cmd = f'gsutil -m rm -r {remote_tmpdir}'
            cmd = f'''
{bash_flags}
{activate_service_account}
{rm_cmd}
'''
            j = bc_batch.create_job(image='google/cloud-sdk:237.0.0-alpine',
                                    command=['/bin/bash', '-c', cmd],
                                    parents=parents,
                                    attributes={'name': 'remove_tmpdir'},
                                    always_run=True)
            jobs_to_command[j] = cmd
            n_jobs_submitted += 1

        if verbose:
            print(
                f'Built DAG with {n_jobs_submitted} jobs in {round(time.time() - build_dag_start, 3)} seconds.'
            )

        submit_batch_start = time.time()
        bc_batch = bc_batch.submit(disable_progress_bar=disable_progress_bar)

        jobs_to_command = {j.id: cmd for j, cmd in jobs_to_command.items()}

        if verbose:
            print(
                f'Submitted batch {bc_batch.id} with {n_jobs_submitted} jobs in {round(time.time() - submit_batch_start, 3)} seconds:'
            )
            for jid, cmd in jobs_to_command.items():
                print(f'{jid}: {cmd}')

            print('')

        deploy_config = get_deploy_config()
        url = deploy_config.url('batch', f'/batches/{bc_batch.id}')
        print(f'Submitted batch {bc_batch.id}, see {url}')

        if open:
            webbrowser.open(url)
        if wait:
            print(f'Waiting for batch {bc_batch.id}...')
            status = bc_batch.wait()
            print(f'batch {bc_batch.id} complete: {status["state"]}')
        return bc_batch
Code Example #13
def main(args):  # pylint: disable=unused-argument
    deploy_config = get_deploy_config()
    print(f'  location: {deploy_config.location()}')
    print(f'  default_namespace: {deploy_config._default_namespace}')
    print(f'  domain: {deploy_config._domain}')
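On a typical external installation the output would resemble the lines below; the concrete values depend entirely on the active deploy configuration and are shown only as an illustration:

  location: external
  default_namespace: default
  domain: hail.is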
Code Example #14
File: backend.py  Project: chrisvittal/hail
    async def _async_run(
            self,
            batch: 'batch.Batch',
            dry_run: bool,
            verbose: bool,
            delete_scratch_on_exit: bool,
            wait: bool = True,
            open: bool = False,
            disable_progress_bar: bool = False,
            callback: Optional[str] = None,
            token: Optional[str] = None,
            **backend_kwargs):  # pylint: disable-msg=too-many-statements
        if backend_kwargs:
            raise ValueError(
                f'ServiceBackend does not support any of these keywords: {backend_kwargs}'
            )

        build_dag_start = time.time()

        uid = uuid.uuid4().hex[:6]
        batch_remote_tmpdir = f'{self.remote_tmpdir}{uid}'
        local_tmpdir = f'/io/batch/{uid}'

        default_image = 'ubuntu:20.04'

        attributes = copy.deepcopy(batch.attributes)
        if batch.name is not None:
            attributes['name'] = batch.name

        bc_batch = self._batch_client.create_batch(
            attributes=attributes,
            callback=callback,
            token=token,
            cancel_after_n_failures=batch._cancel_after_n_failures)

        n_jobs_submitted = 0
        used_remote_tmpdir = False

        job_to_client_job_mapping: Dict[_job.Job, bc.Job] = {}
        jobs_to_command = {}
        commands = []

        bash_flags = 'set -e' + ('x' if verbose else '')

        def copy_input(r):
            if isinstance(r, resource.InputResourceFile):
                return [(r._input_path, r._get_path(local_tmpdir))]
            assert isinstance(
                r, (resource.JobResourceFile, resource.PythonResult))
            return [(r._get_path(batch_remote_tmpdir),
                     r._get_path(local_tmpdir))]

        def copy_internal_output(r):
            assert isinstance(
                r, (resource.JobResourceFile, resource.PythonResult))
            return [(r._get_path(local_tmpdir),
                     r._get_path(batch_remote_tmpdir))]

        def copy_external_output(r):
            if isinstance(r, resource.InputResourceFile):
                return [(r._input_path, dest) for dest in r._output_paths]
            assert isinstance(
                r, (resource.JobResourceFile, resource.PythonResult))
            return [(r._get_path(local_tmpdir), dest)
                    for dest in r._output_paths]

        def symlink_input_resource_group(r):
            symlinks = []
            if isinstance(r, resource.ResourceGroup) and r._source is None:
                for name, irf in r._resources.items():
                    src = irf._get_path(local_tmpdir)
                    dest = f'{r._get_path(local_tmpdir)}.{name}'
                    symlinks.append(f'ln -sf {shq(src)} {shq(dest)}')
            return symlinks

        write_external_inputs = [
            x for r in batch._input_resources for x in copy_external_output(r)
        ]
        if write_external_inputs:
            transfers_bytes = orjson.dumps([{
                "from": src,
                "to": dest
            } for src, dest in write_external_inputs])
            transfers = transfers_bytes.decode('utf-8')
            write_cmd = [
                'python3', '-m', 'hailtop.aiotools.copy', 'null', transfers
            ]
            if dry_run:
                commands.append(' '.join(shq(x) for x in write_cmd))
            else:
                j = bc_batch.create_job(
                    image=HAIL_GENETICS_HAIL_IMAGE,
                    command=write_cmd,
                    attributes={'name': 'write_external_inputs'})
                jobs_to_command[j] = ' '.join(shq(x) for x in write_cmd)
                n_jobs_submitted += 1

        pyjobs = [j for j in batch._jobs if isinstance(j, _job.PythonJob)]
        for job in pyjobs:
            if job._image is None:
                version = sys.version_info
                if version.major != 3 or version.minor not in (6, 7, 8):
                    raise BatchException(
                        f"You must specify 'image' for Python jobs if you are using a Python version other than 3.6, 3.7, or 3.8 (you are using {version})"
                    )
                job._image = f'hailgenetics/python-dill:{version.major}.{version.minor}-slim'

        with tqdm(total=len(batch._jobs),
                  desc='upload code',
                  disable=disable_progress_bar) as pbar:

            async def compile_job(job):
                used_remote_tmpdir = await job._compile(local_tmpdir,
                                                        batch_remote_tmpdir,
                                                        dry_run=dry_run)
                pbar.update(1)
                return used_remote_tmpdir

            used_remote_tmpdir_results = await bounded_gather(
                *[functools.partial(compile_job, j) for j in batch._jobs],
                parallelism=150)
            used_remote_tmpdir |= any(used_remote_tmpdir_results)

        for job in tqdm(batch._jobs,
                        desc='create job objects',
                        disable=disable_progress_bar):
            inputs = [x for r in job._inputs for x in copy_input(r)]

            outputs = [
                x for r in job._internal_outputs
                for x in copy_internal_output(r)
            ]
            if outputs:
                used_remote_tmpdir = True
            outputs += [
                x for r in job._external_outputs
                for x in copy_external_output(r)
            ]

            symlinks = [
                x for r in job._mentioned
                for x in symlink_input_resource_group(r)
            ]

            if job._image is None:
                if verbose:
                    print(
                        f"Using image '{default_image}' since no image was specified."
                    )

            make_local_tmpdir = f'mkdir -p {local_tmpdir}/{job._dirname}'

            job_command = [cmd.strip() for cmd in job._wrapper_code]
            prepared_job_command = (f'{{\n{x}\n}}' for x in job_command)
            cmd = f'''
{bash_flags}
{make_local_tmpdir}
{"; ".join(symlinks)}
{" && ".join(prepared_job_command)}
'''

            user_code = '\n\n'.join(job._user_code) if job._user_code else None

            if dry_run:
                formatted_command = f'''
================================================================================
# Job {job._job_id} {f": {job.name}" if job.name else ''}

--------------------------------------------------------------------------------
## USER CODE
--------------------------------------------------------------------------------
{user_code}

--------------------------------------------------------------------------------
## COMMAND
--------------------------------------------------------------------------------
{cmd}
================================================================================
'''
                commands.append(formatted_command)
                continue

            parents = [job_to_client_job_mapping[j] for j in job._dependencies]

            attributes = copy.deepcopy(
                job.attributes) if job.attributes else {}
            if job.name:
                attributes['name'] = job.name

            resources: Dict[str, Any] = {}
            if job._cpu:
                resources['cpu'] = job._cpu
            if job._memory:
                resources['memory'] = job._memory
            if job._storage:
                resources['storage'] = job._storage
            if job._machine_type:
                resources['machine_type'] = job._machine_type
            if job._preemptible is not None:
                resources['preemptible'] = job._preemptible

            image = job._image if job._image else default_image
            image_ref = parse_docker_image_reference(image)
            if image_ref.hosted_in('dockerhub') and image_ref.name(
            ) not in HAIL_GENETICS_IMAGES:
                warnings.warn(f'Using an image {image} from Docker Hub. '
                              f'Jobs may fail due to Docker Hub rate limits.')

            env = {**job._env, 'BATCH_TMPDIR': local_tmpdir}

            j = bc_batch.create_job(
                image=image,
                command=[
                    job._shell if job._shell else DEFAULT_SHELL, '-c', cmd
                ],
                parents=parents,
                attributes=attributes,
                resources=resources,
                input_files=inputs if len(inputs) > 0 else None,
                output_files=outputs if len(outputs) > 0 else None,
                always_run=job._always_run,
                timeout=job._timeout,
                cloudfuse=job._cloudfuse if len(job._cloudfuse) > 0 else None,
                env=env,
                requester_pays_project=batch.requester_pays_project,
                mount_tokens=True,
                user_code=user_code)

            n_jobs_submitted += 1

            job_to_client_job_mapping[job] = j
            jobs_to_command[j] = cmd

        if dry_run:
            print("\n\n".join(commands))
            return None

        if delete_scratch_on_exit and used_remote_tmpdir:
            parents = list(jobs_to_command.keys())
            j = bc_batch.create_job(image=HAIL_GENETICS_HAIL_IMAGE,
                                    command=[
                                        'python3', '-m',
                                        'hailtop.aiotools.delete',
                                        batch_remote_tmpdir
                                    ],
                                    parents=parents,
                                    attributes={'name': 'remove_tmpdir'},
                                    always_run=True)
            jobs_to_command[j] = cmd
            n_jobs_submitted += 1

        if verbose:
            print(
                f'Built DAG with {n_jobs_submitted} jobs in {round(time.time() - build_dag_start, 3)} seconds.'
            )

        submit_batch_start = time.time()
        batch_handle = bc_batch.submit(
            disable_progress_bar=disable_progress_bar)

        jobs_to_command = {j.id: cmd for j, cmd in jobs_to_command.items()}

        if verbose:
            print(
                f'Submitted batch {batch_handle.id} with {n_jobs_submitted} jobs in {round(time.time() - submit_batch_start, 3)} seconds:'
            )
            for jid, cmd in jobs_to_command.items():
                print(f'{jid}: {cmd}')
            print('')

        deploy_config = get_deploy_config()
        url = deploy_config.url('batch', f'/batches/{batch_handle.id}')
        print(f'Submitted batch {batch_handle.id}, see {url}')

        if open:
            webbrowser.open(url)
        if wait:
            print(f'Waiting for batch {batch_handle.id}...')
            status = batch_handle.wait()
            print(f'batch {batch_handle.id} complete: {status["state"]}')
        return batch_handle
Code Example #15
import concurrent.futures
import aiohttp
import gidgethub
import zulip

from hailtop.config import get_deploy_config
from hailtop.utils import check_shell, check_shell_output
from .constants import GITHUB_CLONE_URL, AUTHORIZED_USERS
from .build import BuildConfiguration, Code
from .globals import is_test_deployment

repos_lock = asyncio.Lock()

log = logging.getLogger('ci')

CALLBACK_URL = get_deploy_config().url('ci', '/api/v1alpha/batch_callback')

zulip_client = zulip.Client(config_file="/zulip-config/.zuliprc")


class Repo:
    def __init__(self, owner, name):
        assert isinstance(owner, str)
        assert isinstance(name, str)
        self.owner = owner
        self.name = name
        self.url = f'{GITHUB_CLONE_URL}{owner}/{name}.git'

    def __eq__(self, other):
        return self.owner == other.owner and self.name == other.name
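To make the CALLBACK_URL construction above concrete, here is a hedged sketch of the same call in isolation; the resulting hostname depends on the configured domain and namespace, so the URL in the comment is illustrative only:

from hailtop.config import get_deploy_config

deploy_config = get_deploy_config()
# e.g. 'https://ci.hail.is/api/v1alpha/batch_callback' on an external deployment
callback_url = deploy_config.url('ci', '/api/v1alpha/batch_callback')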