def run(job_mode: c.JobMode,
        run_args: Optional[List[str]] = None,
        script_args: Optional[List[str]] = None,
        image_id: Optional[str] = None,
        **build_image_kwargs) -> None:
  """Builds an image using the supplied **build_image_kwargs and calls `docker
  run` on the resulting image using sensible defaults.

  Keyword args:

  - job_mode: c.JobMode.
  - run_args: extra arguments to supply to `docker run` after our defaults.
  - script_args: extra arguments to supply to the entrypoint. (You can
    override the default container entrypoint by supplying a new one inside
    run_args.)
  - image_id: ID of the image to run. Supplying this will skip an image build.

  Any extra kwargs supplied are passed through to build_image.
  """
  if run_args is None:
    run_args = []

  if script_args is None:
    script_args = []

  # Build a fresh image unless the caller supplied a prebuilt image ID.
  if image_id is None:
    image_id = b.build_image(job_mode, **build_image_kwargs)

  base_cmd = _run_cmd(job_mode, run_args)
  command = base_cmd + [image_id] + script_args

  logging.info("Running command: {}".format(' '.join(command)))
  subprocess.call(command)
  return None
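

# Usage sketch (illustrative, not part of the module): a direct call to `run`
# for a local CPU job. `package` is assumed to be one of the build_image
# kwargs forwarded via **build_image_kwargs, and the script flags are
# hypothetical arguments consumed by the container's entrypoint.
def _example_run() -> None:
  run(c.JobMode.CPU,
      run_args=["--rm"],
      script_args=["--learning_rate", "0.01"],
      package="trainer.train")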


def generate_image_tag(project_id, docker_args, dry_run: bool = False):
  """Generates a new Docker image and pushes it to the user's GCloud Container
  Registry, tagged using the UUID of the generated image.

  If dry_run is True, logs the Docker image build parameters and returns a
  bogus tag.
  """
  logging.info("Generating Docker image with parameters:")
  logging.info(t.yellow(pformat(docker_args)))

  if dry_run:
    logging.info("Dry run - skipping actual 'docker build' and 'docker push'.")
    image_tag = "dry_run_tag"
  else:
    image_id = db.build_image(**docker_args)
    image_tag = dp.push_uuid_tag(project_id, image_id)

  return image_tag
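

# Usage sketch (illustrative only): a dry run logs the would-be build
# parameters and returns the placeholder tag without touching Docker. The
# project id and docker_args contents here are hypothetical; real values come
# from the CLI layer.
def _example_generate_image_tag() -> str:
  return generate_image_tag("my-gcp-project",
                            {"package": "trainer.train"},
                            dry_run=True)  # returns "dry_run_tag"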


def _rebuild_containers(
    jobs: Iterable[Job],
    project_id: Optional[str] = None,
) -> Dict[Job, str]:
  '''Rebuilds all of the containers needed for the given jobs.

  This also tags and uploads the containers to the appropriate project
  cloud registry if necessary.

  Args:
    jobs: iterable of jobs for which to rebuild containers
    project_id: project id

  Returns:
    dictionary mapping jobs to new image tags
  '''
  image_id_map = {}

  # Build each distinct container spec exactly once, sharing the resulting
  # image across every job that uses that spec.
  container_specs = set(j.experiment.container_spec for j in jobs)

  for c in container_specs:
    image_id = build_image(**c.spec)
    cs_jobs = [j for j in jobs if j.experiment.container_spec == c]

    image_tag = None
    for j in cs_jobs:
      if j.spec.platform in [Platform.CAIP, Platform.GKE]:
        assert project_id is not None, (
            'project id must be specified for CAIP, GKE jobs')
        # Push once per container spec and reuse the tag for later jobs.
        if image_tag is None:
          image_tag = push_uuid_tag(project_id, image_id)
        image_id_map[j] = image_tag
      else:
        image_id_map[j] = image_id

  return image_id_map
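

# Usage sketch (illustrative only): rebuild the containers for a batch of
# jobs loaded elsewhere (e.g. from the history store). The project id is
# hypothetical and is only required when some job targets CAIP or GKE.
def _example_rebuild(jobs: Iterable[Job]) -> Dict[Job, str]:
  return _rebuild_containers(jobs, project_id="my-gcp-project")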


def run_experiments(job_mode: c.JobMode,
                    run_args: Optional[List[str]] = None,
                    script_args: Optional[List[str]] = None,
                    image_id: Optional[str] = None,
                    dry_run: bool = False,
                    experiment_config: Optional[ce.ExpConf] = None,
                    xgroup: Optional[str] = None,
                    **build_image_kwargs) -> None:
  """Builds an image using the supplied **build_image_kwargs and calls `docker
  run` on the resulting image using sensible defaults.

  Keyword args:

  - job_mode: c.JobMode.
  - run_args: extra arguments to supply to `docker run` after our defaults.
  - script_args: extra arguments to supply to the entrypoint. (You can
    override the default container entrypoint by supplying a new one inside
    run_args.)
  - image_id: ID of the image to run. Supplying this will skip an image build.
  - experiment_config: dict of string to list, boolean, string or int. Any
    list-valued entries trigger a cartesian product with the rest of the
    config; a job is executed for every combination of parameters in the
    experiment config.
  - dry_run: if True, no jobs are executed and no Docker build runs; logging
    side effects show the user what would happen without the dry run.

  Any extra kwargs supplied are passed through to build_image.
  """
  if run_args is None:
    run_args = []

  if script_args is None:
    script_args = []

  if experiment_config is None:
    experiment_config = {}

  docker_args = {k: v for k, v in build_image_kwargs.items()}
  docker_args['job_mode'] = job_mode

  # Dry runs use an in-memory database; real runs persist to the SQL store.
  engine = get_mem_engine() if dry_run else get_sql_engine()

  with session_scope(engine) as session:
    container_spec = generate_container_spec(session, docker_args, image_id)

    if image_id is None:
      if dry_run:
        logging.info("Dry run - skipping actual 'docker build'.")
        image_id = 'dry_run_tag'
      else:
        image_id = b.build_image(**docker_args)

    experiments = create_experiments(
        session=session,
        container_spec=container_spec,
        script_args=script_args,
        experiment_config=experiment_config,
        xgroup=xgroup,
    )

    job_specs = [
        JobSpec.get_or_create(
            experiment=x,
            spec=_create_job_spec_dict(
                experiment=x,
                job_mode=job_mode,
                run_args=run_args,
                image_id=image_id,
            ),
            platform=Platform.LOCAL,
        ) for x in experiments
    ]

    try:
      execute_jobs(job_specs=job_specs, dry_run=dry_run)
    except Exception as e:
      logging.error(f'exception: {e}')
      session.commit()  # commit here, otherwise will be rolled back
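

# Usage sketch (illustrative only): list-valued entries in experiment_config
# expand into a cartesian product, so this dry run plans 2 x 2 = 4 local jobs,
# one per (learning_rate, batch_size) combination. `package` is an assumed
# build_image kwarg; with dry_run=True nothing is built or executed.
def _example_run_experiments() -> None:
  run_experiments(c.JobMode.CPU,
                  experiment_config={
                      "learning_rate": [0.01, 0.001],
                      "batch_size": [32, 64],
                  },
                  dry_run=True,
                  package="trainer.train")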


def run_app(arg_input):
  """Main function to run the Caliban app. Accepts a Namespace-type output of
  an argparse argument parser.
  """
  args = vars(arg_input)
  script_args = c.extract_script_args(args)

  command = args["command"]

  if command == "cluster":
    return gke.cli.run_cli_command(args)

  job_mode = cli.resolve_job_mode(args)
  docker_args = cli.generate_docker_args(job_mode, args)
  docker_run_args = args.get("docker_run_args", [])

  if command == "shell":
    mount_home = not args['bare']
    image_id = args.get("image_id")
    shell = args['shell']
    ps.run_interactive(job_mode,
                       image_id=image_id,
                       run_args=docker_run_args,
                       mount_home=mount_home,
                       shell=shell,
                       **docker_args)

  elif command == "notebook":
    port = args.get("port")
    lab = args.get("lab")
    version = args.get("jupyter_version")
    mount_home = not args['bare']
    pn.run_notebook(job_mode,
                    port=port,
                    lab=lab,
                    version=version,
                    run_args=docker_run_args,
                    mount_home=mount_home,
                    **docker_args)

  elif command == "build":
    package = args["module"]
    b.build_image(job_mode, package=package, **docker_args)

  elif command == 'status':
    caliban.history.cli.get_status(args)

  elif command == 'stop':
    caliban.history.cli.stop(args)

  elif command == 'resubmit':
    caliban.history.cli.resubmit(args)

  elif command == "run":
    dry_run = args["dry_run"]
    package = args["module"]
    image_id = args.get("image_id")
    exp_config = args.get("experiment_config")
    xgroup = args.get('xgroup')

    pr.run_experiments(job_mode,
                       run_args=docker_run_args,
                       script_args=script_args,
                       image_id=image_id,
                       experiment_config=exp_config,
                       dry_run=dry_run,
                       package=package,
                       xgroup=xgroup,
                       **docker_args)

  elif command == "cloud":
    project_id = c.extract_project_id(args)
    region = c.extract_region(args)
    cloud_key = c.extract_cloud_key(args)

    dry_run = args["dry_run"]
    package = args["module"]
    job_name = args.get("name")
    gpu_spec = args.get("gpu_spec")
    tpu_spec = args.get("tpu_spec")
    image_tag = args.get("image_tag")
    machine_type = args.get("machine_type")
    exp_config = args.get("experiment_config")
    labels = cu.sanitize_labels(args.get("label") or [])
    xgroup = args.get('xgroup')

    # Arguments to internally build the image required to submit to Cloud.
    docker_m = {"job_mode": job_mode, "package": package, **docker_args}

    cloud.submit_ml_job(
        job_mode=job_mode,
        docker_args=docker_m,
        region=region,
        project_id=project_id,
        credentials_path=cloud_key,
        dry_run=dry_run,
        job_name=job_name,
        machine_type=machine_type,
        gpu_spec=gpu_spec,
        tpu_spec=tpu_spec,
        image_tag=image_tag,
        labels=labels,
        script_args=script_args,
        experiment_config=exp_config,
        xgroup=xgroup,
    )
  else:
    logging.info("Unknown command: {}".format(command))
    sys.exit(1)
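

# Usage sketch (illustrative only): run_app consumes the parsed Namespace
# produced by Caliban's argparse-based CLI. `parse_flags` below is an assumed
# name for that parser; substitute the project's real parser entrypoint.
def _example_main() -> None:
  run_app(parse_flags(sys.argv[1:]))  # parse_flags: hypothetical CLI parser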