Example #1
def run(job_mode: c.JobMode,
        run_args: Optional[List[str]] = None,
        script_args: Optional[List[str]] = None,
        image_id: Optional[str] = None,
        **build_image_kwargs) -> None:
    """Builds an image using the supplied **build_image_kwargs and calls `docker
  run` on the resulting image using sensible defaults.
  Keyword args:
  - job_mode: c.JobMode.
  - run_args: extra arguments to supply to `docker run` after our defaults.
  - script_args: extra arguments to supply to the entrypoint. (You can
  - override the default container entrypoint by supplying a new one inside
    run_args.)
  - image_id: ID of the image to run. Supplying this will skip an image build.
  any extra kwargs supplied are passed through to build_image.
  """
    if run_args is None:
        run_args = []

    if script_args is None:
        script_args = []

    if image_id is None:
        image_id = b.build_image(job_mode, **build_image_kwargs)

    base_cmd = _run_cmd(job_mode, run_args)

    command = base_cmd + [image_id] + script_args

    logging.info("Running command: {}".format(' '.join(command)))
    subprocess.call(command)
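A minimal usage sketch (hypothetical values; c and b are the module aliases used above, and package is one of the kwargs this codebase forwards to build_image, as in Example #5):

# Build a CPU-mode image and run it, passing extra flags to `docker run`
# and extra arguments to the container entrypoint.
run(c.JobMode.CPU,
    run_args=["--env", "FOO=bar"],
    script_args=["--epochs", "10"],
    package="trainer.train")  # hypothetical module, forwarded to build_image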
Example #2
def generate_image_tag(project_id: str,
                       docker_args: Dict[str, Any],
                       dry_run: bool = False) -> str:
  """Builds a new Docker image and pushes it to the user's Google Container
  Registry, tagged with the UUID of the generated image.

  If dry_run is True, logs the Docker image build parameters and returns a
  placeholder tag.
  """
  logging.info("Generating Docker image with parameters:")
  logging.info(t.yellow(pformat(docker_args)))

  if dry_run:
    logging.info("Dry run - skipping actual 'docker build' and 'docker push'.")
    image_tag = "dry_run_tag"
  else:
    image_id = db.build_image(**docker_args)
    image_tag = dp.push_uuid_tag(project_id, image_id)

  return image_tag
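A hedged usage sketch (the project id is hypothetical; the docker_args dict mirrors the job_mode keyword that build_image takes in the examples above):

# Dry run: logs the build parameters and returns "dry_run_tag" without
# invoking `docker build` or `docker push`.
tag = generate_image_tag("my-gcp-project",
                         {"job_mode": c.JobMode.CPU},
                         dry_run=True)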
Example #3
def _rebuild_containers(
    jobs: Iterable[Job],
    project_id: Optional[str] = None,
) -> Dict[Job, str]:
    '''Rebuilds all of the containers needed for the given jobs.

    This also tags the containers and uploads them to the appropriate
    project cloud registry if necessary.

    Args:
      jobs: iterable of jobs for which to rebuild containers
      project_id: project id; required for jobs targeting CAIP or GKE

    Returns:
      dictionary mapping each job to its new image tag (or local image id)
    '''

    image_id_map = {}

    container_specs = {j.experiment.container_spec for j in jobs}
    for c in container_specs:
        image_id = build_image(**c.spec)
        cs_jobs = [j for j in jobs if j.experiment.container_spec == c]

        image_tag = None
        for j in cs_jobs:
            if j.spec.platform in [Platform.CAIP, Platform.GKE]:
                assert project_id is not None, (
                    'project id must be specified for CAIP, GKE jobs')

                if image_tag is None:
                    image_tag = push_uuid_tag(project_id, image_id)
                image_id_map[j] = image_tag
            else:
                image_id_map[j] = image_id

    return image_id_map
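A hedged sketch of a call site (jobs and the project id are hypothetical; Job and Platform come from the surrounding codebase):

# CAIP/GKE jobs receive pushed registry tags; local jobs receive raw image ids.
tag_map = _rebuild_containers(jobs, project_id="my-gcp-project")
for job, tag in tag_map.items():
    logging.info("job {} -> image {}".format(job, tag))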
Example #4
def run_experiments(job_mode: c.JobMode,
                    run_args: Optional[List[str]] = None,
                    script_args: Optional[List[str]] = None,
                    image_id: Optional[str] = None,
                    dry_run: bool = False,
                    experiment_config: Optional[ce.ExpConf] = None,
                    xgroup: Optional[str] = None,
                    **build_image_kwargs) -> None:
    """Builds an image using the supplied **build_image_kwargs and calls `docker
  run` on the resulting image using sensible defaults.

  Keyword args:

  - job_mode: c.JobMode.

  - run_args: extra arguments to supply to `docker run` after our defaults.
  - script_args: extra arguments to supply to the entrypoint. (You can
  - override the default container entrypoint by supplying a new one inside
    run_args.)
  - image_id: ID of the image to run. Supplying this will skip an image build.
  - experiment_config: dict of string to list, boolean, string or int. Any
    lists will trigger a cartesian product out with the rest of the config. A
    job will be executed for every combination of parameters in the experiment
    config.
  - dry_run: if True, no actual jobs will be executed and docker won't
    actually build; logging side effects will show the user what will happen
    without dry_run=True.

  any extra kwargs supplied are passed through to build_image.
  """
    if run_args is None:
        run_args = []

    if script_args is None:
        script_args = []

    if experiment_config is None:
        experiment_config = {}

    docker_args = dict(build_image_kwargs)
    docker_args['job_mode'] = job_mode

    engine = get_mem_engine() if dry_run else get_sql_engine()

    with session_scope(engine) as session:
        container_spec = generate_container_spec(session, docker_args,
                                                 image_id)

        if image_id is None:
            if dry_run:
                logging.info("Dry run - skipping actual 'docker build'.")
                image_id = 'dry_run_tag'
            else:
                image_id = b.build_image(**docker_args)

        experiments = create_experiments(
            session=session,
            container_spec=container_spec,
            script_args=script_args,
            experiment_config=experiment_config,
            xgroup=xgroup,
        )

        job_specs = [
            JobSpec.get_or_create(
                experiment=x,
                spec=_create_job_spec_dict(
                    experiment=x,
                    job_mode=job_mode,
                    run_args=run_args,
                    image_id=image_id,
                ),
                platform=Platform.LOCAL,
            ) for x in experiments
        ]

        try:
            execute_jobs(job_specs=job_specs, dry_run=dry_run)
        except Exception as e:
            logging.error(f'exception: {e}')
            session.commit()  # commit explicitly; otherwise the session is rolled back on exit
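A hedged usage sketch (package is a hypothetical kwarg forwarded to build_image, as in Example #5 below; list-valued config entries expand into one job per parameter combination):

# Dry-run a two-job sweep over learning rates without building or running.
run_experiments(c.JobMode.CPU,
                script_args=["--batch_size", "64"],
                experiment_config={"learning_rate": [0.01, 0.001]},
                dry_run=True,
                package="trainer.train")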
Example #5
def run_app(arg_input):
    """Main function to run the Caliban app. Accepts a Namespace-type output of an
  argparse argument parser.

  """
    args = vars(arg_input)
    script_args = c.extract_script_args(args)

    command = args["command"]

    if command == "cluster":
        return gke.cli.run_cli_command(args)

    job_mode = cli.resolve_job_mode(args)
    docker_args = cli.generate_docker_args(job_mode, args)
    docker_run_args = args.get("docker_run_args", [])

    if command == "shell":
        mount_home = not args["bare"]
        image_id = args.get("image_id")
        shell = args["shell"]
        ps.run_interactive(job_mode,
                           image_id=image_id,
                           run_args=docker_run_args,
                           mount_home=mount_home,
                           shell=shell,
                           **docker_args)

    elif command == "notebook":
        port = args.get("port")
        lab = args.get("lab")
        version = args.get("jupyter_version")
        mount_home = not args["bare"]
        pn.run_notebook(job_mode,
                        port=port,
                        lab=lab,
                        version=version,
                        run_args=docker_run_args,
                        mount_home=mount_home,
                        **docker_args)

    elif command == "build":
        package = args["module"]
        b.build_image(job_mode, package=package, **docker_args)

    elif command == "status":
        caliban.history.cli.get_status(args)

    elif command == "stop":
        caliban.history.cli.stop(args)

    elif command == "resubmit":
        caliban.history.cli.resubmit(args)

    elif command == "run":
        dry_run = args["dry_run"]
        package = args["module"]
        image_id = args.get("image_id")
        exp_config = args.get("experiment_config")
        xgroup = args.get("xgroup")

        pr.run_experiments(job_mode,
                           run_args=docker_run_args,
                           script_args=script_args,
                           image_id=image_id,
                           experiment_config=exp_config,
                           dry_run=dry_run,
                           package=package,
                           xgroup=xgroup,
                           **docker_args)

    elif command == "cloud":
        project_id = c.extract_project_id(args)
        region = c.extract_region(args)
        cloud_key = c.extract_cloud_key(args)

        dry_run = args["dry_run"]
        package = args["module"]
        job_name = args.get("name")
        gpu_spec = args.get("gpu_spec")
        tpu_spec = args.get("tpu_spec")
        image_tag = args.get("image_tag")
        machine_type = args.get("machine_type")
        exp_config = args.get("experiment_config")
        labels = cu.sanitize_labels(args.get("label") or [])
        xgroup = args.get("xgroup")

        # Arguments to internally build the image required to submit to Cloud.
        docker_m = {"job_mode": job_mode, "package": package, **docker_args}

        cloud.submit_ml_job(
            job_mode=job_mode,
            docker_args=docker_m,
            region=region,
            project_id=project_id,
            credentials_path=cloud_key,
            dry_run=dry_run,
            job_name=job_name,
            machine_type=machine_type,
            gpu_spec=gpu_spec,
            tpu_spec=tpu_spec,
            image_tag=image_tag,
            labels=labels,
            script_args=script_args,
            experiment_config=exp_config,
            xgroup=xgroup,
        )
    else:
        logging.info("Unknown command: {}".format(command))
        sys.exit(1)
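A hedged sketch of the entry-point wiring (parse_flags is a hypothetical stand-in for whatever constructs Caliban's argparse parser; only the Namespace-in contract from the docstring above is taken from the source):

def main():
    # Hypothetical: parse_flags builds the argparse parser and parses
    # sys.argv; run_app then dispatches on the resulting Namespace.
    run_app(parse_flags(sys.argv[1:]))

if __name__ == "__main__":
    main()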