def setup():
    """Creates examples treebeard.yaml configuration file"""
    # NOTE: the docstring is kept verbatim because it may be surfaced as
    # command help text by the CLI framework.
    config_already_present = Path("treebeard.yaml").is_file()
    if config_already_present:
        # Refuse to touch an existing config in the current directory.
        fatal_exit("π found existing treebeard.yaml file here")

    create_example_yaml()
    click.echo(
        "π created example treebeard.yaml, please update it for your project"
    )
def get_treebeard_config() -> TreebeardConfig:
    """Build the project configuration from ``treebeard.yaml`` in the cwd.

    Returns a default ``TreebeardConfig()`` when the file does not exist.
    An existing but empty file, or one that fails validation, is reported
    through ``fatal_exit``.
    """
    config_path = "treebeard.yaml"
    if not os.path.exists(config_path):
        return TreebeardConfig()

    with open(config_path) as config_file:
        raw_settings = yaml.load(config_file, Loader=yaml.FullLoader)

    if not raw_settings:
        fatal_exit("treebeard.yaml config file exists but is empty")

    try:
        parsed_config = TreebeardConfig(**raw_settings)
    except ValidationError as e:  # type: ignore
        fatal_exit(f"Error parsing treebeard.yaml\n{e.json()}")
    else:
        return parsed_config
def get_treebeard_config() -> TreebeardConfig:
    """Build the project configuration from the project's config file.

    The file name comes from ``get_config_file_name()``. A missing or empty
    file yields a default ``TreebeardConfig()``; a file that fails
    validation is reported through ``fatal_exit``.
    """
    config_path = get_config_file_name()
    if not os.path.exists(config_path):
        return TreebeardConfig()

    with open(config_path) as config_file:
        raw_settings = yaml.load(config_file, Loader=yaml.FullLoader)  # type: ignore

    # An empty document parses to a falsy value; fall back to defaults.
    if not raw_settings:
        return TreebeardConfig()

    try:
        parsed_config = TreebeardConfig(**raw_settings)
    except ValidationError as e:  # type: ignore
        fatal_exit(f"Error parsing {config_path}\n{e.json()}")  # type: ignore
    else:
        return parsed_config
def validate_notebook_directory(
    treebeard_env: TreebeardEnv, treebeard_config: TreebeardConfig
):
    """Check that the cwd looks like a runnable treebeard project.

    Prints a red warning to stderr when no project id is configured, and
    reports a missing notebook file through ``fatal_exit``.
    """
    account_missing = treebeard_env.project_id is None
    if account_missing:
        warning = click.style(
            "No account config detected! Please run treebeard configure",
            fg="red",
        )
        click.echo(warning, err=True)

    notebook_path = treebeard_config.notebook
    if os.path.exists(notebook_path):
        return

    fatal_exit(
        f"Cannot run non-existent notebook '{notebook_path}', you should be in a project directory with a treebeard.yaml file"
    )
def status():
    """Show the status of the current notebook"""
    # NOTE: the docstring is kept verbatim because it may be surfaced as
    # command help text by the CLI framework.
    validate_notebook_directory(treebeard_env, treebeard_config)

    response = requests.get(runner_endpoint, headers=treebeard_env.dict())  # type: ignore
    if response.status_code != 200:
        raise click.ClickException(f"Request failed: {response.text}")

    json_data = json.loads(response.text)
    if len(json_data) == 0:
        fatal_exit(
            "This notebook has not been run. Try running it with `treebeard run`"
        )

    click.echo("π² Recent runs:\n")

    max_results = 5
    status_emoji = {
        "SUCCESS": "β ",
        "QUEUED": "π€",
        "WORKING": "β³",
        "FAILURE": "β",
        "TIMEOUT": "β°",
        "CANCELLED": "π",
    }
    # Shown for any status missing from the table above (for example
    # "INTERNAL_ERROR") so that an unexpected status cannot raise KeyError.
    unknown_status_marker = "?"

    runs: List[Run] = [
        Run.parse_obj(run) for run in json_data["runs"][-max_results:]
    ]  # type: ignore

    # The reference time is the same for every row, so compute it once.
    now = parser.isoparse(datetime.datetime.utcnow().isoformat() + "Z")

    for run in runs:
        start_time = parser.isoparse(run.start_time)
        time_string: str = timeago_format(start_time, now=now)

        mechanism: str = run.trigger["mechanism"]
        ran_via = "" if len(mechanism) == 0 else f"via {mechanism}"

        # The trigger may not carry a branch; show nothing in that case.
        try:
            branch = f"π{run.trigger['branch']}"
        except KeyError:
            branch = ""

        marker = status_emoji.get(run.status, unknown_status_marker)
        click.echo(
            f" {marker} {time_string} {ran_via} {branch} -- {run.url}"
        )
def validate_notebook_directory(
    treebeard_env: TreebeardEnv, treebeard_config: TreebeardConfig, upload: bool
):
    """Check that the cwd looks like a runnable treebeard project.

    Problems are reported through ``fatal_exit``:

    * ``upload`` was requested but no user name is configured,
    * the config resolves to no notebooks at all,
    * a resolved notebook path does not exist on disk.
    """
    if upload:
        if treebeard_env.user_name is None:
            fatal_exit(
                "No account config detected! Please run `treebeard configure`"
            )

    resolved_notebooks = treebeard_config.get_deglobbed_notebooks()
    if not resolved_notebooks:
        fatal_exit(
            "No notebooks found in project! Treebeard expects at least one."
        )

    # Lazily walk the notebooks in order, reporting each one that is missing.
    absent = (path for path in resolved_notebooks if not os.path.exists(path))
    for notebook in absent:
        fatal_exit(
            f"Cannot run non-existent notebook '{notebook}', you should be in a project directory with a treebeard.yaml file"
        )
def run(
    cli_context: CliContext,
    watch: bool,
    notebooks: List[str],
    ignore: List[str],
    local: bool,
    confirm: bool,
    push_secrets: bool,
    dockerless: bool,
    upload: bool,
):
    """
    Run a notebook and optionally schedule it to run periodically
    """
    # NOTE: the docstring is kept verbatim because it may be surfaced as
    # command help text by the CLI framework.
    #
    # Overview of the flow below:
    #   1. write the (possibly CLI-overridden) config to a temp yaml file
    #   2. dockerless: run in the current python environment
    #   3. otherwise copy the project to a temp dir, strip notebook outputs
    #      and pack it into a .tar.gz archive
    #   4. local: build/run the archive locally and exit with its status
    #   5. otherwise upload the archive, submit the run and optionally watch
    notebooks = list(notebooks)
    ignore = list(ignore)

    # NOTE(review): a validate_notebook_directory variant taking a third
    # `upload` argument also exists - confirm which signature is current.
    validate_notebook_directory(treebeard_env, treebeard_config)

    # Apply cli config overrides
    # mkstemp creates the file atomically; the deprecated mktemp only returns
    # a name and is open to a race between naming and opening.
    yaml_fd, treebeard_yaml_path = tempfile.mkstemp()
    with os.fdopen(yaml_fd, "w") as yaml_file:
        if notebooks:
            treebeard_config.notebooks = notebooks

        yaml.dump(treebeard_config.dict(), yaml_file)  # type: ignore

    if dockerless:
        click.echo(
            f"π² Running locally without docker using your current python environment"
        )
        if not confirm and not click.confirm(
            f"Warning: This will clear the outputs of your notebooks, continue?",
            default=True,
        ):
            sys.exit(0)

        # Note: import runtime.run causes win/darwin devices missing magic to fail at start
        import treebeard.runtime.run

        treebeard.runtime.run.start(upload_outputs=upload)  # will sys.exit

    params = {}
    if treebeard_config.schedule:
        if confirm or click.confirm(
            f"π treebeard.yaml contains schedule '{treebeard_config.schedule}'. Enable it?"
        ):
            params["schedule"] = treebeard_config.schedule

    if (
        not local
        and len(treebeard_config.secret) > 0
        and not confirm
        and not push_secrets
    ):
        push_secrets = click.confirm("Push secrets first?", default=True)

    if push_secrets:
        push_secrets_to_store([], confirm=confirm)

    if treebeard_config:
        ignore += (
            treebeard_config.ignore
            + treebeard_config.secret
            + treebeard_config.output_dirs
        )

    click.echo("π² Copying project to tempdir and stripping notebooks")
    temp_dir = tempfile.mkdtemp()
    copy_tree(os.getcwd(), str(temp_dir), preserve_symlinks=1)
    notebooks_files = treebeard_config.get_deglobbed_notebooks()
    for notebooks_file in notebooks_files:
        try:
            # Each entry is a single path string, so it must be passed as one
            # argv element; the previous `["nbstripout"] + notebooks_file`
            # added a str to a list and always raised TypeError, so no
            # notebook was ever stripped.
            subprocess.check_output(["nbstripout", notebooks_file], cwd=temp_dir)
        except (subprocess.CalledProcessError, OSError):
            # Best effort: a notebook that cannot be stripped is reported
            # but does not abort the run.
            print(f"Failed to nbstripout {notebooks_file}! Is it valid?")
    click.echo(notebooks_files)

    click.echo("π² Compressing Repo")

    with tempfile.NamedTemporaryFile(
        "wb", suffix=".tar.gz", delete=False
    ) as src_archive:
        with tarfile.open(fileobj=src_archive, mode="w:gz") as tar:

            def zip_filter(info: tarfile.TarInfo):
                # The project's own treebeard.yaml is replaced by the
                # generated one added below.
                if info.name.endswith("treebeard.yaml"):
                    return None

                for ignored in ignore:
                    if info.name in glob.glob(ignored, recursive=True):
                        return None

                # if len(git_files) > 0 and info.name not in git_files:
                #     return None
                click.echo(f" Including {info.name}")
                return info

            tar.add(
                str(temp_dir),
                arcname=os.path.basename(os.path.sep),
                filter=zip_filter,
            )
            tar.add(config_path, arcname=os.path.basename(config_path))
            tar.add(treebeard_yaml_path, arcname="treebeard.yaml")

    if not confirm and not click.confirm(
        "Confirm source file set is correct?", default=True
    ):
        click.echo("Exiting")
        sys.exit()

    if local:
        build_tag = str(time.mktime(datetime.datetime.today().timetuple()))
        repo_image_name = f"gcr.io/treebeard-259315/projects/{project_id}/{sanitise_notebook_id(str(notebook_id))}:{build_tag}"
        click.echo(f"π² Building {repo_image_name} Locally\n")
        secrets_archive = get_secrets_archive()
        repo_url = f"file://{src_archive.name}"
        secrets_url = f"file://{secrets_archive.name}"
        status = run_repo(
            str(project_id),
            str(notebook_id),
            treebeard_env.run_id,
            build_tag,
            repo_url,
            secrets_url,
            branch="cli",
            local=True,
        )
        click.echo(f"Local build exited with status code {status}")
        sys.exit(status)

    size = os.path.getsize(src_archive.name)
    max_upload_size = "100MB"
    if size > parse_size(max_upload_size):
        fatal_exit(
            click.style(
                (
                    f"ERROR: Compressed notebook directory is {format_size(size)},"
                    f" max upload size is {max_upload_size}. \nPlease ensure you ignore any virtualenv subdirectory"
                    " using `treebeard run --ignore venv`"
                ),
                fg="red",
            )
        )

    time_seconds = int(time.mktime(datetime.datetime.today().timetuple()))
    build_tag = str(time_seconds)

    upload_api = (
        f"{api_url}/source_upload_url/{project_id}/{notebook_id}/{build_tag}"
    )
    resp = requests.get(upload_api)  # type: ignore
    signed_url: str = resp.text

    # Close the archive handle once the upload request has completed.
    with open(src_archive.name, "rb") as archive_file:
        put_resp = requests.put(  # type: ignore
            signed_url,
            archive_file,
            headers={"Content-Type": "application/x-tar"},
        )
    # An explicit check instead of `assert`, which is stripped under -O.
    if put_resp.status_code != 200:
        raise click.ClickException(f"Request failed: {put_resp.text}")

    if os.getenv("GITHUB_ACTIONS"):
        params["event"] = os.getenv("GITHUB_EVENT_NAME")
        params["sha"] = os.getenv("GITHUB_SHA")
        params["branch"] = os.getenv("GITHUB_REF").split("/")[-1]
        workflow = os.getenv("GITHUB_WORKFLOW")
        params["workflow"] = (
            workflow.replace(".yml", "").replace(".yaml", "").split("/")[-1]
        )

    click.echo(f"π² submitting archive to runner ({format_size(size)})...")
    submit_endpoint = f"{api_url}/runs/{treebeard_env.project_id}/{treebeard_env.notebook_id}/{build_tag}"
    response = requests.post(  # type: ignore
        submit_endpoint,
        params=params,
        headers={
            "api_key": treebeard_env.api_key,
            "email": treebeard_env.email
        },
    )
    shutil.rmtree(temp_dir)

    if response.status_code != 200:
        raise click.ClickException(f"Request failed: {response.text}")

    try:
        json_data = json.loads(response.text)
        click.echo(f"β¨ Run has been accepted! {json_data['admin_url']}")
    except Exception:
        # Unparseable body or missing key: report it and carry on.
        click.echo("β Request to run failed")
        click.echo(sys.exc_info())

    if watch:
        # Poll the runner every 5 seconds until the latest run finishes.
        build_result = None
        while not build_result:
            time.sleep(5)
            response = requests.get(
                runner_endpoint, headers=treebeard_env.dict())  # type: ignore
            json_data = json.loads(response.text)
            if len(json_data["runs"]) == 0:
                status = "FAILURE"
            else:
                status = json_data["runs"][-1]["status"]
            click.echo(f"{get_time()} Build status: {status}")
            if status == "SUCCESS":
                build_result = status
                click.echo(f"Build result: {build_result}")
            elif status in [
                "FAILURE", "TIMEOUT", "INTERNAL_ERROR", "CANCELLED"
            ]:
                fatal_exit(f"Build failed")
if not local: click.echo("Image run failed, pushing failed image...") client.images.push(latest_image_name) raise ex if __name__ == "__main__": build_tag_key = "TREEBEARD_BUILD_TAG" repo_url_key = "TREEBEARD_REPO_URL" secrets_url_key = "TREEBEARD_SECRETS_URL" subprocess.run(["bash", "-c", "echo Building repo"]) build_tag = os.getenv(build_tag_key) if not build_tag: fatal_exit(f"No build_tag provided inside {build_tag_key}") repo_url = os.getenv(repo_url_key) if not repo_url: fatal_exit(f"No repo_url provided inside {repo_url_key}") secrets_url = os.getenv(secrets_url_key) if not treebeard_env.notebook_id: raise Exception("No notebook ID at runtime") if not treebeard_env.project_id: raise Exception("No project ID at buildtime") run_repo( treebeard_env.project_id, treebeard_env.notebook_id, treebeard_env.run_id,
def run(cli_context: CliContext, t: str, watch: bool, ignore: List[str],
        local: bool):
    """
    Run a notebook and optionally schedule it to run periodically
    """
    # NOTE: the docstring is kept verbatim because it may be surfaced as
    # command help text by the CLI framework.
    validate_notebook_directory(treebeard_env, treebeard_config)

    # Work on a private list: `+=` below would otherwise mutate the caller's
    # sequence in place, and raises TypeError when `ignore` is a tuple.
    ignore = list(ignore)

    params = {}
    if t:
        params["schedule"] = t

    spinner: Any = Halo(text="π² Compressing Repo\n", spinner="dots")
    spinner.start()

    if treebeard_config:
        ignore += (treebeard_config.ignore + treebeard_config.secret +
                   treebeard_config.output_dirs)

    # Create a temporary file for the compressed directory
    # compressed file accessible at src_archive.name
    with tempfile.NamedTemporaryFile("wb", suffix=".tar.gz",
                                     delete=False) as src_archive:
        click.echo("\n")
        with tarfile.open(fileobj=src_archive, mode="w:gz") as tar:

            def zip_filter(info: tarfile.TarInfo):
                # Drop any member whose name matches an ignore glob.
                for ignored in ignore:
                    if info.name in glob.glob(ignored):
                        return None

                click.echo(f" Including {info.name}")
                return info

            tar.add(os.getcwd(),
                    arcname=os.path.basename(os.path.sep),
                    filter=zip_filter)
            tar.add(config_path, arcname=os.path.basename(config_path))

    size = os.path.getsize(src_archive.name)
    max_upload_size = "100MB"
    if size > parse_size(max_upload_size):
        fatal_exit(
            click.style(
                (f"ERROR: Compressed notebook directory is {format_size(size)},"
                 f" max upload size is {max_upload_size}. \nPlease ensure you ignore any virtualenv subdirectory"
                 " using `treebeard run --ignore venv`"),
                fg="red",
            ))

    if local:
        spinner.stop()
        build_tag = str(time.mktime(datetime.today().timetuple()))
        repo_image_name = (
            f"gcr.io/treebeard-259315/projects/{project_id}/{notebook_id}:{build_tag}"
        )
        click.echo(f"π² Building {repo_image_name} Locally\n")
        secrets_archive = get_secrets_archive()
        repo_url = f"file://{src_archive.name}"
        secrets_url = f"file://{secrets_archive.name}"
        run_repo(
            str(project_id),
            str(notebook_id),
            treebeard_env.run_id,
            build_tag,
            repo_url,
            secrets_url,
            local=True,
        )
        sys.exit(0)

    spinner.text = "π² submitting notebook to runner\n"
    # Close the archive handle once the upload request has completed.
    with open(src_archive.name, "rb") as archive_file:
        response = requests.post(
            notebooks_endpoint,
            files={"repo": archive_file},
            params=params,
            headers=treebeard_env.dict(),
        )

    # Stop the spinner before any error is raised so it does not keep running.
    spinner.stop()
    if response.status_code != 200:
        raise click.ClickException(f"Request failed: {response.text}")

    try:
        json_data = json.loads(response.text)
        click.echo(f"β¨ Run has been accepted! {json_data['admin_url']}")
    except Exception:
        # Unparseable body or missing key: report it and carry on.
        click.echo("β Request to run failed")
        click.echo(sys.exc_info())

    if watch:
        # Poll the runner every 5 seconds until the latest run finishes.
        build_result = None
        while not build_result:
            time.sleep(5)
            response = requests.get(notebooks_endpoint,
                                    headers=treebeard_env.dict())
            json_data = json.loads(response.text)
            recent_runs = json_data["runs"]
            # An empty run list is treated as a failed build rather than
            # letting the [-1] lookup raise IndexError.
            status = recent_runs[-1]["status"] if recent_runs else "FAILURE"
            click.echo(f"{get_time()} Build status: {status}")
            if status == "SUCCESS":
                build_result = status
                click.echo(f"Build result: {build_result}")
            elif status in [
                "FAILURE", "TIMEOUT", "INTERNAL_ERROR", "CANCELLED"
            ]:
                fatal_exit(f"Build failed")
def _report_build_failure(
    treebeard_context: TreebeardContext, upload: bool, usagelogging: bool
) -> int:
    """Send FAILURE updates for the current run and return the exit status.

    Returns 2 when ``upload`` is set (meta notebooks are uploaded and the
    run's ``/update`` endpoint is notified), otherwise 1. With
    ``usagelogging`` set, the run's ``/log`` endpoint is notified first.
    """
    treebeard_env = treebeard_context.treebeard_env
    if usagelogging:
        tb_helper.update(
            treebeard_context,
            update_url=f"{api_url}/{treebeard_env.run_path}/log",
            status="FAILURE",
        )
    if upload:
        tb_helper.upload_meta_nbs(treebeard_context)
        tb_helper.update(
            treebeard_context,
            update_url=f"{api_url}/{treebeard_env.run_path}/update",
            status="FAILURE",
        )
        return 2
    return 1


def build(
    treebeard_context: TreebeardContext,
    repo_temp_dir: str,
    envs_to_forward: List[str],
    upload: bool,
    usagelogging: bool,
) -> int:
    """Build a docker image from the repo in ``repo_temp_dir`` and run it.

    Steps: optionally execute ``treebeard/repo_setup.ipynb`` with papermill,
    strip notebook outputs, build the image via ``helper.run_repo2docker``,
    tag and (when a registry is configured) push it, then run it with
    ``helper.run_image``.

    Returns 0 on success, 1 or 2 when repo setup or the image build fails
    (2 when ``upload`` is set), otherwise the non-zero status returned by
    ``helper.run_image``. ``repo_temp_dir`` is removed once the image build
    has been attempted.
    """
    click.echo(f"π² Treebeard buildtime, building repo")
    click.echo(f" Running repo setup")

    repo_setup_nb = "treebeard/repo_setup.ipynb"
    treebeard_env = treebeard_context.treebeard_env

    if os.path.exists(repo_setup_nb):
        try:
            # Argument list instead of a shell string: no shell parsing is
            # involved and the notebook path is passed verbatim.
            subprocess.check_output(
                [
                    "papermill",
                    "--stdout-file",
                    "/dev/stdout",
                    "--stderr-file",
                    "/dev/stdout",
                    "--kernel",
                    "python3",
                    "--no-progress-bar",
                    repo_setup_nb,
                    repo_setup_nb,
                ]
            )
        except Exception:
            return _report_build_failure(treebeard_context, upload, usagelogging)

    client: Any = docker.from_env()  # type: ignore
    default_image_name = f"{tb_helper.sanitise_repo_short_name(treebeard_env.user_name)}/{tb_helper.sanitise_repo_short_name(treebeard_env.repo_short_name)}"
    image_name = default_image_name
    if "TREEBEARD_IMAGE_NAME" in os.environ:
        image_name = os.environ["TREEBEARD_IMAGE_NAME"]
    elif "DOCKER_REGISTRY_PREFIX" in os.environ:
        image_name = f"{os.environ['DOCKER_REGISTRY_PREFIX']}/{default_image_name}"

    assert image_name is not None
    click.echo(f"π³ Building {image_name}")

    docker_username = os.getenv("DOCKER_USERNAME")
    docker_password = os.getenv("DOCKER_PASSWORD")
    use_docker_registry = (
        "TREEBEARD_IMAGE_NAME" in os.environ
        or "DOCKER_REGISTRY_PREFIX" in os.environ
        or (docker_username and docker_password)
    )

    if use_docker_registry and not is_valid_docker_image_name(image_name):
        fatal_exit(
            "π³β the docker image name you supplied is invalid. It must be lower case, alphanumeric, with only - and _ special chars."
        )

    if docker_username and docker_password:
        click.echo(
            f"π³ Logging into DockerHub using the username and password you provided"
        )
        # The password goes to docker on stdin (newline-terminated, as
        # `printenv` emitted it) and the username is a plain argv element,
        # so neither value is interpreted by a shell.
        subprocess.check_output(
            ["docker", "login", "-u", docker_username, "--password-stdin"],
            input=(docker_password + "\n").encode(),
        )

    treebeard_config = treebeard_context.treebeard_config
    workdir = os.getcwd()
    try:
        os.chdir(repo_temp_dir)

        if os.path.exists("treebeard/container_setup.ipynb"):
            helper.create_start_script(treebeard_config.treebeard_ref)

        if os.path.exists("treebeard/post_install.ipynb"):
            helper.create_post_build_script(treebeard_config.treebeard_ref)

        notebook_files = get_treebeard_config().get_deglobbed_notebooks()
        if len(notebook_files) == 0:
            raise Exception(
                f"No notebooks found to run (cwd {os.getcwd()}). If you are using a treebeard.yaml file, check it is correct: https://treebeard.readthedocs.io/en/latest/project_config.html"
            )

        try:
            subprocess.check_output(["nbstripout"] + notebook_files)
        except Exception:
            # Best effort: a failed strip is reported but does not abort.
            print(
                f"βFailed to nbstripout a notebook! Do you have an invalid .ipynb?\nNotebooks: {notebook_files}"
            )
    finally:
        # Always show what is about to be built, even if preparation failed.
        click.echo("Treebeard Bundle Contents:")
        subprocess.run(["pwd"])
        subprocess.run(["ls", "-la", repo_temp_dir])

    # Build image but don't run
    versioned_image_name = f"{image_name}:{treebeard_env.run_id}"
    passing_image_name = f"{image_name}:{treebeard_env.branch}"
    latest_image_name = f"{image_name}:{treebeard_env.branch}-latest"

    helper.fetch_image_for_cache(client, latest_image_name)

    r2d_user_id = "1000"
    try:
        helper.run_repo2docker(
            treebeard_env.user_name,
            r2d_user_id,
            versioned_image_name,
            latest_image_name,
            repo_temp_dir,
        )
        click.echo(f"β¨ Successfully built {versioned_image_name}")
    except:  # noqa: E722
        # NOTE(review): kept as broad as the original so that any failure,
        # including a SystemExit raised inside the helper, is reported -
        # confirm whether `except Exception` would be sufficient.
        click.echo(f"\n\nβ Failed to build container from the source repo")
        return _report_build_failure(treebeard_context, upload, usagelogging)
    finally:
        # Restore the cwd and drop the temp copy whether or not the build
        # succeeded.
        os.chdir(workdir)
        shutil.rmtree(repo_temp_dir)

    helper.tag_image(versioned_image_name, latest_image_name)

    if use_docker_registry:
        try:
            helper.push_image(versioned_image_name)
            helper.push_image(latest_image_name)
        except Exception:
            click.echo(
                f"π³β Failed to push image, will try again on success\n{format_exc()}"
            )
    else:
        click.echo(f"π³ Not pushing docker image as no registry is configured.")

    click.echo(f"Image built successfully, now running.")
    status = helper.run_image(
        versioned_image_name,
        envs_to_forward,
        upload,
        usagelogging,
        treebeard_context,
    )
    if status != 0:
        click.echo(f"Image run failed, not updated {passing_image_name}")
        return status

    helper.tag_image(versioned_image_name, passing_image_name)

    if use_docker_registry:
        helper.push_image(passing_image_name)

    return 0
import docker  # type: ignore
from treebeard.buildtime.helper import run_image
from treebeard.conf import treebeard_env
from treebeard.util import fatal_exit


def run(project_id: str, notebook_id: str, run_id: str, image_name: str):
    """Pull ``image_name`` with the docker client, then run it via ``run_image``."""
    docker_client: Any = docker.from_env()  # type: ignore
    docker_client.images.pull(image_name)
    run_image(project_id, notebook_id, run_id, image_name)


if __name__ == "__main__":
    # Script entry point: the ids come from the treebeard environment and the
    # image to run comes from the TREEBEARD_IMAGE_NAME environment variable.
    if not treebeard_env.notebook_id:
        raise Exception("No notebook ID at buildtime")

    if not treebeard_env.project_id:
        raise Exception("No project ID at buildtime")

    image_name_key = "TREEBEARD_IMAGE_NAME"
    image_name = os.environ.get(image_name_key)
    if not image_name:
        fatal_exit(f"No image supplied under {image_name_key}")

    run(
        treebeard_env.project_id,
        treebeard_env.notebook_id,
        treebeard_env.run_id,
        image_name,
    )