def deploy_images(cfg: Dict[str, str]) -> NoReturn:
    """
    Deploy the images for a course to the appropriate S3 location.

    STUB. NOT CURRENTLY IMPLEMENTED.

    :param cfg: the loaded configuration

    :return: Nothing
    """
    warn("'deploy-images' is not yet implemented.")
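
# A possible shape for the eventual implementation (illustrative sketch only,
# not wired in): push the course's image assets to S3. boto3, the 'images'
# subdirectory, the bucket name, and the key scheme below are all assumptions,
# not part of the current tool.
#
#     import boto3
#
#     def _deploy_images_sketch(cfg: Dict[str, str]) -> NoReturn:
#         session = boto3.Session(profile_name=cfg['COURSE_AWS_PROFILE'])
#         s3 = session.client('s3')
#         images_dir = os.path.join(cfg['COURSE_HOME'], 'images')  # assumed layout
#         for name in os.listdir(images_dir):
#             s3.upload_file(
#                 os.path.join(images_dir, name),
#                 'hypothetical-images-bucket',                    # assumed bucket
#                 f"{cfg['COURSE_NAME']}/images/{name}"            # assumed key scheme
#             )
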
def run_command_on_notebooks(cfg: Dict[str, str],
                             command: str,
                             args: Sequence[str]) -> NoReturn:
    """
    Runs a command on every notebook in the current course.

    :param cfg:     the loaded configuration
    :param command: the command to run
    :param args:    any command arguments, as a list

    :return: Nothing
    """
    check_config(cfg, 'COURSE_NAME', 'COURSE_REPO')
    for nb in bdc.bdc_get_notebook_paths(build_file_path(cfg)):
        if args:
            quoted = ' '.join([quote_shell_arg(arg) for arg in args])
            shell_command = f'{command} {quoted} {nb}'
        else:
            shell_command = f'{command} {nb}'

        try:
            cmd(shell_command)
        except CourseError as e:
            warn(str(e))
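
# Illustrative usage (the command and arguments are hypothetical): calling
#
#     run_command_on_notebooks(cfg, 'grep', ['-l', 'TODO'])
#
# runs 'grep -l TODO <nb>' once per notebook path <nb> returned by
# bdc.bdc_get_notebook_paths(), with each argument quoted via
# quote_shell_arg() first.
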
def import_dbcs(cfg: Dict[str, str],
                build_dir: str,
                build_file: str) -> NoReturn:
    """
    Find all DBC files under the build output directory for the current
    course, and upload them (import them) into the Databricks instance.

    :param cfg:        the config. COURSE_NAME, COURSE_REMOTE_TARGET, and
                       DB_PROFILE are assumed to be set.
    :param build_dir:  the path to the build directory
    :param build_file: the path to the course's build file

    :return: Nothing
    """
    check_config(cfg)
    remote_target = cfg['COURSE_REMOTE_TARGET']
    db_profile = cfg['DB_PROFILE']

    def import_dbc(dbc: str, build: bdc.BuildData) -> NoReturn:
        '''
        Import a single DBC. Assumes (a) the working directory is the build
        directory, and (b) that the remote target path has already been
        created.
        '''
        w = databricks.Workspace(profile=db_profile)
        if build.has_profiles:
            parent_subpath = os.path.dirname(dbc)
            dir_to_make = f'{remote_target}/{os.path.dirname(parent_subpath)}'
            w.mkdirs(dir_to_make)
            remote_path = f'{remote_target}/{parent_subpath}'
        else:
            remote_path = remote_target

        info(f'Importing "{dbc}" to "{remote_path}"...')
        w.import_dbc(dbc, remote_path)

    # Get the build information. We'll need it later.
    build = bdc.bdc_load_build(build_file)

    print(
        f'Importing all DBCs under "{build_dir}" to remote "{remote_target}"'
    )

    dbcs = []
    with working_directory(build_dir) as pwd:
        for dirpath, _, filenames in os.walk('.'):
            for filename in filenames:
                _, ext = os.path.splitext(filename)
                if ext != '.dbc':
                    continue
                dbcs.append(os.path.normpath(os.path.join(dirpath, filename)))

        if not dbcs:
            warn('No DBCs found.')
        else:
            clean(cfg)
            w = databricks.Workspace(profile=db_profile)

            # If we're doing a profile-based build, create the remote target.
            # The import operations will implicitly create the remote
            # subfolders. However, if we're not doing profile-based builds,
            # then creating the remote target ahead of time will cause the
            # import to fail, so don't do that.
            if build.has_profiles:
                w.mkdirs(remote_target)

            for dbc in dbcs:
                info(f'\nIn "{pwd}":')
                import_dbc(dbc, build)
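
# Illustrative path mapping for a profile-based build (the layout and target
# below are hypothetical): with
#
#     remote_target = '/Users/someone@example.com/Course-Target'
#
# and a DBC found at 'azure/ILT/Lessons.dbc' under the build directory,
# import_dbc() computes
#
#     parent_subpath = 'azure/ILT'
#     dir_to_make    = '/Users/someone@example.com/Course-Target/azure'
#     remote_path    = '/Users/someone@example.com/Course-Target/azure/ILT'
#
# i.e. it pre-creates the parent folder and lets the import itself create the
# final 'ILT' folder. For non-profile builds, everything goes straight to
# remote_target.
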
def load_config(config_path: str,
                apply_defaults: bool = True,
                show_warnings: bool = False) -> Dict[str, str]:
    """
    Load the configuration file.

    :param config_path:    path to the configuration file
    :param apply_defaults: If True (default), apply all known default values.
                           If False, just return what's in the config file.
    :param show_warnings:  Warn about some things. Generally only desirable
                           at program startup.

    :return: A dictionary of configuration items
    """
    bad = False
    comment = re.compile(r"^\s*#.*$")

    cfg = {}
    parent_dir = os.path.dirname(config_path)
    if os.path.isfile(parent_dir):
        raise CourseError(
            f'''"{parent_dir}" already exists, but it isn't a directory.'''
        )
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    if os.path.exists(config_path):
        with open(config_path) as f:
            for (i, line) in enumerate([l.rstrip() for l in f.readlines()]):
                lno = i + 1
                if len(line.strip()) == 0:
                    continue
                if comment.search(line):
                    continue
                fields = line.split('=')
                if len(fields) != 2:
                    bad = True
                    error(f'"{config_path}", line {lno}: Malformed line')
                    continue

                cfg[fields[0]] = fields[1]

        if bad:
            raise CourseError("Configuration error(s).")

    setting_keys_and_defaults = (
        # The second item in each tuple is a default value. The third item
        # indicates whether it can be overridden in the configuration or
        # the environment.
        #
        # The default is treated as a Python string template, so it can
        # substitute values from previous entries in the list. If the default
        # value is None, that generally means it can be overridden on the
        # command line (or depends on something else that can be), so it's
        # checked at runtime.
        ('DB_CONFIG_PATH', DB_CONFIG_PATH_DEFAULT, True),
        ('DB_PROFILE', DB_PROFILE_DEFAULT, True),
        ('DB_SHARD_HOME', None, True),
        ('PREFIX', None, True),                     # set later
        ('COURSE_NAME', None, True),                # can be overridden
        ('COURSE_REPO', COURSE_REPO_DEFAULT, True),
        ('COURSE_HOME', None, False),               # depends on COURSE_NAME
        ('COURSE_YAML', None, True),
        ('COURSE_MODULES', None, False),            # depends on COURSE_NAME
        ('COURSE_REMOTE_SOURCE', None, False),      # depends on COURSE_NAME
        ('COURSE_REMOTE_TARGET', None, False),      # depends on COURSE_NAME
        ('COURSE_AWS_PROFILE', AWS_PROFILE_DEFAULT, True),
        ('SELF_PACED_PATH', SELF_PACED_PATH_DEFAULT, True),
        ('SOURCE', SOURCE_DEFAULT, True),
        ('TARGET', TARGET_DEFAULT, True),
        ('EDITOR', EDITOR_DEFAULT, True),
        ('PAGER', PAGER_DEFAULT, True),
        ('OPEN_DIR', OPEN_DIR_DEFAULT, True),
    )

    # Remove anything that cannot be overridden.
    for e, default, allow_override in setting_keys_and_defaults:
        if default is not None:
            continue
        v = cfg.get(e)
        if not v:
            continue
        if not allow_override:
            if show_warnings:
                warn(f'Ignoring "{e}" in the configuration file, because ' +
                     "it's calculated at run-time.")
            del cfg[e]

    if apply_defaults:
        # Apply environment overrides. Then, check for missing ones where
        # appropriate, and apply defaults.
        for e, default, _ in setting_keys_and_defaults:
            v = os.environ.get(e)
            if v is not None and ("FORCE_" + e) not in cfg:
                cfg[e] = v

            if not cfg.get(e) and default:
                t = StringTemplate(default)
                cfg[e] = t.substitute(cfg)

    return cfg
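
# The file that load_config() parses is a flat KEY=VALUE list: blank lines and
# lines whose first non-blank character is '#' are skipped, and every other
# line must contain exactly one '='. A hypothetical example:
#
#     # Settings for my course
#     COURSE_NAME=Delta-Lake-Fundamentals
#     DB_PROFILE=DEFAULT
#     EDITOR=vim
#
# Keys whose values are calculated at run time (e.g. COURSE_HOME) are ignored
# if they appear here, and an environment variable of the same name overrides
# the file unless a corresponding FORCE_<name> key is present in the
# configuration.
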