Ejemplo n.º 1
0
def deploy_images(cfg: Dict[str, str]) -> None:
    """
    Deploy the images for a course to the appropriate S3 location.

    STUB. NOT CURRENTLY IMPLEMENTED. The only thing this function does is
    emit a warning saying so; ``cfg`` is accepted but not used.

    :param cfg: the loaded configuration (currently unused)

    :return: Nothing
    """
    # The return annotation is None, not NoReturn: the function returns
    # normally once the warning has been issued.
    warn("'deploy-images' is not yet implemented.")
Ejemplo n.º 2
0
def run_command_on_notebooks(cfg: Dict[str, str], command: str,
                             args: Sequence[str]) -> None:
    """
    Runs a command on every notebook in the current course.

    For each notebook path, a shell command of the form
    ``<command> [<quoted args>...] <quoted notebook path>`` is built and run.
    A command that fails with a CourseError is reported as a warning, and
    processing continues with the next notebook.

    :param cfg:      the loaded configuration. COURSE_NAME and COURSE_REPO
                     are checked via check_config().
    :param command:  the command to run. It is inserted into the shell
                     command as is (not quoted).
    :param args:     any command arguments, as a list. May be empty.

    :return: Nothing
    """
    check_config(cfg, 'COURSE_NAME', 'COURSE_REPO')

    # The quoted arguments are the same for every notebook, so build them
    # once instead of once per notebook.
    quoted_args = [quote_shell_arg(arg) for arg in args] if args else []

    for nb in bdc.bdc_get_notebook_paths(build_file_path(cfg)):
        # Quote the notebook path, too: otherwise, a path containing spaces
        # (or other shell metacharacters) is split into several arguments.
        # NOTE(review): assumes quote_shell_arg() leaves "safe" strings
        # usable as a single shell word -- confirm against its definition.
        shell_command = ' '.join([command, *quoted_args, quote_shell_arg(nb)])

        try:
            cmd(shell_command)
        except CourseError as e:
            # Best effort: report the failure and move on.
            warn(str(e))
Ejemplo n.º 3
0
def import_dbcs(cfg: Dict[str, str], build_dir: str,
                build_file: str) -> None:
    """
    Find all DBC files under the build output directory for the current course,
    and upload them (import them) into the Databricks instance.

    If no DBC files are found, a warning is issued and nothing is uploaded.
    Otherwise, clean() is called first, and then every DBC is imported.

    :param cfg:        The config. COURSE_NAME, COURSE_REMOTE_TARGET, and
                       DB_PROFILE are assumed to be set.
    :param build_dir:  The path to the build directory.
    :param build_file: The path to the build file. It is loaded to find out
                       whether the build uses profiles.

    :return: Nothing
    """
    check_config(cfg)
    remote_target = cfg['COURSE_REMOTE_TARGET']
    db_profile = cfg['DB_PROFILE']

    def import_dbc(w: databricks.Workspace, dbc: str,
                   build: bdc.BuildData) -> None:
        '''
        Import a single DBC, using the supplied workspace.

        Assumes (a) the working directory is the build directory, and
        (b) that the remote target path has already been created.
        '''
        if build.has_profiles:
            # For profile-based builds, mirror the DBC's local parent
            # directory under the remote target, creating the directory
            # above it first.
            parent_subpath = os.path.dirname(dbc)
            dir_to_make = f'{remote_target}/{os.path.dirname(parent_subpath)}'
            w.mkdirs(dir_to_make)
            remote_path = f'{remote_target}/{parent_subpath}'
        else:
            remote_path = remote_target

        info(f'Importing "{dbc}" to "{remote_path}"...')
        w.import_dbc(dbc, remote_path)

    # Get the build information. We'll need it later.
    build = bdc.bdc_load_build(build_file)

    print(
        f'Importing all DBCs under "{build_dir}" to remote "{remote_target}"')
    dbcs = []
    with working_directory(build_dir) as pwd:
        # Collect the paths (relative to the build directory) of all files
        # with a ".dbc" extension.
        for dirpath, _, filenames in os.walk('.'):
            for filename in filenames:
                if os.path.splitext(filename)[1] != '.dbc':
                    continue
                dbcs.append(os.path.normpath(os.path.join(dirpath, filename)))

        if not dbcs:
            warn('No DBCs found.')
        else:
            clean(cfg)
            # One workspace object is enough: it is reused for every import,
            # rather than building a new one per DBC.
            w = databricks.Workspace(profile=db_profile)
            # If we're doing a profile-based build, create the remote target.
            # The import operations will implicitly create the remote
            # subfolders. However, if we're not doing profile-based builds,
            # then creating the remote target ahead of time will cause the
            # import to fail, so don't do that.
            if build.has_profiles:
                w.mkdirs(remote_target)

            for dbc in dbcs:
                info(f'\nIn "{pwd}":')
                import_dbc(w, dbc, build)
Ejemplo n.º 4
0
def load_config(config_path: str,
                apply_defaults: bool = True,
                show_warnings: bool = False) -> Dict[str, str]:
    """
    Load the configuration file.

    Every line that is neither blank nor a comment (first non-blank character
    is "#") must have the form ``KEY=VALUE``. Only the first "=" separates
    the key from the value, so a value may itself contain "=". Keys and
    values are stored exactly as written (apart from trailing white space
    on the line being removed).

    If the directory that holds the configuration file does not exist, it
    is created. A missing configuration file is not an error; it simply
    contributes no settings.

    :param config_path:    path to the configuration file
    :param apply_defaults: If True (default), apply environment overrides and
                           all known default values. If False, just return
                           what's in the config file (minus any settings that
                           cannot be overridden).
    :param show_warnings:  Warn about some things. Generally only desirable
                           at program startup.

    :return: A dictionary of configuration items

    :raises CourseError: if the parent of ``config_path`` exists but is a
                         file, or if the configuration file contains one or
                         more malformed lines
    """
    bad = False
    # Raw string, so that "\s" reaches the regex engine rather than being
    # treated as an (invalid) string escape.
    comment = re.compile(r"^\s*#.*$")
    cfg = {}
    parent_dir = os.path.dirname(config_path)
    if os.path.isfile(parent_dir):
        raise CourseError(
            f'''"{parent_dir}" already exists, but it isn't a directory.''')
    # os.path.dirname() yields '' for a bare file name, and os.makedirs('')
    # raises an error. In that case, the parent is the current directory,
    # and there's nothing to create.
    if parent_dir and not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    if os.path.exists(config_path):
        with open(config_path) as f:
            for lno, raw_line in enumerate(f, start=1):
                line = raw_line.rstrip()
                if not line or comment.search(line):
                    continue

                # Split on the first "=" only, so that values containing
                # "=" are accepted.
                key, sep, value = line.partition('=')
                if not sep:
                    # Keep going, so that every bad line gets reported.
                    bad = True
                    error(f'"{config_path}", line {lno}: Malformed line')
                    continue

                cfg[key] = value

        if bad:
            raise CourseError("Configuration error(s).")

    setting_keys_and_defaults = (
        # The second item in each tuple is a default value. The third item
        # indicates whether it can be overridden in the configuration or
        # the environment.
        #
        # The default is treated as a Python string template, so it can
        # substitute values from previous entries in the list. If the default
        # value is None, that generally means it can be overridden on the
        # command line (or depends on something else that can be), so it's
        # checked at runtime.
        ('DB_CONFIG_PATH', DB_CONFIG_PATH_DEFAULT, True),
        ('DB_PROFILE', DB_PROFILE_DEFAULT, True),
        ('DB_SHARD_HOME', None, True),
        ('PREFIX', None, True),  # set later
        ('COURSE_NAME', None, True),  # can be overridden
        ('COURSE_REPO', COURSE_REPO_DEFAULT, True),
        ('COURSE_HOME', None, False),  # depends on COURSE_NAME
        ('COURSE_YAML', None, True),
        ('COURSE_MODULES', None, False),  # depends on COURSE_NAME
        ('COURSE_REMOTE_SOURCE', None, False),  # depends on COURSE_NAME
        ('COURSE_REMOTE_TARGET', None, False),  # depends on COURSE_NAME
        ('COURSE_AWS_PROFILE', AWS_PROFILE_DEFAULT, True),
        ('SELF_PACED_PATH', SELF_PACED_PATH_DEFAULT, True),
        ('SOURCE', SOURCE_DEFAULT, True),
        ('TARGET', TARGET_DEFAULT, True),
        ('EDITOR', EDITOR_DEFAULT, True),
        ('PAGER', PAGER_DEFAULT, True),
        ('OPEN_DIR', OPEN_DIR_DEFAULT, True),
    )

    # Remove anything that cannot be overridden: settings that have no
    # default, aren't overridable, and have a non-empty value in the file.
    for e, default, allow_override in setting_keys_and_defaults:
        if default is not None or allow_override:
            continue

        if cfg.get(e):
            if show_warnings:
                warn(f'Ignoring "{e}" in the configuration file, because ' +
                     "it's calculated at run-time.")
            del cfg[e]

    if apply_defaults:
        # Apply environment overrides. Then, check for missing ones where
        # appropriate, and apply defaults.
        for e, default, _ in setting_keys_and_defaults:
            # An environment variable wins over the file, unless the file
            # contains a "FORCE_<name>" key for this setting.
            v = os.environ.get(e)
            if v is not None and ("FORCE_" + e) not in cfg:
                cfg[e] = v

            if not cfg.get(e) and default:
                # Expand the default against the settings gathered so far.
                t = StringTemplate(default)
                cfg[e] = t.substitute(cfg)

    return cfg