Ejemplo n.º 1
0
def get_self_paced_courses(cfg: Dict[str, str]) -> Sequence[str]:
    """
    Lazily produce the name of every self-paced course found in the local
    clone of the course Git repository.

    cfg['SELF_PACED_PATH'] is treated as a colon-separated list of
    directories, each relative to cfg['COURSE_REPO']. A non-hidden
    subdirectory of one of those directories counts as a course when it
    directly contains a non-directory entry whose name starts with "build"
    and whose extension is ".yaml". Search-path entries that are not
    existing directories are reported via debug() and skipped.

    :param cfg: the loaded config. COURSE_REPO and SELF_PACED_PATH must be
                set

    :return: the names of all self-paced courses (as simple directory
             names), yielded one at a time
    """

    def holds_build_file(course_dir: str) -> bool:
        # True as soon as one non-directory "build*.yaml" entry is found.
        for entry in os.listdir(course_dir):
            if os.path.isdir(os.path.join(course_dir, entry)):
                continue
            extension = os.path.splitext(entry)[1]
            if entry.startswith("build") and extension == '.yaml':
                return True
        return False

    search_path = cfg['SELF_PACED_PATH']

    for relative in search_path.split(':'):
        root = os.path.join(cfg['COURSE_REPO'], relative)
        if not os.path.isdir(root):
            debug(f'Directory "{root}" (in SELF_PACED_PATH) '
                  'does not exist.')
            continue

        for name in os.listdir(root):
            # Hidden entries (e.g. ".git") are never courses.
            if name.startswith('.'):
                continue
            candidate = os.path.join(root, name)
            if os.path.isdir(candidate) and holds_build_file(candidate):
                yield name
Ejemplo n.º 2
0
def _parse_source_notebook(path: str, encoding: str) -> Notebook:
    """
    Parse a Databricks source notebook into a Notebook object.

    The first line of the file must match the Databricks header for the
    notebook's language. The remaining lines are split into cells at
    "COMMAND ----------" marker lines; the line immediately following each
    marker is discarded. Lines carrying a "MAGIC" prefix have that prefix
    stripped before being stored.

    The first significant line of a cell determines its type: a magic line
    whose first token starts with "%" selects the type via
    CellType.from_string(), while a non-magic line makes the cell a code
    cell in the notebook's base language. Empty magic lines that precede
    the first significant line of a cell are skipped.

    :param path:     the path to the notebook
    :param encoding: the encoding to use when reading the file

    :returns: a parsed Notebook object whose cells are numbered from 1 and
              each carry a freshly generated GUID

    :raises NotebookParseError: if the notebook cannot be parsed
    :raises NotebookError:      other errors (e.g., invalid file type)
    """
    language = NotebookLanguage.from_path(path)
    comment_string = COMMENT_STRINGS[language]

    leading_comment = _leading_comment_pattern(comment_string)
    header = _notebook_header_re(comment_string)
    magic = re.compile(r"{}\s+MAGIC\s?([^\s]*)(.*)$".format(leading_comment))
    new_cell = re.compile(r"{}\s+COMMAND\s+-+.*$".format(leading_comment))

    cells = []
    cur_cell = EmptyCell
    command_buf = []

    lines = _read_notebook(path, encoding)
    if not lines:
        raise NotebookParseError(f'File "{path}" is empty.')

    if not header.search(lines[0]):
        raise NotebookParseError(
            f'File "{path}" is missing expected Databricks header')

    # The line after the header is the first line of the first cell.
    saw_new_cell = True
    skip_next = False

    # Start numbering at 2 to account for the header line already consumed.
    for line_num, line in enumerate(lines[1:], start=2):
        if skip_next:
            debug(f'"{path}", line {line_num}: Skipping...')
            skip_next = False
            continue

        # If this line matches the start of a new cell marker, save the
        # existing cell and reset all the variables.
        if new_cell.search(line):
            debug(f'"{path}", line {line_num}: New command')
            if cur_cell != EmptyCell:
                # The last line of any cell should be blank and should
                # be removed, as it is really just a separator before the
                # marker starting a new cell. (command_buf is never empty
                # here: a cell is only started after a line is appended.)
                if not command_buf[-1].strip():
                    command_buf.pop()
                cells.append(
                    dataclasses.replace(cur_cell,
                                        command="\n".join(command_buf)))
            cur_cell = EmptyCell
            saw_new_cell = True
            skip_next = True
            command_buf = []
            continue

        # Is this line a "MAGIC" line? If so, extract the contents without
        # the leading MAGIC indicator.
        m = magic.search(line)
        if m:
            line = f"{m.group(1)}{m.group(2)}"

        # If we are already inside a cell, keep accumulating the current
        # cell and move on to the next line.
        if not saw_new_cell:
            debug(f'"{path}", line {line_num}: Not first line')
            command_buf.append(line)
            continue

        # From here on, the line is a candidate for the first line of a
        # new cell.

        if not m:
            # Not a magic line. It is, therefore, a code cell of the same
            # type as the base language of the notebook.
            debug(f"{path}, line {line_num}: No magic")
            command_buf.append(line)
            cur_cell = dataclasses.replace(
                cur_cell,
                cell_type=CellType.from_language(language),
                marked_magic=False)
            saw_new_cell = False
            continue

        # Magic line as first line in cell. If it's an empty magic line,
        # skip it. saw_new_cell deliberately stays True so that the next
        # line is still treated as the cell's first line; clearing it here
        # would leave cur_cell as EmptyCell and silently drop the whole
        # cell.
        token = m.group(1).strip()
        if not token:
            debug(f'"{path}", line {line_num}: Skipping empty magic.')
            continue

        # Extract cell type, if it exists.
        debug(f'"{path}", line {line_num}: Magic')
        if token[0] != "%":
            raise NotebookParseError(
                f'"{path}", line {line_num}: Bad first magic cell line: {line}'
            )

        command_buf.append(line)
        cur_cell = dataclasses.replace(
            cur_cell,
            cell_type=CellType.from_string(token),
        )
        saw_new_cell = False

    # If there's an unfinished cell left, finish it.
    if cur_cell != EmptyCell:
        cells.append(
            dataclasses.replace(cur_cell, command="\n".join(command_buf)))

    # Assign 1-based positions and a fresh GUID to each cell.
    cells = [
        dataclasses.replace(cell, position=i + 1, guid=uuid.uuid4())
        for i, cell in enumerate(cells)
    ]
    return Notebook(cells=cells, path=path)