Esempio n. 1
0
def test_walk_directory(siteroot: SiteRoot) -> None:
    """Check what ``loader.walk_directory`` yields for a small content tree.

    The tree holds two regular files, a ``.props.yml`` companion for one of
    them, and a ``.bak`` file matched by the ignore pattern. The test expects
    exactly two results, with the companion's YAML showing up as ``file1``'s
    metadata.
    """
    file1 = siteroot.write_text(siteroot.contents / "dir1/file1", "")
    siteroot.write_text(
        siteroot.contents / "dir1/file1.props.yml", "name: value"
    )
    file2 = siteroot.write_text(siteroot.contents / "dir1/dir2/file2", "")
    siteroot.write_text(siteroot.contents / "dir1/file3.bak", "")

    found = list(loader.walk_directory(siteroot.contents, {"*.bak"}))
    # Sort by source path so the assertions do not depend on walk order.
    found.sort(key=lambda item: str(item.srcpath))
    assert len(found) == 2

    nested, plain = found

    # mtime is taken from the result itself: only the other fields are checked.
    expected_nested = ContentSrc(
        srcpath=str(file2),
        contentpath=(("dir1", "dir2"), "file2"),
        package="",
        metadata={},
        mtime=nested.mtime,
    )
    assert nested == expected_nested

    expected_plain = ContentSrc(
        srcpath=str(file1),
        contentpath=(("dir1",), "file1"),
        package="",
        metadata={"name": "value"},
        mtime=plain.mtime,
    )
    assert plain == expected_plain
Esempio n. 2
0
def split_cells(
    src: ContentSrc, cells: List[Dict[str, Any]]
) -> List[Tuple[ContentSrc, List[Dict[str, Any]]]]:
    """Group notebook cells into per-file chunks.

    A cell for which ``get_cellfilename`` returns a truthy name starts a new
    chunk; following cells without a name are appended to the current chunk.
    Each chunk is paired with a copy of *src* whose ``contentpath`` keeps the
    original directory part and uses the cell's filename.

    If *cells* is empty, or the first cell carries no filename, the input is
    returned unsplit as ``[(src, cells)]``.
    """
    if not cells:
        return [(src, cells)]

    first, rest = cells[0], cells[1:]
    first_name = get_cellfilename(first)
    if not first_name:
        return [(src, cells)]

    first_src = src.copy()._replace(
        contentpath=(src.contentpath[0], first_name)
    )
    chunks = [(first_src, [first])]

    for current in rest:
        name = get_cellfilename(current)
        if name:
            # A named cell opens a new chunk with its own content path.
            chunk_src = src.copy()._replace(
                contentpath=(src.contentpath[0], name)
            )
            chunks.append((chunk_src, [current]))
        else:
            # Unnamed cells belong to the most recently opened chunk.
            chunks[-1][-1].append(current)

    return chunks
Esempio n. 3
0
def yamlloader(site: site.Site, src: ContentSrc) -> Sequence[Tuple[ContentSrc, None]]:
    """Load a YAML file as a body-less content source.

    The parsed document is merged into ``src.metadata``. ``type`` defaults to
    ``"config"`` when the document does not set it, and ``loader`` is always
    set to ``"yaml"``.

    A document whose top level is not a mapping cannot be merged into the
    metadata; it is reported through the logger and treated as empty instead
    of failing later with a ``TypeError``/``ValueError``.

    Returns:
        A single-element list ``[(src, None)]``.
    """
    text = src.read_bytes()
    # NOTE(security): yaml.FullLoader is not meant for untrusted input;
    # consider yaml.safe_load if content files can come from outside.
    metadata = yaml.load(text, Loader=yaml.FullLoader) or {}
    if not isinstance(metadata, dict):
        logger.error(f"Error: {src.repr_filename()} is not valid YAML file.")
        # Only a mapping can be merged below; fall back to empty metadata.
        metadata = {}

    metadata.setdefault("type", "config")

    src.metadata.update(metadata)
    src.metadata["loader"] = "yaml"

    return [(src, None)]
Esempio n. 4
0
def walk_package(package: str, path: str, ignores: Set[str]) -> Iterator[ContentSrc]:
    """Yield a ContentSrc for each file found under *path* inside *package*.

    Files are enumerated by ``_iter_package_files`` with *ignores* applied.
    If a sibling named ``<file name> + miyadaiku.METADATA_FILE_SUFFIX``
    exists, its YAML content becomes the item's metadata.

    Each yielded item has ``srcpath`` relative to the package root,
    ``contentpath`` derived from the path relative to *path*, and
    ``mtime=None``. Nothing is yielded when *path* is not a directory in the
    package.
    """
    logger.info(f"Loading {package}/{path}")

    if not path.endswith("/"):
        path = path + "/"

    packagepath = importlib_resources.files(package)  # type: ignore
    root = packagepath / path
    if not root.is_dir():
        return

    for srcpath in _iter_package_files(root, ignores):
        # Path relative to the content root -> content path.
        destname = posixpath.relpath(srcpath, root)
        # Path relative to the package root -> recorded source path.
        pkg_relpath = posixpath.relpath(srcpath, packagepath)

        metadatapath = (
            srcpath.parent / f"{srcpath.name}{miyadaiku.METADATA_FILE_SUFFIX}"
        )

        if metadatapath.exists():
            text = metadatapath.read_bytes()
            # NOTE(security): yaml.FullLoader is not meant for untrusted
            # input; consider yaml.safe_load for third-party packages.
            metadata = yaml.load(text, Loader=yaml.FullLoader) or {}
        else:
            metadata = {}

        yield ContentSrc(
            package=package,
            srcpath=str(pkg_relpath),
            metadata=metadata,
            contentpath=to_contentpath(destname),
            mtime=None,
        )
Esempio n. 5
0
def loadfile(
    site: site.Site, src: ContentSrc, bin: bool, filecache: shelve.DbfilenameShelf
) -> List[Tuple[ContentSrc, Optional[bytes]]]:
    """Load *src* through the matching loader, using *filecache* when fresh.

    The cache key combines ``src.package`` and ``src.srcpath``. A cached entry
    is reused only when its stored stat equals ``src.stat()``.

    Args:
        site: Passed through to the loader.
        src: The content source to load.
        bin: When true, always use ``binloader``; otherwise the loader is
            looked up in ``FILELOADERS`` by file extension, falling back to
            ``binloader``.
        filecache: Mapping used to store ``(stat, result)`` per source.

    Returns:
        A list of ``(content source, body)`` pairs where a ``str`` body has
        been encoded as UTF-8, a ``bytes`` body is kept as is, and anything
        else becomes ``None``.
    """
    curstat = src.stat()

    key = f"{src.package}_::::_{src.srcpath}"

    stat, bodies = filecache.get(key, (None, None))
    if stat and stat == curstat:
        return cast(List[Tuple[ContentSrc, Optional[bytes]]], bodies)

    if not bin:
        assert src.srcpath
        ext = os.path.splitext(src.srcpath)[1]
        loader = FILELOADERS.get(ext, binloader)
    else:
        loader = binloader

    ret: List[Tuple[ContentSrc, Optional[bytes]]] = []
    for contentsrc, body in loader(site, src):
        assert contentsrc.metadata["loader"]

        # Exactly one entry per loaded item: the branches must be exclusive,
        # otherwise a bytes body would be followed by a spurious None entry.
        if isinstance(body, bytes):
            ret.append((contentsrc, body))
        elif isinstance(body, str):
            ret.append((contentsrc, body.encode("utf-8")))
        else:
            ret.append((contentsrc, None))

    filecache[key] = curstat, ret
    return ret
Esempio n. 6
0
def load(src: ContentSrc) -> List[Tuple[ContentSrc, str]]:
    """Load *src* and merge the extracted metadata into ``src.metadata``.

    Sources without a package are read from ``src.srcpath`` via
    ``_load_file``; packaged sources are read as text and parsed with
    ``_load_string``.

    Returns:
        A single-element list ``[(src, body)]``.
    """
    if not src.package:
        assert src.srcpath
        metadata, body = _load_file(src.srcpath)
    else:
        text = src.read_text()
        metadata, body = _load_string(text)

    src.metadata.update(metadata)
    result = [(src, body)]
    return result
Esempio n. 7
0
def load(src: ContentSrc) -> List[Tuple[ContentSrc, str]]:
    """Split the text of *src* into sub-sources and convert each one.

    ``parsesrc.splitsrc`` provides ``(sub-source, text)`` pairs; every text is
    passed to ``_load_string`` and the returned metadata is merged into the
    sub-source's metadata.

    Returns:
        A list of ``(sub-source, html)`` pairs, in the order produced by
        ``parsesrc.splitsrc``.
    """
    text = src.read_text()

    loaded = []
    for subsrc, chunk in parsesrc.splitsrc(src, text):
        meta, html = _load_string(subsrc, chunk)
        subsrc.metadata.update(meta)
        loaded.append((subsrc, html))

    return loaded
Esempio n. 8
0
def test_walkpackage() -> None:
    """Check the result count and first entry of ``loader.walk_package``.

    Walks ``contents`` of ``package1`` ignoring ``*.bak`` and dot-files, and
    expects seven entries; the first one (ordered by source path) carries
    metadata ``{"test": "value"}``.
    """
    found = list(loader.walk_package("package1", "contents", {"*.bak", ".*"}))
    # Sort by source path so the assertions do not depend on walk order.
    found.sort(key=lambda item: str(item.srcpath))

    assert len(found) == 7

    head = found[0]
    # mtime is taken from the result itself: only the other fields are checked.
    expected = ContentSrc(
        package="package1",
        srcpath="contents/dir1/a",
        contentpath=(("dir1",), "a"),
        metadata={"test": "value"},
        mtime=head.mtime,
    )
    assert head == expected
Esempio n. 9
0
    def add_bytes(self, type: str, path: str, body: bytes) -> Content:
        """Build a content object from in-memory *body* and register it.

        The content source has no package and no source path, ``mtime`` 0 and
        metadata ``{"type": type}``; its content path is derived from *path*.
        The built content is stored in ``self._contentfiles`` under its
        content path and returned.
        """
        target = to_contentpath(path)
        src = ContentSrc(
            package=None,
            srcpath=None,
            metadata={"type": type},
            contentpath=target,
            mtime=0,
        )

        built = contents.build_content(src, body)
        key = built.src.contentpath
        self._contentfiles[key] = built
        return built
Esempio n. 10
0
def test_package(siteroot: SiteRoot) -> None:
    """Load a notebook addressed by package and check it becomes an article."""
    loaded_site = siteroot.load({}, {})
    ipynb.init(loaded_site)

    notebook_src = ContentSrc(
        package="pkg_ipynb",
        srcpath="files/test.ipynb",
        metadata={},
        contentpath=((), "test.html"),
        mtime=0,
    )

    # Exactly one (source, text) pair is expected.
    [(src, text)] = ipynb.load(notebook_src)
    assert src.metadata["type"] == "article"
    print(text)
Esempio n. 11
0
def test_split(siteroot: SiteRoot) -> None:
    """Load a notebook that is expected to split into two content sources.

    Checks, for both parts, the resulting content path, that the rendered
    text contains the expected expressions but neither the ``%%%`` marker nor
    the ``meta`` key, and the exact metadata attached to each part.
    """
    loaded_site = siteroot.load({}, {})
    ipynb.init(loaded_site)

    notebook_src = ContentSrc(
        package=None,
        srcpath=str(DIR / "test_splitsrc.ipynb"),
        metadata={},
        contentpath=((), "test_splitsrc.ipynb"),
        mtime=0,
    )

    def expected_meta(value: str) -> dict:
        # Both parts share everything except the "meta" value.
        return {
            "type": "article",
            "meta": value,
            "has_jinja": True,
            "loader": "ipynb",
        }

    # Exactly two (source, text) pairs are expected.
    first, second = ipynb.load(notebook_src)
    src1, text1 = first
    src2, text2 = second

    # --- first part ---
    assert src1.contentpath == ((), "file1")
    rendered1 = BeautifulSoup(text1, "html.parser").text
    print(rendered1)
    assert "%%%" not in rendered1
    assert "1+1" in rendered1
    assert "2+2" in rendered1

    assert "meta" not in rendered1
    assert "test1" in rendered1

    assert src1.metadata == expected_meta("value1")

    # --- second part ---
    assert src2.contentpath == ((), "file2")
    rendered2 = BeautifulSoup(text2, "html.parser").text
    print(rendered2)
    assert "%%%" not in rendered2
    assert "3+3" in rendered2

    assert "meta" not in rendered2
    assert src2.metadata == expected_meta("value2")
Esempio n. 12
0
def test_load(siteroot: SiteRoot) -> None:
    """Load a notebook from a file path and check the rendered text.

    The Jinja expression must be present but not wrapped in its own
    paragraph, and the text ``hidden cell`` must not appear in the output.
    """
    loaded_site = siteroot.load({}, {})
    ipynb.init(loaded_site)

    notebook_src = ContentSrc(
        package=None,
        srcpath=str(DIR / "test.ipynb"),
        metadata={},
        contentpath=((), "test.html"),
        mtime=0,
    )

    # Exactly one (source, text) pair is expected.
    [(src, text)] = ipynb.load(notebook_src)
    assert src.metadata["type"] == "article"

    jinja_expr = "{{ 1+1 }}"
    assert jinja_expr in text
    assert "<p>{{ 1+1 }}</p>" not in text
    assert "hidden cell" not in text
Esempio n. 13
0
def walk_directory(path: Path, ignores: Set[str]) -> Iterator[ContentSrc]:
    """Yield a ContentSrc for each non-ignored file below *path*.

    *path* is expanded and resolved first; nothing is yielded when it is not
    a directory. Directory and file names matching *ignores* (as decided by
    ``is_ignored``) are skipped, and ignored directories are not descended
    into.

    If a sibling named ``<file name> + miyadaiku.METADATA_FILE_SUFFIX``
    exists, its YAML content becomes the item's metadata. Each item has an
    empty ``package``, the resolved file path as ``srcpath``, a content path
    derived from the path relative to *path*, and the file's modification
    time.
    """
    logger.info(f"Loading {path}")
    path = path.expanduser().resolve()
    if not path.is_dir():
        return

    for root, dirs, files in os.walk(path):
        rootpath = Path(root)
        if rootpath.stem.startswith("."):
            # NOTE(review): this skips only the files directly inside a
            # dot-directory; `dirs` is left untouched here, so os.walk still
            # descends into its subdirectories. Confirm whether the subtree
            # should be pruned as well.
            continue

        # Slice assignment mutates the list os.walk uses, pruning the walk.
        dirs[:] = [entry for entry in dirs if not is_ignored(ignores, entry)]

        for name in files:
            if is_ignored(ignores, name):
                continue

            filename = (rootpath / name).resolve()

            dirname, fname = os.path.split(filename)
            metadatafile = os.path.join(
                dirname, f"{fname}{miyadaiku.METADATA_FILE_SUFFIX}"
            )

            metadata: Dict[Any, Any] = {}

            if os.path.isfile(metadatafile):
                # Context manager closes the handle promptly instead of
                # leaving it to the garbage collector.
                with open(metadatafile, encoding=miyadaiku.YAML_ENCODING) as f:
                    text = f.read()
                # NOTE(security): yaml.FullLoader is not meant for untrusted
                # input; consider yaml.safe_load if that can happen.
                metadata = yaml.load(text, Loader=yaml.FullLoader) or {}

            mtime = filename.stat().st_mtime
            yield ContentSrc(
                package="",
                srcpath=str(filename),
                metadata=metadata,
                contentpath=to_contentpath(str(filename.relative_to(path))),
                mtime=mtime,
            )
Esempio n. 14
0
def to_contentsrc(path: pathlib.Path) -> ContentSrc:
    """Wrap *path* in a ContentSrc placed at the content root.

    The result has an empty package, empty metadata, ``mtime`` 0, the string
    form of *path* as source path, and ``path.name`` as its file name with no
    directory components.
    """
    source_path = str(path)
    content_path = ((), path.name)
    return ContentSrc(
        package="",
        srcpath=source_path,
        metadata={},
        contentpath=content_path,
        mtime=0,
    )
Esempio n. 15
0
def load(site: site.Site, src: ContentSrc) -> List[Tuple[ContentSrc, str]]:
    """Convert the text of *src* and merge the parsed metadata into it.

    *site* is accepted but not used by this function.

    Returns:
        A single-element list ``[(src, html)]``.
    """
    text = src.read_text()
    parsed_meta, rendered = _load_string(text)
    src.metadata.update(parsed_meta)
    result = [(src, rendered)]
    return result
Esempio n. 16
0
def load(src: ContentSrc) -> List[Tuple[ContentSrc, str]]:
    """Convert a Jupyter notebook into one or more HTML content sources.

    The notebook text is parsed with ``nbformat`` and its cells are grouped
    by ``split_cells``; every group is exported on its own through
    ``_export``. For each group:

    * YAML front matter (``---`` delimited) is split off the first cell when
      that cell is markdown or raw, and later merged into the metadata.
    * Raw cells are dropped and trailing empty markdown/code cells are
      trimmed; at least one cell is always kept.
    * Unless the front matter sets ``has_jinja`` to a false value, Jinja tags
      in markdown cells are replaced by placeholder digests before export and
      put back afterwards, so the brace escaping applied to the exported HTML
      does not affect them.

    Returns:
        A list of ``(sub-source, html)`` pairs, one per cell group.
    """
    s = src.read_text()
    notebook = nbformat.reads(s, nbformat.current_nbformat)

    cells = split_cells(src, notebook.get("cells", []))
    ret = []
    for subsrc, subcells in cells:
        subjson = copy.deepcopy(notebook)
        cellmeta: Dict[str, Any] = {}
        if subcells:
            top = subcells[0]
            if top.get("cell_type", "") in ("markdown", "raw"):
                srcstr = top.get("source", "")
                if srcstr:
                    cellmeta, srcstr = parsesrc.split_yaml(srcstr, "---")
                    top["source"] = srcstr

        # remove raw cells
        newcells = [c for c in subcells if c.get("cell_type", "") != "raw"]

        # placeholder digest -> original Jinja tag text
        jinjatags: Dict[str, str] = {}
        idx = 0

        def conv_jinjatag(s: str) -> str:
            # The running index keeps placeholders unique even when the same
            # tag text occurs more than once.
            nonlocal idx
            idx += 1
            digest = hashlib.md5(s.encode("utf-8")).hexdigest() + str(idx)
            jinjatags[digest] = s
            return digest

        # save jinja tag
        if cellmeta.get("has_jinja", True):
            for cell in subcells:
                if cell.get("cell_type", "") == "markdown":
                    newsrc = parsesrc.replace_jinjatag(cell.get("source", ""),
                                                       conv_jinjatag)
                    cell["source"] = newsrc

        # remove empty cells at bottom
        while len(newcells) > 1:
            c = newcells[-1]
            celltype = c.get("cell_type", "")

            if celltype == "markdown":
                if c["source"].strip():
                    break

            elif celltype == "code":
                if c["source"].strip() or c["outputs"]:
                    break

            else:
                break

            # remove cell
            del newcells[-1]

        subjson["cells"] = newcells

        meta, html = _export(
            subjson,
            cellmeta.get("nbconvert_template", None),
            cellmeta.get("nbconvert_templatefile", None),
        )
        meta.update(cellmeta)

        subsrc.metadata.update(meta)

        # restore jinja tag
        # Escape braces produced by the export first, so that only the
        # restored tags below contain literal "{" / "}".
        html = html.translate({ord("{"): "&#123;", ord("}"): "&#125;"})
        for digest, tag in jinjatags.items():
            # A callable replacement inserts the tag verbatim; a plain string
            # would be treated as a template, so backslashes inside a tag
            # would be read as escapes or group references. A placeholder
            # that is alone in a <p> is replaced together with the <p> tags.
            html = re.sub(
                rf"(<p>\s*{digest}\s*</p>)|{digest}",
                lambda _match, tag=tag: tag,
                html,
                count=1,
            )

        ret.append((subsrc, html))

    return ret
Esempio n. 17
0
def binloader(
    site: site.Site, src: ContentSrc
) -> Sequence[Tuple[ContentSrc, Optional[str]]]:
    """Mark *src* as binary content without reading a body.

    Sets both ``type`` and ``loader`` in ``src.metadata`` to ``"binary"``;
    *site* is accepted but not used.

    Returns:
        A single-element list ``[(src, None)]``.
    """
    for field in ("type", "loader"):
        src.metadata[field] = "binary"
    loaded = [(src, None)]
    return loaded