Exemple #1
0
def test_content_finder_specific_url_replacements(
    url,
    content_relative_dirpath,
    filename,
):
    """Check that ``url`` resolves to content stored under the given path.

    A single content item is built at ``content{content_relative_dirpath}``
    with the given ``filename``; looking ``url`` up within the same site must
    return exactly that item. The three arguments are supplied from outside
    this function (presumably a parametrization not visible here).
    """
    site_uuid = "website_uuid"
    site = WebsiteFactory.build(uuid=site_uuid)
    expected = WebsiteContentFactory.build(
        website=site,
        dirpath=f"content{content_relative_dirpath}",
        filename=filename,
        text_id="content-uuid",
    )

    with patch_website_contents_all([expected]):
        lookup = ContentLookup()
        found = lookup.find_within_site(site_uuid, url)

        assert found == expected
Exemple #2
0
def test_content_finder_returns_metadata_for_site(site_uuid, content_index):
    """Check that looking up "/" within a site returns its sitemetadata.

    Two sites are created, each with one ``sitemetadata`` content item. The
    lookup of ``"/"`` for ``site_uuid`` must return the item at
    ``content_index`` in the list built below.
    """
    site_and_text_ids = (
        ("website_one", "content-1"),
        ("website_two", "content-2"),
    )
    contents = [
        WebsiteContentFactory.build(
            website=WebsiteFactory.build(uuid=uuid),
            type="sitemetadata",
            text_id=text_id,
        )
        for uuid, text_id in site_and_text_ids
    ]

    with patch_website_contents_all(contents):
        lookup = ContentLookup()
        found = lookup.find_within_site(site_uuid, "/")
        assert found == contents[content_index]
Exemple #3
0
class ValidateUrls(RegexpCleanupRule):
    """
    Read-only rule that classifies markdown link destinations.

    Every match is returned exactly as it was found; the only output of
    interest is the accompanying ``ReplacementNotes`` record describing what
    kind of link was seen and, for course links, which site it points to.
    """

    # The link text is deliberately not captured: doing so would make matches
    # overlap and images nested inside links would be missed.
    regex = (
        r"\\?\]"  # closing "]" of the link text (possibly escaped as "\]")
        r"\("  # opening parenthesis of the destination
        r"(?P<url>[^\s]*?)"  # the url itself
        r"(\s\"(?P<title>.*?)\")?"  # optional quoted title
        r"\)"  # closing parenthesis of the destination
    )

    alias = "validate_urls"

    fields = [
        "markdown",
        "metadata.related_resources_text",
        "metadata.image_metadata.caption",
        "metadata.image_metadata.credit",
        "metadata.optional_text",
        "metadata.description",
        "metadata.course_description",
    ]

    @dataclass
    class ReplacementNotes:
        # Classification assigned by replace_match, e.g. "global link".
        link_type: str
        # Path component of the parsed url.
        url_path: str
        # Name of the website the link resolves to; empty unless resolved.
        links_to_course: str = ""

    def __init__(self) -> None:
        """Initialize the parent rule and create the content lookup."""
        super().__init__()
        self.content_lookup = ContentLookup()

    def replace_match(self, match: re.Match, wc: WebsiteContent):
        """
        Classify one matched link without modifying it.

        Args:
            match: a match of ``regex``.
            wc: the content being processed (not used by this rule).

        Returns:
            A ``(text, notes)`` tuple where ``text`` is always the matched
            text unchanged and ``notes`` is a ``ReplacementNotes`` instance.
        """
        unchanged = match.group(0)
        parsed = urlparse(match.group("url"))
        make_notes = partial(self.ReplacementNotes, url_path=parsed.path)

        if parsed.scheme.startswith("http"):
            return unchanged, make_notes(link_type="global link")

        if not parsed.path.startswith("/courses"):
            return unchanged, make_notes(link_type="not course link")

        try:
            target = self.content_lookup.find(parsed.path)
            course_name = target.website.name
        except KeyError:
            return unchanged, make_notes(link_type="content not found")

        return unchanged, make_notes(
            link_type="course link",
            links_to_course=course_name,
        )
Exemple #4
0
def test_content_finder_is_site_specific():
    """ContentLookup keeps identically-located content on two sites apart.

    Both sites hold content with the same dirpath, filename and text_id; a
    lookup scoped to each site must return that site's own item.
    """
    url = "/resources/path/to/file1"
    per_site_contents = [
        WebsiteContentFactory.build(
            website=WebsiteFactory.build(uuid=site_uuid),
            dirpath="content/resources/path/to",
            filename="file1",
            text_id="content-uuid-1",
        )
        for site_uuid in ("website-uuid-1", "website-uuid-2")
    ]

    with patch_website_contents_all(per_site_contents):
        lookup = ContentLookup()

        for expected in per_site_contents:
            found = lookup.find_within_site(expected.website_id, url)
            assert found == expected
Exemple #5
0
class BaseurlReplacementRule(RegexpCleanupRule):
    """Replacement rule for use with WebsiteContentMarkdownCleaner.

    Rewrites markdown links whose destination starts with ``{{< baseurl >}}``
    into ``{{% resource_link %}}`` shortcodes.

    This is intentionally limited in scope for now. Some baseurl links, such
    as those whose titles are images or include square brackets, are excluded
    from replacement.
    """

    regex = (
        r"\\?\[(?P<title>[^\[\]\n]*?)\\?\]"  # link text, no brackets/newlines
        r"\({{< baseurl >}}(?P<url>.*?)"  # destination after the baseurl
        r"(/?(?P<fragment>#.*?))?"  # optional "#fragment"
        r"\)"
    )

    alias = "baseurl"

    def __init__(self) -> None:
        """Initialize the parent rule and create the content lookup."""
        super().__init__()
        self.content_lookup = ContentLookup()

    def replace_match(self, match: re.Match, website_content: WebsiteContent):
        """Return the replacement text for one matched baseurl link.

        Args:
            match: a match of ``regex``.
            website_content: the content in which the link was found; its
                ``website_id`` scopes the lookup of the link target.

        Returns:
            A ``resource_link`` shortcode when the target is found within the
            same site; otherwise the matched text unchanged.
        """
        unchanged = match.group(0)
        title = match.group("title")

        # A shortcode in the link text most likely means the "title" is an
        # image rendered through < resource >; leave those links alone.
        if "{{<" in title:
            return unchanged

        try:
            target = self.content_lookup.find_within_site(
                website_content.website_id,
                match.group("url"),
            )
            text_id = target.text_id
        except KeyError:
            # No matching content in this site: keep the link as written.
            return unchanged

        quoted_title = title.replace('"', '\\"')
        fragment = match.group("fragment")
        fragment_arg = "" if fragment is None else f' "{fragment}"'
        return f'{{{{% resource_link {text_id} "{quoted_title}"{fragment_arg} %}}}}'
Exemple #6
0
 def __init__(self) -> None:
     """Run the parent initializer, then create this instance's ContentLookup."""
     super().__init__()
     self.content_lookup = ContentLookup()
Exemple #7
0
class ResolveUIDRule(PyparsingRule):
    """
    Rewrite ``./resolveuid/<uuid>`` links found in markdown.

    When the linked content belongs to the same website as the content being
    processed, the link becomes a ``resource`` shortcode (images) or a
    ``resource_link`` shortcode (other links). When it belongs to a different
    website, the link is rewritten as a markdown link whose destination comes
    from ``get_rootrelative_url_from_content``. Links that cannot be parsed or
    resolved are left unchanged and the reason is recorded in the notes.
    """

    alias = "resolveuid"

    Parser = LinkParser

    fields = [
        "markdown",
        # There is roughly one instance of resolveuid in a metadata field.
        # That one will be fixed manually so that it becomes a root-relative
        # link and not a shortcode... avoids a conditional here.
    ]

    @dataclass
    class ReplacementNotes:
        # NOTE(review): annotated as str, but populated from link.is_image,
        # which looks like a boolean flag -- confirm.
        is_image: str
        # Name of the website owning the linked content, once resolved.
        linked_site_name: Union[str, None] = None
        # text_id of the linked content, once resolved.
        linked_content_uuid: Union[str, None] = None
        # Free-form explanation, used when the link is left unchanged.
        note: str = ""

    def __init__(self) -> None:
        """Initialize the parent rule and create the content lookup."""
        super().__init__()
        self.content_lookup = ContentLookup()

    def replace_match(self, s: str, l: int, toks, website_content):
        """
        Compute the replacement for one parsed link.

        Args:
            s: not used by this rule.
            l: not used by this rule.
            toks: parse result exposing ``link`` and ``original_text``.
            website_content: the content in which the link was found.

        Returns:
            A ``(text, notes)`` tuple: the replacement text (or the original
            text when nothing is replaced) and a ``ReplacementNotes`` record.
        """
        link: MarkdownLink = toks.link
        unchanged = toks.original_text
        make_notes = partial(self.ReplacementNotes, is_image=link.is_image)
        prefix = "./resolveuid/"

        if not link.destination.startswith(prefix):
            return unchanged, make_notes(note="not a resolveuid link")

        try:
            url = urlparse(remove_prefix(link.destination, prefix))
            uuid = UUID(url.path)
        except ValueError as error:
            # The remainder of the destination is not a valid UUID / url.
            return unchanged, make_notes(note=str(error))

        try:
            linked_content = self.content_lookup.find_by_uuid(uuid)
        except KeyError as error:
            # No content is known under this UUID.
            return unchanged, make_notes(note=str(error))

        resolved_notes = make_notes(
            linked_content_uuid=linked_content.text_id,
            linked_site_name=linked_content.website.name,
        )

        if linked_content.website_id != website_content.website_id:
            # Cross-site target: emit a regular markdown link.
            cross_site_link = MarkdownLink(
                text=link.text,
                destination=get_rootrelative_url_from_content(linked_content),
                is_image=link.is_image,
                # should be empty, resolveuid links don't have this.
                title=link.title,
            )
            return cross_site_link.to_markdown(), resolved_notes

        # Same-site target: emit a shortcode.
        if link.is_image:
            shortcode = ShortcodeTag.resource(uuid)
        else:
            shortcode = ShortcodeTag.resource_link(
                uuid=uuid,
                text=link.text,
                fragment=url.fragment,
            )
        return shortcode.to_hugo(), resolved_notes

    def should_parse(self, text: str):
        """Return whether ``text`` contains the substring "resolveuid"."""
        return "resolveuid" in text
Exemple #8
0
def test_content_finder_raises_keyerror():
    """ContentLookup.find_within_site raises KeyError for an unknown url."""
    content_lookup = ContentLookup()
    with pytest.raises(KeyError):
        # The call is intentionally not wrapped in `assert`: the expectation
        # is the exception itself, and asserting on a return value that
        # should never exist would only blur the failure report.
        content_lookup.find_within_site("website_uuid", "url/to/thing")