예제 #1
0
def test_site_root_directive_href():
    """Check how links using $ssServerRelativeSiteRoot directives are converted.

    Four scenarios are exercised in turn: a paged content link, a link
    expected to become a PDF asset, a link expected to be redirected to a
    gov.uk URL, and a link expected to become an asset of unknown type.
    """
    # Scenario 1: content link carrying a "showpage" query parameter.
    paged_html = (
        '<a href="[!--$ssServerRelativeSiteRoot--]A/Bunch/Of/Things/'
        'ABC123?useSecondary=&#38;showpage=456">'
        "Page 456 of ABC 123</a>"
    )
    paged_markdown = html_to_markdown(paged_html)
    assert "[Page 456 of ABC 123](/content/ABC123_456)" in paged_markdown

    # Scenario 2: PDF document link; a fresh converter is used so the
    # recorded asset sets can be inspected afterwards.
    pdf_converter = learning_importer.MHRAMarkdownConverter(
        content_prefix="/content/", asset_prefix="/asset/")
    pdf_html = (
        '<a href="[!--$ssServerRelativeSiteRoot--]Opendocuments/OpenPDFdocuments/'
        'ABC123">ABC 123 document</a>'
    )
    pdf_markdown = pdf_converter.convert(pdf_html)
    assert "[ABC 123 document](/asset/abc123.pdf)" in pdf_markdown
    assert pdf_converter.stellent_assets_to_download == {"ABC123"}
    assert pdf_converter.assets_with_unknown_type == set()

    # Scenario 3: link whose target is expected to be redirected.
    redirect_html = ('<a href="[!--$ssServerRelativeSiteRoot--]'
                     'Safetyinformation/DrugSafetyUpdate/CON123123">'
                     "Known content redirection</a>")
    expected_redirect = ("[Known content redirection]"
                         "(https://www.gov.uk/drug-safety-update/"
                         "addiction-to-benzodiazepines-and-codeine)")
    assert expected_redirect in html_to_markdown(redirect_html)

    # Scenario 4: link expected to yield an asset of unknown type; again a
    # fresh converter so its recorded sets start empty.
    unknown_converter = learning_importer.MHRAMarkdownConverter(
        content_prefix="/content/", asset_prefix="/asset/")
    unknown_html = (
        '<a href="[!--$ssServerRelativeSiteRoot--]A/Bunch/Of/Things/'
        'ABC123">ABC 123 thing</a>'
    )
    unknown_markdown = unknown_converter.convert(unknown_html)
    assert "[ABC 123 thing](/asset/abc123.unknown)" in unknown_markdown
    assert unknown_converter.stellent_assets_to_download == {"ABC123"}
    assert unknown_converter.assets_with_unknown_type == {"ABC123"}
예제 #2
0
def test_http_relative_web_root_directive_href():
    """Check how links using $HttpRelativeWebRoot directives are converted.

    The link must be rewritten to point under the asset prefix, and the
    asset name must be recorded for download.
    """
    converter = learning_importer.MHRAMarkdownConverter(
        content_prefix="/content/", asset_prefix="/asset/")
    link_html = (
        "<a href='[!--$HttpRelativeWebRoot--]/something/abc123.pdf'>"
        "ABC 123 document</a>"
    )
    markdown = converter.convert(link_html)
    assert "[ABC 123 document](/asset/abc123.pdf)" in markdown
    assert converter.stellent_assets_to_download == {"abc123"}
예제 #3
0
def test_web_layout_url_src():
    """Check how image sources using $ssWeblayoutUrl directives are converted.

    Three images are converted in one call, two of them sharing a source.
    The image with alt text and a title must appear in the Markdown, and
    the two distinct asset names must be recorded for download.
    """
    converter = learning_importer.MHRAMarkdownConverter(
        content_prefix="/content/", asset_prefix="/asset/")
    titled_img = (
        "<img src=\"[!--$ssWeblayoutUrl('ab/cd/abc123.jpg')--]\" "
        "alt='ABC 123' title='Image for ABC 123' />"
    )
    repeated_img = "<img src=\"[!--$ssWeblayoutUrl('ab/cd/abc123.jpg')--]\" />"
    other_img = "<img src=\"[!--$ssWeblayoutUrl('ab/cd/xyz789.jpg')--]\" />"
    markdown = converter.convert(titled_img + repeated_img + other_img)
    assert '![ABC 123](/asset/abc123.jpg "Image for ABC 123")' in markdown
    expected_assets = {"abc123", "xyz789"}
    assert converter.stellent_assets_to_download == expected_assets
def html_to_markdown(html):
    """Return the result of converting *html* with a new MHRAMarkdownConverter.

    The converter is built with "/content/" as its content prefix and
    "/asset/" as its asset prefix.
    """
    return learning_importer.MHRAMarkdownConverter(
        content_prefix="/content/", asset_prefix="/asset/"
    ).convert(html)