def test_site_root_directive_href():
    """Check how $ssServerRelativeSiteRoot hrefs are rewritten.

    Four link targets are exercised in turn: a link to a specific page of
    a content item, a PDF document, a content ID with a known redirection,
    and a content ID whose type is unknown.
    """
    # Link to a specific page of a content item.
    paged_html = (
        '<a href="[!--$ssServerRelativeSiteRoot--]A/Bunch/Of/Things/'
        'ABC123?useSecondary=&showpage=456">'
        "Page 456 of ABC 123</a>"
    )
    markdown = html_to_markdown(paged_html)
    assert "[Page 456 of ABC 123](/content/ABC123_456)" in markdown

    # PDF content: use a dedicated converter so its bookkeeping sets can be
    # inspected after the conversion.
    pdf_html = (
        '<a href="[!--$ssServerRelativeSiteRoot--]Opendocuments/OpenPDFdocuments/'
        'ABC123">ABC 123 document</a>'
    )
    converter = learning_importer.MHRAMarkdownConverter(
        content_prefix="/content/",
        asset_prefix="/asset/",
    )
    markdown = converter.convert(pdf_html)
    assert "[ABC 123 document](/asset/abc123.pdf)" in markdown
    assert converter.stellent_assets_to_download == {"ABC123"}
    assert converter.assets_with_unknown_type == set()

    # Content with a known redirection.
    redirected_html = (
        '<a href="[!--$ssServerRelativeSiteRoot--]'
        'Safetyinformation/DrugSafetyUpdate/CON123123">'
        "Known content redirection</a>"
    )
    expected_link = (
        "[Known content redirection]"
        "(https://www.gov.uk/drug-safety-update/"
        "addiction-to-benzodiazepines-and-codeine)"
    )
    markdown = html_to_markdown(redirected_html)
    assert expected_link in markdown

    # Unknown content: again a fresh converter, so the sets start out empty.
    unknown_html = (
        '<a href="[!--$ssServerRelativeSiteRoot--]A/Bunch/Of/Things/'
        'ABC123">ABC 123 thing</a>'
    )
    converter = learning_importer.MHRAMarkdownConverter(
        content_prefix="/content/",
        asset_prefix="/asset/",
    )
    markdown = converter.convert(unknown_html)
    assert "[ABC 123 thing](/asset/abc123.unknown)" in markdown
    assert converter.stellent_assets_to_download == {"ABC123"}
    assert converter.assets_with_unknown_type == {"ABC123"}
def test_http_relative_web_root_directive_href():
    """Check that a $HttpRelativeWebRoot href is rewritten as an asset link.

    Also verifies that the linked file is recorded in the converter's
    ``stellent_assets_to_download`` set.
    """
    anchor_html = (
        "<a href='[!--$HttpRelativeWebRoot--]/something/abc123.pdf'>"
        "ABC 123 document</a>"
    )
    converter = learning_importer.MHRAMarkdownConverter(
        content_prefix="/content/",
        asset_prefix="/asset/",
    )

    markdown = converter.convert(anchor_html)

    assert "[ABC 123 document](/asset/abc123.pdf)" in markdown
    assert converter.stellent_assets_to_download == {"abc123"}
def test_web_layout_url_src():
    """Check that $ssWeblayoutUrl image sources are rewritten as asset URLs.

    The input holds three ``<img>`` tags, two of which point at the same
    file; the expected download set lists each distinct asset once.
    """
    image_tags = (
        # Image carrying both alt text and a title.
        "<img src=\"[!--$ssWeblayoutUrl('ab/cd/abc123.jpg')--]\" "
        "alt='ABC 123' title='Image for ABC 123' />",
        # Same file again, without any attributes.
        "<img src=\"[!--$ssWeblayoutUrl('ab/cd/abc123.jpg')--]\" />",
        # A second, different file.
        "<img src=\"[!--$ssWeblayoutUrl('ab/cd/xyz789.jpg')--]\" />",
    )
    converter = learning_importer.MHRAMarkdownConverter(
        content_prefix="/content/",
        asset_prefix="/asset/",
    )

    markdown = converter.convert("".join(image_tags))

    assert '![ABC 123](/asset/abc123.jpg "Image for ABC 123")' in markdown
    assert converter.stellent_assets_to_download == {"abc123", "xyz789"}
def html_to_markdown(html):
    """Convert an HTML string to Markdown with a fresh converter.

    A new ``MHRAMarkdownConverter`` is built on every call, using
    ``/content/`` as the content prefix and ``/asset/`` as the asset prefix.
    The result of its ``convert`` method is returned unchanged.
    """
    return learning_importer.MHRAMarkdownConverter(
        content_prefix="/content/",
        asset_prefix="/asset/",
    ).convert(html)