def test_wa_loader():
    """Check the serialized tree produced from the ``wa1.html`` fixture.

    The fixture is loaded from the ``data`` directory next to this test
    file, the resulting tree is serialized with ``lxml.html.tostring`` and
    the output must contain the paragraph with ``__START_*__`` /
    ``__END_*__`` markers and no ``wa-`` / ``WA-`` substrings.
    """
    loader = WebAnnotatorLoader()
    fixture_path = os.path.join(os.path.dirname(__file__), 'data', 'wa1.html')
    serialized = lxml.html.tostring(loader.load(fixture_path))

    expected_paragraph = (
        b"<p> __START_ORG__ Scrapinghub __END_ORG__ has an <b>office</b> "
        b"in __START_CITY__ Montevideo __END_CITY__ </p>"
    )
    assert expected_paragraph in serialized, serialized

    # Presumably these prefixes belong to the annotation markup that the
    # loader is expected to strip -- confirm against WebAnnotatorLoader.
    for leftover in (b"wa-", b"WA-"):
        assert leftover not in serialized, serialized
def test_wa_loader():
    """Check the serialized tree produced from the ``wa1.html`` fixture.

    The fixture is loaded from the ``data`` directory next to this test
    file and serialized with ``lxml.html.tostring``. The output must contain
    the paragraph with ``__START_*__`` / ``__END_*__`` markers and must not
    contain ``wa-`` or ``WA-``.

    NOTE(review): this function has the same name as the test defined just
    before it, so this definition replaces the earlier one at import time.
    Consider removing one of the two.
    """
    ld = WebAnnotatorLoader()
    tree = ld.load(os.path.join(os.path.dirname(__file__), 'data', 'wa1.html'))
    res = lxml.html.tostring(tree)

    # lxml.html.tostring returns bytes by default, so the expected fragments
    # must be bytes literals; checking a str for membership in bytes raises
    # TypeError on Python 3 instead of evaluating the assertion.
    expected = (
        b"<p> __START_ORG__ Scrapinghub __END_ORG__ has an <b>office</b> "
        b"in __START_CITY__ Montevideo __END_CITY__ </p>"
    )
    assert expected in res, res
    assert b"wa-" not in res, res
    assert b"WA-" not in res, res
def test_wa_loader_None_bug():
    """Check the serialized tree produced from the ``wa2.html`` fixture.

    The fixture is loaded from the ``data`` directory next to this test
    file, and the serialized output must contain the fragment in which the
    ``__END_ORG__`` marker follows the closing ``</em>`` tag.
    """
    loader = WebAnnotatorLoader()
    fixture_path = os.path.join(os.path.dirname(__file__), 'data', 'wa2.html')
    serialized = lxml.html.tostring(loader.load(fixture_path))

    expected_fragment = b'<em>Inc.</em> __END_ORG__ </p>'
    assert expected_fragment in serialized, serialized