Exemple #1
0
def test_wa_loader():
    """Load the ``wa1.html`` fixture and inspect its serialized form.

    The serialized tree must contain the expected paragraph carrying the
    ``__START_*__`` / ``__END_*__`` markers, and must not contain the
    substrings ``wa-`` or ``WA-`` anywhere.
    """
    loader = WebAnnotatorLoader()
    fixture_dir = os.path.join(os.path.dirname(__file__), 'data')
    parsed = loader.load(os.path.join(fixture_dir, 'wa1.html'))
    serialized = lxml.html.tostring(parsed)

    expected_paragraph = b"<p> __START_ORG__ Scrapinghub __END_ORG__  has an <b>office</b> in  __START_CITY__ Montevideo __END_CITY__ </p>"
    assert expected_paragraph in serialized, serialized

    # Neither lower- nor upper-case "wa-" may remain in the output
    # (presumably leftover annotation markup -- confirm against the loader).
    for forbidden in (b"wa-", b"WA-"):
        assert forbidden not in serialized, serialized
Exemple #2
0
def test_wa_loader():
    """Load the ``wa1.html`` fixture and inspect its serialized form.

    The serialized tree must contain the expected paragraph carrying the
    ``__START_*__`` / ``__END_*__`` markers, and must not contain the
    substrings ``wa-`` or ``WA-`` anywhere.
    """
    ld = WebAnnotatorLoader()
    tree = ld.load(os.path.join(os.path.dirname(__file__), 'data', 'wa1.html'))
    # With the default encoding, lxml.html.tostring() returns bytes, so the
    # needles must be bytes literals: on Python 3, `"str" in bytes_obj`
    # raises TypeError instead of evaluating the assertion.
    res = lxml.html.tostring(tree)
    assert b"<p> __START_ORG__ Scrapinghub __END_ORG__  has an <b>office</b> in  __START_CITY__ Montevideo __END_CITY__ </p>" in res, res
    assert b"wa-" not in res, res
    assert b"WA-" not in res, res
Exemple #3
0
def test_wa_loader_None_bug():
    """Check the serialized ``wa2.html`` fixture ends its ORG span correctly.

    The output must contain ``<em>Inc.</em> __END_ORG__ </p>``.
    NOTE(review): the name suggests a regression test for a ``None``-related
    bug; the bug itself is not visible from this test.
    """
    loader = WebAnnotatorLoader()
    fixture_path = os.path.join(os.path.dirname(__file__), 'data', 'wa2.html')
    serialized = lxml.html.tostring(loader.load(fixture_path))
    expected_tail = b'<em>Inc.</em> __END_ORG__ </p>'
    assert expected_tail in serialized, serialized
Exemple #4
0
def test_wa_loader_None_bug():
    """Serialize the loaded ``wa2.html`` fixture and verify its markup.

    Asserts that the bytes ``<em>Inc.</em> __END_ORG__ </p>`` occur in the
    serialized tree; the full output is used as the failure message.
    NOTE(review): presumably guards against a past ``None``-handling bug,
    going by the test name -- confirm with the loader's history.
    """
    annot_loader = WebAnnotatorLoader()
    here = os.path.dirname(__file__)
    html_file = os.path.join(here, 'data', 'wa2.html')
    document = annot_loader.load(html_file)
    output = lxml.html.tostring(document)
    found = b'<em>Inc.</em> __END_ORG__ </p>' in output
    assert found, output