Example #1
0
def test_match_string_in_text():
    """Check that one candidate name found inside the text yields one match.

    The test expects a single result whose ``name`` is the matched candidate
    and whose ``token`` spans offsets 6 to 20 of the input text.
    """
    candidates = ['foo', 'somethingthere', 'bar']
    found = match_names("hello somethingthere world", candidates)
    assert len(found) == 1
    hit = found[0]
    assert hit['name'] == 'somethingthere'
    token = hit['token']
    assert token.start == 6
    assert token.end == 20
Example #2
0
def test_match_string_in_text():
    """Check that one candidate name found inside the text yields one match.

    Candidates are passed as the third positional argument, with an empty
    list as the second. The test expects a single result whose ``token``
    spans offsets 6 to 20 of the input text.
    """
    candidates = ['foo', 'somethingthere', 'bar']
    found = match_names("hello somethingthere world", [], candidates)
    assert len(found) == 1
    hit = found[0]
    assert hit['name'] == 'somethingthere'
    token = hit['token']
    assert token.start == 6
    assert token.end == 20
Example #3
0
def test_ignore_signature_because_of_mp_name():
    """Check that only 'sinaia' is reported when ``mp_info`` is supplied.

    The text mentions both 'Prahova' and 'Sinaia'; 'Prahova' appears right
    after the MP's name, and ``mp_info`` carries that same name and county.
    The test expects the 'prahova' candidate to be left out of the results.
    """
    mp = {'name': "Guran Virgil", 'county_name': "Prahova"}
    text = (
        "foo bar baz blah blah blah "
        "Domnul VIRGIL GURAN, Deputat PNL Prahova "
        "Obiectul întrebării Modificarea Legii Sinaia foo bar"
    )
    found = match_names(text, ['prahova', 'sinaia'], mp_info=mp)
    matched_names = [hit['name'] for hit in found]
    assert matched_names == ['sinaia']
Example #4
0
def test_match_multiple_words():
    """Check that a candidate made of two words is matched as one token.

    The test expects a single result whose token text equals the candidate
    phrase and spans offsets 14 to 29 of the input text.
    """
    phrase = "complicated bit"
    found = match_names("let's match a complicated bit of text", [phrase])
    assert [hit['name'] for hit in found] == ['complicated bit']
    first = found[0]
    assert first['name'] == phrase
    token = first['token']
    assert token.text == phrase
    assert token.start == 14
    assert token.end == 29
Example #5
0
def test_match_multiple_words():
    """Check that a candidate made of two words is matched as one token.

    The phrase is passed in the third positional argument, with an empty
    list as the second. The test expects one result whose token text equals
    the phrase and spans offsets 14 to 29 of the input text.
    """
    phrase = "complicated bit"
    found = match_names("let's match a complicated bit of text", [], [phrase])
    assert [hit['name'] for hit in found] == ['complicated bit']
    first = found[0]
    assert first['name'] == phrase
    token = first['token']
    assert token.text == phrase
    assert token.start == 14
    assert token.end == 29
Example #6
0
def test_ignore_signature_because_of_mp_name():
    """Check that only 'sinaia' is reported when ``mp_info`` is supplied.

    Candidates are passed as the second positional argument, with an empty
    list as the third. The text mentions 'Prahova' right after the MP's
    name, and ``mp_info`` carries that same name and county; the test
    expects the 'prahova' candidate to be left out of the results.
    """
    mp = {'name': "Guran Virgil", 'county_name': "Prahova"}
    text = (
        "foo bar baz blah blah blah "
        "Domnul VIRGIL GURAN, Deputat PNL Prahova "
        "Obiectul întrebării Modificarea Legii Sinaia foo bar"
    )
    found = match_names(text, ['prahova', 'sinaia'], [], mp_info=mp)
    matched_names = [hit['name'] for hit in found]
    assert matched_names == ['sinaia']
Example #7
0
def test_match_stemmed_name():
    """Check that 'Argeș' is matched in a text containing 'argeșenele'.

    The text holds only a lowercase inflected form; the test expects the
    candidate to be reported with its original spelling.
    """
    found = match_names("azi argeșenele se revoltă", ['Argeș'])
    matched_names = [hit['name'] for hit in found]
    assert matched_names == ['Argeș']
Example #8
0
def test_match_words_with_hyphen():
    """Check that a hyphenated candidate matches a hyphenated word in text.

    The test expects the reported name to keep its hyphen, while the
    matched token's text has a space where the hyphen was.
    """
    candidate = "Cluj-Napoca"
    found = match_names("something fishy at cluj-napoca today", [candidate])
    matched_names = [hit['name'] for hit in found]
    assert matched_names == ["Cluj-Napoca"]
    token = found[0]['token']
    assert token.text == "cluj napoca"
Example #9
0
def test_match_regardless_of_diacritics():
    """Check that diacritic differences do not prevent a match.

    'brasov' is expected to match 'brașov' in the text, and 'câmpina' to
    match 'campina'; names are reported exactly as given by the caller.
    """
    wanted = ["brasov", "câmpina"]
    found = match_names("foo bar brașov campina hello world", wanted)
    matched_names = [hit['name'] for hit in found]
    assert matched_names == ['brasov', 'câmpina']
Example #10
0
def test_match_single_name_per_token():
    """Check that the token 'theer' yields exactly one matched name.

    Both 'there' and 'theer' are candidates; the test expects only 'theer'
    to be reported.
    """
    found = match_names("hello theer world", ['there', 'theer'])
    matched_names = [hit['name'] for hit in found]
    assert matched_names == ['theer']
Example #11
0
def test_match_stemmed_name():
    """Check that 'Argeș' is matched in a text containing 'Argeșenele'.

    The candidate is passed in the second positional argument, with an
    empty list as the third. The text holds only an inflected form; the
    test expects the candidate to be reported with its original spelling.
    """
    found = match_names("azi Argeșenele se revoltă", ['Argeș'], [])
    matched_names = [hit['name'] for hit in found]
    assert matched_names == ['Argeș']
Example #12
0
def test_match_words_with_hyphen():
    """Check that a hyphenated candidate matches a hyphenated word in text.

    The candidate is passed in the second positional argument, with an
    empty list as the third. The test expects the reported name to keep its
    hyphen, while the token's text has a space where the hyphen was and
    keeps the capitalisation used in the input text.
    """
    candidate = "Cluj-Napoca"
    found = match_names("something fishy at Cluj-Napoca today", [candidate], [])
    matched_names = [hit['name'] for hit in found]
    assert matched_names == ["Cluj-Napoca"]
    token = found[0]['token']
    assert token.text == "Cluj Napoca"
Example #13
0
def test_match_regardless_of_diacritics():
    """Check that diacritic differences do not prevent a match.

    Candidates are passed as the third positional argument, with an empty
    list as the second. 'brasov' is expected to match 'brașov' in the text,
    and 'câmpina' to match 'campina'; names are reported as given.
    """
    wanted = ["brasov", "câmpina"]
    found = match_names("foo bar brașov campina hello world", [], wanted)
    matched_names = [hit['name'] for hit in found]
    assert matched_names == ['brasov', 'câmpina']
Example #14
0
def test_match_single_name_per_token():
    """Check that the token 'theer' yields exactly one matched name.

    Candidates are passed as the third positional argument, with an empty
    list as the second. Both 'there' and 'theer' are candidates; the test
    expects only 'theer' to be reported.
    """
    found = match_names("hello theer world", [], ['there', 'theer'])
    matched_names = [hit['name'] for hit in found]
    assert matched_names == ['theer']