Ejemplo n.º 1
0
def test_text_extractor():
    """Exercise ``text_extractor`` on real text, on ``None``, and via pickle."""
    populated = {text: "This is some text foo bar nope bar foo"}
    extracted = solve(text_extractor, cache=populated)
    assert extracted == ["foo bar", "bar foo"]

    # A None text value is expected to produce an empty result list.
    missing = {text: None}
    assert solve(text_extractor, cache=missing) == []

    # A pickle round trip must yield an object equal to the original.
    restored = pickle.loads(pickle.dumps(text_extractor))
    assert restored == text_extractor
Ejemplo n.º 2
0
def test_dict_values():
    """Exercise ``my_values`` on a populated dict, on ``None``, and via pickle."""
    populated = {my_dict: {"foo": 1, "bar": 2}}
    values = set(solve(my_values, cache=populated))
    assert values == {1, 2}

    # A None dict is expected to produce no values at all.
    missing = {my_dict: None}
    values = set(solve(my_values, cache=missing))
    assert values == set()

    # A pickle round trip must yield an object equal to the original.
    restored = pickle.loads(pickle.dumps(my_values))
    assert restored == my_values
Ejemplo n.º 3
0
def test_dict_keys():
    """Exercise ``my_keys`` on a populated dict, on ``None``, and via pickle."""
    populated = {my_dict: {"foo": 1, "bar": 2}}
    keys = set(solve(my_keys, cache=populated))
    assert keys == {"foo", "bar"}

    # A None dict is expected to produce no keys at all.
    missing = {my_dict: None}
    keys = set(solve(my_keys, cache=missing))
    assert keys == set()

    # A pickle round trip must yield an object equal to the original.
    restored = pickle.loads(pickle.dumps(my_keys))
    assert restored == my_keys
Ejemplo n.º 4
0
def test_badges_diff():
    """Check the badges diff datasource and the badge count features.

    The same cache (current and parent entity documents) is used for every
    ``solve`` call in this test.
    """
    cache = {
        revision_entity_doc: ALAN_TURING,
        parent_entity_doc: ALAN_TURING_OLD,
    }

    result = solve(diff.datasources.badges_diff, cache=cache)
    expected_added = {'lawiki', 'aswiki', 'enwiki', 'ruwiki', 'azwiki'}
    assert result.added == expected_added
    # Every other category of the diff is expected to be empty.
    for category in ("removed", "intersection", "changed", "unchanged"):
        assert getattr(result, category) == set()

    restored = pickle.loads(pickle.dumps(diff.datasources.badges_diff))
    assert restored == diff.datasources.badges_diff

    added_count = solve(diff.badges_added, cache=cache)
    assert added_count == 5
    removed_count = solve(diff.badges_removed, cache=cache)
    assert removed_count == 0
    changed_count = solve(diff.badges_changed, cache=cache)
    assert changed_count == 0

    # Each count feature must also survive a pickle round trip.
    restored = pickle.loads(pickle.dumps(diff.badges_added))
    assert restored == diff.badges_added
    restored = pickle.loads(pickle.dumps(diff.badges_removed))
    assert restored == diff.badges_removed
    restored = pickle.loads(pickle.dumps(diff.badges_changed))
    assert restored == diff.badges_changed
Ejemplo n.º 5
0
def test_content():
    """Check the content datasource, ``content_chars`` and ``flesh_kincaid``."""
    markup = ("This is some text.\n"
              "== A heading! ==\n"
              "{{Foo}} the [[bar]]!")
    cache = {r_text: markup}

    expected_content = ("This is some text.\n"
                        " A heading! \n"
                        " the bar!")
    content = solve(revision.datasources.content, cache=cache)
    assert content == expected_content

    char_count = solve(revision.content_chars, cache=cache)
    assert char_count == 41

    restored = pickle.loads(pickle.dumps(revision.content_chars))
    assert restored == revision.content_chars

    # The passage below is kept exactly as written, indentation included.
    passage = """
        Playing games has always been thought to be
        important to the development of well-balanced and creative children;
        however, what part, if any, they should play in the lives of adults
        has never been researched that deeply. I believe that playing games
        is every bit as important for adults as for children. Not only is
        taking time out to play games with our children and other adults
        valuable to building interpersonal relationships but is also a
        wonderful way to release built up tension."""
    cache = {r_text: passage}
    score = solve(revision.flesh_kincaid, cache=cache)
    assert score == 52.23

    restored = pickle.loads(pickle.dumps(revision.flesh_kincaid))
    assert restored == revision.flesh_kincaid
Ejemplo n.º 6
0
def check_feature(feature, expected):
    """Assert that ``feature`` solves to ``expected`` before and after pickling.

    Also asserts that ``expected`` is an instance of ``feature.returns`` and
    that the hash of the feature is unchanged by a pickle round trip.
    """
    def round_trip():
        # Build a fresh copy each time, as two independent round trips.
        return pickle.loads(pickle.dumps(feature))

    assert isinstance(expected, feature.returns)

    assert hash(round_trip()) == hash(feature)

    direct_value = solve(feature)
    assert direct_value == expected

    restored_value = solve(round_trip())
    assert restored_value == expected
Ejemplo n.º 7
0
def test_feature():
    """Check a bare ``Feature``: pickling, and cache lookup by object or name."""
    f = Feature("f")

    restored = pickle.loads(pickle.dumps(f))
    assert restored == f

    # The cached value is supplied first under the feature object itself,
    # then under the string "feature.f".
    for cache_key in (f, "feature.f"):
        assert solve(f, cache={cache_key: 5}) == 5

    check_feature(five, 5)
Ejemplo n.º 8
0
def test_uppercase_words():
    """Check the uppercase word datasources and pickling of the feature."""
    words = solve(revision.datasources.uppercase_words, cache={r_text: text})
    assert words == ['TNT']

    restored = pickle.loads(pickle.dumps(revision.uppercase_words))
    assert restored == revision.uppercase_words

    frequency = solve(revision.datasources.uppercase_word_frequency,
                      cache={r_text: text})
    assert frequency == {'TNT': 1}
Ejemplo n.º 9
0
def test_punctuations():
    """Check the punctuation datasources and pickling of the feature."""
    marks = solve(revision.datasources.punctuations, cache={r_text: text})
    assert marks == ['.', '.', ':', '。', '.', '?', '.']

    restored = pickle.loads(pickle.dumps(revision.punctuations))
    assert restored == revision.punctuations

    frequency = solve(revision.datasources.punctuation_frequency,
                      cache={r_text: text})
    assert frequency == {'.': 4, ':': 1, '?': 1, '。': 1}
Ejemplo n.º 10
0
def test_item_in_set():
    """Check ``bools.item_in_set`` against matching, non-matching and None sets."""
    is_a_sysop = bools.item_in_set('sysop', my_set)

    # (value cached for my_set, expected boolean result)
    cases = (
        ({'foo', 'bar'}, False),
        ({'foo', 'sysop'}, True),
        (None, False),
    )
    for members, expected in cases:
        assert solve(is_a_sysop, cache={my_set: members}) is expected

    restored = pickle.loads(pickle.dumps(is_a_sysop))
    assert restored == is_a_sysop
Ejemplo n.º 11
0
def test_set_contains_item():
    """Check ``bools.set_contains_item`` for a miss, a hit and a None item."""
    is_me = bools.set_contains_item({6877667}, my_item)

    # (value cached for my_item, expected boolean result)
    cases = (
        (999, False),
        (6877667, True),
        (None, False),
    )
    for item, expected in cases:
        assert solve(is_me, cache={my_item: item}) is expected

    restored = pickle.loads(pickle.dumps(is_me))
    assert restored == is_me
Ejemplo n.º 12
0
def test_sets_intersect():
    """Check ``bools.sets_intersect`` for disjoint, overlapping and None sets."""
    small_odds = {1, 2, 3, 5, 7, 9, 11, 13}
    has_small_odd = bools.sets_intersect(small_odds, my_set)

    # (value cached for my_set, expected boolean result)
    cases = (
        ({4, 18, 10}, False),
        ({20, 10, 3, 5, 1}, True),
        (None, False),
    )
    for members, expected in cases:
        assert solve(has_small_odd, cache={my_set: members}) is expected

    restored = pickle.loads(pickle.dumps(has_small_odd))
    assert restored == has_small_odd
Ejemplo n.º 13
0
def test_has_property_value():
    """Check the three property-value features against ``ALAN_TEXT``."""
    present = solve(has_p106_q82594, cache={r_text: ALAN_TEXT})
    assert present

    wrong_value = solve(has_p106_test, cache={r_text: ALAN_TEXT})
    assert not wrong_value

    wrong_property = solve(has_p999_foo, cache={r_text: ALAN_TEXT})
    assert not wrong_property

    # Each feature must compare equal to its pickle round-tripped copy.
    restored = pickle.loads(pickle.dumps(has_p106_q82594))
    assert restored == has_p106_q82594
    restored = pickle.loads(pickle.dumps(has_p106_test))
    assert restored == has_p106_test
    restored = pickle.loads(pickle.dumps(has_p999_foo))
    assert restored == has_p999_foo
Ejemplo n.º 14
0
def test_log():
    """Check ``modifiers.log`` on the constant 5: value, pickling and repr."""
    log_five = modifiers.log(5)

    direct = solve(log_five)
    assert direct == math_log(5)

    restored = pickle.loads(pickle.dumps(log_five))
    assert solve(restored) == math_log(5)

    description = repr(log_five)
    assert description == "<feature.log(5)>"
Ejemplo n.º 15
0
def test_name_namespace_name_matches():
    """Check ``namespace.name_matches`` with the pattern ``^t``."""
    starts_with_t = revision.page.namespace.name_matches(r"^t")

    # (cached namespace name, expected boolean result)
    for name, expected in (("This", True), ("Foo", False)):
        cache = {revision.datasources.page.namespace.name: name}
        assert solve(starts_with_t, cache=cache) is expected
Ejemplo n.º 16
0
def test_page_namespace_id_in_set():
    """Check ``namespace.id_in_set`` for an id inside and outside ``{4, 5}``."""
    wikipedia_namespace = revision.page.namespace.id_in_set({4, 5})

    # (cached namespace id, expected boolean result)
    for namespace_id, expected in ((5, True), (0, False)):
        cache = {revision.datasources.page.namespace.id: namespace_id}
        assert solve(wikipedia_namespace, cache=cache) is expected
Ejemplo n.º 17
0
def test_stopwords():
    """Check the Bengali stopword datasources and pickling of the feature set."""
    cache = {r_text: "আন চলচ্চিত্র."}

    found = solve(bengali.stopwords.revision.datasources.stopwords,
                  cache=cache)
    assert found == ["আন"]

    remaining = solve(bengali.stopwords.revision.datasources.non_stopwords,
                      cache=cache)
    assert remaining == ['চলচ্চিত্র']

    restored = pickle.loads(pickle.dumps(bengali.stopwords))
    assert bengali.stopwords == restored
Ejemplo n.º 18
0
def test_dictionary():
    """Check the Catalan dictionary datasources and pickling of the feature set."""
    cache = {r_text: "diferència dels animals worngly"}

    known = solve(catalan.dictionary.revision.datasources.dict_words,
                  cache=cache)
    assert known == ['diferència', 'dels', 'animals']

    unknown = solve(catalan.dictionary.revision.datasources.non_dict_words,
                    cache=cache)
    assert unknown == ['worngly']

    restored = pickle.loads(pickle.dumps(catalan.dictionary))
    assert catalan.dictionary == restored
Ejemplo n.º 19
0
def test_stopwords():
    """Check the Latvian stopword datasources and pickling of the feature set."""
    cache = {revision_oriented.revision.text: 'novirze būt vidējiem'}

    found = solve(latvian.stopwords.revision.datasources.stopwords,
                  cache=cache)
    assert found == ['būt']

    remaining = solve(latvian.stopwords.revision.datasources.non_stopwords,
                      cache=cache)
    assert remaining == ['novirze', 'vidējiem']

    restored = pickle.loads(pickle.dumps(latvian.stopwords))
    assert latvian.stopwords == restored
Ejemplo n.º 20
0
def test_stopwords():
    """Check the Greek stopword datasources and pickling of the feature set."""
    cache = {r_text: 'Αυτό είναι γραμμένο λθος. <td>'}

    found = solve(greek.stopwords.revision.datasources.stopwords,
                  cache=cache)
    assert found == ["Αυτό", "είναι"]

    remaining = solve(greek.stopwords.revision.datasources.non_stopwords,
                      cache=cache)
    assert remaining == ["γραμμένο", "λθος"]

    restored = pickle.loads(pickle.dumps(greek.stopwords))
    assert greek.stopwords == restored
Ejemplo n.º 21
0
def test_dictionary():
    """Check the German dictionary datasources and pickling of the feature set."""
    cache = {r_text: "Hinzu kamen rund sechs m80 Personen."}

    known = solve(german.dictionary.revision.datasources.dict_words,
                  cache=cache)
    assert known == ["Hinzu", "kamen", "rund", "sechs", "Personen"]

    unknown = solve(german.dictionary.revision.datasources.non_dict_words,
                    cache=cache)
    assert unknown == ["m80"]

    restored = pickle.loads(pickle.dumps(german.dictionary))
    assert german.dictionary == restored
Ejemplo n.º 22
0
def test_stopwords():
    """Check the German stopword datasources and pickling of the feature set."""
    cache = {r_text: "im Lager oder in der Verbannung."}

    found = solve(german.stopwords.revision.datasources.stopwords,
                  cache=cache)
    assert found == ["im", "oder", "in", "der"]

    remaining = solve(german.stopwords.revision.datasources.non_stopwords,
                      cache=cache)
    assert remaining == ["Lager", "Verbannung"]

    restored = pickle.loads(pickle.dumps(german.stopwords))
    assert german.stopwords == restored
Ejemplo n.º 23
0
def test_max():
    """Check ``modifiers.max`` on 5, 6 and 7: value, pickling and repr."""
    max_five_six_seven = modifiers.max(5, 6, 7)

    direct = solve(max_five_six_seven)
    assert direct == 7

    restored = pickle.loads(pickle.dumps(max_five_six_seven))
    assert solve(restored) == 7

    description = repr(max_five_six_seven)
    assert description == "<feature.max(5, 6, 7)>"
Ejemplo n.º 24
0
def test_stopwords():
    """Check the Polish stopword datasources and pickling of the feature set."""
    cache = {revision_oriented.revision.text: 'być barwnikowych pomocniczą'}

    found = solve(polish.stopwords.revision.datasources.stopwords,
                  cache=cache)
    assert found == ['być']

    remaining = solve(polish.stopwords.revision.datasources.non_stopwords,
                      cache=cache)
    assert remaining == ['barwnikowych', 'pomocniczą']

    restored = pickle.loads(pickle.dumps(polish.stopwords))
    assert polish.stopwords == restored
Ejemplo n.º 25
0
def test_stopwords():
    """Check the Turkish stopword datasources and pickling of the feature set."""
    cache = {r_text: "Türkiye'deki en üst seviye futbol ligi."}

    found = solve(turkish.stopwords.revision.datasources.stopwords,
                  cache=cache)
    assert found == ["en"]

    remaining = solve(turkish.stopwords.revision.datasources.non_stopwords,
                      cache=cache)
    assert remaining == ["Türkiye'deki", 'üst', 'seviye', 'futbol', 'ligi']

    restored = pickle.loads(pickle.dumps(turkish.stopwords))
    assert turkish.stopwords == restored
Ejemplo n.º 26
0
def test_tokens_matching():
    """Check the added/removed token datasources filtered by a regex."""
    # One cache holding parent and current text is shared by both solves.
    cache = {
        p_text: "This is not 55 a sring.",
        r_text: "This is too 56 a tring.",
    }

    added = solve(revision.diff.datasources.tokens_added_matching("^t"),
                  cache=cache)
    assert added == ['too', 'tring']

    removed = solve(
        revision.diff.datasources.tokens_removed_matching("^(5|s)"),
        cache=cache)
    assert removed == ['55', 'sring']
Ejemplo n.º 27
0
def test_dictionary():
    """Check the Greek dictionary datasources and pickling of the feature set."""
    cache = {r_text: 'Αυτό είναι γραμμένο λθος. <td>'}

    known = solve(greek.dictionary.revision.datasources.dict_words,
                  cache=cache)
    assert known == ["Αυτό", "είναι", "γραμμένο"]

    unknown = solve(greek.dictionary.revision.datasources.non_dict_words,
                    cache=cache)
    assert unknown == ["λθος"]

    restored = pickle.loads(pickle.dumps(greek.dictionary))
    assert greek.dictionary == restored
Ejemplo n.º 28
0
def test_stopwords():
    """Check the Bosnian stopword datasources and pickling of the feature set."""
    cache = {r_text: "hercegovine jakiel kroz postojanje."}

    found = solve(bosnian.stopwords.revision.datasources.stopwords,
                  cache=cache)
    assert found == ["hercegovine", "jakiel", "kroz"]

    remaining = solve(bosnian.stopwords.revision.datasources.non_stopwords,
                      cache=cache)
    assert remaining == ["postojanje"]

    restored = pickle.loads(pickle.dumps(bosnian.stopwords))
    assert bosnian.stopwords == restored
Ejemplo n.º 29
0
def test_stopwords():
    """Check the Arabic stopword datasources and pickling of the feature set."""
    cache = {revision_oriented.revision.text: 'التي لم تكن معروفة'}

    found = solve(arabic.stopwords.revision.datasources.stopwords,
                  cache=cache)
    assert found == ['التي', 'لم']

    remaining = solve(arabic.stopwords.revision.datasources.non_stopwords,
                      cache=cache)
    assert remaining == ['تكن', 'معروفة']

    restored = pickle.loads(pickle.dumps(arabic.stopwords))
    assert arabic.stopwords == restored
Ejemplo n.º 30
0
def test_dictionary():
    """Check the Arabic dictionary datasources and pickling of the feature set."""
    cache = {revision_oriented.revision.text: 'التي لم تكن معروفة  worngly.'}

    known = solve(arabic.dictionary.revision.datasources.dict_words,
                  cache=cache)
    assert known == ["التي", "لم", "تكن", "معروفة"]

    unknown = solve(arabic.dictionary.revision.datasources.non_dict_words,
                    cache=cache)
    assert unknown == ["worngly"]

    restored = pickle.loads(pickle.dumps(arabic.dictionary))
    assert arabic.dictionary == restored
Ejemplo n.º 31
0
def test_entity_doc():
    """Check the ``entity_doc`` datasource on real text, on ``None``, and via pickle."""
    # The result for ALAN_TEXT is not inspected; the call only has to succeed.
    solve(revision.datasources.entity_doc, cache={r_text: ALAN_TEXT})

    without_text = solve(revision.datasources.entity_doc,
                         cache={r_text: None})
    assert without_text is None

    restored = pickle.loads(pickle.dumps(revision.datasources.entity_doc))
    assert restored == revision.datasources.entity_doc
Ejemplo n.º 32
0
def test_entity():
    assert solve(revision.datasources.entity, cache={
        r_text: None
    }).properties == {}

    solve(revision.datasources.entity, cache={r_text: ALAN_TEXT})

    assert (pickle.loads(pickle.dumps(
        revision.datasources.entity)) == revision.datasources.entity)

    assert solve(revision.properties, cache={r_text: ALAN_TEXT}) == 57
    assert (solve(revision.datasources.properties, cache={
        r_text: ALAN_TEXT
    }).keys() == {
        'P1430', 'P906', 'P1816', 'P570', 'P31', 'P1343', 'P2021', 'P535',
        'P800', 'P569', 'P373', 'P1819', 'P108', 'P227', 'P185', 'P910',
        'P1273', 'P69', 'P244', 'P20', 'P101', 'P106', 'P18', 'P1563', 'P25',
        'P646', 'P1296', 'P214', 'P950', 'P463', 'P1006', 'P268', 'P21',
        'P1417', 'P22', 'P1207', 'P19', 'P91', 'P735', 'P1412', 'P166', 'P269',
        'P1741', 'P1196', 'P27', 'P140', 'P512', 'P1415', 'P691', 'P345',
        'P949', 'P1263', 'P549', 'P184', 'P935', 'P349', 'P213'
    })

    assert solve(revision.claims, cache={r_text: ALAN_TEXT}) == 71
    assert (solve(revision.datasources.claims, cache={r_text: ALAN_TEXT}) == {
        ('P1430', "'368'"), ('P1412', 'Q1860'), ('P691', "'jn19990008646'"),
        ('P935', "'Alan Turing'"), ('P25', 'Q20895935'),
        ('P1263', "'952/000023883'"), ('P166', 'Q15631401'),
        ('P949', "'000133188'"), ('P569', '+1912-06-23T00:00:00Z'),
        ('P512', 'Q230899'), ('P166', 'Q10762848'), ('P1207', "'n98045497'"),
        ('P1816', "'mp18700'"), ('P373', "'Alan Turing'"), ('P69', 'Q924289'),
        ('P950', "'XX945020'"), ('P244', "'n83171546'"), ('P800', 'Q20895949'),
        ('P1296', "'0067958'"), ('P106', 'Q82594'), ('P800', 'Q772056'),
        ('P800', 'Q20895966'), ('P1006', "'070580685'"), ('P101', 'Q897511'),
        ('P140', 'Q7066'), ('P213', "'0000 0001 1058 9902'"),
        ('P1819', "'I00586443'"), ('P19', 'Q20895942'),
        ('P269', "'030691621'"), ('P108', 'Q220798'), ('P22', 'Q20895930'),
        ('P2021', '5 (5-5) 1'), ('P185', 'Q249984'), ('P106', 'Q81096'),
        ('P549', "'8014'"), ('P1343', 'Q2627728'), ('P1741', "'226316'"),
        ('P268', "'12205670t'"), ('P1563', "'Turing'"), ('P106', 'Q11513337'),
        ('P570', '+1954-06-07T00:00:00Z'), ('P512', 'Q21578'),
        ('P69', 'Q2278254'), ('P31', 'Q5'), ('P227', "'118802976'"),
        ('P1196', 'Q10737'), ('P108', 'Q230899'), ('P21', 'Q6581097'),
        ('P1417', "'609739'"), ('P1343', 'Q17329836'), ('P349', "'00621580'"),
        ('P535', "'12651680'"), ('P463', 'Q123885'), ('P101', 'Q21198'),
        ('P91', 'Q6636'), ('P345', "'nm6290133'"), ('P735', 'Q294833'),
        ('P214', "'41887917'"), ('P906', "'254262'"), ('P910', 'Q9384007'),
        ('P27', 'Q145'), ('P106', 'Q170790'), ('P184', 'Q92741'),
        ('P646', "'/m/0n00'"), ('P18', "'Alan Turing Aged 16.jpg'"),
        ('P19', 'Q122744'), ('P106', 'Q4964182'), ('P69', 'Q21578'),
        ('P20', 'Q2011497'), ('P1273', "'a11455408'"), ('P1415', "'101036578'")
    })
    assert solve(revision.aliases, cache={r_text: ALAN_TEXT}) == 9
    assert (solve(revision.datasources.aliases, cache={r_text: ALAN_TEXT}) == {
        'de': ['Alan Mathison Turing'],
        'en': ['Alan Mathison Turing'],
        'fr': ['Alan Mathison Turing'],
        'ru': ['Тьюринг, Алан'],
        'jbo': ['alan turin'],
        'it': ['Alan Mathison Turing'],
        'ko': ['앨런 매티슨 튜링'],
        'be-tarask': ["Элан Т'юрынг", 'Алан Цюрынг', "Т'юрынг"],
        'ja': ['アラン・テューリング']
    })
    assert solve(revision.sources, cache={r_text: ALAN_TEXT}) == 56
    assert (solve(revision.datasources.sources, cache={r_text: ALAN_TEXT}) == {
        ('P108', 'Q220798', 'P248', 'Q20895922'),
        ('P106', 'Q4964182', 'P143', 'Q48952'),
        ('P69', 'Q924289', 'P248', 'Q20895922'),
        ('P570', '+1954-06-07T00:00:00Z', 'P143', 'Q206855'),
        ('P31', 'Q5', 'P248', 'Q20666306'),
        ('P800', 'Q20895949', 'P248', 'Q20895922'),
        ('P25', 'Q20895935', 'P248', 'Q20895922'),
        ('P349', "'00621580'", 'P143', 'Q48183'),
        ('P549', "'8014'", 'P143', 'Q328'),
        ('P569', '+1912-06-23T00:00:00Z', 'P854',
         "'http://data.bnf.fr/ark:/12148/cb12205670t'"),
        ('P18', "'Alan Turing Aged 16.jpg'", 'P143', 'Q11920'),
        ('P569', '+1912-06-23T00:00:00Z', 'P248', 'Q20666306'),
        ('P19', 'Q122744', 'P143', 'Q328'),
        ('P214', "'41887917'", 'P143', 'Q8447'),
        ('P569', '+1912-06-23T00:00:00Z', 'P143', 'Q328'),
        ('P569', '+1912-06-23T00:00:00Z', 'P345', "'nm6290133'"),
        ('P91', 'Q6636', 'P248', 'Q20895922'),
        ('P1273', "'a11455408'", 'P854', "'https://viaf.org/viaf/41887917/'"),
        ('P1412', 'Q1860', 'P143', 'Q20666306'),
        ('P69', 'Q2278254', 'P248', 'Q20895922'),
        ('P31', 'Q5', 'P813', '+2015-10-10T00:00:00Z'),
        ('P512', 'Q230899', 'P143', 'Q8447'),
        ('P512', 'Q21578', 'P143', 'Q8447'),
        ('P1412', 'Q1860', 'P813', '+2015-10-10T00:00:00Z'),
        ('P1412', 'Q1860', 'P854',
         "'http://data.bnf.fr/ark:/12148/cb12205670t'"),
        ('P535', "'12651680'", 'P143', 'Q328'),
        ('P19', 'Q20895942', 'P248', 'Q20895922'),
        ('P227', "'118802976'", 'P143', 'Q1419226'),
        ('P570', '+1954-06-07T00:00:00Z', 'P248', 'Q20666306'),
        ('P21', 'Q6581097', 'P143', 'Q54919'),
        ('P800', 'Q20895966', 'P248', 'Q20895922'),
        ('P935', "'Alan Turing'", 'P143', 'Q191168'),
        ('P31', 'Q5', 'P143', 'Q206855'),
        ('P21', 'Q6581097', 'P248', 'Q36578'),
        ('P646', "'/m/0n00'", 'P248', 'Q15241312'),
        ('P646', "'/m/0n00'", 'P577', '+2013-10-28T00:00:00Z'),
        ('P1563', "'Turing'", 'P143', 'Q11921'),
        ('P19', 'Q122744', 'P854',
         "'http://www.telegraph.co.uk/technology/news/9314910/Britain-still-owes-Alan-Turing-a-debt.html'"
         ), ('P269', "'030691621'", 'P143', 'Q8447'),
        ('P108', 'Q230899', 'P248', 'Q20895922'),
        ('P22', 'Q20895930', 'P248', 'Q20895922'),
        ('P906', "'254262'", 'P143', 'Q877583'),
        ('P21', 'Q6581097', 'P813', '+2014-04-09T00:00:00Z'),
        ('P244', "'n83171546'", 'P143', 'Q328'),
        ('P570', '+1954-06-07T00:00:00Z', 'P854',
         "'http://data.bnf.fr/ark:/12148/cb12205670t'"),
        ('P570', '+1954-06-07T00:00:00Z', 'P813', '+2015-10-10T00:00:00Z'),
        ('P106', 'Q82594', 'P143', 'Q328'),
        ('P570', '+1954-06-07T00:00:00Z', 'P345', "'nm6290133'"),
        ('P213', "'0000 0001 1058 9902'", 'P143', 'Q423048'),
        ('P31', 'Q5', 'P854', "'http://data.bnf.fr/ark:/12148/cb12205670t'"),
        ('P569', '+1912-06-23T00:00:00Z', 'P813', '+2015-10-10T00:00:00Z'),
        ('P1196', 'Q10737', 'P248', 'Q20895922'),
        ('P268', "'12205670t'", 'P143', 'Q8447'),
        ('P800', 'Q772056', 'P248', 'Q20895922'),
        ('P20', 'Q2011497', 'P248', 'Q20895922'),
        ('P27', 'Q145', 'P143', 'Q48183')
    })
    assert solve(revision.qualifiers, cache={r_text: ALAN_TEXT}) == 6
    assert (solve(revision.datasources.qualifiers, cache={
        r_text: ALAN_TEXT
    }) == {
        ('P1343', 'Q17329836', 'P854', 0,
         "'http://www.larousse.fr/encyclopedie/personnage/Alan_Mathison_Turing/147690'"
         ), ('P108', 'Q220798', 'P580', 0, '+1938-00-00T00:00:00Z'),
        ('P1343', 'Q2627728', 'P854', 0,
         "'http://krugosvet.ru/enc/gumanitarnye_nauki/lingvistika/TYURING_ALAN_MATISON.html'"
         ), ('P108', 'Q220798', 'P582', 0, '+1945-00-00T00:00:00Z'),
        ('P108', 'Q230899', 'P580', 0, '+1948-03-00T00:00:00Z'),
        ('P69', 'Q2278254', 'P580', 0, '+1926-00-00T00:00:00Z')
    })

    assert solve(revision.badges, cache={r_text: ALAN_TEXT}) == 5
    assert (solve(revision.datasources.badges, cache={r_text: ALAN_TEXT}) == {
        'aswiki': ['Q17437798'],
        'ruwiki': ['Q17437798'],
        'azwiki': ['Q17437796'],
        'lawiki': ['Q17437796'],
        'enwiki': ['Q17437798']
    })
    assert solve(revision.labels, cache={r_text: ALAN_TEXT}) == 126
    assert (solve(revision.datasources.labels, cache={r_text: ALAN_TEXT}) == {
        'th': 'แอลัน ทัวริง',
        'is': 'Alan Turing',
        'ku': 'Alan Turing',
        'sgs': 'Alans Tiorėngs',
        'ar': 'آلان تورنج',
        'kk': 'Алан Тьюринг',
        'yue': '圖靈',
        'ta': 'அலன் டூரிங்',
        'cs': 'Alan Turing',
        'li': 'Alan Turing',
        'bn': 'অ্যালান টুরিং',
        'sl': 'Alan Turing',
        'gsw': 'Alan Turing',
        'sv': 'Alan Turing',
        'hif': 'Alan Turing',
        'en-gb': 'Alan Turing',
        'en': 'Alan Turing',
        'az': 'Alan Türinq',
        'ja': 'アラン・チューリング',
        'oc': 'Alan Turing',
        'pt-br': 'Alan Turing',
        'da': 'Alan Turing',
        'ca': 'Alan Turing',
        'eo': 'Alan TURING',
        'el': 'Άλαν Τούρινγκ',
        'yi': 'עלן טיורינג',
        'nan': 'Alan Turing',
        'sh': 'Alan Turing',
        'as': 'এলান ট্যুৰিং',
        'hy': 'Ալան Թյուրինգ',
        'fa': 'آلن تورینگ',
        'en-ca': 'Alan Turing',
        'tr': 'Alan Turing',
        'mn': 'Алан Матисон Тюринг',
        'he': 'אלן טיורינג',
        'scn': 'Alan Turing',
        'vo': 'Alan Turing',
        'yo': 'Alan Turing',
        'et': 'Alan Turing',
        'ur': 'ایلن تورنگ',
        'fo': 'Alan Turing',
        'io': 'Alan Turing',
        'ilo': 'Alan Turing',
        'ru': 'Алан Тьюринг',
        'gl': 'Alan Turing',
        'war': 'Alan Turing',
        'kn': 'ಅಲೆನ್ ಟ್ಯೂರಿಂಗ್',
        'uz': 'Tyuring',
        'de': 'Alan Turing',
        'zh-cn': '艾伦·图灵',
        'la': 'Alanus Mathison Turing',
        'sk': 'Alan Mathison Turing',
        'mk': 'Алан Тјуринг',
        'hr': 'Alan Turing',
        'uk': 'Алан Тюрінг',
        'pl': 'Alan Turing',
        'ro': 'Alan Turing',
        'nl': 'Alan Turing',
        'nb': 'Alan Turing',
        'br': 'Alan Turing',
        'fr': 'Alan Turing',
        'mt': 'Alan Turing',
        'it': 'Alan Turing',
        'ce': 'Тьюринг, Алан',
        'te': 'అలాన్ ట్యూరింగ్\u200c',
        'fi': 'Alan Turing',
        'pa': 'ਅਲਾਨ ਟੂਰਿੰਗ',
        'nn': 'Alan Turing',
        'zh-hans': '艾伦·图灵',
        'af': 'Alan Turing',
        'be': 'Алан Матысан Цьюрынг',
        'ga': 'Alan Turing',
        'ckb': 'ئالان تیورینگ',
        'es': 'Alan Turing',
        'arz': 'الان تورينج',
        'new': 'एलेन त्युरिङ्ग',
        'tt': 'Alan Tyuring',
        'ht': 'Alan Turing',
        'cy': 'Alan Turing',
        'mwl': 'Alan Turing',
        'or': 'ଆଲାନ ଟ୍ୟୁରିଙ୍ଗ',
        'jbo': '.alan turin',
        'ml': 'അലൻ ട്യൂറിംഗ്',
        'sa': 'एलेन ट्यूरिंग',
        'bs': 'Alan Turing',
        'tg': 'Алан Тюринг',
        'ms': 'Alan Turing',
        'lv': 'Alans Tjūrings',
        'fur': 'Alan Turing',
        'sco': 'Alan Turing',
        'sah': 'Алан Матисон Тьюринг',
        'lmo': 'Alan Turing',
        'mr': 'ॲलन ट्युरिंग',
        'pnb': 'الان ٹورنگ',
        'eu': 'Alan Turing',
        'zh': '艾伦·图灵',
        'de-ch': 'Alan Turing',
        'gu': 'ઍલન ટ્યુરિંગ',
        'gan': '圖靈',
        'sw': 'Alan Turing',
        'mg': 'Alan Turing',
        'be-tarask': 'Элан Т’юрынг',
        'hu': 'Alan Turing',
        'lij': 'Alan Turing',
        'an': 'Alan Turing',
        'pt': 'Alan Turing',
        'pms': 'Alan Turing',
        'gd': 'Alan Turing',
        'lt': 'Alan Turing',
        'jv': 'Alan Turing',
        'fy': 'Alan Turing',
        'sq': 'Alan Turing',
        'ka': 'ალან ტიურინგი',
        'vi': 'Alan Turing',
        'sr': 'Алан Тјуринг',
        'pam': 'Alan Turing',
        'ast': 'Alan Turing',
        'co': 'Alanu Turing',
        'ko': '앨런 튜링',
        'tl': 'Alan Turing',
        'rue': 'Алан Тюрінґ',
        'lb': 'Alan M. Turing',
        'id': 'Alan Turing',
        'bg': 'Алън Тюринг',
        'ba': 'Алан Тьюринг',
        'hi': 'एलेन ट्यूरिंग'
    })
    assert solve(revision.sitelinks, cache={r_text: ALAN_TEXT}) == 134
    assert (solve(revision.datasources.sitelinks,
                  cache={r_text: ALAN_TEXT}) == {
                      'mrwiki': 'ॲलन ट्युरिंग',
                      'warwiki': 'Alan Turing',
                      'mkwiki': 'Алан Тјуринг',
                      'bawiki': 'Алан Тьюринг',
                      'mnwiki': 'Алан Матисон Тюринг',
                      'mgwiki': 'Alan Turing',
                      'tawiki': 'அலன் டூரிங்',
                      'yowiki': 'Alan Turing',
                      'ttwiki': 'Alan Tyuring',
                      'ruewiki': 'Алан Тюрінґ',
                      'gdwiki': 'Alan Turing',
                      'liwiki': 'Alan Turing',
                      'pamwiki': 'Alan Turing',
                      'scnwiki': 'Alan Turing',
                      'scowiki': 'Alan Turing',
                      'fowiki': 'Alan Turing',
                      'fywiki': 'Alan Turing',
                      'bnwiki': 'অ্যালান টুরিং',
                      'jbowiki': '.alan turin',
                      'guwiki': 'ઍલન ટ્યુરિંગ',
                      'knwiki': 'ಅಲೆನ್ ಟ್ಯೂರಿಂಗ್',
                      'dewiki': 'Alan Turing',
                      'be_x_oldwiki': 'Элан Т’юрынг',
                      'eswiki': 'Alan Turing',
                      'hrwiki': 'Alan Turing',
                      'mwlwiki': 'Alan Turing',
                      'afwiki': 'Alan Turing',
                      'sqwiki': 'Alan Turing',
                      'mtwiki': 'Alan Turing',
                      'cawiki': 'Alan Turing',
                      'zh_min_nanwiki': 'Alan Turing',
                      'trwiki': 'Alan Turing',
                      'hiwiki': 'एलेन ट्यूरिंग',
                      'nlwiki': 'Alan Turing',
                      'cswikiquote': 'Alan Turing',
                      'azwiki': 'Alan Türinq',
                      'kkwiki': 'Алан Тьюринг',
                      'plwikiquote': 'Alan Turing',
                      'hywiki': 'Ալան Թյուրինգ',
                      'cewiki': 'Тьюринг, Алан',
                      'nnwiki': 'Alan Turing',
                      'ruwikiquote': 'Алан Матисон Тьюринг',
                      'tgwiki': 'Алан Тюринг',
                      'commonswiki': 'Alan Turing',
                      'lawiki': 'Alanus Mathison Turing',
                      'itwiki': 'Alan Turing',
                      'eowiki': 'Alan Turing',
                      'dawiki': 'Alan Turing',
                      'kowiki': '앨런 튜링',
                      'bewiki': 'Алан Матысан Цьюрынг',
                      'rowiki': 'Alan Turing',
                      'ocwiki': 'Alan Turing',
                      'newwiki': 'एलेन त्युरिङ्ग',
                      'lbwiki': 'Alan M. Turing',
                      'pawiki': 'ਅਲਾਨ ਟੂਰਿੰਗ',
                      'enwikiquote': 'Alan Turing',
                      'hifwiki': 'Alan Turing',
                      'mlwiki': 'അലൻ ട്യൂറിംഗ്',
                      'jawiki': 'アラン・チューリング',
                      'viwiki': 'Alan Turing',
                      'htwiki': 'Alan Turing',
                      'furwiki': 'Alan Turing',
                      'zhwikiquote': '艾伦·图灵',
                      'lijwiki': 'Alan Turing',
                      'plwiki': 'Alan Turing',
                      'vowiki': 'Alan Turing',
                      'bswiki': 'Alan Turing',
                      'tewiki': 'అలాన్ ట్యూరింగ్\u200c',
                      'sawiki': 'एलेन ट्यूरिंग',
                      'ptwiki': 'Alan Turing',
                      'urwiki': 'ایلن تورنگ',
                      'arwiki': 'آلان تورنج',
                      'iswiki': 'Alan Turing',
                      'huwiki': 'Alan Turing',
                      'tlwiki': 'Alan Turing',
                      'uzwiki': 'Alan Tyuring',
                      'frwikiquote': 'Alan Turing',
                      'zh_yuewiki': '圖靈',
                      'pnbwiki': 'الان ٹورنگ',
                      'dewikiquote': 'Alan Turing',
                      'swwiki': 'Alan Turing',
                      'itwikiquote': 'Alan Turing',
                      'lvwiki': 'Alans Tjūrings',
                      'anwiki': 'Alan Turing',
                      'aswiki': 'এলান ট্যুৰিং',
                      'arzwiki': 'الان تورينج',
                      'srwiki': 'Алан Тјуринг',
                      'eswikiquote': 'Alan Mathison Turing',
                      'elwiki': 'Άλαν Τούρινγκ',
                      'frwiki': 'Alan Turing',
                      'brwiki': 'Alan Turing',
                      'fiwiki': 'Alan Turing',
                      'fawiki': 'آلن تورینگ',
                      'ilowiki': 'Alan Turing',
                      'cswiki': 'Alan Turing',
                      'kawiki': 'ალან ტიურინგი',
                      'yiwiki': 'עלן טיורינג',
                      'gawiki': 'Alan Turing',
                      'skwiki': 'Alan Turing',
                      'shwiki': 'Alan Turing',
                      'sahwiki': 'Тьюринг Алан Матисон',
                      'ukwiki': 'Алан Тюрінг',
                      'bat_smgwiki': 'Alans Tiorėngs',
                      'hewiki': 'אלן טיורינג',
                      'enwiki': 'Alan Turing',
                      'bgwiki': 'Алън Тюринг',
                      'svwiki': 'Alan Turing',
                      'orwiki': 'ଆଲାନ ଟ୍ୟୁରିଙ୍ଗ',
                      'lmowiki': 'Alan Turing',
                      'glwiki': 'Alan Turing',
                      'mswiki': 'Alan Turing',
                      'zhwiki': '艾伦·图灵',
                      'alswiki': 'Alan Turing',
                      'etwiki': 'Alan Turing',
                      'jvwiki': 'Alan Turing',
                      'hewikiquote': 'אלן טיורינג',
                      'astwiki': 'Alan Turing',
                      'kuwiki': 'Alan Turing',
                      'cywikiquote': 'Alan Turing',
                      'idwiki': 'Alan Turing',
                      'thwiki': 'แอลัน ทัวริง',
                      'pmswiki': 'Alan Turing',
                      'ruwiki': 'Тьюринг, Алан',
                      'iowiki': 'Alan Turing',
                      'nowiki': 'Alan Turing',
                      'cywiki': 'Alan Turing',
                      'euwiki': 'Alan Turing',
                      'ltwiki': 'Alan Turing',
                      'cawikiquote': 'Alan Turing',
                      'simplewiki': 'Alan Turing',
                      'cowiki': 'Alanu Turing',
                      'ganwiki': '圖靈',
                      'ckbwiki': 'ئالان تیورینگ',
                      'slwiki': 'Alan Turing'
                  })
    assert solve(revision.descriptions, cache={r_text: ALAN_TEXT}) == 22
    assert (solve(
        revision.datasources.descriptions, cache={r_text: ALAN_TEXT}) == {
            'da':
            'britisk informatiker, matematiker og ingeniør',
            'ko':
            '영국의 수학자, 논리학자, 암호해독학자, 컴퓨터 과학자',
            'it':
            'matematico, logico e crittografo britannico',
            'fr':
            'mathématicien britannique',
            'nn':
            'britisk informatikar, matematikar og ingeniør',
            'gl':
            'matemático, filósofo e criptógrafo británico',
            'pam':
            'Computer scientist, mathematician, and cryptographer',
            'nl':
            'Brits wiskundige',
            'de':
            'britischer Logiker, Mathematiker und Kryptoanalytiker',
            'zh-cn':
            '英国数学家,逻辑学家,密码学家和计算机科学家',
            'en':
            'British mathematician, logician, cryptanalyst, and computer ' +
            'scientist',
            'as':
            'Computer scientist, mathematician, and cryptographer',
            'zh':
            '英国数学家,逻辑学家,密码学家和计算机科学家',
            'ru':
            'английский математик, логик, криптограф',
            'pl':
            'angielski matematyk',
            'sv':
            'brittisk datavetare, matematiker och ingenjör',
            'es':
            'matemático, filósofo y criptógrafo británico',
            'sk':
            'britský matematik, logik, kryptograf a vojnový hrdina',
            'ilo':
            'Britaniko a matematiko, lohiko, kriptoanalista, ken ' +
            'sientista ti kompiuter',
            'zh-hans':
            '英国数学家,逻辑学家,密码学家和计算机科学家',
            'fa':
            'دانشمند کامپیوتر، رمزشکن، منطق\u200cدان و ریاضی' +
            '\u200cدان بریتانیایی',
            'nb':
            'britisk informatiker, matematiker og ingeniør'
        })
    assert solve(revision.reference_claims, cache={r_text: ALAN_TEXT}) == 56
    assert (solve(
        revision.datasources.reference_claims, cache={r_text: ALAN_TEXT}) == {
            ('P108', 'Q220798', 'P248', 0, 'Q20895922'),
            ('P106', 'Q4964182', 'P143', 0, 'Q48952'),
            ('P69', 'Q924289', 'P248', 0, 'Q20895922'),
            ('P570', '+1954-06-07T00:00:00Z', 'P143', 0, 'Q206855'),
            ('P31', 'Q5', 'P248', 0, 'Q20666306'),
            ('P800', 'Q20895949', 'P248', 0, 'Q20895922'),
            ('P25', 'Q20895935', 'P248', 0, 'Q20895922'),
            ('P349', "'00621580'", 'P143', 0, 'Q48183'),
            ('P549', "'8014'", 'P143', 0, 'Q328'),
            ('P569', '+1912-06-23T00:00:00Z', 'P854', 0,
             "'http://data.bnf.fr/ark:/12148/cb12205670t'"),
            ('P18', "'Alan Turing Aged 16.jpg'", 'P143', 0, 'Q11920'),
            ('P569', '+1912-06-23T00:00:00Z', 'P248', 0, 'Q20666306'),
            ('P19', 'Q122744', 'P143', 0, 'Q328'),
            ('P214', "'41887917'", 'P143', 0, 'Q8447'),
            ('P569', '+1912-06-23T00:00:00Z', 'P143', 0, 'Q328'),
            ('P569', '+1912-06-23T00:00:00Z', 'P345', 0, "'nm6290133'"),
            ('P91', 'Q6636', 'P248', 0, 'Q20895922'),
            ('P1273', "'a11455408'", 'P854', 0,
             "'https://viaf.org/viaf/41887917/'"),
            ('P1412', 'Q1860', 'P143', 0, 'Q20666306'),
            ('P69', 'Q2278254', 'P248', 0, 'Q20895922'),
            ('P31', 'Q5', 'P813', 0, '+2015-10-10T00:00:00Z'),
            ('P512', 'Q230899', 'P143', 0, 'Q8447'),
            ('P512', 'Q21578', 'P143', 0, 'Q8447'),
            ('P1412', 'Q1860', 'P813', 0, '+2015-10-10T00:00:00Z'),
            ('P1412', 'Q1860', 'P854', 0,
             "'http://data.bnf.fr/ark:/12148/cb12205670t'"),
            ('P535', "'12651680'", 'P143', 0, 'Q328'),
            ('P19', 'Q20895942', 'P248', 0, 'Q20895922'),
            ('P227', "'118802976'", 'P143', 0, 'Q1419226'),
            ('P570', '+1954-06-07T00:00:00Z', 'P248', 0, 'Q20666306'),
            ('P21', 'Q6581097', 'P143', 0, 'Q54919'),
            ('P800', 'Q20895966', 'P248', 0, 'Q20895922'),
            ('P935', "'Alan Turing'", 'P143', 0, 'Q191168'),
            ('P31', 'Q5', 'P143', 0, 'Q206855'),
            ('P21', 'Q6581097', 'P248', 0, 'Q36578'),
            ('P646', "'/m/0n00'", 'P248', 0, 'Q15241312'),
            ('P646', "'/m/0n00'", 'P577', 0, '+2013-10-28T00:00:00Z'),
            ('P1563', "'Turing'", 'P143', 0, 'Q11921'),
            ('P19', 'Q122744', 'P854', 0,
             "'http://www.telegraph.co.uk/technology/news/9314910/Britain-still-owes-Alan-Turing-a-debt.html'"
             ), ('P269', "'030691621'", 'P143', 0, 'Q8447'),
            ('P108', 'Q230899', 'P248', 0, 'Q20895922'),
            ('P22', 'Q20895930', 'P248', 0, 'Q20895922'),
            ('P906', "'254262'", 'P143', 0, 'Q877583'),
            ('P21', 'Q6581097', 'P813', 0, '+2014-04-09T00:00:00Z'),
            ('P244', "'n83171546'", 'P143', 0, 'Q328'),
            ('P570', '+1954-06-07T00:00:00Z', 'P854', 0,
             "'http://data.bnf.fr/ark:/12148/cb12205670t'"),
            ('P570', '+1954-06-07T00:00:00Z', 'P813', 0,
             '+2015-10-10T00:00:00Z'), ('P106', 'Q82594', 'P143', 0, 'Q328'),
            ('P570', '+1954-06-07T00:00:00Z', 'P345', 0, "'nm6290133'"),
            ('P213', "'0000 0001 1058 9902'", 'P143', 0, 'Q423048'),
            ('P31', 'Q5', 'P854', 0,
             "'http://data.bnf.fr/ark:/12148/cb12205670t'"),
            ('P569', '+1912-06-23T00:00:00Z', 'P813', 0,
             '+2015-10-10T00:00:00Z'),
            ('P1196', 'Q10737', 'P248', 0, 'Q20895922'),
            ('P268', "'12205670t'", 'P143', 0, 'Q8447'),
            ('P800', 'Q772056', 'P248', 0, 'Q20895922'),
            ('P20', 'Q2011497', 'P248', 0, 'Q20895922'),
            ('P27', 'Q145', 'P143', 0, 'Q48183')
        })
Ejemplo n.º 33
0
def test_has_property():
    """Check the ``has_p106`` / ``has_p999`` features against ALAN_TEXT.

    ``has_p106`` must solve to a truthy value and ``has_p999`` to a falsy
    one; both features must also compare equal to their pickled copies.
    """
    p106_value = solve(has_p106, cache={r_text: ALAN_TEXT})
    assert p106_value
    p999_value = solve(has_p999, cache={r_text: ALAN_TEXT})
    assert not p999_value

    # Pickle round trip, checked in the same order as the solves above.
    for feature in (has_p106, has_p999):
        assert pickle.loads(pickle.dumps(feature)) == feature
Ejemplo n.º 34
0
def test_has_commons_media(q7251, crab_nebula):
    """Check ``wikidatawiki.has_commons_media`` for two entity fixtures.

    The feature is expected to be exactly ``True`` for ``q7251`` and exactly
    ``False`` for ``crab_nebula``.
    """
    with_media = solve(wikidatawiki.has_commons_media,
                       cache={entity: q7251})
    assert with_media is True

    without_media = solve(wikidatawiki.has_commons_media,
                          cache={entity: crab_nebula})
    assert without_media is False
Ejemplo n.º 35
0
def test_item_completeness_empty():
    """Item completeness must be 0.0 when both property maps are empty."""
    empty_cache = {present_properties: {}, suggested_properties: {}}
    completeness = solve(wikidatawiki.item_completeness, cache=empty_cache)

    assert completeness == 0.0
Ejemplo n.º 36
0
def test_stemmed():
    """Check the Dutch stems datasource and that ``dutch.stemmed`` pickles."""
    cache = {revision_oriented.revision.text: 'Door middel van een!'}
    stems = solve(dutch.stemmed.revision.datasources.stems, cache=cache)
    assert stems == ["dor", "middel", "van", "een"]

    # The unpickled copy must compare equal to the original object.
    restored = pickle.loads(pickle.dumps(dutch.stemmed))
    assert dutch.stemmed == restored
Ejemplo n.º 37
0
def test_missing_key():
    """Solving a ``key`` datasource whose key is absent must raise.

    ``foobar`` is built from the key list ``['foo', 'bar']`` with
    ``if_missing=RuntimeError``; the cached dict has no ``'foo'`` entry, so
    solving it is expected to raise ``RuntimeError``.
    """
    my_dict = Datasource("my_dict")
    foobar = key(['foo', 'bar'], my_dict, if_missing=RuntimeError)

    # Keep only the call under test inside the ``raises`` block so that an
    # error while building the datasources cannot make the test pass, and
    # drop the comparison that could never be evaluated once solve raises.
    with raises(RuntimeError):
        solve(foobar, cache={my_dict: {'bar': 1}})
Ejemplo n.º 38
0
def test_key_exists():
    """Check ``key_exists`` for a present and an absent key, plus pickling."""
    source = Datasource("my_dict")
    has_foo = key_exists('foo', source)

    # (cached dict, expected result) pairs, solved in the original order.
    cases = (
        ({'foo': "bar"}, True),
        ({'baz': "bar"}, False),
    )
    for contents, expected in cases:
        assert solve(has_foo, cache={source: contents}) is expected

    assert pickle.loads(pickle.dumps(has_foo)) == has_foo
Ejemplo n.º 39
0
def test_stopwords():
    """Check the stopword datasources and features exposed by ``my_stops``.

    One cache holding a parent text and a revision text is reused for every
    solve call.  The test covers token lists, frequency tables, the
    revision-vs-parent deltas and the numeric features derived from them.
    """
    cache = {
        p_text: "My hat is the king of France.",
        r_text: "My waffle is the king of Normandy and the king of York."
    }
    rev = my_stops.revision
    parent_rev = my_stops.revision.parent

    # Token lists.
    rev_stops = solve(rev.datasources.stopwords, cache=cache)
    assert rev_stops == ['My', 'is', 'the', 'of', 'and', 'the', 'of']
    parent_stops = solve(parent_rev.datasources.stopwords, cache=cache)
    assert parent_stops == ['My', 'is', 'the', 'of']

    rev_non_stops = solve(rev.datasources.non_stopwords, cache=cache)
    assert rev_non_stops == ['waffle', 'king', 'Normandy', 'king', 'York']
    parent_non_stops = solve(parent_rev.datasources.non_stopwords,
                             cache=cache)
    assert parent_non_stops == ['hat', 'king', 'France']

    # Frequency tables (keys are lower-cased in the expected values).
    rev_stop_freq = solve(rev.datasources.stopword_frequency, cache=cache)
    assert rev_stop_freq == {'my': 1, 'is': 1, 'the': 2, 'and': 1, 'of': 2}
    rev_non_stop_freq = solve(rev.datasources.non_stopword_frequency,
                              cache=cache)
    assert rev_non_stop_freq == {
        'waffle': 1, 'king': 2, 'normandy': 1, 'york': 1}
    parent_stop_freq = solve(parent_rev.datasources.stopword_frequency,
                             cache=cache)
    assert parent_stop_freq == {'my': 1, 'is': 1, 'the': 1, 'of': 1}
    parent_non_stop_freq = solve(
        parent_rev.datasources.non_stopword_frequency, cache=cache)
    assert parent_non_stop_freq == {'hat': 1, 'king': 1, 'france': 1}

    # Deltas between the revision and its parent.
    diff = my_stops.revision.diff
    stop_delta = solve(diff.datasources.stopword_delta, cache=cache)
    assert stop_delta == {'of': 1, 'the': 1, 'and': 1}

    stop_prop_delta = solve(diff.datasources.stopword_prop_delta,
                            cache=cache)
    assert stop_prop_delta.keys() == {'of', 'the', 'and'}
    for word, expected in (('of', 0.50), ('the', 0.50), ('and', 1)):
        assert round(stop_prop_delta[word], 2) == expected

    non_stop_delta = solve(diff.datasources.non_stopword_delta, cache=cache)
    assert non_stop_delta == {
        'hat': -1,
        'waffle': 1,
        'king': 1,
        'normandy': 1,
        'york': 1,
        'france': -1,
    }

    non_stop_prop_delta = solve(diff.datasources.non_stopword_prop_delta,
                                cache=cache)
    assert non_stop_prop_delta.keys() == {
        'hat', 'waffle', 'king', 'normandy', 'york', 'france'}
    # 'france' is only checked for presence above, not for its value.
    rounded_expectations = (
        ('hat', -1),
        ('waffle', 1),
        ('king', 0.50),
        ('normandy', 1),
        ('york', 1),
    )
    for word, expected in rounded_expectations:
        assert round(non_stop_prop_delta[word], 2) == expected

    # Count features and delta aggregates, compared exactly.
    exact_features = (
        (my_stops.revision.stopwords, 7),
        (my_stops.revision.parent.stopwords, 4),
        (my_stops.revision.non_stopwords, 5),
        (my_stops.revision.parent.non_stopwords, 3),
        (diff.stopword_delta_sum, 3),
        (diff.stopword_delta_increase, 3),
        (diff.stopword_delta_decrease, 0),
        (diff.non_stopword_delta_sum, 2),
        (diff.non_stopword_delta_increase, 4),
        (diff.non_stopword_delta_decrease, -2),
    )
    for feature, expected in exact_features:
        assert solve(feature, cache=cache) == expected

    # Proportional delta aggregates, compared after rounding to 2 places.
    rounded_features = (
        (diff.stopword_prop_delta_sum, 2),
        (diff.stopword_prop_delta_increase, 2),
        (diff.stopword_prop_delta_decrease, 0),
        (diff.non_stopword_prop_delta_sum, 1.5),
        (diff.non_stopword_prop_delta_increase, 3.5),
        (diff.non_stopword_prop_delta_decrease, -2),
    )
    for feature, expected in rounded_features:
        assert round(solve(feature, cache=cache), 2) == expected
Ejemplo n.º 40
0
def test_stemmmed():
    """Check the English stems datasource and that the stemmer pickles."""
    text_cache = {r_text: 'This is spelled worngly. <td>'}
    stems = solve(english.stemmed.revision.datasources.stems,
                  cache=text_cache)
    assert stems == ["this", "is", "spell", "worng"]

    # The unpickled copy must compare equal to the original object.
    restored = pickle.loads(pickle.dumps(english.stemmed))
    assert english.stemmed == restored
Ejemplo n.º 41
0
def test_regexes():
    """Check the match datasources and features of the badword regexes.

    Covers matches in the revision and its parent, matches added and removed
    by the diff, the match deltas, and finally a single-text check of
    ``badwords_notbad``.
    """
    cache = {
        p_text: "This is notabadword.  There're bad words butts already.",
        r_text: "This is bad superbadword. There're bad words already."
    }
    rev = badwords.revision
    diff = badwords.revision.diff

    # (datasource, count feature, expected matches, expected count),
    # solved pairwise in this order.
    match_cases = (
        (rev.datasources.matches, rev.matches,
         ['bad', 'superbadword', 'bad'], 3),
        (rev.parent.datasources.matches, rev.parent.matches,
         ['bad', 'butts'], 2),
        (diff.datasources.matches_added, diff.matches_added,
         ['bad', 'superbadword'], 2),
        (diff.datasources.matches_removed, diff.matches_removed,
         ['butts'], 1),
    )
    for datasource, feature, expected_matches, expected_count in match_cases:
        assert solve(datasource, cache=cache) == expected_matches
        assert solve(feature, cache=cache) == expected_count

    delta = solve(diff.datasources.match_delta, cache=cache)
    assert delta == {'bad': 1, 'superbadword': 1, 'butts': -1}

    prop_delta = solve(diff.datasources.match_prop_delta, cache=cache)
    assert prop_delta.keys() == {'bad', 'superbadword', 'butts'}
    for word, expected in (('bad', 0.50), ('superbadword', 1), ('butts', -1)):
        assert round(prop_delta[word], 2) == expected

    delta_sum = solve(diff.match_delta_sum, cache=cache)
    assert round(delta_sum, 2) == 1
    prop_delta_sum = solve(diff.match_prop_delta_sum, cache=cache)
    assert round(prop_delta_sum, 2) == 0.50

    # Fresh cache: only the standalone "bad" is expected to match here.
    cache = {r_text: "This is bad but also notbad."}
    notbad_matches = solve(badwords_notbad.revision.datasources.matches,
                           cache=cache)
    assert notbad_matches == ['bad']
Ejemplo n.º 42
0
def score_text(model, text):
    """Score ``text`` with ``model``, reporting failures instead of raising.

    The model's features are solved with ``text`` cached under ``r_text`` and
    the resulting values are passed to ``model.score``.

    Returns:
        A ``(error, score)`` pair: ``(None, score)`` on success, or
        ``(exception, None)`` when solving or scoring raised an ``Exception``.
    """
    try:
        values = list(solve(model.features, cache={r_text: text}))
        score = model.score(values)
    except Exception as error:
        return error, None
    return None, score
Ejemplo n.º 43
0
 def fake_solve(dependents, cache=None):
     """Solve ``dependents`` with two fixed entries injected into the cache.

     ``len_func`` is mapped to the builtin ``len`` and ``literal_fake`` to
     the string ``"fake"`` before delegating to ``dependencies.solve``.  A
     cache passed in by the caller is updated in place.
     """
     if cache is None:
         cache = {}
     injected = {len_func: len, literal_fake: "fake"}
     cache.update(injected)
     return dependencies.solve(dependents, cache=cache)
Ejemplo n.º 44
0
def test_stemmmed():
    """Check the Romanian stems datasource and that the stemmer pickles."""
    text_cache = {r_text: "În timpul acestei perioade"}
    stems = solve(romanian.stemmed.revision.datasources.stems,
                  cache=text_cache)
    assert stems == ['în', 'timp', 'aceste', 'perioad']

    # The unpickled copy must compare equal to the original object.
    restored = pickle.loads(pickle.dumps(romanian.stemmed))
    assert romanian.stemmed == restored
Ejemplo n.º 45
0
def test_positive():
    """``pos_delta`` must contain only tokens whose count went up.

    'a' and 'c' occur less often in the new tokens and are expected to be
    absent; 'b', 'd' and 'e' map to the size of their increase.
    """
    before = ["a"] * 3 + ["b"] * 2 + ["c"] * 45 + ["e"] * 2
    after = ["a"] * 1 + ["b"] * 5 + ["d"] * 3 + ["e"] * 3

    delta = solve(pos_delta, cache={old_tokens: before, new_tokens: after})
    assert delta == {'b': 3, 'd': 3, 'e': 1}
Ejemplo n.º 46
0
def test_comment_features():
    comment_ds = revision_oriented.revision.comment
    cache = {comment_ds: "/* wbmergeitems-to:0||Q928543 */ "}
    assert solve(wikidatawiki.is_merge_into, cache=cache)
    assert solve(wikidatawiki.is_merge_from, cache=cache) is False
    assert solve(wikidatawiki.is_item_creation, cache=cache) is False

    cache = {comment_ds: "/* wbmergeitems-from:0||Q928543 */ "}
    assert solve(wikidatawiki.is_merge_from, cache=cache)
    assert solve(wikidatawiki.is_merge_into, cache=cache) is False

    cache = {comment_ds: "/* clientsitelink-remove:1||enwiki */ Boris Kok"}
    assert solve(wikidatawiki.is_client_delete, cache=cache)
    assert solve(wikidatawiki.is_client_move, cache=cache) is False

    cache = {comment_ds: "/* clientsitelink-update:0|uk|uk:A|uk:B *"}
    assert solve(wikidatawiki.is_client_move, cache=cache)
    assert solve(wikidatawiki.is_client_delete, cache=cache) is False
    assert solve(wikidatawiki.is_revert, cache=cache) is False

    cache = {comment_ds: "Undid revision 1448592 by [[Special:Contributions/"}
    assert solve(wikidatawiki.is_revert, cache=cache)
    cache = {comment_ds: "Reverted edits by [[Special:Contributions/"}
    assert solve(wikidatawiki.is_revert, cache=cache)
    cache = {comment_ds: "rvv racial slurs"}
    assert solve(wikidatawiki.is_revert, cache=cache)

    cache = {comment_ds: "Restored revision 123456"}
    assert solve(wikidatawiki.is_restore, cache=cache)
    assert solve(wikidatawiki.is_item_creation, cache=cache) is False
    assert solve(wikidatawiki.is_revert, cache=cache) is False

    cache = {comment_ds: "/* wbeditentity-create:0| */"}
    assert solve(wikidatawiki.is_item_creation, cache=cache)