Beispiel #1
0
def test_text_comparer():
    """Exercise the attributes and similarity metrics of a text comparer.

    A comparer is built from a fixed sentence through the callable returned
    by ``text_comparison.get()``.  Its derived attributes (``context``,
    ``text``, ``tokens``, ``stems``) are checked first, then each metric is
    compared against a value recomputed here from those attributes, using a
    second comparer built from ``"cycle home"``.
    """
    comparer = text_comparison.get()("Cycling through the examples")
    assert isinstance(comparer, TextComparison)
    assert comparer.context == "Cycling through the examples"
    assert comparer.text == comparer.context

    # Tokens: lower-cased, split text with the stopwords filtered out.
    actual_tokens = comparer.tokens
    expected_tokens = []
    for word in comparer.split(comparer.text.lower()):
        if word not in comparer.stopwords:
            expected_tokens.append(word)
    assert actual_tokens == expected_tokens

    # Stems: the set of stemmed tokens.
    actual_stems = comparer.stems
    expected_stems = {comparer.stemmer(token) for token in comparer.tokens}
    assert actual_stems == expected_stems

    other_text = "cycle home"
    other = text_comparison.get()(other_text)

    # Jaccard similarity: shared stems over the union of both stem sets.
    jaccard = comparer.jaccard_similarity(other)
    shared_stems = other.stems.intersection(comparer.stems)
    all_stems = set(other.stems).union(comparer.stems)
    assert jaccard == len(shared_stems) / float(len(all_stems))

    # Levenshtein metric: edit distance over the longer text's length.
    # NOTE(review): unlike the other ratios there is no float() here, so
    # under Python 2 this is integer division - confirm that this matches
    # the implementation being tested.
    levenshtein = comparer.levenshtein_distance(other)
    edit_distance = Levenshtein.distance(comparer.text, other.text)
    longest_length = max(len(comparer.text), len(other.text))
    assert levenshtein == edit_distance / longest_length

    # Share of the other comparer's tokens that also occur in this one.
    tokens_present = comparer.tokens_present(other)
    common_tokens = set(comparer.tokens).intersection(other.tokens)
    assert tokens_present == len(common_tokens) / float(len(other.tokens))

    # Share of the other comparer's stems that also occur in this one.
    stems_present = comparer.stems_present(other)
    common_stems = set(comparer.stems).intersection(other.stems)
    assert stems_present == len(common_stems) / float(len(other.stems))

    # Overall similarity takes the raw text and averages the four metrics.
    similarity = comparer.similarity(other_text)
    metric_total = (
        comparer.jaccard_similarity(other)
        + comparer.levenshtein_distance(other)
        + comparer.tokens_present(other)
        + comparer.stems_present(other)
    )
    assert similarity == metric_total / 4
Beispiel #2
0
def test_text_comparer():
    """Verify a text comparer's derived attributes and its metrics.

    The comparer under test wraps ``"Cycling through the examples"`` and is
    measured against a second comparer wrapping ``"cycle home"``.  Every
    expected value is recomputed in the test from the comparers' own
    ``split``, ``stopwords``, ``stemmer``, ``tokens`` and ``stems``.
    """

    def ratio(part, whole):
        # Size of ``part`` relative to ``whole``, forced to float division.
        return len(part) / float(len(whole))

    sentence = "Cycling through the examples"
    comparer = text_comparison.get()(sentence)
    assert isinstance(comparer, TextComparison)
    assert comparer.context == sentence
    assert comparer.text == comparer.context

    tokens = comparer.tokens
    lowered = comparer.text.lower()
    assert tokens == [
        candidate
        for candidate in comparer.split(lowered)
        if candidate not in comparer.stopwords
    ]

    stems = comparer.stems
    assert stems == set(map(comparer.stemmer, comparer.tokens))

    other_text = "cycle home"
    other = text_comparison.get()(other_text)

    # Each metric is read first, then its expected value is rebuilt.
    observed = comparer.jaccard_similarity(other)
    overlap = other.stems.intersection(comparer.stems)
    union = set(other.stems).union(comparer.stems)
    assert observed == ratio(overlap, union)

    observed = comparer.levenshtein_distance(other)
    distance = Levenshtein.distance(comparer.text, other.text)
    # NOTE(review): plain ``/`` without float(), as in the original check;
    # confirm the implementation divides the same way.
    assert observed == distance / max(len(comparer.text), len(other.text))

    observed = comparer.tokens_present(other)
    matching_tokens = set(comparer.tokens).intersection(other.tokens)
    assert observed == ratio(matching_tokens, other.tokens)

    observed = comparer.stems_present(other)
    matching_stems = set(comparer.stems).intersection(other.stems)
    assert observed == ratio(matching_stems, other.stems)

    # ``similarity`` is given the raw string rather than a comparer and is
    # expected to equal the mean of the four individual metrics.
    observed = comparer.similarity(other_text)
    jaccard_part = comparer.jaccard_similarity(other)
    levenshtein_part = comparer.levenshtein_distance(other)
    tokens_part = comparer.tokens_present(other)
    stems_part = comparer.stems_present(other)
    assert observed == (
        (jaccard_part + levenshtein_part + tokens_part + stems_part) / 4)
Beispiel #3
0
 def comparison(self):
     """Return a comparer built from this object's ``text``.

     The factory is obtained from ``text_comparison.get()`` on every call
     and is invoked with ``self.text`` as its only argument.
     """
     comparer_factory = text_comparison.get()
     return comparer_factory(self.text)
Beispiel #4
0
 def comparison(self):
     """Build and return a text comparer for ``self.text``.

     ``text_comparison.get()`` supplies the callable that is used to
     construct the comparer; it is looked up anew each time.
     """
     make_comparer = text_comparison.get()
     result = make_comparer(self.text)
     return result