def test_simple(self):
        """Check ``ratio_to_passive_verbs.quantify`` on two short sentences.

        The first sample is expected to yield a ratio of 0.0 and the second
        a ratio of 0.25.
        """
        first_sample = translationese.Analysis(
            """Tomorrow i will be there to answer your call""")
        second_sample = translationese.Analysis(
            """If It can be bought for less money, I will go and buy it""")

        self.assertEqual({"ratio_to_passive_verbs": 0.0},
                         ratio_to_passive_verbs.quantify(first_sample))
        self.assertEqual({"ratio_to_passive_verbs": 0.25},
                         ratio_to_passive_verbs.quantify(second_sample))
Ejemplo n.º 2
0
 def test_simple(self):
     """Check ``lexical_density.quantify`` against three expected ratios.

     All analyses are built first, then each result dict is compared
     with its expected ``lexical_density`` value in order.
     """
     # The line break and indentation inside this literal are part of the
     # sample text and are kept exactly as written.
     first_text = """I'm certain he said I am later than I am, 
                                  let's go"""
     second_text = """we will go tomorrow to see a movie"""
     third_text = """What about punctuation?"""

     analyses = [translationese.Analysis(text)
                 for text in (first_text, second_text, third_text)]
     expected_densities = [7.0 / 15.0, 0.5, 3.0 / 4.0]

     for density, analysis in zip(expected_densities, analyses):
         self.assertEqual({"lexical_density": density},
                          lexical_density.quantify(analysis))
Ejemplo n.º 3
0
 def test_simple(self):
     """Check the three type-token-ratio measures of the quantifier.

     Each measure is compared with its expected formula value to one
     decimal place.
     """
     analysis = translationese.Analysis("""Hello hello world world.""")
     variety = lexical_variety.LexicalVarietyQuantifier(analysis)

     expected_ttr = 6 * (3 / 5.0)
     expected_log_ttr = 6 * math.log(3) / math.log(5.0)
     expected_unique_ttr = 100 * math.log(5.0) / (1 - 1 / 3.0)

     self.assertAlmostEqual(expected_ttr,
                            variety.type_token_ratio(), places=1)
     self.assertAlmostEqual(expected_log_ttr,
                            variety.log_type_token_ratio(), places=1)
     self.assertAlmostEqual(expected_unique_ttr,
                            variety.unique_type_token_ratio(), places=1)
Ejemplo n.º 4
0
 def test_bigrams(self):
     """Check the bigram counts reported for a short tongue-twister.

     The expected keys are lowercase token pairs; the final question
     mark is expected to appear as a token of its own.
     """
     analysis = translationese.Analysis(
         "Which witch should watch which witch watch?")
     observed = analysis.bigrams()

     expected = {
         ("which", "witch"): 2,
         ("witch", "should"): 1,
         ("should", "watch"): 1,
         ("watch", "which"): 1,
         ("witch", "watch"): 1,
         ("watch", "?"): 1,
     }
     self.assertDictEqual(observed, expected)
Ejemplo n.º 5
0
def analyze_file(filename,
                 analyzer_module,
                 variant=None,
                 stanfordnlp=None,
                 lang=None):
    """Run ``analyzer_module`` over an Analysis built from ``filename``.

    Args:
        filename: Passed to ``translationese.Analysis`` as ``filename``.
        analyzer_module: Object exposing ``quantify(analysis)`` and
            ``quantify_variant(analysis, variant)``.
        variant: When not ``None``, selects ``quantify_variant`` and is
            forwarded to it; otherwise ``quantify`` is used.
        stanfordnlp: Forwarded unchanged to ``translationese.Analysis``.
        lang: Forwarded unchanged to ``translationese.Analysis``.

    Returns:
        Whatever the selected analyzer function returns.
    """
    analysis_context = translationese.Analysis(filename=filename,
                                               stanfordnlp=stanfordnlp,
                                               lang=lang)
    # The Analysis is used as a context manager so it is exited once the
    # analyzer call has returned.
    with analysis_context as analysis:
        if variant is None:
            return analyzer_module.quantify(analysis)
        return analyzer_module.quantify_variant(analysis, variant)
Ejemplo n.º 6
0
    def test_quantifier_variants(self):
        """Check ``lexical_variety.quantify_variant`` for variants 0, 1 and 2.

        Each variant is expected to return a single-entry dict keyed
        ``TTR1``, ``TTR2`` and ``TTR3`` respectively.
        """
        analysis = translationese.Analysis("""Hello hello world world.""")

        # Position in the list is the variant number passed to the module.
        expected_by_variant = [
            {"TTR1": 6 * (3 / 5.0)},
            {"TTR2": 6 * math.log(3) / math.log(5.0)},
            {"TTR3": 100 * math.log(5.0) / (1 - 1 / 3.0)},
        ]

        for variant, expected in enumerate(expected_by_variant):
            observed = lexical_variety.quantify_variant(analysis, variant)
            self.assertDictEqual(expected, observed)
Ejemplo n.º 7
0
 def test_simple(self):
     """Check ``Analysis.histogram_normalized`` on a 14-token sentence.

     Every expected frequency is the token's count divided by 14.0; the
     expected keys are lowercase and include the trailing question mark.
     """
     sentence = "How much wood would a woodchuck chuck if a " \
         "woodchuck could chuck wood?"
     a = translationese.Analysis(sentence)
     expected = {
         "how": 1 / 14.0,
         "much": 1 / 14.0,
         "wood": 2 / 14.0,
         "would": 1 / 14.0,
         "a": 2 / 14.0,
         "woodchuck": 2 / 14.0,
         "if": 1 / 14.0,
         "chuck": 2 / 14.0,
         "could": 1 / 14.0,
         "?": 1 / 14.0,
     }
     # assertEqual replaces the deprecated alias assertEquals, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(expected, a.histogram_normalized())
Ejemplo n.º 8
0
    def test_pmi(self):
        """Check ``Analysis.pmi`` on the five-token sample ``a b a b b``.

        The intermediate bigram counts and normalized histogram are
        asserted first, then each expected PMI value is compared both
        with a hand-rounded constant and with the value ``pmi()`` returns.
        """
        analysis = translationese.Analysis("a b a b b")

        # White box test so this can be followed externally

        observed_bigrams = analysis.bigrams()
        self.assertDictEqual(observed_bigrams, {
            ("a", "b"): 2,  # freq = 0.5
            ("b", "a"): 1,  # freq = 0.25
            ("b", "b"): 1,  # freq = 0.25
        })

        observed_histogram = analysis.histogram_normalized()
        self.assertDictEqual(observed_histogram, {
            "a": 2/5.0,
            "b": 3/5.0,
        })

        # log(bigram frequency / product of the two token frequencies)
        expected_pmi = {
            ("a", "b"): math.log(0.5 / (2/5.0 * 3/5.0)),
            ("b", "a"): math.log(0.25 / (3/5.0 * 2/5.0)),
            ("b", "b"): math.log(0.25 / (3/5.0 * 3/5.0)),
        }

        # The same values rounded by hand, as a cross-check of the formulas.
        expected_logs = {
            ("a", "b"): 0.73397,
            ("b", "a"): 0.04082,
            ("b", "b"): -0.36464,
        }

        expected_average = sum(expected_logs.values()) / 3.0
        self.assertAlmostEqual(0.13672, expected_average, places=5)

        actual_pmi = analysis.pmi()

        for pair, expected_value in expected_pmi.items():
            self.assertAlmostEqual(expected_value, expected_logs[pair],
                                   places=5)
            self.assertAlmostEqual(expected_value, actual_pmi[pair],
                                   places=5)
Ejemplo n.º 9
0
 def test_infinite_uniqueness(self):
     """Expect ``unique_type_token_ratio`` to be infinite for this sample."""
     analysis = translationese.Analysis("Only different words.")
     variety = lexical_variety.LexicalVarietyQuantifier(analysis)

     infinite = float("infinity")
     observed = variety.unique_type_token_ratio()
     self.assertEqual(infinite, observed)