Beispiel #1
0
 def test_weka_sanitizer_escape_grave_accent(self):
     """Check that a single quote is backslash-escaped by default.

     The test name says "grave accent", but the character in the
     fixture is a plain ASCII apostrophe.
     """
     raw_text = "that sweater is Chris'"
     # Same text, with a backslash inserted in front of the quote.
     wanted = "that sweater is Chris\\\'"
     sanitized = text_sanitizer.sanitize_weka(raw_text)
     self.assertEqual(
         wanted,
         sanitized,
         msg="Sanitized string is not what weka would expect it to be",
     )
Beispiel #2
0
    def test_weka_sanitizer_replace_separator(self):
        """Check that each separator character is swapped for one space.

        The comma is passed as ``remove_separator``; every comma in the
        input is expected to come back as a single blank, so ", " ends
        up as two consecutive spaces.
        """
        raw_text = "this is, a text, with many, lines"
        wanted = "this is  a text  with many  lines"
        sanitized = text_sanitizer.sanitize_weka(
            raw_text, remove_separator=","
        )
        self.assertEqual(
            wanted,
            sanitized,
            msg="Sanitized string is not what weka would expect it to be",
        )
Beispiel #3
0
    def test_weka_sanitizer_newline(self):
        """Check that every newline is replaced by a single space.

        The last line of the fixture starts with a space, so the
        expected output contains two spaces before "lines".
        """
        raw_text = "this is\na text\nwith many\n lines"
        wanted = "this is a text with many  lines"
        sanitized = text_sanitizer.sanitize_weka(raw_text)
        self.assertEqual(
            wanted,
            sanitized,
            msg="Sanitized string is not what weka would expect it to be",
        )
Beispiel #4
0
 def test_weka_sanitizer_grave_accent(self):
     """Check that a single quote is dropped when escaping is disabled.

     The test name says "grave accent", but the character in the
     fixture is a plain ASCII apostrophe. With
     ``escape_singlequote=False`` the expected output has it removed.
     """
     raw_text = "that sweater is Chris'"
     wanted = "that sweater is Chris"
     sanitized = text_sanitizer.sanitize_weka(
         raw_text, escape_singlequote=False
     )
     self.assertEqual(
         wanted,
         sanitized,
         msg="Sanitized string is not what weka would expect it to be",
     )
Beispiel #5
0
    def test_weka_sanitizer_escape_double_quoting(self):
        """Check that double quotes are backslash-escaped by default.

        The fixture opens with two double quotes and closes with one;
        each of the three is expected to gain a leading backslash while
        the rest of the text stays unchanged.
        """
        raw_text = '""No es posible que no le podamos garantizar dignidad a los que toda su vida trabajaron. Vamos a recomponer el ingreso de los jubilados. Y vamos a hacer una ley que diga que los jubilados no pagan los medicamentos y el Estado los va a subsidiar"'
        wanted = "\\\"\\\"No es posible que no le podamos garantizar dignidad a los que toda su vida trabajaron. Vamos a recomponer el ingreso de los jubilados. Y vamos a hacer una ley que diga que los jubilados no pagan los medicamentos y el Estado los va a subsidiar\\\""
        sanitized = text_sanitizer.sanitize_weka(raw_text)
        self.assertEqual(
            wanted,
            sanitized,
            msg="Sanitized string is not what weka would expect it to be",
        )