Beispiel #1
0
class TestCleanTextUtil(unittest.TestCase):
    """ Tests the CleanTextUtil class.

    Every test works on the same French sample sentence, which setUp
    stores as a list of words in self.words.

    """
    def setUp(self):
        """ Builds a CleanTextUtil for "french" and the sample word list. """
        self.ctu = CleanTextUtil("french")
        self.words = [u"Nous", u"allions", u"à", u"la", u"plage"]

    def tearDown(self):
        """ Calls rm_data_dir() after each test.

        NOTE(review): presumably removes a data directory created while
        the tests run -- confirm against the definition of rm_data_dir.

        """
        rm_data_dir()

    def test_stem_words(self):
        """ Tests stem_words.

        1- Verify that the result is correct.
        from [u"Nous", u"allions", u"à", u"la", u"plage"]
        to [u"Nous", u"allion", u"à", u"la", u"plag"]

        """
        wanted = [u"Nous", u"allion", u"à", u"la", u"plag"]

        get = self.ctu.stem_words(self.words)
        # assertEqual, not the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(get, wanted) # 1

    def test_rm_stop_words(self):
        """ Tests rm_stop_words.

        1- Verify that the result is correct.
        from [u"Nous", u"allions", u"à", u"la", u"plage"]
        to [u"allions", u"plage"]

        """
        wanted = [u"allions", u"plage"]

        get = self.ctu.rm_stop_words(self.words)
        self.assertEqual(get, wanted) # 1

    def test_clean_text(self):
        """ Tests clean_text.

        1- Verify that the result is correct.
        from "Nous allions à la plage"
        to ["allion", "plag"]

        """
        wanted = ["allion", "plag"]

        # clean_text receives the whole sentence as one string, so the
        # sample words are joined with single spaces first.
        get = self.ctu.clean_text(" ".join(self.words))
        self.assertEqual(get, wanted) # 1
Beispiel #2
0
 def setUp(self):
     """ Prepares the fixtures shared by the tests.

     Stores a CleanTextUtil built for "french" in self.ctu and the
     sample sentence, as a list of words, in self.words.

     """
     language = "french"
     sample_words = [
         u"Nous",
         u"allions",
         u"à",
         u"la",
         u"plage",
     ]
     self.ctu = CleanTextUtil(language)
     self.words = sample_words