Ejemplo n.º 1
0
    def test_porter2(self):
        """Test abydos.stemmer.Porter2.

        Feeds each word of an ordered (word, expected stem) table to
        ``self.stmr.stem`` and then checks the ``porter2`` wrapper
        function on a single word.
        """
        expectations = (
            # base case
            ('', ''),
            # simple cases
            ('c', 'c'),
            ('da', 'da'),
            ('ad', 'ad'),
            ('sing', 'sing'),
            ('singing', 'sing'),
            # missed branch test cases
            ('capitalism', 'capit'),
            ('fatalism', 'fatal'),
            ("dog's", 'dog'),
            ("A's'", 'a'),
            ('agreedly', 'agre'),
            ('feedly', 'feed'),
            ('stional', 'stional'),
            ('palism', 'palism'),
            ('sization', 'sizat'),
            ('licated', 'licat'),
            ('lical', 'lical'),
            ('clessly', 'clessli'),
            ('tably', 'tabli'),
            ('sizer', 'sizer'),
            ('livity', 'liviti'),
        )
        # The table order matches the original assertion order, so the
        # first failing word is reported exactly as before.
        for word, expected in expectations:
            self.assertEqual(self.stmr.stem(word), expected)

        # Test wrapper
        self.assertEqual(porter2('capitalism'), 'capit')
Ejemplo n.º 2
0
    def test_porter2(self):
        """Test abydos.stemmer.Porter2.

        The inputs are split into three groups (base, simple, and
        missed-branch cases); every word is stemmed with
        ``self.stmr.stem`` and compared with its expected stem. The
        ``porter2`` wrapper function is checked last.
        """
        base_case = (('', ''),)

        simple_cases = (
            ('c', 'c'),
            ('da', 'da'),
            ('ad', 'ad'),
            ('sing', 'sing'),
            ('singing', 'sing'),
        )

        missed_branch_cases = (
            ('capitalism', 'capit'),
            ('fatalism', 'fatal'),
            ("dog's", 'dog'),
            ("A's'", 'a'),
            ('agreedly', 'agre'),
            ('feedly', 'feed'),
            ('stional', 'stional'),
            ('palism', 'palism'),
            ('sization', 'sizat'),
            ('licated', 'licat'),
            ('lical', 'lical'),
            ('clessly', 'clessli'),
            ('tably', 'tabli'),
            ('sizer', 'sizer'),
            ('livity', 'liviti'),
        )

        # Groups are visited in the same order the assertions were
        # originally written in.
        for group in (base_case, simple_cases, missed_branch_cases):
            for token, stemmed in group:
                self.assertEqual(self.stmr.stem(token), stemmed)

        # Test wrapper
        self.assertEqual(porter2('capitalism'), 'capit')
Ejemplo n.º 3
0
    def test_porter2_snowball(self):
        """Test abydos.stemmer.porter2 (Snowball testset).

        These test cases are from
        http://snowball.tartarus.org/algorithms/english/diffs.txt

        Reads ``corpora/snowball_porter2.csv`` under ``TESTDIR``. The
        first line of the file is always skipped (presumably a header
        row -- confirm against the corpus file). Lines beginning with
        ``#`` and blank lines are ignored. Every other line is split on
        commas; the first field is the word and the second is the
        expected stem, which is lowercased before comparison.
        """
        #  Snowball Porter2 test set
        with open(TESTDIR+'/corpora/snowball_porter2.csv') as snowball_testset:
            next(snowball_testset)
            for line in snowball_testset:
                # Skip comments and blank lines. A blank line would
                # otherwise split into a single field and raise an
                # IndexError instead of a meaningful test failure.
                if line.startswith('#') or not line.strip():
                    continue
                fields = line.strip().split(',')
                word, expected = fields[0], fields[1].lower()
                actual = porter2(word)
                # Spell out the input word in the failure message: the
                # default message shows only the two stems, which is
                # not enough to locate the failing row in the corpus.
                self.assertEqual(
                    actual,
                    expected,
                    'porter2({!r}) returned {!r}, expected {!r}'.format(
                        word, actual, expected
                    ),
                )
Ejemplo n.º 4
0
    def test_porter2_early_english(self):
        """Test abydos.stemmer.porter2 (early English).

        Calls ``porter2`` with ``early_english=True`` for every word in
        an ordered (word, expected stem) table.
        """
        expectations = (
            # base case
            ('', ''),
            # simple cases (no different from regular stemmer)
            ('c', 'c'),
            ('da', 'da'),
            ('ad', 'ad'),
            ('sing', 'sing'),
            ('singing', 'sing'),
            # make
            ('make', 'make'),
            ('makes', 'make'),
            ('maketh', 'make'),
            ('makest', 'make'),
            # say
            ('say', 'say'),
            ('says', 'say'),
            ('sayeth', 'say'),
            ('sayest', 'say'),
            # missed branch test cases
            ('best', 'best'),
            ('meth', 'meth'),
        )
        for word, expected in expectations:
            self.assertEqual(porter2(word, early_english=True), expected)
Ejemplo n.º 5
0
    def test_porter2_early_english(self):
        """Test abydos.stemmer.porter2 (early English).

        Each word is stemmed with ``porter2(word, early_english=True)``
        and compared against its expected stem, group by group.
        """
        def stem_early(token):
            # Every call in this test enables the early English option.
            return porter2(token, early_english=True)

        base_case = (('', ''),)

        # simple cases (no different from regular stemmer)
        simple_cases = (
            ('c', 'c'),
            ('da', 'da'),
            ('ad', 'ad'),
            ('sing', 'sing'),
            ('singing', 'sing'),
        )

        # forms of "make" and "say", each expected to give the bare verb
        make_forms = ('make', 'makes', 'maketh', 'makest')
        say_forms = ('say', 'says', 'sayeth', 'sayest')
        verb_cases = (
            tuple((form, 'make') for form in make_forms)
            + tuple((form, 'say') for form in say_forms)
        )

        # missed branch test cases
        missed_branch_cases = (
            ('best', 'best'),
            ('meth', 'meth'),
        )

        all_groups = (
            base_case, simple_cases, verb_cases, missed_branch_cases
        )
        for group in all_groups:
            for token, stemmed in group:
                self.assertEqual(stem_early(token), stemmed)
Ejemplo n.º 6
0
    def test_porter2(self):
        """Test abydos.stemmer.porter2.

        Calls the ``porter2`` function on every word of an ordered
        (word, expected stem) table and compares the results.
        """
        expectations = [
            # base case
            ('', ''),
            # simple cases
            ('c', 'c'),
            ('da', 'da'),
            ('ad', 'ad'),
            ('sing', 'sing'),
            ('singing', 'sing'),
            # missed branch test cases
            ('capitalism', 'capit'),
            ('fatalism', 'fatal'),
            ("dog's", 'dog'),
            ("A's'", 'a'),
            ('agreedly', 'agre'),
            ('feedly', 'feed'),
            ('stional', 'stional'),
            ('palism', 'palism'),
            ('sization', 'sizat'),
            ('licated', 'licat'),
            ('lical', 'lical'),
            ('clessly', 'clessli'),
            ('tably', 'tabli'),
            ('sizer', 'sizer'),
            ('livity', 'liviti'),
        ]
        for word, expected in expectations:
            self.assertEqual(porter2(word), expected)