예제 #1
0
def noun_stem(s):
    """Return the stem of the plural noun ``s``, or '' when none applies.

    The rules are tried in this order:

    1. A word listed in ``unchanging_plurals`` is returned unchanged.
    2. A word ending in "men" has that ending replaced by "man"
       (e.g. "women" -> "woman").
    3. Otherwise the word is passed to ``verb_stem`` (defined elsewhere).
       If that result is itself in ``unchanging_plurals`` the empty string
       is returned; otherwise the ``verb_stem`` result is returned as is.
    """
    if s in unchanging_plurals:
        return s

    if s.endswith("men"):
        # Rewrite only the trailing "men"; a substitution over the whole
        # word would also corrupt any earlier "men" it happens to contain.
        return s[:-3] + "man"

    # Stem once and reuse the result for both the check and the return.
    stem = verb_stem(s)
    if stem in unchanging_plurals:
        return ''
    return stem
예제 #2
0
def restore_words_aux(tr, wds):
    """Rebuild ``tr`` with each leaf tag paired with a word taken from ``wds``.

    ``tr`` is either a string tag (a leaf) or a tree node exposing ``.node``
    and iteration over its children.  For every leaf one word is removed
    from the END of ``wds`` (``wds`` is mutated) and a ``(text, tag)`` pair
    is returned:

    * 'Is' / 'Ts' -> 'I_' / 'T_' followed by ``verb_stem(word)``
    * 'Np'        -> 'N_' followed by ``noun_stem(word)``
    * 'Ip', 'Tp', 'Ns', 'A' -> first letter of the tag, '_', the word itself
    * any other tag -> the word unchanged

    Non-leaf nodes are rebuilt as ``Tree(tr.node, [...])`` with the children
    processed left to right.
    """
    if not isinstance(tr, str):
        children = [restore_words_aux(child, wds) for child in tr]
        return Tree(tr.node, children)

    word = wds.pop()
    prefix = tr[0] + '_' if tr else ''

    if tr in ('Is', 'Ts'):
        return (prefix + verb_stem(word), tr)
    if tr == 'Np':
        return (prefix + noun_stem(word), tr)
    if tr in ('Ip', 'Tp', 'Ns', 'A'):
        return (prefix + word, tr)
    return (word, tr)
예제 #3
0
def test_stemming_with_tagging():
    """Run ``verb_stem`` over every ``word2stem`` entry and pin the known misses.

    The words whose computed stem differs from the expected one are gathered
    in ``word2stem`` iteration order and compared against a fixed list.
    """
    mismatched = [
        word
        for word, stem in word2stem.items()
        if verb_stem(word) != stem
    ]
    # some words are just not in brown :(
    assert mismatched == ['fizzes', 'dazes', 'analyses']
예제 #4
0
def tag_word(lx, wd):
    """Return the list of tags collected for ``wd`` relative to lexicon ``lx``.

    Function words take priority: when ``wd`` equals the first item of one or
    more entries in ``function_words_tags``, only the second items of those
    entries are returned and the lexicon is not consulted.

    Otherwise the lexicon categories are checked in the order P, A, N, I, T:

    * P / A: the tag is added when ``wd`` itself is stored under it.
    * N: applies when ``wd`` or ``noun_stem(wd)`` is stored under 'N'.
      A word in ``unchanging_plurals`` gets both 'Ns' and 'Np'; then 'Ns' is
      added when ``noun_stem(wd)`` is empty, else 'Np'.
    * I / T: applies when ``wd`` or ``verb_stem(wd)`` is stored under the
      category; the tag is the category plus 'p' when ``verb_stem(wd)`` is
      empty, else plus 's'.

    Parameters:
        lx: lexicon object providing ``getAll(category)``.
        wd: the word to tag.

    Returns:
        The list of tags, possibly empty.
    """
    # NOTE(review): ``add`` is defined elsewhere; presumably it appends to the
    # list without creating duplicates -- confirm against its definition.
    tags = []
    for entry in function_words_tags:
        if entry[0] == wd:
            add(tags, entry[1])
    if len(tags) != 0:
        return tags

    for category in ('P', 'A'):
        if wd in lx.getAll(category):
            add(tags, category)

    # The direct-hit and stem-hit cases apply exactly the same tagging rules,
    # so they share one branch.
    if wd in lx.getAll('N') or noun_stem(wd) in lx.getAll('N'):
        if wd in unchanging_plurals:
            add(tags, 'Ns')
            add(tags, 'Np')
        # Compare by value: identity against a string literal is
        # implementation-dependent and warns on recent Python versions.
        if noun_stem(wd) == '':
            add(tags, 'Ns')
        else:
            add(tags, 'Np')

    for category in ('I', 'T'):
        if wd in lx.getAll(category) or verb_stem(wd) in lx.getAll(category):
            if verb_stem(wd) == '':
                add(tags, category + 'p')
            else:
                add(tags, category + 's')

    return tags
예제 #5
0
def test_tagger():
    """Check that ``verb_stem`` yields a falsy result for the word 'cats'."""
    stem = verb_stem('cats')
    assert not stem
 def test_verb_stem(self):
     """Check ``s.verb_stem`` against a table of (inflected form, stem) pairs.

     The pairs are asserted in the listed order, so the first mismatch is
     the one reported.
     """
     expected_stems = (
         ("flies", "fly"),
         ("eats", "eat"),
         ("tells", "tell"),
         ("shows", "show"),
         ("pays", "pay"),
         ("buys", "buy"),
         ("flies", "fly"),
         ("tries", "try"),
         ("unifies", "unify"),
         ("dies", "die"),
         ("lies", "lie"),
         ("ties", "tie"),
         ("goes", "go"),
         ("boxes", "box"),
         ("attaches", "attach"),
         ("washes", "wash"),
         ("dresses", "dress"),
         # disabled case: "fizzes" -> "fizz"
         ("loses", "lose"),
         # disabled case: "dazes" -> "daze"
         ("lapses", "lapse"),
         # disabled case: "analyses" -> "analyse"
         ("has", "have"),
         ("likes", "like"),
         ("hates", "hate"),
         ("bathes", "bathe"),
     )
     for word, stem in expected_stems:
         self.assertEqual(stem, s.verb_stem(word))
예제 #7
0
def test_verb_stem():
    """Check ``st.verb_stem`` against a table of (word, expected stem) pairs.

    An expected stem of '' means the word must be rejected.  The pairs are
    asserted in the listed order.
    """
    cases = [
        ('tells', 'tell'),
        ('buys', 'buy'),
        ('buysa', ''),
        ('tries', 'try'),
        ('flies', 'fly'),
        ('dies', 'die'),
        ('fixes', ''),  # not in Brown Corpus
        ('goes', 'go'),
        ('boxes', ''),  # not in Brown Corpus
        ('attaches', 'attach'),
        ('washes', ''),  # not in Brown Corpus
        ('fizzes', ''),  # not in Brown Corpus
        ('dresses', ''),  # not in Brown Corpus
        ('loses', 'lose'),
        ('dazes', ''),  # not in Brown Corpus
        # should be ignored according to https://piazza.com/class/jkuzor9eypxov?cid=240
        ('has', ''),
        ('likes', 'like'),
        ('hates', 'hate'),
        ('bathes', ''),  # not in Brown Corpus
        # should be ignored according https://piazza.com/class/jkuzor9eypxov?cid=240
        ('is', ''),
        ('unties', ''),  # not in Brown Corpus
        ('cats', ''),
        ('analyses', ''),  # not in Brown Corpus
    ]
    for word, expected in cases:
        assert st.verb_stem(word) == expected
예제 #8
0
def test_verb_stem(self):
    """Check ``s.verb_stem`` against a table of (inflected form, stem) pairs.

    The pairs are asserted in the listed order, so the first mismatch is
    the one reported.
    """
    expected_stems = (
        ("flies", "fly"),
        ("eats", "eat"),
        ("tells", "tell"),
        ("shows", "show"),
        ("pays", "pay"),
        ("buys", "buy"),
        ("tries", "try"),
        ("unifies", "unify"),
        ("dies", "die"),
        ("lies", "lie"),
        ("ties", "tie"),
        ("goes", "go"),
        ("boxes", "box"),
        ("attaches", "attach"),
        ("washes", "wash"),
        ("dresses", "dress"),
        # disabled case: "fizzes" -> "fizz"
        ("loses", "lose"),
        # disabled case: "dazes" -> "daze"
        ("lapses", "lapse"),
        # disabled case: "analyses" -> "analyse"
        ("has", "have"),
        ("likes", "like"),
        ("hates", "hate"),
        ("bathes", "bathe"),
    )
    for word, stem in expected_stems:
        self.assertEqual(stem, s.verb_stem(word))


class TestPOS(unittest.TestCase):
    """Tests for ``p.noun_stem`` and ``p.tag_word``."""

    def test_noun_stem(self):
        """Check ``p.noun_stem`` on unchanging, irregular and regular plurals."""
        self.assertEqual(p.noun_stem("sheep"), "sheep")
        self.assertEqual(p.noun_stem("sheeps"), "")
        self.assertEqual(p.noun_stem("buffalo"), "buffalo")
        self.assertEqual(p.noun_stem("buffalos"), "")
        self.assertEqual(p.noun_stem("women"), "woman")
        self.assertEqual(p.noun_stem("men"), "man")
        self.assertEqual(p.noun_stem("ashes"), "ash")
        self.assertEqual(p.noun_stem("countries"), "country")
        self.assertEqual(p.noun_stem("dogs"), "dog")

    def test_tag_words(self):
        """Check ``p.tag_word`` against a small hand-built lexicon."""
        lx = s.Lexicon()
        lx.add("John", "P")
        lx.add("orange", "A")
        lx.add("orange", "N")
        lx.add("fish", "N")
        lx.add("fish", "I")
        lx.add("fish", "T")
        self.assertEqual(["P"], p.tag_word(lx, "John"))
        self.assertEqual(["A", "Ns"], p.tag_word(lx, "orange"))
        self.assertEqual(["Ns", "Np", "Ip", "Tp"], p.tag_word(lx, "fish"))
        # "a" is never added to the lexicon above.
        self.assertEqual(["AR"], p.tag_word(lx, "a"))
        # A word unknown everywhere yields no tags.
        self.assertEqual([], p.tag_word(lx, "zxghqw"))


class TestAgreement(unittest.TestCase):
    """Tests that ``a.all_valid_parses`` finds a parse for sample questions."""

    def test_can_parse(self):
        """Each sample question must have at least one valid parse."""
        lx = s.Lexicon()
        # Entries are added in the original order; ("like", "T") appears
        # twice there and is kept twice here.
        entries = [
            ('John', 'P'),
            ('like', 'T'),
            ("fly", "I"),
            ("Mary", "P"),
            ("duck", "N"),
            ("swim", "I"),
            ("like", "T"),
            ("frog", "N"),
            ("orange", "A"),
            ("orange", "N"),
            ("purple", "A"),
            ("fish", "N"),
            ("fish", "I"),
            ("fish", "T"),
            ("student", "N"),
            ("old", "A"),
        ]
        for word, category in entries:
            lx.add(word, category)

        questions = [
            "Who likes John ?",
            "Who is a duck ?",
            "Which orange duck likes a frog ?",
            "Who does John like ?",
            "Who is an orange duck ?",
            "Which ducks are orange ?",
            "Which ducks like a frog ?",
            "Which ducks like frogs ?",
            "Who likes a duck who flies ?",
            "Which purple ducks fly ?",
        ]
        for question in questions:
            parses = a.all_valid_parses(lx, question.split(" "))
            # Pass the question as the message so a failure names it.
            self.assertGreaterEqual(len(parses), 1, question)


# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
예제 #9
0
    def test_verb_stem(self):
        """Check ``statements.verb_stem`` rule by rule.

        Every (word, stem) pair is asserted in the listed order; a stem of
        "" means the word must be rejected.
        """
        before_unties = (
            # Rule 1
            ("eats", "eat"),
            ("tells", "tell"),
            ("shows", "show"),
            # Rule 2
            ("pays", "pay"),
            ("buys", "buy"),
            # Rule 3
            ("flies", "fly"),
            ("tries", "try"),
            ("unifies", "unify"),
            # Rule 4
            ("dies", "die"),
            ("lies", "lie"),
            ("ties", "tie"),
        )
        after_unties = (
            # Rule 5
            ("goes", "go"),
            ("boxes", "box"),
            ("attaches", "attach"),
            ("washes", "wash"),
            ("dresses", "dress"),
            # disabled case: "fizzes" -> "fizz"
            # Rule 6
            ("loses", "lose"),
            ("dazes", "daze"),
            ("lapses", "lapse"),
            ("analyses", "analyse"),
            # Rule 7
            ("has", "have"),
            # Rule 8
            ("likes", "like"),
            ("hates", "hate"),
            ("bathes", "bathe"),
            # Base Case
            ("flys", ""),
            ("inchs", ""),
            # Check whether point 4 of Part 1 was implemented
            ("cats", ""),
            ("Johns", ""),
            ("Marys", ""),
            ("dogs", ""),
            ("doggies", ""),
        )

        for word, stem in before_unties:
            self.assertEqual(statements.verb_stem(word), stem)

        # NOTE(review): "unities" is not a plausible stem of "unties", so this
        # check can hardly fail -- confirm the intended expected value.
        self.assertNotEqual(statements.verb_stem("unties"), "unities")

        for word, stem in after_unties:
            self.assertEqual(statements.verb_stem(word), stem)