def extend(self, annotated_sentence):
    '''
    Extend an existing annotated sentence with types (from wordlists).

    For every word in ``annotated_sentence.words`` a stemmed lemma is
    computed, and the label of each wordset in ``self.wordsets`` whose
    ``pos`` patterns match the word's ``pos`` and whose ``words`` contain
    that lemma is appended to the word's comma-separated ``types``.

    The word objects are updated in place (their ``types`` attribute is
    reassigned) and the same objects are wrapped in the returned
    ``AnnotatedSentence``.
    '''
    extended_sentence = []
    for word in annotated_sentence.words:
        if word.lemma:
            lemma = self.stemmer.stem(word.lemma.lower())
        else:
            # No usable lemma (None or empty): derive one from the surface
            # form. This check must come before touching word.lemma, since
            # calling .lower() on None raises AttributeError.
            # wordnet lemmatiser only accepts these 4 chars as pos-tags
            postag = word.pos[0].lower()
            if postag not in ('a', 'r', 'n', 'v'):
                postag = 'n'
            lemma = self.stemmer.stem(
                self.lemmatiser.lemmatize(word.word.lower(), pos=postag))

        # convert the current word types to a list
        word_types = word.types.split(',') if word.types != '' else []

        # add the label of every wordset whose part of speech tags match
        # the current word and which contains the word's lemma
        for wordset in self.wordsets:
            if StringMatching.is_match(word.pos, wordset.pos) and lemma in wordset.words:
                word_types.append(wordset.label)

        word.types = ','.join(word_types)
        extended_sentence.append(word)
    return AnnotatedSentence(extended_sentence)
def test_match_contains_pass(self):
    # The phrase is expected to match the patterns '*Fruit*' / '*Mangoe*'.
    text = 'Orange Fruit Mangoes'
    patterns = '*Fruit*,*Mangoe*'.split(',')
    outcome = StringMatching.is_match(text, patterns)
    self.assertEqual(True, outcome)
def test_match_exact_fail(self):
    # A wildcard-free pattern that differs from the phrase must not match.
    text = 'Orange Fruit Mangoes'
    patterns = 'Orange Fruit Lemons'.split(',')
    outcome = StringMatching.is_match(text, patterns)
    self.assertEqual(False, outcome)
def test_match_exact_pass(self):
    # A wildcard-free pattern identical to the word is expected to match.
    text = 'Mangoes'
    patterns = 'Mangoes'.split(',')
    outcome = StringMatching.is_match(text, patterns)
    self.assertEqual(True, outcome)
def test_match_ends_pass(self):
    # 'Apples' is expected to match the pattern list ['*le', '*les'].
    text = 'Apples'
    patterns = '*le,*les'.split(',')
    outcome = StringMatching.is_match(text, patterns)
    self.assertEqual(True, outcome)
def test_match_starts_fail(self):
    # Neither 'Apl*' nor 'Apple' should be reported as matching 'Apples'.
    text = 'Apples'
    patterns = 'Apl*,Apple'.split(',')
    outcome = StringMatching.is_match(text, patterns)
    self.assertEqual(False, outcome)
def test_match_case_fail2(self):
    # With False as the third argument, the lower-case pattern '*fruit*'
    # must not match the capitalised 'Fruit' in the phrase.
    # NOTE(review): the flag presumably toggles case-insensitive matching;
    # confirm against the is_match signature.
    text = 'Orange Fruit Mangoes'
    patterns = '*fruit*'.split(',')
    outcome = StringMatching.is_match(text, patterns, False)
    self.assertEqual(False, outcome)
def test_match_surround_fail(self):
    # 'A*Fruit Mangoes' must not match a phrase that begins with 'Orange'.
    text = 'Orange Fruit Mangoes'
    patterns = 'A*Fruit Mangoes'.split(',')
    outcome = StringMatching.is_match(text, patterns)
    self.assertEqual(False, outcome)
def test_match_case_pass3(self):
    # An identical mixed-case word and pattern are expected to match when
    # True is given as is_match's third argument.
    # The flag is passed to is_match itself (third positional argument, as
    # in the other case tests); handed to assertEqual instead it would only
    # act as the failure message and the flag would never be exercised.
    word = 'AbCdE'
    strings = 'AbCdE'.split(',')
    self.assertEqual(True, StringMatching.is_match(word, strings, True))
def test_match_case_pass1(self):
    # 'ABCdefGEH' is expected to match the pattern 'ABCd*'.
    text = 'ABCdefGEH'
    patterns = 'ABCd*'.split(',')
    outcome = StringMatching.is_match(text, patterns)
    self.assertEqual(True, outcome)
def test_match_mixed_fail2(self):
    # Neither 'ABCDEF' nor '*BCDEF' should match 'ABCDEFg', which carries
    # an extra trailing character.
    text = 'ABCDEFg'
    patterns = 'ABCDEF,*BCDEF'.split(',')
    outcome = StringMatching.is_match(text, patterns)
    self.assertEqual(False, outcome)
def test_match_mixed_fail1(self):
    # None of 'Potatos*', '*topato*' or 'POTATO' should match 'Potatoes'.
    text = 'Potatoes'
    patterns = 'Potatos*,*topato*,POTATO'.split(',')
    outcome = StringMatching.is_match(text, patterns)
    self.assertEqual(False, outcome)
def test_match_mixed_pass2(self):
    # The phrase is expected to match the list as a whole (presumably via
    # '*ruit*'; the other two patterns differ from the phrase).
    text = 'Orange Fruit Mangoes'
    patterns = 'Or*eg,Orange Fruit Mango,*ruit*'.split(',')
    outcome = StringMatching.is_match(text, patterns)
    self.assertEqual(True, outcome)
def test_match_mixed_pass1(self):
    # 'Oranges' is expected to match the list as a whole (presumably via
    # '*ang*'; 'Fruit*' and '*papers' do not fit the word).
    text = 'Oranges'
    patterns = 'Fruit*,*papers,*ang*'.split(',')
    outcome = StringMatching.is_match(text, patterns)
    self.assertEqual(True, outcome)
def test_match_contains_fail(self):
    # '*Paper*' must not match a phrase that does not contain 'Paper'.
    text = 'Orange Fruit Mangoes'
    patterns = '*Paper*'.split(',')
    outcome = StringMatching.is_match(text, patterns)
    self.assertEqual(False, outcome)
def test_match_surround_pass(self):
    # 'Orange*oes' is expected to match a phrase starting with 'Orange'
    # and ending with 'oes'.
    text = 'Orange Fruit Mangoes'
    patterns = 'Orange*oes'.split(',')
    outcome = StringMatching.is_match(text, patterns)
    self.assertEqual(True, outcome)
def test_match_case_fail1(self):
    # With False as the third argument, 'aBcde' must not match 'AbCdE':
    # the two differ only in letter case.
    # NOTE(review): the flag presumably toggles case-insensitive matching;
    # confirm against the is_match signature.
    text = 'AbCdE'
    patterns = 'aBcde'.split(',')
    outcome = StringMatching.is_match(text, patterns, False)
    self.assertEqual(False, outcome)