Beispiel #1
0
  def extend(self, annotated_sentence):
    ''' Extend an existing annotated sentence with types (from wordlists).

    For every word in ``annotated_sentence.words`` a stemmed lemma is
    computed, and the label of each wordset in ``self.wordsets`` whose
    part-of-speech pattern matches the word and whose ``words`` contain
    that lemma is appended to the word's comma-separated ``types``.

    Note: the word objects are updated in place (their ``types`` attribute
    is reassigned) and the same objects are placed in the returned sentence.

    :param annotated_sentence: object exposing a ``words`` iterable whose
      items have ``word``, ``lemma``, ``pos`` and ``types`` attributes.
    :return: a new ``AnnotatedSentence`` built from the updated words.
    '''
    extended_sentence = []
    for word in annotated_sentence.words:
      if word.lemma is None or len(word.lemma) < 1:
        # No usable lemma on the word: derive one from the surface form.
        # The guard must run *before* touching word.lemma, otherwise a
        # missing lemma raises AttributeError on ``None.lower()``.
        # wordnet lemmatiser only accepts these 4 chars as pos-tags;
        # slicing (instead of indexing) makes an empty tag fall back to 'n'.
        postag = word.pos[:1].lower()
        postag = postag if postag in ('a', 'r', 'n', 'v') else 'n'
        lemma = self.stemmer.stem(self.lemmatiser.lemmatize(word.word.lower(), pos=postag))
      else:
        lemma = self.stemmer.stem(word.lemma.lower())

      # convert the current word types to a list
      word_types = word.types.split(',') if word.types else []
      # for all the wordsets whose part of speech tags match the current word
      for wordset in self.wordsets:
        if StringMatching.is_match(word.pos, wordset.pos) and lemma in wordset.words:
          word_types.append(wordset.label)

      word.types = ','.join(word_types)
      extended_sentence.append(word)
    return AnnotatedSentence(extended_sentence)
Beispiel #2
0
 def test_match_contains_pass(self):
     """Patterns wrapped in '*' on both sides are expected to match the phrase."""
     patterns = '*Fruit*,*Mangoe*'.split(',')
     phrase = 'Orange Fruit Mangoes'
     outcome = StringMatching.is_match(phrase, patterns)
     self.assertEqual(True, outcome)
Beispiel #3
0
 def test_match_exact_fail(self):
     """A wildcard-free pattern that differs from the phrase must not match."""
     patterns = 'Orange Fruit Lemons'.split(',')
     phrase = 'Orange Fruit Mangoes'
     outcome = StringMatching.is_match(phrase, patterns)
     self.assertEqual(False, outcome)
Beispiel #4
0
 def test_match_exact_pass(self):
     """A wildcard-free pattern identical to the word is expected to match."""
     patterns = 'Mangoes'.split(',')
     candidate = 'Mangoes'
     outcome = StringMatching.is_match(candidate, patterns)
     self.assertEqual(True, outcome)
Beispiel #5
0
 def test_match_ends_pass(self):
     """At least one of the leading-'*' patterns is expected to match 'Apples'."""
     patterns = '*le,*les'.split(',')
     candidate = 'Apples'
     outcome = StringMatching.is_match(candidate, patterns)
     self.assertEqual(True, outcome)
Beispiel #6
0
 def test_match_starts_fail(self):
     """Neither the trailing-'*' pattern nor the plain pattern may match 'Apples'."""
     patterns = 'Apl*,Apple'.split(',')
     candidate = 'Apples'
     outcome = StringMatching.is_match(candidate, patterns)
     self.assertEqual(False, outcome)
Beispiel #7
0
 def test_match_case_fail2(self):
     """A lower-case '*fruit*' pattern must not match when the flag is False.

     The third positional argument of ``is_match`` is passed as ``False``;
     presumably it controls case handling -- confirm against StringMatching.
     """
     patterns = '*fruit*'.split(',')
     phrase = 'Orange Fruit Mangoes'
     outcome = StringMatching.is_match(phrase, patterns, False)
     self.assertEqual(False, outcome)
Beispiel #8
0
 def test_match_surround_fail(self):
     """A pattern with an inner '*' and a non-matching prefix must not match."""
     patterns = 'A*Fruit Mangoes'.split(',')
     phrase = 'Orange Fruit Mangoes'
     outcome = StringMatching.is_match(phrase, patterns)
     self.assertEqual(False, outcome)
Beispiel #9
0
 def test_match_case_pass3(self):
     """An identically-cased pattern is expected to match with the flag set to True.

     The ``True`` flag is passed to ``is_match`` as its third positional
     argument. Previously it sat outside the call and was silently consumed
     as the ``msg`` argument of ``assertEqual``, so the flag was never
     exercised.
     """
     word = 'AbCdE'
     strings = 'AbCdE'.split(',')
     self.assertEqual(True, StringMatching.is_match(word, strings, True))
Beispiel #10
0
 def test_match_case_pass1(self):
     """A trailing-'*' pattern with matching letter case is expected to match."""
     patterns = 'ABCd*'.split(',')
     candidate = 'ABCdefGEH'
     outcome = StringMatching.is_match(candidate, patterns)
     self.assertEqual(True, outcome)
Beispiel #11
0
 def test_match_mixed_fail2(self):
     """Neither the plain nor the leading-'*' pattern may match 'ABCDEFg'."""
     patterns = 'ABCDEF,*BCDEF'.split(',')
     candidate = 'ABCDEFg'
     outcome = StringMatching.is_match(candidate, patterns)
     self.assertEqual(False, outcome)
Beispiel #12
0
 def test_match_mixed_fail1(self):
     """None of the three mixed-style patterns may match 'Potatoes'."""
     patterns = 'Potatos*,*topato*,POTATO'.split(',')
     candidate = 'Potatoes'
     outcome = StringMatching.is_match(candidate, patterns)
     self.assertEqual(False, outcome)
Beispiel #13
0
 def test_match_mixed_pass2(self):
     """At least one of the three mixed-style patterns is expected to match the phrase."""
     patterns = 'Or*eg,Orange Fruit Mango,*ruit*'.split(',')
     phrase = 'Orange Fruit Mangoes'
     outcome = StringMatching.is_match(phrase, patterns)
     self.assertEqual(True, outcome)
Beispiel #14
0
 def test_match_mixed_pass1(self):
     """At least one of the three mixed-style patterns is expected to match 'Oranges'."""
     patterns = 'Fruit*,*papers,*ang*'.split(',')
     candidate = 'Oranges'
     outcome = StringMatching.is_match(candidate, patterns)
     self.assertEqual(True, outcome)
Beispiel #15
0
 def test_match_contains_fail(self):
     """A '*'-wrapped pattern whose text is absent from the phrase must not match."""
     patterns = '*Paper*'.split(',')
     phrase = 'Orange Fruit Mangoes'
     outcome = StringMatching.is_match(phrase, patterns)
     self.assertEqual(False, outcome)
Beispiel #16
0
 def test_match_surround_pass(self):
     """A pattern with an inner '*' is expected to match the phrase."""
     patterns = 'Orange*oes'.split(',')
     phrase = 'Orange Fruit Mangoes'
     outcome = StringMatching.is_match(phrase, patterns)
     self.assertEqual(True, outcome)
Beispiel #17
0
 def test_match_case_fail1(self):
     """A pattern differing only in letter case must not match when the flag is False.

     The third positional argument of ``is_match`` is passed as ``False``;
     presumably it controls case handling -- confirm against StringMatching.
     """
     patterns = 'aBcde'.split(',')
     candidate = 'AbCdE'
     outcome = StringMatching.is_match(candidate, patterns, False)
     self.assertEqual(False, outcome)