Example #1
def test_pattern_to_word_matching6(self):
    anword = AnnotatedWord(index=5,
                           word='baboons',
                           lemma='bongo',
                           pos='NN',
                           ner='O',
                           dependencies='cc-conj-d')
    # 'cc-conj-d' matches deps="cc-con*" but also matches the exclusion
    # exdeps="*-d", so the word must not match the pattern.
    pattern = pattern_pfx + '<word deps="cc-con*" exdeps="*-d"/>'
    tree = etree.fromstring(pattern)
    pattern_word = PatternWord(tree)
    self.assertEqual(
        False, PatternMatcher.word_matches_pattern(anword, pattern_word))
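The wildcard attributes in these patterns behave like shell-style globbing. A quick sketch with Python's fnmatch (an assumption about how PatternMatcher compares strings, not confirmed by the source) shows why this match fails:

from fnmatch import fnmatch

print(fnmatch('cc-conj-d', 'cc-con*'))  # True: the deps pattern matches
print(fnmatch('cc-conj-d', '*-d'))      # True: but the exclusion matches too,
                                        #       so the word fails the pattern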
Example #2
def test_pattern_to_word_matching1(self):
    anword = AnnotatedWord(index=7,
                           word='bongoes',
                           lemma='bongo',
                           pos='IN',
                           ner='O',
                           dependencies='cc-conj-d')
    # Exact POS match plus a wildcard lemma match ('bongo' vs "bon*"),
    # so the word satisfies the pattern.
    pattern = pattern_pfx + '<word pos="IN" lemma="bon*" max="1"/>'
    tree = etree.fromstring(pattern)
    pattern_word = PatternWord(tree)
    self.assertEqual(
        True, PatternMatcher.word_matches_pattern(anword, pattern_word))
Example #3
  def annotate(self, sentence):
    ''' Use the NLTK library to add basic NLP info to a sentence.
        Return an AnnotatedSentence. '''
    tokens = word_tokenize(sentence)
    pos_tagged_tokens = pos_tag(tokens)
    anno_words = []
    for i, (token, pos) in enumerate(pos_tagged_tokens):
      # The WordNet lemmatiser only needs a coarse POS: verb, or noun by default.
      lemma_pos = 'v' if pos[0].lower() == 'v' else 'n'
      word_lemma = self.lemmatiser.lemmatize(token, pos=lemma_pos)
      anno_words.append(
          AnnotatedWord(index=i, word=token, pos=pos, lemma=word_lemma))

    return AnnotatedSentence(anno_words)
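For reference, a standalone sketch of the same NLTK pipeline; it assumes self.lemmatiser is an nltk WordNetLemmatizer and that the required NLTK data (tokeniser models, tagger, WordNet) has been downloaded:

from nltk import pos_tag, word_tokenize
from nltk.stem import WordNetLemmatizer

lemmatiser = WordNetLemmatizer()  # assumed type of self.lemmatiser
for i, (token, pos) in enumerate(pos_tag(word_tokenize('The baboons play bongos'))):
    lemma_pos = 'v' if pos[0].lower() == 'v' else 'n'
    print(i, token, pos, lemmatiser.lemmatize(token, pos=lemma_pos))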
Example #4
def test_pattern_to_word_matching3(self):
    anword = AnnotatedWord(index=5,
                           word='baboons',
                           lemma='bongo',
                           pos='NNS',
                           ner='O',
                           dependencies='cc-conj-d')
    # pos="*" matches any tag and 'cc-conj-d' matches deps="cc*".
    pattern = pattern_pfx + '<word pos="*" deps="cc*"/>'
    tree = etree.fromstring(pattern)
    pattern_word = PatternWord(tree)
    self.assertEqual(
        True,
        PatternMatcher.word_matches_pattern(anword,
                                            pattern_word,
                                            verbose=True))
Example #5
  def annotate(self, sentence):
    ''' Uses the CoreNLP server to create an AnnotatedSentence from a string. '''
    annotated_data = json.loads(self.nlp.annotate(sentence))
    annotated_sentence = annotated_data['sentences'][0]
    anno_words = []
    for token in annotated_sentence['tokens']:
      dependencies = self._get_dependency_string(token['index'],
                                                 annotated_sentence['basicDependencies'])
      # Subtract 1 from the index: CoreNLP token indices are 1-based, ours are 0-based.
      anword = AnnotatedWord(index=token['index']-1,
                             word=token['word'],
                             lemma=token['lemma'],
                             pos=token['pos'],
                             ner=token['ner'],
                             dependencies=dependencies)
      anno_words.append(anword)

    return AnnotatedSentence(anno_words)
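A minimal sketch of the JSON this method consumes, fetched from the CoreNLP HTTP API directly; the server address and annotator list here are assumptions, not taken from the source:

import json
import requests

# Assumes a CoreNLP server is running locally on port 9000.
props = {'annotators': 'tokenize,ssplit,pos,lemma,ner,depparse',
         'outputFormat': 'json'}
resp = requests.post('http://localhost:9000/',
                     params={'properties': json.dumps(props)},
                     data='The baboons play bongos.'.encode('utf-8'))
for token in json.loads(resp.text)['sentences'][0]['tokens']:
    print(token['index'], token['word'], token['lemma'],
          token['pos'], token['ner'])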
Example #6
def get_reduced_sentence(patterns, annotated_words):
    ''' Replaces preprocessor patterns with a single classname word,
        e.g. "the book" would become "NOUN". '''
    skip_num = 0
    words = []
    index = 0
    dependencies = []
    for word in annotated_words:
        if skip_num > 0:
            skip_num -= 1
            dependencies.append(word.dependencies)
            continue

        # A non-empty dependencies list means words[-1] is a preprocessor
        # chunk; merge the collected dependencies and attach them to it.
        if len(dependencies) > 0:
            # When merging, governor and dependent dependencies of the same
            # type cancel out: a noun compound containing both compound-g and
            # compound-d has _internal_ dependencies, which don't matter and
            # may confuse other patterns. We only keep 'unresolved'
            # dependencies (a dependent or governor without its counterpart).
            dep_list = list(set(dependencies))
            reduced_dependencies = []
            for dep in dep_list:
                depname = dep.split('-')[0]
                if depname + '-g' in dep_list and depname + '-d' in dep_list:
                    # both found, ignore
                    continue
                else:
                    # 'unresolved' dependency, save this
                    reduced_dependencies.append(dep)

            # append the found dependencies to the preprocessed chunk
            words[-1].dependencies = ','.join(reduced_dependencies)
        # reset dependencies
        dependencies = []

        found = False
        for ptype, pattern_words in patterns:
            if found:
                break
            for pword in pattern_words:
                if found:
                    break
                if pword.index == word.index:
                    words.append(
                        AnnotatedWord(word=ptype.classname,
                                      index=index,
                                      lemma=word.lemma,
                                      pos='NULL'))
                    skip_num = len(pattern_words) - 1
                    found = True
                    dependencies.append(word.dependencies)
                    break
        if not found:
            word.index = index
            words.append(word)

    # repair indices so they are consecutive again
    for index, word in enumerate(words):
        word.index = index
    return AnnotatedSentence(words)
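As a worked example of the cancellation rule, merging the dependency strings collected from one hypothetical multi-word chunk (the values are made up for illustration):

# Per-word dependency strings gathered for one preprocessed chunk.
dependencies = ['compound-g', 'compound-d', 'nsubj-d']
dep_list = list(set(dependencies))
reduced = []
for dep in dep_list:
    depname = dep.split('-')[0]
    if depname + '-g' in dep_list and depname + '-d' in dep_list:
        continue  # governor and dependent of the same type: internal, drop
    reduced.append(dep)
print(reduced)  # ['nsubj-d'] -- only the unresolved dependency survives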