Example #1
0
def sentence_to_tree(sentence):
    """
    Build a flat tree from the raw text of a sentence.

    The text is tokenized with ``word_tokenize`` and tagged with ``pos_tag``.
    Every (word, tag) pair becomes a subtree labelled with the tag whose
    single leaf is the word; all of these subtrees are attached, in token
    order, directly under a root labelled 'S'.

    Args:
        sentence: text of a sentence (checked with an assertion to be a
            ``basestring`` instance)
    Return:
        ParentedTree labelled 'S' with one child subtree per token
    """
    assert isinstance(sentence, basestring)

    tagged_tokens = pos_tag(word_tokenize(sentence))
    root = ParentedTree('S', [])
    # One pre-terminal node per token: label is the tag, leaf is the word.
    root.extend(ParentedTree(tag, [word]) for word, tag in tagged_tokens)
    return root
Example #2
0
def terms_inference(sentences, terms_trie):
    """
    Infer term occurrences in sentences and build one tree per sentence.

    Each sentence is scanned left to right. At every position
    ``_longest_matching_term`` is asked for a ``(label, length)`` pair; when
    the length is positive, the covered tokens are grouped under a 'TERM'
    node, otherwise the single token at that position is attached directly
    to the sentence root.

    Args:
        sentences: shallow-parsed text (iterable of indexable, sliceable
            token sequences)
        terms_trie: trie of terms, passed through to
            ``_longest_matching_term``
    Return:
        list of shallow parse trees (rooted at 'S') with inferred terms,
        dictionary mapping each term to the list of 'TERM' nodes where it
        occurs
    """
    trees = []
    occurrences = defaultdict(list)

    for sentence in sentences:
        root = ParentedTree('S', [])
        cursor = 0

        while cursor < len(sentence):
            label, span = _longest_matching_term(sentence, cursor, terms_trie)

            if span > 0:
                # A term starts here: wrap its tokens in a TERM node.
                node = ParentedTree('TERM', [])

                # Resolve the label to its term, remember it on the node and
                # record where the term occurred.
                found = name_to_term(label)
                node.term = found
                occurrences[found].append(node)

                end = cursor + span
                for covered in sentence[cursor:end]:
                    _append_word_token(node, covered)
                root.append(node)

                # Skip past every token consumed by the term.
                cursor = end
                continue

            # No term starts at the current position: keep the plain token.
            _append_word_token(root, sentence[cursor])
            cursor += 1

        trees.append(root)

    return trees, occurrences