Example #1
def BP_tree_to_nltk_tree(tree):
    root = Tree(str(tree.keys), children=[])
    if isinstance(tree, BPnode) or isinstance(tree, Node):
        for child in tree.children:
            root.append(BP_tree_to_nltk_tree(child))

    return root
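The helper above assumes the project's BPnode/Node classes. A minimal self-contained check with a hypothetical stand-in Node class (the `keys` attribute mirrors the attribute read above):

from nltk.tree import Tree

class Node:
    # Hypothetical stand-in for the project's BPnode/Node classes.
    def __init__(self, keys, children=()):
        self.keys = keys
        self.children = list(children)

BPnode = Node  # alias so the isinstance check above resolves

bp = Node('S', [Node('NP'), Node('VP')])
print(BP_tree_to_nltk_tree(bp))  # (S (NP ) (VP ))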
Example #2
def wsjtree2pos(wsj_corpus_path):
    print("Reading in corpus...", file=sys.stderr)
    sentences = []
    for d in os.listdir(wsj_corpus_path):
        if os.path.isdir(wsj_corpus_path + "/" + d) and d != "CVS" and int(d) < 8:
            for f in os.listdir(wsj_corpus_path + "/" + d):
                if f.endswith(".mrg"):
                    fname = wsj_corpus_path + "/" + d + "/" + f
                    # print fname
                    tree_f = open(fname, "r")
                    tree_string = ""
                    for line in tree_f:
                        if line.strip():
                            if line.startswith("( (") or line.startswith("(("):
                                if tree_string:
                                    tr = Tree.fromstring(tree_string)  # Tree(string) parsing was removed in NLTK 3
                                    sentences.append(tr.pos())
                                    tree_string = line.strip()
                                else:
                                    tree_string = line.strip()
                            else:
                                tree_string += line.strip()
                    if tree_string:
                        tr = Tree.fromstring(tree_string)
                        sentences.append(tr.pos())

    return sentences
def attach_tree(head, dep, attachment, chain, indexes, flag, coindex=None):
    """Attach dep's projection chain to head's projection chain.

    head, dep: trees; flag: 'right' or 'left'.
    """
    if isinstance(coindex,int): # handle coindex tag
        label = attachment['label2']
        offset = attachment['offset2']
        dep = Tree(dep.label(),['*-'+str(coindex)])        
    else:
        label = attachment['label']
        offset = attachment['offset']
        
    l_index = [l[0] for l in chain[0]].index(label)
    count = sum([l[1] for l in chain[0]][:l_index+1])-offset
    if flag=='right':
        a_index = indexes[count-1]+1
    elif flag=='left':
        a_index = indexes[count-1]
        indexes[count-1] += 1
    else:
        raise ValueError("Invalid flag: expected 'right' or 'left'")
    if head.label()=='PRN':
        s = 'head[0]'
    else:
        s = 'head'
    for i in range(count-1):
        s += '['+str(indexes[i])+']'
    eval(s+'.insert('+str(a_index)+',dep)') # insert() vs pop()
    
    if 'f_tag' in attachment:
        if attachment['f_tag'] not in {'PRD','PRDs'}:
            eval(s+'.set_label('+s+'.label()+"-"+attachment["f_tag"])')
        else:
            s += '['+str(indexes[count-1])+']'
            eval(s+'.set_label('+s+'.label()+"-"+attachment["f_tag"])')
    return head,indexes
 def __init__(self, node, children, parent_node=None,
              rel=None, attrs=None, head=None):
   self.parent_node = parent_node
   self.rel = rel
   self.attrs = attrs
   self.head = head  
   Tree.__init__(self, node, children)  
    def parse_tree(self, text, binary=False, preprocessed=False):
        nlp_output = self.nlp.annotate(text, properties={
            'annotators': 'tokenize,ssplit,pos,parse',
            'outputFormat': 'json',
            'parse.binaryTrees': 'true'
        })
        if isinstance(nlp_output, str):
            nlp_output = json.loads(nlp_output, strict=False)

        if len(nlp_output['sentences']) > 1:
            #merge trees from sentences
            tree_string = "(Top "
            for s in nlp_output['sentences']:
                p_tree = Tree.fromstring(s['parse'])
                tree_string += str(p_tree[0])
            tree_string += ")"
            merged_tree = Tree.fromstring(tree_string)
        else:
            #no merging required
            merged_tree = Tree.fromstring(nlp_output['sentences'][0]['parse'])
            #remove root
            merged_tree = merged_tree[0]

        if binary:
            nltk.treetransforms.chomsky_normal_form(merged_tree)

        if preprocessed:
            merged_tree = preprocess_parse_tree(merged_tree)

        return merged_tree
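This method assumes self.nlp is a Stanford CoreNLP client exposing an annotate(text, properties=...) call; pycorenlp is one such wrapper, so a hedged usage sketch (assumes the pycorenlp package and a CoreNLP server listening on the given port):

from pycorenlp import StanfordCoreNLP  # assumption: pycorenlp client, server on :9000
from nltk.tree import Tree

nlp = StanfordCoreNLP('http://localhost:9000')
out = nlp.annotate("It works. It really does.",
                   properties={'annotators': 'tokenize,ssplit,pos,parse',
                               'outputFormat': 'json'})
print(Tree.fromstring(out['sentences'][0]['parse']))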
def parser_output_to_parse_deriv_trees(output):
    lines = output.strip().split("\n")
    deriv_tree_lines = lines[::2]
    parse_tree_lines = lines[1::2]

    parse_trees = [Tree.fromstring(line.replace('\x06', 'epsilon_')) for line in parse_tree_lines if line != '']
    deriv_trees = [Tree.fromstring(line) for line in deriv_tree_lines if line != '']
    return parse_trees, deriv_trees
 def munge(t):
     if type(t) == Tree:
         toks = t.leaves()
         t = Tree(t.label(), [munge(child) for child in t])
         setattr(t, "tokens", toks)
         return t
     else:
         return Tree(t, [])
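munge rebuilds a tree bottom-up while caching each subtree's original leaf list on a tokens attribute (string leaves become childless Trees). A quick check:

from nltk import Tree

t = munge(Tree.fromstring("(S (NP (DT the) (NN cat)) (VP (VBD sat)))"))
print(t.tokens)     # ['the', 'cat', 'sat']
print(t[0].tokens)  # ['the', 'cat']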
def removeNounMods(tree):
    tree_str = tsurgeon.remove_internal_mods(tree)
    if tree_str != '':
        tree = Tree.fromstring(tree_str)
    tree_str = tsurgeon.remove_participle_mods(tree)
    if tree_str != '':
        tree = Tree.fromstring(tree_str)
    return tree
Example #9
    def parse(self, tagged_sent):
        """Parse a tagged sentence.

        tagged_sent -- the tagged sentence (a list of pairs (word, tag)).
        """
        t = Tree(self.start, [Tree(tag, [word]) for word, tag in tagged_sent])
        t.chomsky_normal_form(factor='left', horzMarkov=0)
        return t
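The method builds a flat tree over the tagged words and then left-factors it; inlined with self.start = 'S' it behaves like this:

from nltk import Tree

tagged = [('El', 'D'), ('gato', 'N'), ('come', 'V'), ('pescado', 'N'), ('.', 'P')]
t = Tree('S', [Tree(tag, [word]) for word, tag in tagged])
t.chomsky_normal_form(factor='left', horzMarkov=0)
print(t)  # (S (S|<> (S|<> (S|<> (D El) (N gato)) (V come)) (N pescado)) (P .))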
	def _get_tense(cls, parse, token_indices, use_gold=False):
		if len(token_indices) == 1:
			return 'one_token'
		parse_tree = Tree.fromstring(parse['parsetree'])  # Tree(string) was the NLTK 2 constructor
		start_index = min(token_indices)
		end_index = max(token_indices) + 1
		tree_position = parse_tree.treeposition_spanning_leaves(start_index, end_index)
		arg_subtree = parse_tree[tree_position]
		return cls._recurse_search_tag(arg_subtree, ['VP'], [])
Example #11
    def test_lbranch_parse(self):
        model = LBranch([], 'S')  # empty training set

        trees = [model.parse(s) for s in self.tagged_sents]

        trees2 = [
            Tree.fromstring("""(S (S|<> (S|<> (S|<> (D El) (N gato)) (V come)) (N pescado)) (P .))"""),
            Tree.fromstring("""(S (S|<> (S|<> (S|<> (D La) (N gata)) (V come)) (N salmón)) (P .))"""),
        ]
        self.assertEqual(trees, trees2)
Example #12
    def test_flat_parse(self):
        model = Flat([], 'S')  # empty training set

        trees = [model.parse(s) for s in self.tagged_sents]

        trees2 = [
            Tree.fromstring("(S (D El) (N gato) (V come) (N pescado) (P .))"),
            Tree.fromstring("(S (D La) (N gata) (V come) (N salmón) (P .))"),
        ]
        self.assertEqual(trees, trees2)
Example #13
 def tags2tree(sentence, root_label='S', strict=False):
     tree = Tree(root_label, [])
     for (word, postag, chunktag) in sentence:
         if chunktag is None:
             if strict:
                 raise ValueError("Bad tag sequence")
             else:
                 # Treat as O
                 tree.append((word, postag))
         elif chunktag.startswith('B'):
             tree.append(Tree(chunktag[2:], [(word, postag)]))
         elif chunktag.startswith('I'):
             if (len(tree) == 0 or not isinstance(tree[-1], Tree) or
                         tree[-1].label() != chunktag[2:]):
                 if strict:
                     raise ValueError("Bad tag sequence")
                 else:
                     # Treat as B-*
                     tree.append(Tree(chunktag[2:], [(word, postag)]))
             else:
                 tree[-1].append((word, postag))
         elif chunktag == 'O':
             tree.append((word, postag))
         else:
             raise ValueError("Bad tag %r" % chunktag)
     return tree
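For example, fed word/POS/IOB triples it groups B-/I- spans into chunk subtrees (nltk prints the (word, tag) tuples as word/tag):

sent = [('He', 'PRP', 'B-NP'), ('saw', 'VBD', 'O'),
        ('the', 'DT', 'B-NP'), ('dog', 'NN', 'I-NP')]
print(tags2tree(sent))  # (S (NP He/PRP) saw/VBD (NP the/DT dog/NN))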
Example #14
def conlltags2tree(sentence, chunk_types=('NP','PP','VP'),
                   root_label='S', strict=False):
    """
    Convert the CoNLL IOB format to a tree.
    """
    tree = Tree(root_label, [])
    for (word, postag, chunktag) in sentence:
        if chunktag is None:
            if strict:
                raise ValueError("Bad conll tag sequence")
            else:
                # Treat as O
                tree.append((word,postag))
        elif chunktag.startswith('B-'):
            tree.append(Tree(chunktag[2:], [(word,postag)]))
        elif chunktag.startswith('I-'):
            if (len(tree)==0 or not isinstance(tree[-1], Tree) or
                tree[-1].label() != chunktag[2:]):
                if strict:
                    raise ValueError("Bad conll tag sequence")
                else:
                    # Treat as B-*
                    tree.append(Tree(chunktag[2:], [(word,postag)]))
            else:
                tree[-1].append((word,postag))
        elif chunktag == 'O':
            tree.append((word,postag))
        else:
            raise ValueError("Bad conll tag {0!r}".format(chunktag))
    return tree
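This mirrors nltk.chunk.util.conlltags2tree, so nltk's own tree2conlltags inverts it:

from nltk.chunk import tree2conlltags

t = conlltags2tree([('the', 'DT', 'B-NP'), ('dog', 'NN', 'I-NP'), ('barked', 'VBD', 'O')])
print(tree2conlltags(t))  # [('the', 'DT', 'B-NP'), ('dog', 'NN', 'I-NP'), ('barked', 'VBD', 'O')]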
    def __build_tree(self, node_num):
        word_tuple = self.words[node_num]
        tree_node = Tree(word_tuple[1], [])

        node_dependencies = self.dependencies.get(node_num)
        if node_dependencies is not None:
            for dependency in node_dependencies:
                dependency_node = self.__build_tree(dependency[0])
                tree_node.append(dependency_node)

        return tree_node
Example #16
 def __str2BguTree(self,text):
     lines = text.split('\n')
     tree = Tree('s',[])
     for line in lines:
         if line=='':
             continue
         mlist = line.split("\t")
         word = mlist[0]
         raw = mlist[1]
         tree.append((word,bguTag(raw)))
     return tree
def extractParticiple(tree):
    part_mod = tsurgeon.hasParticipleMod(tree)
    if part_mod != '':
        subject = tsurgeon.findSubject(tree)
        subject_words = Tree.fromstring(subject).leaves()
        part_tree = Tree.fromstring(part_mod)
        part_words = part_tree.leaves()
        # Ignoring inflection
        result_words = subject_words + ['is'] + part_words[1:]
        sentence = ' '.join(result_words).strip() + '.'
        return sentence
    return None
Example #18
    def _strip_functional_tags(self, tree: Tree) -> None:
        """
        Removes all functional tags from constituency labels in an NLTK tree.
        We also strip off anything after a =, - or | character, because these
        are functional tags which we don't want to use.

        This modification is done in-place.
        """
        clean_label = tree.label().split("=")[0].split("-")[0].split("|")[0]
        tree.set_label(clean_label)
        for child in tree:
            if not isinstance(child[0], str):
                self._strip_functional_tags(child)
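Since the method only touches the tree, the same logic works standalone; a sketch showing labels such as NP-SBJ=1 reducing to NP:

from nltk import Tree

def strip_functional_tags(tree):
    # standalone copy of the method above, for illustration only
    tree.set_label(tree.label().split("=")[0].split("-")[0].split("|")[0])
    for child in tree:
        if not isinstance(child[0], str):
            strip_functional_tags(child)

t = Tree.fromstring("(S (NP-SBJ=1 (DT The) (NN deal)) (VP-TPC (VBD fell) (PRT-CLR (RP through))))")
strip_functional_tags(t)
print(t)  # (S (NP (DT The) (NN deal)) (VP (VBD fell) (PRT (RP through))))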
def postag_tree(tree):
    # Part-of-speech tagging.
    words = tree.leaves()
    tag_iter = (pos for (word, pos) in pos_tag(words))
    newtree = Tree('S', [])
    for child in tree:
        if isinstance(child, Tree):
            newtree.append(Tree(child.label(), []))
            for subchild in child:
                newtree[-1].append( (subchild, next(tag_iter)) )
        else:
            newtree.append( (child, next(tag_iter)) )
    return newtree
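Assuming nltk's pos_tag is available (it needs the averaged_perceptron_tagger model downloaded), this re-tags a chunked but untagged tree:

from nltk import Tree, pos_tag  # assumes nltk.download('averaged_perceptron_tagger')

chunked = Tree('S', [Tree('NP', ['the', 'dog']), 'barked'])
print(postag_tree(chunked))  # e.g. (S (NP the/DT dog/NN) barked/VBD)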
def visualize_sentence_tree(sentence_tree):
    
    processed_tree = process_sentence_tree(sentence_tree)
    processed_tree = [
                        Tree( item[0],
                             [
                                 Tree(x[1], [x[0]])
                                 for x in item[1]
                             ]
                            )
                            for item in processed_tree
                     ]
    tree = Tree('S', processed_tree )
    tree.draw()
def _add_entity(t,tpl,entity_type):
    """
    Does the work of adding the entity-type node
    """

    parent_positions=[]
    parents=[]

    first_parent_position=t.leaf_treeposition(tpl[0])[:-1]
    first_grandparent_position=first_parent_position[:-1]

    for i in range(tpl[0],tpl[-1]):
        parent_position=t.leaf_treeposition(i)[:-1]
        parent=t[parent_position]
        parent_positions.append(parent_position)
        parents.append(parent)

    if 'parent_position' in locals():
        grandparent_position=parent_position[:-1]
        grandparent=t[grandparent_position]

        if grandparent_position==first_grandparent_position:
            # augment the nodes ONLY if every token in the mention has the same grandparent
            # i.e., if 'Barack Hussein Obama' is one NP, replace it with (NP (E-PER (NNP Barack)(NNP Hussein)(NNP Obama)))
            # but if we have "National Rifle" in one NP and "Association" in another NP, we don't bother adding E-ORG at all
            # (hopefully that doesn't exclude too many trees)
            aug_node='E-'+entity_type

            new_tree=Tree(aug_node,parents)

            if len(parent_positions)>1:
                if parent_positions[-1][-1]!=len(grandparent.leaves())-1: #if the last member of the tuple is NOT the rightmost child
                    #giving up on slices; collecting all of gp's children, then adding b
                    new_leaves=new_tree.leaves()
                    new_kids=[]
                    for kid in grandparent:
                        if kid[0] not in new_leaves:
                            new_kids.append(kid)
                        elif kid[0]==new_leaves[0]:
                            new_kids.append(new_tree)
                        else:
                            pass
                    new_grandparent = Tree(grandparent.label(), new_kids)  # .node was the NLTK 2 attribute
                    ggparent=t[grandparent_position[:-1]]
                    ggparent[grandparent_position[-1]]=new_grandparent
                else: #it is the rightmost child
                    grandparent[parent_positions[0][-1]:len(grandparent.leaves())]=[new_tree]
            else: #one-word node
                grandparent[parent_positions[0][-1]]=new_tree
Example #22
    def test_productions(self):
        t = Tree.fromstring(
            """
                (S
                    (NP (Det el) (Noun gato))
                    (VP (Verb come) (NP (Noun pescado) (Adj crudo)))
                )
            """)

        # Bugfix from official test (, start='S')
        model = UPCFG([t], start='S')

        prods = model.productions()

        prods2 = [
            ProbabilisticProduction(N('S'), [N('NP'), N('VP')], prob=1.0),
            ProbabilisticProduction(N('NP'), [N('Det'), N('Noun')], prob=0.5),
            ProbabilisticProduction(N('Det'), ['Det'], prob=1.0),
            ProbabilisticProduction(N('Noun'), ['Noun'], prob=1.0),
            ProbabilisticProduction(N('VP'), [N('Verb'), N('NP')], prob=1.0),
            ProbabilisticProduction(N('Verb'), ['Verb'], prob=1.0),
            ProbabilisticProduction(N('NP'), [N('Noun'), N('Adj')], prob=0.5),
            ProbabilisticProduction(N('Adj'), ['Adj'], prob=1.0),
        ]

        self.assertEqual(set(prods), set(prods2))
def read_segtree_file(fn):
    """reads a string representing a discourse tree (from the seg.
       annotation) and returns a list of its child tree objects"""
    with codecs.open(fn, 'r', 'utf-8') as f:
        s = f.read()
        text_tree = Tree.fromstring(s, read_leaf=prefix_number_seg_token)
        return [segment for segment in text_tree]
Example #24
def test_tree4():   
    
    annotator=Annotator()
    sent = "There are people dying make this world a better place for you and for me."
    sent = "Biplab is a good boy." 
    sent = "He created the robot and broke it after making it."
    sent = "Bachelor 's degree in computer science , design or related field."    
    sent = "B.S. in Computer Science , a related degree or its equivalent"    
    sent = "BS , MS , or PhD in Computer Science or a similar field preferred"
    sent = "Computer Science or related technical degree from an accredited four year university "
    sent = "Degree in Computer Science or Engineering with a high GPA ."    
    sent = "A Master's degree in Computer Science or Engineering is mandatory ."
    
    sent = "A Computer Science or related degree "
    sent = "I love science and SciFi book"
    sent = "I love music and SciFi book"
   
    result = annotator.getAnnotations(sent)
    tree_str = result['syntax_tree']
    print()
    print(tree_str)
    
    tree = Tree.fromstring(tree_str)[0]
    print()
    print("Root label=", tree.label())
    tree.draw()
Example #25
def test():
    """Do some tree drawing tests."""
    def print_tree(n, tree, sentence=None, ansi=True, **xargs):
        print()
        print('{0}: "{1}"'.format(n, ' '.join(sentence or tree.leaves())))
        print(tree)
        print()
        drawtree = TreePrettyPrinter(tree, sentence)
        try:
            print(drawtree.text(unicodelines=ansi, ansi=ansi, **xargs))
        except (UnicodeDecodeError, UnicodeEncodeError):
            print(drawtree.text(unicodelines=False, ansi=False, **xargs))

    from nltk.corpus import treebank
    for n in [0, 1440, 1591, 2771, 2170]:
        tree = treebank.parsed_sents()[n]
        print_tree(n, tree, nodedist=2, maxwidth=8)
    print()
    print('ASCII version:')
    print(TreePrettyPrinter(tree).text(nodedist=2))

    tree = Tree.fromstring(
        '(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) '
        '(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) '
        '(vg 10) (inf (verb 11)))))) (punct 12))', read_leaf=int)
    sentence = ('Ze had met haar moeder kunnen gaan winkelen ,'
                ' zwemmen of terrassen .'.split())
    print_tree('Discontinuous tree', tree, sentence, nodedist=2)
Example #26
def merge_tree_nnps(tree):
    """
    Takes a parse tree and merges any consecutive leaf nodes that come from NNPs
    For example if there is a segment of:
        (NP
            (JJ old)
            (NNP Pierre)
            (NNP Vinken)
        )
    Returns:
        (NP
            (JJ old)
            (NNP PierreVinken)
        )
    """

    # require a parented tree to get a subtrees tree position
    p = ParentedTree.convert(tree)

    # iterates subtrees of height 3. This is where NP's leading to NNP's leading to lexicalizations will be
    for s in p.subtrees(filter=lambda s: s.height() == 3):
        # merge NNP's in the list representation of this trees children: [(POS, word), ...] 
        new_noun_phrase = merge_tagged_nnps([(c.label(), c[0]) for c in s])
        child_str = " ".join("(%s %s)" % (pos, word) for pos, word in new_noun_phrase)
        # create new subtree with merged NNP's
        new_s = ParentedTree.fromstring("(%s %s)" % (s.label(), child_str))

        # replace old subtree with new subtree
        p[s.treeposition()] = new_s
    return Tree.convert(p)
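merge_tagged_nnps is defined elsewhere in the source project; a plausible minimal version consistent with the docstring, offered only as a sketch:

def merge_tagged_nnps(tagged):
    # Plausible reconstruction; the real helper lives elsewhere in the project.
    # Collapse consecutive (POS, word) pairs tagged NNP into a single pair.
    merged = []
    for pos, word in tagged:
        if pos == 'NNP' and merged and merged[-1][0] == 'NNP':
            merged[-1] = ('NNP', merged[-1][1] + word)  # e.g. Pierre + Vinken -> PierreVinken
        else:
            merged.append((pos, word))
    return merged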
def rulelogic(sentence):
    leaves_list = []
    text = sentence

    # `nlp` is a CoreNLP wrapper defined elsewhere
    output = nlp.annotate(text, properties={
        'annotators': 'tokenize,ssplit,pos,depparse,parse',
        'outputFormat': 'json'
    })
    parsetree = output['sentences'][0]['parse']
    for i in Tree.fromstring(parsetree).subtrees():
        if i.label() == 'PRP':
            leaves_list.append(i.leaves())
        if i.label() == 'VBP' or i.label() == 'VBZ':
            leaves_list.append(i.label())
    if (any("We" in x for x in leaves_list) or any("I" in x for x in leaves_list) or
            any("You" in x for x in leaves_list) or any("They" in x for x in leaves_list)) and \
            any("VBZ" in x for x in leaves_list):
        print("Alert: \nPlease check subject and verb in the sentence.\n"
              "You may have a plural subject and a singular verb.")
    elif (any("He" in x for x in leaves_list) or any("She" in x for x in leaves_list) or
            any("It" in x for x in leaves_list)) and any("VBP" in x for x in leaves_list):
        print("Alert: \nPlease check subject and verb in the sentence.\n"
              "You may have a singular subject and a plural verb.")
    else:
        print("The sentence looks correct.")
Example #28
def extract_entities(pos_server, assimilator, mode, text, link):
    """
    Extract tokens in the buckets of nouns and other entities
    pos_server: part-of-speech tagger address
    assimilator: assimilator address
    mode: metadata or content
    """
    content = get_assimilator_data(mode=mode, assimilator=assimilator, text=text, link=link)
    if mode == "meta":
        import json
        yield json.dumps(json.loads(content.decode()), indent=4)
    else:
        import json
        from .semantic_parser import read_dep
        from nltk.tree import Tree

        concept_map = {}

        pos_generator = process_pos(pos_server, content=content)
        for line in pos_generator:
            data = json.loads(line.decode())
            tree = Tree.fromstring(data['tree'])

            tokens = read_dep(tree)
            yield tokens
Example #29
def yngve_redux(treestring):
	""" For the given parsers-tree-string, return the word count and the yngve score. """
	tree = Tree.fromstring(treestring)
	total = float(calc_yngve_score(tree, 0))
	words = float(get_word_score(tree))

	return [total, words]
    def initialize_edu_data(edus):
        '''
        Create a representation of the list of EDUS that make up the input.
        '''

        wnum = 0  # counter for distance features
        res = []
        for edu_index, edu in enumerate(edus):
            # lowercase all words
            edu_words = [x[0].lower() for x in edu]
            edu_pos_tags = [x[1] for x in edu]

            # make a dictionary for each EDU
            new_tree = Tree.fromstring('(text)')
            new_tree.append('{}'.format(edu_index))
            tmp_item = {"head_idx": wnum,
                        "start_idx": wnum,
                        "end_idx": wnum,
                        "nt": "text",
                        "head": edu_words,
                        "hpos": edu_pos_tags,
                        "tree": new_tree}
            wnum += 1
            res.append(tmp_item)
        return res
Example #31
def tree(bracketed):
    t = Tree.fromstring(bracketed)
    t.draw()
Example #32
 def normalize_leaves(self, tree):
     tree = Tree.fromstring(tree)
     for pos in tree.treepositions('leaves'):
         tree[pos] = self.stemmer.stem(tree[pos]).lower()
     return str(tree).replace("\n", "")
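self.stemmer is whatever stemmer the class was built with; inlined with nltk's PorterStemmer (an assumption), the effect is:

from nltk import Tree
from nltk.stem import PorterStemmer  # assumption: any nltk stemmer fits here

stemmer = PorterStemmer()
tree = Tree.fromstring("(S (NP (NNS Dogs)) (VP (VBD barked)))")
for pos in tree.treepositions('leaves'):
    tree[pos] = stemmer.stem(tree[pos]).lower()
print(str(tree).replace("\n", ""))  # (S (NP (NNS dog)) (VP (VBD bark)))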
def extractSubConjuncts(tree):
    (sub1, sub2) = tsurgeon.extract_sub_conjuncts(tree)
    return (Tree.fromstring(sub1), Tree.fromstring(sub2))
Example #34
from collections import Counter

def p_rule(rule):
    # The top of this snippet is truncated in the original source; this header
    # and the Counter setup are a plausible reconstruction, not the original code.
    rule_head = Counter()
    rules = Counter()
    rules_with_p = []
    for r in rule:
        rule_head.update({r[0]: 1})
        rules.update({r: 1})
    for r in rules.keys():
        p = float("{0:.2f}".format(float(rules[r])/float(rule_head[r[0]])))
        if len(r) == 3:
            r = r[0],(r[1],r[2])
        rules_with_p.append((r[0],r[1], p))
    return rules_with_p

# read rule
with open('CNF_rule.txt', 'r') as file:
	rule = []
	for line in file:
		print(line)
		line = line.strip('\n')
		t = Tree.fromstring(line)
		rule += convert_rule(t)

	# add rule with probabilty
	p_rules = p_rule(rule)
	with open('rule.txt', 'w') as file_rule:
		for rules in p_rules:
			for term in rules:
				if type(term) is list or type(term) is tuple:
					for p in term:
						file_rule.write(p)
						file_rule.write(' ')
				else:
					file_rule.write(str(term))
					file_rule.write(' ')
			file_rule.write('\n')
Example #35
 def select(self, tree):
     if tree is None:
         raise ValueError("Parse tree not avaialable")
     return Tree("*CHAIN*", [p.select(tree) for p in self.pieces])
Example #36
def dertotree(derivation):
    return Tree(derivation.entity, [dertotree(d) for d in derivation.daughters if type(d) != UdfTerminal])
Example #37
 def _make_tree(self, result):
     return Tree.fromstring(result)
Example #38
    def read_features(self, flag):
        all_data = self.read_json(
            path.join(self.data_dir, flag + '.stanford.json'))
        all_feature_data = []
        for data in all_data:
            sentence_len = 0
            sentence_feature = []
            sentence = ''
            words = []
            index = []
            sentences = data['sentences']
            for sentence in sentences:
                tokens = sentence['tokens']
                for token in tokens:
                    feature_dict = {}
                    feature_dict['word'] = token['originalText']
                    words.append(token['word'].replace('\xa0', ''))
                    # sentence += token['word']
                    start_index = token['characterOffsetBegin']
                    end_index = token['characterOffsetEnd']
                    feature_dict['char_index'] = [
                        i for i in range(start_index, end_index)
                    ]
                    feature_dict['length'] = sentence_len + len(sentence)
                    feature_dict['pos'] = token['pos']
                    sentence_feature.append(feature_dict)
            # df = df.append([{'word': ' ', 'pos': ' '}], ignore_index=True)

                deparse = sentence['basicDependencies']
                for dep in deparse:
                    dependent_index = dep['dependent'] - 1
                    sentence_feature[dependent_index]['dep'] = dep['dep']
                    sentence_feature[dependent_index]['governed_index'] = dep['governor'] - 1

                c_parse = Tree.fromstring(sentence['parse'].replace('\xa0', ''))
                current_index = 0
                for s in c_parse.subtrees(lambda t: t.label() in chunk_pos):
                    leaves = s.leaves()
                    if len(leaves) == 0:
                        continue
                    node = s.label()
                    index = words[current_index:].index(leaves[0]) + current_index
                    current_index = index
                    for i, leaf in enumerate(leaves):
                        if 'chunk_tags' not in sentence_feature[index + i]:
                            sentence_feature[index + i]['chunk_tags'] = []
                        sentence_feature[index + i]['chunk_tags'].append({
                            'chunk_tag': node,
                            'height': 0,
                            'range': [index, index + len(leaves) - 1]
                        })
                        for chunk_tag in sentence_feature[index + i]['chunk_tags']:
                            chunk_tag['height'] += 1
                for token in sentence_feature:
                    if 'chunk_tags' not in token:
                        token['chunk_tags'] = [{
                            'chunk_tag': 'ROOT',
                            'height': 1,
                            'range': [0, len(sentence_feature) - 1]
                        }]

            all_feature_data.append(sentence_feature)
        return all_feature_data
Example #39
 def bin_question_extract(self, tree):
     t = Tree.fromstring(tree)
     t_pos = t.pos()
     return t_pos
Example #40
 def reparse_tree(self, line):
     ptree = Tree.fromstring(line)
     leaves = ptree.leaves()
Example #41
def draw_text_trees(text):
    tree = Tree.fromstring(str(text))
    return svgling.draw_tree(tree)
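svgling renders nltk trees as SVG; assuming the package is installed (pip install svgling), the returned layout displays inline in a Jupyter cell:

layout = draw_text_trees("(S (NP (PRP It)) (VP (VBZ works)))")
layout  # in a notebook cell, this renders the tree as an SVG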
Example #42
#!/usr/bin/env python

import sys
from nltk.tree import Tree

print r"\documentclass[10pt]{article}"
print r"\usepackage[landscape]{geometry}"
print r"\usepackage{tikz-qtree}"
print r"\begin{document}"

for line in sys.stdin:
    tree = Tree.fromstring(line.rstrip())
    print r"\begin{tikzpicture}[scale=.5]"
    print tree.pprint_latex_qtree()
    print r"\end{tikzpicture}"
    print ""

print r"\end{document}"
Example #43
    def generate_data(self, corpus, pairtypes=("mirna", "protein")):
        if os.path.isfile(self.temp_dir + self.modelname + ".txt"):
            os.remove(self.temp_dir + self.modelname + ".txt")
        xerrors = 0

        #print pairs
        for sentence in corpus.get_sentences("goldstandard"):
            doc_lines = []
            pcount = 0
            logging.info("{}".format(sentence.sid))
            sentence_entities = [
                entity for entity in sentence.entities.elist["goldstandard"]
            ]
            # logging.debug("sentence {} has {} entities ({})".format(sentence.sid, len(sentence_entities), len(sentence.entities.elist["goldstandard"])))
            for pair in itertools.combinations(sentence_entities, 2):
                if (pair[0].type == pairtypes[0] and pair[1].type == pairtypes[1]) or \
                        (pair[1].type == pairtypes[0] and pair[0].type == pairtypes[1]):
                    # logging.debug(pair)
                    if pair[0].type == pairtypes[0]:
                        e1id = pair[0].eid
                        e2id = pair[1].eid
                    else:
                        e1id = pair[1].eid
                        e2id = pair[0].eid
                        pair = (pair[1], pair[0])
                    pid = sentence.did + ".p" + str(pcount)
                    """if sid1 != sid2:
                        sentence1 = corpus.documents[did].get_sentence(sid1)
                        tree1 = self.mask_entity(sentence1, Tree.fromstring(sentence1.parsetree), pair[0], "candidate1")
                        sentence2 = corpus.documents[did].get_sentence(sid2)
                        tree2 = self.mask_entity(sentence2, Tree.fromstring(sentence2.parsetree), pair[1], "candidate2")
                        tree = self.join_trees(tree1, tree2)
                    else:"""
                    sentence1 = corpus.documents[sentence.did].get_sentence(
                        pair[0].sid)
                    if sentence1.parsetree == "SENTENCE_SKIPPED_OR_UNPARSABLE":
                        logging.info("skipped {}=>{} on sentence {}-{}".format(
                            pair[0].text, pair[1].text, sentence1.sid,
                            sentence1.text))
                        continue
                    tree = Tree.fromstring(sentence1.parsetree)
                    if "candidate1" in sentence1.parsetree:
                        logging.info(sentence1.parsetree)
                    tree = self.mask_entity(sentence1, tree, pair[0],
                                            "candidate1")
                    tree = self.mask_entity(sentence1, tree, pair[1],
                                            "candidate2")
                    # if tree[0] != '(':
                    #     tree = '(S (' + tree + ' NN))'
                    # this depends on the version of nltk

                    tree, found = self.get_path(tree)
                    #if len(docs[sid][ddi.SENTENCE_ENTITIES]) > 20:
                    #print line
                    #    line = "1 |BT| (ROOT (NP (NN candidatedrug) (, ,) (NN candidatedrug))) |ET|"
                    #    xerrors += 1
                    #else:
                    # tree = self.normalize_leaves(tree)
                    line = self.get_svm_train_line(tree, pair)
                    if pair[1].eid not in pair[0].targets:
                        line = '-' + line
                    self.pids[pid] = pair
                    doc_lines.append(line)
                    pcount += 1
            logging.debug("writing {} lines to file...".format(len(doc_lines)))
            with codecs.open(self.temp_dir + self.modelname + ".txt", 'a',
                             "utf-8") as train:
                for l in doc_lines:
                    train.write(l)
        logging.info("wrote {}{}.txt".format(self.temp_dir, self.modelname))
Example #44
def demo():
    import random

    def fill(cw):
        cw['fill'] = '#%06d' % random.randint(0, 999999)

    cf = CanvasFrame(width=550, height=450, closeenough=2)

    t = Tree.fromstring('''
    (S (NP the very big cat)
       (VP (Adv sorta) (V saw) (NP (Det the) (N dog))))''')

    tc = TreeWidget(cf.canvas(), t, draggable=1,
                    node_font=('helvetica', -14, 'bold'),
                    leaf_font=('helvetica', -12, 'italic'),
                    roof_fill='white', roof_color='black',
                    leaf_color='green4', node_color='blue2')
    cf.add_widget(tc, 10, 10)

    def boxit(canvas, text):
        big = ('helvetica', -16, 'bold')
        return BoxWidget(canvas, TextWidget(canvas, text,
                                            font=big), fill='green')

    def ovalit(canvas, text):
        return OvalWidget(canvas, TextWidget(canvas, text),
                          fill='cyan')

    treetok = Tree.fromstring(
        '(S (NP this tree) (VP (V is) (AdjP shapeable)))')
    tc2 = TreeWidget(cf.canvas(), treetok, boxit, ovalit, shapeable=1)

    def color(node):
        node['color'] = '#%04d00' % random.randint(0, 9999)

    def color2(treeseg):
        treeseg.label()['fill'] = '#%06d' % random.randint(0, 9999)
        treeseg.label().child()['color'] = 'white'

    tc.bind_click_trees(tc.toggle_collapsed)
    tc2.bind_click_trees(tc2.toggle_collapsed)
    tc.bind_click_nodes(color, 3)
    tc2.expanded_tree(1).bind_click(color2, 3)
    tc2.expanded_tree().bind_click(color2, 3)

    paren = ParenWidget(cf.canvas(), tc2)
    cf.add_widget(paren, tc.bbox()[2] + 10, 10)

    tree3 = Tree.fromstring('''
    (S (NP this tree) (AUX was)
       (VP (V built) (PP (P with) (NP (N tree_to_treesegment)))))''')
    tc3 = tree_to_treesegment(cf.canvas(), tree3, tree_color='green4',
                              tree_xspace=2, tree_width=2)
    tc3['draggable'] = 1
    cf.add_widget(tc3, 10, tc.bbox()[3] + 10)

    def orientswitch(treewidget):
        if treewidget['orientation'] == 'horizontal':
            treewidget.expanded_tree(1, 1).subtrees()[0].set_text('vertical')
            treewidget.collapsed_tree(1, 1).subtrees()[0].set_text('vertical')
            treewidget.collapsed_tree(1).subtrees()[1].set_text('vertical')
            treewidget.collapsed_tree().subtrees()[3].set_text('vertical')
            treewidget['orientation'] = 'vertical'
        else:
            treewidget.expanded_tree(1, 1).subtrees()[0].set_text('horizontal')
            treewidget.collapsed_tree(1, 1).subtrees()[0].set_text('horizontal')
            treewidget.collapsed_tree(1).subtrees()[1].set_text('horizontal')
            treewidget.collapsed_tree().subtrees()[3].set_text('horizontal')
            treewidget['orientation'] = 'horizontal'

    text = """
Try clicking, right clicking, and dragging
different elements of each of the trees.
The top-left tree is a TreeWidget built from
a Tree.  The top-right is a TreeWidget built
from a Tree, using non-default widget
constructors for the nodes & leaves (BoxWidget
and OvalWidget).  The bottom-left tree is
built from tree_to_treesegment."""
    twidget = TextWidget(cf.canvas(), text.strip())
    textbox = BoxWidget(cf.canvas(), twidget, fill='white', draggable=1)
    cf.add_widget(textbox, tc3.bbox()[2] + 10, tc2.bbox()[3] + 10)

    tree4 = Tree.fromstring('(S (NP this tree) (VP (V is) (Adj horizontal)))')
    tc4 = TreeWidget(cf.canvas(), tree4, draggable=1,
                     line_color='brown2', roof_color='brown2',
                     node_font=('helvetica', -12, 'bold'),
                     node_color='brown4', orientation='horizontal')
    tc4.manage()
    cf.add_widget(tc4, tc3.bbox()[2] + 10, textbox.bbox()[3] + 10)
    tc4.bind_click(orientswitch)
    tc4.bind_click_trees(tc4.toggle_collapsed, 3)

    # Run mainloop
    cf.mainloop()
import json
from jsonrpc import ServerProxy, JsonRpc20, TransportTcpIp
from pprint import pprint


class StanfordNLP:
    def __init__(self):
        self.server = ServerProxy(JsonRpc20(),
                                  TransportTcpIp(addr=("127.0.0.1", 8080)))

    def parse(self, text):
        return json.loads(self.server.parse(text))


nlp = StanfordNLP()
result = nlp.parse("Hello world!  It is so beautiful.")
pprint(result)

from nltk.tree import Tree
tree = Tree.fromstring(result['sentences'][0]['parsetree'])  # Tree.parse was the NLTK 2 name
pprint(tree)
Example #46
 def select(self, tree):
     if tree is None: raise ValueError('Parse tree not available')
     return Tree('*SPLIT*', [p.select(tree) for p in self.pieces])
Example #47
import sys
import re
from nltk.tree import Tree  # needed for Tree.fromstring below

trees = list()
tree = ''
for line in sys.stdin:
    if (line[0] == '('):
        if (tree != ''):
            trees.append(tree)
            tree = ''
    tree += line
if (tree != ''):
    trees.append(tree)

for i, t in enumerate(trees):
    tree = Tree.fromstring(t)
    # remove punctuation
    for sub in tree.subtrees():
        remove = list()
        for n, child in enumerate(sub):
            if isinstance(child, str):
                if (re.match(
                        r'^(\.|,|\?|!|;|:|\'|\'\'|`|``|&apos;|&quot;|-[LR][RSC]B-|-|--)$',
                        child)):
                    remove.append(n)
                    #del sub[n]
        for n in sorted(remove, reverse=True):
            del sub[n]
    #sys.stderr.write(str(len(tree.leaves())) + ' ')
    # remove brackets with one item
    for sub in tree.subtrees():
Example #48
import sys
from nltk.tree import Tree

fi = open(sys.argv[1]).readlines()

for line in fi:
    line = line.strip()
    if line == '': continue
    print(' '.join(Tree.fromstring(line).leaves()))

Example #49
 def get_features(self, doc):
     for sent in sent_tokenize(doc['text']):
         sent = sent.lower()
         if self.feats == 'WordNgram':
             tokens = word_tokenize(sent)
             for n in range(1, 3):
                 if len(tokens) < n:
                     sent_ngrams = ngrams(tokens, len(tokens))
                 else:
                     sent_ngrams = ngrams(tokens, n)
                 for ngram in sent_ngrams:
                     yield ngram
         elif self.feats == 'CharNgram':
             chrs = [c for c in sent]
             for n in range(1, 7):
                 sent_ngrams = ngrams(chrs, n)
                 for ngram in sent_ngrams:
                     yield ngram
         elif self.feats == 'PosNgram':
             token = word_tokenize(sent)
              tagged = pos_tag(token)
             tags = []
             for tagtoken in tagged:
                 tags.append(tagtoken[1])
             for n in range(1, 5):
                 taggrams = ngrams(tags, n)
                 for ngram in taggrams:
                     yield ngram
         elif self.feats == 'ProdRules':
             parse = list(parser.raw_parse(sent))
             parse2 = [''.join(str(tree)) for tree in parse]
             parse3 = ''.join(parse2)
             ptree = Tree.fromstring(parse3)
             for rule in ptree.productions():
                 yield rule
         elif self.feats == 'FunctWordsSkipgram':
             skip = []
             tokens = wordpunct_tokenize(sent)
             for token in tokens:
                 if token in funct_words:
                     skip.append(token)
                     skipgrams = ngrams(skip, 2)
                     for ngram in skipgrams:
                         yield ngram
         elif self.feats == "ContentSkipGram":
             skip = []
             tokens = wordpunct_tokenize(sent)
             for token in tokens:
                 if token not in funct_words:
                     skip.append(token)
                     skipgrams = ngrams(skip, 2)
                     for ngram in skipgrams:
                         yield ngram
         elif self.feats == 'FunctWordCount':
             frequency = defaultdict(int)
             tokens = wordpunct_tokenize(sent)
             for token in tokens:
                 if token in funct_words:
                     frequency[token] += 1
             functwordfreqs = []
             for funct_word in funct_words:
                 functwordfreqs.append(frequency[funct_word])
             return functwordfreqs
         elif self.feats == 'OverallFunctWordCount':
             functcount = 0
             tokens = wordpunct_tokenize(sent)
             for token in tokens:
                 if token in funct_words:
                     functcount += 1
                     #                print(functcount)
             return functcount
         elif self.feats == 'Dependency':
             result = dependency_parser.raw_parse(sent)
             for dep in result:
                 triples = list(dep.triples())
                 for triple in triples:
                     trip = triple[0][1] + '.' + triple[1] + '.' + triple[2][1]
                     yield trip
#coding=utf8

import json
from jsonrpc import ServerProxy, JsonRpc20, TransportTcpIp
from pprint import pprint


class StanfordNLP:
    def __init__(self):
        self.server = ServerProxy(JsonRpc20(),
                                  TransportTcpIp(addr=("127.0.0.1", 8080)))

    def parse(self, text):
        return json.loads(self.server.parse(text))


nlp = StanfordNLP()
#result = nlp.parse(u"Hello world!  It is so beautiful.")
result = nlp.parse(u"今天天气真不错啊!")
pprint(result)

from nltk.tree import Tree
tree = Tree.fromstring(result['sentences'][0]['parsetree'])
#pprint(tree)
tree.pretty_print()
'''
Insert empty trees for empty sentences
'''

import sys, argparse
from util import tokenize_words
from nltk.tree import Tree


def opts():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('trees',
                        type=argparse.FileType('r'),
                        help='File with parse trees')
    parser.add_argument('sentences',
                        type=argparse.FileType('r'),
                        help='File with original sentences')
    return parser


if __name__ == "__main__":
    args = opts().parse_args()

    for tree, sentence in zip(args.trees, args.sentences):
        parse = Tree.fromstring(tree)
        words = tokenize_words(sentence)
        if len(parse.leaves()) != len(words):
            print "Parse tree does not match sentence!"
            print parse.leaves()
            print words
Example #52
def flatten_deeptree(tree):
    return Tree(tree.label(), flatten_childtrees([c for c in tree]))
def chomsky_normal_form(
    tree, factor="right", horzMarkov=None, vertMarkov=0, childChar="|", parentChar="^"
):
    # assume all subtrees have homogeneous children
    # assume all terminals have no siblings

    # A semi-hack to have elegant looking code below.  As a result,
    # any subtree with a branching factor greater than 999 will be incorrectly truncated.
    if horzMarkov is None:
        horzMarkov = 999

    # Traverse the tree depth-first keeping a list of ancestor nodes to the root.
    # I chose not to use the tree.treepositions() method since it requires
    # two traversals of the tree (one to get the positions, one to iterate
    # over them) and node access time is proportional to the height of the node.
    # This method is 7x faster which helps when parsing 40,000 sentences.

    nodeList = [(tree, [tree.label()])]
    while nodeList != []:
        node, parent = nodeList.pop()
        if isinstance(node, Tree):

            # parent annotation
            parentString = ""
            originalNode = node.label()
            if vertMarkov != 0 and node != tree and isinstance(node[0], Tree):
                parentString = "%s<%s>" % (parentChar, "-".join(parent))
                node.set_label(node.label() + parentString)
                parent = [originalNode] + parent[: vertMarkov - 1]

            # add children to the agenda before we mess with them
            for child in node:
                nodeList.append((child, parent))

            # chomsky normal form factorization
            if len(node) > 2:
                childNodes = [child.label() for child in node]
                nodeCopy = node.copy()
                node[0:] = []  # delete the children

                curNode = node
                numChildren = len(nodeCopy)
                for i in range(1, numChildren - 1):
                    if factor == "right":
                        newHead = "%s%s<%s>%s" % (
                            originalNode,
                            childChar,
                            "-".join(
                                childNodes[i : min([i + horzMarkov, numChildren])]
                            ),
                            parentString,
                        )  # create new head
                        newNode = Tree(newHead, [])
                        curNode[0:] = [nodeCopy.pop(0), newNode]
                    else:
                        newHead = "%s%s<%s>%s" % (
                            originalNode,
                            childChar,
                            "-".join(
                                childNodes[max([numChildren - i - horzMarkov, 0]) : -i]
                            ),
                            parentString,
                        )
                        newNode = Tree(newHead, [])
                        curNode[0:] = [newNode, nodeCopy.pop()]

                    curNode = newNode

                curNode[0:] = [child for child in nodeCopy]
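A quick demonstration of the right factorization on a 4-ary node (this function mirrors nltk.treetransforms.chomsky_normal_form):

from nltk import Tree

t = Tree.fromstring("(S (NP I) (VP saw) (NP him) (ADVP today))")
chomsky_normal_form(t, horzMarkov=2)
print(t)  # (S (NP I) (S|<VP-NP> (VP saw) (S|<NP-ADVP> (NP him) (ADVP today))))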
Example #54
 def join_trees(self, tree1, tree2):
     ptree = Tree("ROOTROOT", [tree1, tree2])
     return ptree
Example #55
def map(tree, fn):
    # Note: shadows the built-in map(); fn returning None keeps the old value.
    if not isinstance(tree, Tree):  # plain-string leaf: recursing into it would fail
        mapped = fn(tree)
        return tree if mapped is None else mapped
    mapped_children = [map(tree[i], fn) for i in range(len(tree))]
    mapped_label = fn(tree.label())
    new_label = tree.label() if mapped_label is None else mapped_label
    return Tree(new_label, mapped_children)
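A quick check; fn is applied to labels and leaves alike:

from nltk import Tree

t = Tree.fromstring("(S (NP (D El) (N gato)))")
print(map(t, str.lower))  # (s (np (d el) (n gato)))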
Example #56
def flatten_deeptree(tree):
    """
        Flattens a deep tree
    """
    return Tree(tree.label(), flatten_childtrees([child for child in tree]))
Example #57
def divide_chemical_expression(s1, s2, ignore_state=False):
    '''Compare two chemical expressions for equivalence up to a multiplicative factor:

    - If they are not the same chemicals, returns False.
    - If they are the same, "divides" s1 by s2 and returns a factor x such that s1 / s2 == x, as a Fraction object.
    - If ignore_state is True, ignores phases when doing the comparison.

    Examples:
    divide_chemical_expression("H2O", "3H2O") -> Fraction(1,3)
    divide_chemical_expression("3H2O", "H2O") -> 3  # actually Fraction(3, 1), but compares == to 3.
    divide_chemical_expression("2H2O(s) + 2CO2", "H2O(s)+CO2") -> 2
    divide_chemical_expression("H2O(s) + CO2", "3H2O(s)+2CO2") -> False

    Implementation sketch:
        - extract factors and phases to standalone lists,
        - compare expressions without factors and phases,
        - divide lists of factors for each other and check
             for equality of every element in list,
        - return result of factor division

    '''

    # parsed final trees
    treedic = {}
    treedic['1'] = _get_final_tree(s1)
    treedic['2'] = _get_final_tree(s2)

    # strip phases and factors
    # collect factors in list
    for i in ('1', '2'):
        treedic[i + ' cleaned_mm_list'] = []
        treedic[i + ' factors'] = []
        treedic[i + ' phases'] = []
        # NLTK 3 uses .label(); the original used the NLTK 2 .node attribute
        for el in treedic[i].subtrees(filter=lambda t: t.label() == 'multimolecule'):
            count_subtree = [t for t in el.subtrees() if t.label() == 'count']
            group_subtree = [t for t in el.subtrees() if t.label() == 'group']
            phase_subtree = [t for t in el.subtrees() if t.label() == 'phase']
            if count_subtree:
                if len(count_subtree[0]) > 1:
                    treedic[i + ' factors'].append(
                        int(count_subtree[0][0][0]) /
                        int(count_subtree[0][2][0]))
                else:
                    treedic[i + ' factors'].append(int(count_subtree[0][0][0]))
            else:
                treedic[i + ' factors'].append(1.0)
            if phase_subtree:
                treedic[i + ' phases'].append(phase_subtree[0][0])
            else:
                treedic[i + ' phases'].append(' ')
            treedic[i + ' cleaned_mm_list'].append(
                Tree('multimolecule', [Tree('molecule', group_subtree)]))

    # order of factors and phases must mirror the order of multimolecules,
    # use 'decorate, sort, undecorate' pattern
    treedic['1 cleaned_mm_list'], treedic['1 factors'], treedic['1 phases'] = zip(
        *sorted(zip(treedic['1 cleaned_mm_list'], treedic['1 factors'], treedic['1 phases'])))

    treedic['2 cleaned_mm_list'], treedic['2 factors'], treedic['2 phases'] = zip(
        *sorted(zip(treedic['2 cleaned_mm_list'], treedic['2 factors'], treedic['2 phases'])))

    # check if expressions are correct without factors
    if not _check_equality(treedic['1 cleaned_mm_list'], treedic['2 cleaned_mm_list']):
        return False

    # phases are ruled by the ignore_state flag
    if not ignore_state:  # phases matter
        if treedic['1 phases'] != treedic['2 phases']:
            return False

    if any(
        map(lambda x, y: x / y - treedic[
            '1 factors'][0] / treedic['2 factors'][0],
            treedic['1 factors'], treedic['2 factors'])):
        # factors are not proportional
        return False
    else:
        # return ratio
        return Fraction(treedic['1 factors'][0] / treedic['2 factors'][0])
Example #58
def load_ace_file(textfile, fmt):
    print(f"  - {os.path.split(textfile)[1]}")
    annfile = textfile + ".tmx.rdc.xml"

    # Read the xml file, and get a list of entities
    entities = []
    with open(annfile) as infile:
        xml = ET.parse(infile).getroot()
    for entity in xml.findall("document/entity"):
        typ = entity.find("entity_type").text
        for mention in entity.findall("entity_mention"):
            if mention.get("TYPE") != "NAME":
                continue  # only NEs
            s = int(mention.find("head/charseq/start").text)
            e = int(mention.find("head/charseq/end").text) + 1
            entities.append((s, e, typ))

    # Read the text file, and mark the entities.
    with open(textfile) as infile:
        text = infile.read()

    # Strip XML tags, since they don't count towards the indices
    text = re.sub("<(?!/?TEXT)[^>]+>", "", text)

    # Blank out anything before/after <TEXT>
    def subfunc(m):
        return " " * (m.end() - m.start() - 6)

    text = re.sub(r"[\s\S]*<TEXT>", subfunc, text)
    text = re.sub(r"</TEXT>[\s\S]*", "", text)

    # Simplify quotes
    text = re.sub("``", ' "', text)
    text = re.sub("''", '" ', text)

    entity_types = {typ for (s, e, typ) in entities}

    # Binary distinction (NE or not NE)
    if fmt == "binary":
        i = 0
        toks = Tree("S", [])
        for (s, e, typ) in sorted(entities):
            if s < i:
                s = i  # Overlapping!  Deal with this better?
            if e <= s:
                continue
            toks.extend(word_tokenize(text[i:s]))
            toks.append(Tree("NE", text[s:e].split()))
            i = e
        toks.extend(word_tokenize(text[i:]))
        yield toks

    # Multiclass distinction (NE type)
    elif fmt == "multiclass":
        i = 0
        toks = Tree("S", [])
        for (s, e, typ) in sorted(entities):
            if s < i:
                s = i  # Overlapping!  Deal with this better?
            if e <= s:
                continue
            toks.extend(word_tokenize(text[i:s]))
            toks.append(Tree(typ, text[s:e].split()))
            i = e
        toks.extend(word_tokenize(text[i:]))
        yield toks

    else:
        raise ValueError("bad fmt value")
Example #59
 def make_tree(self, result):
     return Tree.fromstring(result["parse"])
    def test_parsed_sents(self):

        parsed_sents = conll2007.parsed_sents('esp.train')[0]

        self.assertEqual(
            parsed_sents.tree(),
            Tree('fortaleció', [
                Tree('aumento', [
                    'El',
                    Tree('del', [
                        Tree('índice', [
                            Tree('de', [Tree('desempleo', ['estadounidense'])])
                        ])
                    ])
                ]), 'hoy', 'considerablemente',
                Tree('al', [
                    Tree('euro', [
                        Tree('cotizaba', [
                            ',', 'que',
                            Tree('a', [Tree('15.35', ['las', 'GMT'])]), 'se',
                            Tree('en', [
                                Tree('mercado', [
                                    'el',
                                    Tree('de', ['divisas']),
                                    Tree('de', ['Fráncfort'])
                                ])
                            ]),
                            Tree('a', ['0,9452_dólares']),
                            Tree('frente_a', [
                                ',',
                                Tree('0,9349_dólares', [
                                    'los',
                                    Tree('de', [Tree('mañana', ['esta'])])
                                ])
                            ])
                        ])
                    ])
                ]), '.'
            ]))