Ejemplo n.º 1
0
 def __unicode__(self):
     """
     Render this node as HTML table markup.

     The first table row holds the node's displayed words joined by two
     spaces. Every following row holds one feature listed in PRINT_FEATURES
     that is present in self.features, formatted by the print function paired
     with it and HTML-escaped.

     Side effects: the displayed words are stored in self.str, and a
     "Determiner" feature whose "Value" is "no" is removed from
     self.features instead of being rendered.

     @return: the table markup for this node
     """
     # Word indices listed under the "Span" key of a printable feature are
     # left out of the node text.
     filtered_spans = []
     for feat, _ in PRINT_FEATURES:
         if (feat in self.features) and isinstance(self.features[feat], dict) \
                 and ("Span" in self.features[feat]):
             filtered_spans.extend(self.features[feat]["Span"])

     if 'Lemma' in self.features:
         # A lemma replaces the whole text; it is placed at the index of the
         # first word of the node.
         self.str = [Word(index=self.text[0].index,
                          word=self.features['Lemma'])]
     else:
         ls = self.text
         if self.orderText:
             ls = sorted(self.text, key=lambda word: word.index)
         # self.str stores the words as displayed in the node
         self.str = [w for w in ls if w.index not in filtered_spans]

     self.str = strip_punctuations(self.str)

     ret = '<TABLE BORDER="0" CELLSPACING="0"><TR><TD>'
     ret += "  ".join([unicode(x) for x in self.str])
     ret += "</TD></TR>"

     for feat, printFunc in PRINT_FEATURES:
         if feat not in self.features:
             continue
         if self.isPredicate and feat == "Definite":
             continue
         # Only the Determiner row itself is dropped when its value is "no".
         # Previously this check ran for every feature, so whichever feature
         # came first was silently skipped in place of the determiner.
         if feat == "Determiner" and self.features[feat]["Value"] == "no":
             del self.features[feat]
             continue
         ret += "<TR><TD>"
         ret += ('<FONT POINT-SIZE="10">'
                 + format(cgi.escape(printFunc(self.features[feat])))
                 + '</FONT>')
         ret += "</TD></TR>"

     ret += "</TABLE>"
     return ret
def create_dep_graphs_from_conll(sentences_conll):
    """
    Build one dependency graph per sentence from parsed CoNLL rows.

    Each sentence is a sequence of rows (``cols``). The columns read here are
    cols[0] (token id), cols[1] (word form), cols[8] (head id, '_' when the
    row is to be skipped) and cols[10] (relation label). For every usable row
    a Node is created, an artificial ROOT node is stored under id 0, and one
    labelled edge head -> dependent is added per row whose head and dependent
    both have nodes.

    @param sentences_conll: iterable of sentences, each a sequence of rows
    @return: list of (graph, nodesMap) tuples, one per sentence, where
             nodesMap maps token ids to their Node
    """
    graphs = []

    for sentence_conll in sentences_conll:
        curGraph = GraphWrapper("", "")
        nodesMap = {}
        coreference_map = {}
        # Reset for every sentence: without this the ROOT node below raised
        # NameError when a first sentence had no usable rows, or silently
        # reused the value left over from the previous sentence.
        coreference = None

        # nodes
        for i, cols in enumerate(sentence_conll):
            if cols[8] == '_':
                continue
            token_id = int(cols[0])
            word_form = cols[1]
            coreference = check_for_coreference(cols)
            if coreference:
                get_uids_from_coreference(coreference_map, sentence_conll, i)
            if token_id not in nodesMap:
                nodesMap[token_id] = Node(
                    text=[Word(index=token_id, word=word_form)],
                    isPredicate=False,
                    coreference=coreference_map.get(token_id, None),
                    features={},
                    gr=curGraph,
                    orderText=True)

        # NOTE(review): ROOT receives the coreference result of the last usable
        # row of the sentence (or None when there was none). This is kept as
        # in the original code, but looks unintended -- confirm.
        nodesMap[0] = Node(text=[Word(index=0, word='ROOT')],
                           isPredicate=False,
                           coreference=coreference,
                           features={},
                           gr=curGraph,
                           orderText=True)

        # edges
        for cols in sentence_conll:
            if cols[8] == '_':
                continue
            rel = encode_german_characters(cols[10])
            head_id = int(cols[8])
            dep_id = int(cols[0])
            if head_id in nodesMap and dep_id in nodesMap:
                edge = (nodesMap[head_id], nodesMap[dep_id])
                # An edge between the same pair may already exist
                # ("stanford bug" in the original comment); the later
                # label replaces it.
                if curGraph.has_edge(edge):
                    curGraph.del_edge(edge)
                curGraph.add_edge(edge=edge, label=rel)

        graphs.append((curGraph, nodesMap))
    return graphs
Ejemplo n.º 3
0
def getPossesive(gr,index):
    """
    Create an implicit possessive predicate node.

    The node contains a single Word built from the POSSESSIVE constant at the
    given index, is flagged as a predicate, carries the feature
    "implicit" = True and has an empty original_text.

    @param gr: graph passed to the new Node
    @param index: word index given to the possessive Word
    @return: the newly created Node
    """
    possessive_word = Word(index=index, word=POSSESSIVE)
    node = Node(
        text=[possessive_word],
        isPredicate=True,
        features={},
        gr=gr,
        orderText=True,
    )
    # The node does not correspond to any word of the sentence.
    node.features["implicit"] = True
    node.original_text = []
    return node
Ejemplo n.º 4
0
def missing_children(treeNode, graphNode):
    """
    Return the children of treeNode that graphNode does not account for.

    A child is reported as missing when any of the following holds:
      - its parent_relation is not a key of graphNode.neighbors();
      - the first neighbor under that relation starts with a word whose
        index differs from the child's id;
      - its parent_relation is listed in ignore_labels.

    The original code built this list and then returned a literal empty list,
    discarding the result; the computed list is now returned.

    @param treeNode: tree node whose children are inspected
    @param graphNode: graph node whose neighbors() are compared against
    @return: list of Word(index=child.id, word=child.word), one per missing
             child, in the order of treeNode.children
    """
    neighbors = graphNode.neighbors()

    def is_missing(child):
        rel = child.parent_relation
        # The order of the checks matters: the lookup in `neighbors` is only
        # safe once the relation is known to be present.
        if rel not in neighbors:
            return True
        if child.id != neighbors[rel][0].text[0].index:
            return True
        return rel in ignore_labels

    return [
        Word(index=c.id, word=c.word) for c in treeNode.children
        if is_missing(c)
    ]
Ejemplo n.º 5
0
def getCopular(gr,index,features):
    """
    Create an implicit copula predicate node.

    Any "Lemma" entry is removed from the given features dict (the caller's
    dict is modified in place) before it is handed to the new Node. The node
    contains a single Word built from the COPULA constant at the given index,
    is flagged as a predicate, carries the feature "implicit" = True and has
    an empty original_text.

    @param gr: graph passed to the new Node
    @param index: word index given to the copula Word
    @param features: feature dict for the new node; modified in place
    @return: the newly created Node
    """
    features.pop("Lemma", None)
    copula_word = Word(index=index, word=COPULA)
    node = Node(
        text=[copula_word],
        isPredicate=True,
        features=features,
        gr=gr,
        orderText=True,
    )
    # The node does not correspond to any word of the sentence.
    node.features["implicit"] = True
    node.original_text = []
    return node
Ejemplo n.º 6
0
def treeNode_to_graphNode(treeNode, gr):
    """
    Build a graph node from a dependency-tree node.

    The new node holds a single Word made from the tree node's id and word,
    is a predicate iff treeNode.is_verbal_predicate() says so, and starts
    with the features returned by get_verbal_features(treeNode) plus a "pos"
    entry taken from treeNode.pos. Its original_text is a copy of its text.

    @type treeNode DepTree
    @param gr: graph passed to the new node
    @return: the newly created newNode.Node
    """
    verbal_features = get_verbal_features(treeNode)
    word = Word(index=treeNode.id, word=treeNode.word)
    is_predicate = treeNode.is_verbal_predicate()

    graph_node = newNode.Node(
        text=[word],
        isPredicate=is_predicate,
        features=verbal_features,
        gr=gr,
    )
    graph_node.features["pos"] = treeNode.pos
    graph_node.original_text = copy(graph_node.text)
    return graph_node
Ejemplo n.º 7
0
def subgraph_to_string(graph, node, exclude=()):
    """
    Return the text of the subgraph reachable from node.

    Starting at node, every node reachable through graph.neighbors() is
    collected, never entering a node listed in exclude. The surface_form
    words of the collected nodes are gathered; in addition, for every
    incoming relation of a collected node whose label starts with "prep_"
    and whose first source node was collected too, a Word carrying the
    preposition (the label without "prep_") is added at the position
    get_min_max_span(graph, n)[0] - 1. Duplicate words are removed, the rest
    is sorted by index, stripped of punctuation and joined with single
    spaces.

    Errors raised while building the string now propagate unchanged; the
    original code replaced every error with a bare, message-less Exception.

    @param graph: graph providing neighbors()
    @param node: start node of the traversal
    @param exclude: nodes that must not be added to the subgraph
    @return: the words joined by single spaces, followed by one trailing
             space
    """
    # Worklist traversal: `nodes` doubles as the visit order and the
    # "already seen" collection. Membership is tested on the list, as in the
    # original, so only node equality is relied upon.
    nodes = [node]
    position = 0
    while position < len(nodes):
        for neighbour in graph.neighbors(nodes[position]):
            if neighbour in nodes or neighbour in exclude:
                continue
            nodes.append(neighbour)
        position += 1

    words = []
    for n in nodes:
        words += n.surface_form
        # add collapsed prepositions, but just nested ones (the source of
        # the relation has to be part of the subgraph as well)
        incidents = n.incidents()
        for parent_rel in incidents:
            if parent_rel.startswith("prep_") \
                    and incidents[parent_rel][0] in nodes:
                idx = get_min_max_span(graph, n)[0] - 1
                words.append(Word(idx, parent_rel.replace("prep_", "")))

    ordered = sorted(set(words), key=lambda w: w.index)
    return " ".join([w.word for w in strip_punctuations(ordered)]) + " "