Ejemplo n.º 1
0
 def __init__(self, t, locationAnnotator):
     """
     Set up the graph wrapper for a syntactic tree, then parse the tree.

     @type  t: Tree
     @param t: syntactic tree to be converted; when its C{id} is falsy it is
               treated as the ROOT element and its first child is used as
               the working tree instead

     @param locationAnnotator: stored unchanged on the instance as
                               C{self.locationAnnotator}

     @type gr: GraphWrapper
     @var  gr: the graph representing t, created from t's original sentence
     """
     # a falsy id means this is the ROOT element: work on its first child
     self.tree = t if t.id else t.children[0]

     # the sentence is always taken from t itself, even when t is the ROOT
     self.gr = GraphWrapper(t.get_original_sentence())
     self.locationAnnotator = locationAnnotator

     # maintain an appendix for easier browsing
     self.types = appendix_types()

     self.parse(self.tree)
Ejemplo n.º 2
0
def parseSentences(sent, HOME_DIR = BASE_PATH):
    """
    Run the parser on a piece of text and convert its output to graphs.

    sys.stdin is temporarily replaced by an in-memory stream holding C{sent}
    (parseInput is not handed the text directly, so it presumably reads
    stdin - confirm against BerkeleyInterface). The parser output is written
    to the file "./tmp.tmp", which is then read by read_dep_graphs_file.

    @param sent: text made available to the parser through sys.stdin
    @param HOME_DIR: forwarded to read_dep_graphs_file and GraphWrapper
    @return: list of (graph, graph.tree_str) tuples, one per graph read from
             the parser output; when no graph was read, a single
             (GraphWrapper("", HOME_DIR), "") tuple
    """
    orig_stdin = sys.stdin
    str_in = StringIO(sent)
    str_out = StringIO()
    sys.stdin = str_in
    try:
        parseInput(parser, opts, outputFile=str_out)
        # retrieve the parser output as a string
        result = str_out.getvalue()
    finally:
        # always give the real stdin back, even if the parser raised, so the
        # rest of the process is not left reading from a closed buffer
        sys.stdin = orig_stdin
        str_in.close()
        str_out.close()

    tmp_fn = "./tmp.tmp"
    with open(tmp_fn, 'w') as fout:
        fout.write(result)

    graphs = read_dep_graphs_file(tmp_fn, False, HOME_DIR)
    ret = []
    for graph in graphs:
        g = convert(graph)
        ret.append((g, g.tree_str))

    if not graphs:  # Berkley bug?
        # keep the "at least one result" contract with an empty graph
        ret.append((GraphWrapper("", HOME_DIR), ""))

    return ret
Ejemplo n.º 3
0
def create_dep_graphs_from_stream(stream, HOME_DIR):
    """
    Build dependency graphs from a stream of dependency lines.

    Consecutive non-blank lines describe one graph; a blank line ends the
    current graph. Every non-blank line must match the module-level C{pat}
    pattern, whose groups are (rel, head, head_id, dep, dep_id). A graph that
    is still open when the stream ends (no trailing blank line) is returned
    as well instead of being silently dropped.

    @param stream: iterable of text lines
    @param HOME_DIR: forwarded to every GraphWrapper that is created
    @return: list of (graph, nodesMap) tuples, where nodesMap maps the node id
             strings found in the lines to the Node objects created for them
    @raise AttributeError: when a non-blank line does not match C{pat}
    """
    graphs = []
    pending = False  # True while curGraph holds at least one parsed line
    curGraph = GraphWrapper("", HOME_DIR)
    nodesMap = {}
    for line in stream:
        line = line.strip()
        if line:
            pending = True
            rel, head, head_id, dep, dep_id = pat.match(line).groups()
            # create the head node first, then the dependent, each only once
            for node_id, word in ((head_id, head), (dep_id, dep)):
                if node_id not in nodesMap:
                    # only the part of the id before the first apostrophe is
                    # used as the numeric word index
                    nodesMap[node_id] = Node(
                        text=[Word(index=int(node_id.split("'")[0]),
                                   word=word)],
                        isPredicate=False,
                        features={},
                        gr=curGraph,
                        orderText=True)
            edge = (nodesMap[head_id], nodesMap[dep_id])
            if curGraph.has_edge(edge):  # stanford bug
                # a repeated edge replaces the earlier one (and its label)
                curGraph.del_edge(edge)
            curGraph.add_edge(edge=edge, label=rel)
        elif pending:
            # blank line after content: close the current graph, start anew
            pending = False
            graphs.append((curGraph, nodesMap))
            curGraph = GraphWrapper("", HOME_DIR)
            nodesMap = {}
    if pending:
        # stream ended without a terminating blank line: keep the last graph
        graphs.append((curGraph, nodesMap))
    return graphs
Ejemplo n.º 4
0
def load_prop_from_file(filename, HOME_DIR):
    """
    Load graphs from a text file of blank-line-terminated records.

    The first line of a record is the sentence. Every following non-empty
    line describes one node as six tab-separated fields::

        uid, words, pos, isPredicate, isAsserted, parents

    C{words} is a ";"-separated list of "index,word" entries and C{parents}
    is a ";"-separated list of "rel,index" entries (possibly empty). An empty
    line ends the record: the collected parent edges are added and the graph
    is stored. A record that is still open at the end of the file (no
    trailing empty line) is finished the same way instead of being lost.

    @param filename: path of the file to read
    @param HOME_DIR: forwarded to every GraphWrapper that is created
    @return: list of GraphWrapper objects, one per record, in file order
    """
    ret = []
    flag = True  # True when the next line read is a record's sentence line
    # the with-block closes the file even if a malformed line raises
    with open(filename) as fin:
        for line in fin:
            line = line.strip("\n")
            if flag:
                flag = False
                curGraph = GraphWrapper(line, HOME_DIR)
                parentsList = []
            elif line:
                (uid, words, _pos, isPredicate, isAsserted,
                 parents) = line.split("\t")
                uid = int(uid)
                isAsserted = bool(int(isAsserted))
                text = [
                    Word(int(index), word) for index, word in
                    [ent.split(",") for ent in words.split(";")]
                ]
                feats = {"top": isAsserted} if isAsserted else {}
                if parents:
                    # edges run from the parent's id to this node's uid; they
                    # are added only once the whole record has been read
                    parentsList.extend([
                        ((int(index), uid), rel) for rel, index in
                        [ent.split(",") for ent in parents.split(";")]
                    ])

                # the return value is not needed; the node receives curGraph
                # and presumably registers itself in it - confirm in newNode
                newNode.Node(text,
                             bool(int(isPredicate)),
                             feats,
                             curGraph,
                             uid=uid)
            else:
                _finish_prop_graph(curGraph, parentsList, ret)
                flag = True

    if not flag:
        # end of file reached inside a record: finish it as well
        _finish_prop_graph(curGraph, parentsList, ret)
    return ret


def _finish_prop_graph(graph, pending_edges, collected):
    """Add the collected (edge, label) pairs to graph, then store the graph."""
    for edge, rel in pending_edges:
        # invoked through digraph with the graph passed explicitly, exactly
        # as the original code did
        digraph.add_edge(graph, edge=edge, label=rel)
    collected.append(graph)
Ejemplo n.º 5
0
def tree_to_graph(tree):
    """
    Build a GraphWrapper from a dependency tree.

    One graph node is created for every tree node that has a truthy id and
    whose parent relation is not "erased". An edge labelled with the parent
    relation is then added from each parent to its child, for every tree node
    whose own id and parent id are both truthy.

    @type tree DepTree
    @param tree: mapping of tree nodes; the sentence is read from C{tree[0]}
                 and the nodes are taken from C{tree.values()}
    @return: the GraphWrapper that was built
    """
    home_dir = os.environ.get("PROPEXTRACTION_HOME_DIR") + "\\"
    graph = GraphWrapper(tree[0].original_sentence, home_dir)

    # first pass: create the graph nodes, keyed by tree node id
    nodes_by_id = {}
    for t in tree.values():
        if t.id and t.parent_relation != "erased":
            nodes_by_id[t.id] = treeNode_to_graphNode(t, graph)

    # second pass: connect every node to its parent
    for t in tree.values():
        if not t.id:
            continue
        parent_id = t.parent.id
        if not parent_id:
            continue
        graph.add_edge(edge=(nodes_by_id[parent_id], nodes_by_id[t.id]),
                       label=t.parent_relation)

    return graph
Ejemplo n.º 6
0
def parseSentences(sent, HOME_DIR=BASE_PATH, stanford_json_sent=None):
    """
    Obtain a parse for the input and convert it to graphs.

    With a truthy C{stanford_json_sent}, C{sent} is indexed with the key
    'parse' and that value (newlines removed, one newline appended) is used
    as the parse. Otherwise the Berkeley parser is run with sys.stdin
    temporarily replaced by an in-memory stream holding C{sent}. Either way
    the parse is written to "./tmp.parse", which is then read by
    read_dep_graphs_file.

    @param sent: a mapping with a 'parse' entry when C{stanford_json_sent} is
                 truthy, otherwise the text made available through sys.stdin
    @param HOME_DIR: forwarded to read_dep_graphs_file and GraphWrapper
    @param stanford_json_sent: truthy to use the Stanford json notation
    @return: list of (graph, graph.tree_str) tuples, one per graph read; when
             no graph was read, a single (GraphWrapper("", HOME_DIR), "")
             tuple
    """
    if stanford_json_sent:
        # Use Stanford json notation
        result = sent['parse'].replace("\n", "") + "\n"
    else:
        # Use default berkeley parser
        from BerkeleyInterface import parseInput
        orig_stdin = sys.stdin
        str_in = StringIO(sent)
        str_out = StringIO()
        sys.stdin = str_in
        try:
            parseInput(parser, opts, outputFile=str_out)
            result = str_out.getvalue()
        finally:
            # always give the real stdin back, even if the parser raised, so
            # the rest of the process is not left reading a closed buffer
            sys.stdin = orig_stdin
            str_in.close()
            str_out.close()

    tmp_fn = "./tmp.parse"
    with open(tmp_fn, 'w') as fout:
        fout.write(result)

    # the reader gets the json sentence itself in Stanford mode, else False
    graphs = read_dep_graphs_file(
        tmp_fn,
        False,
        HOME_DIR,
        stanford_json_sent=sent if stanford_json_sent else False)

    ret = []
    for graph in graphs:
        g = convert(graph)
        ret.append((g, g.tree_str))

    if not graphs:  # Berkley bug?
        # keep the "at least one result" contract with an empty graph
        ret.append((GraphWrapper("", HOME_DIR), ""))

    return ret