예제 #1
0
파일: depparser.py 프로젝트: Camille31/Swip
def read_depparse_conll(sentence, sentnum=-1):
    """Build a DepParse out of a list of lines in conll format.

    Each line is split on tabs. The columns read are 0 (token index),
    1 (form), 2 (lemma), 3 (coarse category), 4 (category), 6 (governor
    index) and 7 (dependency label). Token and governor indices are shifted
    by -1 before being used as lexical-node indices.

    @param sentence: iterable of tab-separated conll lines forming one sentence
    @param sentnum: value passed as ``sentid`` to the returned DepParse
    @precondition: the sentence argument contains exactly the lines
        corresponding to one depgraph in conll format
    @return: a DepParse wrapping the DepGraph built from the lines
    """
    depgraph = DepGraph()
    for line in sentence:
        cols = line.split('\t')
        dep_lidx = int(cols[0]) - 1
        gov_lidx = int(cols[6]) - 1
        dep_form = cols[1]
        dep_lemma = cols[2]
        dep_label = cols[7]
        coarsecat = cols[3]
        cat = cols[4]
        # a '_' lemma column is stored as None
        if dep_lemma == '_':
            dep_lemma = None
        # add governor, though form unknown yet
        # (`!=` rather than `<>`: the latter no longer exists in Python 3)
        if gov_lidx != DUMMY_ROOT_LIDX and gov_lidx not in depgraph.lexnodes:
            governor = LexicalNode('', gov_lidx)
            depgraph.add_lexnode(governor)
        if dep_lidx not in depgraph.lexnodes and dep_lidx != DUMMY_ROOT_LIDX:
            dependent = LexicalNode(dep_form, dep_lidx, cat=cat,
                                    features={'coarsecat': coarsecat, 'lemma': dep_lemma})
            depgraph.add_lexnode(dependent, gov_lidx, dep_label)
        else:
            # the node was created earlier as a form-less governor:
            # fill in its features now, then attach it to its own governor
            d = depgraph.get_lexnode(dep_lidx)
            d.set_feature('form', dep_form)
            d.set_feature('coarsecat', coarsecat)
            d.set_feature('cat', cat)
            d.set_feature('lemma', dep_lemma)
            depgraph.add_dep_from_lidx(gov_lidx, dep_lidx, dep_label)

    return DepParse(sentid=sentnum, depgraph=depgraph)
예제 #2
0
def genGraph(depends):
    """Build a DepGraph from a dependency description and cluster it.

    ``depends['pom']`` is iterated as strings that are split on
    ``"__fdse__"``: the first field, after ``trimPathUtil.trimPath`` and
    ``removePathPrefix``, becomes the node key and the second field becomes
    the node value. ``depends['relation']`` is iterated as mappings with
    keys ``'A'`` and ``'parent'``; both paths are normalized the same way
    and joined by an edge.

    Returns the graph after ``clustering()`` has been called on it.
    """
    pom_entries = depends['pom']
    edge_specs = depends['relation']
    graph = DepGraph.DepGraph()

    # Collect key -> value first: a key that appears more than once ends up
    # as a single node carrying the last value seen.
    node_values = {}
    for entry in pom_entries:
        fields = entry.split("__fdse__")
        node_key = removePathPrefix(trimPathUtil.trimPath(fields[0]))
        node_values[node_key] = fields[1]

    for node_key, node_value in node_values.items():
        graph.addNode(node_key, node_value)

    for spec in edge_specs:
        child_path = trimPathUtil.trimPath(spec['A'])
        parent_path = trimPathUtil.trimPath(spec['parent'])
        child_path = removePathPrefix(child_path)
        parent_path = removePathPrefix(parent_path)
        graph.addEdge(child_path, parent_path)

    graph.clustering()
    return graph
예제 #3
0
    def initialize(self, dep_graph, tokens_list):
        """Build the internal dependency graph and run the rule classifiers.

        Steps, in order:
          1. create one ``DepGraph.Node`` per item of ``dep_graph.edge``
             (from its ``dep``, ``source`` and ``target`` attributes plus its
             position) and insert it into a fresh ``DepGraph.Graph``;
          2. link each node to the entries registered under its target in
             ``nodes_by_source`` / ``nodes_comp_source``;
          3. run ``nlp_rules.Rules_Classifier`` on every node and copy
             ``nlp_rules.entities`` / ``nlp_rules.attributes`` onto ``self``;
          4. run ``relation_rules.Rules_Classifier`` on every node and copy
             ``relation_rules.relationship`` onto ``self``.

        The entities, attributes and relationships are printed as they
        become available.

        :param dep_graph: object whose ``edge`` sequence holds items with
            ``dep``, ``source`` and ``target`` attributes
        :param tokens_list: passed through unchanged to both classifiers
        """
        self.depend_graph = DepGraph.Graph()
        for index, edge in enumerate(dep_graph.edge):
            temp_node = DepGraph.Node(edge.dep, edge.source, edge.target,
                                      index)
            self.depend_graph.insert_nodes(temp_node)

        for node in self.depend_graph:
            target_key = str(node.target)
            if target_key in self.depend_graph.nodes_by_source:
                node.previous = self.depend_graph.nodes_by_source[target_key]
            if target_key in self.depend_graph.nodes_comp_source:
                node.previous_compound.append(
                    self.depend_graph.nodes_comp_source[target_key])

        # add entities and attributes
        # Start from the graph built above so that an empty edge list leaves
        # it in place instead of failing on an unbound `new_graph`.
        new_graph = self.depend_graph
        for node in self.depend_graph:
            new_graph = nlp_rules.Rules_Classifier(
                node.dep,
                tokens_list,
                node.source,
                node.target,
                node.previous_compound,
                node.previous,
                node=node,
                depend_graph=self.depend_graph)

            self.entities = nlp_rules.entities
            self.attributes = nlp_rules.attributes
        self.depend_graph = new_graph

        print(self.entities)
        print(self.attributes)

        # the relation rules read their inputs from these two attributes
        # set on relation_rules
        relation_rules.depend_graph = self.depend_graph
        relation_rules.entities = self.entities

        # add relationships
        for node in self.depend_graph:
            relation_rules.Rules_Classifier(node.dep, tokens_list, node.source,
                                            node.target)
            self.relationship = relation_rules.relationship
        print(self.relationship)
예제 #4
0
파일: depparser.py 프로젝트: Camille31/Swip
def read_depparse_pivot(s):
    """Build a DepParse out of a string in pivot format.

    The string must start with ``sentence(``. What follows is read as a
    sequence of ``label(...)`` items:

    * ``surf_deps(`` and ``features(`` open a section that is closed by a
      bare ``)``;
    * outside any section, ``label(value)`` is stored as meta information
      (the legacy label ``id`` is stored as ``sentid``);
    * inside ``surf_deps``, ``label(govform~govidx,depform~depidx)`` adds a
      dependency;
    * inside ``features``, ``label(form~idx,value)`` sets a feature on an
      already known lexical node (the legacy label ``pos`` is stored as
      ``cat``).

    @precondition: the string contains exactly one depgraph in pivot format
    CAUTION : intended to be fast (no re), but supposes nice coherent input ...

    @param s: the pivot string
    @return: a DepParse, or None if the string is malformed or carries no
        sentence id (a message is written to stderr for malformed input)
    """
    s = s.strip()
    if s.startswith('sentence('):
        s = s[9:]
    else:
        sys.stderr.write('Pivot String should start with "sentence("')
        return None
    is_in_deps = False
    is_in_features = False
    depgraph = DepGraph()
    meta = {}
    while s != '':
        s = s.strip()
        if s[0] == ')':
            # closes the innermost open section, or the sentence itself
            s = s[1:]
            if is_in_features:
                is_in_features = False
            elif is_in_deps:
                is_in_deps = False
            elif s != '':
                sys.stderr.write('Reading pivot string : unexpected additional material :'+s)
                return None
            continue
        lpar = s.find('(')
        if lpar == -1:
            sys.stderr.write('Reading pivot string : unexpected format : cannot find any (:'+s)
            return None
        label = s[0:lpar]
        # backward compatibility :
        if label == 'id':
            label = 'sentid'
        s = s[lpar+1:]
        if label == 'surf_deps':
            is_in_deps = True
            continue
        if label == 'features':
            is_in_features = True
            continue
        # in any case, match following ')'
        rpar = s.find(')')
        if rpar == -1:
            # without this check the slices below would use -1 as their end
            # and silently drop the last character of the value
            sys.stderr.write('Reading pivot string : unexpected format : cannot find any ):'+s)
            return None
        # meta information of the parse (id, validators ...)
        if not is_in_features and not is_in_deps:
            meta[label] = s[0:rpar].strip()
        # read either a dependency, or a feature over lexical nodes
        else:
            comma = s.find(',')
            # TODO : what if unexpected format?
            (gov_form, gov_lidx) = s[0:comma].split('~', 1)
            gov_lidx = int(gov_lidx)
            if is_in_deps:
                (dep_form, dep_lidx) = s[comma+1:rpar].split('~', 1)
                dep_lidx = int(dep_lidx)
                # backward compatibility : to read older versions of pivot, with MISSINGHEAD~-1 nodes
                if gov_form == 'MISSINGHEAD':
                    label = UNK_GOV_DEP_LABEL
                # backward compatibility :
                if label == 'head' and gov_lidx == DUMMY_ROOT_LIDX:
                    label = DUMMY_DEP_LABEL
                # caution : connectedness not ensured here
                # add governor before adding dependent, and dependency
                if gov_lidx != DUMMY_ROOT_LIDX and gov_lidx not in depgraph.lexnodes:
                    governor = LexicalNode(gov_form, gov_lidx)
                    depgraph.add_lexnode(governor)
                if dep_lidx not in depgraph.lexnodes and dep_lidx != DUMMY_ROOT_LIDX:
                    dependent = LexicalNode(dep_form, dep_lidx)
                    depgraph.add_lexnode(dependent, gov_lidx, label)
                else:
                    depgraph.add_dep_from_lidx(gov_lidx, dep_lidx, label)
            elif is_in_features:
                value = s[comma+1:rpar]
                if label == 'pos':
                    label = 'cat'
                # features on nodes that are not in the graph are ignored
                dep = depgraph.get_lexnode(gov_lidx)
                if dep is not None:
                    dep.set_feature(label, value)
        s = s[rpar+1:]
    if 'sentid' not in meta:
        return None
    # the sentence id is passed separately, the rest of meta as features
    sentid = meta.pop('sentid')
    return DepParse(sentid=sentid, depgraph=depgraph, features=meta)
예제 #5
0
 def __init__(self):
     """Create the graph and the empty containers that hold the results."""
     # fresh DepGraph.Graph instance
     self.depend_graph = DepGraph.Graph()
     # two independent empty sets
     self.attributes, self.entities = set(), set()
     # empty list of relationships
     self.relationship = list()