def read_depparse_conll(sentence, sentnum=-1):
    """
    Builds a DepParse out of a list of lines : each line is in conll format,
    the whole lines form a sentence.

    Columns read from each tab-separated line (0-based column index):
    0 = token id, 1 = form, 2 = lemma, 3 = coarse category, 4 = category,
    6 = governor id, 7 = dependency label.
    Token and governor ids are shifted by -1 before being used as lexical
    indexes. A lemma of '_' is stored as None.

    Blank lines are skipped and trailing line terminators are removed before
    splitting, so the last column never carries a newline.

    @param sentence: iterable of conll lines describing one dependency graph
    @param sentnum: identifier stored as the sentid of the returned DepParse
    @return: DepParse(sentid=sentnum, depgraph=<graph built from the lines>)
    @raise ValueError: if an id column is not an integer
    @raise IndexError: if a line has fewer than 8 columns
    @precondition: the sentence argument contains exactly the lines
                   corresponding to one depgraph in conll format
    """
    depgraph = DepGraph()
    for line in sentence:
        # Drop the line terminator so it cannot leak into the last column read.
        line = line.rstrip('\r\n')
        if not line.strip():
            # Blank separator line : nothing to parse.
            continue

        cols = line.split('\t')
        dep_lidx = int(cols[0]) - 1
        gov_lidx = int(cols[6]) - 1
        dep_form = cols[1]
        dep_lemma = cols[2]
        dep_label = cols[7]
        coarsecat = cols[3]
        cat = cols[4]
        if dep_lemma == '_':
            dep_lemma = None

        # add governor, though form unknown yet
        if gov_lidx != DUMMY_ROOT_LIDX and gov_lidx not in depgraph.lexnodes:
            governor = LexicalNode('', gov_lidx)
            depgraph.add_lexnode(governor)

        if dep_lidx not in depgraph.lexnodes and dep_lidx != DUMMY_ROOT_LIDX:
            dependent = LexicalNode(dep_form, dep_lidx, cat=cat,
                                    features={'coarsecat': coarsecat,
                                              'lemma': dep_lemma})
            depgraph.add_lexnode(dependent, gov_lidx, dep_label)
        else:
            # The node was already created as a form-less governor of an
            # earlier line : fill in its features, then attach it.
            d = depgraph.get_lexnode(dep_lidx)
            d.set_feature('form', dep_form)
            d.set_feature('coarsecat', coarsecat)
            d.set_feature('cat', cat)
            d.set_feature('lemma', dep_lemma)
            depgraph.add_dep_from_lidx(gov_lidx, dep_lidx, dep_label)
    return DepParse(sentid=sentnum, depgraph=depgraph)
def genGraph(depends):
    """Build a clustered dependency graph from a ``depends`` mapping.

    ``depends['pom']`` is an iterable of strings of the form
    ``<path>__fdse__<value>``; each becomes a node keyed by the path after
    it has gone through ``trimPathUtil.trimPath`` and ``removePathPrefix``.
    When several entries map to the same key, the last value wins.

    ``depends['relation']`` is an iterable of mappings with the keys ``'A'``
    and ``'parent'``; each becomes an edge between the two paths, normalised
    the same way as node keys.

    Returns the ``DepGraph.DepGraph`` instance after ``clustering()`` has
    been called on it.
    """
    pom_entries = depends['pom']
    edge_specs = depends['relation']
    graph = DepGraph.DepGraph()

    # Collect node values first so duplicate paths collapse to one node.
    node_values = {}
    for entry in pom_entries:
        parts = entry.split("__fdse__")
        node_key = removePathPrefix(trimPathUtil.trimPath(parts[0]))
        node_values[node_key] = parts[1]

    for node_key, node_value in node_values.items():
        graph.addNode(node_key, node_value)

    for spec in edge_specs:
        child_path = trimPathUtil.trimPath(spec['A'])
        parent_path = trimPathUtil.trimPath(spec['parent'])
        child_path = removePathPrefix(child_path)
        parent_path = removePathPrefix(parent_path)
        graph.addEdge(child_path, parent_path)

    graph.clustering()
    return graph
def initialize(self, dep_graph, tokens_list):
    """Build the internal dependency graph and run the rule classifiers on it.

    Steps, in order:
      1. Create a fresh ``DepGraph.Graph`` and insert one ``DepGraph.Node``
         per item of ``dep_graph.edge`` (built from the item's ``dep``,
         ``source`` and ``target`` attributes plus its position).
      2. Link every node to the node(s) registered under its target in
         ``nodes_by_source`` / ``nodes_comp_source``.
      3. Run ``nlp_rules.Rules_Classifier`` on every node, then copy
         ``nlp_rules.entities`` / ``nlp_rules.attributes`` onto ``self`` and
         keep the graph returned by the last classifier call.
      4. Publish the graph and entities on ``relation_rules``, run
         ``relation_rules.Rules_Classifier`` on every node and copy
         ``relation_rules.relationship`` onto ``self``.

    Args:
        dep_graph: object exposing an ``edge`` sequence whose items have
            ``dep``, ``source`` and ``target`` attributes.
        tokens_list: passed through unchanged to both rule classifiers.

    Side effects:
        Sets ``self.depend_graph``, ``self.entities``, ``self.attributes``
        and ``self.relationship``; assigns ``relation_rules.depend_graph``
        and ``relation_rules.entities``; prints the entities, attributes
        and relationships.
    """
    self.depend_graph = DepGraph.Graph()
    for index, edge in enumerate(dep_graph.edge):
        node = DepGraph.Node(edge.dep, edge.source, edge.target, index)
        self.depend_graph.insert_nodes(node)

    for node in self.depend_graph:
        target_key = str(node.target)
        if target_key in self.depend_graph.nodes_by_source:
            node.previous = self.depend_graph.nodes_by_source[target_key]
        if target_key in self.depend_graph.nodes_comp_source:
            node.previous_compound.append(
                self.depend_graph.nodes_comp_source[target_key])

    # add entities and attributes
    # Default to the current graph so that an input without any edge does not
    # leave ``new_graph`` unbound below.
    new_graph = self.depend_graph
    for node in self.depend_graph:
        new_graph = nlp_rules.Rules_Classifier(
            node.dep, tokens_list, node.source, node.target,
            node.previous_compound, node.previous,
            node=node, depend_graph=self.depend_graph)
    self.entities = nlp_rules.entities
    self.attributes = nlp_rules.attributes
    self.depend_graph = new_graph
    print(self.entities)
    print(self.attributes)

    relation_rules.depend_graph = self.depend_graph
    relation_rules.entities = self.entities

    # add relationships
    for node in self.depend_graph:
        relation_rules.Rules_Classifier(node.dep, tokens_list,
                                        node.source, node.target)
    self.relationship = relation_rules.relationship
    print(self.relationship)
def read_depparse_pivot(s):
    """
    Builds a DepParse out of a string in pivot format.

    The string must start with 'sentence(' and is then read left to right as
    a sequence of 'label(...)' items :
      - 'surf_deps(' opens the dependency section, where each item is
        'label(gov_form~gov_lidx,dep_form~dep_lidx)' ;
      - 'features(' opens the feature section, where each item is
        'label(form~lidx,value)' and sets a feature on an existing node
        (the label 'pos' is stored as 'cat') ;
      - any item outside of those two sections is meta information, stored
        as label -> stripped content (the label 'id' is stored as 'sentid').

    Malformed input (missing parenthesis or comma, token without a
    'form~index' shape, non integer index) is reported on stderr and makes
    the function return None.

    @param s: the pivot string
    @return: DepParse(sentid, depgraph, features=<remaining meta>), or None
             if the string is malformed or carries no sentence id
    @precondition: the string contains exactly one depgraph in pivot format

    CAUTION : intended to be fast (no re), but supposes nice coherent input ...
    """
    s = s.strip()
    if not s.startswith('sentence('):
        sys.stderr.write('Pivot String should start with "sentence("')
        return None
    s = s[9:]

    is_in_deps = False
    is_in_features = False
    depgraph = DepGraph()
    meta = {}
    while s != '':
        s = s.strip()
        if not s:
            break
        if s[0] == ')':
            # closes the current section, or the whole sentence
            s = s[1:]
            if is_in_features:
                is_in_features = False
            elif is_in_deps:
                is_in_deps = False
            elif s != '':
                sys.stderr.write('Reading pivot string : unexpected additional material :'+s)
                return None
            continue

        lpar = s.find('(')
        if lpar == -1:
            sys.stderr.write('Reading pivot string : unexpected format : cannot find any (:'+s)
            return None
        label = s[0:lpar]
        # backward compatibility :
        if label == 'id':
            label = 'sentid'
        s = s[lpar+1:]
        if label == 'surf_deps':
            is_in_deps = True
            continue
        if label == 'features':
            is_in_features = True
            continue

        # in any case, match following ')'
        rpar = s.find(')')
        if rpar == -1:
            # Without this check -1 would be used as a slice bound and the
            # content would be silently truncated.
            sys.stderr.write('Reading pivot string : unexpected format : cannot find any ):'+s)
            return None

        # meta information of the parse (id, validators ...)
        if not(is_in_features) and not(is_in_deps):
            meta[label] = s[0:rpar].strip()
        # read either a dependency, or a feature over lexical nodes
        else:
            comma = s.find(',')
            if comma == -1 or comma > rpar:
                sys.stderr.write('Reading pivot string : unexpected format : cannot find any comma :'+s)
                return None
            try:
                (gov_form, gov_lidx) = s[0:comma].split('~', 1)
                gov_lidx = int(gov_lidx)
                if is_in_deps:
                    (dep_form, dep_lidx) = s[comma+1:rpar].split('~', 1)
                    dep_lidx = int(dep_lidx)
            except ValueError:
                # either no '~' in the token, or a non integer index
                sys.stderr.write('Reading pivot string : unexpected format : cannot read form~index :'+s)
                return None

            if is_in_deps:
                # backward compatibility : to read older versions of pivot, with MISSINGHEAD~-1 nodes
                if gov_form == 'MISSINGHEAD':
                    label = UNK_GOV_DEP_LABEL
                # backward compatibility :
                if label == 'head' and gov_lidx == DUMMY_ROOT_LIDX:
                    label = DUMMY_DEP_LABEL
                # caution : connectedness not ensured here
                # add governor before adding dependent, and dependency
                if gov_lidx != DUMMY_ROOT_LIDX and gov_lidx not in depgraph.lexnodes:
                    governor = LexicalNode(gov_form, gov_lidx)
                    depgraph.add_lexnode(governor)
                if dep_lidx not in depgraph.lexnodes and dep_lidx != DUMMY_ROOT_LIDX:
                    dependent = LexicalNode(dep_form, dep_lidx)
                    depgraph.add_lexnode(dependent, gov_lidx, label)
                else:
                    depgraph.add_dep_from_lidx(gov_lidx, dep_lidx, label)
            elif is_in_features:
                value = s[comma+1:rpar]
                if label == 'pos':
                    label = 'cat'
                # features on unknown nodes are ignored
                dep = depgraph.get_lexnode(gov_lidx)
                if dep is not None:
                    dep.set_feature(label, value)
        s = s[rpar+1:]

    if 'sentid' not in meta:
        return None
    sentid = meta['sentid']
    del meta['sentid']
    return DepParse(sentid=sentid, depgraph=depgraph, features=meta)
def __init__(self):
    """Start with an empty dependency graph and empty result containers."""
    self.depend_graph = DepGraph.Graph()
    # Result containers, all empty at construction time.
    self.entities = set()
    self.attributes = set()
    self.relationship = list()