def inputsentence_analysis(inputsentence):
    """Parse a sentence with the remote NLP server and print the result.

    The sentence is POSTed to the parse endpoint.  Ten columns of the
    tab-separated response are kept and loaded into a ``DependencyGraph``.
    The reduced table, the pretty-printed tree and the 4-column CoNLL
    rendering are printed; nothing is returned.

    :param inputsentence: raw sentence text appended to the POST body.
    """
    post_data = "sentence=" + inputsentence
    url = urllib2.urlopen("http://barbar.cs.lth.se:8081/parse", data=post_data)
    try:
        content = url.read()
    finally:
        # Release the HTTP connection even if reading fails.
        url.close()

    # Indices of the response columns that are forwarded to DependencyGraph.
    wanted = (0, 1, 2, 4, 5, 6, 8, 10, 12, 13)
    rows = []
    for row in content.split('\n'):
        if not row.strip():
            # Blank (e.g. trailing) rows have no columns to pick from and
            # used to raise IndexError.
            continue
        table = row.split('\t')
        rows.append("\t".join([table[k] for k in wanted]) + "\n")
    sent = "".join(rows)

    print(sent)
    dg = DependencyGraph(sent)
    tree = dg.tree()
    print(tree.pprint())
    print(dg.to_conll(4))
def _parse(self, t):
    """Convert parser output text ``t`` into a tree.

    Lines starting with ``*`` or ``+`` open a new node (bunsetsu or tag);
    lines starting with ``#`` are ignored; every other line is a morph that
    is attached to the most recently opened node.  When ``self.morphs2str``
    is set, it is applied to each node's morph list before the tree is built.
    """
    graph = DependencyGraph()
    count = 0
    for raw in t.splitlines():
        lead = raw[0]
        if lead == "#":
            continue
        if lead in ("*", "+"):
            # Header line: second field is "<parent index><relation letter>".
            fields = raw.strip().split(" ", 3)
            found = re.match(r"([\-0-9]*)([ADIP])", fields[1])
            assert found is not None
            entry = graph.nodes[count]
            entry.update({"address": count, "rel": found.group(2), "word": []})
            parent = int(found.group(1))
            if parent == -1:
                # A parent of -1 marks the root node.
                graph.root = entry
            else:
                graph.nodes[parent]["deps"].append(count)
            count += 1
        else:
            # Morph line: first token plus the remaining tokens re-joined.
            fields = raw.strip().split(" ")
            morph = (fields[0], " ".join(fields[1:]))
            graph.nodes[count - 1]["word"].append(morph)

    if self.morphs2str:
        for entry in graph.nodes.values():
            entry["word"] = self.morphs2str(entry["word"])

    return graph.tree()
def _parse(self, t):
    """Convert parser output text ``t`` into a tree.

    Lines starting with ``*`` or ``+`` open a new node (bunsetsu or tag),
    lines starting with ``#`` are skipped, and all other non-empty lines are
    morphs appended to the most recently opened node.  If
    ``self.morphs2str`` is set, each node's morph list is passed through it.

    :param t: the raw text of one parsed sentence.
    :return: the result of ``DependencyGraph.tree()`` for the built graph.
    """
    dg = DependencyGraph()
    i = 0
    for line in t.splitlines():
        if not line:
            # ``line[0]`` below would raise IndexError on an empty line.
            continue

        if line[0] in '*+':
            # start of bunsetsu or tag
            cells = line.strip().split(" ", 3)
            m = re.match(r"([\-0-9]*)([ADIP])", cells[1])

            assert m is not None

            node = dg.nodes[i]
            node.update({'address': i, 'rel': m.group(2), 'word': []})

            dep_parent = int(m.group(1))

            if dep_parent == -1:
                dg.root = node
            else:
                dg.nodes[dep_parent]['deps'].append(i)

            i += 1
        elif line[0] != '#':
            # normal morph
            cells = line.strip().split(" ")
            # convert cells to morph tuples
            morph = cells[0], ' '.join(cells[1:])
            dg.nodes[i - 1]['word'].append(morph)

    if self.morphs2str:
        for node in dg.nodes.values():
            node['word'] = self.morphs2str(node['word'])

    return dg.tree()
def __init__(self, ldg=None):
    """Set up the node table and the top node, optionally seeded from ``ldg``.

    :param ldg: ``None``, an ``LgGraph`` (stored on the top node under
        ``'ldg'``) or a ``GraphNet`` (assigned to ``self.nodes`` and used as
        the source of ``self.git_list``).
    """
    DependencyGraph.__init__(self)

    def blank_node():
        # A fresh dict per missing key so nodes never share containers.
        return {
            'address': None,
            'ldg': 0,
            'gid': 1,  # has the same value of the gid of nodes in ldg.
            'lemma': None,
            'head': None,
            'deps': defaultdict(int),
            'remaining_ops': defaultdict(list),
            'ctag': None,
            'tag': None,
            'feats': None,
        }

    self.nodes = defaultdict(blank_node)
    self.git_list = [1]

    top = self.nodes[0]
    top['address'] = 0
    top['head'] = -1
    top['ldg'] = 'TOP'
    top['gid'] = 1  # has the same value of the gid of nodes in ldg.
    top['remaining_ops'] = defaultdict(list)

    if isinstance(ldg, LgGraph):
        self.nodes[0]['ldg'] = ldg
    if isinstance(ldg, GraphNet):
        self.nodes = ldg
        self.git_list = ldg.get_git_list()
def __init__(self, sent, top_relation_label):
    """Build the graph from ``sent`` and cache its words in address order.

    ``self.words`` holds the ``'word'`` of every node whose ``'tag'`` is not
    ``'TOP'``, ordered by the sorted node items.
    """
    DependencyGraph.__init__(
        self, sent, top_relation_label=top_relation_label)

    collected = []
    for _, node in sorted(self.nodes.items()):
        if node['tag'] != 'TOP':
            collected.append(node['word'])
    self.words = collected
def evald(prd_list, trg_file_path, directed=True):
    """Score predicted dependency graphs against a gold-standard file.

    The gold file is split into sentences on blank lines and each sentence
    is loaded into a ``DependencyGraph``.  If that raises ``ValueError``,
    loading is retried with an extractor for 10-column rows.  Dependencies
    are obtained with ``get_dep`` and the score is the fraction of predicted
    dependencies that also appear in the gold set.

    :param prd_list: predicted graphs, in the same order as the gold file.
    :param trg_file_path: path of the gold-standard file.
    :param directed: forwarded to ``get_dep``; also selects the printed
        label (``DDA`` when true, ``UDA`` otherwise).
    :return: the accuracy, or ``0.0`` when there is nothing to score.
    :raises ValueError: if a prediction and its gold sentence yield a
        different number of dependencies.
    """
    with open(trg_file_path, 'r') as trg_file:
        trg_string_list = trg_file.read().strip().split('\n\n')

    try:
        trg_list = [
            DependencyGraph(t, top_relation_label='root')
            for t in trg_string_list
        ]
    except ValueError:

        def extract_10_cells(cells, index):
            line_index, word, lemma, tag, _, head, rel, _, _, _ = cells
            try:
                index = int(line_index)
            except ValueError:
                # index can't be parsed as an integer, use default
                pass
            return index, word, lemma, tag, tag, '', head, rel

        trg_list = [
            DependencyGraph(t, top_relation_label='root',
                            cell_extractor=extract_10_cells)
            for t in trg_string_list
        ]

    correct = 0.0
    total = 0.0
    for prd, trg in zip(prd_list, trg_list):
        prd_deps = get_dep(prd, directed)
        trg_deps = get_dep(trg, directed)
        if len(prd_deps) != len(trg_deps):
            # Dump both sides before failing so the mismatch can be inspected.
            print(prd)
            print(prd_deps, len(prd_deps))
            print(trg)
            print(trg_deps, len(trg_deps))
            raise ValueError(
                'prediction has %d dependencies but target has %d'
                % (len(prd_deps), len(trg_deps)))
        correct += sum(1 for d in prd_deps if d in trg_deps)
        total += len(prd_deps)

    # Avoid ZeroDivisionError when no dependency was scored at all.
    acc = correct / total if total else 0.0

    if directed:
        print('DDA: %.3f' % acc)
    else:
        print('UDA: %.3f' % acc)

    return acc
def read_conllx_triples(corpus_file):
    """Return the dependency triples of every sentence in ``corpus_file``.

    Sentences are read through a ``DependencyCorpusView`` grouped by
    sentence, each one is loaded into a ``DependencyGraph`` with ``'root'``
    as top relation label, and all triples are gathered in one list.
    """
    view = DependencyCorpusView(corpus_file,
                                tagged=False,
                                group_by_sent=True,
                                dependencies=True)
    collected = []
    for sentence in concat([view]):
        graph = DependencyGraph(sentence, top_relation_label='root')
        collected.extend(graph.triples())
    return collected
def sent_to_dggraph(self, sent, rootrel='PRED'):
    """
    Creates a Dependency Graph object from an AGLDT sentence

    Parameters
    ----------
    sent : list(named tuple)
        the AGLDT sentence; each word exposes ``form``, ``postag``,
        ``head`` and ``relation``
    rootrel : str
        top relation label used when no word has ``head == '0'``

    Returns
    -------
    nltk.parse.DependencyGraph
    """
    from nltk.parse import DependencyGraph

    rows = []
    for w in sent:
        rows.append("{}\t{}\t{}\t{}".format(w.form, w.postag, w.head,
                                            w.relation))
    strsent = "\n".join(rows)

    # The first word attached to head '0' decides the top relation label.
    rootrel = next((w.relation for w in sent if w.head == '0'), rootrel)

    return DependencyGraph(strsent,
                           cell_separator="\t",
                           top_relation_label=rootrel)
def test_oracle():
    """Check that following the oracle reproduces the gold arcs and the
    expected transition sequence for a five-word graph."""
    graph_data = """\
word_1 tag_1 0 ROOT
word_2 tag_2 3 deprel_2
word_3 tag_3 5 deprel_3
word_4 tag_4 3 deprel_4
word_5 tag_5 1 deprel_5
"""
    gold = DependencyGraph(graph_data)
    pp = PartialParse(get_sentence_from_graph(gold))

    # Drive the parse with whatever the oracle proposes at each step.
    seen_tids = []
    while not pp.complete:
        tid, label = pp.get_oracle(gold)
        seen_tids.append(tid)
        pp.parse_step(tid, label)

    expected_arcs = [
        (0, 1, 'ROOT'),
        (3, 2, 'deprel_2'),
        (5, 3, 'deprel_3'),
        (3, 4, 'deprel_4'),
        (1, 5, 'deprel_5'),
    ]
    _test_arcs("oracle", pp, expected_arcs)

    ex_tids = [
        pp.shift_id, pp.shift_id, pp.shift_id,  # 0 1 2 3
        pp.left_arc_id, pp.shift_id,  # 0 1 3 4
        pp.right_arc_id, pp.shift_id,  # 0 1 3 5
        pp.left_arc_id,  # 0 1 5
        pp.right_arc_id, pp.right_arc_id,  # 0
    ]
    assert seen_tids == ex_tids, \
        "oracle test resulted in transitions {}, expected {}".format(
            seen_tids, ex_tids)
    print('oracle test passed!')
def tagged_parse_sents(self, sentences, verbose=False):
    """Parse tagged sentences with MaltParser and return dependency graphs.

    Each sentence is written as CoNLL rows to a temporary input file, the
    MaltParser jar is run in parse mode, and the output file is split on
    blank lines into one ``DependencyGraph`` per chunk.  Both temporary
    files are removed afterwards, whether or not parsing succeeded.

    :param sentences: iterable of sentences, each an iterable of
        ``(word, tag)`` pairs.
    :param verbose: forwarded to ``self._execute``.
    :return: list of ``DependencyGraph`` objects.
    :raises Exception: if the MaltParser command exits with a non-zero code.

    >>> parser.parse(['من', 'به', 'مدرسه', 'رفته بودم', '.']).tree().pprint()
    '(رفته_بودم من (به مدرسه) .)'
    """
    input_file = tempfile.NamedTemporaryFile(prefix='malt_input.conll',
                                             dir=self.working_dir,
                                             delete=False)
    output_file = tempfile.NamedTemporaryFile(prefix='malt_output.conll',
                                              dir=self.working_dir,
                                              delete=False)

    try:
        for sentence in sentences:
            for i, (word, tag) in enumerate(sentence, start=1):
                word = word.strip()
                if not word:
                    word = '_'
                row = '\t'.join([
                    str(i),
                    word.replace(' ', '_'),
                    self.lemmatize(word, tag).replace(' ', '_'),
                    tag, tag, '_', '0', 'ROOT', '_', '_', '\n',
                ])
                input_file.write(row.encode('utf8'))
            input_file.write('\n\n'.encode('utf8'))
        # The parser reads the file by name, so flush it to disk first.
        input_file.close()

        cmd = ['java', '-jar', self._malt_bin, '-w', self.working_dir,
               '-c', self.mco, '-i', input_file.name,
               '-o', output_file.name, '-m', 'parse']
        if self._execute(cmd, verbose) != 0:
            raise Exception("MaltParser parsing failed: %s" % (' '.join(cmd)))

        # Read through a context manager so the handle is closed before the
        # file is removed below (the original left it open).
        with codecs.open(output_file.name, encoding='utf8') as parsed:
            chunks = parsed.read().split('\n\n')
        return [DependencyGraph(item) for item in chunks]

    finally:
        input_file.close()
        os.remove(input_file.name)
        output_file.close()
        os.remove(output_file.name)
def to_dependendency_graph(self):
    """
    Returns the sentence as an NLTK dependency graph.

    Handy for the DOT visualization of the graph or for `triples`, which
    yields the dependency triplets of a sentence as tuples:
    (form_head, dependency_label, form_dependent).

    Forms of nodes whose first type label is "Artificial" are prefixed
    with "*".  The top relation label is the "Rel" of the first dependent
    that is not "AuxK", falling back to "PRED".

    Refer to the class documentation of nltk.parse.DependencyGraph for more
    details.
    """
    projection = {
        "labels(n)": "type",
        "n.form": "form",
        "n.pos": "pos",
        "n.head": "head",
        "n.original_label": "label"
    }
    records = self._query_sent(include_artificial=True, **projection)

    try:
        non_final = [
            dep["Rel"] for dep in self.dependents if dep["Rel"] != "AuxK"
        ]
        top_lab = non_final[0]
    except IndexError:
        # default to pred
        top_lab = "PRED"

    rows = []
    for e in records:
        is_artificial = e["type"][0] == "Artificial"
        form = "*" + e["form"] if is_artificial else e["form"]
        rows.append("{}\t{}\t{}\t{}\n".format(form, e["pos"], e["head"],
                                              e["label"]))

    return DependencyGraph("".join(rows), top_relation_label=top_lab)
def test_oracle2():
    """Check that following the oracle reproduces the gold arcs and the
    expected transition sequence for a seven-word sentence."""
    graph_data = """\
Nadia PROPN 2 nsubj
rode VERB 0 ROOT
the DET 5 det
old ADJ 5 amod
donkey NOUN 2 dobj
with ADP 2 prep
dexterity NOUN 6 pobj
"""
    gold = DependencyGraph(graph_data)
    pp = PartialParse(get_sentence_from_graph(gold))

    # Drive the parse with whatever the oracle proposes at each step.
    seen_tids = []
    while not pp.complete:
        tid, label = pp.get_oracle(gold)
        seen_tids.append(tid)
        pp.parse_step(tid, label)

    expected_arcs = [
        (2, 1, 'nsubj'),
        (0, 2, 'ROOT'),
        (5, 3, 'det'),
        (5, 4, 'amod'),
        (2, 5, 'dobj'),
        (2, 6, 'prep'),
        (6, 7, 'pobj'),
    ]
    _test_arcs("oracle", pp, expected_arcs)

    ex_tids = [
        pp.shift_id, pp.shift_id, pp.left_arc_id,
        pp.shift_id, pp.shift_id, pp.shift_id,
        pp.left_arc_id, pp.left_arc_id, pp.right_arc_id,
        pp.shift_id, pp.shift_id, pp.right_arc_id,
        pp.right_arc_id, pp.right_arc_id,
    ]
    assert seen_tids == ex_tids, \
        "oracle2 test resulted in transitions {}, expected {}".format(
            seen_tids, ex_tids)
    print('oracle2 test passed!')
def read_dep_parses(inputfile, depfile, filename=' ', make_graph=False):
    """Collect ``(questionID, parse)`` pairs from a zipped corpus member.

    The member is unpacked with ``unzip_corpus`` and consumed parse by parse
    through ``read_dep``.  Each parse is stored either as the raw text or,
    when ``make_graph`` is true, as a ``DependencyGraph``.

    :return: list of ``(qID, graph_or_text)`` tuples.
    """
    remaining = unzip_corpus(inputfile, depfile, True).splitlines()
    results = []

    dep, qID = read_dep(remaining, filename)
    while dep is not None:
        parsed = DependencyGraph(dep) if make_graph else dep
        results.append((qID, parsed))

        # Drop what was just consumed: the parse lines, the qID line and
        # one more line.
        consumed = len(dep.splitlines()) + 2
        remaining = remaining[consumed:]

        dep, qID = read_dep(remaining, filename)

    return results
def parse(sent):
    """Print the constituency tree, dependency tree and dependency triples
    of ``sent`` using the module-level ``con_parser`` and ``dep_parser``."""
    # Both parsers are expected to yield exactly one parse.
    [con_parse] = con_parser.raw_parse(sent)
    [dep_parse] = dep_parser.raw_parse(sent)

    print()
    print("Constituency Tree:")
    con_parse.pretty_print()

    graph = DependencyGraph(dep_parse.to_conll(4))

    print()
    print("Dependency Tree:")
    graph.tree().pprint()

    print()
    print("Dependencies:")
    for head, relation, child in graph.triples():
        print(head, relation, child)
def flattened_node_list(graph):
    """
    Takes an instance of DependencyGraph corresponding to a parsed sentence.
    Flattens into a list of DependencyGraph instances, each with a different
    word from the sentence as its root node (and no children).

    The first entry of ``graph.nodelist`` is skipped.  Every remaining node
    is copied before being rewritten, so ``graph`` itself is left untouched
    (the original shallow copy of the list shared the node dicts and
    overwrote their ``deps``/``head``/``address`` in the input graph).
    """
    flattened = []
    for original in graph.nodelist[1:]:
        node = dict(original)
        node["deps"] = []
        node["head"] = 0
        node["address"] = 1

        new_graph = DependencyGraph()
        new_graph.nodelist.append(node)
        new_graph.root = node
        flattened.append(new_graph)
    return flattened
def parsed_sents(self, fileids=None):
    """Return one ``DependencyGraph`` per sentence of the selected files.

    :param fileids: forwarded to ``self.abspaths``.
    """
    views = []
    for fileid, enc in self.abspaths(fileids, include_encoding=True):
        views.append(
            DependencyCorpusView(fileid, False, True, True, encoding=enc))

    graphs = []
    for sent in concat(views):
        graphs.append(DependencyGraph(sent))
    return graphs
def prepare_deps(raw_deps):
    """Turn a block of blank-line separated parses into dependency graphs.

    :param raw_deps: text holding parses separated by blank lines, or a
        float NaN standing for a missing value.
    :return: one ``DependencyGraph`` (top relation label ``"root"``) per
        chunk longer than two characters; an empty list for NaN.
    """
    # isinstance (rather than an exact type comparison) also recognises NaN
    # values whose type is a subclass of float.
    if isinstance(raw_deps, float) and math.isnan(raw_deps):
        return []
    return [
        DependencyGraph(dep, top_relation_label="root")
        for dep in raw_deps.split("\n\n")
        if len(dep) > 2
    ]
def _parse(self, t):
    """Convert parser output text ``t`` into a tree.

    Lines starting with ``*`` or ``+`` open a new node (bunsetsu or tag),
    lines starting with ``#`` are skipped, and every other line is a morph
    appended to the most recently opened node.  If ``self.morphs2str`` is
    set, each node's morph list is passed through it.

    :param t: the raw text of one parsed sentence.
    :return: the result of ``DependencyGraph.tree()`` for the built graph.
    """
    dg = DependencyGraph()
    i = 0
    for line in t.splitlines():
        if line.startswith(("*", "+")):
            # start of bunsetsu or tag
            cells = line.strip().split(" ", 3)
            m = re.match(r"([\-0-9]*)([ADIP])", cells[1])

            assert m is not None

            dep_parent = int(m.group(1))

            # Grow the node list BEFORE indexing it: the original fetched
            # ``dg.nodelist[i]`` first, which raises IndexError whenever no
            # earlier line happened to reserve slot ``i``.
            while len(dg.nodelist) < i + 1 or len(
                    dg.nodelist) < dep_parent + 1:
                dg.nodelist.append({'word': [], 'deps': []})

            node = dg.nodelist[i]
            node['address'] = i
            node['rel'] = m.group(2)  # dep_type
            node['word'] = []

            if dep_parent == -1:
                dg.root = node
            else:
                dg.nodelist[dep_parent]['deps'].append(i)

            i += 1
        elif not line.startswith("#"):
            # normal morph
            cells = line.strip().split(" ")
            # convert cells to morph tuples
            morph = (cells[0], ' '.join(cells[1:]))
            dg.nodelist[i - 1]['word'].append(morph)

    if self.morphs2str:
        for node in dg.nodelist:
            node['word'] = self.morphs2str(node['word'])

    return dg.tree()
def __init__(self, tree_str=None, cell_extractor=None, zero_based=False,
             cell_separator=None, top_relation_label='root'):
    """Initialise the graph and install the node table with its top node.

    All arguments are forwarded unchanged to ``DependencyGraph.__init__``.

    NOTE(review): ``self.nodes`` is replaced *after* the base initialiser
    has run, so anything it stored there for ``tree_str`` appears to be
    discarded -- confirm this is intended.
    """
    DependencyGraph.__init__(self, tree_str, cell_extractor, zero_based,
                             cell_separator, top_relation_label)

    self.nodes = defaultdict(lambda: {'address': None,
                                      'word': None,
                                      'lemma': None,
                                      'ctag': None,  # upostag
                                      'tag': None,  # xpostag
                                      'feats': None,
                                      'head': None,
                                      'deps': defaultdict(list),
                                      'rel': None,
                                      })
    self.nodes[0].update(
        {
            'ctag': 'TOP',
            'tag': 'TOP',
            # Every other node is identified through 'address'; the top
            # node previously only received 'ID' and kept address None.
            'address': 0,
            # Kept for any existing reader of the old key.
            'ID': 0,
        }
    )
def _parse(self, t):
    """Convert parser output text ``t`` into a tree.

    ``*``/``+`` lines open a node (bunsetsu or tag), ``#`` lines are
    skipped, anything else is a morph of the most recently opened node.
    ``self.morphs2str``, when set, post-processes each node's morph list.
    """
    graph = DependencyGraph()
    idx = 0
    for text in t.splitlines():
        if text.startswith(("*", "+")):
            # Header line: second field is "<parent index><relation letter>".
            parts = text.strip().split(" ", 3)
            hit = re.match(r"([\-0-9]*)([ADIP])", parts[1])

            assert hit is not None

            current = graph.nodelist[idx]
            current['address'] = idx
            current['rel'] = hit.group(2)  # dep_type
            current['word'] = []

            parent = int(hit.group(1))

            # Make sure both this node's slot and its parent's slot exist.
            needed = max(idx, parent) + 1
            while len(graph.nodelist) < needed:
                graph.nodelist.append({'word': [], 'deps': []})

            if parent == -1:
                graph.root = current
            else:
                graph.nodelist[parent]['deps'].append(idx)

            idx += 1
        elif not text.startswith("#"):
            # Morph line: first token plus the remaining tokens re-joined.
            parts = text.strip().split(" ")
            morph = (parts[0], ' '.join(parts[1:]))
            graph.nodelist[idx - 1]['word'].append(morph)

    if self.morphs2str:
        for entry in graph.nodelist:
            entry['word'] = self.morphs2str(entry['word'])

    return graph.tree()
def __init__(self, tree_str=None, cell_extractor=None, zero_based=False,
             cell_separator=None, top_relation_label='root'):
    """Initialise the graph and install the node table with its top node.

    All arguments are forwarded unchanged to ``DependencyGraph.__init__``.

    NOTE(review): ``self.nodes`` is replaced after the base initialiser has
    run, so anything it stored there for ``tree_str`` appears to be
    discarded -- confirm this is intended.
    """
    DependencyGraph.__init__(self, tree_str, cell_extractor, zero_based,
                             cell_separator, top_relation_label)

    def fresh_node():
        # A new dict per missing key so nodes never share containers.
        return {
            'address': None,
            'word': None,
            'lemma': None,
            'ctag': None,  # upostag
            'tag': None,  # xpostag
            'feats': None,
            'head': None,
            'deps': defaultdict(list),
            'rel': None,
        }

    self.nodes = defaultdict(fresh_node)

    top = self.nodes[0]
    top['ctag'] = 'TOP'
    top['tag'] = 'TOP'
    top['address'] = 0
def Process(sentence):
    """Run the external biaffine parser on ``sentence`` and draw the result.

    The sentence is tokenised on whitespace (after replacing ``|`` with
    ``।``), written twice to ``biaffine-parser-master/data/naive3.conllx``
    as 10-column rows with ``_`` placeholders, and the parser script is run
    in predict mode, writing its prediction over the same file.  The first
    sentence of that prediction is rendered to ``static/process.png``.

    :param sentence: raw input sentence.
    :return: ``(txt, err)`` -- the predicted CoNLL text of the first
        sentence and a flag that is true when drawing failed, in which case
        an error line is appended to ``txt``.
    """
    words = sentence.replace('|', '।').split()
    # NOTE(review): the tags only bound the number of rows via zip();
    # their values are not written to the file -- confirm this is intended.
    tags = test_fn(words)

    fill = '_'
    rows = [
        '\t'.join([str(i), word] + [fill] * 8)
        for i, (word, _tag) in enumerate(zip(words, tags), start=1)
    ]
    text = '\n'.join(rows)
    text = text + '\n\n' + text

    with open('biaffine-parser-master/data/naive3.conllx', 'w') as f:
        f.write(text)

    os.chdir('biaffine-parser-master')
    try:
        os.system(
            'python run.py predict --feat=bert --fdata=data/naive3.conllx --fpred=data/naive3.conllx'
        )
    finally:
        # Always return to the starting directory, even if the call above
        # is interrupted.
        os.chdir('..')

    with open('biaffine-parser-master/data/naive3.conllx', 'r') as f:
        txt = f.read().split('\n\n')[0]

    err = False
    try:
        out = DependencyGraph(txt)
        out_dot = out.to_dot()
        G = pgv.AGraph(out_dot)
        G.layout(prog='dot')  # use dot
        G.draw('static/process.png')
    except Exception:
        # Drawing is best-effort; report the failure to the caller instead
        # of raising, but let KeyboardInterrupt/SystemExit through.
        err = True
        txt += '''Error generating graph.\n'''
    return txt, err
def par(self, infilm, outfilm):
    """Segment, tag and dependency-parse every line of ``infilm``.

    For each input line the segmentation and POS tags are printed, the
    ``relation(word, head)`` pairs are written to ``outfilm``, and the
    parse is loaded into a ``DependencyGraph`` whose tree is drawn.

    :param infilm: path of the UTF-8 input file, one sentence per line.
    :param outfilm: path of the UTF-8 output file (overwritten).
    """
    # Context managers close both files even if a line fails to parse; the
    # output encoding was previously misspelled 'utf=8'.
    with open(infilm, 'r', encoding='utf-8') as input_data, \
            open(outfilm, 'w+', encoding='utf-8') as output_data:
        for line in input_data:
            line = line.strip()

            # Word segmentation
            words = self.segmentor.segment(line)
            # self.segmentor.load_with_lexicon('lexicon')  # use a custom dictionary; 'lexicon' is the path of the external dictionary file
            print('分词:' + '\t'.join(words))

            # Part-of-speech tagging
            postags = self.postagger.postag(words)
            print('词性标注:' + '\t'.join(postags))

            # Dependency parsing
            arcs = self.parser.parse(words, postags)
            rely_id = [arc.head for arc in arcs]  # ids of the dependency heads
            relation = [arc.relation for arc in arcs]  # dependency relations
            # Map each head id to its word ('Root' for head 0).
            heads = [
                'Root' if head_id == 0 else words[head_id - 1]
                for head_id in rely_id
            ]

            output_data.write(line)
            output_data.write('\n')
            output_data.write('句法分析:')

            par_result = ''
            for i in range(len(words)):
                if arcs[i].head == 0:
                    arcs[i].relation = "ROOT"
                par_result += "\t" + words[i] + "(" + arcs[
                    i].relation + ")" + "\t" + postags[i] + "\t" + str(
                        arcs[i].head) + "\t" + arcs[i].relation + "\n"
                # ``relation`` was captured before the ROOT rewrite above,
                # so the file keeps the parser's original label.
                output_data.write(relation[i] + '(' + words[i] + ', ' +
                                  heads[i] + ')' + '\n')
            print(par_result)

            conlltree = DependencyGraph(par_result)  # convert to a dependency graph
            tree = conlltree.tree()  # build the tree structure
            tree.draw()  # display the tree
            output_data.write('\n')
def trees(self):
    """Yield a ``DependencyGraph`` per sentence with an ``'mtag'`` on each
    word node.

    ``'mtag'`` starts as ``[ctag, tag]``, gets ``'EZ'`` appended when
    ``'ezafe'`` occurs in the node's ``'feats'``, and is finally passed
    through ``self._pos_map``.
    """
    for sentence in self._sentences():
        graph = DependencyGraph(sentence)
        for node in word_nodes(graph):
            node['mtag'] = tags = [node['ctag'], node['tag']]
            if 'ezafe' in node['feats']:
                tags.append('EZ')
            node['mtag'] = self._pos_map(tags)
        yield graph
def parsed_sents_3cells(self, tagset=None, fileids=None):
    """Return dependency graphs built from three-column rows.

    Sentences come from ``self.extract`` (with ``opt=True`` when ``tagset``
    is ``'opt'``, otherwise ``tags=True``); the first three fields of each
    item are tab-joined, one item per line.
    """
    if tagset == 'opt':
        sentences = self.extract(opt=True, head=True, fileids=fileids)
    else:
        sentences = self.extract(tags=True, head=True, fileids=fileids)

    restruct = [
        "".join('{0}\t{1}\t{2}\n'.format(item[0], item[1], item[2])
                for item in sentence)
        for sentence in sentences
    ]
    return [DependencyGraph(s) for s in restruct]
def _to_dep_graph(self, graph):
    """Convert ``graph`` into a ``DependencyGraph``.

    Each record from ``self._get_nodes`` contributes one space-separated
    ``form pos head label`` row; a missing/empty POS becomes ``"X"``,
    otherwise it is title-cased.  The top relation label is the label of
    the first node whose head is 0.

    When no node has head 0 the label falls back to ``"PRED"`` (the same
    default ``to_dependendency_graph`` uses) instead of raising IndexError.
    """
    rows = []
    top_rels = []
    for c in self._get_nodes(graph):
        n = c["n"]
        form = n["form"]
        pos = n["pos"].title() if n["pos"] else "X"
        head = n["head"]
        lab = n["original_label"]
        rows.append("{} {} {} {}\n".format(form, pos, head, lab))
        if head == 0:
            top_rels.append(lab)

    top_lab = top_rels[0] if top_rels else "PRED"
    return DependencyGraph("".join(rows), top_relation_label=top_lab)
def clean_corpus(path, parser_std):
    """Rewrite the corpus at ``path`` keeping only projective sentences.

    The file is split on blank lines, each non-empty entry is loaded into a
    ``DependencyGraph``, and the entries accepted by
    ``parser_std._is_projective`` are written in 10-column CoNLL style to a
    temporary file (``path + "1"``) that then replaces the original.

    :param path: path of the corpus file, overwritten in place.
    :param parser_std: object providing ``_is_projective(depgraph)``.
    """
    with open(path) as arquivo:
        a = arquivo.read()

    graphs = [DependencyGraph(entry) for entry in a.split('\n\n') if entry]

    tmp_path = path + "1"
    with open(tmp_path, "w") as new_training_data:
        for depgraph in graphs:
            if parser_std._is_projective(depgraph):
                new_training_data.write(depgraph.to_conll(style=10) + "\n")

    os.remove(path)
    # Rename back to the exact original path.  The previous
    # ``name.replace("1", "")`` stripped every "1" in the whole path, not
    # just the temporary suffix.
    os.rename(tmp_path, path)
def cabocha2depgraph(t):
    """Build a ``DependencyGraph`` from CaboCha-style output text ``t``.

    Lines starting with ``*`` open a node (bunsetsu); lines starting with
    ``EOS`` are ignored; every other line is a tab-separated morph whose
    surface form and comma-split feature tuple are appended to the
    ``'word'`` and ``'tag'`` lists of the most recently opened node.
    """
    graph = DependencyGraph()
    position = 0
    for row in t.splitlines():
        if row.startswith("*"):
            # Header line: third field is "<parent index><relation letter>".
            fields = row.strip().split(" ", 3)
            match = re.match(r"([\-0-9]*)([ADIP])", fields[2])

            current = graph.nodelist[position]
            current.update({
                'address': position,
                'rel': match.group(2),  # dep_type
                'word': [],
                'tag': [],
            })

            parent = int(match.group(1))

            # Make sure both this node's slot and its parent's slot exist.
            required = max(position, parent) + 1
            while len(graph.nodelist) < required:
                graph.nodelist.append({'word': [], 'deps': [], 'tag': []})

            if parent == -1:
                graph.root = current
            else:
                graph.nodelist[parent]['deps'].append(position)

            position += 1
        elif not row.startswith("EOS"):
            fields = row.strip().split("\t")
            surface = fields[0]
            features = tuple(fields[1].split(','))
            graph.nodelist[position - 1]['word'].append(surface)
            graph.nodelist[position - 1]['tag'].append(features)
    return graph
def _parse(self, input):
    """Fill ``self.nodelist`` from 10-column, tab-separated rows.

    Blank lines are dropped and every remaining line is normalised with
    ``DependencyGraph._normalize``.  Nodes are addressed from 1 in input
    order.  A dependent whose head has not been appended yet is remembered
    and attached when that head is created.  The root is the first
    dependent of the node at index 0.
    """
    rows = []
    for raw in input.split('\n'):
        if raw.strip():
            rows.append(DependencyGraph._normalize(raw))

    # (dependent address, head address) pairs waiting for their head.
    pending = []
    for address, row in enumerate(rows, start=1):
        _, word, _, tag, _, _, head, rel, _, _ = row.split('\t')
        head = int(head)
        waiting = [dep for (dep, target) in pending if target == address]
        self.nodelist.append({'address': address,
                              'word': word,
                              'tag': tag,
                              'head': head,
                              'rel': rel,
                              'deps': waiting})
        try:
            self.nodelist[head]['deps'].append(address)
        except IndexError:
            pending.append((address, head))

    root_address = self.nodelist[0]['deps'][0]
    self.root = self.nodelist[root_address]
def trees(self):
    """Yield a ``DependencyGraph`` per sentence with an ``'mtag'`` on each
    node after the first.

    Three passes are made over those nodes: ``'mtag'`` is initialised to
    ``[ctag, tag]``; ``'EZ'`` is appended to the head of every node whose
    relation is ``'MOZ'`` or ``'NPOSTMOD'``; finally each ``'mtag'`` is
    passed through ``self._pos_map``.
    """
    ezafe_relations = ('MOZ', 'NPOSTMOD')
    for sentence in self._sentences():
        graph = DependencyGraph(sentence)
        words = graph.nodelist[1:]

        for node in words:
            node['mtag'] = [node['ctag'], node['tag']]

        # Needs every 'mtag' to exist already, hence the separate pass.
        for node in words:
            if node['rel'] in ezafe_relations:
                head_node = graph.nodelist[node['head']]
                head_node['mtag'].append('EZ')

        for node in words:
            node['mtag'] = self._pos_map(node['mtag'])

        yield graph
def read_dep_parses(depfile):
    """Read every dependency parse in ``depfile`` into a ``DependencyGraph``.

    Parses are pulled one at a time with ``read_dep`` until it returns
    ``None``.

    :param depfile: path of the file holding the parses.
    :return: list of ``DependencyGraph`` objects, in file order.
    """
    graphs = []
    # The context manager closes the file even if a parse is malformed.
    with open(depfile, 'r') as fh:
        dep = read_dep(fh)
        while dep is not None:
            graphs.append(DependencyGraph(dep))
            dep = read_dep(fh)
    return graphs
def test_minimal_connected_graph(self):
    """For each record, check the root address, the address path from the
    given words up to the root, and the node count left after removing
    every node that is not on that path.

    Record layout: (CoNLL text with '*' as line separator, words,
    expected root addresses, an unused list, expected address list).
    """
    lst = [(
        "1 Nero _ NNP NNP _ 2 nsubj _ _ *2 played _ VBD VBD _ 0 root _ _ *3 his _ PRP$ PRP$ _ 4 nmod:poss _ _ *4 flute _ NN NN _ 2 dobj _ _ *5 while _ IN IN _ 7 mark _ _ *6 Rome _ NNP NNP _ 7 nsubj _ _ *7 burned _ VBD VBD _ 2 advcl _ _ *",
        ["while"], [2], [7], [5, 7, 2]),
        ("1 Weder _ NNP NNP _ 6 compound _ _*\
            2 war _ NN NN _ 6 compound _ _*\
            3 der _ NN NN _ 6 compound _ _*\
            4 Stern _ NNP NNP _ 6 compound _ _*\
            5 von _ NNP NNP _ 6 compound _ _*\
            6 Bethlehem _ NNP NNP _ 7 nsubj _ _*\
            7 eine _ VBP VBP _ 0 root _ _*\
            8 Supernova _ NNP NNP _ 12 compound _ _*\
            10 noch _ NNP NNP _ 12 appos _ _*\
            11 ein _ NNPS NNPS _ 12 compound _ _*\
            12 Komet _ NNP NNP _ 7 dobj _ _*",
         ["Weder", "noch"], [7], [6, 12], [1, 6, 10, 12, 7])]

    for record in lst:
        words = record[1]
        expected_root = record[2]
        expected_path = record[4]

        cnll = record[0].replace('*', '\n')
        g = DependencyGraph(cnll)

        root = snt2ldg.get_root_address_from_nltkg(g)
        print('root', root)
        assert root == expected_root

        newg = deepcopy(g)
        rootAddress = snt2ldg.get_root_address_from_nltkg(newg)

        wordAddress = []
        for word in words:
            wordAddress += snt2ldg.get_address_of_word(newg, word)

        # Each word address followed by the addresses between it and the
        # root, then the root itself.
        addressLst = []
        for address in wordAddress:
            addressBetween = snt2ldg.get_addresses_from_child_to_ancestor(
                newg, address, rootAddress[0])
            addressLst += [address] + addressBetween
        addressLst += rootAddress
        print(addressLst)
        assert addressLst == expected_path

        for i in list(newg.nodes.keys()):
            if i not in addressLst:
                newg.remove_by_address(i)
        pprint(newg.nodes)
        assert len(newg.nodes) == len(expected_path)
def DepGraphList(sentenceList):
    """Build a ``DependencyGraph`` for every sentence that can be parsed.

    Each token contributes one space-separated row of its ``form``,
    ``upostag``, ``head`` and upper-cased ``deprel``.  Sentences for which
    ``DependencyGraph`` raises are skipped.  The counts of built and
    skipped sentences are printed before returning.

    :param sentenceList: iterable of sentences, each an iterable of token
        mappings with the keys named above.
    :return: list of the graphs that could be built.
    """
    dgList = []
    built = 0
    skipped = 0
    for sentence in sentenceList:
        text = []
        for token in sentence:
            text.append(' '.join([
                token['form'], token['upostag'],
                str(token['head']), token['deprel'].upper()
            ]))
        try:
            dg = DependencyGraph('\n'.join(text))
        except Exception:
            # Skip unparsable sentences, but let KeyboardInterrupt and
            # SystemExit propagate (a bare ``except`` swallowed them).
            skipped += 1
            continue
        built += 1
        dgList.append(dg)
    print(built, skipped)
    return dgList
def read_dep_parses_questions(depfile):
    """Read every parse in ``depfile`` into a dict keyed by question id.

    Records are pulled one at a time with ``read_dep_questions``, which
    returns a sequence whose first item is the id and second item the parse
    text; reading stops when the parse text is ``None``.

    :param depfile: path of the file holding the parses.
    :return: dict mapping question id to ``DependencyGraph``.
    """
    graphs = {}
    # The context manager closes the file even if a parse is malformed.
    with open(depfile, 'r') as fh:
        res = read_dep_questions(fh)
        qid, dep = res[0], res[1]
        while dep is not None:
            graphs[qid] = DependencyGraph(dep)
            res = read_dep_questions(fh)
            qid, dep = res[0], res[1]
    return graphs
def dependency_parse_to_graph(filename):
    """
    Read dependency parser output from file and construct graph

    Lines starting with '#' are skipped.  In rows whose eighth
    tab-separated column is exactly 'root', that column is rewritten to
    'ROOT'.  A blank line ends a sentence, which is then decoded from UTF-8
    and loaded into a DependencyGraph.  A final sentence that is not
    followed by a blank line is loaded as well (it used to be dropped).

    Returns the list of graphs in file order.
    """
    data = ''
    dtree = []
    with open(filename, 'r') as f:
        for line in f:
            if line[0] == '#':
                continue
            if 'root' in line:
                elements = line.split('\t')
                # Guard against short rows that merely contain the text
                # 'root' somewhere; indexing column 8 would raise.
                if len(elements) > 7 and elements[7] == 'root':
                    elements[7] = 'ROOT'
                    line = '\t'.join(elements)
            data += line
            if line == '\n':
                dtree.append(DependencyGraph(data.decode('utf8')))
                data = ''

    if data.strip():
        # The file did not end with a blank line: flush the last sentence.
        dtree.append(DependencyGraph(data.decode('utf8')))
    return dtree
def parse(f_item):
    """Parse the sentences in ``f_item`` with a stored arc-standard model.

    Sentences are separated by blank lines and loaded as zero-based
    ``DependencyGraph`` objects.  The parser's dictionary and transition
    tables are unpickled from a fixed folder, the stored model is applied
    and the predictions are written in 4-column CoNLL form to
    ``to_dir + f_item`` (``to_dir`` is a module-level name).

    :param f_item: path of the UTF-8 input file; also used as the output
        file name below ``to_dir``.
    """
    dg_list = []
    sent = ''
    cnt = 0
    with open(f_item, 'rt', encoding='utf-8') as f:
        for line in f:
            if line != '\n':
                sent += line
            else:
                dg_list.append(DependencyGraph(sent, zero_based=True))
                sent = ''
                cnt += 1
                print('the index of sentence' + str(cnt))
    if sent.strip():
        # The file did not end with a blank line: keep the last sentence
        # instead of silently dropping it.
        dg_list.append(DependencyGraph(sent, zero_based=True))

    # only use them to predict, no training!!
    parser_std = TransitionParser('arc-standard')

    # loading trained features
    # NOTE: pickle.load executes arbitrary code from the file; these files
    # must come from a trusted source.
    folder_dir = 'C:/Users/Haoran Zhang/Desktop/nlp/'
    with open(folder_dir + 'self._dictionary', 'rb') as fh:
        parser_std._dictionary = pickle.load(fh)
    with open(folder_dir + 'self._transition', 'rb') as fh:
        parser_std._transition = pickle.load(fh)
    with open(folder_dir + 'self.match_transition', 'rb') as fh:
        parser_std._match_transition = pickle.load(fh)

    # result here is a list of dg
    result = parser_std.parse(dg_list, folder_dir + 'temp.arcstd.model')

    with open(to_dir + f_item, 'w') as out:
        for item in result:
            out.write(item.to_conll(4))
            out.write('\n')
print >>fout,r''' \end{dependency} \end{CJK} \end{document} ''' else: print >>fout,r'''\begin{tikzpicture}''' i = -1 dep_str = '' wids = [] wlens,xpos = [0],[0] for s in open(parse_file): if len(s.strip()) == 0: i += 1 if i == line_num: dg = DependencyGraph(dep_str) tree = dg.tree() if flag == '0': h = tree.height() traverse(tree,h,0) for k,w in wids: print >>fout,'\\node(m{}) at({},{}) {{{}}};'.format(k,0.6*xpos[k],0,w) print >>fout,'\\node at({},{}) {{{}}};'.format(0.6*xpos[k],-0.5,k-1) print >>fout,'\\draw[dotted] (m{}) -- (n{});'.format(k,k) else: print >>fout,tree.pprint_latex_qtree() break dep_str = '' wids = [] wlens,xpos = [0],[0] else: