def main():
    amr_file = r'test-data/amrs.txt'
    sentence_file = r'test-data/sentences.txt'
    if len(sys.argv) > 2:
        amr_file = sys.argv[1]
        sentence_file = sys.argv[2]
    failed_amrs = Counter()
    failed_words = Counter()

    with open(sentence_file, 'r', encoding='utf8') as f1:
        sentences = [s for s in re.split('\n\s*\n', f1.read()) if s]
    with open(amr_file, 'r', encoding='utf8') as f2:
        for i, amr in enumerate(AMR.amr_iter(f2.read())):
            print('#' + str(i + 1))
            words = sentences[i].strip().split()
            amr = AMR(amr)
            # test_rules(amr, words)
            alignments, amr_unal, words_unal = align_amr(amr, words)
            print('# AMR:')
            print('\n'.join('# ' + l for l in str(amr).split('\n')))
            print('# Sentence:')
            print('# ' + ' '.join(words))
            print('# Alignments:')
            for a in alignments:
                print('#', a.readible())
            for a in alignments:
                print(a)
            print()
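
# Hypothetical invocation (the script name is assumed). main() only reads sys.argv[1] and
# sys.argv[2] when more than two argv entries are present; otherwise it falls back to the
# test-data defaults above:
#
#   python align.py path/to/amrs.txt path/to/sentences.txt > alignments.txt
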
def html(text, delete_x_ids=True):
    amr = AMR(text)
    elems = [e for e in amr.text_elements]
    nodes = [id for id in amr.node_ids()]
    edges = [id for id in amr.edge_ids()]
    node_indices = [i for i, e in enumerate(amr.text_elements) if amr.NODE_RE.match(e)]
    edge_indices = [i for i, e in enumerate(amr.text_elements) if amr.EDGE_RE.match(e)]

    Named_Entity_RE = re.compile('x[0-9]+/".*?"')

    for i, e in enumerate(elems):
        if i in node_indices:
            id = nodes.pop(0)
            frame = e.split('/')[-1] if '/' in e else '_'
            node = e
            if delete_x_ids:
                node = re.sub('^x[0-9]+/', '', e, 1)
            if frame in propbank_frames_dictionary:
                description = propbank_frames_dictionary[frame].replace('\t', '\n')
                elems[i] = f'<span class="amr-frame" tok-id="{id}" title="{description}">{node}</span>'
            elif Named_Entity_RE.match(e):
                elems[i] = f'<span class="amr-entity" tok-id="{id}">{node}</span>'
            else:
                elems[i] = f'<span class="amr-node" tok-id="{id}">{node}</span>'
        elif i in edge_indices:
            id = edges.pop(0)
            elems[i] = f'<span class="amr-edge" tok-id="{id}">{e}</span>'

    text = ''.join(elems)
    return '\n<div class="amr-container">\n<pre>\n' + text + '\n</pre>\n</div>\n'
def latex(text):
    amr = AMR(text)
    text = str(amr)
    for x in re.findall('x[0-9]+ ?/ ?[^()\s]+', text):
        text = text.replace(x, '(' + x + ')')
    edges = [(e, id) for e, id in zip(amr.edges(), amr.edge_ids())]

    elems = []
    max_depth = paren_utils.max_depth(text)
    prev_depth = 0
    depth = 0
    i = 0
    node_depth = {}
    for t in paren_utils.paren_iter(text):
        node = amr.NODE_RE.match(t).group()
        id = node.split('/')[0].strip()
        # clean node
        if re.match('x[0-9]+/', node):
            node = node.split('/')[1]
        node = node.replace('"', '``', 1).replace('"', "''", 1)
        prev_depth = depth
        depth = paren_utils.depth_at(text, text.index(t))
        if depth > prev_depth:
            i = 0
        node_depth[id] = depth
        num_nodes = paren_utils.mark_depth(text).count(f'<{depth}>')
        x = AMR_Latex.get_x(i, num_nodes)
        y = AMR_Latex.get_y(depth, max_depth)
        color = AMR_Latex.get_color(i)
        elems.append(f'\t\\node[{color}]({id}) at ({x},{y}) {{{node}}};')
        i += 1

    for edge, id in edges:
        source = id.split('_')[0]
        target = id.split('_')[2]
        dir1 = 'south'
        dir2 = 'north'
        if node_depth[source] > node_depth[target]:
            dir1 = 'north'
            dir2 = 'south'
        if node_depth[source] == node_depth[target]:
            dir1 = 'north'
            dir2 = 'north'
        elems.append(f'\t\\draw[->, thick] ({source}.{dir1}) -- ({target}.{dir2}) node[midway, above, sloped] {{{edge}}};')

    latex = '\n\\begin{tikzpicture}[\n'
    latex += 'red/.style={rectangle, draw=red!60, fill=red!5, very thick, minimum size=7mm},\n'
    latex += 'blue/.style={rectangle, draw=blue!60, fill=blue!5, very thick, minimum size=7mm},\n'
    latex += 'green/.style={rectangle, draw=green!60, fill=green!5, very thick, minimum size=7mm},\n'
    latex += 'purple/.style={rectangle, draw=purple!60, fill=purple!5, very thick, minimum size=7mm},\n'
    latex += 'orange/.style={rectangle, draw=orange!60, fill=orange!5, very thick, minimum size=7mm},\n'
    latex += ']\n'
    latex += '\n'.join(elems)
    latex += '\n\\end{tikzpicture}\n'
    return latex
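
# The string returned by latex() is a bare tikzpicture environment. A minimal (hypothetical)
# wrapper document for compiling it on its own would look like:
#
#   \documentclass{standalone}
#   \usepackage{tikz}
#   \begin{document}
#   % ... paste the output of latex(text) here ...
#   \end{document}
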
def normalize_entity(root, nodes, edges):
    normalize_ids = {id: i for i, id in enumerate(sorted(nodes, key=lambda x: nodes[x]))}
    normalized_entity = AMR()
    for n in nodes:
        normalized_entity.nodes[normalize_ids[n]] = nodes[n]
    for s, r, t in edges:
        normalized_entity.edges.append((normalize_ids[s], r, normalize_ids[t]))
    normalized_entity.edges = sorted(normalized_entity.edges)
    normalized_entity.root = normalize_ids[root]
    return normalized_entity
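
# A minimal, hypothetical sketch of how normalize_entity might be called. The ids and labels
# below are made up; the only assumptions are the AMR attributes the function itself touches
# (an empty AMR() exposing a mutable .nodes dict, .edges list, and .root field).
example_nodes = {'x3': 'country', 'x5': 'name', 'x7': '"France"'}
example_edges = [('x3', ':name', 'x5'), ('x5', ':op1', 'x7')]
normalized = normalize_entity(root='x3', nodes=example_nodes, edges=example_edges)
# normalized.nodes maps the renumbered ids to the same labels, normalized.edges holds the
# sorted, renumbered triples, and normalized.root is the renumbered id of 'x3'.
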
def __init__(self, tokens, verbose=False, add_unaligned=0):
    tokens = tokens.copy()
    # add unaligned
    if add_unaligned and '<unaligned>' not in tokens:
        for i in range(add_unaligned):
            tokens.append('<unaligned>')
    # add root
    if '<ROOT>' not in tokens:
        tokens.append("<ROOT>")
    # init stack, buffer
    self.stack = []
    self.buffer = list(reversed([
        i + 1 for i, tok in enumerate(tokens) if tok != '<unaligned>'
    ]))
    self.latent = list(reversed([
        i + 1 for i, tok in enumerate(tokens) if tok == '<unaligned>'
    ]))
    # init amr
    self.amr = AMR(tokens=tokens)
    for i, tok in enumerate(tokens):
        if tok != "<ROOT>":
            self.amr.nodes[i + 1] = tok
    # add root
    self.buffer[0] = -1
    self.amr.nodes[-1] = "<ROOT>"
    self.new_id = len(tokens) + 1
    self.verbose = verbose
    # parser target output
    self.actions = []
    self.labels = []
    self.labelsA = []
    self.predicates = []
    # information for oracle
    self.merged_tokens = {}
    self.entities = []
    self.is_confirmed = set()
    self.is_confirmed.add(-1)
    self.swapped_words = {}
    if self.verbose:
        print('INIT')
        print(self.printStackBuffer())
def main(args):
    # First, let's read the graphs and surface forms
    with open(args.input_amr) as f:
        amrs = f.readlines()
    with open(args.input_surface) as f:
        surfs = f.readlines()
    if args.triples_output is not None:
        triples_out = open(args.triples_output, 'w')

    # Iterate
    anon_surfs = []
    anon_maps = []
    anon_surfs_scope = []
    i = 0
    with open(args.output, 'w') as out, open(args.output_surface, 'w') as surf_out:
        for amr, surf in zip(amrs, surfs):
            graph = AMR(amr, surf.split())
            # Get variable: concept map for reentrancies
            # (needed by the LIN and GRAPH modes below)
            v2c = graph.var2concept()
            if args.mode == 'LIN':
                # Linearisation mode for seq2seq
                tokens = amr.split()
                new_tokens = simplify(tokens, v2c)
                out.write(' '.join(new_tokens) + '\n')
            elif args.mode == 'GRAPH':
                # Triples mode for graph2seq
                #import ipdb; ipdb.set_trace()
                # Get concepts and generate IDs
                v_ids, rev_v_ids = get_nodes2(graph)
                # Triples
                triples = get_triples(graph, v_ids, rev_v_ids)
                # Print concepts/constants and triples
                #cs = [get_name(c) for c in rev_c_ids]
                cs = [get_name(v, v2c) for v in rev_v_ids]
                out.write(' '.join(cs) + '\n')
                triples_out.write(' '.join(['(' + ','.join(adj) + ')' for adj in triples]) + '\n')
            elif args.mode == 'LINE_GRAPH':
                # Similar to GRAPH, but with edges as extra nodes
                #import ipdb; ipdb.set_trace()
                print(i)
                i += 1
                #if i == 98:
                #    import ipdb; ipdb.set_trace()
                nodes, triples, anon_surf, anon_map, anon_surf_scope = get_line_graph(
                    graph, surf, anon=args.anon)
                out.write(' '.join(nodes) + '\n')
                triples_out.write(' '.join(['(%d,%d,%s)' % adj for adj in triples]) + '\n')
                #surf = ' '.join(new_surf)
                anon_surfs.append(anon_surf)
                anon_maps.append(json.dumps(anon_map))
                anon_surfs_scope.append(anon_surf_scope)
            # Process the surface form
            surf_out.write(surf.lower())

    if args.anon:
        with open(args.anon_surface, 'w') as f:
            for anon_surf in anon_surfs:
                f.write(anon_surf + '\n')
        with open(args.map_output, 'w') as f:
            for anon_map in anon_maps:
                f.write(anon_map + '\n')
        with open(args.anon_surface_scope, 'w') as f:
            for anon_surf_scope in anon_surfs_scope:
                f.write(anon_surf_scope + '\n')
#!/usr/bin/env python2.7
#coding=utf-8
'''
@author: Nathan Schneider ([email protected])
@since: 2015-05-06
'''
from __future__ import print_function
import sys, re, fileinput, codecs
from collections import Counter, defaultdict

from amr import AMR, AMRSyntaxError, AMRError, Concept, AMRConstant

c = Counter()
for ln in fileinput.input():
    try:
        a = AMR(ln)
        c.update(map(repr, a.nodes.keys()))  # vars, concepts, constants: count once per AMR
        c.update('.' + repr(x) for _, r, x in a.triples(rel=':instance-of'))  # concepts count once per variable
        c.update(map((lambda x: x[1]), a.triples()))  # relations
        c.update('.' + repr(x) for _, _, x in a.triples() if isinstance(x, AMRConstant))  # constants count once per relation
    except AMRSyntaxError as ex:
        print(ex, file=sys.stderr)
    except AMRError as ex:
        print(ex, file=sys.stderr)

for k, n in c.most_common():
    print(k, n, sep='\t')
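
# Example invocation (file name hypothetical): fileinput reads AMRs one per line from the
# files named on the command line, or from stdin when none are given, e.g.
#
#   python amr_stats.py amrs_oneline.txt
#   cat amrs_oneline.txt | python amr_stats.py
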
args.p_ctx), str(args.p_proj))
logging.basicConfig(filename=logfilename,
                    level=logging.INFO,
                    format='%(asctime)s :: %(levelname)s :: %(message)s')
logging.info('log info to ' + logfilename)
logging.info(args)

if args.dataset == 'amazon':
    ds = ds_amazon(logging, args)
else:
    raise Exception('no dataset' + args.dataset)

if args.model == 'bpr':
    model = BPR(ds, args, logging)
elif args.model == 'cbpr':
    model = CBPR(ds, args, logging)
elif args.model == 'vbpr':
    model = VBPR(ds, args, logging)
elif args.model == 'amr':
    model = AMR(ds, args, logging)
elif args.model == 'mtpr':
    model = MTPR(ds, args, logging)
else:
    raise Exception('unknown model type', args.model)

model.train()
weight_filename = 'weights/%s_%s_%s_%s_%s.npy' % (
    args.dataset, args.model, str(args.p_emb), str(args.p_ctx), str(args.p_proj))
model.save(weight_filename)
def main(arguments):
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--input_file',
                        help="Path of the file containing AMRs of each sentence",
                        type=str,
                        default='/home/shibhansh/UGP-2/data/LDC2015E86_DEFT_Phase_2_AMR_Annotation_R1/' + \
                                'data/amrs/split/test/deft-p2-amr-r1-amrs-test-alignments-proxy.txt')
    parser.add_argument('--dataset', help="Name of dataset", type=str, default='')
    parser.add_argument('--display',
                        help="Path of the file containing AMRs of each sentence",
                        type=bool,
                        default=False)
    args = parser.parse_args(arguments)

    input_file = args.input_file
    dataset = args.dataset

    '''
    'docs' is a list of 'documents'; each 'document' is a list of dictionaries. Each dictionary
    contains information about a sentence and has 'alignments', 'amr' etc. as keys. Corresponding
    to each key we have the relevant information like the amr, text, alignment etc.
    '''
    # Remove alignments from the new file
    os.system('cp ' + input_file + ' auxiliary/temp')
    with codecs.open('auxiliary/temp', 'r') as data_file:
        original_data = data_file.readlines()
    os.system('sed -i \'s/~e.[ 0-9]*//g\' auxiliary/temp')
    os.system('sed -i \'s/,[ 0-9]*//g\' auxiliary/temp')
    with codecs.open('auxiliary/temp', 'r') as data_file:
        data = data_file.readlines()
    for index_line, line in enumerate(data):
        if line.startswith('#'):
            data[index_line] = original_data[index_line]
    with codecs.open('auxiliary/temp', 'w') as data_file:
        for line in data:
            data_file.write(line)
    input_file = 'auxiliary/temp'

    docs, target_summaries, stories = read_data(input_file)
    os.system('rm auxiliary/temp')
    save_stories(stories, 'auxiliary/stories.txt')
    with open('auxiliary/target_summaries.txt', 'w') as f:
        for summary in target_summaries:
            f.write(tok_to_std_format_convertor(summary) + '\n')

    idf = {}
    with open('auxiliary/' + dataset + '_idf.txt', 'r') as f:
        idf = pickle.load(f)

    f = open('auxiliary/predicted_summaries.txt', 'w')
    summary_sentences_per_story = []
    # currently all the information of a node is stored as a list, changing it to a dictionary
    debug = False
    # 'document_amrs' is the list of document AMRs formed after joining nodes and collapsing same entities etc.
    target_summaries_amrs = []
    predicted_summaries_amrs = []
    document_amrs = []
    selected_sents = []
    for index_doc, doc in enumerate(docs):
        current_doc_sent_amr_list = []
        current_target_summary_sent_amr_list = []
        for index_dict, dict_sentence in enumerate(doc):
            if dict_sentence['amr'] != []:
                if dict_sentence['tok'].strip()[-1] != '.':
                    dict_sentence['tok'] = dict_sentence['tok'] + ' .'
                # Get the AMR class for each sentence using just the text
                if dict_sentence['snt-type'] == 'summary':
                    current_target_summary_sent_amr_list.append(
                        AMR(dict_sentence['amr'],
                            amr_with_attributes=False,
                            text=dict_sentence['tok'],
                            alignments=dict_sentence['alignments']))
                if dict_sentence['snt-type'] == 'body':
                    docs[index_doc][index_dict]['amr'] = AMR(
                        dict_sentence['amr'],
                        amr_with_attributes=False,
                        text=dict_sentence['tok'],
                        alignments=dict_sentence['alignments'])
                    current_doc_sent_amr_list.append(docs[index_doc][index_dict]['amr'])

        # merging the sentence AMRs to form a single AMR
        amr_as_list, document_text, document_alignments, var_to_sent = \
            merge_sentence_amrs(current_doc_sent_amr_list, debug=False)
        new_document_amr = AMR(text_list=amr_as_list,
                               text=document_text,
                               alignments=document_alignments,
                               amr_with_attributes=True,
                               var_to_sent=var_to_sent)
        document_amrs.append(new_document_amr)
        target_summaries_amrs.append(current_target_summary_sent_amr_list)

        # number of nodes required in summary
        imp_doc = index_doc
        # imp_doc = 1000
        if imp_doc == 1000:
            # just the first sentence of the story is the summary
            predicted_summaries_amrs.append([current_doc_sent_amr_list[0]])
        if imp_doc == 2000:
            # just the first two sentences of the story are the summary
            predicted_summaries_amrs.append([current_doc_sent_amr_list[0], current_doc_sent_amr_list[1]])
        if imp_doc == 3000:
            # just the first three sentences of the story are the summary
            predicted_summaries_amrs.append([current_doc_sent_amr_list[0], current_doc_sent_amr_list[1],
                                             current_doc_sent_amr_list[2]])
        if imp_doc == -1:
            # all sentences of the story are the summary
            predicted_summaries_amrs.append(current_doc_sent_amr_list)

        if index_doc == imp_doc:
            document_amrs[index_doc], phrases, idf_vars = resolve_coref_doc_AMR(
                amr=document_amrs[index_doc],
                resolved=True,
                story=' '.join(document_amrs[index_doc].text),
                location_of_resolved_story='auxiliary/' + dataset + '_predicted_resolutions.txt',
                location_of_story_in_file=index_doc,
                location_of_resolver='.',
                idf=idf,
                debug=False)

            cn_freq_dict, cn_sent_lists, cn_var_lists = document_amrs[index_doc].get_common_nouns(phrases=phrases)
            idf_vars = document_amrs[index_doc].get_idf_vars(idf_vars=idf_vars, idf=idf)

            # range equal to the std_deviation of the summary size in the dataset
            if dataset == '':
                current_summary_nodes = []
                for target_summary_amr in current_target_summary_sent_amr_list:
                    current_summary_nodes.extend(target_summary_amr.get_nodes())
                num_summary_nodes = len(current_summary_nodes)
                range_num_nodes = 0
                range_num_nodes = int((len(document_amrs[index_doc].get_nodes()) * 4) / 100)

            document_amrs[index_doc].get_concept_relation_list(story_index=index_doc, debug=False)

            pr = document_amrs[index_doc].directed_graph.rank_sent_in_degree()
            # rank the nodes with the 'meta_nodes'
            pr = document_amrs[index_doc].directed_graph.rank_with_meta_nodes(
                var_freq_list=pr,
                cn_freq_dict=cn_freq_dict,
                cn_sent_lists=cn_sent_lists,
                cn_var_dict=cn_var_lists)
            ranks, weights, _ = zip(*pr)
            print ranks
            print weights
            pr = document_amrs[index_doc].directed_graph.add_idf_ranking(
                var_freq_list=pr,
                default_idf=5.477,
                idf_vars=idf_vars,
                num_vars_to_add=5)
            ranks, weights, _ = zip(*pr)
            print ranks
            print weights

            new_graph = document_amrs[index_doc].directed_graph.construct_greedily_first(
                ranks=ranks,
                weights=weights,
                concept_relation_list=document_amrs[index_doc].concept_relation_list,
                use_true_sent_rank=False,
                num_nodes=num_summary_nodes,
                range_num_nodes=range_num_nodes)

            # generate AMR from the graphical representation
            new_amr_graph = document_amrs[index_doc].get_AMR_from_directed_graph(sub_graph=new_graph)
            new_amr_graph.print_amr()
            predicted_summaries_amrs.append([new_amr_graph])

    with open('auxiliary/' + dataset + '_eos_stories.txt', 'w') as f:
        for document_amr in document_amrs:
            f.write(' <eos> '.join(document_amr.text) + '\n')
    f.close()

    with open('auxiliary/num_sent_per_story.txt', 'w') as f3:
        pickle.dump(summary_sentences_per_story, f3)

    # save document AMR in file
    with open('auxiliary/text_amr.txt', 'w') as f2:
        f2.write('# :id PROXY_AFP_ENG_20050317_010.10 ::amr-annotator SDL-AMR-09 ::preferred ::snt-type body\n')
        f2.write('# ::snt On 21 March 2005\n')
        f2.write('# ::tok On 21 March 2005\n')
        if imp_doc >= 0 and imp_doc < len(document_amrs):
            for index_node, node in enumerate(document_amrs[imp_doc].amr):
                f2.write('\t' * node['depth'] + node['text'] + '\n')

    target_summaries_nodes = []
    for target_summary_amrs in target_summaries_amrs:
        current_summary_nodes = []
        for target_summary_amr in target_summary_amrs:
            # current_summary_nodes.extend(target_summary_amr.get_edge_tuples())
            current_summary_nodes.extend(target_summary_amr.get_nodes())
        target_summaries_nodes.append(current_summary_nodes)

    target_summary_lengths = [len(i) for i in target_summaries_nodes]
    document_lengths = [len(i.get_nodes()) for i in document_amrs]
    ratios = []
    for i in range(len(document_lengths)):
        # cast before dividing so Python 2 integer division does not truncate the ratio to 0
        ratios.append(float(target_summary_lengths[i]) / document_lengths[i] * 100)
    average_ratio = (float(sum(ratios)) / len(ratios))
    deviations = [abs(ratio - average_ratio) for ratio in ratios]
    mean_deviation = (float(sum(deviations)) / len(deviations))
    # average ratio in 'gold' dataset is 9%, and deviation is 4%
    print 'average_ratio', average_ratio, 'mean_deviation', mean_deviation

    with open('auxiliary/target_summary_nodes.txt', 'w') as f6:
        for node_list in target_summaries_nodes:
            f6.write(' '.join([node for node in node_list]) + '\n')

    predicted_summaries_nodes = []
    for predicted_summary_amrs in predicted_summaries_amrs:
        current_summary_nodes = []
        for predicted_summary_amr in predicted_summary_amrs:
            # current_summary_nodes.extend(predicted_summary_amr.get_edge_tuples())
            current_summary_nodes.extend(predicted_summary_amr.get_nodes())
        predicted_summaries_nodes.append(current_summary_nodes)

    with open('auxiliary/predicted_summary_nodes.txt', 'w') as f7:
        for node_list in predicted_summaries_nodes:
            f7.write(' '.join([node for node in node_list]) + '\n')
def main(arguments):
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--input_file',
                        help="Path of the file containing AMRs of each sentence",
                        type=str,
                        default='/home/prerna/Documents/thesis_work/LDC2015E86_DEFT_Phase_2_AMR_Annotation_R1/' + \
                                'data/amrs/split/test/deft-p2-amr-r1-amrs-test-alignments-proxy.txt')
    parser.add_argument('--dataset', help="Name of dataset", type=str, default='')
    parser.add_argument('--display',
                        help="Path of the file containing AMRs of each sentence",
                        type=bool,
                        default=False)
    args = parser.parse_args(arguments)

    input_file = args.input_file
    dataset = args.dataset

    # 'docs' is a list of 'documents'; each 'document' is a list of dictionaries. Each dictionary
    # contains information about a sentence and has 'alignments', 'amr' etc. as keys. Corresponding
    # to each key we have the relevant information like the amr, text, alignment etc.

    # Remove alignments from the new file
    os.system('cp ' + input_file + ' auxiliary/temp')
    with codecs.open('auxiliary/temp', 'r') as data_file:
        original_data = data_file.readlines()
    os.system('sed -i \'s/~e.[ 0-9]*//g\' auxiliary/temp')
    os.system('sed -i \'s/,[ 0-9]*//g\' auxiliary/temp')
    with codecs.open('auxiliary/temp', 'r') as data_file:
        data = data_file.readlines()
    for index_line, line in enumerate(data):
        if line.startswith('#'):
            data[index_line] = original_data[index_line]
    with codecs.open('auxiliary/temp', 'w') as data_file:
        for line in data:
            data_file.write(line)
    input_file = 'auxiliary/temp'

    docs, target_summaries, stories = read_data(input_file)
    os.system('rm auxiliary/temp')
    save_stories(stories, 'auxiliary/stories.txt')
    with open('auxiliary/target_summaries.txt', 'w') as f:
        for summary in target_summaries:
            f.write(tok_to_std_format_convertor(summary) + '\n')

    f = open('auxiliary/predicted_summaries.txt', 'w')
    summary_sentences_per_story = []
    # currently all the information of a node is stored as a list, changing it to a dictionary
    debug = False
    # 'document_amrs' is the list of document AMRs formed after joining nodes and collapsing same entities etc.
    target_summaries_amrs = []
    predicted_summaries_amrs = []
    document_amrs = []
    selected_sents = []
    for index_doc, doc in enumerate(docs):
        current_doc_sent_amr_list = []
        current_target_summary_sent_amr_list = []
        for index_dict, dict_sentence in enumerate(doc):
            if dict_sentence['amr'] != []:
                if dict_sentence['tok'].strip()[-1] != '.':
                    dict_sentence['tok'] = dict_sentence['tok'] + ' .'
                # Get the AMR class for each sentence using just the text
                if dict_sentence['snt-type'] == 'summary':
                    current_target_summary_sent_amr_list.append(
                        AMR(dict_sentence['amr'],
                            amr_with_attributes=False,
                            text=dict_sentence['tok'],
                            alignments=dict_sentence['alignments']))
                if dict_sentence['snt-type'] == 'body':
                    docs[index_doc][index_dict]['amr'] = AMR(
                        dict_sentence['amr'],
                        amr_with_attributes=False,
                        text=dict_sentence['tok'],
                        alignments=dict_sentence['alignments'])
                    current_doc_sent_amr_list.append(
                        docs[index_doc][index_dict]['amr'])

        # merging the sentence AMRs to form a single AMR
        amr_as_list, document_text, document_alignments, var_to_sent = \
            merge_sentence_amrs(current_doc_sent_amr_list, debug=False)
        new_document_amr = AMR(text_list=amr_as_list,
                               text=document_text,
                               alignments=document_alignments,
                               amr_with_attributes=True,
                               var_to_sent=var_to_sent)
        document_amrs.append(new_document_amr)
        target_summaries_amrs.append(current_target_summary_sent_amr_list)

        imp_doc = index_doc
        if imp_doc == 1000:
            # just the first sentence of the story is the summary
            predicted_summaries_amrs.append([current_doc_sent_amr_list[0]])

        print index_doc
        if index_doc == imp_doc:
            document_amrs[index_doc] = resolve_coref_doc_AMR(
                amr=document_amrs[index_doc],
                resolved=True,
                story=' '.join(document_amrs[index_doc].text),
                # location_of_resolved_story='auxiliary/human_corefs.txt',
                location_of_resolved_story='auxiliary/' + dataset + '_predicted_resolutions.txt',
                location_of_story_in_file=index_doc,
                location_of_resolver='.',
                debug=False)

            pr = document_amrs[index_doc].directed_graph.rank_sent_in_degree()
            ranks, weights = zip(*pr)
            print ranks
            print weights

            # get pairs in order of importance
            ranked_pairs = document_amrs[index_doc].directed_graph.rank_pairs(
                ranks=ranks, weights=weights, pairs_to_rank=3)
            # print 'ranked_pairs', ranked_pairs
            paths_and_sub_graphs = document_amrs[index_doc].directed_graph.max_imp_path(
                ordered_pairs=ranked_pairs)

            # add method to check no repeated sub_graph
            summary_paths = []
            summary_amrs = []
            summary_amrs_text = []
            for path_and_sub_graph in paths_and_sub_graphs:
                path, sub_graph, sent = path_and_sub_graph
                path_sent_dict = {}
                if sent == -1:
                    path_sent_dict = document_amrs[index_doc].break_path_by_sentences(path=path)
                else:
                    path_sent_dict[sent] = path

                for key in path_sent_dict.keys():
                    temp_path = path_sent_dict[key]
                    # path = document_amrs[index_doc].concept_relation_list.get_concepts_given_path(sent_index=key, path=temp_path)
                    path = -1
                    # key = 0
                    if path == -1:
                        path = document_amrs[index_doc].get_sent_amr(sent_index=key)
                    nodes, sub_graph = document_amrs[index_doc].directed_graph.get_name_path(nodes=path)
                    new_amr_graph = document_amrs[index_doc].get_AMR_from_directed_graph(sub_graph=sub_graph)

                    repeated_path = False
                    # removing repeating sents/amrs
                    for var_set in summary_paths:
                        if set(var_set) == set(nodes):
                            repeated_path = True
                    if repeated_path:
                        continue

                    summary_paths.append(list(nodes))
                    summary_amrs_text.append(
                        new_amr_graph.print_amr(file=f,
                                                print_indices=False,
                                                write_in_file=True,
                                                one_line_output=True,
                                                return_str=True,
                                                to_print=False))
                    print ''
                    summary_amrs.append(new_amr_graph)

            final_summary_amrs_text = []
            final_summary_amrs = []
            for index, path in enumerate(summary_paths):
                indices_to_search_at = range(len(summary_paths))
                indices_to_search_at.remove(index)
                to_print = True
                for index_2 in indices_to_search_at:
                    if set(path) < set(summary_paths[index_2]):
                        to_print = False
                if to_print:
                    final_summary_amrs_text.append(summary_amrs_text[index])
                    final_summary_amrs.append(summary_amrs[index])

            for summary_amr in final_summary_amrs_text:
                try:
                    summary_sentences_per_story[index_doc] += 1
                except:
                    summary_sentences_per_story.append(1)
                print summary_amr
            predicted_summaries_amrs.append(final_summary_amrs)

    with open('auxiliary/' + dataset + '_eos_stories.txt', 'w') as f:
        for document_amr in document_amrs:
            f.write(' <eos> '.join(document_amr.text) + '\n')
    f.close()

    with open('auxiliary/num_sent_per_story.txt', 'w') as f3:
        pickle.dump(summary_sentences_per_story, f3)

    # save document AMR in file
    with open('auxiliary/text_amr.txt', 'w') as f2:
        f2.write('# :id PROXY_AFP_ENG_20050317_010.10 ::amr-annotator SDL-AMR-09 ::preferred ::snt-type body\n')
        f2.write('# ::snt On 21 March 2005\n')
        f2.write('# ::tok On 21 March 2005\n')
        if imp_doc >= 0 and imp_doc < len(document_amrs):
            for index_node, node in enumerate(document_amrs[imp_doc].amr):
                f2.write('\t' * node['depth'] + node['text'] + '\n')

    # an option to generate the graphical representations
    # return document_amrs

    target_summaries_nodes = []
    for target_summary_amrs in target_summaries_amrs:
        current_summary_nodes = []
        for target_summary_amr in target_summary_amrs:
            current_summary_nodes.extend(target_summary_amr.get_nodes())
        target_summaries_nodes.append(current_summary_nodes)
    with open('auxiliary/target_summary_nodes.txt', 'w') as f6:
        for node_list in target_summaries_nodes:
            f6.write(' '.join([node for node in node_list]) + '\n')

    predicted_summaries_nodes = []
    for predicted_summary_amrs in predicted_summaries_amrs:
        current_summary_nodes = []
        for predicted_summary_amr in predicted_summary_amrs:
            current_summary_nodes.extend(predicted_summary_amr.get_nodes())
        predicted_summaries_nodes.append(current_summary_nodes)
    with open('auxiliary/predicted_summary_nodes.txt', 'w') as f7:
        for node_list in predicted_summaries_nodes:
            f7.write(' '.join([node for node in node_list]) + '\n')