def main():
    """Score predicted action sequences against the gold AMR graphs.

    Command-line options:
      -gold          path handed to ``AlignmentReader`` (gold AMR blocks).
      -pred_actions  UTF-8 file of blank-line separated blocks whose
                     ``# ::action`` lines hold tab-separated actions.

    Gold blocks and prediction blocks are paired positionally with ``zip``,
    so the shorter of the two inputs decides how many pairs are scored.
    Each action list is replayed through ``Generator.parse``; the gold graph
    and the rebuilt graph are passed to ``SmatchScorer.update`` and the
    scorer's final ``f_score()`` is printed.
    """
    cmd = argparse.ArgumentParser(usage='the evaluate script.')
    cmd.add_argument('-gold', help='the path to the gold amr graph.')
    cmd.add_argument('-pred_actions',
                     help='the path to the predicted actions.')
    opt = cmd.parse_args()

    reader = AlignmentReader(opt.gold)
    generator = Generator()
    scorer = SmatchScorer()

    # Read the predictions inside a context manager so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with codecs.open(opt.pred_actions, 'r', encoding='utf-8') as fpi:
        predict_dataset = fpi.read().strip().split('\n\n')

    for block, predict_data in zip(reader, predict_dataset):
        graph = Alignment(block)
        actions = [
            line.replace('# ::action\t', '').split('\t')
            for line in predict_data.splitlines()
            if line.startswith('# ::action')
        ]
        try:
            state = generator.parse(graph, actions)
            predict_amr_graph = str(state.arcs_).encode('utf-8')
        except Exception:
            # Best effort: a sequence that cannot be replayed is scored
            # against an empty placeholder graph rather than aborting the
            # whole evaluation. ``Exception`` (not a bare ``except``) keeps
            # Ctrl-C and SystemExit working.
            predict_amr_graph = '(a / amr-empty)'
        scorer.update(graph.amr_graph, predict_amr_graph)

    print(scorer.f_score())
def main():
    """Re-emit alignment blocks with one comment field replaced.

    Command-line options:
      -lexicon  file of blank-line separated two-line records; the first
                line is the lookup key, the second the replacement text.
      -data     path handed to ``AlignmentReader``.
      -key      (required) field name; header lines starting with
                ``# ::<key>`` are replaced.
      -remove_node_edge_and_root
                drop ``# ::node``, ``# ::edge`` and ``# ::root`` lines.

    For every block, each ``#`` line is printed unchanged unless it starts
    with the ``# ::<key>`` signature, in which case the lexicon entry looked
    up by ``graph.n`` is printed instead. Non-comment lines of the block are
    not echoed; the block ends with ``graph.amr_graph`` and a blank line.

    Raises:
        AssertionError: if a lexicon record does not have exactly two lines.
        KeyError: if ``graph.n`` has no entry in the lexicon.
    """
    cmd = argparse.ArgumentParser(
        'Get the block that contains certain amr graph.')
    cmd.add_argument('-lexicon', help='the path to the alignment file.')
    cmd.add_argument('-data', help='the path to the alignment file.')
    cmd.add_argument('-key', required=True, help='the key')
    cmd.add_argument('-remove_node_edge_and_root', default=False,
                     action='store_true', help='')
    opt = cmd.parse_args()

    # Load the lexicon inside a context manager so the file is closed.
    with open(opt.lexicon, 'r') as fpi:
        records = fpi.read().strip().split('\n\n')

    lexicon = {}
    for data in records:
        lines = data.splitlines()
        assert len(lines) == 2, \
            'lexicon record must have exactly two lines: {0!r}'.format(data)
        lexicon[lines[0].strip()] = lines[1].strip()

    signature = '# ::{0}'.format(opt.key)
    dropped_prefixes = ('# ::node', '# ::edge', '# ::root')

    handler = AlignmentReader(opt.data)
    for block in handler:
        graph = Alignment(block)
        for line in block:
            if opt.remove_node_edge_and_root and \
                    line.startswith(dropped_prefixes):
                continue
            if line.startswith('#'):
                if not line.startswith(signature):
                    print(line.encode('utf-8'))
                else:
                    print(lexicon[graph.n])
        print(graph.amr_graph.encode('utf-8'), end='\n\n')
def main():
    """Run the oracle over aligned AMR blocks and print the outcome.

    Command-line options:
      -mod      one of ``parse``, ``evaluate`` (default) or ``dump``.
      -aligned  path handed to ``AlignmentReader``.
      -verbose  passed to ``Oracle`` and also echoes each action list to
                stderr.

    In ``parse``/``evaluate`` mode every line of the block is echoed, with
    the parser name, smatch score and action count appended to the
    ``# ::alignments`` line; then either the oracle's graph (``parse``) or
    the gold graph (``evaluate``) is printed. In ``dump`` mode the id, the
    ``# ::tok`` / ``# ::pos`` / graph lines and one ``# ::action`` line per
    action are printed. A blank line is printed after each block.

    Any exception raised while handling a block is reported on stderr
    (block id plus traceback) and processing continues with the next block.
    """
    cmd = argparse.ArgumentParser('Test the program.')
    cmd.add_argument(
        '-mod',
        default='evaluate',
        choices=('parse', 'evaluate', 'dump'),
        # Fix: the original literals were joined without a space, producing
        # "originalAMR graph" in the rendered help text.
        help='the running mode. -parse: evaluate the best AMR graph achieved by the alignment '
             '(specified in ::alignment field) and use the resulted graph to replace the original '
             'AMR graph; -evaluate: same as parser without replacement; -dump: dump action file.')
    cmd.add_argument('-aligned', help='the path to the filename.')
    cmd.add_argument('-verbose', default=False, action='store_true',
                     help='verbose the actions.')
    opt = cmd.parse_args()

    align_handler = AlignmentReader(opt.aligned)
    parser = Oracle(verbose=opt.verbose)

    for align_block in align_handler:
        graph = Alignment(align_block)
        try:
            actions, state = parser.parse(graph)

            if opt.mod in ('parse', 'evaluate'):
                predicted_amr_graph = str(state.arcs_)
                f_score = smatch(predicted_amr_graph, graph.amr_graph)
                for line in align_block:
                    if line.startswith('# ::alignments'):
                        line = line + (
                            ' ::parser eager_oracle.py'
                            ' ::smatch {0} ::n_actions {1}'.format(
                                f_score, len(actions)))
                    # Every line is echoed, including the gold AMR string
                    # lines; the loop deliberately does not stop at '('.
                    print(line)
                if opt.mod == 'parse':
                    print(str(state.arcs_))
                else:
                    print(graph.amr_graph)
            else:
                print('# ::id {0}'.format(graph.n))
                for line in align_block:
                    if line.startswith(('# ::tok', '# ::pos', '(')):
                        print(line)
                print('\n'.join(
                    ['# ::action {0}'.format(action) for action in actions]))
            print()

            if opt.verbose:
                print(graph.n, file=sys.stderr)
                print('\n'.join(actions), file=sys.stderr, end='\n\n')
        except Exception:
            # Best effort per block: report which block failed and move on.
            print(graph.n, file=sys.stderr)
            traceback.print_exc(file=sys.stderr)
def main():
    """Re-emit alignment blocks using alignments taken from a lexicon file.

    Command-line options:
      -lexicon  file of blank-line separated two-line records; the first
                line is the lookup key, the second the new alignment line.
      -data     path handed to ``AlignmentReader``.
      -keep_alignment_in_node
                when set, ``# ::node`` lines are echoed unchanged; when not
                set they are rewritten with the refilled node alignment.

    For each block the lexicon entry for ``graph.n`` is parsed with
    ``Alignment._parse_alignment`` and pushed into the graph via
    ``refill_alignment``. Comment lines are then printed, substituting the
    ``# ::alignments`` line (and, unless suppressed, the ``# ::node``
    lines), followed by ``graph.amr_graph`` and a blank line.

    Raises:
        AssertionError: if a lexicon record does not have exactly two lines.
        KeyError: if ``graph.n`` has no entry in the lexicon.
    """
    cmd = argparse.ArgumentParser(
        'Get the block that contains certain amr graph.')
    cmd.add_argument('-lexicon', help='the path to the alignment file.')
    cmd.add_argument('-data', help='the path to the alignment file.')
    cmd.add_argument('-keep_alignment_in_node', default=False,
                     action='store_true', help='')
    opt = cmd.parse_args()

    # Load the lexicon inside a context manager so the file is closed.
    with open(opt.lexicon, 'r') as fpi:
        records = fpi.read().strip().split('\n\n')

    lexicon = {}
    for data in records:
        lines = data.splitlines()
        assert len(lines) == 2, \
            'lexicon record must have exactly two lines: {0!r}'.format(data)
        lexicon[lines[0].strip()] = lines[1].strip()

    handler = AlignmentReader(opt.data)
    for block in handler:
        graph = Alignment(block)
        new_alignment = lexicon[graph.n]
        graph.alignments = Alignment._parse_alignment([new_alignment])
        graph.refill_alignment()

        for line in block:
            if not line.startswith('#'):
                # Non-comment lines are not echoed; the graph is printed
                # once after the loop.
                continue
            if line.startswith('# ::alignments'):
                print(new_alignment)
            elif not opt.keep_alignment_in_node and \
                    line.startswith('# ::node'):
                tokens = line.split()
                level = tokens[2]
                alignment = graph.get_node_by_level(level).alignment
                span = '{0}-{1}'.format(alignment[0], alignment[1]) \
                    if alignment else ''
                print('# ::node\t{0}\t{1}\t{2}'.format(
                    level, tokens[3], span))
            else:
                print(line)
        print(graph.amr_graph, end='\n\n')
def align(opt):
    """Search, per AMR block, for the alignment that gives the best oracle.

    Args:
        opt: parsed options. The attributes read here are ``data``,
            ``morpho_match``, ``semantic_match``, ``cased``, ``output``,
            ``verbose``, ``report_only``, ``improve_perfect``, ``trials``
            and ``show_all``.

    For every block the existing node alignments form the baseline, scored
    by smatch between the gold graph and the graph rebuilt by the oracle
    (ties broken by fewer actions). The matchers and updaters then collect
    candidate alignments; if enumeration is allowed (baseline not already
    perfect unless ``improve_perfect``, and no more than ``trials``
    candidates) every candidate is scored and the best one kept. The chosen
    alignment is written to ``opt.output`` (or stdout), either as a compact
    id + ``# ::alignments`` record or, with ``show_all``, as the full block
    with rewritten ``# ::alignments`` / ``# ::node`` lines. Blocks that have
    nodes without any candidate span are passed to
    ``dump_unaligned_records`` at the end.
    """
    reader = AlignmentReader(opt.data)
    stemmer = Stemmer()

    matchers = [
        WordMatcher(),
        FuzzyWordMatcher(),
        FuzzySpanMatcher(),
        NamedEntityMatcher(),
        FuzzyNamedEntityMatcher(),
        DateEntityMatcher(),
        URLEntityMatcher(),
        OrdinalEntityMatcher(),
        MinusPolarityMatcher(),
        BelocatedAtMatcher(),
        TemporalQuantityMatcher(),
    ]
    if opt.morpho_match:
        matchers.append(MorphosemanticLinkMatcher())
    if opt.semantic_match:
        matchers.append(SemanticWordMatcher(lower=not opt.cased))
        matchers.append(SemanticNamedEntityMatcher(lower=not opt.cased))

    updaters = [
        EntityTypeUpdater(),
        PersonOfUpdater(),
        QuantityUpdater(),
        PersonUpdater(),
        MinusPolarityPrefixUpdater(),
        RelativePositionUpdater(),
        DegreeUpdater(),
        HaveOrgRoleUpdater(),
        GovernmentOrganizationUpdater(),
        CauseUpdater(),
        ImperativeUpdater(),
        PossibleUpdater(),
    ]

    unaligned_records = []
    oracle = Oracle(verbose=False)
    fpo = codecs.open(opt.output, 'w', encoding='utf-8') \
        if opt.output else sys.stdout

    try:
        for block in reader:
            graph = Alignment(block)
            if opt.verbose:
                print('Aligning {0}'.format(graph.n), file=sys.stderr)

            # Baseline: the alignment already stored on the graph's nodes.
            best_alignment = [
                (n.level, None, n.alignment[0], n.alignment[1])
                for n in graph.true_nodes() if n.alignment
            ]
            actions, states = oracle.parse(graph)
            pred_amr_graph = str(states.arcs_)
            baseline_f_score = smatch(graph.amr_graph, pred_amr_graph)
            baseline_n_actions = len(actions)
            best_f_score = baseline_f_score
            best_n_actions = baseline_n_actions

            words = graph.tok
            postags = graph.pos if hasattr(graph, 'pos') \
                else [None for _ in range(len(words))]
            stemmed_words = [
                stemmer.stem(word, postag)
                for word, postag in zip(words, postags)
            ]

            results = AlignedResults()
            for matcher in matchers:
                matcher.match(words, stemmed_words, postags, graph, results)

            # Re-run the updaters until a pass reports no change. Because
            # of the short-circuiting ``or``, once one updater returns a
            # truthy value the remaining updaters are skipped for that pass
            # and only run again on the next pass.
            added = True
            while added:
                added = False
                for updater in updaters:
                    added = added or updater.update(words, graph, results)

            unaligned = [
                (n.level, n.name) for n in graph.true_nodes()
                if n.level not in results.levels_to_spans
            ]
            if len(unaligned) > 0:
                unaligned_records.append((graph.n, unaligned))
            if opt.report_only:
                continue

            n_test = number_of_enumerate_alignment(results)
            if opt.verbose:
                print(' - Going to enumerate {0}'.format(n_test),
                      file=sys.stderr)

            if not opt.improve_perfect and baseline_f_score == 1.:
                print(' - Best already achieved.', file=sys.stderr)
            elif n_test > opt.trials:
                print(' - Too many test!', file=sys.stderr)
            else:
                for alignment in enumerate_alignment(results):
                    fill_alignment(graph, alignment)
                    actions, states = oracle.parse(graph)
                    pred_amr_graph = str(states.arcs_)
                    pred_f_score = smatch(graph.amr_graph, pred_amr_graph)
                    pred_n_actions = len(actions)
                    # Higher smatch wins; on a tie the shorter action
                    # sequence wins.
                    if pred_f_score > best_f_score or \
                            (pred_f_score == best_f_score and
                             pred_n_actions < best_n_actions):
                        best_f_score = pred_f_score
                        best_n_actions = pred_n_actions
                        best_alignment = alignment[:]

            if opt.verbose:
                if best_f_score > baseline_f_score or \
                        (best_f_score == baseline_f_score and
                         best_n_actions < baseline_n_actions):
                    print(' - Better achieved!', file=sys.stderr)
                else:
                    print(' - Stay the same.', file=sys.stderr)

            # The enumeration leaves the graph filled with the last
            # candidate tried, so restore the winner before serializing.
            fill_alignment(graph, best_alignment)
            output = alignment_string(graph)
            now = datetime.datetime.now()
            output = '# ::alignments {0} ::annotator aligner3.py ::date {1} ::parser {2} ::smatch {3} ' \
                     '::n_actions {4}'.format(output, now, oracle.name, best_f_score, best_n_actions)

            if not opt.show_all:
                print(graph.n, file=fpo)
                print(output, end='\n\n', file=fpo)
                continue

            for line in graph.block:
                if not line.startswith("#"):
                    # First non-comment line: emit the graph once and stop.
                    print(graph.amr_graph, file=fpo, end='\n\n')
                    break
                if line.startswith('# ::alignments'):
                    print(output, file=fpo)
                elif line.startswith('# ::node'):
                    tokens = line.split()
                    level = tokens[2]
                    alignment = graph.get_node_by_level(level).alignment
                    span = '{0}-{1}'.format(alignment[0], alignment[1]) \
                        if alignment else ''
                    print('# ::node\t{0}\t{1}\t{2}'.format(
                        level, tokens[3], span), file=fpo)
                else:
                    print(line, file=fpo)
    finally:
        # Close the output file we opened, but never close sys.stdout.
        if fpo is not sys.stdout:
            fpo.close()

    dump_unaligned_records(unaligned_records)
def exclusively_align(opt):
    """Align each AMR block using exactly one candidate alignment.

    Args:
        opt: parsed options. The attributes read here are ``data``,
            ``morpho_match``, ``semantic_match``, ``cased``, ``output``,
            ``verbose``, ``report_only`` and ``show_all``.

    The matchers and a single pass of the updaters fill an
    ``AlignedResults(multiple=False)``; the one enumerated alignment is
    applied to the graph and written to ``opt.output`` (or stdout), either
    as a compact id + ``# ::alignments`` record or, with ``show_all``, as
    the full block with rewritten ``# ::alignments`` / ``# ::node`` lines.

    Raises:
        AssertionError: if the results do not enumerate to exactly one
            alignment.
    """
    reader = AlignmentReader(opt.data)
    stemmer = Stemmer()

    matchers = [
        WordMatcher(),
        FuzzyWordMatcher(),
        FuzzySpanMatcher(),
        NamedEntityMatcher(),
        FuzzyNamedEntityMatcher(),
        DateEntityMatcher(),
        URLEntityMatcher(),
        OrdinalEntityMatcher(),
        MinusPolarityMatcher(),
        BelocatedAtMatcher(),
        TemporalQuantityMatcher(),
    ]
    if opt.morpho_match:
        matchers.append(MorphosemanticLinkMatcher())
    if opt.semantic_match:
        matchers.append(SemanticWordMatcher(lower=not opt.cased))
        matchers.append(SemanticNamedEntityMatcher(lower=not opt.cased))

    updaters = [
        EntityTypeUpdater(),
        PersonOfUpdater(),
        QuantityUpdater(),
        PersonUpdater(),
        MinusPolarityPrefixUpdater(),
        RelativePositionUpdater(),
        DegreeUpdater(),
        HaveOrgRoleUpdater(),
        GovernmentOrganizationUpdater(),
        CauseUpdater(),
        ImperativeUpdater(),
        PossibleUpdater(),
    ]

    # NOTE(review): these records are collected but never reported in this
    # function -- confirm whether that is intentional.
    unaligned_records = []
    fpo = codecs.open(opt.output, 'w', encoding='utf-8') \
        if opt.output else sys.stdout

    try:
        for block in reader:
            graph = Alignment(block)
            if opt.verbose:
                print('Aligning {0}'.format(graph.n), file=sys.stderr)

            words = graph.tok
            postags = graph.pos if hasattr(graph, 'pos') \
                else [None for _ in range(len(words))]
            stemmed_words = [
                stemmer.stem(word, postag)
                for word, postag in zip(words, postags)
            ]

            results = AlignedResults(multiple=False)
            for matcher in matchers:
                matcher.match(words, stemmed_words, postags, graph, results)
            # A single pass over the updaters; their return value is unused.
            for updater in updaters:
                updater.update(words, graph, results)

            unaligned = [
                (n.level, n.name) for n in graph.true_nodes()
                if n.level not in results.levels_to_spans
            ]
            if len(unaligned) > 0:
                unaligned_records.append((graph.n, unaligned))
            if opt.report_only:
                continue

            n_test = number_of_enumerate_alignment(results)
            assert n_test == 1
            # Only the first enumerated alignment is applied.
            for alignment in enumerate_alignment(results):
                fill_alignment(graph, alignment)
                break

            output = alignment_string(graph)
            now = datetime.datetime.now()
            output = '# ::alignments {0} ::annotator aligner_v0.py ::date {1}'.format(
                output, now)

            if not opt.show_all:
                print(graph.n, file=fpo)
                print(output, end='\n\n', file=fpo)
                continue

            for line in graph.block:
                if not line.startswith("#"):
                    # First non-comment line: emit the graph once and stop.
                    print(graph.amr_graph, file=fpo, end='\n\n')
                    break
                if line.startswith('# ::alignments'):
                    print(output, file=fpo)
                elif line.startswith('# ::node'):
                    tokens = line.split()
                    level = tokens[2]
                    alignment = graph.get_node_by_level(level).alignment
                    span = '{0}-{1}'.format(alignment[0], alignment[1]) \
                        if alignment else ''
                    print('# ::node\t{0}\t{1}\t{2}'.format(
                        level, tokens[3], span), file=fpo)
                else:
                    print(line, file=fpo)
    finally:
        # Close the output file we opened, but never close sys.stdout.
        if fpo is not sys.stdout:
            fpo.close()