Example #1
0
def main():
    """Score predicted action sequences against gold AMR graphs.

    Reads gold blocks from ``-gold`` and blank-line separated prediction
    records from ``-pred_actions``. The two are paired positionally with
    ``zip``, so extra entries on either side are silently ignored. For each
    pair, the ``# ::action`` lines are replayed through ``Generator.parse``
    and the resulting graph is handed to the scorer together with the gold
    graph. The final ``scorer.f_score()`` is printed to stdout.
    """
    cmd = argparse.ArgumentParser(description='the evaluate script.')
    cmd.add_argument('-gold', help='the path to the gold amr graph.')
    cmd.add_argument('-pred_actions', help='the path to the predicted actions.')
    opt = cmd.parse_args()

    reader = AlignmentReader(opt.gold)
    generator = Generator()
    scorer = SmatchScorer()

    # Load every prediction record up front and release the handle right away.
    with codecs.open(opt.pred_actions, 'r', encoding='utf-8') as fpi:
        predict_dataset = fpi.read().strip().split('\n\n')

    for block, predict_data in zip(reader, predict_dataset):
        graph = Alignment(block)
        # Each action line is "# ::action\t<field>\t<field>..."; keep only
        # the tab-separated fields.
        actions = [line.replace('# ::action\t', '').split('\t')
                   for line in predict_data.splitlines()
                   if line.startswith('# ::action')]
        try:
            state = generator.parse(graph, actions)
            # NOTE(review): on Python 3 this yields bytes while the fallback
            # below is str -- confirm which type SmatchScorer.update expects.
            predict_amr_graph = str(state.arcs_).encode('utf-8')
        except Exception:
            # Best effort: score an empty placeholder graph instead of
            # aborting the whole evaluation on one bad action sequence.
            predict_amr_graph = '(a / amr-empty)'
        scorer.update(graph.amr_graph, predict_amr_graph)
    print(scorer.f_score())
Example #2
0
def main():
    """Replace one ``# ::<key>`` header line in every block using a lexicon.

    The lexicon file (``-lexicon``) holds two-line records separated by
    blank lines: the first line is looked up via ``graph.n`` and the second
    line is the text printed in place of the header line that starts with
    ``# ::<key>``. All other ``#`` lines are echoed, optionally dropping the
    ``# ::node`` / ``# ::edge`` / ``# ::root`` lines, and each block ends
    with the graph string followed by a blank line.
    """
    cmd = argparse.ArgumentParser(
        description='Get the block that contains certain amr graph.')
    cmd.add_argument('-lexicon', help='the path to the alignment file.')
    cmd.add_argument('-data', help='the path to the alignment file.')
    cmd.add_argument('-key', required=True, help='the key')
    cmd.add_argument('-remove_node_edge_and_root',
                     default=False,
                     action='store_true',
                     help='')
    opt = cmd.parse_args()

    # Read the lexicon in one go so the file handle is closed immediately.
    with open(opt.lexicon, 'r') as fpi:
        entries = fpi.read().strip().split('\n\n')

    lexicon = {}
    for data in entries:
        lines = data.splitlines()
        assert len(lines) == 2
        lexicon[lines[0].strip()] = lines[1].strip()

    signature = '# ::{0}'.format(opt.key)
    structural_prefixes = ('# ::node', '# ::edge', '# ::root')
    handler = AlignmentReader(opt.data)
    for block in handler:
        graph = Alignment(block)
        for line in block:
            if opt.remove_node_edge_and_root and \
                    line.startswith(structural_prefixes):
                continue
            # Non-comment lines are skipped here; the graph itself is
            # printed once after the loop.
            if not line.startswith('#'):
                continue
            if line.startswith(signature):
                print(lexicon[graph.n])
            else:
                # NOTE(review): .encode() prints a bytes repr on Python 3 --
                # confirm the intended interpreter before changing it.
                print(line.encode('utf-8'))
        print(graph.amr_graph.encode('utf-8'), end='\n\n')
Example #3
0
def main():
    """Run the oracle over every aligned block and print the result.

    Modes (``-mod``):
      * ``parse`` / ``evaluate``: echo every line of the block, appending the
        parser name, smatch score and action count to the ``# ::alignments``
        line, then print the oracle-produced graph (``parse``) or the gold
        graph (``evaluate``).
      * ``dump``: print the block id, its ``# ::tok`` / ``# ::pos`` / graph
        lines, and one ``# ::action`` line per oracle action.

    Any exception raised while handling a block is reported on stderr with
    the block id and a traceback, and processing continues with the next
    block.
    """
    cmd = argparse.ArgumentParser(description='Test the program.')
    cmd.add_argument(
        '-mod',
        default='evaluate',
        choices=('parse', 'evaluate', 'dump'),
        help=
        'the running mode. -parse: evaluate the best AMR graph achieved by the alignment '
        '(specified in ::alignment field) and use the resulted graph to replace the original '
        'AMR graph; -evaluate: same as parser without replacement; -dump: dump action file.'
    )
    cmd.add_argument('-aligned', help='the path to the filename.')
    cmd.add_argument('-verbose',
                     default=False,
                     action='store_true',
                     help='verbose the actions.')
    opt = cmd.parse_args()

    align_handler = AlignmentReader(opt.aligned)
    parser = Oracle(verbose=opt.verbose)

    for align_block in align_handler:
        graph = Alignment(align_block)
        try:
            actions, state = parser.parse(graph)

            if opt.mod in ('parse', 'evaluate'):
                predicted_amr_graph = str(state.arcs_)
                f_score = smatch(predicted_amr_graph, graph.amr_graph)
                for line in align_block:
                    if line.startswith('# ::alignments'):
                        line = line + ' ::parser eager_oracle.py' \
                                      ' ::smatch {0} ::n_actions {1}'.format(f_score, len(actions))
                    # Every line is echoed on purpose: the gold AMR string
                    # is not ignored.
                    print(line)
                if opt.mod == 'parse':
                    print(str(state.arcs_))
                else:
                    print(graph.amr_graph)
            else:
                print('# ::id {0}'.format(graph.n))
                for line in align_block:
                    if line.startswith(('# ::tok', '# ::pos', '(')):
                        print(line)
                print('\n'.join(
                    ['# ::action {0}'.format(action) for action in actions]))
            # Blank line terminates the block in the output.
            print()

            if opt.verbose:
                print(graph.n, file=sys.stderr)
                print('\n'.join(actions), file=sys.stderr, end='\n\n')
        except Exception:
            # Keep going: one bad block should not stop the whole run.
            print(graph.n, file=sys.stderr)
            traceback.print_exc(file=sys.stderr)
def main():
    """Substitute the ``# ::alignments`` line of every block from a lexicon.

    The lexicon file (``-lexicon``) holds two-line records separated by
    blank lines: the first line is looked up via ``graph.n`` and the second
    line is the replacement alignment line. The new alignment is parsed and
    pushed back into the graph. Unless ``-keep_alignment_in_node`` is given,
    every ``# ::node`` line is regenerated with the span the graph now
    reports for that node. Each block ends with the graph string followed
    by a blank line.
    """
    cmd = argparse.ArgumentParser(
        description='Get the block that contains certain amr graph.')
    cmd.add_argument('-lexicon', help='the path to the alignment file.')
    cmd.add_argument('-data', help='the path to the alignment file.')
    cmd.add_argument('-keep_alignment_in_node',
                     default=False,
                     action='store_true',
                     help='')
    opt = cmd.parse_args()

    # Read the lexicon in one go so the file handle is closed immediately.
    with open(opt.lexicon, 'r') as fpi:
        entries = fpi.read().strip().split('\n\n')

    lexicon = {}
    for data in entries:
        lines = data.splitlines()
        assert len(lines) == 2
        lexicon[lines[0].strip()] = lines[1].strip()

    handler = AlignmentReader(opt.data)
    for block in handler:
        graph = Alignment(block)
        new_alignment = lexicon[graph.n]

        graph.alignments = Alignment._parse_alignment([new_alignment])
        graph.refill_alignment()

        for line in block:
            # Non-comment lines are skipped; the graph is printed after the
            # loop.
            if not line.startswith('#'):
                continue
            if line.startswith('# ::alignments'):
                print(new_alignment)
            elif not opt.keep_alignment_in_node and line.startswith(
                    '# ::node'):
                # After whitespace splitting, tokens[2] is passed to
                # get_node_by_level and tokens[3] is echoed unchanged.
                tokens = line.split()
                level = tokens[2]
                alignment = graph.get_node_by_level(level).alignment
                span = '{0}-{1}'.format(alignment[0], alignment[1]) \
                    if alignment else ''
                print('# ::node\t{0}\t{1}\t{2}'.format(
                    level, tokens[3], span))
            else:
                print(line)

        print(graph.amr_graph, end='\n\n')
def align(opt):
    """Search for the best-scoring alignment of every block in ``opt.data``.

    For each block the existing alignment is scored first: the oracle is run
    on the graph and its output is compared with the gold graph via
    ``smatch``. The matchers and updaters then collect candidate alignments,
    which are enumerated unless there are more than ``opt.trials`` of them,
    or the baseline score is already 1.0 and ``opt.improve_perfect`` is off.
    The candidate with the highest score wins, with ties broken by fewer
    oracle actions. The winning alignment is written to ``opt.output``, or
    to stdout when no output path is given.

    Args:
        opt: parsed options. Reads ``data``, ``output``, ``verbose``,
            ``morpho_match``, ``semantic_match``, ``cased``, ``report_only``,
            ``improve_perfect``, ``trials`` and ``show_all``.
    """
    reader = AlignmentReader(opt.data)
    stemmer = Stemmer()
    matchers = [
        WordMatcher(),
        FuzzyWordMatcher(),
        FuzzySpanMatcher(),
        NamedEntityMatcher(),
        FuzzyNamedEntityMatcher(),
        DateEntityMatcher(),
        URLEntityMatcher(),
        OrdinalEntityMatcher(),
        MinusPolarityMatcher(),
        BelocatedAtMatcher(),
        TemporalQuantityMatcher()
    ]

    if opt.morpho_match:
        matchers.append(MorphosemanticLinkMatcher())

    if opt.semantic_match:
        matchers.append(SemanticWordMatcher(lower=not opt.cased))
        matchers.append(SemanticNamedEntityMatcher(lower=not opt.cased))

    updaters = [
        EntityTypeUpdater(),
        PersonOfUpdater(),
        QuantityUpdater(),
        PersonUpdater(),
        MinusPolarityPrefixUpdater(),
        RelativePositionUpdater(),
        DegreeUpdater(),
        HaveOrgRoleUpdater(),
        GovernmentOrganizationUpdater(),
        CauseUpdater(),
        ImperativeUpdater(),
        PossibleUpdater()
    ]

    unaligned_records = []
    oracle = Oracle(verbose=False)
    fpo = codecs.open(opt.output, 'w',
                      encoding='utf-8') if opt.output else sys.stdout
    try:
        for block in reader:
            graph = Alignment(block)
            if opt.verbose:
                print('Aligning {0}'.format(graph.n), file=sys.stderr)

            # Baseline: the alignment already stored on the graph's nodes.
            best_alignment = [(n.level, None, n.alignment[0], n.alignment[1])
                              for n in graph.true_nodes() if n.alignment]
            actions, states = oracle.parse(graph)
            pred_amr_graph = str(states.arcs_)
            baseline_f_score = smatch(graph.amr_graph, pred_amr_graph)
            baseline_n_actions = len(actions)
            best_f_score, best_n_actions = baseline_f_score, baseline_n_actions

            words = graph.tok
            postags = graph.pos if hasattr(
                graph, 'pos') else [None for _ in range(len(words))]
            stemmed_words = [
                stemmer.stem(word, postag)
                for word, postag in zip(words, postags)
            ]

            results = AlignedResults()
            for matcher in matchers:
                matcher.match(words, stemmed_words, postags, graph, results)

            # Re-run the updaters until a full pass reports no change.
            # NOTE(review): `or` short-circuits, so once one updater reports
            # a change the remaining updaters are skipped until the next
            # pass -- confirm this is intended before reordering.
            added = True
            while added:
                added = False
                for updater in updaters:
                    added = added or updater.update(words, graph, results)

            unaligned = [(n.level, n.name) for n in graph.true_nodes()
                         if n.level not in results.levels_to_spans]
            if unaligned:
                unaligned_records.append((graph.n, unaligned))

            if opt.report_only:
                continue

            n_test = number_of_enumerate_alignment(results)
            if opt.verbose:
                print(' - Going to enumerate {0}'.format(n_test),
                      file=sys.stderr)
            if not opt.improve_perfect and baseline_f_score == 1.:
                print(' - Best already achieved.', file=sys.stderr)
            elif n_test > opt.trials:
                print(' - Too many test!', file=sys.stderr)
            else:
                for alignment in enumerate_alignment(results):
                    fill_alignment(graph, alignment)

                    actions, states = oracle.parse(graph)
                    pred_amr_graph = str(states.arcs_)
                    pred_f_score = smatch(graph.amr_graph, pred_amr_graph)
                    pred_n_actions = len(actions)
                    # Higher score wins; on a tie prefer fewer actions.
                    if pred_f_score > best_f_score or \
                            (pred_f_score == best_f_score and
                             pred_n_actions < best_n_actions):
                        best_f_score = pred_f_score
                        best_n_actions = pred_n_actions
                        best_alignment = alignment[:]

            if opt.verbose:
                if best_f_score > baseline_f_score or \
                        (best_f_score == baseline_f_score and
                         best_n_actions < baseline_n_actions):
                    print(' - Better achieved!', file=sys.stderr)
                else:
                    print(' - Stay the same.', file=sys.stderr)

            # Leave the graph holding the winning alignment before
            # serialising it.
            fill_alignment(graph, best_alignment)
            output = alignment_string(graph)
            now = datetime.datetime.now()
            output = '# ::alignments {0} ::annotator aligner3.py ::date {1} ::parser {2} ::smatch {3} ' \
                     '::n_actions {4}'.format(output, now, oracle.name, best_f_score, best_n_actions)
            if not opt.show_all:
                print(graph.n, file=fpo)
                print(output, end='\n\n', file=fpo)
                continue

            for line in graph.block:
                if not line.startswith('#'):
                    # First non-comment line: emit the graph and end the
                    # block.
                    print(graph.amr_graph, file=fpo, end='\n\n')
                    break
                if line.startswith('# ::alignments'):
                    print(output, file=fpo)
                elif line.startswith('# ::node'):
                    tokens = line.split()
                    level = tokens[2]
                    node_alignment = graph.get_node_by_level(level).alignment
                    span = '{0}-{1}'.format(
                        node_alignment[0],
                        node_alignment[1]) if node_alignment else ''
                    print('# ::node\t{0}\t{1}\t{2}'.format(
                        level, tokens[3], span),
                          file=fpo)
                else:
                    print(line, file=fpo)
    finally:
        # Close only the handle opened here; stdout must stay usable.
        if fpo is not sys.stdout:
            fpo.close()

    dump_unaligned_records(unaligned_records)
def exclusively_align(opt):
    """Align every block in ``opt.data`` with a single, unambiguous result.

    The matchers and updaters fill an ``AlignedResults(multiple=False)``.
    Each updater is applied exactly once, and the code asserts that this
    leaves exactly one alignment to enumerate. That alignment is stored on
    the graph and written to ``opt.output``, or to stdout when no output
    path is given.

    Args:
        opt: parsed options. Reads ``data``, ``output``, ``verbose``,
            ``morpho_match``, ``semantic_match``, ``cased``, ``report_only``
            and ``show_all``.
    """
    reader = AlignmentReader(opt.data)
    stemmer = Stemmer()
    matchers = [
        WordMatcher(),
        FuzzyWordMatcher(),
        FuzzySpanMatcher(),
        NamedEntityMatcher(),
        FuzzyNamedEntityMatcher(),
        DateEntityMatcher(),
        URLEntityMatcher(),
        OrdinalEntityMatcher(),
        MinusPolarityMatcher(),
        BelocatedAtMatcher(),
        TemporalQuantityMatcher()
    ]

    if opt.morpho_match:
        matchers.append(MorphosemanticLinkMatcher())

    if opt.semantic_match:
        matchers.append(SemanticWordMatcher(lower=not opt.cased))
        matchers.append(SemanticNamedEntityMatcher(lower=not opt.cased))

    updaters = [
        EntityTypeUpdater(),
        PersonOfUpdater(),
        QuantityUpdater(),
        PersonUpdater(),
        MinusPolarityPrefixUpdater(),
        RelativePositionUpdater(),
        DegreeUpdater(),
        HaveOrgRoleUpdater(),
        GovernmentOrganizationUpdater(),
        CauseUpdater(),
        ImperativeUpdater(),
        PossibleUpdater()
    ]

    unaligned_records = []
    fpo = codecs.open(opt.output, 'w',
                      encoding='utf-8') if opt.output else sys.stdout
    try:
        for block in reader:
            graph = Alignment(block)
            if opt.verbose:
                print('Aligning {0}'.format(graph.n), file=sys.stderr)

            words = graph.tok
            postags = graph.pos if hasattr(
                graph, 'pos') else [None for _ in range(len(words))]
            stemmed_words = [
                stemmer.stem(word, postag)
                for word, postag in zip(words, postags)
            ]

            results = AlignedResults(multiple=False)
            for matcher in matchers:
                matcher.match(words, stemmed_words, postags, graph, results)
            # Each updater runs exactly once; there is no repeat-until-stable
            # loop here.
            for updater in updaters:
                updater.update(words, graph, results)

            unaligned = [(n.level, n.name) for n in graph.true_nodes()
                         if n.level not in results.levels_to_spans]
            if unaligned:
                unaligned_records.append((graph.n, unaligned))

            if opt.report_only:
                continue

            n_test = number_of_enumerate_alignment(results)
            assert n_test == 1

            # Only the first enumerated alignment is applied.
            for alignment in enumerate_alignment(results):
                fill_alignment(graph, alignment)
                break

            output = alignment_string(graph)
            now = datetime.datetime.now()
            output = '# ::alignments {0} ::annotator aligner_v0.py ::date {1}'.format(
                output, now)
            if not opt.show_all:
                print(graph.n, file=fpo)
                print(output, end='\n\n', file=fpo)
                continue

            for line in graph.block:
                if not line.startswith('#'):
                    # First non-comment line: emit the graph and end the
                    # block.
                    print(graph.amr_graph, file=fpo, end='\n\n')
                    break
                if line.startswith('# ::alignments'):
                    print(output, file=fpo)
                elif line.startswith('# ::node'):
                    tokens = line.split()
                    level = tokens[2]
                    node_alignment = graph.get_node_by_level(level).alignment
                    span = '{0}-{1}'.format(
                        node_alignment[0],
                        node_alignment[1]) if node_alignment else ''
                    print('# ::node\t{0}\t{1}\t{2}'.format(
                        level, tokens[3], span),
                          file=fpo)
                else:
                    print(line, file=fpo)
    finally:
        # Close only the handle opened here; stdout must stay usable.
        if fpo is not sys.stdout:
            fpo.close()