コード例 #1
0
ファイル: processors.py プロジェクト: mfomicheva/metric-dev
    def run(self, config, from_file=False):
        """Align parsed target sentences to the references and save the result.

        Reads ``tgt.parse`` and ``ref.parse`` from the directory named by the
        ``working_dir`` option of the ``Data`` config section, aligns target
        sentence ``i`` with reference sentence ``i`` using
        ``AlignerStanford('english')`` and writes the alignments to
        ``tgt.parse.ref.parse.cobalt-align-stanford.out`` in that directory.
        If the output file already exists, a notice is printed and nothing
        else is done.

        Each output section starts with ``Sentence #N`` followed by one line
        per aligned pair, ``[tgt_index, ref_index] : [tgt_form, ref_form] :
        <label>``, and ends with a blank line.

        :param config: object whose ``get('Data', 'working_dir')`` returns the
            working directory (a leading ``~`` is expanded).
        :param from_file: not used by this method.
        :return: None
        """
        working_dir = os.path.expanduser(config.get('Data', 'working_dir'))
        tgt_name, ref_name = 'tgt.parse', 'ref.parse'
        tgt_path = working_dir + '/' + tgt_name
        ref_path = working_dir + '/' + ref_name
        # Built once so the existence check and the writer always refer to the
        # same file.
        out_path = working_dir + '/' + tgt_name + '.' + ref_name + '.cobalt-align-stanford.out'

        if os.path.exists(out_path):
            print("Alignments already exist.\n Aligner will not run.")
            return

        targets = StanfordParseLoader.parsed_sentences(tgt_path)
        references = StanfordParseLoader.parsed_sentences(ref_path)

        aligner = AlignerStanford('english')
        # Index the references explicitly (rather than zip) so that a missing
        # reference sentence is not silently skipped.
        alignments = [aligner.align(sentence, references[i])
                      for i, sentence in enumerate(targets)]

        # All alignments are computed before the file is created; the context
        # manager guarantees the handle is closed even if writing fails.
        with codecs.open(out_path, 'w', 'utf-8') as output:
            for i, alignment in enumerate(alignments):
                output.write('Sentence #' + str(i + 1) + '\n')

                # alignment[0] holds the aligned index pairs, alignment[1] maps
                # each (tgt, ref) pair to the text written as the last field.
                for pair in sorted(alignment[0], key=lambda x: x[0]):
                    # Pair indices are shifted by one to address the tokens.
                    tgt_token = targets[i][pair[0] - 1]
                    ref_token = references[i][pair[1] - 1]
                    output.write(
                        '[' + str(tgt_token.index) + ', ' + str(ref_token.index) + ']' + ' : ' +
                        '[' + tgt_token.form + ', ' + ref_token.form + ']' + ' : ' +
                        alignment[1][(pair[0], pair[1])] + '\n')

                output.write('\n')
コード例 #2
0
ファイル: processors.py プロジェクト: mfomicheva/metric-dev
    def get(self, config, from_file=False):
        """Load the parsed target and reference sentences into this processor.

        The working directory is taken from the ``working_dir`` option of the
        ``Data`` config section (a leading ``~`` is expanded). ``tgt.parse``
        and ``ref.parse`` from that directory are loaded with
        ``StanfordParseLoader.parsed_sentences`` and stored through
        ``AbstractProcessor.set_result_tgt`` / ``set_result_ref``.

        :param config: object providing ``get(section, option)``.
        :param from_file: not used by this method.
        """
        data_dir = os.path.expanduser(config.get('Data', 'working_dir'))

        tgt_sentences = StanfordParseLoader.parsed_sentences('/'.join((data_dir, 'tgt.parse')))
        ref_sentences = StanfordParseLoader.parsed_sentences('/'.join((data_dir, 'ref.parse')))

        # Call the base-class setters explicitly, exactly as before.
        AbstractProcessor.set_result_tgt(self, tgt_sentences)
        AbstractProcessor.set_result_ref(self, ref_sentences)
コード例 #3
0
ファイル: processors.py プロジェクト: mfomicheva/metric-dev
    def run(self, config, from_file=False):
        """Compile context differences for existing alignments and save them.

        Works in the directory named by the ``working_dir`` option of the
        ``Data`` config section. Reads the alignments from
        ``tgt.parse.ref.parse.cobalt-align-stanford.out`` with
        ``CobaltAlignReader``, loads ``tgt.parse`` and ``ref.parse``, runs
        ``ContextInfoCompiler('english').compile_context_info`` for each
        sentence pair and writes the result to
        ``tgt.parse.ref.parse.cobalt-align-stanford-context-diff.out``.
        If the output file already exists, a notice is printed and nothing
        else is done.

        Each output section starts with ``Sentence #N`` followed by one line
        per aligned pair, ``[tgt_index, ref_index] : [tgt_form, ref_form] :
        <label> : srcDiff=...;srcCon=...;tgtDiff=...;tgtCon=...``, and ends
        with a blank line.

        :param config: object whose ``get('Data', 'working_dir')`` returns the
            working directory (a leading ``~`` is expanded).
        :param from_file: not used by this method.
        :return: None
        """
        working_dir = os.path.expanduser(config.get('Data', 'working_dir'))
        tgt_name, ref_name = 'tgt.parse', 'ref.parse'
        tgt_path = working_dir + '/' + tgt_name
        ref_path = working_dir + '/' + ref_name
        # Both the alignment input and the context-diff output share this
        # prefix; build each path once so check, reader and writer agree.
        pair_prefix = working_dir + '/' + tgt_name + '.' + ref_name
        align_path = pair_prefix + '.cobalt-align-stanford.out'
        out_path = pair_prefix + '.cobalt-align-stanford-context-diff.out'

        if os.path.exists(out_path):
            print("Context difference already compiled.\n Context difference compiler will not run.")
            return

        reader = CobaltAlignReader()

        alignment_result = reader.read(align_path)
        targets = StanfordParseLoader.parsed_sentences(tgt_path)
        references = StanfordParseLoader.parsed_sentences(ref_path)

        compiler = ContextInfoCompiler('english')
        # Index explicitly (rather than zip) so that a missing reference or
        # alignment entry is not silently skipped.
        info = [compiler.compile_context_info(sentence, references[i], alignment_result[i][0])
                for i, sentence in enumerate(targets)]

        # All context info is computed before the file is created; the context
        # manager guarantees the handle is closed even if writing fails.
        with codecs.open(out_path, 'w', 'utf-8') as output:
            for i, context_info in enumerate(info):
                output.write('Sentence #' + str(i + 1) + '\n')

                # alignment_result[i][0] holds the aligned index pairs and
                # alignment_result[i][2] the per-pair text written as the
                # third field; context_info is indexed in the same order.
                for j, a in enumerate(alignment_result[i][0]):
                    # Pair indices are shifted by one to address the tokens.
                    tgt_token = targets[i][a[0] - 1]
                    ref_token = references[i][a[1] - 1]
                    entry = context_info[j]
                    output.write('[' + str(tgt_token.index) + ', ' + str(ref_token.index) + ']' + ' : ')
                    output.write('[' + tgt_token.form + ', ' + ref_token.form + ']' + ' : ')
                    output.write(alignment_result[i][2][j] + ' : ')
                    output.write('srcDiff=' + ','.join(entry['srcDiff']) + ';')
                    output.write('srcCon=' + ','.join(entry['srcCon']) + ';')
                    output.write('tgtDiff=' + ','.join(entry['tgtDiff']) + ';')
                    output.write('tgtCon=' + ','.join(entry['tgtCon']) + '\n')

                output.write('\n')
コード例 #4
0
ファイル: test_context.py プロジェクト: mfomicheva/metric-dev
import codecs

from utils.meteor_align_reader import MeteorAlignReader
from utils.stanford_format import StanfordParseLoader
from alignment.context_evidence import ContextEvidence


# Parsed sentences for the test data set (target side and reference side).
parsed_target = StanfordParseLoader.parsed_sentences('data_test/tgt.parse')
parsed_ref = StanfordParseLoader.parsed_sentences('data_test/ref.parse')

# Raw reader output plus the alignments derived from it.
meteor_alignments = MeteorAlignReader.read('data_test/tgt.meteor-align.out')
alignments = MeteorAlignReader.alignments(meteor_alignments)

context = ContextEvidence()

# Attach the context difference to every word pair of every sentence's alignment.
for i, alignment in enumerate(alignments):
    for word_pair in alignment:
        # The sentence-level lookups stay inside the inner loop so that a
        # sentence with no word pairs never touches index i of the inputs.
        difference = context.context_differences(
            word_pair.left_word,
            word_pair.right_word,
            parsed_target[i],
            parsed_ref[i],
            meteor_alignments[i][0]
        )
        word_pair.context_difference = difference