def run(self, config, from_file=False):
    """Align each target sentence with its reference and save the alignments.

    The working directory is read from the ``working_dir`` option of the
    ``Data`` section of *config* (``~`` is expanded).  ``tgt.parse`` and
    ``ref.parse`` in that directory are loaded with ``StanfordParseLoader``,
    aligned pairwise with ``AlignerStanford('english')`` and written to
    ``tgt.parse.ref.parse.cobalt-align-stanford.out`` in the same directory.

    If the output file already exists, a notice is printed and nothing else
    happens.

    Args:
        config: object exposing ``get(section, option)``.
        from_file: not used by this method.

    Returns:
        None.
    """
    working_dir = os.path.expanduser(config.get('Data', 'working_dir'))
    tgt_name = 'tgt.parse'
    ref_name = 'ref.parse'
    tgt_path = working_dir + '/' + tgt_name
    ref_path = working_dir + '/' + ref_name
    # Built once so the existence check and the writer can never disagree.
    output_path = (working_dir + '/' + tgt_name + '.' + ref_name +
                   '.cobalt-align-stanford.out')

    if os.path.exists(output_path):
        print("Alignments already exist.\n Aligner will not run.")
        return

    targets = StanfordParseLoader.parsed_sentences(tgt_path)
    references = StanfordParseLoader.parsed_sentences(ref_path)

    aligner = AlignerStanford('english')
    # Sentences are paired by position; every target needs a reference at
    # the same index.
    alignments = [aligner.align(sentence, references[i])
                  for i, sentence in enumerate(targets)]

    # The context manager closes the file even if formatting a line fails.
    with codecs.open(output_path, 'w', 'utf-8') as output:
        for i, alignment in enumerate(alignments):
            output.write('Sentence #' + str(i + 1) + '\n')

            # alignment[0] holds the aligned index pairs and alignment[1]
            # maps each (target, reference) pair to the text written last
            # on the line.
            for pair in sorted(alignment[0], key=lambda x: x[0]):
                # Pair indices are 1-based; sentences are 0-based.
                tgt_word = targets[i][pair[0] - 1]
                ref_word = references[i][pair[1] - 1]
                output.write(''.join([
                    '[', str(tgt_word.index), ', ', str(ref_word.index), ']',
                    ' : ',
                    '[', tgt_word.form, ', ', ref_word.form, ']',
                    ' : ',
                    alignment[1][(pair[0], pair[1])],
                    '\n',
                ]))

            output.write('\n')
def get(self, config, from_file=False):
    """Load the parsed target and reference sentences and store them.

    Reads ``tgt.parse`` and ``ref.parse`` from the directory given by the
    ``working_dir`` option of the ``Data`` section of *config* (``~`` is
    expanded) and hands the loaded sentences to
    ``AbstractProcessor.set_result_tgt`` / ``set_result_ref``.

    Args:
        config: object exposing ``get(section, option)``.
        from_file: not used by this method.
    """
    base_dir = os.path.expanduser(config.get('Data', 'working_dir'))

    # Both files are loaded (target first) before either result is stored.
    loaded = [
        StanfordParseLoader.parsed_sentences('/'.join((base_dir, file_name)))
        for file_name in ('tgt.parse', 'ref.parse')
    ]
    tgt_sentences, ref_sentences = loaded

    AbstractProcessor.set_result_tgt(self, tgt_sentences)
    AbstractProcessor.set_result_ref(self, ref_sentences)
def run(self, config, from_file=False):
    """Compile context information for every aligned word pair and save it.

    The working directory is read from the ``working_dir`` option of the
    ``Data`` section of *config* (``~`` is expanded).  The alignments in
    ``tgt.parse.ref.parse.cobalt-align-stanford.out`` are read with
    ``CobaltAlignReader``, combined with the parsed sentences from
    ``tgt.parse`` and ``ref.parse`` by ``ContextInfoCompiler('english')``,
    and the result is written to
    ``tgt.parse.ref.parse.cobalt-align-stanford-context-diff.out``.

    If the output file already exists, a notice is printed and nothing else
    happens.

    Args:
        config: object exposing ``get(section, option)``.
        from_file: not used by this method.

    Returns:
        None.
    """
    working_dir = os.path.expanduser(config.get('Data', 'working_dir'))
    tgt_name = 'tgt.parse'
    ref_name = 'ref.parse'
    tgt_path = working_dir + '/' + tgt_name
    ref_path = working_dir + '/' + ref_name
    # Every derived file name shares this stem; build it once.
    stem = working_dir + '/' + tgt_name + '.' + ref_name
    alignment_path = stem + '.cobalt-align-stanford.out'
    output_path = stem + '.cobalt-align-stanford-context-diff.out'

    if os.path.exists(output_path):
        print("Context difference already compiled.\n Context difference compiler will not run.")
        return

    reader = CobaltAlignReader()
    alignment_result = reader.read(alignment_path)

    targets = StanfordParseLoader.parsed_sentences(tgt_path)
    references = StanfordParseLoader.parsed_sentences(ref_path)

    compiler = ContextInfoCompiler('english')
    # Sentences, references and alignment records are matched by position.
    info = [
        compiler.compile_context_info(sentence, references[i],
                                      alignment_result[i][0])
        for i, sentence in enumerate(targets)
    ]

    # The context manager closes the file even if formatting a line fails.
    with codecs.open(output_path, 'w', 'utf-8') as output:
        for i, context_info in enumerate(info):
            output.write('Sentence #' + str(i + 1) + '\n')

            # Element 0 of a record holds the index pairs; element 2 holds
            # the text written after each pair, in the same order.
            for j, pair in enumerate(alignment_result[i][0]):
                # Pair indices are 1-based; sentences are 0-based.
                tgt_word = targets[i][pair[0] - 1]
                ref_word = references[i][pair[1] - 1]
                entry = context_info[j]

                output.write('[' + str(tgt_word.index) + ', ' +
                             str(ref_word.index) + ']' + ' : ')
                output.write('[' + tgt_word.form + ', ' +
                             ref_word.form + ']' + ' : ')
                output.write(alignment_result[i][2][j] + ' : ')
                output.write('srcDiff=' + ','.join(entry['srcDiff']) + ';')
                output.write('srcCon=' + ','.join(entry['srcCon']) + ';')
                output.write('tgtDiff=' + ','.join(entry['tgtDiff']) + ';')
                output.write('tgtCon=' + ','.join(entry['tgtCon']) + '\n')

            output.write('\n')
import codecs

from utils.meteor_align_reader import MeteorAlignReader
from utils.stanford_format import StanfordParseLoader
from alignment.context_evidence import ContextEvidence

# Script: attach a context difference to every word pair of the Meteor
# alignments found under data_test/.

# Parsed sentences for both sides of the comparison.
parsed_target = StanfordParseLoader.parsed_sentences('data_test/tgt.parse')
parsed_ref = StanfordParseLoader.parsed_sentences('data_test/ref.parse')

# Raw Meteor output and the word-pair alignments derived from it.
meteor_alignments = MeteorAlignReader.read('data_test/tgt.meteor-align.out')
alignments = MeteorAlignReader.alignments(meteor_alignments)

context = ContextEvidence()

# Sentences, alignments and raw Meteor records are matched by position.
for i, alignment in enumerate(alignments):
    for word_pair in alignment:
        difference = context.context_differences(
            word_pair.left_word,
            word_pair.right_word,
            parsed_target[i],
            parsed_ref[i],
            meteor_alignments[i][0],
        )
        word_pair.context_difference = difference