def __init__(self, align_model='', src_file='', tg_file='', tmp_dir=None, context_size=1):
    """Set up the working directory, the alignment model and the context size.

    :param align_model: an existing alignment model; '' or None means
        that no model is supplied
    :param src_file: source-side training file, used only when no model
        is supplied
    :param tg_file: target-side training file, used only when no model
        is supplied
    :param tmp_dir: working directory, created if missing; defaults to
        the current working directory
    :param context_size: stored unchanged in ``self.context_size``
    :raises OSError: if ``tmp_dir`` cannot be created
    """
    if tmp_dir is None:
        tmp_dir = os.getcwd()
    try:
        os.makedirs(tmp_dir)
    except OSError as exc:  # Python >2.5
        # An already existing directory is fine, anything else is an error.
        if exc.errno != errno.EEXIST or not os.path.isdir(tmp_dir):
            raise
    self.tmp_dir = tmp_dir

    # '' stands for "no alignment model"; it may legitimately stay that way
    # if the context objects already carry alignments.
    self.model = ''
    if align_model is None or align_model == '':
        # No model supplied: train one, but only if both training files
        # really exist. The truthiness checks keep None (and '') away from
        # os.path.isfile, which raises TypeError on None.
        if src_file and tg_file and os.path.isfile(src_file) and os.path.isfile(tg_file):
            self.model = train_alignments(src_file, tg_file, self.tmp_dir)
    else:
        self.model = align_model
    self.context_size = context_size
def __init__(self, align_model=None, src_file=None, tg_file=None, lex_prefix=None, tmp_dir=None, moses_dir=None, moses_config=None, workers=1):
    """Store the configuration and obtain an alignment model.

    :param align_model: an existing alignment model; if None, a model is
        trained from ``src_file`` and ``tg_file``
    :param src_file: source-side training file (used only for training)
    :param tg_file: target-side training file (used only for training)
    :param lex_prefix: stored unchanged in ``self.lex_prob``
    :param tmp_dir: handed to ``mk_tmp_dir``; its result becomes
        ``self.tmp_dir`` (presumably the working directory -- TODO confirm)
    :param moses_dir: stored unchanged in ``self.moses_dir``
    :param moses_config: stored unchanged in ``self.moses_config``
    :param workers: stored unchanged in ``self.workers``

    If neither a model nor both training files are given, a message is
    printed and ``self.align_model`` is left unset.
    """
    self.tmp_dir = mk_tmp_dir(tmp_dir)
    self.time_stamp = str(time.time())
    self.moses_dir = moses_dir
    self.moses_config = moses_config
    self.workers = workers
    self.lex_prob = lex_prefix

    if align_model is not None:
        self.align_model = align_model
        return
    if src_file is None or tg_file is None:
        print("Alignment model not defined, no files for training")
        return
    # Train in the directory returned by mk_tmp_dir (the raw tmp_dir
    # argument may be None) and pass an explicit model name: align_model
    # is always None on this path.
    self.align_model = train_alignments(src_file, tg_file, self.tmp_dir, align_model='align_model')
def get_alignments(src_file, tg_file, trained_model=None, src_train='', tg_train='', align_model='align_model', label='alignments'):
    """Align two parallel files line by line and write the result to disk.

    :param src_file: path of the source-side file
    :param tg_file: path of the target-side file
    :param trained_model: prefix of an existing model; the aligner is given
        ``<prefix>.fwd_params``, ``.fwd_err``, ``.rev_params`` and
        ``.rev_err``. If None, a model is trained first.
    :param src_train: source-side training file (used only for training)
    :param tg_train: target-side training file (used only for training)
    :param align_model: passed to ``train_alignments`` when training
    :param label: label returned together with the output file name
    :return: ``(label, align_file)`` on success, where ``align_file`` is
        ``src_file + '_' + basename(tg_file) + '.aligned'``; an empty list
        if no model is available
    """
    if trained_model is None:
        trained_model = train_alignments(src_train, tg_train, align_model)
    if trained_model == '':
        sys.stderr.write('No alignment model trained\n')
        return []

    align_file = src_file + '_' + os.path.basename(tg_file) + '.aligned'
    aligner = Aligner(trained_model + '.fwd_params', trained_model + '.fwd_err',
                      trained_model + '.rev_params', trained_model + '.rev_err')
    try:
        # Context managers guarantee that all three files are closed, even
        # if aligning or decoding a line fails half-way through.
        with open(src_file) as src, open(tg_file) as tg, open(align_file, 'w') as aligned:
            for src_line, tg_line in zip(src, tg):
                # rstrip('\n') rather than [:-1]: a final line without a
                # trailing newline must not lose its last character.
                src_text = src_line.rstrip('\n').decode('utf-8')
                tg_text = tg_line.rstrip('\n').decode('utf-8')
                aligned.write(aligner.align(src_text + u' ||| ' + tg_text) + u'\n')
    finally:
        aligner.close()
    return (label, align_file)
def __init__(self, align_model=None, src_file=None, tg_file=None, tmp_dir=None):
    """Obtain an alignment model, training one if none is supplied.

    :param align_model: an existing alignment model; if None, a model is
        trained from ``src_file`` and ``tg_file``
    :param src_file: source-side training file (used only for training)
    :param tg_file: target-side training file (used only for training)
    :param tmp_dir: handed to ``mk_tmp_dir``; the result is used as the
        training directory (presumably created if missing -- TODO confirm)

    If neither a model nor both training files are given, a message is
    printed and ``self.align_model`` is left unset.
    """
    tmp_dir = mk_tmp_dir(tmp_dir)
    if align_model is not None:
        self.align_model = align_model
        return
    if src_file is None or tg_file is None:
        print("Alignment model not defined, no files for training")
        return
    # align_model is always None on this path, so forwarding it would hand
    # None to the trainer; pass an explicit model name instead.
    self.align_model = train_alignments(src_file, tg_file, tmp_dir, align_model='align_model')
def __init__(self, lex_file, align_model=None, src_file=None, tg_file=None, tmp_dir=None):
    """Obtain an alignment model and load the lexical probabilities.

    :param lex_file: passed to ``self.get_align_prob``; the result is
        stored in ``self.lex_prob``
    :param align_model: an existing alignment model; if None, a model is
        trained from ``src_file`` and ``tg_file``
    :param src_file: source-side training file (used only for training)
    :param tg_file: target-side training file (used only for training)
    :param tmp_dir: handed to ``mk_tmp_dir``; the result is used as the
        training directory (presumably created if missing -- TODO confirm)

    If neither a model nor both training files are given, a message is
    printed and the constructor returns early, leaving both
    ``self.align_model`` and ``self.lex_prob`` unset.
    """
    tmp_dir = mk_tmp_dir(tmp_dir)
    if align_model is None:
        if src_file is None or tg_file is None:
            print("Alignment model not defined, no files for training")
            return
        # align_model is always None on this path, so forwarding it would
        # hand None to the trainer; pass an explicit model name instead.
        self.align_model = train_alignments(src_file, tg_file, tmp_dir, align_model='align_model')
    else:
        self.align_model = align_model
    self.lex_prob = self.get_align_prob(lex_file)
def __init__(self, lex_file, align_model=None, src_file=None, tg_file=None, tmp_dir=None):
    """Obtain an alignment model and load the lexical probabilities.

    :param lex_file: passed to ``self.get_align_prob``; the result is
        stored in ``self.lex_prob``
    :param align_model: an existing alignment model; if None, a model
        named 'align_model' is trained from ``src_file`` and ``tg_file``
    :param src_file: source-side training file (used only for training)
    :param tg_file: target-side training file (used only for training)
    :param tmp_dir: handed to ``mk_tmp_dir``; the result is kept in
        ``self.tmp`` and used as the training directory

    If neither a model nor both training files are given, a message is
    printed and the constructor returns early, leaving both
    ``self.align_model`` and ``self.lex_prob`` unset.
    """
    work_dir = mk_tmp_dir(tmp_dir)
    self.tmp = work_dir

    if align_model is not None:
        # A ready-made model was supplied: use it as is.
        self.align_model = align_model
    elif src_file is None or tg_file is None:
        print("Alignment model not defined, no files for training")
        return
    else:
        self.align_model = train_alignments(src_file, tg_file, work_dir, align_model='align_model')

    self.lex_prob = self.get_align_prob(lex_file)
def get_alignments(src_file, tg_file, trained_model=None, src_train='', tg_train='', align_model='align_model', label='alignments'):
    """Align two parallel files line by line and write the result to disk.

    :param src_file: path of the source-side file
    :param tg_file: path of the target-side file
    :param trained_model: prefix of an existing model; the aligner is given
        ``<prefix>.fwd_params``, ``.fwd_err``, ``.rev_params`` and
        ``.rev_err``. If None, a model is trained first.
    :param src_train: source-side training file (used only for training)
    :param tg_train: target-side training file (used only for training)
    :param align_model: passed to ``train_alignments`` when training
    :param label: label returned together with the output file name
    :return: ``(label, align_file)`` on success, where ``align_file`` is
        ``src_file + '_' + basename(tg_file) + '.aligned'``; an empty list
        if no model is available
    """
    if trained_model is None:
        trained_model = train_alignments(src_train, tg_train, align_model)
    if trained_model == '':
        sys.stderr.write('No alignment model trained\n')
        return []

    align_file = src_file + '_' + os.path.basename(tg_file) + '.aligned'
    aligner = Aligner(trained_model + '.fwd_params', trained_model + '.fwd_err',
                      trained_model + '.rev_params', trained_model + '.rev_err')
    try:
        # Context managers guarantee that all three files are closed, even
        # if aligning or decoding a line fails half-way through.
        with open(src_file) as src, open(tg_file) as tg, open(align_file, 'w') as aligned:
            for src_line, tg_line in zip(src, tg):
                # rstrip('\n') rather than [:-1]: a final line without a
                # trailing newline must not lose its last character.
                src_text = src_line.rstrip('\n').decode('utf-8')
                tg_text = tg_line.rstrip('\n').decode('utf-8')
                aligned.write(aligner.align(src_text + u' ||| ' + tg_text) + u'\n')
    finally:
        aligner.close()
    return (label, align_file)
def __init__(self, align_model='', src_file='', tg_file='', tmp_dir=None):
    """Set up the working directory and the alignment model.

    :param align_model: an existing alignment model; '' or None means
        that no model is supplied
    :param src_file: source-side training file, used only when no model
        is supplied
    :param tg_file: target-side training file, used only when no model
        is supplied
    :param tmp_dir: working directory, created if missing; defaults to
        the current working directory
    :raises OSError: if ``tmp_dir`` cannot be created
    """
    if tmp_dir is None:
        tmp_dir = os.getcwd()
    try:
        os.makedirs(tmp_dir)
    except OSError as exc:  # Python >2.5
        # An already existing directory is fine, anything else is an error.
        if exc.errno != errno.EEXIST or not os.path.isdir(tmp_dir):
            raise
    self.tmp_dir = tmp_dir

    # '' stands for "no alignment model"; it may legitimately stay that way
    # if the context objects already carry alignments.
    self.model = ''
    if align_model is None or align_model == '':
        # No model supplied: train one, but only if both training files
        # really exist. The truthiness checks keep None (and '') away from
        # os.path.isfile, which raises TypeError on None.
        if src_file and tg_file and os.path.isfile(src_file) and os.path.isfile(tg_file):
            self.model = train_alignments(src_file, tg_file, self.tmp_dir)
    else:
        self.model = align_model