def __init__(self, align_model='', src_file='', tg_file='', tmp_dir=None, context_size=1):
        """Prepare the working directory and choose the alignment model.

        Args:
            align_model: alignment model to use; the empty string means that
                no model was supplied.
            src_file: source-side file used for training when no model was
                supplied.
            tg_file: target-side file used for training when no model was
                supplied.
            tmp_dir: working directory; defaults to the current working
                directory and is created if it does not exist yet.
            context_size: stored unchanged as ``self.context_size``.

        Raises:
            OSError: if the working directory cannot be created for any
                reason other than it already existing as a directory.
        """
        work_dir = os.getcwd() if tmp_dir is None else tmp_dir
        try:
            os.makedirs(work_dir)
        except OSError as err:
            # An already existing directory is fine; anything else is a
            # genuine failure and is propagated to the caller.
            if err.errno != errno.EEXIST or not os.path.isdir(work_dir):
                raise
        self.tmp_dir = work_dir

        # Stays empty when no model is given and nothing can be trained
        # (the model is optional if context objects carry alignments).
        self.model = ''
        if align_model != '':
            self.model = align_model
        elif os.path.isfile(src_file) and os.path.isfile(tg_file):
            # No model supplied, but both training files exist on disk.
            self.model = train_alignments(src_file, tg_file, self.tmp_dir)

        self.context_size = context_size
    def __init__(self,
                 align_model=None,
                 src_file=None,
                 tg_file=None,
                 lex_prefix=None,
                 tmp_dir=None,
                 moses_dir=None,
                 moses_config=None,
                 workers=1):
        """Store the Moses/lexicon settings and obtain an alignment model.

        Args:
            align_model: ready-made alignment model; when None, a model is
                trained from ``src_file`` and ``tg_file``.
            src_file: source-side training file (used only for training).
            tg_file: target-side training file (used only for training).
            lex_prefix: stored unchanged as ``self.lex_prob``.
            tmp_dir: passed to ``mk_tmp_dir``; the returned directory is kept
                as ``self.tmp_dir`` and used for training.
            moses_dir: stored unchanged as ``self.moses_dir``.
            moses_config: stored unchanged as ``self.moses_config``.
            workers: stored unchanged as ``self.workers``.

        Note:
            When neither a model nor both training files are given, a message
            is printed and the constructor returns early, leaving
            ``self.align_model`` unset.
        """
        self.tmp_dir = mk_tmp_dir(tmp_dir)
        self.time_stamp = str(time.time())
        self.moses_dir = moses_dir
        self.moses_config = moses_config
        self.workers = workers
        self.lex_prob = lex_prefix

        if align_model is not None:
            self.align_model = align_model
            return

        if src_file is None or tg_file is None:
            print("Alignment model not defined, no files for training")
            return

        # Train in the directory returned by mk_tmp_dir() rather than the raw
        # `tmp_dir` argument, which may still be None at this point.
        # NOTE(review): align_model is always None on this path -- confirm
        # that train_alignments() accepts None for its align_model keyword.
        self.align_model = train_alignments(src_file,
                                            tg_file,
                                            self.tmp_dir,
                                            align_model=align_model)
Esempio n. 3
0
def get_alignments(src_file,
                   tg_file,
                   trained_model=None,
                   src_train='',
                   tg_train='',
                   align_model='align_model',
                   label='alignments'):
    """Align two parallel files line by line and write the result to disk.

    Args:
        src_file: path of the source-side file to align.
        tg_file: path of the target-side file to align.
        trained_model: prefix of an existing model; the suffixes
            ``.fwd_params``, ``.fwd_err``, ``.rev_params`` and ``.rev_err``
            are appended to it. When None, a model is trained first.
        src_train: source-side training file, used only for training.
        tg_train: target-side training file, used only for training.
        align_model: third argument handed to ``train_alignments``.
        label: label returned together with the output path.

    Returns:
        ``(label, align_file)`` on success, where ``align_file`` is
        ``src_file + '_' + basename(tg_file) + '.aligned'``; an empty list
        when training returned an empty model name.
    """
    if trained_model is None:
        trained_model = train_alignments(src_train, tg_train, align_model)
        if trained_model == '':
            sys.stderr.write('No alignment model trained\n')
            return []

    align_file = src_file + '_' + os.path.basename(tg_file) + '.aligned'
    aligner = Aligner(trained_model + '.fwd_params',
                      trained_model + '.fwd_err',
                      trained_model + '.rev_params',
                      trained_model + '.rev_err')
    try:
        # Context managers close all three files even if alignment fails.
        with open(src_file) as src, open(tg_file) as tg, \
                open(align_file, 'w') as aligned:
            for src_line, tg_line in zip(src, tg):
                # Strip only the newline: a final line without a trailing
                # newline must not lose its last character.
                # NOTE(review): .decode() assumes the lines are byte strings
                # (Python 2 file iteration) -- confirm the target runtime.
                sentence_pair = (src_line.rstrip('\n').decode('utf-8') +
                                 u' ||| ' +
                                 tg_line.rstrip('\n').decode('utf-8'))
                aligned.write(aligner.align(sentence_pair) + u'\n')
    finally:
        aligner.close()

    return (label, align_file)
    def __init__(self, align_model=None, src_file=None, tg_file=None, tmp_dir=None):
        """Obtain the alignment model, training one when none is supplied.

        Args:
            align_model: ready-made alignment model; when None, a model is
                trained from ``src_file`` and ``tg_file``.
            src_file: source-side training file (used only for training).
            tg_file: target-side training file (used only for training).
            tmp_dir: passed to ``mk_tmp_dir``; the returned directory is
                handed to ``train_alignments``.

        Note:
            When neither a model nor both training files are given, a message
            is printed and ``self.align_model`` is left unset.
        """
        work_dir = mk_tmp_dir(tmp_dir)

        # A supplied model is used as is.
        if align_model is not None:
            self.align_model = align_model
            return

        if src_file is None or tg_file is None:
            print("Alignment model not defined, no files for training")
            return

        # NOTE(review): the align_model keyword is always None here -- confirm
        # that train_alignments() accepts None.
        self.align_model = train_alignments(src_file, tg_file, work_dir, align_model=None)
    def __init__(self, lex_file, align_model=None, src_file=None, tg_file=None, tmp_dir=None):
        """Obtain the alignment model and load the lexical probabilities.

        Args:
            lex_file: passed to ``self.get_align_prob``; the result is stored
                as ``self.lex_prob``.
            align_model: ready-made alignment model; when None, a model is
                trained from ``src_file`` and ``tg_file``.
            src_file: source-side training file (used only for training).
            tg_file: target-side training file (used only for training).
            tmp_dir: passed to ``mk_tmp_dir``; the returned directory is
                handed to ``train_alignments``.

        Note:
            When neither a model nor both training files are given, a message
            is printed and the constructor returns before setting
            ``self.align_model`` or ``self.lex_prob``.
        """
        work_dir = mk_tmp_dir(tmp_dir)

        model = align_model
        if model is None:
            if src_file is None or tg_file is None:
                print("Alignment model not defined, no files for training")
                return
            # NOTE(review): the align_model keyword is always None here --
            # confirm that train_alignments() accepts None.
            model = train_alignments(src_file, tg_file, work_dir, align_model=None)

        self.align_model = model
        self.lex_prob = self.get_align_prob(lex_file)
    def __init__(self, lex_file, align_model=None, src_file=None, tg_file=None, tmp_dir=None):
        """Obtain the alignment model and load the lexical probabilities.

        Args:
            lex_file: passed to ``self.get_align_prob``; the result is stored
                as ``self.lex_prob``.
            align_model: ready-made alignment model; when None, a model named
                ``'align_model'`` is trained from ``src_file`` and ``tg_file``.
            src_file: source-side training file (used only for training).
            tg_file: target-side training file (used only for training).
            tmp_dir: passed to ``mk_tmp_dir``; the returned directory is kept
                as ``self.tmp`` and handed to ``train_alignments``.

        Note:
            When neither a model nor both training files are given, a message
            is printed and the constructor returns before setting
            ``self.align_model`` or ``self.lex_prob``.
        """
        work_dir = mk_tmp_dir(tmp_dir)
        self.tmp = work_dir

        model = align_model
        if model is None:
            if src_file is None or tg_file is None:
                print("Alignment model not defined, no files for training")
                return
            # No model supplied: train one under the default name.
            model = train_alignments(src_file, tg_file, work_dir, align_model='align_model')

        self.align_model = model
        self.lex_prob = self.get_align_prob(lex_file)
    def __init__(self, align_model=None, src_file=None, tg_file=None, lex_prefix=None, tmp_dir=None, moses_dir=None, moses_config=None, workers=1):
        """Store the Moses/lexicon settings and obtain an alignment model.

        Args:
            align_model: ready-made alignment model; when None, a model is
                trained from ``src_file`` and ``tg_file``.
            src_file: source-side training file (used only for training).
            tg_file: target-side training file (used only for training).
            lex_prefix: stored unchanged as ``self.lex_prob``.
            tmp_dir: passed to ``mk_tmp_dir``; the returned directory is kept
                as ``self.tmp_dir`` and used for training.
            moses_dir: stored unchanged as ``self.moses_dir``.
            moses_config: stored unchanged as ``self.moses_config``.
            workers: stored unchanged as ``self.workers``.

        Note:
            When neither a model nor both training files are given, a message
            is printed and the constructor returns early, leaving
            ``self.align_model`` unset.
        """
        self.tmp_dir = mk_tmp_dir(tmp_dir)
        self.time_stamp = str(time.time())
        self.moses_dir = moses_dir
        self.moses_config = moses_config
        self.workers = workers
        self.lex_prob = lex_prefix

        if align_model is not None:
            self.align_model = align_model
            return

        if src_file is None or tg_file is None:
            print("Alignment model not defined, no files for training")
            return

        # Train in the directory returned by mk_tmp_dir() rather than the raw
        # `tmp_dir` argument, which may still be None at this point.
        # NOTE(review): align_model is always None on this path -- confirm
        # that train_alignments() accepts None for its align_model keyword.
        self.align_model = train_alignments(src_file, tg_file, self.tmp_dir, align_model=align_model)
Esempio n. 8
0
def get_alignments(src_file, tg_file, trained_model=None, src_train='', tg_train='', align_model='align_model', label='alignments'):
    """Align two parallel files line by line and write the result to disk.

    Args:
        src_file: path of the source-side file to align.
        tg_file: path of the target-side file to align.
        trained_model: prefix of an existing model; the suffixes
            ``.fwd_params``, ``.fwd_err``, ``.rev_params`` and ``.rev_err``
            are appended to it. When None, a model is trained first.
        src_train: source-side training file, used only for training.
        tg_train: target-side training file, used only for training.
        align_model: third argument handed to ``train_alignments``.
        label: label returned together with the output path.

    Returns:
        ``(label, align_file)`` on success, where ``align_file`` is
        ``src_file + '_' + basename(tg_file) + '.aligned'``; an empty list
        when training returned an empty model name.
    """
    if trained_model is None:
        trained_model = train_alignments(src_train, tg_train, align_model)
        if trained_model == '':
            sys.stderr.write('No alignment model trained\n')
            return []

    align_file = src_file+'_'+os.path.basename(tg_file)+'.aligned'
    aligner = Aligner(trained_model+'.fwd_params', trained_model+'.fwd_err', trained_model+'.rev_params', trained_model+'.rev_err')
    try:
        # Context managers close all three files even if alignment fails.
        with open(src_file) as src, open(tg_file) as tg, open(align_file, 'w') as aligned:
            for src_line, tg_line in zip(src, tg):
                # Strip only the newline: a final line without a trailing
                # newline must not lose its last character.
                # NOTE(review): .decode() assumes the lines are byte strings
                # (Python 2 file iteration) -- confirm the target runtime.
                sentence_pair = src_line.rstrip('\n').decode('utf-8')+u' ||| '+tg_line.rstrip('\n').decode('utf-8')
                aligned.write(aligner.align(sentence_pair)+u'\n')
    finally:
        aligner.close()

    return (label, align_file)
    def __init__(self, align_model='', src_file='', tg_file='', tmp_dir=None):
        """Prepare the working directory and choose the alignment model.

        Args:
            align_model: alignment model to use; the empty string means that
                no model was supplied.
            src_file: source-side file used for training when no model was
                supplied.
            tg_file: target-side file used for training when no model was
                supplied.
            tmp_dir: working directory; defaults to the current working
                directory and is created if it does not exist yet.

        Raises:
            OSError: if the working directory cannot be created for any
                reason other than it already existing as a directory.
        """
        if tmp_dir is None:
            tmp_dir = os.getcwd()
        try:
            os.makedirs(tmp_dir)
        except OSError as exc:  # Python >2.5
            # An already existing directory is fine; anything else is a
            # genuine failure and is propagated to the caller.
            if exc.errno != errno.EEXIST or not os.path.isdir(tmp_dir):
                raise
        self.tmp_dir = tmp_dir

        self.model = ''

        if align_model == '':
            # No alignment model: train one if both files exist on disk.
            # (self.model doesn't have to be defined, if context objects
            # have alignments.)
            if os.path.isfile(src_file) and os.path.isfile(tg_file):
                self.model = train_alignments(src_file, tg_file, self.tmp_dir)
        else:
            # A supplied model must be kept. This branch belongs to the outer
            # check; attached to the isfile() test it was unreachable for a
            # non-empty align_model, which was then silently dropped.
            self.model = align_model