Example #1
0
def binarise_model(working_path, l_lang, l_direct, l_orient, l_model, tconf, d):
    '''Binarise the phrase and reordering tables.

    Binarising them speeds up loading the decoder, though doesn't actually speed up decoding sentences.
    After the tables are built, ``mert-work/moses.ini`` is copied into the
    ``binarised-model`` directory and rewritten there so that it refers to the
    binarised phrase table.

    :param string working_path: the path to the working directory
    :param string l_lang: reordering language setting, either f or fe
    :param string l_direct: reordering directionality setting, either forward, backward, or bidirectional
    :param string l_orient: reordering orientation setting, either mslr, msd, monotonicity, leftright
    :param string l_model: reordering modeltype setting, either wbe, phrase, or hier
    :param config tconf: translate configuration; ``paths.moses``, ``settings.phrase_table_name``
        and ``settings.reordering_name`` are read from it
    :param dict d: output dictionary, handed to ``Timer``
    '''

    with Timer(d, 'binarise', lg=logger):
        binarised_model_path = os.path.join(working_path, 'binarised-model')
        # os.makedirs raises if the directory already exists.
        os.makedirs(binarised_model_path)
        command("{0}/bin/processPhraseTable  -ttable 0 0 {1}/train/model/{2}.gz -nscores 5 -out {1}/binarised-model/phrase-table".format(tconf.paths.moses, working_path, tconf.settings.phrase_table_name), logger=logger, capture=True)
        command("{0}/bin/processLexicalTable -in {1}/train/model/{6}.{2}-{3}-{4}-{5}.gz -out {1}/binarised-model/reordering-table".format(tconf.paths.moses, working_path, l_model, l_orient, l_direct, l_lang, tconf.settings.reordering_name), logger=logger, capture=True)

        # Work on a copy of the tuned configuration inside binarised-model.
        mosesini = os.path.join(binarised_model_path, 'moses.ini')
        copy_always(os.path.join(working_path, 'mert-work', 'moses.ini'), mosesini)

        sub_dict = (re.compile(r'PhraseDictionaryMemory'), 'PhraseDictionaryBinary')
        logger.info(mosesini)
        munge_page(mosesini, sub_dict)

        phrase_table_path = os.path.join('train', 'model', tconf.settings.phrase_table_name) + '.gz'
        # The path is literal text, not a pattern: escape it so that '.' (and any
        # other metacharacter in the table name) only matches itself.
        sub_table = (re.compile(re.escape(phrase_table_path)), 'binarised-model/phrase-table')
        munge_page(mosesini, sub_table)
Example #2
0
def binarise_model(working_path, l_lang, l_direct, l_orient, l_model, tconf, d):
    '''Binarise the phrase and reordering tables.

    Binarising them speeds up loading the decoder, though doesn't actually speed up decoding sentences.
    Once both tables are built, ``mert-work/moses.ini`` is copied into the
    ``binarised-model`` directory and edited in place to reference the
    binarised phrase table.

    :param string working_path: the path to the working directory
    :param string l_lang: reordering language setting, either f or fe
    :param string l_direct: reordering directionality setting, either forward, backward, or bidirectional
    :param string l_orient: reordering orientation setting, either mslr, msd, monotonicity, leftright
    :param string l_model: reordering modeltype setting, either wbe, phrase, or hier
    :param config tconf: translate configuration; ``paths.moses``, ``settings.phrase_table_name``
        and ``settings.reordering_name`` are read from it
    :param dict d: output dictionary, handed to ``Timer``
    '''

    with Timer(d, 'binarise', lg=logger):
        binarised_model_path = os.path.join(working_path, 'binarised-model')
        # os.makedirs raises if the directory already exists.
        os.makedirs(binarised_model_path)
        command("{0}/bin/processPhraseTable  -ttable 0 0 {1}/train/model/{2}.gz -nscores 5 -out {1}/binarised-model/phrase-table".format(tconf.paths.moses, working_path, tconf.settings.phrase_table_name), logger=logger, capture=True)
        command("{0}/bin/processLexicalTable -in {1}/train/model/{6}.{2}-{3}-{4}-{5}.gz -out {1}/binarised-model/reordering-table".format(tconf.paths.moses, working_path, l_model, l_orient, l_direct, l_lang, tconf.settings.reordering_name), logger=logger, capture=True)

        # The tuned configuration is copied first; all edits below touch the copy only.
        mosesini = os.path.join(binarised_model_path, 'moses.ini')
        copy_always(os.path.join(working_path, 'mert-work', 'moses.ini'), mosesini)

        sub_dict = (re.compile(r'PhraseDictionaryMemory'), 'PhraseDictionaryBinary')
        logger.info(mosesini)
        munge_page(mosesini, sub_dict)

        phrase_table_path = os.path.join('train', 'model', tconf.settings.phrase_table_name) + '.gz'
        # Escape the path before compiling: it is a literal string, and an
        # unescaped '.' would match any character.
        sub_table = (re.compile(re.escape(phrase_table_path)), 'binarised-model/phrase-table')
        munge_page(mosesini, sub_table)
def _process_page(fn, output_fn, regex, copy, builder):
    '''Run ``munge_page`` on a file and copy the result to its destination.

    :param fn: path of the input file; the intermediate file is ``fn`` with a
        trailing ``~``
    :param output_fn: path the intermediate file is copied to
    :param regex: passed through to ``munge_page`` unchanged
    :param copy: ``'always'`` selects ``copy_always``; any other value selects
        ``copy_if_needed``
    :param builder: passed to the copy helper as ``name``
    '''
    scratch_fn = fn + '~'
    munge_page(fn=fn, out_fn=scratch_fn, regex=regex)

    # Only the selected helper is looked up and called.
    copier = copy_always if copy == 'always' else copy_if_needed
    copier(source_file=scratch_fn, target_file=output_fn, name=builder)
Example #4
0
def _process_page(fn, output_fn, regex, copy, builder):
    '''Write a munged copy of ``fn`` next to it, then copy that to ``output_fn``.

    :param fn: path of the input file; the munged copy is written to ``fn + '~'``
    :param output_fn: destination path for the munged copy
    :param regex: forwarded to ``munge_page`` as its ``regex`` argument
    :param copy: when equal to ``'always'`` the copy is done with
        ``copy_always``, otherwise with ``copy_if_needed``
    :param builder: forwarded to the copy helper as ``name``
    '''
    staged_fn = fn + '~'
    munge_page(fn=fn, out_fn=staged_fn, regex=regex)

    if copy == 'always':
        copy_always(source_file=staged_fn, target_file=output_fn, name=builder)
        return

    copy_if_needed(source_file=staged_fn, target_file=output_fn, name=builder)