def binarise_model(working_path, l_lang, l_direct, l_orient, l_model, tconf, d):
    '''Binarise the phrase and reordering tables of a trained model.

    Binarising them speeds up loading the decoder, though it doesn't actually
    speed up decoding sentences. The binarised tables are written to
    ``<working_path>/binarised-model`` together with a copy of
    ``mert-work/moses.ini`` that is rewritten to reference the binary phrase
    table.

    :param string working_path: the path to the working directory
    :param string l_lang: reordering language setting, either f or fe
    :param string l_direct: reordering directionality setting, either forward, backward, or bidirectional
    :param string l_orient: reordering orientation setting, either mslr, msd, monotonicity, leftright
    :param string l_model: reordering modeltype setting, either wbe, phrase, or hier
    :param config tconf: translate configuration
    :param dict d: output dictionary
    :raises OSError: if the ``binarised-model`` directory already exists
    '''
    with Timer(d, 'binarise', lg=logger):
        binarised_model_path = os.path.join(working_path, 'binarised-model')
        # No exist_ok here: an existing output directory is treated as an error.
        os.makedirs(binarised_model_path)

        phrase_table_cmd = (
            "{0}/bin/processPhraseTable -ttable 0 0 {1}/train/model/{2}.gz "
            "-nscores 5 -out {1}/binarised-model/phrase-table"
        ).format(tconf.paths.moses, working_path, tconf.settings.phrase_table_name)
        command(phrase_table_cmd, logger=logger, capture=True)

        # The reordering table file name encodes the reordering settings:
        # <reordering_name>.<model>-<orient>-<direct>-<lang>.gz
        reordering_table_cmd = (
            "{0}/bin/processLexicalTable -in {1}/train/model/{6}.{2}-{3}-{4}-{5}.gz "
            "-out {1}/binarised-model/reordering-table"
        ).format(tconf.paths.moses, working_path, l_model, l_orient, l_direct,
                 l_lang, tconf.settings.reordering_name)
        command(reordering_table_cmd, logger=logger, capture=True)

        # Work on a copy of the tuned configuration so mert-work stays intact.
        mosesini = os.path.join(binarised_model_path, 'moses.ini')
        copy_always(os.path.join(working_path, 'mert-work', 'moses.ini'), mosesini)
        logger.info(mosesini)

        # Switch the phrase dictionary implementation to the binary one.
        # NOTE(review): munge_page presumably rewrites the file in place when
        # given no output file name -- confirm against its definition.
        sub_dict = (re.compile(r'PhraseDictionaryMemory'), 'PhraseDictionaryBinary')
        munge_page(mosesini, sub_dict)

        # Point the phrase table entry at the binarised table. The path is
        # escaped so that '.' (and any other regex metacharacter in the
        # configured table name) is matched literally rather than as a pattern.
        phrase_table_path = os.path.join('train', 'model', tconf.settings.phrase_table_name) + '.gz'
        sub_table = (re.compile(re.escape(phrase_table_path)), 'binarised-model/phrase-table')
        munge_page(mosesini, sub_table)
def _process_page(fn, output_fn, regex, copy, builder):
    '''Munge a page into a scratch file and copy the result to its target.

    The munged output is written next to the source as ``fn + '~'`` and then
    copied to ``output_fn``.

    :param string fn: path of the source page
    :param string output_fn: path the processed page is copied to
    :param regex: substitution specification handed through to ``munge_page``
    :param string copy: ``'always'`` to copy unconditionally; any other value
        uses ``copy_if_needed``
    :param builder: passed to the copy function as its ``name`` argument
    '''
    scratch_fn = fn + '~'
    munge_page(fn=fn, out_fn=scratch_fn, regex=regex)

    # Pick the copy strategy first, then make a single call to it.
    copier = copy_always if copy == 'always' else copy_if_needed
    copier(source_file=scratch_fn, target_file=output_fn, name=builder)