コード例 #1
0
 def __init__(self, config, weights):
     """Launch the cdec decoder binary as a child process.

     config and weights are handed to the binary as the values of its
     '-c' and '-w' command-line options. The resulting process handle is
     stored in self.decoder and a fresh util.FIFOLock in self.lock.
     """
     # Apply dirname three times to this file's absolute path to reach
     # the cdec root directory.
     cdec_root = os.path.abspath(__file__)
     for _ in range(3):
         cdec_root = os.path.dirname(cdec_root)

     decoder_cmd = [os.path.join(cdec_root, 'decoder', 'cdec')]
     decoder_cmd += ['-c', config, '-w', weights]

     command_line = ' '.join(decoder_cmd)
     logger.info('Executing: {}'.format(command_line))
     # NOTE(review): util.popen_io presumably starts the process with
     # piped stdin/stdout -- confirm against util.
     self.decoder = util.popen_io(decoder_cmd)
     # NOTE(review): presumably serializes access to the decoder process
     # across threads -- confirm against callers.
     self.lock = util.FIFOLock()
コード例 #2
0
 def __init__(self, config, weights, metric='ibm_bleu'):
     """Launch the kbest_cut_mira binary as a child process.

     config, weights and metric are handed to the binary as the values of
     its '-c', '-w' and '-m' options; the remaining options are fixed.
     The process handle is stored in self.decoder and a fresh
     util.FIFOLock in self.lock.
     """
     # Apply dirname three times to this file's absolute path to reach
     # the cdec root directory.
     cdec_root = os.path.abspath(__file__)
     for _ in range(3):
         cdec_root = os.path.dirname(cdec_root)

     mira_cmd = [
         os.path.join(cdec_root, 'training', 'mira', 'kbest_cut_mira'),
         '-c', config,
         '-w', weights,
         '-o', '2',  # optimizer=2
         '-C', '0.001',  # step=0.001
         '-b', '500',  # best=500
         '-k', '500',  # k=500
         '-u',  # uniq
         '-t',  # stream
         '-m', metric,  # metric
     ]

     command_line = ' '.join(mira_cmd)
     logger.info('Executing: {}'.format(command_line))
     # NOTE(review): util.popen_io presumably starts the process with
     # piped stdin/stdout -- confirm against util.
     self.decoder = util.popen_io(mira_cmd)
     # NOTE(review): presumably serializes access to the process across
     # threads -- confirm against callers.
     self.lock = util.FIFOLock()
コード例 #3
0
ファイル: rt.py プロジェクト: wilkeraziz/cdec
 def __init__(self, config):
     """Start the grammar extractor (cdec.sa.extract) as a child process.

     config is handed to the extractor as the value of its '-c' option.
     The child runs with a copy of the current environment in which the
     cdec 'python' directory has been added to PYTHONPATH, unless
     PYTHONPATH already contains the substring 'cdec/python'.

     The process is stored in self.p with stdin, stdout and stderr piped,
     and a fresh util.FIFOLock is stored in self.lock.
     """
     # Make sure pycdec is on PYTHONPATH
     cdec_root = os.path.dirname(
         os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
     pycdec = os.path.join(cdec_root, 'python')
     env = os.environ.copy()
     python_path = env.get('PYTHONPATH', '')
     if 'cdec/python' not in python_path:
         # Join with the platform's path-list separator instead of a
         # hard-coded ':' so the value is well-formed on every platform.
         if python_path:
             python_path = os.pathsep.join((python_path, pycdec))
         else:
             python_path = pycdec
         env['PYTHONPATH'] = python_path
     # Start grammar extractor as separate process using stdio
     cmd = [
         'python', '-m', 'cdec.sa.extract', '-o', '-z', '-c', config, '-t'
     ]
     logger.info('Executing: {}'.format(' '.join(cmd)))
     self.p = subprocess.Popen(cmd,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               env=env)
     # NOTE(review): presumably drains stderr in the background so the
     # child cannot block on a full pipe -- confirm against util.
     util.consume_stream(self.p.stderr)
     # NOTE(review): presumably serializes access to the extractor
     # process across threads -- confirm against callers.
     self.lock = util.FIFOLock()
コード例 #4
0
    def __init__(self,
                 fwd_params,
                 fwd_err,
                 rev_params,
                 rev_err,
                 heuristic='grow-diag-final-and'):
        """Start the forward and reverse fast_align processes plus atools.

        fwd_err and rev_err are passed to self.read_err, whose two return
        values become the '-T' and '-m' arguments of the corresponding
        fast_align command. fwd_params and rev_params are passed as the
        '-f' argument, and heuristic as the '-c' argument of atools.

        The three process handles are stored in self.fwd_align,
        self.rev_align and self.tools.
        """
        # Apply dirname three times to this file's absolute path to reach
        # the cdec root directory.
        cdec_root = os.path.abspath(__file__)
        for _ in range(3):
            cdec_root = os.path.dirname(cdec_root)
        fast_align = os.path.join(cdec_root, 'word-aligner', 'fast_align')
        atools = os.path.join(cdec_root, 'utils', 'atools')

        fwd_T, fwd_m = self.read_err(fwd_err)
        rev_T, rev_m = self.read_err(rev_err)

        def launch(cmd):
            # Log the command line, then start the process.
            logger.info('Executing: {}'.format(' '.join(cmd)))
            return util.popen_io(cmd)

        # Options shared by both alignment directions.
        align_prefix = [fast_align, '-i', '-', '-d']

        self.fwd_align = launch(
            align_prefix + ['-T', fwd_T, '-m', fwd_m, '-f', fwd_params])
        # The reverse direction differs only in its inputs and the
        # trailing '-r' flag.
        self.rev_align = launch(
            align_prefix + ['-T', rev_T, '-m', rev_m, '-f', rev_params, '-r'])
        self.tools = launch([atools, '-i', '-', '-j', '-', '-c', heuristic])

        # Used to guarantee thread safety
        self.lock = util.FIFOLock()
コード例 #5
0
ファイル: rt.py プロジェクト: wilkeraziz/cdec
    def __init__(self, configdir, tmpdir='/tmp', cache_size=5, norm=False):
        """Set up the shared components and the empty per-context state.

        configdir: directory containing rt.ini, sa.ini and the aligner
            files a.fwd_params, a.fwd_err, a.rev_params and a.rev_err.
        tmpdir: directory in which a 'realtime.'-prefixed temporary work
            directory is created.
        cache_size: stored as self.cache_size.
        norm: when true, tokenizer and detokenizer processes are started
            along with a lock for each.

        Raises ValueError if a non-blank line of rt.ini contains no '='.
        """

        # name -> (method, set of possible nargs)
        self.COMMANDS = {
            'TR': (self.translate, {1}),
            'LEARN': (self.learn, {2}),
            'SAVE': (self.save_state, {0, 1}),
            'LOAD': (self.load_state, {0, 1}),
            'DROP': (self.drop_ctx, {0}),
            'LIST': (self.list_ctx, {0}),
        }

        cdec_root = os.path.dirname(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

        # rt.ini options: one key=value pair per line. The file is closed
        # deterministically, blank lines are skipped, and only the first
        # '=' separates key from value so values may themselves contain '='.
        ini = {}
        with open(os.path.join(configdir, 'rt.ini')) as ini_file:
            for line in ini_file:
                line = line.strip()
                if not line:
                    continue
                key, value = line.split('=', 1)
                ini[key] = value
        self.hpyplm = (ini.get('hpyplm', 'false') in TRUE)
        self.metric = ini.get('metric', 'ibm_bleu')

        ### Single instance for all contexts

        self.config = configdir
        # Temporary work dir
        self.tmp = tempfile.mkdtemp(dir=tmpdir, prefix='realtime.')
        logger.info('Using temp dir {}'.format(self.tmp))

        # Normalization
        self.norm = norm
        if self.norm:
            self.tokenizer = util.popen_io([
                os.path.join(cdec_root, 'corpus', 'tokenize-anything.sh'), '-u'
            ])
            self.tokenizer_lock = util.FIFOLock()
            self.detokenizer = util.popen_io(
                [os.path.join(cdec_root, 'corpus', 'untok.pl')])
            self.detokenizer_lock = util.FIFOLock()

        # Word aligner
        fwd_params = os.path.join(configdir, 'a.fwd_params')
        fwd_err = os.path.join(configdir, 'a.fwd_err')
        rev_params = os.path.join(configdir, 'a.rev_params')
        rev_err = os.path.join(configdir, 'a.rev_err')
        self.aligner = aligner.ForceAligner(fwd_params, fwd_err, rev_params,
                                            rev_err)

        # Grammar extractor: load sa.ini from configdir, adjust it via
        # util.sa_ini_for_realtime, and write the result into the temp dir
        # so the extractor reads the rewritten copy.
        sa_config = cdec.configobj.ConfigObj(os.path.join(configdir, 'sa.ini'),
                                             unrepr=True)
        sa_config.filename = os.path.join(self.tmp, 'sa.ini')
        util.sa_ini_for_realtime(sa_config, os.path.abspath(configdir))
        sa_config.write()
        self.extractor = ExtractorWrapper(sa_config.filename)
        self.cache_size = cache_size

        ### One instance per context

        self.ctx_names = set()
        # All context-dependent operations are atomic
        self.ctx_locks = collections.defaultdict(util.FIFOLock)
        # ctx -> list of (source, target, alignment)
        self.ctx_data = {}

        # Grammar extractor is not threadsafe
        self.extractor_lock = util.FIFOLock()
        # ctx -> deque of file
        self.grammar_files = {}
        # ctx -> dict of {sentence: file}
        self.grammar_dict = {}

        self.decoders = {}