예제 #1
0
 def _get_initial_hypos(self):
     """Build the list of starting hypotheses (a single element).

     Loads the FST for the current sentence into ``self.cur_fst`` and
     returns one ``PartialHypothesis`` created from the current predictor
     states, with ``fst_node`` set to the result of ``_find_start_node``.
     """
     # The sentence id is shifted by one when the FST path is built.
     sentence_number = self.current_sen_id + 1
     fst_file = utils.get_path(self.fst_path, sentence_number)
     self.cur_fst = load_fst(fst_file)
     start_hypo = PartialHypothesis(self.get_predictor_states())
     start_hypo.fst_node = self._find_start_node()
     return [start_hypo]
예제 #2
0
 def __init__(self, path, slave_predictor):
     """Set up the fsttok wrapper around another predictor.

     Args:
         path (string): Path to an FST which transduces characters
                        to predictor tokens
         slave_predictor (Predictor): Predictor to be wrapped
     """
     super(FSTTokPredictor, self).__init__()
     self.slave_predictor = slave_predictor
     self.max_pending_score = 5.0  # TODO: Add to config
     wraps_unbounded = isinstance(slave_predictor,
                                  UnboundedVocabularyPredictor)
     if wraps_unbounded:
         # The problem is only logged; construction continues afterwards.
         logging.fatal(
             "fsttok cannot wrap an unbounded vocabulary predictor.")
     self.trans_fst = utils.load_fst(path)
예제 #3
0
 def initialize(self, src_sentence):
     """Loads the FST from the file system and consumes the start
     of sentence symbol.

     ``self.cur_nodes`` is reset to an empty list and, if an FST was
     loaded, replaced by the result of ``_follow_eps`` applied to the FST
     start state with an initial value of 0.0. A warning is logged when
     no nodes are left after consuming ``utils.GO_ID``.

     Args:
         src_sentence (list):  Not used
     """
     self.cur_fst = load_fst(
         utils.get_path(self.fst_path, self.current_sen_id + 1))
     self.cur_nodes = []
     if self.cur_fst:
         self.cur_nodes = self._follow_eps({self.cur_fst.start(): 0.0})
     self.consume(utils.GO_ID)
     if not self.cur_nodes:
         # ``logging.warn`` is a deprecated alias of ``logging.warning``;
         # the sentence number is passed as a lazy formatting argument.
         logging.warning(
             "The lattice for sentence %d does not contain any "
             "valid path. Please double-check that the lattice "
             "is not empty and that paths start with the begin-of-"
             "sentence symbol.", self.current_sen_id + 1)
예제 #4
0
파일: automata.py 프로젝트: ucam-smt/sgnmt
 def initialize(self, src_sentence):
     """Loads the FST from the file system and consumes the start
     of sentence symbol.

     ``self.cur_nodes`` starts out empty; when an FST was loaded it is
     set to the result of ``_follow_eps`` on the FST start state (initial
     value 0.0). After ``utils.GO_ID`` has been consumed, a warning is
     logged if ``self.cur_nodes`` is empty.

     Args:
         src_sentence (list):  Not used
     """
     self.cur_fst = load_fst(utils.get_path(self.fst_path,
                                            self.current_sen_id+1))
     self.cur_nodes = []
     if self.cur_fst:
         self.cur_nodes = self._follow_eps({self.cur_fst.start(): 0.0})
     self.consume(utils.GO_ID)
     if not self.cur_nodes:
         # Use ``logging.warning`` (``warn`` is a deprecated alias) and
         # leave the string formatting to the logging module.
         logging.warning(
             "The lattice for sentence %d does not contain any "
             "valid path. Please double-check that the lattice "
             "is not empty and that paths start with the begin-of-"
             "sentence symbol.", self.current_sen_id+1)
예제 #5
0
 def __init__(self, path):
     """Loads subword->char FST, determinizes and minimizes it.

     Besides the processed FST (``token2char_fst``) this sets up:

     * ``word_begin_tokens``: input labels of the arcs leaving the start
       state, stored as keys of a dict.
     * ``char2token_fst``: an inverted copy of ``token2char_fst``.
     * ``cmap``: a copy of ``utils.trg_cmap`` in which the ``"</w>"``
       entry is re-keyed as ``" "``.
     * ``inv_cmap``: the ``(id, char)`` pairs of ``cmap``.

     Args:
         path (string): Path to an FST from subword unit to char
                        sequence

     Raises:
         KeyError: If ``utils.trg_cmap`` has no ``"</w>"`` entry.
     """
     self.token2char_fst = utils.load_fst(path)
     self.token2char_fst.rmepsilon()
     self.token2char_fst.determinize()
     self.token2char_fst.minimize()
     start_state = self.token2char_fst.start()
     self.word_begin_tokens = {
         arc.ilabel: True
         for arc in self.token2char_fst.arcs(start_state)
     }
     self.char2token_fst = fst.Fst(self.token2char_fst)
     self.char2token_fst.invert()
     self.cmap = dict(utils.trg_cmap)
     # Move the "</w>" entry to the " " key.
     self.cmap[" "] = self.cmap.pop("</w>")
     # ``items()`` works on Python 2 and 3 (``iteritems()`` is Python 2
     # only) and yields the same pairs.
     # NOTE(review): this is a *set* of (id, char) tuples, not a dict,
     # although the name suggests an inverse mapping -- confirm against
     # the code that reads ``inv_cmap`` before changing its type.
     self.inv_cmap = {(i, c) for c, i in self.cmap.items()}
예제 #6
0
 def __init__(self, path, fst_unk_id, max_pending_score, slave_predictor):
     """Set up the fsttok wrapper around another predictor.

     Args:
         path (string): Path to an FST which transduces characters
                        to predictor tokens
         fst_unk_id (int): ID used to represent UNK in the FSTs
                           (usually 999999998)
         max_pending_score (float): Maximum pending score in a
                                    ``CombinedState`` instance.
         slave_predictor (Predictor): Predictor to be wrapped
     """
     super(FSTTokPredictor, self).__init__()
     self.slave_predictor = slave_predictor
     self.fst_unk_id = fst_unk_id
     self.max_pending_score = max_pending_score
     if isinstance(slave_predictor, UnboundedVocabularyPredictor):
         # The problem is only logged; construction continues afterwards.
         message = "fsttok cannot wrap an unbounded vocabulary predictor."
         logging.fatal(message)
     self.trans_fst = utils.load_fst(path)
예제 #7
0
 def __init__(self, path, fst_unk_id, max_pending_score, slave_predictor):
     """Create an fsttok wrapper for ``slave_predictor``.

     Args:
         path (string): Path to an FST which transduces characters
                        to predictor tokens
         fst_unk_id (int): ID used to represent UNK in the FSTs
                           (usually 999999998)
         max_pending_score (float): Maximum pending score in a
                                    ``CombinedState`` instance.
         slave_predictor (Predictor): Wrapped predictor
     """
     super(FSTTokPredictor, self).__init__()
     self.fst_unk_id = fst_unk_id
     self.max_pending_score = max_pending_score
     self.slave_predictor = slave_predictor
     is_unbounded = isinstance(slave_predictor,
                               UnboundedVocabularyPredictor)
     if is_unbounded:
         # Logged at fatal level, but the constructor still goes on to
         # load the FST below.
         logging.fatal(
             "fsttok cannot wrap an unbounded vocabulary predictor.")
     self.trans_fst = utils.load_fst(path)
예제 #8
0
파일: automata.py 프로젝트: ucam-smt/sgnmt
 def initialize(self, src_sentence):
     """Loads the FST from the file system and consumes the start
     of sentence symbol.

     ``self.cur_node`` is set to the FST start state, or to ``None`` when
     no FST was loaded. ``self.bos_score`` holds the value returned by
     consuming ``utils.GO_ID``; a falsy result such as ``None`` is
     replaced by 0.0. A warning is logged if ``self.cur_node`` is ``None``
     afterwards.

     Args:
         src_sentence (list):  Not used
     """
     self.cur_fst = load_fst(utils.get_path(self.fst_path,
                                            self.current_sen_id+1))
     self.cur_node = self.cur_fst.start() if self.cur_fst else None
     self.bos_score = self.consume(utils.GO_ID)
     if not self.bos_score: # Override None
         self.bos_score = 0.0
     if self.cur_node is None:
         # Use ``logging.warning`` (``warn`` is a deprecated alias) and
         # leave the string formatting to the logging module.
         logging.warning(
             "The lattice for sentence %d does not contain any "
             "valid path. Please double-check that the lattice "
             "is not empty and that paths contain the begin-of-"
             "sentence symbol %d. If you are using a different "
             "begin-of-sentence symbol, double-check --indexing_"
             "scheme.", self.current_sen_id+1, utils.GO_ID)
예제 #9
0
 def initialize(self, src_sentence):
     """Loads the FST from the file system and consumes the start
     of sentence symbol.

     ``self.cur_node`` is set to the FST start state, or to ``None`` when
     no FST was loaded. ``self.bos_score`` holds the value returned by
     consuming ``utils.GO_ID``; a falsy result such as ``None`` is
     replaced by 0.0. A warning is logged if ``self.cur_node`` is ``None``
     afterwards.

     Args:
         src_sentence (list):  Not used
     """
     self.cur_fst = load_fst(
         utils.get_path(self.fst_path, self.current_sen_id + 1))
     self.cur_node = self.cur_fst.start() if self.cur_fst else None
     self.bos_score = self.consume(utils.GO_ID)
     if not self.bos_score:  # Override None
         self.bos_score = 0.0
     if self.cur_node is None:
         # The message previously read "paths the begin-of-sentence
         # symbol"; the missing verb "contain" is restored. ``warning``
         # replaces the deprecated ``warn`` alias and formats lazily.
         logging.warning(
             "The lattice for sentence %d does not contain any "
             "valid path. Please double-check that the lattice "
             "is not empty and that paths contain the begin-of-"
             "sentence symbol %d. If you are using a different "
             "begin-of-sentence symbol, double-check --indexing_"
             "scheme.", self.current_sen_id + 1, utils.GO_ID)
예제 #10
0
 def __init__(self, path):
     """Loads subword->char FST, determinizes and minimizes it.

     In addition to ``token2char_fst`` the constructor prepares an
     inverted copy (``char2token_fst``), a dict keyed by the input labels
     of the arcs leaving the start state (``word_begin_tokens``), a copy
     of ``utils.trg_cmap`` whose ``"</w>"`` entry is stored under ``" "``
     instead (``cmap``), and the ``(id, char)`` pairs of that map
     (``inv_cmap``).

     Args:
         path (string): Path to an FST from subword unit to char
                        sequence

     Raises:
         KeyError: If ``utils.trg_cmap`` has no ``"</w>"`` entry.
     """
     self.token2char_fst = utils.load_fst(path)
     self.token2char_fst.rmepsilon()
     self.token2char_fst.determinize()
     self.token2char_fst.minimize()
     self.word_begin_tokens = {
         arc.ilabel: True
         for arc in self.token2char_fst.arcs(self.token2char_fst.start())
     }
     self.char2token_fst = fst.Fst(self.token2char_fst)
     self.char2token_fst.invert()
     self.cmap = dict(utils.trg_cmap)
     # Re-key the "</w>" entry as " ".
     self.cmap[" "] = self.cmap["</w>"]
     del self.cmap["</w>"]
     # ``dict.iteritems()`` exists only on Python 2; ``items()`` yields
     # the same pairs on both Python 2 and Python 3.
     # NOTE(review): the result is a *set* of (id, char) tuples rather
     # than a dict, despite the name -- verify how ``inv_cmap`` is read
     # before turning it into a mapping.
     self.inv_cmap = {(i, c) for c, i in self.cmap.items()}