Beispiel #1
0
 def make_expr(self, init_state=None, tries=10, test_output=True, skip_re=r"", probability=False):
     """Generate an expression from the chain, retrying until one is accepted.

     Args:
         init_state: Optional seed text. It is NFD-normalized; its text with
             BEGIN characters stripped is prepended to the generated
             continuation, and its last ``self.state_size`` characters
             (left-padded with BEGIN) are used as the state the walk starts
             from. A falsy value is passed to the chain unchanged.
         tries: Maximum number of generation attempts.
         test_output: When true, a candidate is accepted only if
             ``self.test_expr_output`` approves it (and ``skip_re`` does not
             match). When false, the first candidate is returned as-is.
         skip_re: Optional regular expression; it is NFD-normalized and any
             candidate it matches (``re.search``) is rejected. Only consulted
             when ``test_output`` is true.
         probability: When true, return ``(expr, prob)`` instead of ``expr``.

     Returns:
         The accepted expression, or an ``(expr, prob)`` pair when
         ``probability`` is true. Returns ``None`` if no candidate was
         accepted within ``tries`` attempts, so callers that unpack a pair
         must handle that case.

     Note:
         ``self.chain.walk(state, probability)`` is assumed to return the
         continuation string, or a ``(string, probability)`` pair when
         ``probability`` is truthy -- inferred from how its result is
         unpacked here.
     """
     # Prepare the seed once, before the retry loop. Doing this inside the
     # loop (and overwriting init_state with its truncated form) made every
     # retry derive the prefix from the already-truncated state, dropping
     # leading characters whenever the seed was longer than state_size.
     if init_state:
         normalized_seed = unicodedata.normalize("NFD", init_state)
         prefix = normalized_seed.strip(BEGIN)
         walk_state = normalized_seed.rjust(self.state_size, BEGIN)[-self.state_size:]
     else:
         prefix = ''
         walk_state = init_state

     # The pattern is loop-invariant; it is only needed when output is tested.
     if test_output and skip_re:
         skip_pattern = unicodedata.normalize("NFD", skip_re)
     else:
         skip_pattern = None

     for _ in range(tries):
         try:
             walked = self.chain.walk(walk_state, probability)
         except KeyError:
             # The chain has no entry for this state: fall back to an empty
             # candidate, which is still subject to the checks below.
             expr, prob = "", 0
         else:
             if probability:
                 tail, prob = walked
             else:
                 tail = walked
             expr = prefix + tail

         if test_output:
             if not self.test_expr_output(expr):
                 continue
             if skip_pattern is not None and re.search(skip_pattern, expr):
                 continue

         if probability:
             return expr, prob
         return expr

     # Every attempt was rejected.
     return None
Beispiel #2
0
 def build(self, corpus, state_size):
     """Accumulate weighted character-transition scores from a corpus.

     Each ``(run, score)`` pair in ``corpus`` is NFD-normalized, prefixed
     with ``state_size`` copies of BEGIN and suffixed with END. For every
     window of ``state_size`` characters, ``score`` is added to the tally of
     the character that follows it.

     Returns:
         A plain ``dict`` mapping each state string to a ``dict`` of
         ``{following character: summed score}``.

     Note:
         The indexing assumes BEGIN and END are single characters -- TODO
         confirm against their definitions.
     """
     transitions = defaultdict(lambda: defaultdict(int))
     for run, score in corpus:
         norm_run = unicodedata.normalize("NFD", run)
         items = (BEGIN * state_size) + norm_run + END
         # One window per character of the run, plus one whose follower is END.
         for start in range(len(norm_run) + 1):
             boundary = start + state_size
             state = items[start:boundary]
             follow = items[boundary]
             transitions[state][follow] += score
     # Freeze the nested defaultdicts into ordinary dicts for the caller.
     return {state: dict(followers) for state, followers in transitions.items()}
Beispiel #3
0
 def expr_prob(self, expr):
     """Return the product of the chain's transition probabilities for ``expr``.

     ``expr`` is NFD-normalized, prefixed with ``self.state_size`` copies of
     BEGIN and suffixed with END. The result is the product of
     ``self.chain.prob(state, follow)`` over every window of
     ``self.state_size`` characters and the character that follows it,
     ending with the transition into END.

     Note:
         The indexing assumes BEGIN and END are single characters -- TODO
         confirm against their definitions.
     """
     norm_expr = unicodedata.normalize("NFD", expr)
     prepped_expr = BEGIN * self.state_size + norm_expr + END
     output = 1
     # Iterate over the *normalized* length. NFD decomposition can turn one
     # code point into several, so using len(expr) would stop early and leave
     # out the final transitions (including the one into END).
     for i in range(len(norm_expr) + 1):
         follow_at = i + self.state_size
         output *= self.chain.prob(prepped_expr[i:follow_at], prepped_expr[follow_at])
     return output
Beispiel #4
0
 def __init__(self, uid, state_size, expr_score_list, chain=None):
     """Store the model parameters and set up the underlying chain.

     Args:
         uid: Identifier stored on the instance as ``self.uid``.
         state_size: Stored as ``self.state_size`` and passed to ``PLChain``
             when a chain has to be built.
         expr_score_list: Iterable of entries whose first element is an
             expression string; the NFD-normalized expressions are kept in
             ``self.expr_set``.
         chain: Existing chain to reuse. If it is falsy, a new ``PLChain`` is
             built from ``expr_score_list`` and ``state_size``.
     """
     self.uid = uid
     self.state_size = state_size

     known_exprs = set()
     for entry in expr_score_list:
         known_exprs.add(unicodedata.normalize("NFD", entry[0]))
     self.expr_set = known_exprs

     if not chain:
         chain = PLChain(expr_score_list, state_size)
     self.chain = chain
 def test_bug_834676(self):
     # Check for bug 834676
     unicodedata.normalize('NFC', '\ud55c\uae00')
 def test_edge_cases(self):
     self.assertRaises(TypeError, unicodedata.normalize)
     self.assertRaises(ValueError, unicodedata.normalize, 'unknown', 'xx')
     self.assertEqual(unicodedata.normalize('NFKC', ''), '')
 def NFKD(str):
     """Return *str* converted to Unicode normalization form NFKD."""
     # The parameter name shadows the builtin ``str``; it is kept because it
     # is part of the function's signature.
     decomposed = unicodedata.normalize("NFKD", str)
     return decomposed
 def NFC(str):
     """Return *str* converted to Unicode normalization form NFC."""
     # The parameter name shadows the builtin ``str``; it is kept because it
     # is part of the function's signature.
     composed = unicodedata.normalize("NFC", str)
     return composed