コード例 #1
0
ファイル: m2.py プロジェクト: duanzhenchun/mylab
 def __init__(self, source, target, t=None):
     """Set up the model and fill ``self.a`` with uniform values.

     ``source``, ``target`` and ``t`` are forwarded unchanged to the parent
     initialiser.  Afterwards, for every sentence pair ``(e, f)`` yielded
     by ``m1.bitext(self.source, self.target)``, each key
     ``(i, j, len(f), len(e))`` of ``self.a`` is set to ``1 / len(e)``.
     """
     super(M2, self).__init__(source, target, t)
     self.a = defaultdict(float)
     for e_sent, f_sent in m1.bitext(self.source, self.target):
         e_len = len(e_sent)
         f_len = len(f_sent)
         for f_pos in range(f_len):
             # Every position of e gets the same share for this f position.
             uniform = 1.0 / e_len
             self.a.update(
                 ((e_pos, f_pos, f_len, e_len), uniform)
                 for e_pos in range(e_len)
             )
コード例 #2
0
ファイル: m2.py プロジェクト: duanzhenchun/mylab
 def EM(self, delta_threshold):
     """Run one EM iteration, updating ``self.t`` and ``self.a`` in place.

     The E step collects expected counts over every sentence pair yielded
     by ``m1.bitext(self.source, self.target)``; the M step renormalises
     those counts into new values for ``self.t[e][f]`` and
     ``self.a[i, j, m, l]``.

     Args:
         delta_threshold: an entry of ``self.t`` is counted as accepted
             when its value moves by less than this amount.

     Returns:
         The number of ``self.t`` entries updated in this iteration whose
         absolute change was below ``delta_threshold``.
     """
     # E step: accumulate expected counts.
     c_fe = defaultdict(float)    # keyed by (e word, f word)
     c_e = defaultdict(float)     # keyed by e word
     c_ijml = defaultdict(float)  # keyed by (i, j, m, l)
     c_jml = defaultdict(float)   # keyed by (j, m, l)
     for (e, f) in m1.bitext(self.source, self.target):
         l, m = len(e), len(f)
         for j in range(m):
             # Unnormalised score of each position i of e for position j.
             # Scores are kept per position (not per word) so that a word
             # occurring more than once in e keeps a separate score for
             # each occurrence.
             scores = [
                 self.t[e[i]][f[j]] * self.a[i, j, m, l] for i in range(l)
             ]
             total = sum(scores)
             if total <= 0:
                 # Nothing to distribute for this position.
                 continue
             for i, score in enumerate(scores):
                 share = score / total
                 c_fe[e[i], f[j]] += share  # (27)
                 c_e[e[i]] += share
                 # Accumulate rather than assign: the same (i, j, m, l)
                 # key recurs in every sentence pair with these lengths,
                 # and c_jml below sums over all of them.
                 c_ijml[i, j, m, l] += share  # (28)
                 c_jml[j, m, l] += share

     # M step: renormalise the counts into the new parameters.
     accepted = 0
     for (e, f), count in c_fe.items():
         if c_e[e] <= 0:
             continue
         new_t = count / c_e[e]
         if abs(self.t[e][f] - new_t) < delta_threshold:
             accepted += 1
         self.t[e][f] = new_t
     for k, count in c_ijml.items():
         # k[1:] drops i, leaving the (j, m, l) normaliser key.
         norm = c_jml[k[1:]]
         if norm <= 0:
             continue
         self.a[k] = count / norm
     return accepted
コード例 #3
0
ファイル: m3.py プロジェクト: duanzhenchun/mylab
 def EM(self, delta_threshold):
     """Accumulate expected counts over the bitext (E step).

     Only the E step is visible in this excerpt: the four count tables are
     filled but not consumed, and ``delta_threshold`` is not read by the
     visible code.
     NOTE(review): presumably an M step follows outside this excerpt --
     confirm against the full file.
     """
     # E step:
     c_fe = defaultdict(float)  # keyed by (e word, f word)
     c_e = defaultdict(float)  # keyed by e word
     c_ijml = defaultdict(float)  # keyed by (i, j, m, l)
     c_jml = defaultdict(float)  # keyed by (j, m, l)
     for (e, f) in m1.bitext(self.source, self.target):
         l, m = len(e), len(f)
         for j in range(m):
             # Unnormalised scores for position j, and their sum.
             tmpdic = defaultdict(float)
             total = 0.0
             for i in range(l):
                 tmp = self.t[e[i]][f[j]] * self.a[i, j, m, l]
                 # NOTE(review): the key is the word pair, not the position,
                 # so a word that occurs twice in e overwrites its earlier
                 # score here -- confirm this is intended.
                 tmpdic[f[j], e[i]] = tmp
                 total += tmp
             if total <= 0:
                 # Nothing to distribute for this position.
                 continue
             for i in range(l):
                 # Share of position j's total score attributed to i.
                 tmp = tmpdic[f[j], e[i]] / total
                 c_fe[e[i], f[j]] += tmp  # (27)
                 c_e[e[i]] += tmp
                 # NOTE(review): this assigns while c_jml below accumulates;
                 # the same (i, j, m, l) key recurs across sentence pairs of
                 # equal lengths -- check whether += was intended.
                 c_ijml[i, j, m, l] = tmp  # (28)
                 c_jml[j, m, l] += tmp
コード例 #4
0
ファイル: m3.py プロジェクト: freephys/mylab
 def EM(self, delta_threshold):
     # E step:
     c_fe = defaultdict(float)
     c_e = defaultdict(float)
     c_ijml = defaultdict(float)
     c_jml = defaultdict(float)
     for (e, f) in m1.bitext(self.source, self.target):
         l, m = len(e), len(f)
         for j in range(m):
             tmpdic = defaultdict(float)
             total = 0.0
             for i in range(l):
                 tmp = self.t[e[i]][f[j]] * self.a[i, j, m, l] 
                 tmpdic[f[j], e[i]] = tmp
                 total += tmp
             if total <= 0: 
                 continue
             for i in range(l):
                 tmp = tmpdic[f[j], e[i]] / total
                 c_fe[e[i], f[j]] += tmp  # (27)
                 c_e[e[i]] += tmp
                 c_ijml[i, j, m, l] = tmp  # (28)
                 c_jml[j, m, l] += tmp