Esempio n. 1
0
 def populate_dense(self):
     """Accumulate projected statistics over ``config.train``.

     For each tree, ``config.proj[label][0]`` is multiplied (``np.dot``) with
     the tree's entry in ``config.Inode`` and summed into ``self.pi[label]``.
     For each node yielded by ``tree.postorder()``:

     * length 2: the node's ``config.Onode`` entry and the ``config.Inode``
       entries of ``node[0]`` / ``node[1]`` are projected, and their outer
       product is summed into ``self.rule3s[Rule3(...)]``;
     * length 1: the projected ``config.Onode`` entry is summed into
       ``self.rule1s[Rule1(...)]``.

     Finally every ``self.pi`` value is divided by ``len(config.train)``.

     Raises:
         RuntimeError: if a node's length is neither 1 nor 2.
     """
     proj = config.proj
     inside, outside = config.Inode, config.Onode
     for tree in tqdm(config.train, desc='Doing projections'):
         root = tree.label()
         root_vec = np.dot(proj[root][0], inside[tree])
         if root in self.pi:
             self.pi[root] += root_vec
         else:
             self.pi[root] = root_vec
         for node in tree.postorder():
             size = len(node)
             if size == 2:
                 parent = node.label()
                 left, right = node[0].label(), node[1].label()
                 # Index 1 of a projection pair is applied to Onode entries,
                 # index 0 to Inode entries.
                 out_proj = proj[parent][1]
                 left_proj, right_proj = proj[left][0], proj[right][0]
                 out_vec = np.dot(out_proj, outside[node])
                 left_vec = np.dot(left_proj, inside[node[0]])
                 right_vec = np.dot(right_proj, inside[node[1]])
                 rule = Rule3(parent, left, right)
                 outer = np.einsum('i,j,k->ijk', out_vec, left_vec, right_vec)
                 if rule in self.rule3s:
                     self.rule3s[rule] += outer
                 else:
                     self.rule3s[rule] = outer
             elif size == 1:
                 parent, child = node.label(), node[0]
                 out_vec = np.dot(proj[parent][1], outside[node])
                 rule = Rule1(parent, child)
                 if rule in self.rule1s:
                     self.rule1s[rule] += out_vec
                 else:
                     self.rule1s[rule] = out_vec
             else:
                 raise RuntimeError
     # Turn the per-label sums into averages over the training set size.
     for label, total in self.pi.items():
         self.pi[label] = total / len(config.train)
Esempio n. 2
0
 def populate(self):
     """Count labels and rules over ``config.train``.

     Each tree's label is counted in ``self.pi``.  Every node from
     ``tree.postorder()`` is counted in ``self.nonterminals``; a node of
     length 2 is additionally counted as a ``Rule3`` in ``self.rule3s`` and
     a node of length 1 as a ``Rule1`` in ``self.rule1s``.  Afterwards each
     ``self.pi`` count is divided by ``len(config.train)``.

     The ``+= 1`` updates need mappings that supply a default for unseen
     keys (presumably ``Counter``/``defaultdict`` -- confirm where they are
     created).

     Raises:
         RuntimeError: if a node's length is neither 1 nor 2.
     """
     for tree in config.train:
         self.pi[tree.label()] += 1
         for node in tree.postorder():
             label = node.label()
             self.nonterminals[label] += 1
             size = len(node)
             if size == 2:
                 rule = Rule3(label, node[0].label(), node[1].label())
                 self.rule3s[rule] += 1
             elif size == 1:
                 rule = Rule1(label, node[0])
                 self.rule1s[rule] += 1
             else:
                 raise RuntimeError
     # Normalise the root-label counts by the number of training trees.
     for label, count in self.pi.items():
         self.pi[label] = count / len(config.train)
Esempio n. 3
0
 def populate(self):
     """Collect symbol and rule counts over ``config.train``.

     For every node from ``tree.postorder()`` the label is counted in
     ``self.nonterminals``.  Nodes of length 2 are also counted in
     ``self.interminals`` and, as a ``Rule3``, in ``self.rule3s_count``.
     Nodes of length 1 count ``node[0]`` in ``self.terminals``, the label in
     ``self.preterminals`` and the ``Rule1`` in ``self.rule1s_count``.
     After each tree its label is counted in ``self.pi``; at the end every
     ``self.pi`` count is divided by ``len(config.train)``.

     The ``+= 1`` updates need mappings that supply a default for unseen
     keys (presumably ``Counter``/``defaultdict`` -- confirm where they are
     created).

     Raises:
         RuntimeError: if a node's length is neither 1 nor 2.
     """
     for tree in tqdm(config.train, desc='Doing vanilla PCFG'):
         for node in tree.postorder():
             label = node.label()
             self.nonterminals[label] += 1
             size = len(node)
             if size == 2:
                 self.interminals[label] += 1
                 rule = Rule3(label, node[0].label(), node[1].label())
                 self.rule3s_count[rule] += 1
             elif size == 1:
                 word = node[0]
                 self.terminals[word] += 1
                 self.preterminals[label] += 1
                 rule = Rule1(label, word)
                 self.rule1s_count[rule] += 1
             else:
                 raise RuntimeError
         self.pi[tree.label()] += 1
     # Normalise the root-label counts by the number of training trees.
     for label, count in self.pi.items():
         self.pi[label] = count / len(config.train)
Esempio n. 4
0
 def populate(self):
     """Accumulate projected statistics, touching only non-zero columns.

     Same accumulation as a dense ``np.dot`` projection would give, but each
     product is restricted to the columns reported by ``.nonzero()`` on the
     ``config.Inode`` / ``config.Onode`` entry (these entries expose
     ``.nonzero()`` and ``.toarray()``; presumably 1-row SciPy sparse
     matrices -- confirm).

     * ``self.pi[label]`` sums the projected ``Inode`` entry of each tree.
     * ``self.rule3s[Rule3(...)]`` sums, for nodes of length 2, the outer
       product of the projected ``Onode`` entry of the node and the
       projected ``Inode`` entries of ``node[0]`` and ``node[1]``.
     * ``self.rule1s[Rule1(...)]`` sums, for nodes of length 1, the
       projected ``Onode`` entry of the node.

     Finally every ``self.pi`` value is divided by ``len(config.train)``.

     Raises:
         RuntimeError: if a node's length is neither 1 nor 2.
     """
     proj = config.proj
     inside, outside = config.Inode, config.Onode

     def project(row, matrix):
         # Weight the selected columns of ``matrix`` by the row's values at
         # those columns and sum across columns.
         cols = row.nonzero()[1]
         return np.sum(matrix[:, cols] * row[0, cols].toarray()[0], axis=1)

     for tree in tqdm(config.train, desc='Doing projections'):
         root = tree.label()
         root_vec = project(inside[tree], proj[root][0])
         if root in self.pi:
             self.pi[root] += root_vec
         else:
             self.pi[root] = root_vec
         for node in tree.postorder():
             size = len(node)
             if size == 2:
                 parent = node.label()
                 left, right = node[0].label(), node[1].label()
                 out_proj = proj[parent][1]
                 left_proj, right_proj = proj[left][0], proj[right][0]
                 out_vec = project(outside[node], out_proj)
                 left_vec = project(inside[node[0]], left_proj)
                 right_vec = project(inside[node[1]], right_proj)
                 rule = Rule3(parent, left, right)
                 outer = np.einsum('i,j,k->ijk', out_vec, left_vec, right_vec)
                 if rule in self.rule3s:
                     self.rule3s[rule] += outer
                 else:
                     self.rule3s[rule] = outer
             elif size == 1:
                 parent, child = node.label(), node[0]
                 out_vec = project(outside[node], proj[parent][1])
                 rule = Rule1(parent, child)
                 if rule in self.rule1s:
                     self.rule1s[rule] += out_vec
                 else:
                     self.rule1s[rule] = out_vec
             else:
                 raise RuntimeError
     # Turn the per-label sums into averages over the training set size.
     for label, total in self.pi.items():
         self.pi[label] = total / len(config.train)
Esempio n. 5
0
 def populate(self):
     """Accumulate rule and root statistics from ``config.I`` / ``config.O``.

     ``config.I[label]`` and ``config.O[label]`` are indexed by position;
     the counters ``pI`` / ``pO`` (also stored on ``config`` as
     ``config.pI`` / ``config.pO``) hold the next unread position per
     label and are advanced after every read.

     * Nodes of length 2 add the outer product of one ``O`` entry (node
       label) and two ``I`` entries (labels of ``node[0]`` / ``node[1]``)
       to ``self.rule3s[Rule3(...)]``.
     * Nodes of length 1 add one ``O`` entry to ``self.rule1s[Rule1(...)]``.
     * After each tree, one ``I`` entry for the label of the last node
       yielded by ``postorder()`` (presumably the root -- confirm) is added
       to ``self.pi``.

     Finally every ``self.pi`` value is divided by ``len(config.train)``.

     Entries taken from ``config.I`` / ``config.O`` are copied before they
     are stored.  Storing them directly would let the later in-place ``+=``
     modify the source data itself.

     Raises:
         RuntimeError: if a node's length is neither 1 nor 2.
     """
     I, O = config.I, config.O
     pI, pO = Counter(), Counter()
     config.pI, config.pO = pI, pO
     for tree in tqdm(config.train, desc='Constructing L-PCFG'):
         for node in tree.postorder():
             if len(node) == 2:
                 a, b, c = node.label(), node[0].label(), node[1].label()
                 # Read-then-advance order matters: when b == c the second
                 # read must see the counter already advanced by the first.
                 Zi = O[a][pO[a]]
                 pO[a] += 1
                 Yj = I[b][pI[b]]
                 pI[b] += 1
                 Yk = I[c][pI[c]]
                 pI[c] += 1
                 r = Rule3(a, b, c)
                 # einsum returns a new array, so no copy is needed here.
                 outer = np.einsum('i,j,k->ijk', Zi, Yj, Yk)
                 if r not in self.rule3s:
                     self.rule3s[r] = outer
                 else:
                     self.rule3s[r] += outer
             elif len(node) == 1:
                 a, x = node.label(), node[0]
                 Z = O[a][pO[a]]
                 pO[a] += 1
                 r = Rule1(a, x)
                 if r not in self.rule1s:
                     # Copy: Z belongs to config.O and the accumulator is
                     # updated in place below.
                     self.rule1s[r] = Z.copy()
                 else:
                     self.rule1s[r] += Z
             else:
                 raise RuntimeError
         # ``node`` is the last node yielded by postorder() for this tree.
         a = node.label()
         if a not in self.pi:
             # Copy for the same reason as above (entry belongs to config.I).
             self.pi[a] = I[a][pI[a]].copy()
         else:
             self.pi[a] += I[a][pI[a]]
         pI[a] += 1
     for a, param in self.pi.items():
         self.pi[a] = param / len(config.train)
Esempio n. 6
0
 def populate(self):
     """Accumulate projected moment statistics over ``config.train``.

     Each ``config.Inode`` / ``config.Onode`` entry is projected with the
     matching ``config.proj`` matrix, restricted to the columns reported by
     ``.nonzero()`` (entries expose ``.nonzero()`` / ``.toarray()``;
     presumably 1-row SciPy sparse matrices -- confirm).

     * ``self.pi[label]`` sums the projected ``Inode`` entry of each tree
       and is finally divided by ``len(config.train)``.
     * For nodes of length 2 with rule ``r = Rule3(a, b, c)`` and projected
       vectors ``Zi`` (Onode of the node), ``Yj`` / ``Yk`` (Inode of
       ``node[0]`` / ``node[1]``): ``Eijk``, ``Eij``, ``Eik``, ``Ejk`` sum
       the outer products and ``Ei``, ``Ej``, ``Ek`` sum the vectors, all
       keyed by ``r``; ``H[a]`` sums ``Zi``; ``F[b]`` / ``F[c]`` sum
       ``Yj`` / ``Yk``.
     * For nodes of length 1: ``Eax[Rule1(a, x)]`` and ``H[a]`` sum the
       projected Onode vector.

     ``H`` and ``F`` store their own copies on first insertion.  Without
     that, ``Ei[r]`` and ``H[a]`` (likewise ``Ej``/``F`` and ``Ek``/``F``)
     would be the same array object, and each later in-place ``+=`` on one
     table would also change the other.

     Raises:
         RuntimeError: if a node's length is neither 1 nor 2.
     """
     p = config.proj
     I, O = config.Inode, config.Onode
     for tree in tqdm(config.train, desc='Doing projections'):
         idx = I[tree].nonzero()[1]
         Y = np.sum(p[tree.label()][0][:, idx] *
                    I[tree][0, idx].toarray()[0],
                    axis=1)
         if tree.label() not in self.pi:
             self.pi[tree.label()] = Y
         else:
             self.pi[tree.label()] += Y
         for node in tree.postorder():
             if len(node) == 2:
                 a, b, c = node.label(), node[0].label(), node[1].label()
                 pi, pj, pk = p[a][1], p[b][0], p[c][0]
                 idx = O[node].nonzero()[1]
                 Zi = np.sum(pi[:, idx] * O[node][0, idx].toarray()[0],
                             axis=1)
                 idx = I[node[0]].nonzero()[1]
                 Yj = np.sum(pj[:, idx] * I[node[0]][0, idx].toarray()[0],
                             axis=1)
                 idx = I[node[1]].nonzero()[1]
                 Yk = np.sum(pk[:, idx] * I[node[1]][0, idx].toarray()[0],
                             axis=1)
                 r = Rule3(a, b, c)
                 if r not in self.Eijk:
                     self.Eijk[r] = np.einsum('i,j,k->ijk', Zi, Yj, Yk)
                     self.Eij[r] = np.einsum('i,j->ij', Zi, Yj)
                     self.Eik[r] = np.einsum('i,k->ik', Zi, Yk)
                     self.Ejk[r] = np.einsum('j,k->jk', Yj, Yk)
                     # Ei/Ej/Ek keep the freshly computed vectors; H and F
                     # below must therefore store independent copies.
                     self.Ei[r] = Zi
                     self.Ej[r] = Yj
                     self.Ek[r] = Yk
                 else:
                     self.Eijk[r] += np.einsum('i,j,k->ijk', Zi, Yj, Yk)
                     self.Eij[r] += np.einsum('i,j->ij', Zi, Yj)
                     self.Eik[r] += np.einsum('i,k->ik', Zi, Yk)
                     self.Ejk[r] += np.einsum('j,k->jk', Yj, Yk)
                     self.Ei[r] += Zi
                     self.Ej[r] += Yj
                     self.Ek[r] += Yk
                 if a not in self.H:
                     # Copy so H[a] never shares storage with Ei[r].
                     self.H[a] = deepcopy(Zi)
                 else:
                     self.H[a] += Zi
                 if b not in self.F:
                     # Copy so F[b] never shares storage with Ej[r].
                     self.F[b] = deepcopy(Yj)
                 else:
                     self.F[b] += Yj
                 if c not in self.F:
                     # Copy so F[c] never shares storage with Ek[r].
                     self.F[c] = deepcopy(Yk)
                 else:
                     self.F[c] += Yk
             elif len(node) == 1:
                 a, x = node.label(), node[0]
                 idx = O[node].nonzero()[1]
                 Z = np.sum(p[a][1][:, idx] * O[node][0, idx].toarray()[0],
                            axis=1)
                 r = Rule1(a, x)
                 if r not in self.Eax:
                     self.Eax[r] = Z
                 else:
                     self.Eax[r] += Z
                 if a not in self.H:
                     # Copy so H[a] never shares storage with Eax[r].
                     self.H[a] = deepcopy(Z)
                 else:
                     self.H[a] += Z
             else:
                 raise RuntimeError
     for k, v in self.pi.items():
         self.pi[k] = v / len(config.train)
Esempio n. 7
0
 def populate(self):
     """Accumulate moment statistics from ``config.I`` / ``config.O``.

     ``config.I[label]`` and ``config.O[label]`` are indexed by position;
     two counters (also stored as ``config.pI`` / ``config.pO``) hold the
     next unread position per label and are advanced after every read.

     * Nodes of length 2 with rule ``r = Rule3(a, b, c)`` read one ``O``
       entry ``Zi`` for ``a`` and one ``I`` entry each (``Yj``, ``Yk``) for
       ``b`` and ``c``.  ``Eijk``, ``Eij``, ``Eik``, ``Ejk`` sum the outer
       products, ``Ei``, ``Ej``, ``Ek`` sum the vectors (all keyed by
       ``r``), ``H[a]`` sums ``Zi`` and ``F[b]`` / ``F[c]`` sum ``Yj`` /
       ``Yk``.
     * Nodes of length 1 read one ``O`` entry and sum it into
       ``Eax[Rule1(a, x)]`` and ``H[a]``.
     * After each tree, one ``I`` entry for the label of the last node
       yielded by ``postorder()`` (presumably the root -- confirm) is
       summed into ``self.pi`` and ``self.F``.

     Vectors are deep-copied when first stored.  Finally every ``self.pi``
     value is divided by ``len(config.train)``.

     Raises:
         RuntimeError: if a node's length is neither 1 nor 2.
     """
     inside, outside = config.I, config.O
     in_pos, out_pos = Counter(), Counter()
     config.pI, config.pO = in_pos, out_pos

     def take(bank, cursor, label):
         # Read the next unread entry for ``label`` and advance its cursor.
         entry = bank[label][cursor[label]]
         cursor[label] += 1
         return entry

     def add_vector(table, key, vec):
         # First sighting stores an independent copy; later ones add in place.
         if key in table:
             table[key] += vec
         else:
             table[key] = deepcopy(vec)

     for tree in tqdm(config.train, desc='Constructing L-PCFG'):
         for node in tree.postorder():
             size = len(node)
             if size == 2:
                 a, b, c = node.label(), node[0].label(), node[1].label()
                 Zi = take(outside, out_pos, a)
                 Yj = take(inside, in_pos, b)
                 Yk = take(inside, in_pos, c)
                 r = Rule3(a, b, c)
                 # Membership in Eijk alone decides whether all seven
                 # rule-keyed tables are created or updated; it must be
                 # evaluated before Eijk itself is written.
                 first_time = r not in self.Eijk
                 products = (
                     (self.Eijk, 'i,j,k->ijk', (Zi, Yj, Yk)),
                     (self.Eij, 'i,j->ij', (Zi, Yj)),
                     (self.Eik, 'i,k->ik', (Zi, Yk)),
                     (self.Ejk, 'j,k->jk', (Yj, Yk)),
                 )
                 for table, subscripts, operands in products:
                     value = np.einsum(subscripts, *operands)
                     if first_time:
                         table[r] = value
                     else:
                         table[r] += value
                 for table, vec in ((self.Ei, Zi), (self.Ej, Yj), (self.Ek, Yk)):
                     if first_time:
                         table[r] = deepcopy(vec)
                     else:
                         table[r] += vec
                 add_vector(self.H, a, Zi)
                 add_vector(self.F, b, Yj)
                 add_vector(self.F, c, Yk)
             elif size == 1:
                 a, x = node.label(), node[0]
                 Z = take(outside, out_pos, a)
                 r = Rule1(a, x)
                 add_vector(self.Eax, r, Z)
                 add_vector(self.H, a, Z)
             else:
                 raise RuntimeError
         # ``node`` is the last node yielded by postorder() for this tree.
         a = node.label()
         Y = take(inside, in_pos, a)
         add_vector(self.pi, a, Y)
         add_vector(self.F, a, Y)
     for a, param in self.pi.items():
         self.pi[a] = param / len(config.train)