import numpy as np
# MatMul comes from common.layer (see the script at the end of this section);
# SoftmaxWithLoss is assumed to live in the same module.
from common.layer import MatMul, SoftmaxWithLoss


class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        W_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        W_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')

        self.input_layer0 = MatMul(W_in)
        self.input_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        layers = [self.input_layer0, self.input_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.input_layer0.forward(contexts[:, 0, :])
        h1 = self.input_layer1.forward(contexts[:, 1, :])
        h = (h0 + h1) / 2
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da /= 2
        self.input_layer0.backward(da)
        self.input_layer1.backward(da)
        return None
def test_matmul(self):
    W = np.random.randn(7, 3)
    x = np.random.randn(10, 7)
    matmul = MatMul(W)

    out = matmul.forward(x)    # (10, 7) x (7, 3) -> (10, 3)
    dx = matmul.backward(out)  # reuse the output as the upstream gradient; only shapes are checked
    np.testing.assert_array_almost_equal(out.shape, (10, 3))
    np.testing.assert_array_almost_equal(dx.shape, (10, 7))
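The MatMul layer exercised by this test is imported from common.layer rather than defined in these notes. A minimal sketch that is consistent with how it is used here (a params list, a grads list, and forward/backward that cache the input) might look like this; treat it as an illustration of the assumed interface, not the actual library code:

import numpy as np

class MatMul:
    def __init__(self, W):
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        self.x = None

    def forward(self, x):
        W, = self.params
        out = np.dot(x, W)
        self.x = x           # cache the input for the backward pass
        return out

    def backward(self, dout):
        W, = self.params
        dx = np.dot(dout, W.T)
        dW = np.dot(self.x.T, dout)
        self.grads[0][...] = dW   # write the gradient in place so external lists stay valid
        return dx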
def main():
    # Sample context data (one-hot vectors over a vocabulary of 7 words)
    c0 = np.array([[1, 0, 0, 0, 0, 0, 0]])
    c1 = np.array([[0, 0, 1, 0, 0, 0, 0]])

    # Initialize the weights
    W_in = np.random.randn(7, 3)
    W_out = np.random.randn(3, 7)

    # Create the layers
    in_layer0 = MatMul(W_in)
    in_layer1 = MatMul(W_in)
    out_layer = MatMul(W_out)

    # Forward propagation
    h0 = in_layer0.forward(c0)
    h1 = in_layer1.forward(c1)
    h = 0.5 * (h0 + h1)
    s = out_layer.forward(h)
    print(s)
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        # distribute diff to h0/h1 equally
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize the weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # Create the layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Keep the distributed representations of the words as a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
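To make the interface concrete, here is a hypothetical single training step for SimpleCBOW. The one-hot shapes, the one-hot target format (SoftmaxWithLoss is not shown, so this is an assumption), and the naive SGD update are illustrative only; note that W_in is shared by both input layers, so its two gradient pieces are both applied, which matches the summed gradient:

import numpy as np

# Hypothetical mini-batch: 3 samples, 2 context words each, vocabulary of 7 words
vocab_size, hidden_size, batch_size = 7, 5, 3
contexts = np.zeros((batch_size, 2, vocab_size), dtype='f')
target = np.zeros((batch_size, vocab_size), dtype='f')
contexts[:, 0, 0] = 1   # left context word (id 0)
contexts[:, 1, 2] = 1   # right context word (id 2)
target[:, 1] = 1        # center word to predict (id 1), one-hot

model = SimpleCBOW(vocab_size, hidden_size)
loss = model.forward(contexts, target)
model.backward()

# Naive SGD over the collected params/grads (illustration only)
learning_rate = 1.0
for param, grad in zip(model.params, model.grads):
    param -= learning_rate * grad

print(loss)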
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer0 = SoftmaxWithLoss()
        self.loss_layer1 = SoftmaxWithLoss()

        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)
        l0 = self.loss_layer0.forward(s, contexts[:, 0])
        l1 = self.loss_layer1.forward(s, contexts[:, 1])
        loss = l0 + l1
        return loss

    def backward(self, dout=1):
        dl0 = self.loss_layer0.backward(dout)
        dl1 = self.loss_layer1.backward(dout)
        ds = dl0 + dl1
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)
        return None
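For comparison, a hypothetical call to SimpleSkipGram. The roles are reversed relative to CBOW: the one-hot center word goes in, and the two context words are the prediction targets. The shapes follow how forward indexes contexts, and the one-hot target format is the same assumption as above:

import numpy as np

vocab_size, hidden_size, batch_size = 7, 5, 3
target = np.zeros((batch_size, vocab_size), dtype='f')
contexts = np.zeros((batch_size, 2, vocab_size), dtype='f')
target[:, 1] = 1        # center word (id 1)
contexts[:, 0, 0] = 1   # left context word (id 0)
contexts[:, 1, 2] = 1   # right context word (id 2)

model = SimpleSkipGram(vocab_size, hidden_size)
loss = model.forward(contexts, target)   # sum of the losses for the two context words
model.backward()                         # gradients are accumulated in model.grads
print(loss)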
class GNN:
    def __init__(self, D, T):
        self.T = T

        # Initialize the parameters
        self.params = {}
        self.params['W'] = np.random.normal(0, 0.4, (D, D)).astype(np.float64)
        self.params['A'] = np.random.normal(0, 0.4, (1, D)).astype(np.float64)
        self.params['b'] = np.array([0], dtype=np.float64)

        self.MaskADD = MaskADD()
        self.MatMul = MatMul(self.params['W'])
        self.Relu = Relu()
        self.ADD = ADD()
        self.Affine = Affine(self.params['A'], self.params['b'])
        self.Sigmoid = Sigmoid()
        self.sigmoid_loss = SigmoidWithLoss()

    def forward(self, x, H):
        # Repeat aggregation T times: neighborhood sum -> linear transform -> ReLU
        for _ in range(self.T):
            a = self.MaskADD.forward(x, H)
            r = self.MatMul.forward(a)
            x = self.Relu.forward(r)
        # Readout: sum the node features, then map them to a scalar score
        h = self.ADD.forward(x)
        s = self.Affine.forward(h)
        return s

    def predict(self, x, H):
        s = self.forward(x, H)
        p = self.Sigmoid.forward(s)
        return p.flatten()

    def loss(self, x, H, y):
        s = self.forward(x, H)
        L = self.sigmoid_loss.forward(s, y)
        return L

    def get_gradient(self, x, H, y):
        f = lambda w: self.loss(x, H, y)
        grads = {}
        grads['W'] = gradient(f, self.params['W'])
        grads['A'] = gradient(f, self.params['A'])
        grads['b'] = gradient(f, self.params['b'])
        return grads
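get_gradient relies on a numerical gradient helper that is not shown in these notes. A typical central-difference implementation consistent with that call pattern (the loss closure ignores its argument, so the parameter array must be perturbed in place) might look like the following; the epsilon value is an assumption:

import numpy as np

def gradient(f, x, eps=1e-4):
    # Numerical gradient of f with respect to x; x is modified in place and restored.
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        original = x[idx]

        x[idx] = original + eps
        f_plus = f(x)
        x[idx] = original - eps
        f_minus = f(x)

        grad[idx] = (f_plus - f_minus) / (2 * eps)
        x[idx] = original   # restore the original value
        it.iternext()
    return grad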
class GNN:
    def __init__(self, D, T, w):
        self.T = T
        self.D = D
        self.agg1 = MaskADD()    # aggregate neighbor features via the adjacency matrix
        self.agg2 = MatMul(w)    # linear transform of the aggregated features
        self.agg3 = Relu()       # nonlinearity
        self.readout = ADD()     # sum the node features into a graph-level vector

    def forward(self, x, H):
        for _ in range(self.T):
            a = self.agg1.forward(x, H)
            r = self.agg2.forward(a)
            x = self.agg3.forward(r)
        h = self.readout.forward(x)
        return h
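A hypothetical way to drive the forward-only variant above: H is taken to be an N×N adjacency matrix and x an N×D node-feature matrix, which is an assumption about what MaskADD.forward(x, H) expects:

import numpy as np

# Hypothetical 4-node graph
N, D, T = 4, 8, 2
H = np.array([[0, 1, 0, 1],
              [1, 0, 1, 0],
              [0, 1, 0, 0],
              [1, 0, 0, 0]], dtype=np.float64)   # symmetric adjacency matrix (assumed format)
x = np.zeros((N, D))
x[:, 0] = 1.0                                    # set the first feature to 1 for every node

w = np.random.normal(0, 0.4, (D, D))
model = GNN(D, T, w)                             # the forward-only variant defined above
h = model.forward(x, H)                          # graph-level readout vector
print(h)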
import sys, os
sys.path.append(os.pardir)
from common.layer import MatMul
import numpy as np

c0 = np.zeros(7)
c1 = np.zeros(7)
c0[0] = 1
c1[2] = 1

W_in = np.random.randn(7, 3)
W_out = np.random.randn(3, 7)

in_layer0 = MatMul(W_in)
in_layer1 = MatMul(W_in)
out_layer = MatMul(W_out)

h0 = in_layer0.forward(c0)
h1 = in_layer1.forward(c1)
h = (h0 + h1) / 2.0
s = out_layer.forward(h)
print(s)