class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize the weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # Create the layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed representation of words in an instance variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss
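# A minimal sketch (an addition, not part of the original snippet) of what
# `contexts` and `target` look like for window size 1: each row of `contexts`
# holds the one-hot vectors of the left and right neighbors, so contexts[:, 0]
# selects all left-context vectors and contexts[:, 1] all right-context vectors.
import numpy as np

vocab_size = 7
# Two training samples, window size 1: shape (N, 2, vocab_size)
contexts = np.array([
    [[1, 0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0]],   # sample 0: left, right
    [[0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0]],   # sample 1: left, right
]).astype('f')
target = np.array([
    [0, 1, 0, 0, 0, 0, 0],
    [0, 0, 1, 0, 0, 0, 0],
]).astype('f')

print(contexts.shape)        # (2, 2, 7)
print(contexts[:, 0].shape)  # (2, 7) -- the batch of left-context one-hots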
def __init__(self, vocab_size, hidden_size):
    V, H = vocab_size, hidden_size

    # Initialize the weights
    W_in = 0.01 * np.random.randn(V, H).astype("f")
    W_out = 0.01 * np.random.randn(H, V).astype("f")

    # Create each layer.
    self.in_layer = MatMul(W_in)
    self.out_layer = MatMul(W_out)
    # One loss layer is needed per context word to be predicted
    self.loss_layer0 = SoftmaxWithLoss()
    self.loss_layer1 = SoftmaxWithLoss()

    # Collect all layers, weights, and gradients into lists
    layers = [
        self.in_layer,
        self.out_layer,
        self.loss_layer0,
        self.loss_layer1,
    ]
    self.params, self.grads = [], []
    for layer in layers:
        self.params += layer.params
        self.grads += layer.grads

    # Store the distributed representation of words in an instance variable
    self.word_vecs = W_in
def __init__(self, vocab_size, hidden_size):
    V, H = vocab_size, hidden_size

    # Initialize the weights
    W_in = 0.01 * np.random.randn(V, H).astype('f')
    W_out = 0.01 * np.random.randn(H, V).astype('f')

    # Create the layers
    # One input layer
    self.in_layer = MatMul(W_in)
    # One output layer
    self.out_layer = MatMul(W_out)
    # One loss layer per context word
    self.loss_layer1 = SoftmaxWithLoss()
    self.loss_layer2 = SoftmaxWithLoss()

    # Collect all weights and gradients into lists
    layers = [self.in_layer, self.out_layer]
    self.params, self.grads = [], []
    for layer in layers:
        self.params += layer.params
        self.grads += layer.grads

    # Store the distributed representation of words in an instance variable
    self.word_vecs = W_in
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
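# A minimal training sketch, assuming the helper modules from the book's
# companion repository (common.util, common.trainer, common.optimizer) are on
# the path; the helper names below are assumptions, not part of this snippet.
from common.util import preprocess, create_contexts_target, convert_one_hot
from common.trainer import Trainer
from common.optimizer import Adam

text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)
vocab_size = len(word_to_id)

# Build (context, target) pairs with window size 1 and one-hot encode them.
contexts, target = create_contexts_target(corpus, window_size=1)
contexts = convert_one_hot(contexts, vocab_size)
target = convert_one_hot(target, vocab_size)

model = SimpleCBOW(vocab_size, hidden_size=5)
trainer = Trainer(model, Adam())
trainer.fit(contexts, target, max_epoch=1000, batch_size=3)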
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()

        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)
        l1 = self.loss_layer1.forward(s, contexts[:, 0])
        l2 = self.loss_layer2.forward(s, contexts[:, 1])
        loss = l1 + l2
        return loss

    def backward(self, dout=1):
        dl1 = self.loss_layer1.backward(dout)
        dl2 = self.loss_layer2.backward(dout)
        ds = dl1 + dl2
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)
        return None
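# A minimal sketch of one manual SGD step with SimpleSkipGram, illustrating the
# params/grads contract shared by all of these model classes: after backward(),
# grads[i] holds the gradient for params[i]. The one-hot arrays mirror the
# shapes from the earlier CBOW sketch; the learning rate is an arbitrary
# illustrative value, not taken from the original snippets.
model = SimpleSkipGram(vocab_size=7, hidden_size=5)
loss = model.forward(contexts, target)  # contexts: (N, 2, 7), target: (N, 7)
model.backward()
for param, grad in zip(model.params, model.grads):
    param -= 0.1 * grad                 # in-place SGD update on the weight arrays
print(loss)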
class SimpleCBOW: """ Simple continuous bag-of-words. """ def __init__(self, vocabulary_size, hidden_size): V, H = vocabulary_size, hidden_size # initialize weights W_in = 0.01 * np.random.randn(V, H).astype('f') W_out = 0.01 * np.random.randn(H, V).astype('f') # generate layers self.in_layer0 = MatMul(W_in) self.in_layer1 = MatMul(W_in) self.out_layer = MatMul(W_out) self.loss_layer = SoftmaxWithLoss() # list all weights and gradient layers layers = [self.in_layer0, self.in_layer1, self.out_layer] self.params, self.grads = [], [] for layer in layers: self.params += layer.params self.grads += layer.grads # set distributed representation of words to variable self.word_vecs = W_in def forward(self, contexts, target): """ :param contexts: dim 3 of numpy array :param target: dim2 of numpy array """ h0 = self.in_layer0.forward(contexts[:, 0]) h1 = self.in_layer1.forward(contexts[:, 1]) h = (h0 + h1) * 0.5 score = self.out_layer.forward(h) loss = self.loss_layer.forward(score, target) return loss def backward(self, dout=1): """ Continuous bag-of-words (CBOW) 0.5*da MatMul <-+ vector ----+ W_in | v | 0.5*da Softmax +-- [+] <- [x] <-- MatMul <-- With <-- Loss | ^ da W_out ds Loss 1 | 0.5 ----+ MatMul <-+ W_in 0.5*da """ ds = self.loss_layer.backward(dout) da = self.out_layer.backward(ds) da *= 0.5 self.in_layer1.backward(da) self.in_layer0.backward(da) return None
def test_affine():
    # A simple fully connected layer
    c = np.array([[1, 0, 0, 0, 0, 0, 0]])  # input
    W = np.random.randn(7, 3)               # weights
    h = np.dot(c, W)                         # intermediate node
    # Note: the word vector is one-hot, so this amounts to extracting one row of W
    print(f"h: {h}")

    # Process the same input with the MatMul layer built in chapter 1
    layer = MatMul(W)
    h2 = layer.forward(c)
    print(f"h2: {h2}")
    print("-" * 10)
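# A small follow-up check (an addition, not from the original snippet): because
# c is one-hot with a 1 in position 0, the matrix product simply selects row 0
# of W, so the "lookup" interpretation can be verified directly.
import numpy as np

c = np.array([[1, 0, 0, 0, 0, 0, 0]])
W = np.random.randn(7, 3)
h = np.dot(c, W)
assert np.allclose(h, W[0])  # the product equals row 0 of W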
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize the weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # Create the layers
        # One input layer
        self.in_layer = MatMul(W_in)
        # One output layer
        self.out_layer = MatMul(W_out)
        # One loss layer per context word
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed representation of words in an instance variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)
        l1 = self.loss_layer1.forward(s, contexts[:, 0])
        l2 = self.loss_layer2.forward(s, contexts[:, 1])
        loss = l1 + l2
        return loss

    def backward(self, dout=1):
        dl1 = self.loss_layer1.backward(dout)
        dl2 = self.loss_layer2.backward(dout)
        ds = dl1 + dl2
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)
        return None
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize the weights
        W_in = 0.01 * np.random.randn(V, H).astype("f")
        W_out = 0.01 * np.random.randn(H, V).astype("f")

        # Create each layer.
        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        # One loss layer is needed per context word to be predicted
        self.loss_layer0 = SoftmaxWithLoss()
        self.loss_layer1 = SoftmaxWithLoss()

        # Collect all layers, weights, and gradients into lists
        layers = [
            self.in_layer,
            self.out_layer,
            self.loss_layer0,
            self.loss_layer1,
        ]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed representation of words in an instance variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        score = self.out_layer.forward(h)
        loss0 = self.loss_layer0.forward(score, contexts[:, 0])
        loss1 = self.loss_layer1.forward(score, contexts[:, 1])
        loss = loss0 + loss1
        return loss

    def backward(self, dout=1):
        dl0 = self.loss_layer0.backward(dout)
        dl1 = self.loss_layer1.backward(dout)
        ds = dl0 + dl1
        da = self.out_layer.backward(ds)
        self.in_layer.backward(da)
        return None
def __init__(self, vocab_size, hidden_size):
    V, H = vocab_size, hidden_size

    W_in = 0.01 * np.random.randn(V, H).astype("f")
    W_out = 0.01 * np.random.randn(H, V).astype("f")

    self.i_layer0 = MatMul(W_in)
    self.i_layer1 = MatMul(W_in)
    self.o_layer = MatMul(W_out)
    self.loss_layer = SoftmaxWithLoss()

    layers = [self.i_layer0, self.i_layer1, self.o_layer]
    self.params, self.grads = [], []
    for layer in layers:
        self.params += layer.params
        self.grads += layer.grads

    self.word_vecs = W_in
def __init__(self, vocab_size, wordvec_size, head_size, num_heads,
             num_encoders=3, num_decoders=3):
    S, D, H = vocab_size, wordvec_size, head_size
    rn = np.random.randn
    self.num_encoders = num_encoders
    self.num_decoders = num_decoders
    self.params, self.grads = [], []

    # Double embed (encoder, decoder)
    embed_W1 = (rn(S, D) / 100).astype('f')
    self.e_embed = PositionalEmbedding(embed_W1)
    self.params += self.e_embed.params
    self.grads += self.e_embed.grads

    self.encoders, self.decoders = [], []
    for _ in range(num_encoders):
        te = TransformerEncoder(wordvec_size=D, head_size=H, num_heads=num_heads)
        self.encoders.append(te)
        self.params += te.params
        self.grads += te.grads
    for _ in range(num_decoders):
        td = TransformerDecoder(wordvec_size=D, head_size=H, num_heads=num_heads)
        self.decoders.append(td)
        self.params += td.params
        self.grads += td.grads

    # For convenience, keep the output projection weight in a separate `linear` attribute
    self.linear = MatMul((rn(D, S) / np.sqrt(D)).astype('f'))
    self.params += self.linear.params
    self.grads += self.linear.grads

    # TimeSoftmaxWithLoss also has params and grads, but they are unused, so it is omitted here
    self.softmax = TimeSoftmaxWithLoss(ignore_label=-1)
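# A self-contained sketch (an addition with hypothetical names, not from the
# original snippets) of the params/grads aggregation convention these classes
# rely on: every layer exposes parallel lists `params` and `grads`, and a model
# concatenates them so a single optimizer loop can update everything, from
# SimpleCBOW up to the Transformer above.
import numpy as np

class DummyLayer:
    def __init__(self, W):
        self.params = [W]
        self.grads = [np.zeros_like(W)]

layers = [DummyLayer(np.random.randn(3, 4)), DummyLayer(np.random.randn(4, 2))]
params, grads = [], []
for layer in layers:
    params += layer.params
    grads += layer.grads
print(len(params), len(grads))  # 2 2 -- one entry per layer weight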
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        W_in = tf.Variable(
            tf.random.normal((V, H), mean=0.0, stddev=0.01, dtype='float'))
        W_out = tf.Variable(
            tf.random.normal((H, V), mean=0.0, stddev=0.01, dtype='float'))

        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()

        layers = [
            self.in_layer, self.out_layer, self.loss_layer1, self.loss_layer2
        ]
        self.params = []
        self.grads = []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)
        l1 = self.loss_layer1.forward(s, contexts[:, 0])
        l2 = self.loss_layer2.forward(s, contexts[:, 1])
        loss = l1 + l2
        return loss

    def backward(self, dout=1):
        dl1 = self.loss_layer1.backward(dout)
        dl2 = self.loss_layer2.backward(dout)
        ds = dl1 + dl2
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)
        return None
def __init__(self, vocab_size, hidden_size):
    V, H = vocab_size, hidden_size

    # Initialize the weights
    W_in = 0.01 * np.random.randn(V, H).astype('f')
    W_out = 0.01 * np.random.randn(H, V).astype('f')
    # Overwrite the random W_in with fixed values
    W_in = np.array(
        [[-1.0655735, 1.3231287, -1.1051644, -1.1049938, -1.0685176],
         [1.1559865, 0.08719956, 1.1672966, 1.1607609, 1.1567391],
         [-0.7532327, 0.6444376, -0.76896185, -0.71775854, -0.7918966],
         [0.9111972, 1.9940354, 0.6837302, 0.89859486, 0.87255],
         [-0.78328615, 0.6444221, -0.7729693, -0.7400077, -0.80646306],
         [-1.058986, 1.3268483, -1.1123687, -1.1059289, -1.0616288],
         [1.1203294, -1.6394324, 1.2104743, 1.1509397, 1.1612827]]).astype('f')

    # Create the layers
    self.in_layer0 = MatMul(W_in)
    self.in_layer1 = MatMul(W_in)
    self.out_layer = MatMul(W_out)
    self.loss_layer = SoftmaxWithLoss()

    # Collect all weights and gradients into lists
    layers = [self.in_layer0, self.in_layer1, self.out_layer]
    self.params, self.grads = [], []
    for layer in layers:
        self.params += layer.params
        self.grads += layer.grads

    # Store the distributed representation of words in an instance variable
    self.word_vecs = W_in
def __init__(self, vocab_size, hidden_size):
    V, H = vocab_size, hidden_size

    # Initialize the weights
    W_in = 0.01 * np.random.randn(V, H).astype('f')
    W_out = 0.01 * np.random.randn(H, V).astype('f')

    # Create the layers
    # layer0 and layer1 share their weights
    self.in_layer0 = MatMul(W_in)  # one input layer is needed per word in the window
    self.in_layer1 = MatMul(W_in)
    self.out_layer = MatMul(W_out)
    self.loss_layer = SoftmaxWithLoss()

    # Collect all weights and gradients into lists
    layers = [
        self.in_layer0, self.in_layer1, self.out_layer, self.loss_layer
    ]
    self.params, self.grads = [], []
    for layer in layers:
        self.params += layer.params
        self.grads += layer.grads

    # Store the distributed representation of words in an instance variable
    self.word_vecs = W_in
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # create layers
        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()

        # combine all weights and grads into lists
        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # set word vectors to a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)
        l1 = self.loss_layer1.forward(s, contexts[:, 0])
        l2 = self.loss_layer2.forward(s, contexts[:, 1])
        loss = l1 + l2
        return loss

    def backward(self, dout=1):
        dl1 = self.loss_layer1.backward(dout)
        dl2 = self.loss_layer2.backward(dout)
        ds = dl1 + dl2
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)
        return None
class SimpleSkipGram:
    def __init__(self, vocabulary_size, hidden_size):
        V, H = vocabulary_size, hidden_size

        # initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # generate layers
        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()

        # list all weights and gradients
        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # set the distributed representation of words as a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)
        l1 = self.loss_layer1.forward(s, contexts[:, 0])
        l2 = self.loss_layer2.forward(s, contexts[:, 1])
        loss = l1 + l2
        return loss

    def backward(self, dout=1):
        dl1 = self.loss_layer1.backward(dout)
        dl2 = self.loss_layer2.backward(dout)
        ds = dl1 + dl2
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)
        return None
def __init__(self, vocabulary_size, hidden_size):
    V, H = vocabulary_size, hidden_size

    # initialize weights
    W_in = 0.01 * np.random.randn(V, H).astype('f')
    W_out = 0.01 * np.random.randn(H, V).astype('f')

    # generate layers
    self.in_layer = MatMul(W_in)
    self.out_layer = MatMul(W_out)
    self.loss_layer1 = SoftmaxWithLoss()
    self.loss_layer2 = SoftmaxWithLoss()

    # list all weights and gradients
    layers = [self.in_layer, self.out_layer]
    self.params, self.grads = [], []
    for layer in layers:
        self.params += layer.params
        self.grads += layer.grads

    # set the distributed representation of words as a member variable
    self.word_vecs = W_in
def __init__(self, vocab_size, hidden_size):
    V, H = vocab_size, hidden_size

    # Initialize the weights
    W_in = 0.01 * np.random.randn(V, H).astype('f')
    W_out = 0.01 * np.random.randn(H, V).astype('f')

    # Create the layers
    self.in_layer = MatMul(W_in)
    self.out_layer = MatMul(W_out)
    self.loss_layer1 = SoftmaxWithLoss()
    self.loss_layer2 = SoftmaxWithLoss()

    # Collect all weights and gradients into lists
    layers = [self.in_layer, self.out_layer]
    self.params, self.grads = [], []
    for layer in layers:
        self.params += layer.params
        self.grads += layer.grads

    # Store the distributed representation of words in a member variable
    self.word_vecs = W_in
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize the weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # Create the layers
        # layer0 and layer1 share their weights
        self.in_layer0 = MatMul(W_in)  # one input layer is needed per word in the window
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [
            self.in_layer0, self.in_layer1, self.out_layer, self.loss_layer
        ]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed representation of words in an instance variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        # Compute x * W_in for the words on either side, over the whole batch.
        # Because the inputs are one-hot, the matmul just selects the matching
        # row of the weight matrix, i.e. that word's distributed representation.
        h0 = self.in_layer0.forward(contexts[:, 0])  # (batch, 7) x (vocab_size(7), hidden)
        h1 = self.in_layer1.forward(contexts[:, 1])  # (batch, 7) x (vocab_size, hidden)
        h = (h0 + h1) * 0.5  # average of the two context representations
        score = self.out_layer.forward(h)  # (batch, hidden) x (hidden, vocab_size)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
def __init__(self, vocab_size, hidden_size):
    V, H = vocab_size, hidden_size

    # Initialize the weights
    W_in = 0.01 * np.random.randn(V, H).astype('f')
    W_out = 0.01 * np.random.randn(H, V).astype('f')

    # Create the layers
    self.in_layer0 = MatMul(W_in)
    self.in_layer1 = MatMul(W_in)
    self.out_layer = MatMul(W_out)
    self.loss_layer = SoftmaxWithLoss()

    # Collect all weights and gradients into lists
    layers = [self.in_layer0, self.in_layer1, self.out_layer]
    self.params, self.grads = [], []
    for layer in layers:
        self.params += layer.params
        self.grads += layer.grads

    # Store the distributed representation of words in an instance variable
    self.word_vecs = W_in
def __init__(self, vocab_size, hidden_size):
    V, H = vocab_size, hidden_size

    # initialize the weights
    W_in = 0.01 * np.random.randn(V, H).astype('f')
    W_out = 0.01 * np.random.randn(H, V).astype('f')

    # create the layers
    self.in_layer = MatMul(W_in)
    self.out_layer = MatMul(W_out)
    self.loss_layer1 = SoftmaxWithLoss()
    self.loss_layer2 = SoftmaxWithLoss()

    # put all weights and gradients in one list
    layers = [self.in_layer, self.out_layer]
    self.params, self.grads = [], []
    for layer in layers:
        self.params += layer.params
        self.grads += layer.grads

    # store the words' distributed representations in an instance variable
    self.word_vecs = W_in
def __init__(self, vocab_size, hidden_size):
    V, H = vocab_size, hidden_size

    # Initialize the weights
    W_in = 0.01 * np.random.randn(V, H).astype('f')
    W_out = 0.01 * np.random.randn(H, V).astype('f')

    # Create the layers
    self.in_layer = MatMul(W_in)
    self.out_layer = MatMul(W_out)
    self.loss_layer1 = SoftmaxWithLoss()
    self.loss_layer2 = SoftmaxWithLoss()

    # Collect all weights and gradients into lists
    layers = [self.in_layer, self.out_layer]
    self.params, self.grads = [], []
    for layer in layers:
        self.params += layer.params
        self.grads += layer.grads

    # Store the distributed representation of words in a member variable
    self.word_vecs = W_in
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # initialize the weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # create the layers
        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()

        # put all weights and gradients in one list
        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # store the words' distributed representations in an instance variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)
        l1 = self.loss_layer1.forward(s, contexts[:, 0])
        l2 = self.loss_layer2.forward(s, contexts[:, 1])
        loss = l1 + l2
        return loss

    def backward(self, dout=1):
        dl1 = self.loss_layer1.backward(dout)
        dl2 = self.loss_layer2.backward(dout)
        ds = dl1 + dl2
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)
        return None
def __init__(self, vocab_size, hidden_size):
    V, H = vocab_size, hidden_size

    # Initialize the weights
    W_in = 0.01 * np.random.randn(V, H).astype('f')
    W_out = 0.01 * np.random.randn(H, V).astype('f')

    # Create the layers
    self.in_layer = MatMul(W_in)
    self.out_layer = MatMul(W_out)
    self.loss_layer1 = SoftmaxWithLoss()
    self.loss_layer2 = SoftmaxWithLoss()

    # Collect all weights and gradients into lists
    layers = [self.in_layer, self.out_layer]
    self.params, self.grads = [], []
    for layer in layers:
        self.params += layer.params
        self.grads += layer.grads

    # Store the distributed representation of words in an instance variable
    self.word_vecs = W_in

def forward(self, contexts, target):
    h = self.in_layer.forward(target)
    s = self.out_layer.forward(h)
    l1 = self.loss_layer1.forward(s, contexts[:, 0])
    l2 = self.loss_layer2.forward(s, contexts[:, 1])
    loss = l1 + l2
    return loss

def backward(self, dout=1):
    dl1 = self.loss_layer1.backward(dout)
    dl2 = self.loss_layer2.backward(dout)
    ds = dl1 + dl2
    dh = self.out_layer.backward(ds)
    self.in_layer.backward(dh)
    return None
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize the weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')
        # Overwrite the random W_in with fixed values
        W_in = np.array(
            [[-1.0655735, 1.3231287, -1.1051644, -1.1049938, -1.0685176],
             [1.1559865, 0.08719956, 1.1672966, 1.1607609, 1.1567391],
             [-0.7532327, 0.6444376, -0.76896185, -0.71775854, -0.7918966],
             [0.9111972, 1.9940354, 0.6837302, 0.89859486, 0.87255],
             [-0.78328615, 0.6444221, -0.7729693, -0.7400077, -0.80646306],
             [-1.058986, 1.3268483, -1.1123687, -1.1059289, -1.0616288],
             [1.1203294, -1.6394324, 1.2104743, 1.1509397, 1.1612827]]).astype('f')

        # Create the layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed representation of words in an instance variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
def __init__(self, vocab_size, hidden_size):
    V, H = vocab_size, hidden_size

    # initialize weights
    W_in = 0.01 * np.random.randn(V, H).astype('f')
    W_out = 0.01 * np.random.randn(H, V).astype('f')

    # create layers
    self.in_layer = MatMul(W_in)
    self.out_layer = MatMul(W_out)
    self.loss_layer1 = SoftmaxWithLoss()
    self.loss_layer2 = SoftmaxWithLoss()

    # combine all weights and grads into lists
    layers = [self.in_layer, self.out_layer]
    self.params, self.grads = [], []
    for layer in layers:
        self.params += layer.params
        self.grads += layer.grads

    # set word vectors to a member variable
    self.word_vecs = W_in
def __init__(self, vocab_size, hidden_size):
    V, H = vocab_size, hidden_size

    W_in = tf.Variable(
        tf.random.normal((V, H), mean=0.0, stddev=0.01, dtype='float'))
    W_out = tf.Variable(
        tf.random.normal((H, V), mean=0.0, stddev=0.01, dtype='float'))

    self.in_layer = MatMul(W_in)
    self.out_layer = MatMul(W_out)
    self.loss_layer1 = SoftmaxWithLoss()
    self.loss_layer2 = SoftmaxWithLoss()

    layers = [
        self.in_layer, self.out_layer, self.loss_layer1, self.loss_layer2
    ]
    self.params = []
    self.grads = []
    for layer in layers:
        self.params += layer.params
        self.grads += layer.grads

    self.word_vecs = W_in
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize the weights
        W_in = 0.01 * np.random.randn(V, H).astype("f")
        W_out = 0.01 * np.random.randn(H, V).astype("f")

        # Create each layer.
        # One in_layer is needed per word used as context
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all layers, weights, and gradients into lists
        layers = [
            self.in_layer0, self.in_layer1, self.out_layer, self.loss_layer
        ]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed representation of words in an instance variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        # Multiply by 0.5 because the forward pass averaged the two context vectors
        da *= 0.5
        self.in_layer0.backward(da)
        self.in_layer1.backward(da)
        return None
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # Layers
        # One input layer is needed per context word (= the window size).
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed representation of words (W_in) in an instance variable.
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize the weights
        W_in = 0.01 * np.random.randn(V, H).astype("f")
        W_out = 0.01 * np.random.randn(H, V).astype("f")

        # Create the layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed representation of words in an instance variable
        self.word_vecs = W_in

    def forward(self, context, target):
        h0 = self.in_layer0.forward(context[:, 0])
        h1 = self.in_layer1.forward(context[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
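# A minimal numerical gradient check for SimpleCBOW (an addition, sketched under
# the assumption that `contexts` and `target` are one-hot float arrays shaped as
# in the earlier sketches). Note that W_in is the same array inside both input
# layers, so the analytic gradient is the sum of the two layers' grads.
import numpy as np

model = SimpleCBOW(vocab_size=7, hidden_size=5)
model.forward(contexts, target)
model.backward()
# grads[0] and grads[1] both differentiate w.r.t. the shared W_in
analytic = model.grads[0][0, 0] + model.grads[1][0, 0]

eps = 1e-3
W_in = model.params[0]           # shared by in_layer0 and in_layer1
W_in[0, 0] += eps
loss_plus = model.forward(contexts, target)
W_in[0, 0] -= 2 * eps
loss_minus = model.forward(contexts, target)
W_in[0, 0] += eps                # restore the original value
numeric = (loss_plus - loss_minus) / (2 * eps)
print(analytic, numeric)         # the two values should be close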