class AttentionWeight:
    """Attention-weight layer.

    Scores each encoder time step against the decoder state via a dot
    product and normalizes the scores with softmax.
    """

    def __init__(self):
        self.params, self.grads = [], []
        self.softmax = Softmax()
        self.cache = None

    def forward(self, hs, h):
        """Return attention weights of shape (N, T).

        Args:
            hs (ndarray): encoder hidden states, shape (N, T, H).
            h (ndarray): decoder hidden state, shape (N, H).
        """
        batch, seq_len, hidden = hs.shape
        # (N, 1, H): broadcasting stands in for an explicit repeat over T.
        h_row = h.reshape(batch, 1, hidden)
        scores = (hs * h_row).sum(axis=2)
        weights = self.softmax.forward(scores)
        self.cache = (hs, h_row)
        return weights

    def backward(self, da):
        """Backprop through the weighting; returns (dhs, dh).

        Args:
            da (ndarray): gradient w.r.t. the weights, shape (N, T).
        """
        hs, h_row = self.cache
        batch, seq_len, hidden = hs.shape
        ds = self.softmax.backward(da)
        # Backward of a sum is a repeat along the reduced axis.
        dt = ds.reshape(batch, seq_len, 1).repeat(hidden, axis=2)
        dhs = dt * h_row
        dh = (dt * hs).sum(axis=1)  # backward of repeat/broadcast is sum
        return dhs, dh
def infererence(args, data_dir='./data/', top_k=10):
    """Run ShuffleNet inference on one image and print the top-k classes.

    NOTE(review): the function name is a typo for `inference`; kept as-is so
    existing callers are not broken.

    Args:
        args: namespace with `image` (path to the input image) and
            `idx_to_class` (path to a JSON file mapping class index -> label).
        data_dir (str): directory holding the pretrained `.npy` weight files.
        top_k (int): number of highest-probability classes to print.
    """
    groups = 8

    print('Loading image')
    image = Image.open(args.image)

    print('Preprocessing')
    transformer = get_transformer()
    input_data = preprocess(image, transformer)
    print('input_data ', input_data.shape)

    # Stem: 3x3 convolution (stride 2) followed by 3x3 max pooling (stride 2).
    weight = np.load(data_dir + 'module.conv1.weight.npy')
    bias = np.load(data_dir + 'module.conv1.bias.npy')
    conv_layer = Convolution(weight, bias, stride=2, pad=1)
    out = conv_layer.forward(input_data)

    maxpool_layer = Pooling(3, 3, 2, 1)
    out = maxpool_layer.forward(out)

    # Shuffle stages with repeat counts 3 / 7 / 3.
    out = stage_shuffle(out, stage2_str, 3, groups)
    out = stage_shuffle(out, stage3_str, 7, groups)
    out = stage_shuffle(out, stage4_str, 3, groups)

    # Global average pooling over the remaining spatial extent.
    h, w = out.shape[-2:]
    avgpool_layer = AVGPooling(h, w, 1, 0)
    out = avgpool_layer.forward(out).reshape(1, -1)

    # Fully connected classifier; stored weights are (out, in), so transpose.
    weight = np.load(data_dir + 'module.fc.weight.npy').transpose(1, 0)
    bias = np.load(data_dir + 'module.fc.bias.npy')
    fc_layer = Affine(weight, bias)
    out = fc_layer.forward(out)

    softmax_layer = Softmax()
    out = softmax_layer.forward(out).reshape(-1)

    with open(args.idx_to_class) as json_file:
        json_data = json.load(json_file)

    # Pair each probability with its label; class count was hard-coded to 1000.
    result = [(out[i], json_data[str(i)]) for i in range(out.shape[0])]
    result.sort(key=lambda item: item[0], reverse=True)
    for item in result[:top_k]:
        print(item)
class AttentionWeight:
    """Attention-weight layer.

    Given the concatenated encoder hidden states hs (N, T, H) and the
    decoder hidden state h (N, H), produces normalized weights a (N, T)
    scoring how well h matches each row of hs.
    """

    def __init__(self):
        self.params, self.grads = [], []
        self.softmax = Softmax()
        self.cache = None

    def forward(self, hs, h):
        """Forward pass.

        Args:
            hs (ndarray): encoder hidden states, shape (N, T, H).
            h (ndarray): decoder hidden state, shape (N, H).

        Returns:
            ndarray: attention weights, shape (N, T).
        """
        N, T, H = hs.shape
        # Expand h to (N, T, H) so it lines up with every time step of hs.
        expanded = np.repeat(h.reshape(N, 1, H), T, axis=1)
        score = (hs * expanded).sum(axis=2)  # per-step dot product
        weight = self.softmax.forward(score)
        self.cache = (hs, expanded)
        return weight

    def backward(self, da):
        """Backward pass.

        Args:
            da (ndarray): gradient w.r.t. the attention weights, shape (N, T).

        Returns:
            tuple: (dhs, dh) — gradients w.r.t. hs and h.
        """
        hs, expanded = self.cache
        N, T, H = hs.shape
        ds = self.softmax.backward(da)
        # Backward of sum is repeat.
        dt = np.repeat(ds.reshape(N, T, 1), H, axis=2)
        dhs = dt * expanded
        dh = (dt * hs).sum(axis=1)  # backward of repeat is sum
        return dhs, dh
import sys
sys.path.append('..')
from common.layers import Softmax
import numpy as np

# Toy check of the attention-weight computation.
N, T, H = 10, 5, 4
hs = np.random.randn(N, T, H)
h = np.random.randn(N, H)

# Expand h across the time axis, then take per-step dot products with hs.
hr = h.reshape(N, 1, H).repeat(T, axis=1)
t = hs * hr
print(t.shape)  # (10, 5, 4)

s = np.sum(t, axis=2)
print(s.shape)  # (10, 5)

softmax = Softmax()
a = softmax.forward(s)
print(a.shape)  # (10, 5)

# The weights `a` express each word's importance; their weighted sum over hs
# yields the context vector.
# With h as the decoder LSTM's hidden state, the goal is to quantify how
# similar h is to each word vector in hs.
# The simplest measure is the vector dot product — how much two vectors point
# in the same direction, i.e. their similarity.
# The similarities are then normalized with the softmax function.
class AttentionWeight:
    '''
    Layer that takes the dot product between the encoder's hidden states
    for the whole sequence, hs (N, T, H), and the decoder's hidden state
    for the current step, h (N, H), and applies the softmax function to
    output a per-step alignment a (N, T).
    '''
    def __init__(self):
        self.params, self.grads = [], []
        self.softmax = Softmax()
        self.cache = None

    def forward(self, hs, h):
        '''
        Reshapes the decoder hidden state h (N, H) to (N, 1, H) (NumPy
        broadcasting makes an explicit repeat to (N, T, H) unnecessary),
        takes the Hadamard product with hs, sums over H, and normalizes
        with the softmax function to obtain the alignment a (N, T).

        Parameters
        ----------
        hs : np.ndarray(N, T, H)
            Encoder hidden states for the whole sequence
        h : np.ndarray(N, H)
            Decoder hidden state for the current step

        Returns
        -------
        np.ndarray(N, T)
            Alignment giving a weight for each step of hs
        '''
        N, T, H = hs.shape
        hr = h.reshape(N, 1, H)  # .repeat(T, axis=1) skipped: broadcasting suffices
        t = hs * hr  # (N, T, H)
        s = t.sum(axis=2)
        a = self.softmax.forward(s)  # (N, T)
        self.cache = (hs, hr)
        return a

    def backward(self, da):
        '''
        The backward of sum is repeat; the backward of repeat is sum.

        Parameters
        ----------
        da : np.ndarray(N, T)
            Gradient of the alignment

        Returns
        -------
        dhs, dh : np.ndarray(N, T, H), np.ndarray(N, H)
            Gradient of the full-sequence hidden states hs and
            gradient of the current-step hidden state h
        '''
        hs, hr = self.cache
        N, T, H = hs.shape
        ds = self.softmax.backward(da)
        dt = ds.reshape(N, T, 1).repeat(H, axis=2)
        dhs = dt * hr  # (N, T, H)
        dhr = dt * hs  # (N, T, H)
        dh = dhr.sum(axis=1)  # (N, H)
        return dhs, dh