class AttentionWeight:
    def __init__(self):
        self.params, self.grads = [], []
        self.softmax = Softmax()
        self.cache = None

    def forward(self, hs, h):
        N, T, H = hs.shape

        hr = h.reshape(N, 1, H)  # .repeat(T, axis=1)
        t = hs * hr
        s = np.sum(t, axis=2)
        a = self.softmax.forward(s)

        self.cache = (hs, hr)
        return a

    def backward(self, da):
        hs, hr = self.cache
        N, T, H = hs.shape

        ds = self.softmax.backward(da)
        dt = ds.reshape(N, T, 1).repeat(H, axis=2)
        dhs = dt * hr
        dhr = dt * hs
        dh = np.sum(dhr, axis=1)

        return dhs, dh
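# A quick sanity check of the forward/backward shapes. This is a minimal
# sketch; it assumes the Softmax layer from common.layers, the same import
# used by the experiment script later in this section.
import numpy as np
from common.layers import Softmax

N, T, H = 10, 5, 4
hs = np.random.randn(N, T, H)  # encoder hidden states, one per time step
h = np.random.randn(N, H)      # decoder hidden state for the current step

layer = AttentionWeight()
a = layer.forward(hs, h)
print(a.shape)        # (10, 5): one attention weight per time step
print(a.sum(axis=1))  # each row sums to 1, since a is a softmax output

dhs, dh = layer.backward(np.random.randn(N, T))
print(dhs.shape, dh.shape)  # (10, 5, 4) (10, 4): gradients match the inputs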
def inference(args):
    groups = 8

    print('Loading image')
    image = Image.open(args.image)

    print('Preprocessing')
    transformer = get_transformer()
    input_data = preprocess(image, transformer)
    print('input_data ', input_data.shape)

    # conv layer
    w = np.load('./data/' + 'module.conv1.weight.npy')
    b = np.load('./data/' + 'module.conv1.bias.npy')
    conv_layer = Convolution(w, b, stride=2, pad=1)
    out = conv_layer.forward(input_data)
    #savetxt('./dump/' + 'conv1_out.txt', out)

    # max pooling
    maxpool_layer = Pooling(3, 3, 2, 1)
    out = maxpool_layer.forward(out)
    #savetxt('./dump/' + 'maxpool_out.txt', out)

    out = stage_shuffle(out, stage2_str, 3, groups)
    #savetxt('./dump/' + 'stage2.txt', out)
    out = stage_shuffle(out, stage3_str, 7, groups)
    #savetxt('./dump/' + 'stage3.txt', out)
    out = stage_shuffle(out, stage4_str, 3, groups)
    #savetxt('./dump/' + 'stage4.txt', out)

    h, w = out.shape[-2:]
    avgpool_layer = AVGPooling(h, w, 1, 0)
    out = avgpool_layer.forward(out).reshape(1, -1)

    w = np.load('./data/' + 'module.fc.weight.npy')
    b = np.load('./data/' + 'module.fc.bias.npy')
    w = w.transpose(1, 0)
    fc_layer = Affine(w, b)
    out = fc_layer.forward(out)

    softmax_layer = Softmax()
    out = softmax_layer.forward(out).reshape(-1)

    result = []
    with open(args.idx_to_class) as json_file:
        json_data = json.load(json_file)
        '''
        for key in json_data:
            print(key, json_data[key])
        '''
        for i in range(0, 1000):
            item = (out[i], json_data[str(i)])
            result.append(item)

    result = sorted(result, key=lambda item: item[0], reverse=True)
    for i in range(0, 10):
        print(result[i])
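# The stage_shuffle stages above are defined elsewhere; at their core is
# ShuffleNet's channel-shuffle operation. The helper below is a hypothetical
# sketch of that primitive (not the original stage_shuffle implementation):
# reshape the channel axis into (groups, C // groups), swap those two axes,
# and flatten back so channels from different groups interleave.
import numpy as np

def channel_shuffle(x, groups):
    N, C, H, W = x.shape
    assert C % groups == 0
    x = x.reshape(N, groups, C // groups, H, W)
    x = x.transpose(0, 2, 1, 3, 4)
    return x.reshape(N, C, H, W)

x = np.arange(2 * 8).reshape(2, 8, 1, 1)
print(channel_shuffle(x, groups=4)[0, :, 0, 0])  # [0 2 4 6 1 3 5 7]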
class AttentionWeight:
    def __init__(self):
        self.params, self.grads = [], []
        self.softmax = Softmax()
        self.cache = None

    def forward(self, hs, h):
        """Forward pass.

        Args:
            hs (ndarray): Hidden vectors from the Encoder, one per word, concatenated
            h (ndarray): Vector corresponding to the final word of hs

        Returns:
            a (ndarray): Weight vector for Attention
        """
        N, T, H = hs.shape

        hr = h.reshape(N, 1, H).repeat(T, axis=1)
        t = hs * hr
        s = np.sum(t, axis=2)  # score (weighted sum)
        a = self.softmax.forward(s)

        self.cache = (hs, hr)
        return a

    def backward(self, da):
        """Backward pass.

        Args:
            da (ndarray): Gradient of the attention weight vector

        Returns:
            dhs: Gradient of the concatenated per-word hidden vectors from the Encoder
            dh: Gradient of the vector corresponding to the final word of hs
        """
        hs, hr = self.cache
        N, T, H = hs.shape

        ds = self.softmax.backward(da)
        dt = ds.reshape(N, T, 1).repeat(H, axis=2)  # the backward of Sum is Repeat
        dhs = dt * hr
        dhr = dt * hs
        dh = np.sum(dhr, axis=1)  # the backward of Repeat is Sum

        return dhs, dh
def __init__(self):
    self.params, self.grads = [], []
    self.softmax = Softmax()
    self.cache = None
import sys
sys.path.append('..')
from common.layers import Softmax
import numpy as np

N, T, H = 10, 5, 4
hs = np.random.randn(N, T, H)
h = np.random.randn(N, H)
hr = h.reshape(N, 1, H).repeat(T, axis=1)

t = hs * hr
print(t.shape)  # (10, 5, 4)

s = np.sum(t, axis=2)
print(s.shape)  # (10, 5)

softmax = Softmax()
a = softmax.forward(s)
print(a.shape)  # (10, 5)

# The weights a express the importance of each word.
# Taking the weighted sum of hs with these weights a yields the context vector.
# With h as the hidden-state vector of the Decoder's LSTM layer, the goal here
# is to express numerically how similar h is to each word vector in hs.
# The simplest method is used: the vector dot product, which measures how
# closely two vectors point in the same direction, i.e. their similarity.
# After computing the similarities, the softmax function is applied to
# normalize them.
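# Continuing the experiment: the comments above describe obtaining the context
# vector as the weighted sum of hs with the weights a. That step can be
# computed directly from the arrays already defined (a minimal sketch):
ar = a.reshape(N, T, 1).repeat(H, axis=2)  # broadcast the weights over H
c = np.sum(hs * ar, axis=1)                # weighted sum over the time axis T
print(c.shape)  # (10, 4): one context vector per batch element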
class AttentionWeight:
    '''
    A layer that takes the dot product between the Encoder's hidden states
    for the whole sequence, hs (N, T, H), and the Decoder's hidden state for
    the current step, h (N, H), then applies the softmax function to output
    a per-step alignment a (N, T).
    '''
    def __init__(self):
        self.params, self.grads = [], []
        self.softmax = Softmax()
        self.cache = None

    def forward(self, hs, h):
        '''
        Expand the Decoder's hidden state h (N, H) to (N, T, H) (here by
        broadcasting rather than an explicit np.repeat), take the Hadamard
        product with hs, sum over H, and normalize with the softmax function
        to obtain the alignment a (N, T).

        Parameters
        ----------
        hs : np.ndarray(N, T, H)
            Encoder hidden states for the whole sequence
        h : np.ndarray(N, H)
            Decoder hidden state for the current step

        Returns
        -------
        np.ndarray(N, T)
            Alignment giving one weight per step of hs
        '''
        N, T, H = hs.shape

        hr = h.reshape(N, 1, H)  # .repeat(T, axis=1)
        t = hs * hr  # (N, T, H)
        s = t.sum(axis=2)
        a = self.softmax.forward(s)  # (N, T)
        self.cache = (hs, hr)
        return a

    def backward(self, da):
        '''
        The backward of sum is repeat; the backward of repeat is sum.

        Parameters
        ----------
        da : np.ndarray(N, T)
            Gradient of the alignment

        Returns
        -------
        dhs, dh : np.ndarray(N, T, H), np.ndarray(N, H)
            Gradients of the whole-sequence hidden states hs and of the
            current step's hidden state h
        '''
        hs, hr = self.cache
        N, T, H = hs.shape

        ds = self.softmax.backward(da)
        dt = ds.reshape(N, T, 1).repeat(H, axis=2)
        dhs = dt * hr  # (N, T, H)
        dhr = dt * hs  # (N, T, H)
        dh = dhr.sum(axis=1)  # (N, H)
        return dhs, dh
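# A numerical gradient check confirms that backward matches forward. The
# Softmax below is a minimal stand-in with the forward/backward interface the
# layer expects (an assumption, not the original common.layers implementation).
import numpy as np

class Softmax:
    def __init__(self):
        self.out = None

    def forward(self, x):
        x = x - x.max(axis=1, keepdims=True)
        self.out = np.exp(x) / np.exp(x).sum(axis=1, keepdims=True)
        return self.out

    def backward(self, dout):
        dx = self.out * dout
        dx -= self.out * dx.sum(axis=1, keepdims=True)
        return dx

np.random.seed(0)
N, T, H = 2, 3, 4
hs = np.random.randn(N, T, H)
h = np.random.randn(N, H)
w = np.random.randn(N, T)    # fixed upstream gradient for the check

layer = AttentionWeight()
a = layer.forward(hs, h)
dhs, dh = layer.backward(w)  # analytic gradients

eps = 1e-5                   # central difference on one element of h
hp, hm = h.copy(), h.copy()
hp[0, 1] += eps
hm[0, 1] -= eps
num = (np.sum(AttentionWeight().forward(hs, hp) * w)
       - np.sum(AttentionWeight().forward(hs, hm) * w)) / (2 * eps)
print(dh[0, 1], num)         # the two values should agree closely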
def __init__(self, name, nout, numpy_rng, theano_rng, batchsize=128):
    # call parent constructor to set up convenience functions (save/load, ...)
    super(HumanConvNet, self).__init__(name=name)

    self.numpy_rng = numpy_rng
    self.batchsize = batchsize
    self.theano_rng = theano_rng
    self.mode = theano.shared(np.int8(0), name='mode')
    self.nout = nout

    self.inputs = T.ftensor4('inputs')
    self.inputs.tag.test_value = numpy_rng.randn(
        self.batchsize, 1, 28, 28).astype(np.float32)
    self.targets = T.ivector('targets')
    self.targets.tag.test_value = numpy_rng.randint(
        nout, size=self.batchsize).astype(np.int32)

    self.layers = OrderedDict()

    self.layers['conv0'] = ConvLayer(rng=self.numpy_rng,
                                     inputs=self.inputs,
                                     filter_shape=(128, 1, 5, 5),
                                     image_shape=(None, 1, 28, 28),
                                     name='conv0',
                                     pad=2)

    self.layers['maxpool0'] = MaxPoolLayer(inputs=self.layers['conv0'],
                                           pool_size=(2, 2),
                                           stride=(2, 2),
                                           name='maxpool0')

    self.layers['bias0'] = ConvBiasLayer(inputs=self.layers['maxpool0'],
                                         name='bias0')

    self.layers['relu0'] = Relu(inputs=self.layers['bias0'], name='relu0')

    self.layers['conv1'] = ConvLayer(rng=self.numpy_rng,
                                     inputs=self.layers['relu0'],
                                     filter_shape=(64, 128, 3, 3),
                                     name='conv1',
                                     pad=1)

    self.layers['maxpool1'] = MaxPoolLayer(inputs=self.layers['conv1'],
                                           pool_size=(2, 2),
                                           stride=(2, 2),
                                           name='maxpool1')

    self.layers['bias1'] = ConvBiasLayer(inputs=self.layers['maxpool1'],
                                         name='bias1')

    self.layers['relu1'] = Relu(inputs=self.layers['bias1'], name='relu1')

    self.layers['reshape1'] = Reshape(
        inputs=self.layers['relu1'],
        shape=(self.layers['relu1'].outputs_shape[0],
               np.prod(self.layers['relu1'].outputs_shape[1:])),
        name='reshape1')

    self.layers['fc2'] = AffineLayer(rng=self.numpy_rng,
                                     inputs=self.layers['reshape1'],
                                     nouts=256,
                                     name='fc2')

    self.layers['relu2'] = Relu(inputs=self.layers['fc2'], name='relu2')

    self.layers['fc3'] = AffineLayer(rng=self.numpy_rng,
                                     inputs=self.layers['relu2'],
                                     nouts=self.nout,
                                     name='fc3')

    self.layers['softmax3'] = Softmax(inputs=self.layers['fc3'],
                                      name='softmax3')

    self.layers['clip3'] = Clip(inputs=self.layers['softmax3'],
                                name='clip3',
                                min_val=1e-6,
                                max_val=1 - 1e-6)

    self.probabilities = self.forward(self.inputs)

    self._cost = T.nnet.categorical_crossentropy(
        self.probabilities, self.targets).mean()

    self.classification = T.argmax(self.probabilities, axis=1)

    self.params = []
    for l in self.layers.values():
        self.params.extend(l.params)

    self._grads = T.grad(self._cost, self.params)

    self.classify = theano.function([self.inputs], self.classification)
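# The constructor above exposes self.params and self._grads but compiles no
# training step. A minimal sketch of how one could be built with plain SGD;
# the helper name and learning rate are assumptions, not part of the original
# class.
def make_train_fn(model, learning_rate=0.01):
    # one (param, param - lr * grad) update pair per parameter
    updates = [(p, p - learning_rate * g)
               for p, g in zip(model.params, model._grads)]
    return theano.function([model.inputs, model.targets],
                           model._cost,
                           updates=updates)

# usage: train_fn = make_train_fn(net); loss = train_fn(x_batch, y_batch)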