def gru(self, inputs, state, params):
    w_ih, w_ir, w_iu, w_hh, w_hr, w_hu, b_h, b_r, b_u, w_ho, b_o = params
    # Rename to textbook notation: z = update gate, r = reset gate,
    # h = candidate hidden state, q = output.
    W_xz, W_hz, b_z = w_iu, w_hu, b_u
    W_xr, W_hr = w_ir, w_hr
    W_xh, W_hh = w_ih, w_hh
    W_hq, b_q = w_ho, b_o
    H, = state
    outputs = []
    np = mxnp  # local alias for mxnet.numpy
    for X in inputs:
        Z = npx.sigmoid(np.dot(X, W_xz) + np.dot(H, W_hz) + b_z)  # update gate
        R = npx.sigmoid(np.dot(X, W_xr) + np.dot(H, W_hr) + b_r)  # reset gate
        H_tilda = np.tanh(np.dot(X, W_xh) + np.dot(R * H, W_hh) + b_h)  # candidate state
        H = Z * H + (1 - Z) * H_tilda  # blend old state and candidate
        Y = np.dot(H, W_hq) + b_q  # per-step output
        outputs.append(Y)
    return np.concatenate(outputs, axis=0), (H,)

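# A minimal driving sketch for the scratch GRU above, assuming MXNet's NumPy
# interface. The shapes and the init_gru_params helper are illustrative
# assumptions, not from the source; since self is unused in the body, None is
# passed for it.
from mxnet import np as mxnp, npx
npx.set_np()

num_steps, batch_size, vocab_size, num_hiddens = 5, 2, 28, 32

def init_gru_params(vocab_size, num_hiddens):
    # Hypothetical initializer matching the unpack order in gru():
    # w_ih, w_ir, w_iu, w_hh, w_hr, w_hu, b_h, b_r, b_u, w_ho, b_o
    normal = lambda shape: mxnp.random.normal(scale=0.01, size=shape)
    return [normal((vocab_size, num_hiddens)),   # w_ih
            normal((vocab_size, num_hiddens)),   # w_ir
            normal((vocab_size, num_hiddens)),   # w_iu
            normal((num_hiddens, num_hiddens)),  # w_hh
            normal((num_hiddens, num_hiddens)),  # w_hr
            normal((num_hiddens, num_hiddens)),  # w_hu
            mxnp.zeros(num_hiddens),             # b_h
            mxnp.zeros(num_hiddens),             # b_r
            mxnp.zeros(num_hiddens),             # b_u
            normal((num_hiddens, vocab_size)),   # w_ho
            mxnp.zeros(vocab_size)]              # b_o

inputs = mxnp.random.normal(size=(num_steps, batch_size, vocab_size))
state = (mxnp.zeros((batch_size, num_hiddens)),)
outputs, state = gru(None, inputs, state,
                     init_gru_params(vocab_size, num_hiddens))
print(outputs.shape)  # (num_steps * batch_size, vocab_size)
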
def forward(self, input):
    # Scale the input by 1 / (smooth + sigmoid(gamma)), then squash
    # the scaled input with a sigmoid.
    out = self.smooth + FFx.sigmoid(self.gamma.data())
    out = FF.reciprocal(out)
    out = input * out
    out = FFx.sigmoid(out)
    return out

def forward(self, x):
    # Factorization machine: pairwise interactions via the
    # (square-of-sum - sum-of-squares) trick over the embedding axis.
    square_of_sum = np.sum(self.embedding(x), axis=1)**2
    sum_of_square = np.sum(self.embedding(x)**2, axis=1)
    x = self.linear_layer(self.fc(x).sum(1)) \
        + 0.5 * (square_of_sum - sum_of_square).sum(1, keepdims=True)
    x = npx.sigmoid(x)
    return x

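# For reference, the square_of_sum - sum_of_square pattern above is the
# standard factorization-machine identity (a textbook derivation, not stated
# in the snippet), which computes all pairwise interactions in time linear
# in the embedding dimension d:
#
#   sum_{i<j} <v_i, v_j> x_i x_j
#       = 1/2 * sum_{k=1}^{d} [ (sum_i v_ik x_i)^2 - sum_i v_ik^2 x_i^2 ]
#
# The embedding lookup already folds in the feature values, so summing over
# axis=1 plays the role of sum_i v_ik x_i.
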
def forward(self, X, state):
    w_ih, w_ir, w_iu, w_hh, w_hr, w_hu, b_h, b_r, b_u, w_ho, b_o = self.params
    state = state[0]
    outputs = []
    for x in X:
        r = npx.sigmoid(x @ w_ir + state @ w_hr + b_r)  # reset gate
        u = npx.sigmoid(x @ w_iu + state @ w_hu + b_u)  # update gate
        hr = state * r  # hidden state after the reset gate
        # Multiply the input x and the reset hidden state hr by their
        # respective weight matrices to get the candidate hidden state.
        h_tilda = mxnp.tanh(x @ w_ih + hr @ w_hh + b_h)
        state = state * u + h_tilda * (1 - u)  # blend old state and candidate
        y = state @ w_ho + b_o  # compute the output
        outputs.append(y)
    return mxnp.concatenate(outputs, axis=0), (state,)

def forward(self, input1, input2, input3):
    # These should work with ReLU as well
    q = FFx.sigmoid(self.query(input1))
    k = FFx.sigmoid(self.key(input2))    # B,C,H,W
    v = FFx.sigmoid(self.value(input3))  # B,C,H,W
    att_spat = self.metric_space(q, k)   # B,1,H,W
    v_spat = att_spat * v                # emphasize spatial features
    att_chan = self.metric_channel(q, k) # B,C,1,1
    v_chan = att_chan * v                # emphasize channel features
    v_cspat = 0.5 * (v_chan + v_spat)    # average the two emphasized views
    v_cspat = self.norm(v_cspat)
    return v_cspat

def forward(self, x):
    if self._mode == 'erf':
        # Exact GELU via the erf-based implementation
        return npx.leaky_relu(x, act_type='gelu')
    elif self._mode == 'tanh':
        # Tanh approximation of GELU
        return 0.5 * x \
            * (1.0 + np.tanh(math.sqrt(2.0 / math.pi)
                             * (x + 0.044715 * (x ** 3))))
    elif self._mode == 'sigmoid':
        # Sigmoid approximation of GELU
        return x * npx.sigmoid(1.702 * x)
    else:
        raise NotImplementedError

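# A quick numeric comparison of the three GELU modes above, written against
# MXNet's NumPy interface without the module wrapper; an illustrative check,
# assuming npx.leaky_relu(..., act_type='gelu') is available as in the
# snippet above.
import math
from mxnet import np, npx
npx.set_np()

x = np.arange(-3.0, 3.0, 0.5)
erf_out = npx.leaky_relu(x, act_type='gelu')  # exact, erf-based GELU
tanh_out = 0.5 * x * (1.0 + np.tanh(math.sqrt(2.0 / math.pi)
                                    * (x + 0.044715 * x ** 3)))
sig_out = x * npx.sigmoid(1.702 * x)
print(np.abs(erf_out - tanh_out).max())  # tanh approximation error
print(np.abs(erf_out - sig_out).max())   # sigmoid approximation error
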
def forward(self, x):
    embed_x = self.embedding(x)
    # Factorization-machine term via the same
    # (square-of-sum - sum-of-squares) trick.
    square_of_sum = np.sum(embed_x, axis=1)**2
    sum_of_square = np.sum(embed_x**2, axis=1)
    inputs = np.reshape(embed_x, (-1, self.embed_output_dim))
    # Linear term + FM interaction term + deep MLP term
    x = self.linear_layer(self.fc(x).sum(1)) \
        + 0.5 * (square_of_sum - sum_of_square).sum(1, keepdims=True) \
        + self.mlp(inputs)
    x = npx.sigmoid(x)
    return x

def test_sigmoid():
    A = np.zeros((INT_OVERFLOW, 2))
    A.attach_grad()
    with mx.autograd.record():
        B = npx.sigmoid(A)
    assert B.shape == (INT_OVERFLOW, 2)
    assert B[0][0] == 0.5  # sigmoid(0) = 0.5
    B.backward()
    assert A.grad.shape == (INT_OVERFLOW, 2)
    # d/dx sigmoid(x) at x = 0 is 0.25
    assert_almost_equal(A.grad[0][0], np.array([0.25]),
                        rtol=1e-3, atol=1e-5)

from d2l import mxnet as d2l
from mxnet import autograd, np, npx
npx.set_np()

# Plot the ReLU function and its gradient
x = np.arange(-8.0, 8.0, 0.1)
x.attach_grad()
with autograd.record():
    y = npx.relu(x)
d2l.plot(x, y, 'x', 'relu(x)', figsize=(5, 2.5))
y.backward()
d2l.plot(x, x.grad, 'x', 'grad of relu', figsize=(5, 2.5))

# Plot the sigmoid function and its gradient
with autograd.record():
    y = npx.sigmoid(x)
d2l.plot(x, y, 'x', 'sigmoid(x)', figsize=(5, 2.5))
y.backward()
d2l.plot(x, x.grad, 'x', 'grad of sigmoid', figsize=(5, 2.5))

# Plot the tanh function and its gradient
with autograd.record():
    y = np.tanh(x)  # npx doesn't have a tanh function
d2l.plot(x, y, 'x', 'tanh(x)', figsize=(5, 2.5))
y.backward()
d2l.plot(x, x.grad, 'x', 'grad of tanh', figsize=(5, 2.5))

# Plot the pReLU function and its derivative; np.minimum (not Python's
# built-in min) is needed for the elementwise negative branch
with autograd.record():
    y = npx.relu(x) + 0.01 * np.minimum(0, x)
d2l.plot(x, y, 'x', 'prelu(x)', figsize=(5, 2.5))
y.backward()
d2l.plot(x, x.grad, 'x', 'grad of prelu', figsize=(5, 2.5))

def forward(self, positive, negative):
    # Negative log-sigmoid of the score gap between positive and
    # negative samples, summed along axis 0.
    distances = positive - negative
    loss = -np.sum(np.log(npx.sigmoid(distances)), 0, keepdims=True)
    return loss

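# The loss above matches the shape of a Bayesian Personalized Ranking (BPR)
# style objective: it maximizes log sigmoid(positive - negative), rewarding
# positive samples that outscore negatives. A minimal usage sketch with
# made-up scores (the values are illustrative, not from the source):
from mxnet import np, npx
npx.set_np()

positive = np.array([2.0, 0.5, 1.2])   # model scores for observed items
negative = np.array([0.1, 0.7, -0.3])  # scores for sampled negative items
loss = -np.sum(np.log(npx.sigmoid(positive - negative)), 0, keepdims=True)
print(loss)  # small when positives consistently outscore negatives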