Example #1
def rnn(inputs, hidden_states, params):
    '''
    inputs shape: (num_steps [seq_len], batch_size, vocab_size)
    returns:
        Ys, (H,), hidden_states, outputs
    '''
    W_xh, W_hh, b_h, W_ho, b_o = params
    H, = hidden_states
    outputs = []
    hidden_states = []
    # X shape: (batch_size,vocab_size)
    print(
        f"rnn loops {inputs.shape[0]} times along the seq_length axis---------\n")
    i = 1
    for X in inputs:  # loop along num_steps (the sequence length)
        print(f"loop {i}\n")
        i += 1
        H = mxnp.dot(X, W_xh) + mxnp.dot(H, W_hh) + b_h
        H = mxnp.tanh(H)
        hidden_states.append(H)

        print(
            f"---rnn input(X,H) and weights' shape---------\n"
            f"   ---X.shape={X.shape},W_xh.shape={W_xh.shape}\n"
            f"   ---H.shape={H.shape},W_hh.shape={W_hh.shape},b_h.shape={b_h.shape}\n"
            f"   ---W_ho.shape={W_ho.shape},b_o.shape={b_o.shape}\n")

        Y = mxnp.dot(H, W_ho) + b_o
        outputs.append(Y)
        print(f"---rnn output's shape---------\n"
              f"   ---Y.shape={Y.shape},H.shape={H.shape}\n")
    Ys = mxnp.concatenate(outputs, axis=0)
    print(f"Final Ys.shape={Ys.shape}")
    return Ys, (H, ), hidden_states, outputs
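A minimal driving sketch for the function above (not from the original source): it assumes `mxnp` is the `mxnet.numpy` module and that parameter shapes follow the dot products inside `rnn`; the concrete sizes are made up for illustration.
from mxnet import np as mxnp, npx
npx.set_np()

# Hypothetical sizes, chosen only for illustration
num_steps, batch_size, vocab_size, num_hiddens = 5, 2, 28, 32
W_xh = mxnp.random.normal(scale=0.01, size=(vocab_size, num_hiddens))
W_hh = mxnp.random.normal(scale=0.01, size=(num_hiddens, num_hiddens))
b_h = mxnp.zeros(num_hiddens)
W_ho = mxnp.random.normal(scale=0.01, size=(num_hiddens, vocab_size))
b_o = mxnp.zeros(vocab_size)

inputs = mxnp.random.normal(size=(num_steps, batch_size, vocab_size))  # stand-in for one-hot inputs
H0 = mxnp.zeros((batch_size, num_hiddens))
Ys, (H,), hidden_states, outputs = rnn(inputs, (H0,), [W_xh, W_hh, b_h, W_ho, b_o])
# Expected: Ys.shape == (num_steps * batch_size, vocab_size)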
Example #2
    def forward(self, x):
        if self._mode == 'erf':
            # exact GELU via MXNet's built-in erf-based activation
            return npx.leaky_relu(x, act_type='gelu')
        elif self._mode == 'tanh':
            # tanh approximation of GELU
            return 0.5 * x * (1.0 + np.tanh(
                math.sqrt(2.0 / math.pi) * (x + 0.044715 * (x ** 3))))
        elif self._mode == 'sigmoid':
            # sigmoid approximation of GELU
            return x * npx.sigmoid(1.702 * x)
        else:
            raise NotImplementedError
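As a quick standalone sanity check (not part of the original class), the three modes above can be compared directly; the printed values are simply the maximum gaps between the exact GELU and the two approximations.
import math
from mxnet import np, npx
npx.set_np()

x = np.arange(-3.0, 3.0, 0.5)
gelu_erf = npx.leaky_relu(x, act_type='gelu')                        # 'erf' branch
gelu_tanh = 0.5 * x * (1.0 + np.tanh(math.sqrt(2.0 / math.pi)
                                     * (x + 0.044715 * (x ** 3))))   # 'tanh' branch
gelu_sigmoid = x * npx.sigmoid(1.702 * x)                            # 'sigmoid' branch
print(np.abs(gelu_erf - gelu_tanh).max(), np.abs(gelu_erf - gelu_sigmoid).max())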
Example #3
    def forward(self, queries, keys, values, valid_lens):
        queries, keys = self.W_q(queries), self.W_k(keys)
        # After dimension expansion, shape of `queries`: (`batch_size`, no. of
        # queries, 1, `num_hiddens`) and shape of `keys`: (`batch_size`, 1,
        # no. of key-value pairs, `num_hiddens`). Sum them up with
        # broadcasting
        features = np.expand_dims(queries, axis=2) + np.expand_dims(
            keys, axis=1)
        features = np.tanh(features)
        # There is only one output of `self.w_v`, so we remove the last
        # one-dimensional entry from the shape. Shape of `scores`:
        # (`batch_size`, no. of queries, no. of key-value pairs)
        scores = np.squeeze(self.w_v(features), axis=-1)
        self.attention_weights = masked_softmax(scores, valid_lens)
        # Shape of `values`: (`batch_size`, no. of key-value pairs, value
        # dimension)
        return npx.batch_dot(self.dropout(self.attention_weights), values)
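Assuming this `forward` belongs to a d2l-style `AdditiveAttention` block (with `W_q`, `W_k`, `w_v`, and a dropout layer), a hedged usage sketch with toy shapes could look like this; the constructor signature is an assumption since the class body is not shown.
from mxnet import np, npx
npx.set_np()

queries = np.random.normal(0, 1, (2, 1, 20))                  # (batch_size, no. of queries, query_size)
keys = np.ones((2, 10, 2))                                    # (batch_size, no. of kv pairs, key_size)
values = np.arange(40).reshape(1, 10, 4).repeat(2, axis=0)    # (batch_size, no. of kv pairs, value dim)
valid_lens = np.array([2, 6])

attention = AdditiveAttention(num_hiddens=8, dropout=0.1)  # hypothetical constructor signature
attention.initialize()
attention(queries, keys, values, valid_lens)  # expected output shape: (2, 1, 4)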
Example #4
    def forward(self, X, state):
        w_ih, w_ir, w_iu, w_hh, w_hr, w_hu, b_h, b_r, b_u, w_ho, b_o = self.params
        state = state[0]
        outputs = []
        for x in X:
            r = npx.sigmoid(x @ w_ir + state @ w_hr + b_r)  # reset gate
            u = npx.sigmoid(x @ w_iu + state @ w_hu + b_u)  # update gate

            hr = state * r  # reset the previous hidden state
            # candidate hidden state: the input x and the reset hidden state hr
            # are each multiplied by their own weight matrix
            h_tilda = mxnp.tanh(x @ w_ih + hr @ w_hh + b_h)

            state = state * u + h_tilda * (1 - u)  # blend old and candidate hidden states

            y = state @ w_ho + b_o  # compute the output
            outputs.append(y)
        return mxnp.concatenate(outputs, axis=0), (state, )
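The parameter list unpacked at the top of this `forward` is not defined in the snippet; a hedged initializer consistent with those matrix products (shapes inferred, function name made up) could be:
from mxnet import np as mxnp

def get_gru_params(num_inputs, num_hiddens, num_outputs):
    # Shapes inferred from the unpacking order in forward() above
    def normal(shape):
        return mxnp.random.normal(scale=0.01, size=shape)
    w_ih, w_ir, w_iu = (normal((num_inputs, num_hiddens)) for _ in range(3))
    w_hh, w_hr, w_hu = (normal((num_hiddens, num_hiddens)) for _ in range(3))
    b_h, b_r, b_u = (mxnp.zeros(num_hiddens) for _ in range(3))
    w_ho, b_o = normal((num_hiddens, num_outputs)), mxnp.zeros(num_outputs)
    return [w_ih, w_ir, w_iu, w_hh, w_hr, w_hu, b_h, b_r, b_u, w_ho, b_o]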
Example #5
# Plot the ReLU function
x = np.arange(-8.0, 8.0, 0.1)
x.attach_grad()
with autograd.record():
    y = npx.relu(x)
d2l.plot(x, y, 'x', 'relu(x)', figsize = (5, 2.5))
y.backward()
d2l.plot(x, x.grad, 'x', 'grad of relu', figsize = (5, 2.5))

#Plot Sigmoid function
with autograd.record():
    y = npx.sigmoid(x)
d2l.plot(x, y, 'x', 'sigmoid(x)', figsize = (5, 2.5))
y.backward()
d2l.plot(x, x.grad, 'x', 'grad of sigmoid', figsize = (5, 2.5))

#Plot tanh function
with autograd.record():
    y = np.tanh(x)  # npx does not provide tanh; use np.tanh instead
d2l.plot(x, y, 'x', 'tanh(x)', figsize = (5, 2.5))
y.backward()
d2l.plot(x, x.grad, 'x', 'grad of tanh', figsize = (5, 2.5))

# Plot the pReLU function and its derivative (alpha = 0.01);
# note: Python's built-in min() does not broadcast over ndarrays, so use np.minimum
with autograd.record():
    y = npx.relu(x) + 0.01 * np.minimum(x, 0)
d2l.plot(x, y, 'x', 'prelu(x)', figsize = (5, 2.5))
y.backward()
d2l.plot(x, x.grad, 'x', 'grad of prelu', figsize = (5, 2.5))
Example #6
W1 = np.array([[0.9, 0.3], [-0.7, 0.3]])
W2 = np.array([-0.3, -0.9])
b1 = np.array([0.9,-0.7])
b2 = np.array([-0.7])
params = [W1, b1, W2, b2]
for param in params:
    param.attach_grad()

X = np.array([[1, 1], [0, 1], [0, 0], [1, 0]])
X.attach_grad()
y_true = np.array([1, -1, 1, -1])

for i in range(len(y_true)):
    with autograd.record():
        H = np.tanh(np.dot(W1, X[i]) + b1)
        O = np.tanh(np.dot(W2, H) + b2)
        L = (y_true[i] - O) ** 2
        L = 1/2 * L
    L.backward()
    W1 -= W1.grad
    W2 -= W2.grad
    b1 -= b1.grad
    b2 -= b2.grad
    print('iteration:', i+1)
    print('true label', y_true[i])
    print('input', X[i])
    print('predicted label:', O)
    print('updated W1', W1)
    print('updated b1', b1)
    print('updated W2', W2)
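A small hedged follow-up (not in the original): after the single pass above, the predictions for all four inputs can be recomputed with the updated parameters to see how far they are from the targets.
# Re-run the forward pass for every sample with the updated parameters
for i in range(len(y_true)):
    H = np.tanh(np.dot(W1, X[i]) + b1)
    O = np.tanh(np.dot(W2, H) + b2)
    print('input', X[i], 'true label', y_true[i], 'prediction', O)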
Example #7
import math
from mxnet import np, npx, gluon, autograd
from mxnet.gluon import nn
from d2l import mxnet as d2l
npx.set_np()

# For now, only work through example No. 1
W1 = np.array([[0.9, 0.3, 0.9], [-0.7, 0.3, -0.7]])
W2 = np.array([-0.3, -0.9, -0.7])
b1 = np.array([1])
b2 = np.array([1])
params = [W1, b1, W2, b2]
for param in params:
    param.attach_grad()

X = np.array([1, 1, 1])
X.attach_grad()
y_true = np.array([1])

with autograd.record():
    H = np.tanh(np.dot(W1, X))
    O = np.tanh(np.dot(W2, np.append(H, np.array([1]))))
    L = (y_true - O)**2
    L = 1 / 2 * L
L.backward()
print('predicted value:', O)
print('updated W1', W1 - W1.grad)
print('updated W2', W2 - W2.grad)
Example #8
import math
from mxnet import np, npx, gluon, autograd
from mxnet.gluon import nn
from d2l import mxnet as d2l
npx.set_np()

initial_w1 = np.array([[0.9, 0.3, 0.9], [-0.7, 0.3, -0.7]])
initial_w2 = np.array([-0.3, -0.9, -0.7])

true_labels = np.array([1, -1, 1, -1])
inputs = np.array([[1, 1, 1], [0, 1, 1], [0, 0, 1], [1, 0, 1]])

# Prediction for No. 1; below, h_n_m denotes the layer-m value for example n
h_1_1 = np.dot(initial_w1, inputs[0])
z_1_1 = np.append(np.tanh(h_1_1), np.array([1]))
z_1_3 = np.dot(initial_w2, z_1_1)
y_hat_1 = np.tanh(z_1_3)

y_hat_1

# Loss for No. 1