def net(X): """ two layers of fully connected NN """ X = X.reshape(-1, num_inputs) H = relu(np.dot(X, W1) + b1) return np.dot(H, W2) + b2
def rnn(inputs, hidden_states, params):
    """inputs shape: (num_steps [seq_len], batch_size, vocab_size)
    return: outputs, (H,)
    """
    W_xh, W_hh, b_h, W_ho, b_o = params
    H, = hidden_states
    outputs = []
    hidden_states = []
    # X shape: (batch_size, vocab_size)
    print(f"rnn loops {inputs.shape[0]} times along seq_length axis---------\n")
    i = 1
    for X in inputs:  # loop along num_steps (the sequence length)
        print(f"loops {i} times \n")
        i += 1
        H = mxnp.dot(X, W_xh) + mxnp.dot(H, W_hh) + b_h
        H = mxnp.tanh(H)
        hidden_states.append(H)
        print(f"---rnn input(X,H) and weights' shape---------\n"
              f"   ---X.shape={X.shape}, W_xh.shape={W_xh.shape}\n"
              f"   ---H.shape={H.shape}, W_hh.shape={W_hh.shape}, b_h.shape={b_h.shape}\n"
              f"   ---W_ho.shape={W_ho.shape}, b_o.shape={b_o.shape}\n")
        Y = mxnp.dot(H, W_ho) + b_o
        outputs.append(Y)
        print(f"---rnn output's shape---------\n"
              f"   ---Y.shape={Y.shape}, H.shape={H.shape}\n")
    Ys = mxnp.concatenate(outputs, axis=0)
    print(f"Final Ys.shape={Ys.shape}")
    return Ys, (H,), hidden_states, outputs
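# --- Hedged usage sketch (not from the original source) ----------------------
# A minimal driver for rnn() above, assuming `mxnp` is the MXNet NumPy module
# (e.g. `from mxnet import np as mxnp`). All sizes here are made up for
# illustration; the real code would build its parameters elsewhere.
batch_size, num_steps, vocab_size, num_hiddens = 2, 5, 28, 32
inputs = mxnp.zeros((num_steps, batch_size, vocab_size))
W_xh = mxnp.random.normal(0, 0.01, (vocab_size, num_hiddens))
W_hh = mxnp.random.normal(0, 0.01, (num_hiddens, num_hiddens))
b_h = mxnp.zeros(num_hiddens)
W_ho = mxnp.random.normal(0, 0.01, (num_hiddens, vocab_size))
b_o = mxnp.zeros(vocab_size)
state = (mxnp.zeros((batch_size, num_hiddens)),)
Ys, (H,), hs, outs = rnn(inputs, state, [W_xh, W_hh, b_h, W_ho, b_o])
# Ys.shape == (num_steps * batch_size, vocab_size)
# H.shape == (batch_size, num_hiddens)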
def net(X):
    X = X.reshape(-1, num_inputs)
    H1 = npx.relu(np.dot(X, W1) + b1)
    # Apply dropout only when training the model
    if autograd.is_training():
        H1 = dropout(H1, drop_prob1)
    H2 = npx.relu(np.dot(H1, W2) + b2)
    if autograd.is_training():
        H2 = dropout(H2, drop_prob2)
    return np.dot(H2, W3) + b3
def test_np_dot():
    shapes = [
        ((3, 0), (0, 4)),
        ((3,), (3,)),         # Case 1
        ((3, 4), (4, 5)),     # Case 2
        ((), ()),             # Case 3
        ((3, 4, 5), ()),      # Case 3.5.1
        ((), (3, 4, 5)),      # Case 3.5.2
        ((3, 4, 5), (5,)),    # Case 4
        ((3, 4, 5), (5, 2)),  # Case 5
        ((5,), (5, 2)),
        ((3, 5, 4), (5, 4, 3)),
        ((3, 4), (5, 4, 3)),
        ((4,), (5, 4, 3))
    ]

    eps = 1e-3
    for shape_a, shape_b in shapes:
        np_a = _np.random.uniform(-1.0, 1.0, shape_a)
        np_a[abs(np_a) < eps] = 2 * eps
        np_b = _np.random.uniform(-1.0, 1.0, shape_b)
        np_b[abs(np_b) < eps] = 2 * eps
        a = mx.nd.array(np_a)
        b = mx.nd.array(np_b)
        np_res = _np.dot(np_a, np_b)
        mx_res = np.dot(a.as_np_ndarray(), b.as_np_ndarray())
        assert mx_res.shape == np_res.shape
        assert_almost_equal(np_res, mx_res.asnumpy(), rtol=1e-5, atol=1e-5)
        mx_a = mx.sym.Variable("a")
        mx_b = mx.sym.Variable("b")
        mx_sym = mx.sym.np.dot(mx_a.as_np_ndarray(),
                               mx_b.as_np_ndarray()).as_nd_ndarray()
        if (len(shape_a) > 0 and len(shape_b) > 0 and
                _np.prod(shape_a) > 0 and _np.prod(shape_b) > 0):
            check_numeric_gradient(mx_sym, {"a": a, "b": b},
                                   numeric_eps=eps, rtol=1e-2, atol=1e-3)

    bad_shapes = [((4, 5), (2, 3)), ((3, 4, 5), (6,))]
    for shape_a, shape_b in bad_shapes:
        a = mx.nd.array(random.random()) if len(shape_a) == 0 \
            else rand_ndarray(shape_a)
        b = mx.nd.array(random.random()) if len(shape_b) == 0 \
            else rand_ndarray(shape_b)
        try:
            mx_res = np.dot(a.as_np_ndarray(), b.as_np_ndarray())
        except mx.base.MXNetError:
            continue
        assert False  # an incompatible shape pair must raise MXNetError
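# --- Hedged illustration (not part of the test suite above) ------------------
# A pure-NumPy recap of the shape rules those cases exercise: np.dot contracts
# the last axis of `a` with the second-to-last axis of `b`.
import numpy as _np
a, b = _np.ones((3, 4, 5)), _np.ones((5, 2))
assert _np.dot(a, b).shape == (3, 4, 2)  # Case 5: (3,4,5).(5,2) -> (3,4,2)
assert _np.dot(_np.ones(3), _np.ones(3)).shape == ()         # vector . vector
assert _np.dot(_np.ones((3, 4)), _np.ones(4)).shape == (3,)  # matrix . vector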
def corr2d_multi_in_out_1x1(X, K):
    c_i, h, w = X.shape
    c_o = K.shape[0]
    X = X.reshape((c_i, h * w))
    K = K.reshape((c_o, c_i))
    # Matrix multiplication in the fully connected layer
    Y = np.dot(K, X)
    return Y.reshape((c_o, h, w))
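# --- Hedged usage sketch (illustrative shapes, not from the source) ----------
# A 1x1 kernel bank of shape (c_o, c_i, 1, 1) has c_o * c_i elements, so the
# reshape inside corr2d_multi_in_out_1x1 accepts it directly.
X = np.random.normal(0, 1, (3, 3, 3))     # 3 input channels, 3x3 spatial
K = np.random.normal(0, 1, (2, 3, 1, 1))  # 2 output channels
Y = corr2d_multi_in_out_1x1(X, K)
assert Y.shape == (2, 3, 3)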
def synthetic_data(w, b, num_examples):  #@save
    """Generate y = Xw + b + noise."""
    X = np.random.normal(0, 1, (num_examples, len(w)))
    y = np.dot(X, w) + b
    y += np.random.normal(0, 0.01, y.shape)
    return X, y.reshape((-1, 1))
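# --- Hedged usage sketch (illustrative values) --------------------------------
# Typical call pattern for synthetic_data(); true_w and true_b are made-up
# ground-truth parameters that a model would later try to recover.
true_w = np.array([2, -3.4])
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1000)
# features.shape == (1000, 2), labels.shape == (1000, 1)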
def forward(self, X):
    X = self.dense(X)
    # Mix in the fixed random weight, then apply ReLU
    X = npx.relu(np.dot(X, self.rand_weight.data()) + 1)
    # Reuse the same fully connected layer (shared parameters)
    X = self.dense(X)
    # Control flow: halve X until its L1 norm is at most 1
    while np.abs(X).sum() > 1:
        X /= 2
    return X.sum()
def compute_linear_regression(
    features: np.array,
    weights: np.array,
    bias: float
) -> np.array:
    """Linear regression implementation. Equal to: X*w + b."""
    return np.dot(features, weights) + bias
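# --- Hedged usage sketch (made-up numbers) ------------------------------------
features = np.array([[1.0, 2.0], [3.0, 4.0]])
weights = np.array([0.5, -0.5])
preds = compute_linear_regression(features, weights, bias=1.0)
# preds == [0.5, 0.5]: each row is 0.5*x1 - 0.5*x2 + 1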
def synthetic_data(w, b, num_examples) -> tuple:
    """Generate y = Xw + b + noise."""
    X = np.random.normal(0, 1, (num_examples, len(w)))
    y = np.dot(X, w) + b
    y += np.random.normal(0, 0.01, y.shape)
    return X, y
def test_dot():
    A = np.ones((1, INT_OVERFLOW), dtype='float32')
    B = np.ones((INT_OVERFLOW, 1), dtype='float32')
    A.attach_grad()
    with mx.autograd.record():
        C = np.dot(A, B)
    assert_almost_equal(C.asnumpy(), [INT_OVERFLOW], rtol=1e-5, atol=1e-5)
    C.backward()
    assert A.grad.shape == (1, INT_OVERFLOW)
def forward(self, X):
    """Calculates a tensor reduction with the following steps:
        * Step 1 : A_ik = Sum_j W_ijk . X_j
        * Step 2 : Sum_i A_ik . X_i
    INPUT  : X, the concatenation of the (i,) and (j,) vectors
    OUTPUT : array of dimension k
    """
    Xi = X[:self._dim_i]
    Xj = X[self._dim_i:self._dim_i + self._dim_j]
    Wki = np.zeros((self._dim_k, self._dim_i))
    for k in range(self._dim_k):
        Wk_ij = self._weight.data()[:, :, k]  # (i, j)
        Wki[k, :] = np.dot(Wk_ij, Xj)         # (k, i)
    self._reducedWeight.data()[:] = np.dot(Wki, Xi)
    return self._reducedWeight.data()
def corr2d_multi_in_out_2x2(X, K):
    c_i, h, w = X.shape
    c_o, c_ii, kh, kw = K.shape
    assert c_ii == c_i, "Kernel channel dimensions don't match input"
    X = X.reshape((c_i, h * w))
    K = K.reshape((c_o, kh * kw, c_i))
    # np.dot contracts the input channel dimension:
    # (c_o, kh*kw, c_i) . (c_i, h*w) -> (c_o, kh*kw, h*w)
    Y = np.dot(K, X)
    # Collapse the spatial (h*w) dimension
    Y = Y.sum(axis=2)
    return Y.reshape((c_o, kh, kw))
def forward(self, inp):  # pylint: disable=arguments-differ
    """
    Parameters
    ----------
    inp
        Shape (...,)

    Returns
    -------
    out
        Shape (..., units)
    """
    if self._div_val == 1.0:
        emb = np.take(getattr(self, 'embed0_weight').data(), inp, axis=0)
        if self._units != self._embed_size:
            emb = np.dot(emb, getattr(self, 'inter_proj0_weight').data())
    else:
        emb = None
        for i, (l_idx, r_idx) in enumerate(
                zip([0] + self._cutoffs, self._cutoffs + [self._vocab_size])):
            emb_i = np.take(getattr(self, 'embed{}_weight'.format(i)).data(),
                            inp - l_idx, axis=0, mode='clip')
            emb_i = np.dot(
                emb_i, getattr(self, 'inter_proj{}_weight'.format(i)).data())
            if emb is None:
                emb = emb_i
            else:
                emb = np.where(
                    np.expand_dims((inp >= l_idx) * (inp < r_idx), axis=-1),
                    emb_i, emb)
    if self._scaled:
        emb = emb * self._emb_scale
    return emb
def products(A):
    x = np.arange(4)
    y = np.ones(4)
    print("x . y : {}, {}".format(np.dot(x, y), np.sum(x * y)))
    print("A . x : {} has shape {}".format(np.dot(A, x), np.dot(A, x).shape))
    B = np.ones(shape=(4, 3))
    print("A . B : {} has shape {}".format(np.dot(A, B), np.dot(A, B).shape))
    print("{}.{} has shape {}".format(A.shape, B.shape, np.dot(A, B).shape))
def synthetic_regression_data(w, b, num_examples):
    """Generate synthetic regression data on the fly.

    Args:
        w (list): list of weights for the independent variables
        b (float): bias value
        num_examples (int): number of observations

    Returns:
        X, y (independent data, target data): synthetic features and targets
    """
    X = np.random.normal(0, 1, (num_examples, len(w)))
    y = np.dot(X, w) + b
    y += np.random.normal(0, 0.01, y.shape)
    return X, y.reshape((-1, 1))
def generate_synthetic_data(
    weights: np.array,
    bias: float,
    num_examples: int
) -> Tuple[np.array, np.array]:
    """Generate synthetic data that represents: y = X*w + b + noise."""
    # Draw features from a standard normal distribution with shape
    # (num_examples, number of weights).
    features = np.random.normal(0, 1, (num_examples, len(weights)))
    # Compute the true linear regression.
    targets = np.dot(features, weights) + bias
    # Add noise with a standard deviation of 0.01, keeping the problem
    # relatively easy.
    targets += np.random.normal(0, 0.01, targets.shape)
    return features, targets
# sum_c_without_dims = A.sum(axis=[0, 1])
# print(sum_C)
# print(sum_C.shape)
# print(sum_c_without_dims)
# print(sum_c_without_dims.shape)
# print(A / sum_c_without_dims)
# print(A / sum_C)
# print(A.cumsum(axis=0))
# print(A.cumsum(axis=1))

# Dot product
# x = np.ones(4)
# y = np.arange(4)
# print(x)
# print(y)
# print(np.dot(x, y))

# Matrix-vector product
A = np.arange(20).reshape(5, 4)
B = np.ones(4)
print(A)
print(B)
print(np.dot(A, B))
def net(X):
    internal = np.dot(X.reshape(-1, num_inputs), W) + b
    return softmax(internal)
W1 = np.array([[0.9, 0.3], [-0.7, 0.3]])
W2 = np.array([-0.3, -0.9])
b1 = np.array([0.9, -0.7])
b2 = np.array([-0.7])
params = [W1, b1, W2, b2]
for param in params:
    param.attach_grad()

X = np.array([[1, 1], [0, 1], [0, 0], [1, 0]])
X.attach_grad()
y_true = np.array([1, -1, 1, -1])

for i in range(len(y_true)):
    with autograd.record():
        H = np.tanh(np.dot(W1, X[i]) + b1)
        O = np.tanh(np.dot(W2, H) + b2)
        L = (y_true[i] - O) ** 2
        L = 1/2 * L
    L.backward()
    # Plain gradient descent with an implicit learning rate of 1
    W1 -= W1.grad
    W2 -= W2.grad
    b1 -= b1.grad
    b2 -= b2.grad
    print('iteration:', i + 1)
    print('true label', y_true[i])
    print('input', X[i])
    print('predicted label:', O)
    print('updated W1', W1)
    print('updated b1', b1)
    print('updated W2', W2)
def net(X):
    return softmax(np.dot(X.reshape((-1, W.shape[0])), W) + b)
def gram_matrix(x):
    _, d, h, w = x.shape
    x = x.reshape(d, h * w)
    gram = np.dot(x, x.T) / (d * h * w)
    return gram
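# --- Hedged usage sketch (illustrative sizes) ---------------------------------
# For a (batch=1, channels=d, h, w) feature map, the Gram matrix is (d, d);
# in style transfer it summarizes channel co-activations of a style layer.
x = np.random.normal(0, 1, (1, 64, 32, 32))
g = gram_matrix(x)
assert g.shape == (64, 64)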
def linreg(X, w, b):
    return np.dot(X, w) + b
def linreg(X, w, b):  #@save
    """The linear regression model."""
    return np.dot(X, w) + b
def net(X):
    return softmax(np.dot(X.reshape(-1, num_inputs), W) + b)
def net(X):
    X = X.reshape(-1, num_inputs)
    H = relu(np.dot(X, W1) + b1)
    return np.dot(H, W2) + b2
sum_A = A.sum(axis=1, keepdims=True)
# Since sum_A still keeps its two axes after summing each row, we can
# divide A by sum_A with broadcasting.
A / sum_A

# We can call the cumsum function; it does not reduce the input tensor
# along any axis.
A.cumsum(axis=0)

############### 2.3.7. Dot Products ###############
y = np.ones(4)
x
y
np.dot(x, y)
# We can express the dot product of two vectors equivalently by performing
# an elementwise multiplication and then a sum:
np.sum(x * y)

############### 2.3.8. Matrix-Vector Products ###############
# Now we can begin to understand matrix-vector products.
A.shape, x.shape, np.dot(A, x)

############### 2.3.9. Matrix-Matrix Multiplication ###############
# If you have gotten the hang of dot products and matrix-vector products,
# then matrix-matrix multiplication should be straightforward.
B = np.ones(shape=(4, 3))
def synthetic_data(w, b, num_examples):
    X = np.random.normal(0, 1, (num_examples, len(w)))
    y = np.dot(X, w) + b
    y += np.random.normal(0, 0.01, y.shape)  # Adding noise :)
    return X, y
import math
from mxnet import np, npx, gluon, autograd
from mxnet.gluon import nn
from d2l import mxnet as d2l
npx.set_np()

# For now, only work through problem No. 1
W1 = np.array([[0.9, 0.3, 0.9], [-0.7, 0.3, -0.7]])
W2 = np.array([-0.3, -0.9, -0.7])
b1 = np.array([1])
b2 = np.array([1])
params = [W1, b1, W2, b2]
for param in params:
    param.attach_grad()

X = np.array([1, 1, 1])
X.attach_grad()
y_true = np.array([1])

with autograd.record():
    H = np.tanh(np.dot(W1, X))
    O = np.tanh(np.dot(W2, np.append(H, np.array([1]))))
    L = (y_true - O) ** 2
    L = 1 / 2 * L
L.backward()

print('predicted value:', O)
print('updated W1', W1 - W1.grad)
print('updated W2', W2 - W2.grad)
def net(X):
    X = X.reshape((-1, num_inputs))
    H1 = relu(np.dot(X, W1) + b1)
    H2 = relu(np.dot(H1, W2) + b2)
    return np.dot(H2, W3) + b3
def logreg(X, w, b):  #@save
    """The logistic regression model."""
    # The bias belongs inside the sigmoid and the exponent must be negated:
    # sigmoid(z) = 1 / (1 + exp(-z)) with z = Xw + b.
    return 1 / (1 + np.exp(-(np.dot(X, w.T) + b)))
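# --- Hedged alternative (assumes `from mxnet import npx` and npx.set_np()) ---
# The same model written with MXNet's built-in sigmoid, which is the more
# numerically robust form:
def logreg_npx(X, w, b):
    return npx.sigmoid(np.dot(X, w.T) + b)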