def test_sliding2d():
    """
    Input:
        A (padded) valid Tensor for slides. [width, height]
        width_idx, height_idx, stride
    Output:
        A sliding Tensor [K, K], K = kernel_size
    """
    n_samples = 2
    width = 4
    height = 5
    a = Tensor(np.random.randn(n_samples, width, height), requires_grad=True)
    a.print()

    kernel_size = 2  # symmetric, squared kernel
    stride = 1
    width_idx = 0
    height_idx = 1
    b = slide.Sliding2D(width_idx=width_idx,
                        height_idx=height_idx,
                        kernel_size=kernel_size,
                        stride=stride)(a)
    b.print()
    b.backward()
    print(a.grad)
def test_convcore2d():
    # input
    n_input_channel = 3
    input_width = 5
    input_height = 5
    # filters
    n_output_channel = 2  # n_filter
    kernel_size = 5
    stride = 1
    padding = 1
    n_samples = 7

    W = Tensor(np.random.randn(kernel_size, kernel_size), requires_grad=True)
    X = Tensor(np.random.randn(n_samples, input_width, input_height),
               requires_grad=True)
    Y_pred = conv.ConvCore2D(n_input_channel=n_input_channel,
                             input_width=input_width,
                             input_height=input_height,
                             n_output_channel=n_output_channel,
                             kernel_size=kernel_size,
                             stride=stride,
                             padding=padding,
                             W=W)(X)
    # Y_pred.print()
    Y_pred.backward()
    print(X.grad)
def test_padding2d():
    n_samples = 4
    width = 2
    height = 3
    a = Tensor(np.random.randn(n_samples, width, height), requires_grad=True)
    a.print()

    padding = 1
    b = pad.Padding2D(padding=padding)(a)
    b.print()
    b.backward()
    print(a.grad)
def test_conv2d():
    # input
    n_input_channel = 3
    input_width = 28
    input_height = 28
    # filters
    n_output_channel = 2  # n_filter
    kernel_size = 5
    stride = 2
    padding = 1
    n_samples = 7

    X = Tensor(np.random.randn(n_samples, n_input_channel, input_width, input_height),
               requires_grad=True)
    Y_pred = conv.Conv2D(n_input_channel=n_input_channel,
                         input_width=input_width,
                         input_height=input_height,
                         n_output_channel=n_output_channel,
                         kernel_size=kernel_size,
                         stride=stride,
                         padding=padding)(X)
    # Y_pred.print()
    Y_pred.backward()
    print(X.grad)
    # Y_pred = ReLU()(Y_pred)
def forward(self, *args):
    assert len(args) == 2
    assert isinstance(args[0], Tensor)
    assert isinstance(args[1], Tensor)
    self.A = args[0]  # Y
    self.B = args[1]  # Y_pred
    assert self.A.data.shape == self.B.data.shape

    # loss = 0.5 * sum((Y_pred - Y) ** 2) / n_samples
    n_samples = self.A.data.shape[0]
    loss_value = 0.5 * np.sum((self.B.data - self.A.data) ** 2) / n_samples

    C = Tensor(loss_value)
    C.name = self.name
    C.grad_fn = self

    # A = Y is the label, which is constant.
    self.A.requires_grad = False
    # B = Y_pred
    if self.B.requires_grad:
        C.requires_grad = True

    self.A.parent = C
    self.B.parent = C
    C.left_child = self.A
    C.right_child = self.B
    return C
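# A minimal sketch (not part of the library) to cross-check the MSE value computed
# above against plain numpy; the 0.5 factor and the division by n_samples follow the
# forward pass. The helper name `check_mse_value` is illustrative only.
def check_mse_value(Y, Y_pred):
    # loss = 0.5 * sum((Y_pred - Y) ** 2) / n_samples
    n_samples = Y.shape[0]
    return 0.5 * np.sum((Y_pred - Y) ** 2) / n_samples

# Example usage:
#   Y = np.zeros((2, 3)); Y_pred = np.ones((2, 3))
#   check_mse_value(Y, Y_pred)  # 0.5 * 6 / 2 = 1.5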
def forward(self, *args):
    assert len(args) == 2
    assert isinstance(args[0], Tensor)
    assert isinstance(args[1], Tensor)
    self.A = args[0]
    self.B = args[1]
    # Currently, A holds the batch of samples.
    assert self.A.data.shape[1:] == self.B.data.shape

    # In numpy, * is element-wise multiplication (with broadcasting).
    C = Tensor(self.A.data * self.B.data)
    C.name = self.name
    C.grad_fn = self
    if self.A.requires_grad or self.B.requires_grad:
        C.requires_grad = True
    self.A.parent = C
    self.B.parent = C
    C.left_child = self.A
    C.right_child = self.B
    return C
def forward(self, *args):
    """
    # Input
        X: [n_samples, width, height] (padded size)
        width_idx, height_idx, kernel_size, stride
    # Output
        Y_pred: [n_samples, K, K], K = kernel_size

    Y_pred = X[:, W_i*S : W_i*S + K, H_i*S : H_i*S + K]
    i.e.
    Y_pred = X[:, width_idx * stride : width_idx * stride + kernel_size,
                  height_idx * stride : height_idx * stride + kernel_size]
    """
    assert len(args) == 1
    assert isinstance(args[0], Tensor)
    assert isinstance(args[0].data, np.ndarray)
    assert len(args[0].data.shape) == 3

    X = args[0]
    self.X = X  # 1. Save input tensors for the current function.

    Y_pred_data = self.X.data[
        :,
        self.width_idx * self.stride:self.width_idx * self.stride + self.kernel_size,
        self.height_idx * self.stride:self.height_idx * self.stride + self.kernel_size]
    Y_pred = Tensor(Y_pred_data)

    Y_pred.grad_fn = self  # 3. Set grad_fn & requires_grad for the current function.
    if self.X.requires_grad:
        Y_pred.requires_grad = True

    Y_pred.left_child = X  # 4. Set parent-child relationships.
    X.parent = Y_pred
    return Y_pred  # 2. Return the new Tensor.
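# A small sketch (illustrative only, not part of the library) of the window slice
# that Sliding2D performs, written with plain numpy. `extract_window` is a
# hypothetical helper name.
def extract_window(x, width_idx, height_idx, kernel_size, stride):
    # x: [n_samples, width, height] -> window: [n_samples, kernel_size, kernel_size]
    w0 = width_idx * stride
    h0 = height_idx * stride
    return x[:, w0:w0 + kernel_size, h0:h0 + kernel_size]

# Example usage:
#   x = np.arange(2 * 4 * 5).reshape(2, 4, 5)
#   extract_window(x, width_idx=0, height_idx=1, kernel_size=2, stride=1).shape  # (2, 2, 2)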
def test_Linear():
    n_samples = 5
    n_input = 4
    n_output = 3
    X = Tensor(np.random.randn(n_samples, n_input), requires_grad=False, name='X')
    Y = Tensor(np.random.randn(n_samples, n_output), name='Y')

    model = linear.Linear(n_input, n_output, bias=True)
    loss_fn = mse.MSELoss()
    optim = sgd.SGD(lr=1e-3)
    model.compile(loss_fn=loss_fn, optimizer=optim)
    model.fit(X, Y, verbose=0, epochs=100)
    print('Linear best_epoch=%s, min_loss=%.4f' % (model.best_epoch, model.min_loss_val))
def forward(self, *args):
    assert len(args) == 1
    assert isinstance(args[0], Tensor)
    assert self.repeat_time > 0
    self.A = args[0]

    C_data = np.repeat(self.A.data, self.repeat_time)
    if self.target_shape:
        C_data = C_data.reshape(self.target_shape)

    C = Tensor(C_data)
    C.grad_fn = self
    C.left_child = self.A
    self.A.parent = C
    self.output_shape = C_data.shape
    if self.A.requires_grad:
        C.requires_grad = True
    return C
def forward(self, *args):
    assert len(args) == 1
    assert isinstance(args[0], Tensor)
    self.A = args[0]

    C_data = np.sum(self.A.data, axis=self.axis)
    if isinstance(self.target_shape, tuple):
        C_data = C_data.reshape(self.target_shape)
    self.output_shape = C_data.shape

    C = Tensor(C_data)
    C.name = self.name
    C.grad_fn = self
    if self.A.requires_grad:
        C.requires_grad = True
    self.A.parent = C
    C.left_child = self.A
    # Sum is a unary op, so there is no right child:
    # self.B.parent = C
    # C.right_child = self.B
    return C
def forward(self, *args):
    assert len(args) == 2
    assert isinstance(args[0], Tensor)
    assert isinstance(args[1], Tensor)
    self.A = args[0]
    self.B = args[1]
    assert self.shape == self.B.data.shape

    C_data = self.A.data
    set_sub_ndarray(C_data, self.B.data, self.coordinate_tuple)
    assert C_data.shape == self.A.data.shape

    C = Tensor(C_data)
    C.left_child = self.A
    C.right_child = self.B
    self.output_shape = C_data.shape
    C.grad_fn = self
    self.A.parent = C
    self.B.parent = C
    if self.A.requires_grad or self.B.requires_grad:
        C.requires_grad = True
    return C
def forward(self, *args):
    """
    # Input
        X: [n_samples, width, height]
    # Output
        Y_pred: [n_samples, width + 2P, height + 2P]
    """
    assert len(args) == 1
    assert isinstance(args[0], Tensor)
    assert isinstance(args[0].data, np.ndarray)
    assert len(args[0].data.shape) == 3

    n_samples, width, height = args[0].data.shape
    self.n_samples = n_samples
    X = args[0]
    self.X = X  # 1. Save input tensors for the current function.

    P = self.padding
    # Zero padding: copy X.data into the center of Y_pred_data,
    # leaving a border of zeros of width P around it.
    Y_pred_data = np.zeros((self.n_samples, width + 2 * P, height + 2 * P))
    if P == 0:
        Y_pred_data = X.data
    else:
        Y_pred_data[:, P:-P, P:-P] = X.data

    Y_pred = Tensor(Y_pred_data)
    Y_pred.grad_fn = self  # 3. Set grad_fn & requires_grad for the current function.
    if self.X.requires_grad:
        Y_pred.requires_grad = True

    Y_pred.left_child = X  # 4. Set parent-child relationships.
    X.parent = Y_pred
    return Y_pred  # 2. Return the new Tensor.
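# A minimal sketch (illustrative only) of the same zero padding done with np.pad,
# which can serve as a cross-check for Padding2D. `zero_pad_2d` is a hypothetical helper.
def zero_pad_2d(x, padding):
    # x: [n_samples, width, height] -> [n_samples, width + 2P, height + 2P]
    if padding == 0:
        return x
    return np.pad(x, ((0, 0), (padding, padding), (padding, padding)), mode='constant')

# Example usage:
#   x = np.ones((4, 2, 3))
#   zero_pad_2d(x, 1).shape  # (4, 4, 5)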
def forward(self, *args, **kwargs):
    assert len(args) == 1
    assert isinstance(args[0], Tensor)
    assert isinstance(args[0].data, np.ndarray)
    self.A = args[0]
    self.shape = self.A.data.shape

    C_data = self.A.data.reshape(self.target_shape)
    C = Tensor()
    C.data = C_data
    C.left_child = self.A
    C.grad_fn = self
    self.A.parent = C
    # Reshape is a unary op, so there is no right child.
    if self.A.requires_grad:
        C.requires_grad = True
    return C
def test_Dense():
    n_samples = 5
    n_input = 4
    n_output = 3
    X = Tensor(np.random.randn(n_samples, n_input), requires_grad=False, name='X')
    Y = Tensor(np.random.randn(n_samples, n_output), name='Y')

    model = dnn.Dense(n_input, n_output, bias=True)
    loss_fn = mse.MSELoss()
    optim = sgd.SGD(lr=1e-3)
    activation = relu.ReLU()
    model.compile(loss_fn=loss_fn, optimizer=optim, activation=activation)
    model.fit(X, Y, verbose=0, epochs=100)
    print('Dense best_epoch=%s, min_loss=%.4f' % (model.best_epoch, model.min_loss_val))
def test_sgd():
    n_samples = 5
    n_input = 4
    X_1 = Tensor(np.random.randn(n_samples, n_input), requires_grad=True, name='X_1')
    X_2 = Tensor(np.random.randn(n_samples, n_input), requires_grad=True, name='X_2')
    Y = Tensor(np.random.randn(n_samples, n_input), requires_grad=False, name='Y')

    Y_pred = Add()(X_1, X_2)
    loss_ = mse.MSELoss()(Y, Y_pred)
    loss_.backward()

    old_x1 = X_1.data
    old_x2 = X_2.data
    optim = sgd.SGD(loss_, lr=1e-1)
    optim.step()
    new_x1 = X_1.data
    new_x2 = X_2.data

    print("=" * 10)
    print(old_x1, '\n')
    print(new_x1, '\n')
    print("=" * 10)
    print(old_x2, '\n')
    print(new_x2, '\n')
def test_mse_loss():
    n_samples = 5
    n_output = 4
    Y = Tensor(np.random.randn(n_samples, n_output), name='Y')
    Y_pred = Tensor(np.random.randn(n_samples, n_output), requires_grad=True, name='Y_pred')

    loss_ = mse.MSELoss('loss')(Y, Y_pred)
    Y.print()
    Y_pred.print()
    loss_.print()

    loss_.backward()
    print(Y_pred.grad)
def forward(self, *args):
    assert len(args) == 2
    assert isinstance(args[0], Tensor)
    assert isinstance(args[1], Tensor)
    self.A = args[0]
    self.B = args[1]
    assert self.A.data.shape == self.B.data.shape

    C = Tensor(self.A.data - self.B.data)
    C.name = self.name
    C.grad_fn = self
    if self.A.requires_grad or self.B.requires_grad:
        C.requires_grad = True
    self.A.parent = C
    self.B.parent = C
    C.left_child = self.A
    C.right_child = self.B
    return C
def forward(self, *args):
    assert len(args) == 1
    assert isinstance(args[0], Tensor)
    self.A = args[0]

    # Sigmoid: f(x) = sigmoid(x)
    C = Tensor(sigmoid(self.A.data))
    C.name = self.name
    C.grad_fn = self
    if self.A.requires_grad:
        C.requires_grad = True
    self.A.parent = C
    C.left_child = self.A
    self.C = C
    return C
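# A minimal sketch (illustrative only) of the sigmoid function assumed by the forward
# pass above; the library's `sigmoid` helper is defined elsewhere, so this is only a
# plain-numpy reference for comparison.
def sigmoid_ref(x):
    # f(x) = 1 / (1 + exp(-x))
    return 1.0 / (1.0 + np.exp(-x))

# Example usage:
#   sigmoid_ref(np.array([0.0]))  # array([0.5])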
def forward(self, *args):
    assert len(args) == 1
    assert isinstance(args[0], Tensor)
    self.A = args[0]

    # ReLU: f(x) = max(0, x)
    # In numpy: relu(x) = x * (x > 0), relu_grad(x) = 1 * (x > 0)
    # C = Tensor(np.clip(self.A.data, a_min=0, a_max=np.inf))
    C = Tensor(self.A.data * (self.A.data > 0))
    C.name = self.name
    C.grad_fn = self
    if self.A.requires_grad:
        C.requires_grad = True
    self.A.parent = C
    C.left_child = self.A
    return C
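# A tiny sketch (illustrative only) confirming the numpy ReLU identity used above,
# relu(x) = x * (x > 0), against np.maximum. `relu_ref` is a hypothetical helper.
def relu_ref(x):
    return np.maximum(x, 0)

# Example usage:
#   x = np.array([-2.0, 0.0, 3.0])
#   np.allclose(x * (x > 0), relu_ref(x))  # True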
def forward(self, *args):
    assert len(args) == 1
    assert isinstance(args[0], Tensor)
    self.A = args[0]

    C_data = get_sub_ndarray(self.A.data, self.coordinate_tuple)
    C = Tensor()
    C.data = C_data
    C.left_child = self.A
    C.grad_fn = self
    self.A.parent = C
    # GetSubTensor is a unary op, so there is no right child.
    if self.A.requires_grad:
        C.requires_grad = True
    return C
def forward(self, *args, **kwargs):
    """
    # ConvCore2D (convolution over a 2-D array of X.data by a kernel W)
    ## Input:
        X: [n_samples, input_width, input_height]
        W: [K, K], weights of one channel.
    ## Output:
        Y: [n_samples, output_width, output_height]

    padding_X = Padding2D(padding, channel_idx)(X)
        # [n_samples, input_width + 2P, input_height + 2P]
    Y_ij = Sliding2D(i, j, stride, channel_idx)(padding_X)
        # [K, K]
    Y = SetSubTensor(i, j)(Y_ij)
    """
    assert len(args) == 1
    assert isinstance(args[0], Tensor)
    X = args[0]
    # assert X.data.shape[1:] == (self.input_width, self.input_height)
    assert isinstance(self.W, Tensor)
    assert isinstance(self.W.data, np.ndarray)
    assert self.W.data.shape == (self.kernel_size, self.kernel_size)

    output_width = int((self.input_width - self.kernel_size + 2 * self.padding)
                       / self.stride + 1)
    output_height = int((self.input_height - self.kernel_size + 2 * self.padding)
                        / self.stride + 1)
    n_samples = X.data.shape[0]

    # Y_pred: [n_samples, output_width, output_height]
    Y_pred = Tensor(np.zeros((n_samples, output_width, output_height)))

    # X: [n_samples, input_width, input_height]
    # padding_X: [n_samples, input_width + 2P, input_height + 2P]
    padding_X = Padding2D(padding=self.padding)(X)
    assert padding_X.data.shape == (n_samples,
                                    self.input_width + 2 * self.padding,
                                    self.input_height + 2 * self.padding)

    for i in range(output_width):
        for j in range(output_height):
            # sub_X: [n_samples, K, K]
            sub_X = Sliding2D(width_idx=i,
                              height_idx=j,
                              stride=self.stride,
                              kernel_size=self.kernel_size)(padding_X)
            assert sub_X.data.shape == (n_samples, self.kernel_size, self.kernel_size)

            # sub_X: [n_samples, K, K]
            # W: [K, K]
            # Y_pred_ij: [n_samples, K, K]
            # Relies on numpy's right-aligned broadcasting in `BatchElementWiseMul`.
            Y_pred_ij = BatchElementWiseMul()(sub_X, self.W)
            assert Y_pred_ij.data.shape == (n_samples, self.kernel_size, self.kernel_size)

            # Y_pred_ij: [n_samples, 1, 1]
            target_shape = (n_samples, 1, 1)
            Y_pred_ij = Sum(axis=(1, 2), target_shape=target_shape)(Y_pred_ij)
            assert Y_pred_ij.data.shape == (n_samples, 1, 1)

            # Y_pred: [n_samples, output_width, output_height]
            # Y_pred_ij: [n_samples, 1, 1]
            coord_tuple = ((0, n_samples), (i, i + 1), (j, j + 1))
            Y_pred = SetSubTensor(coord_tuple)(Y_pred, Y_pred_ij)

    assert Y_pred.data.shape == (n_samples, output_width, output_height)
    return Y_pred
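# A compact reference (illustrative only, not part of the library) of the same
# single-channel cross-correlation in plain numpy, useful for cross-checking the
# values produced by ConvCore2D. `conv_core_2d_ref` is a hypothetical helper.
def conv_core_2d_ref(x, w, stride=1, padding=0):
    # x: [n_samples, in_w, in_h], w: [K, K] -> y: [n_samples, out_w, out_h]
    n_samples, in_w, in_h = x.shape
    k = w.shape[0]
    x = np.pad(x, ((0, 0), (padding, padding), (padding, padding)), mode='constant')
    out_w = (in_w - k + 2 * padding) // stride + 1
    out_h = (in_h - k + 2 * padding) // stride + 1
    y = np.zeros((n_samples, out_w, out_h))
    for i in range(out_w):
        for j in range(out_h):
            window = x[:, i * stride:i * stride + k, j * stride:j * stride + k]
            y[:, i, j] = np.sum(window * w, axis=(1, 2))
    return y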
def forward(self, *args):
    """
    # Dimension Computation Rule
    > output_dim = (N - K + 2P) / S + 1
    > output_dim: output width or height
    > N: input_dim (input width or height)
    > K: filter_size, kernel_size
    > S: stride
    > P: padding

    # Input
        X: [n_samples, n_input_channel, input_width, input_height]
    # Output
        Y: [n_samples, n_output_channel, output_width, output_height]
    # Parameters
        W: n_output_channel * (K * K)   [n_output_channel, K, K]
        b: n_output_channel * 1         [n_output_channel, 1]
        total_n_parameters = n_output_channel (n_filters)
            * (kernel_size * kernel_size + (1 if is_bias else 0))

    output_width  = (input_width  - K + 2P) / S + 1
    output_height = (input_height - K + 2P) / S + 1

    # ============================ Important!!! ============================
    # ===== Generation Process
    Y = SetSubTensor(i)([Y_i]),
        i = 0, 1, ..., n_output_channel - 1
    Y_i = ListAdd()([A_j]) + b_i,
        j = 0, 1, ..., n_input_channel - 1, ListAdd iterates over input channels.
    A_j = ConvCore2D(X_j, W_i), see Wikipedia for cross-correlation.
    # ===== Forward Rule
    """
    assert len(args) == 1
    assert isinstance(args[0], Tensor)
    X = args[0]  # [n_samples, n_input_channel, input_width, input_height]

    self.n_samples = X.data.shape[0]
    self.output_width = int((self.input_width - self.kernel_size + 2 * self.padding)
                            / self.stride + 1)
    self.output_height = int((self.input_height - self.kernel_size + 2 * self.padding)
                             / self.stride + 1)

    # [n_samples, n_output_channel, output_width, output_height]
    Y_pred = Tensor(np.zeros((self.n_samples, self.n_output_channel,
                              self.output_width, self.output_height)),
                    requires_grad=True)

    for i in range(self.n_output_channel):
        Y_i = Tensor(np.zeros((self.n_samples, self.output_width, self.output_height)),
                     requires_grad=True)
        for j in range(self.n_input_channel):
            # X: [n_samples, n_input_channel, input_width, input_height]
            # X_j: [n_samples, 1, input_width, input_height]
            coord_tuple = ((0, self.n_samples), (j, j + 1),
                           (0, self.input_width), (0, self.input_height))
            X_j = GetSubTensor(coord_tuple)(X)

            # X_j: [n_samples, 1, input_width, input_height]
            # X_j (reshaped): [n_samples, input_width, input_height]
            target_shape = (self.n_samples, self.input_width, self.input_height)
            X_j = Reshape(target_shape=target_shape)(X_j)

            # W: [n_output_channel, K, K]
            # W_i: [1, K, K]
            coord_tuple = ((i, i + 1), (0, self.kernel_size), (0, self.kernel_size))
            W_i = GetSubTensor(coord_tuple)(self.W)

            # W_i: [K, K]
            target_shape = (self.kernel_size, self.kernel_size)
            W_i = Reshape(target_shape=target_shape)(W_i)
            assert W_i.data.shape == (self.kernel_size, self.kernel_size)

            # A_j: [n_samples, output_width, output_height]
            # W_i: [K, K]
            A_j = ConvCore2D(W=W_i,
                             input_width=self.input_width,
                             input_height=self.input_height,
                             kernel_size=self.kernel_size,
                             stride=self.stride,
                             padding=self.padding)(X_j)
            assert A_j.data.shape == (self.n_samples, self.output_width, self.output_height)

            # Y_i: [n_samples, output_width, output_height]
            # A_j: [n_samples, output_width, output_height]
            Y_i = Add()(Y_i, A_j)

            """
            # Actually, the bias can be added at the very end, once per
            # output channel (see the is_bias branch below).
            if self.is_bias:
                # b: [n_output_channel, 1]
                # b_i: [1, 1]
                coord_tuple = ((i, i + 1), (0, 1))
                b_i = GetSubTensor(coord_tuple)(self.b)
                # Y_i: [n_samples, output_width, output_height]
                # b_i: [1, 1]
                # Relies on numpy broadcasting in `Add`.
                # Here b_i is [1, 1], so `Reshape` is not needed.
                Y_i = Add()(Y_i, b_i)
            """

        # Y_pred: [n_samples, n_output_channel, output_width, output_height]
        # Y_i: [n_samples, output_width, output_height]
        coord_tuple = ((0, self.n_samples), (i, i + 1),
                       (0, self.output_width), (0, self.output_height))
        target_shape = (self.n_samples, 1, self.output_width, self.output_height)
        Y_i = Reshape(target_shape=target_shape)(Y_i)
        assert Y_i.data.shape == target_shape
        Y_pred = SetSubTensor(coord_tuple)(Y_pred, Y_i)

    if self.is_bias:
        # Y_pred: [n_samples, n_output_channel, output_width, output_height]
        # b: [n_output_channel, 1]
        repeat_time = self.output_width * self.output_height
        target_shape = (self.n_output_channel, self.output_width, self.output_height)
        b = Repeat(repeat_time=repeat_time, target_shape=target_shape)(self.b)
        assert b.data.shape == (self.n_output_channel, self.output_width, self.output_height)
        # Note: relies on numpy broadcasting.
        Y_pred = Add()(Y_pred, b)

    assert Y_pred.data.shape == (self.n_samples, self.n_output_channel,
                                 self.output_width, self.output_height)
    return Y_pred
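# A quick worked check (illustrative only) of the dimension rule used by Conv2D above,
# output_dim = (N - K + 2P) / S + 1, with the values from test_conv2d:
# N = 28, K = 5, S = 2, P = 1. `conv_output_dim` is a hypothetical helper.
def conv_output_dim(n, k, stride, padding):
    return (n - k + 2 * padding) // stride + 1

# Example usage:
#   conv_output_dim(28, 5, stride=2, padding=1)  # (28 - 5 + 2) // 2 + 1 = 13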
def test_sequential():
    n_samples = 5
    n_input = 4
    n_output = 3
    n_tmp = 10
    X = Tensor(np.random.randn(n_samples, n_input), requires_grad=False, name='X')
    Y = Tensor(np.random.randn(n_samples, n_output), requires_grad=False, name='Y')

    li = linear.Linear(n_input=n_input, n_output=n_output)(X)
    li_2 = linear.Linear(n_input=n_output, n_output=n_tmp)(li)
    Y_pred = linear.Linear(n_input=n_tmp, n_output=n_output)(li_2)
    # print(Y_pred.data)

    model = sequential.Sequential()
    linear_model = linear.Linear(n_input=n_input, n_output=n_output)
    linear_model_2 = linear.Linear(n_input=n_output, n_output=n_tmp)
    linear_model_3 = linear.Linear(n_input=n_tmp, n_output=n_output)
    model.add_model(linear_model)
    model.add_model(linear_model_2)
    model.add_model(linear_model_3)
    model.compile()
    model.fit(X, Y, epochs=100, verbose=0)
    print('Linear best_epoch=%s, min_loss=%.4f' % (model.best_epoch, model.min_loss_val))

    model = sequential.Sequential()
    dense_model = dnn.Dense(n_input=n_input, n_output=n_output, activation='relu', lr=1e-2)
    dense_model_2 = dnn.Dense(n_input=n_output, n_output=n_tmp, activation='relu', lr=1e-2)
    dense_model_3 = dnn.Dense(n_input=n_tmp, n_output=n_output, activation='sigmoid')
    model.add_model(dense_model)
    model.add_model(dense_model_2)
    model.add_model(dense_model_3)
    model.compile()
    model.fit(X, Y, epochs=100, verbose=0)
    print('Dense best_epoch=%s, min_loss=%.4f' % (model.best_epoch, model.min_loss_val))

    model = sequential.Sequential(
        dnn.Dense(n_input=n_input, n_output=n_output, activation='relu', lr=1e-2),
        dnn.Dense(n_input=n_output, n_output=n_tmp, activation='relu', lr=1e-2),
        dnn.Dense(n_input=n_tmp, n_output=n_output, activation='sigmoid'))
    model.compile()
    model.fit(X, Y, epochs=100, verbose=0)
    print('Dense best_epoch=%s, min_loss=%.4f' % (model.best_epoch, model.min_loss_val))
def random_init_tensor(shape, **kwargs):
    # shape must be a valid numpy shape: an int or a tuple of ints.
    assert isinstance(shape, int) or isinstance(shape, tuple)
    return Tensor(np.random.random(shape), **kwargs)
def forward(self, *args):
    assert len(args) == 2
    assert isinstance(args[0], Tensor)
    assert isinstance(args[1], Tensor)
    self.A = args[0]
    self.B = args[1]
    # A and B may not have the same shape; rely on numpy broadcasting instead.
    # assert self.A.data.shape == self.B.data.shape

    if not isinstance(self.A.data, np.ndarray):
        C = Tensor(self.B.data)
    elif not isinstance(self.B.data, np.ndarray):
        C = Tensor(self.A.data)
    else:
        C = Tensor(self.A.data + self.B.data)

    C.name = self.name
    C.grad_fn = self
    if self.A.requires_grad or self.B.requires_grad:
        C.requires_grad = True
    self.A.parent = C
    self.B.parent = C
    C.left_child = self.A
    C.right_child = self.B
    self.output_shape = C.data.shape
    return C