def test_dnn(self):
    class dnn_mnist_model(nn.Module):
        def __init__(self):
            super(dnn_mnist_model, self).__init__()
            self.fc1 = nn.Linear(28 * 28, 256)
            self.fc2 = nn.Linear(256, 10)
            self.relu1 = nn.ReLU()
            self.relu2 = nn.ReLU()

        def forward(self, X):
            X = self.fc1(X)
            X = self.relu1(X)
            X = self.fc2(X)
            X = self.relu2(X)
            return X

    x, y, tx, ty = load_mnist()
    x = x.reshape((-1, BATCHSIZE, 28 * 28))
    y = y.reshape((-1, BATCHSIZE, 1))
    tx = tx.reshape((-1, 1, 28 * 28))
    ty = ty.reshape((-1, 1, 1))
    model = dnn_mnist_model()
    self.assertIsInstance(model, nn.Module)
    t_x = madml.tensor(x / 255.)  # normalize pixel values to [0, 1] (was 225, a typo)
    t_y = madml.tensor(y).onehot(label_count=10)
    # loss_fn = nn.MSELoss()
    loss_fn = nn.CrossEntropyLoss(with_logit=True)
    optim = optimizer.Adam(model.parameters(), lr=1e-3)
    train_loop(model, loss_fn, optim, t_x, t_y)
    test_x = madml.tensor(tx / 255.)
    test_y = madml.tensor(ty)
    acc = test_loop(model, test_x, test_y)
    print(sum(acc) / len(acc))
def test_identity():
    class identity_model(nn.Module):
        def __init__(self):
            super(identity_model, self).__init__()
            self.fc1 = nn.linear(32, 32, False)
            self.fc2 = nn.linear(32, 32, False)

        def forward(self, X):
            X = self.fc1(X)
            X = self.fc2(X)
            return X

    model = identity_model()
    print(model.parameters())
    x = np.ones((2, 32))
    t_x = madml.tensor(x)
    t_y = madml.tensor(x.copy())
    loss_fn = nn.mseloss()
    optim = optimizer.adam(model.parameters(), lr=1e-2)
    for i in range(108):
        optim.zero_grad()
        logit = model(t_x)
        loss = loss_fn(logit, t_y)
        loss.backward()
        optim.step()
        print('===', i, logit.shape, loss.host_data, loss_fn.accuracy())
        if i % 10 == 0:
            print(logit.host_data)
def test_identity(self):
    import madml
    import madml.nn as nn
    import madml.optimizer as optimizer

    class identity_model(nn.Module):
        def __init__(self):
            super(identity_model, self).__init__()
            self.fc1 = nn.linear(32, 32)
            self.fc2 = nn.linear(32, 32)

        def forward(self, X):
            X = self.fc1(X)
            X = self.fc2(X)
            return X

    model = identity_model()
    self.assertIsInstance(model, nn.Module)
    x = np.ones((2, 32))
    t_x = madml.tensor(x)
    t_y = madml.tensor(x.copy())
    loss_fn = nn.mseloss()
    optim = optimizer.adam(model.parameters(), lr=1e-2)
    for i in range(100):
        optim.zero_grad()
        logit = model(t_x)
        loss = loss_fn(logit, t_y)
        loss.backward()
        optim.step()
        print('===', i, logit.shape, loss.host_data, loss_fn.accuracy())
    self.assertTrue(loss_fn.accuracy() > 0.9)
def test_cnn(self):
    import madml
    import madml.nn as nn
    import madml.optimizer as optimizer

    class cnn_mnist_model(nn.Module):
        def __init__(self):
            super(cnn_mnist_model, self).__init__()
            self.conv1 = nn.conv2d(1, 32, 3, padding=1)
            self.pool = nn.maxpool2d(2, 2)
            self.conv2 = nn.conv2d(32, 48, 3)
            self.fc1 = nn.linear(48 * 2 * 2, 120)  # flattened input is (599, 192)
            self.fc2 = nn.linear(120, 84)
            self.fc3 = nn.linear(84, 10)
            self.relu1 = nn.relu()
            self.relu2 = nn.relu()
            self.relu3 = nn.relu()
            self.relu4 = nn.relu()

        def forward(self, X):
            X = self.conv1(X)   # 32 x 8 x 8 (padding keeps the 8x8 digits size)
            X = self.relu1(X)
            X = self.pool(X)    # 32 x 4 x 4
            X = self.conv2(X)   # 48 x 2 x 2
            X = self.relu2(X)
            X.flatten()
            X = self.fc1(X)
            X = self.relu3(X)
            X = self.fc2(X)
            X = self.relu4(X)
            X = self.fc3(X)
            return X

    BATCHSIZE = 599
    x, y = load_digits(return_X_y=True)
    tx, ty = x[:-100], y[:-100]
    x = x.reshape((-1, BATCHSIZE, 1, 8, 8))
    y = y.reshape((-1, BATCHSIZE, 1))
    tx = tx.reshape((-1, 1, 1, 8, 8))
    ty = ty.reshape((-1, 1, 1))
    model = cnn_mnist_model()
    self.assertIsInstance(model, nn.Module)
    t_x = madml.tensor(x / 1.)
    t_y = madml.tensor(y).onehot(label_count=10)
    # loss_fn = nn.MSELoss()
    loss_fn = nn.crossentropyloss(with_logit=True)
    optim = optimizer.adam(model.parameters(), lr=1e-3)
    train_loop(model, loss_fn, optim, t_x, t_y, epochs=30)
    test_x = madml.tensor(tx / 1.)
    test_y = madml.tensor(ty)
    acc = test_loop(model, test_x, test_y)
    print(sum(acc) / len(acc))
    self.assertTrue(True)
def test_mnist_cnn():
    class cnn_mnist_model(nn.Module):
        def __init__(self):
            super(cnn_mnist_model, self).__init__()
            self.conv1 = nn.conv2d(1, 32, 3, padding=1)
            self.conv2 = nn.conv2d(32, 32, 1, stride=2)
            self.conv3 = nn.conv2d(32, 48, 3)
            self.fc1 = nn.linear(48 * 2 * 2, 120)  # flattened input is (599, 192)
            self.fc2 = nn.linear(120, 84)
            self.fc3 = nn.linear(84, 10)
            self.relu1 = nn.relu()
            self.relu2 = nn.relu()
            self.relu3 = nn.relu()
            self.relu4 = nn.relu()
            # self.fc3.to(0)
            # self.fc2.to(0)
            # self.fc1.to(0)
            # self.conv1.to(0)
            # self.conv2.to(0)
            # self.conv3.to(0)

        def forward(self, X):
            X = self.conv1(X)
            X = self.relu1(X)
            X = self.conv2(X)
            X = self.conv3(X)
            X = self.relu2(X)
            X = madml.flatten(X)
            X = self.fc1(X)
            X = self.relu3(X)
            X = self.fc2(X)
            X = self.relu4(X)
            X = self.fc3(X)
            return X

    BATCHSIZE = 599
    x, y = load_digits(return_X_y=True)
    tx, ty = x[:-100], y[:-100]
    x = x.reshape((-1, BATCHSIZE, 1, 8, 8))
    y = y.reshape((-1, BATCHSIZE, 1))
    tx = tx.reshape((-1, 1, 1, 8, 8))
    ty = ty.reshape((-1, 1, 1))
    model = cnn_mnist_model()
    t_x = madml.tensor(x / 1.)
    t_y = madml.tensor(y).onehot(label_count=10)
    loss_fn = nn.mseloss()
    # loss_fn = nn.crossentropyloss(with_logit=True)
    optim = optimizer.adam(model.parameters(), lr=1e-3)
    train_loop(model, loss_fn, optim, t_x, t_y, epochs=30)
def test_crossentropy(self):
    x = np.random.rand(3, 5).astype(np.float32)
    labels = np.random.randint(0, high=5, size=(3,))
    t1 = madml.tensor(x)
    target = madml.tensor(labels)
    module = nn.CrossEntropyLoss()
    loss = module.forward_cpu(t1, target)
    dx = module.backward_cpu()
    print(loss.host_data, dx.gradient.host_data)
def test_cnn(self):
    class cnn_mnist_model(nn.Module):
        def __init__(self):
            super(cnn_mnist_model, self).__init__()
            self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
            self.pool = nn.MaxPool2d(2, 2)
            self.conv2 = nn.Conv2d(32, 48, 3)
            self.fc1 = nn.Linear(48 * 12 * 12, 120)
            self.fc2 = nn.Linear(120, 84)
            self.fc3 = nn.Linear(84, 10)
            self.relu1 = nn.ReLU()
            self.relu2 = nn.ReLU()
            self.relu3 = nn.ReLU()
            self.relu4 = nn.ReLU()

        def forward(self, X):
            X = self.conv1(X)
            X = self.relu1(X)
            X = self.pool(X)   # 32 x 14 x 14
            X = self.conv2(X)  # 48 x 12 x 12
            X = self.relu2(X)
            X.flatten()
            X = self.fc1(X)
            X = self.relu3(X)
            X = self.fc2(X)
            X = self.relu4(X)
            X = self.fc3(X)
            return X

    x, y, tx, ty = load_mnist()
    x = x.reshape((-1, BATCHSIZE, 1, 28, 28))
    y = y.reshape((-1, BATCHSIZE, 1))
    tx = tx.reshape((-1, 1, 1, 28, 28))
    ty = ty.reshape((-1, 1, 1))
    model = cnn_mnist_model()
    self.assertIsInstance(model, nn.Module)
    t_x = madml.tensor(x / 255.)  # normalize pixel values to [0, 1] (was 225, a typo)
    t_y = madml.tensor(y).onehot(label_count=10)
    # loss_fn = nn.MSELoss()
    loss_fn = nn.CrossEntropyLoss(with_logit=True)
    optim = optimizer.Adam(model.parameters(), lr=1e-3)
    train_loop(model, loss_fn, optim, t_x, t_y)
    test_x = madml.tensor(tx / 255.)
    test_y = madml.tensor(ty)
    acc = test_loop(model, test_x, test_y)
    print(sum(acc) / len(acc))
def test_crossentropy(self):
    import madml
    import madml.nn as nn
    x = np.random.rand(3, 5).astype(np.float32)
    labels = np.random.randint(0, high=5, size=(3,))
    t1 = madml.tensor(x)
    target = madml.tensor(labels)
    module = nn.crossentropyloss()
    loss = module.forward(t1, target)
    dx = module.backward()
    print(loss.host_data, dx.host_data)
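# Hedged reference check for the two cross-entropy tests above: plain NumPy
# softmax cross-entropy with integer targets. This is the standard math, not
# necessarily the exact reduction madml's crossentropyloss performs, so treat
# it as a sketch for eyeballing loss.host_data rather than a strict oracle.
def reference_cross_entropy(x, labels):
    # x: (N, C) logits, labels: (N,) integer class indices
    shifted = x - x.max(axis=1, keepdims=True)  # stabilize the exponent
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    loss = -log_probs[np.arange(x.shape[0]), labels].mean()
    dx = np.exp(log_probs)                      # softmax probabilities
    dx[np.arange(x.shape[0]), labels] -= 1.0    # subtract one-hot target
    return loss, dx / x.shape[0]                # mean-reduced gradient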
def __init__(self, params: List[Parameter], lr: float = 1e-2, lr_decay: float = 0.,
             weight_decay: float = 0, initial_accumulator_value: int = 0,
             eps: float = 1e-10) -> None:
    if not 0.0 <= lr:
        raise ValueError("Invalid learning rate: {}".format(lr))
    if not 0.0 <= lr_decay:
        raise ValueError("Invalid lr_decay value: {}".format(lr_decay))
    if not 0.0 <= weight_decay:
        raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
    if not 0.0 <= initial_accumulator_value:
        raise ValueError("Invalid initial_accumulator_value value: {}".format(
            initial_accumulator_value))
    if not 0.0 <= eps:
        raise ValueError("Invalid epsilon value: {}".format(eps))
    defaults = dict(lr=lr, lr_decay=lr_decay, eps=eps, weight_decay=weight_decay,
                    initial_accumulator_value=initial_accumulator_value)
    super(Adagrad, self).__init__(params, defaults)
    for p in self.params:
        p.optimizer_stuff = [
            tensor([0.0 for _ in range(p.param.size)], p.param.shape,
                   requires_grad=False)
        ]
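# For context: the single buffer allocated per parameter above is Adagrad's
# squared-gradient accumulator. A minimal NumPy sketch of the textbook update
# it supports, assuming `state` is that accumulator and `g` the gradient (the
# library's actual step() may order the decay terms differently):
def _adagrad_step_sketch(param, g, state, lr=1e-2, lr_decay=0.0, eps=1e-10, t=1):
    clr = lr / (1 + (t - 1) * lr_decay)  # decayed learning rate at step t
    state += g * g                       # accumulate squared gradients
    param -= clr * g / (np.sqrt(state) + eps)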
def __init__(self, params: List[Parameter], lr: float = 1e-3, momentum: float = 0.0,
             dampening: float = 0., weight_decay: float = 0,
             nesterov: bool = False) -> None:
    if lr < 0.0:
        raise ValueError("Invalid learning rate: {}".format(lr))
    if momentum < 0.0:
        raise ValueError("Invalid momentum value: {}".format(momentum))
    if weight_decay < 0.0:
        raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
    if nesterov and (momentum <= 0 or dampening != 0):
        raise ValueError("Nesterov momentum requires a momentum and zero dampening")
    defaults = dict(lr=lr, momentum=momentum, dampening=dampening,
                    weight_decay=weight_decay, nesterov=nesterov)
    super(SGD, self).__init__(params, defaults)
    if momentum > 0.0:
        for p in self.params:
            p.optimizer_stuff = [
                tensor([0.0 for _ in range(p.param.size)], p.param.shape,
                       requires_grad=nesterov)
            ]
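# The buffer allocated above backs classical momentum. A hedged NumPy sketch
# of the update it enables (PyTorch-style conventions; this library's step()
# may differ in detail):
def _sgd_step_sketch(param, g, buf, lr=1e-3, momentum=0.9, dampening=0.0,
                     weight_decay=0.0, nesterov=False):
    if weight_decay != 0.0:
        g = g + weight_decay * param       # L2 penalty folded into the gradient
    buf *= momentum
    buf += (1 - dampening) * g             # v = mu * v + (1 - dampening) * g
    step = g + momentum * buf if nesterov else buf
    param -= lr * step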
def test_convolution():
    kernel_shape = [3, 3]
    stride = [1, 1]
    padding = [1, 1]
    dilation = [1, 1]
    x = np.array([[[[0., 1., 2., 3., 4.],
                    [5., 6., 7., 8., 9.],
                    [10., 11., 12., 13., 14.],
                    [15., 16., 17., 18., 19.],
                    [20., 21., 22., 23., 24.]]]]).astype(np.float32)
    y_with_padding = np.array([[[12., 21., 27., 33., 24.],
                                [33., 54., 63., 72., 51.],
                                [63., 99., 108., 117., 81.],
                                [93., 144., 153., 162., 111.],
                                [72., 111., 117., 123., 84.]]]).astype(np.float32).reshape([1, 1, 5, 5])
    t1 = madml.tensor(x)
    module = nn.conv2d(1, 1, kernel_shape, stride, padding, dilation,
                       weight_init='ones')
    t2 = module.forward(t1)
    y = t2.host_data
    module.to(0)
    t3 = module.forward(t1)
    y_hat = t3.download()
    print(y_hat == y)
    input()
def forward_cpu(self, x: tensor) -> tensor:
    y = zeros_like(x)
    self.mask = tensor(np.random.rand(*x.shape), x.shape)
    self.mask.host_data = self.mask.host_data < self.prob
    tmp = x.host_data / (1 - self.prob)  # inverted dropout: rescale kept units
    tmp[self.mask.host_data] = 0
    y.host_data = tmp  # bug fix: the scaled result was computed but never written to y
    self.cache = [x, y]
    return y
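# Sanity note on the inverted-dropout scaling in forward_cpu above: dividing
# the kept activations by (1 - prob) preserves the expected activation, so no
# rescaling is needed at inference time. A quick standalone NumPy check
# (hypothetical helper, not part of the library):
def _dropout_expectation_check(prob=0.3, n=100000):
    x = np.ones(n)
    mask = np.random.rand(n) < prob  # True where the unit is dropped
    dropped = x / (1 - prob)
    dropped[mask] = 0
    return dropped.mean()            # ~= x.mean() == 1.0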
def identity_train_loop(model_class=identity_model):
    model = model_class()
    x = np.ones((2, 32))
    t_x = madml.tensor(x)
    t_y = madml.tensor(x.copy())
    loss_fn = nn.MSELoss()
    optim = optimizer.Adam(model.parameters(), lr=1e-2)
    logit = None
    for _ in range(100):
        optim.zero_grad()
        logit = model(t_x)
        loss = loss_fn(logit, t_y)
        loss.backward()
        optim.step()
        print(logit.shape, loss.host_data, loss_fn.accuracy())
    print(logit.host_data)
def test_relu(self):
    x = np.random.uniform(-2, 2, size=81).reshape([9, 9])
    t1 = madml.tensor(x)
    module = nn.ReLU()
    logit = module.forward_cpu(t1)
    logit.gradient.host_data = x
    y = logit.host_data
    dx = module.backward_cpu().gradient.host_data
    # With the upstream gradient set to x itself, the masked gradient should
    # sum to the same value as the ReLU output. (The original compared two
    # scalars and then called .all() on the result.)
    self.assertAlmostEqual(np.sum(y), np.sum(dx))
def test_dnn(self):
    import madml
    import madml.nn as nn
    import madml.optimizer as optimizer

    class dnn_mnist_model(nn.Module):
        def __init__(self):
            super(dnn_mnist_model, self).__init__()
            self.fc1 = nn.linear(8 * 8, 256)
            self.fc2 = nn.linear(256, 10)
            self.relu1 = nn.relu()
            self.relu2 = nn.relu()

        def forward(self, X):
            X = self.fc1(X)
            X = self.relu1(X)
            X = self.fc2(X)
            X = self.relu2(X)
            return X

    BATCHSIZE = 599
    x, y = load_digits(return_X_y=True)
    tx, ty = x[:-100], y[:-100]
    x = x.reshape((-1, BATCHSIZE, 8 * 8))
    y = y.reshape((-1, BATCHSIZE, 1))
    tx = tx.reshape((-1, 1, 8 * 8))
    ty = ty.reshape((-1, 1, 1))
    model = dnn_mnist_model()
    self.assertIsInstance(model, nn.Module)
    t_x = madml.tensor(x / 1.)
    t_y = madml.tensor(y).onehot(label_count=10)
    # loss_fn = nn.MSELoss()
    loss_fn = nn.crossentropyloss(with_logit=True)
    optim = optimizer.adam(model.parameters(), lr=1e-3)
    train_loop(model, loss_fn, optim, t_x, t_y, epochs=30)
    test_x = madml.tensor(tx / 1.)
    test_y = madml.tensor(ty)
    acc = test_loop(model, test_x, test_y, early_stop=10)
    print(sum(acc) / len(acc))
    self.assertTrue(True)
def test_tensor(self):
    import madml
    x = np.array([[[[0., 1., 2., 3., 4.],  # (1, 1, 5, 5) input tensor
                    [5., 6., 7., 8., 9.],
                    [10., 11., 12., 13., 14.],
                    [15., 16., 17., 18., 19.],
                    [20., 21., 22., 23., 24.]]]]).astype(np.float32)
    t1 = madml.tensor(x)
    self.assertTrue(t1.shape == list(x.shape))
    self.assertTrue((t1.host_data == x).all())
def __init__(self, size_average=None, reduce=None, reduction: str = 'mean') -> None:
    super(_Loss, self).__init__()
    if size_average is not None or reduce is not None:
        self.reduction = None  # _Reduction.legacy_get_string(size_average, reduce)
    else:
        self.reduction = reduction
    self.y = tensor([0], [1])
    self.losses = []
def test_conv(self):
    kernel_shape = [3, 3]
    stride = [1, 1]
    padding = [1, 1]
    dilation = [1, 1]
    x = np.array([[[[0., 1., 2., 3., 4.],
                    [5., 6., 7., 8., 9.],
                    [10., 11., 12., 13., 14.],
                    [15., 16., 17., 18., 19.],
                    [20., 21., 22., 23., 24.]]]]).astype(np.float32)
    y_with_padding = np.array([[[12., 21., 27., 33., 24.],
                                [33., 54., 63., 72., 51.],
                                [63., 99., 108., 117., 81.],
                                [93., 144., 153., 162., 111.],
                                [72., 111., 117., 123., 84.]]]).astype(np.float32).reshape([1, 1, 5, 5])
    t1 = madml.tensor(x)
    module = nn.Conv2d(1, 1, kernel_shape, stride, padding, dilation,
                       weight_init='ones')
    t2 = module.forward_cpu(t1)
    y = t2.host_data
    self.assertTrue((y == y_with_padding).all())

    padding = [0, 0]
    y_without_padding = np.array([[[[54., 63., 72.],
                                    [99., 108., 117.],
                                    [144., 153., 162.]]]]).astype(np.float32).reshape([1, 1, 3, 3])
    module2 = nn.Conv2d(1, 1, kernel_shape, stride, padding, dilation,
                        weight_init='ones')
    t3 = module2.forward_cpu(t1)
    y2 = t3.host_data
    self.assertTrue((y2 == y_without_padding).all())

    dy = np.array([[[[0., 1., 2.],
                     [3., 4., 5.],
                     [6., 7., 8.]]]]).astype(np.float32).reshape([1, 1, 3, 3])
    dx = np.array([[[[0., 1., 3., 3., 2.],
                     [3., 8., 15., 12., 7.],
                     [9., 21., 36., 27., 15.],
                     [9., 20., 33., 24., 13.],
                     [6., 13., 21., 15., 8.]]]]).reshape([1, 1, 5, 5])
    t3.gradient.host_data = dy
    _ = module2.backward_cpu()
    y3 = t1.gradient.host_data
    self.assertTrue((y3 == dx).all())
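# Where the hard-coded expectations in test_conv come from: a ones-initialized
# 3x3 kernel just sums each neighborhood, so the reference outputs can be
# regenerated by direct cross-correlation. A hedged NumPy sketch (pure loops,
# no scipy assumed; `conv2d_ones_reference` is a hypothetical helper):
def conv2d_ones_reference(x, k=3, pad=1):
    n = x.shape[0]
    xp = np.pad(x, pad)
    out = np.zeros((n - k + 1 + 2 * pad,) * 2, dtype=x.dtype)
    for i in range(out.shape[0]):
        for j in range(out.shape[1]):
            out[i, j] = xp[i:i + k, j:j + k].sum()  # ones kernel == window sum
    return out

# conv2d_ones_reference(np.arange(25, dtype=np.float32).reshape(5, 5), pad=1)
# reproduces y_with_padding; pad=0 reproduces y_without_padding.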
def test_relu(self):
    import madml
    import madml.nn as nn
    x = np.random.uniform(-2, 2, size=81).reshape([9, 9])
    t1 = madml.tensor(x)
    module = nn.relu()
    logit = module.forward(t1)
    y = logit.host_data
    logit.gradient.host_data = x
    dlogit = module.backward()
    dx = dlogit.host_data
    # With the upstream gradient set to x itself, the masked gradient should
    # sum to the same value as the ReLU output. (The original compared two
    # scalars and then called .all() on the result.)
    self.assertAlmostEqual(np.sum(y), np.sum(dx))
def test_relu():
    x = np.random.uniform(-2, 2, size=81).reshape([9, 9])
    t1 = madml.tensor(x)
    module = nn.relu()
    t3 = module._forward_gpu(t1)
    y_hat = t3.download()
    print(y_hat)
    print()
    t2 = module._forward_cpu(t1)
    y = t2.host_data
    print(y)
    input()
def __init__(self, params: List[Parameter], lr: float = 1e-3,
             betas: List[float] = (0.9, 0.999), eps: float = 1e-8,
             weight_decay: float = 0.0, amsgrad: bool = False) -> None:
    if not 0.0 <= lr:
        raise ValueError("Invalid learning rate: {}".format(lr))
    if not 0.0 <= eps:
        raise ValueError("Invalid epsilon value: {}".format(eps))
    if not 0.0 <= betas[0] < 1.0:
        raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
    if not 0.0 <= betas[1] < 1.0:
        raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
    if not 0.0 <= weight_decay:
        raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
    defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay,
                    amsgrad=amsgrad)
    self.counter = 1
    super(Adam, self).__init__(params, defaults)
    for p in self.params:
        p.optimizer_stuff = [
            tensor([0.0 for _ in range(p.param.size)], p.param.shape,
                   requires_grad=True),
            tensor([0.0 for _ in range(p.param.size)], p.param.shape,
                   requires_grad=True)
        ]
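# The two buffers allocated per parameter above are Adam's first- and
# second-moment estimates. A minimal NumPy sketch of the bias-corrected update
# they support (standard Adam from Kingma & Ba; the library's step() may fold
# the corrections into the learning rate instead):
def _adam_step_sketch(param, g, m, v, t, lr=1e-3, betas=(0.9, 0.999), eps=1e-8):
    m *= betas[0]
    m += (1 - betas[0]) * g          # first moment (mean of gradients)
    v *= betas[1]
    v += (1 - betas[1]) * g * g      # second moment (mean of squared gradients)
    m_hat = m / (1 - betas[0] ** t)  # bias correction for step t >= 1
    v_hat = v / (1 - betas[1] ** t)
    param -= lr * m_hat / (np.sqrt(v_hat) + eps)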
def test_linear():
    a = np.random.ranf([3, 5]).astype(np.float32)
    t1 = madml.tensor(a)
    module = nn.linear(5, 5, use_gpu=True)
    t2 = module._forward_cpu(t1)
    y = t2.host_data
    t3 = module._forward_gpu(t1)
    y_hat = t3.download()
    t1.gradient.host_data = a
    print(y_hat == y)
    input()
def test_maxpool():
    kernel_shape = [2, 2]
    stride = [1, 1]
    padding = [0, 0]
    dilation = [1, 1]
    x = np.arange(0, 100).astype(np.float32).reshape([2, 2, 5, 5])
    t1 = madml.tensor(x)
    print(t1, '\n----------------------\n')
    module = nn.maxpool2d(kernel_shape, stride, padding, dilation)
    t3 = module(t1)
    y_hat = t3.host_data
    print(y_hat, '\n\n')  # , dx_hat, '\n\n'
    print('---------------------')
    input()
def test_maxpool(self):
    kernel_shape = [2, 2]
    stride = [1, 1]
    padding = [0, 0]
    dilation = [1, 1]
    x = np.arange(0, 100).astype(np.float32).reshape([2, 2, 5, 5])
    t1 = madml.tensor(x)
    module = nn.MaxPool2d(kernel_shape, stride, padding, dilation)
    t2 = module.forward_cpu(t1)
    y = t2.host_data
    # The input increases monotonically, so every 2x2 window's max is its
    # bottom-right element: the pooled output equals x shifted by one in both
    # spatial dimensions.
    test = x[..., 1:, 1:]
    self.assertTrue((test == y).all())
    t2.gradient.host_data = y
    _x = module.backward_cpu()
    dx = t1.gradient.host_data[..., 1:, 1:]
    self.assertTrue(True)
def forward(self, x: tensor) -> tensor:
    if self.vol_col is None:
        self.batch_size = x.shape[0]
        self.in_channels = x.shape[1]
        self._col = [1 for _ in range(MAX_DIMS)]
        self._vol = [1 for _ in range(MAX_DIMS)]
        for i in range(1, self.dims + 1):
            self._col[-i] = int((x.shape[-i] + 2 * self.padding[-i]
                                 - self.dilation[-i] * (self.kernel_size[-i] - 1) - 1)
                                // self.stride[-i]) + 1
            self._vol[-i] = x.shape[-i]
            # index from the back, consistent with the per-dimension lookups
            # above (the original used self.kernel_size[i])
            self.channel_offset *= self.kernel_size[-i]
        self.output_shape = [self._col[i] for i in range(-1, -(self.dims + 1), -1)]
        self.y = self.register_output_shape(
            [self.batch_size, self.in_channels, *self.output_shape])
        out_size = np.prod(self.output_shape)
        max_idx_size = self.in_channels * self.batch_size * out_size
        self.max_idx = tensor([0 for _ in range(max_idx_size)],
                              [self.in_channels * self.batch_size, out_size],
                              dtype=int)
        self.pool_kernel_y = self.register_kernel(vknn.max_reduce, False)
        self.pool_kernel_dcol = self.register_kernel(vknn.max_reduce, True)
        self.vol_col = self.register_module(vol2col, self.batch_size,
                                            self.in_channels, self._vol,
                                            self._col, self.kernel_size,
                                            self.stride, self.padding,
                                            self.dilation)
    self.col = self.vol_col.forward(x)
    self.col.reshape([self.in_channels * self.batch_size, self.channel_offset, -1])
    self.y.reshape([self.in_channels * self.batch_size, -1])
    super(_MaxPoolNd, self).forward(x)
    self.y.reshape([self.batch_size, self.in_channels, *self.output_shape])
    return self.y
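# How the vol2col-based pooling above works, in miniature: once every pooling
# window is unrolled into a column, max pooling is a single reduction over the
# window axis. A hedged NumPy sketch of that idea (indices simplified; the
# real module additionally stores argmax positions in self.max_idx so the
# backward pass can route gradients to the winning inputs):
def _maxpool_via_columns(cols):
    # cols: (channels * batch, kernel_h * kernel_w, out_h * out_w)
    return cols.max(axis=1), cols.argmax(axis=1)  # pooled values, max indices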
def test_linear(self):
    import madml
    import madml.nn as nn
    a = np.random.ranf([3, 5]).astype(np.float32)
    t1 = madml.tensor(a)
    self.assertTrue((t1.host_data == a).all())
    module = nn.linear(5, 5)
    t2 = module.forward(t1)
    y = t2.host_data
    module.to(0)
    t3 = module.forward(t1)
    y_hat = t3.download()
    self.assertTrue((y == y_hat).all())
    t2.gradient.host_data = a
    t2.gradient.upload()
    dx = module.backward()
    dx_hat = dx.download()
    print(dx_hat)
    self.assertTrue((dx_hat != 0.0).all())
def __init__(self, params: List[Parameter], lr: float = 1e-2, alpha: float = 0.99,
             eps: float = 1e-8, weight_decay: float = 0, momentum: float = 0.,
             centered: bool = False) -> None:
    if not 0.0 <= lr:
        raise ValueError("Invalid learning rate: {}".format(lr))
    if not 0.0 <= eps:
        raise ValueError("Invalid epsilon value: {}".format(eps))
    if not 0.0 <= momentum:
        raise ValueError("Invalid momentum value: {}".format(momentum))
    if not 0.0 <= weight_decay:
        raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
    if not 0.0 <= alpha:
        raise ValueError("Invalid alpha value: {}".format(alpha))
    defaults = dict(lr=lr, momentum=momentum, alpha=alpha, eps=eps,
                    centered=centered, weight_decay=weight_decay)
    super(RMSprop, self).__init__(params, defaults)
    # iterate self.params directly, matching the other optimizers
    # (the original called .items() on what is a list elsewhere)
    for p in self.params:
        p.optimizer_stuff = [
            tensor([0.0 for _ in range(p.param.size)], p.param.shape,
                   requires_grad=False)
        ]
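# The buffer above is RMSprop's running average of squared gradients. A
# hedged NumPy sketch of the plain (non-centered, momentum-free) update it
# supports; the centered variant would additionally track a running mean of g
# and subtract its square from sq_avg before the root:
def _rmsprop_step_sketch(param, g, sq_avg, lr=1e-2, alpha=0.99, eps=1e-8):
    sq_avg *= alpha
    sq_avg += (1 - alpha) * g * g  # E[g^2] running average
    param -= lr * g / (np.sqrt(sq_avg) + eps)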
def test_spiral(self):
    import madml
    import madml.nn as nn
    import madml.optimizer as optimizer
    from numpy import pi
    import matplotlib.pyplot as plt

    N = 400
    theta = np.sqrt(np.random.rand(N)) * 2 * pi  # np.linspace(0, 2*pi, 100)
    r_a = 2 * theta + pi
    data_a = np.array([np.cos(theta) * r_a, np.sin(theta) * r_a]).T
    x_a = data_a + np.random.randn(N, 2)
    res_a = np.append(x_a, np.zeros((N, 1)), axis=1)
    r_b = -2 * theta - pi
    data_b = np.array([np.cos(theta) * r_b, np.sin(theta) * r_b]).T
    x_b = data_b + np.random.randn(N, 2)
    res_b = np.append(x_b, np.ones((N, 1)), axis=1)
    res = np.append(res_a, res_b, axis=0)
    np.random.shuffle(res)

    ax1 = plt.subplot(121)
    ax1.margins(0.05)
    ax1.scatter(x_a[:, 0], x_a[:, 1])
    ax1.scatter(x_b[:, 0], x_b[:, 1])

    class spiral_model(nn.Module):
        def __init__(self):
            super(spiral_model, self).__init__()
            self.fc1 = nn.linear(2, 16)
            self.fc2 = nn.linear(16, 16)
            self.fc3 = nn.linear(16, 2)
            self.tanh1 = nn.tanh()
            self.tanh2 = nn.tanh()
            self.sig = nn.relu()

        def forward(self, X):
            X = self.fc1(X)
            X = self.tanh1(X)
            X = self.fc2(X)
            X = self.tanh2(X)
            X = self.fc3(X)
            X = self.sig(X)
            return X

    model = spiral_model()
    self.assertIsInstance(model, nn.Module)
    x = res[..., :-1]
    y = res[..., 2]
    t_x = madml.tensor(x)
    t_y = madml.tensor(y)
    t_y = t_y.onehot(2)
    t_y.reshape([800, 2])
    loss_fn = nn.crossentropyloss(with_logit=True)
    # loss_fn = nn.MSELoss()
    optim = optimizer.adam(model.parameters(), lr=1e-2)
    logits = None
    for i in range(100):
        optim.zero_grad()
        logit = model(t_x)
        logits = logit.host_data
        loss = loss_fn(logit, t_y)
        loss.backward()
        optim.step()
        print('===', i, logit.shape, loss.host_data)

    logits = np.argmax(logits, axis=-1)
    result = res[:, :-1]
    ax2 = plt.subplot(122)
    ax2.scatter(result[logits == 0.][:, 0], result[logits == 0.][:, 1])
    ax2.scatter(result[logits == 1.][:, 0], result[logits == 1.][:, 1])
    plt.savefig('input_output.png')
    # fraction misclassified; the original took a signed mean, which lets
    # errors in opposite directions cancel out
    err = np.abs(logits - y).mean()
    print(1. - err)
    self.assertTrue(1.0 - err > 0.9)
def forward(self, x: tensor) -> tensor:
    self.y = self.register_output_shape(x.shape)
    self.mask = tensor(np.random.rand(*x.shape), x.shape)
    super(dropout, self).forward(x)
    return self.y