def main():
    model = MLP(1, 4, 1)
    init_random_params(model)

    optimizer = chainer0.optimizers.SGD(lr=0.0001)  # chainer0.optimizers.Adam()
    optimizer.setup(model)

    x, t = build_toy_dataset()
    x, t = Variable(x), Variable(t)

    max_iter = 1000
    weight_prior_variance = 10.0

    # Set up figure.
    fig = plt.figure(figsize=(12, 8), facecolor='white')
    ax = fig.add_subplot(111, frameon=False)
    plt.show(block=False)

    for i in range(max_iter):
        model.cleargrads()
        loss = logprob(model, x, t)
        loss += log_gaussian(model, weight_prior_variance)
        loss.backward()
        optimizer.update()

        if i % 100 == 0:
            plot_nn(model, ax, x.data, t.data)
            print("Iteration {} log likelihood {}".format(i, loss.data))
def test_train(self):
    np.random.seed(0)

    ws = list()
    ws.append(Variable(np.random.rand(2, 3)))
    ws.append(Variable(np.random.rand(3, 1)))

    data = Variable(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]))
    target = Variable(np.array([[0], [1], [0], [1]]))

    expected = [
        6.210704901174886, 1.4434549528818301, 0.809644434846779,
        0.7585458232291216, 0.7437140298400363, 0.7316218334889659,
        0.7198939685708763, 0.708293823629362, 0.6967362454336858,
        0.6851547179015602
    ]

    log = []
    for i in range(10):
        t = matmul(data, ws[0])
        t = sigmoid2(t)
        pred = matmul(t, ws[1])
        diff = (pred - target)
        loss = sum(diff * diff)
        loss.backward()

        for w in ws:
            w.data -= w.grad * 0.1
            w.cleargrad()

        #print(loss.data)
        log.append(loss.data)

    self.assertTrue(np.allclose(np.array(log), np.array(expected)))
def __init__(self, loc, scale):
    loc = Variable(loc) if not isinstance(loc, Variable) else loc
    scale = Variable(scale) if not isinstance(scale, Variable) else scale
    self.loc = loc
    self.scale = scale
    self.log_scale = F.log(scale)
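# A minimal sketch of a log_prob method that could pair with the constructor
# above, using only F.log/np and the arithmetic operators seen elsewhere in
# the repo; the actual chainer0.distributions implementation may differ.
def log_prob(self, x):
    # log N(x; loc, scale) = -log(scale) - 0.5*log(2*pi) - 0.5*((x - loc)/scale)**2
    z = (x - self.loc) / self.scale
    return -1 * self.log_scale - 0.5 * np.log(2 * np.pi) - 0.5 * z ** 2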
def __init__(self, loc, scale_tril):
    loc = Variable(loc) if not isinstance(loc, Variable) else loc
    scale_tril = Variable(scale_tril) if not \
        isinstance(scale_tril, Variable) else scale_tril
    self.loc = loc
    self.scale = scale_tril
def test_forward(self):
    x = Variable(np.array([[-1, 0, 1, 2], [2, 0, 1, -1]]))
    t = Variable(np.array([3, 0]))
    y = F.softmax_cross_entropy(x, t)
    expected = 0.440189
    self.assertTrue(abs(y.data - expected) < 1e-6)
def test_backward(self):
    x = Variable(np.array([[-1, 0, 1, 2], [2, 0, 1, -1]]))
    t = Variable(np.array([3, 0]))
    y = F.softmax_cross_entropy(x, t)
    y.backward()
    #print(x.grad)

    expected = np.array([[0.0160293, 0.04357216, 0.11844141, -0.17804287],
                         [-0.17804287, 0.04357216, 0.11844141, 0.0160293]])
    #print(x.grad - expected)
    self.assertTrue(np.allclose(x.grad, expected))
def __init__(self, in_size, out_size=None, nobias=False):
    super().__init__()
    self.nobias = nobias

    if out_size is None:
        # Called as Linear(out_size): defer W's initialization until the
        # input size is known.
        in_size, out_size = out_size, in_size
    self.out_size = out_size

    with self.init_scope():
        self.W = Variable(None)
        if in_size is not None:
            self._initialize_params(in_size)
        self.b = None if nobias else Variable(np.zeros(out_size))
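# A rough sketch (not the repo's actual code) of the two pieces the
# constructor defers to: _initialize_params fills W once in_size is known,
# and forward applies y = xW + b. The initializer choice, the use of
# F.matmul, and NumPy-style broadcasting of the bias are assumptions.
def _initialize_params(self, in_size):
    self.W.data = np.random.randn(in_size, self.out_size) / np.sqrt(in_size)

def forward(self, x):
    if self.W.data is None:
        # Lazy initialization on the first call, once the input width is known.
        self._initialize_params(x.data.shape[1])
    y = F.matmul(x, self.W)
    if self.b is not None:
        y = y + self.b
    return y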
def test_train(self):

    def _d(*args):
        s = args[0] if len(args) == 1 else args[0] * args[1]
        return np.arange(0, s).reshape(args) * 0.01

    # Network definition
    class MLP(chainer0.Chain):
        def __init__(self, n_in, n_units, n_out):
            super().__init__()
            with self.init_scope():
                self.l1 = L.Linear(n_in, n_units)
                self.l2 = L.Linear(n_units, n_units)
                self.l3 = L.Linear(n_units, n_out)

        def forward(self, x):
            x = F.tanh(self.l1(x))
            x = F.tanh(self.l2(x))
            return self.l3(x)

    def logprob(model, x, t, noise_scale=0.1):
        noise_scale = np.array(noise_scale)
        pred = model(x)
        logp = D.Normal(t, noise_scale).log_prob(pred)
        return -1 * F.sum(logp)

    def build_toy_dataset(n_data=80, noise_std=0.1):
        rs = np.random.RandomState(0)
        inputs = np.concatenate([
            np.linspace(0, 3, num=int(n_data / 2)),
            np.linspace(6, 8, num=int(n_data / 2))
        ])
        targets = np.cos(inputs) + rs.randn(n_data) * noise_std
        inputs = (inputs - 4.0) / 2.0
        inputs = inputs[:, np.newaxis]
        targets = targets[:, np.newaxis] / 2.0
        return inputs, targets

    def init_random_params(model, init_scale=0.1):
        for param in model.params():
            param.data = _d(*param.data.shape)

    model = MLP(1, 4, 1)
    init_random_params(model)

    x, t = build_toy_dataset()
    x, t = Variable(x), Variable(t)

    loss = logprob(model, x, t)
    expected = 399.9346419766375
    self.assertEqual(float(loss.data), float(expected))
def test_forward(self):
    x = Variable(np.array([[1, 2, 3], [2, 3, 4]]))
    W = Variable(np.eye(5))
    y = embed_id(x, W)

    # With W = I_5, embed_id is a row lookup, so each index maps to its
    # one-hot vector.
    expected = np.array(
        [[[0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [0, 0, 0, 1, 0]],
         [[0, 0, 1, 0, 0], [0, 0, 0, 1, 0], [0, 0, 0, 0, 1]]]
    )
    self.assertTrue(np.all(y.data == expected))
def test_backward(self):
    x = Variable(np.array([[1, 2, 3], [2, 3, 4]]))
    W = Variable(np.eye(5))
    y = embed_id(x, W)
    y.backward()

    # Each row of W.grad counts how many times that index appears in x.
    expected = np.array(
        [[0, 0, 0, 0, 0],
         [1, 1, 1, 1, 1],
         [2, 2, 2, 2, 2],
         [2, 2, 2, 2, 2],
         [1, 1, 1, 1, 1]])
    self.assertTrue(np.all(W.grad == expected))
def test_forward(self):
    x_data = np.array([-1, -2, 3, 4])
    x = Variable(x_data)
    y = F.relu(x)

    x_data[x_data < 0] = 0
    expected = x_data
    self.assertTrue(np.allclose(y.data, expected))
def test_forward(self):
    x_data = np.random.rand(10)
    x = Variable(x_data)
    y = F.max(x)
    expected = np.max(x_data)
    self.assertTrue(y.data == expected)
def forward(self, x):
    if self.h is None:
        # Lazily create the initial hidden state for this batch size.
        batch_size = x.data.shape[0]
        self.h = Variable(np.zeros((batch_size, self.hidden_size)))

    a = self.l1(x) + self.l2(self.h)
    self.h = F.tanh(a)
    return self.h
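# A plausible constructor and reset_state for the recurrent unit above,
# matching the fields the forward method touches (l1, l2, hidden_size, h);
# the argument names and the nobias choice are assumptions.
def __init__(self, in_size, hidden_size):
    super().__init__()
    self.hidden_size = hidden_size
    self.h = None
    with self.init_scope():
        self.l1 = L.Linear(in_size, hidden_size)
        self.l2 = L.Linear(hidden_size, hidden_size, nobias=True)

def reset_state(self):
    # Drop the stored hidden state before feeding a new, unrelated sequence.
    self.h = None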
def __init__(self, in_size, out_size, initialW=None, ignore_label=None):
    super().__init__()
    self.ignore_label = ignore_label

    if initialW is None:
        initialW = np.random.rand(in_size, out_size) - 0.5 / out_size

    with self.init_scope():
        self.W = Variable(initialW)
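# A minimal forward pass that could pair with the constructor above, reusing
# the embed_id function tested earlier in this file set; handling of
# ignore_label is omitted here and left as an assumption.
def forward(self, x):
    return embed_id(x, self.W)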
def test_train(self):
    x = Variable(np.array([1, 2, 1, 2]))
    target = Variable(np.array([[0], [1], [0], [1]]))

    model = Chain(
        embed=EmbedID(5, 3),
        linear=Linear(3, 1),
    )

    def forward(x):
        x = model.embed(x)
        x = tanh(x)
        x = model.linear(x)
        x = sigmoid(x)
        return x

    optimizer = SGD(lr=0.5)
    optimizer.setup(model)

    np.random.seed(0)
    model.embed.W.data = np.random.rand(5, 3)
    model.linear.W.data = np.random.rand(3, 1)

    log = []
    for i in range(10):
        pred = forward(x)
        loss = mean_squared_error(pred, target)
        model.cleargrads()
        loss.backward()
        optimizer.update()
        log.append(loss.data)

    expected = np.array([
        0.25458621375800417, 0.24710456626288174, 0.24017425722643587,
        0.23364699169761943, 0.22736806682064464, 0.2211879225084124,
        0.2149697611450082, 0.20859448689275056, 0.2019642998089552,
        0.195005940360243
    ])
    res = np.allclose(np.array(log), expected)
    self.assertTrue(res)
def plot_nn(model, ax, inputs, targets):
    # Plot data and functions.
    plt.cla()
    ax.plot(inputs.ravel(), targets.ravel(), 'bx', ms=12)
    plot_inputs = np.reshape(np.linspace(-7, 7, num=300), (300, 1))
    outputs = model(Variable(plot_inputs))
    ax.plot(plot_inputs, outputs.data, 'r', lw=3)
    ax.set_ylim([-1, 1])
    plt.draw()
    plt.pause(1.0 / 60.0)
def backward(self, dout):
    N = self.y.shape[0]

    # dL/dx = (softmax(x) - onehot(t)) / N, scaled by the incoming gradient.
    dx = self.y.copy()
    dx[np.arange(N), self.t] -= 1
    dx = Variable(dx)
    dx *= dout
    dx = dx / N
    return dx, None
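# A quick NumPy check of the (softmax(x) - onehot(t)) / N rule used above,
# evaluated on the same inputs as the softmax_cross_entropy backward test;
# the printed matrix matches that test's expected gradient.
import numpy as np

x = np.array([[-1., 0., 1., 2.], [2., 0., 1., -1.]])
t = np.array([3, 0])
p = np.exp(x) / np.exp(x).sum(axis=1, keepdims=True)  # row-wise softmax
p[np.arange(len(t)), t] -= 1                          # subtract one-hot targets
print(p / len(t))  # ~ [[0.0160, 0.0436, 0.1184, -0.1780], [-0.1780, ...]]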
def test_train(self):
    data = Variable(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]))
    target = Variable(np.array([[0], [1], [0], [1]]))

    model = Chain(
        f1=Linear(2, 3, nobias=True),
        f2=Linear(3, 1, nobias=True),
    )
    np.random.seed(0)
    model.f1.W.data = np.random.rand(2, 3)
    model.f2.W.data = np.random.rand(3, 1)

    expected = [
        1.0820313915580297, 0.6937790311924391, 0.4807580742799925,
        0.36192715863503955, 0.29491127763318664, 0.25681463756748657,
        0.23500603629345754, 0.22242443947250173, 0.21508930205325136,
        0.21074454544426313
    ]
    log = []

    def forward(x):
        h = sigmoid(model.f1(x))
        return model.f2(h)

    optimizer = SGD(lr=0.1)
    optimizer.setup(model)

    for i in range(10):
        y = forward(data)
        loss = mean_squared_error(y, target)
        model.cleargrads()
        loss.backward()
        optimizer.update()
        #print(loss.data)
        log.append(loss.data)

    self.assertTrue(np.allclose(np.array(log), np.array(expected)))
def main():
    model = L.Classifier(MLP(100, 10))

    optimizer = chainer0.optimizers.SGD()  # chainer0.optimizers.Adam()
    optimizer.setup(model)

    (x_train, t_train), (x_test, t_test) = mnist.load_data()

    """
    if args.resume:
        # Resume from a snapshot
        serializers.load_npz('{}/mlp.model'.format(args.resume), model)
        serializers.load_npz('{}/mlp.state'.format(args.resume), optimizer)
    """

    max_epoch = 10
    batch_size = 256
    batch_num = x_train.shape[0]
    max_iter = batch_num // batch_size
    L2_reg = 0.001

    print(" Epoch | Train accuracy | Test accuracy ")
    for e in range(max_epoch):
        for i in range(max_iter):
            model.cleargrads()
            x = x_train[(i * batch_size):((i + 1) * batch_size)]
            t = t_train[(i * batch_size):((i + 1) * batch_size)]
            x, t = Variable(x), Variable(np.array(t, "i"))
            loss = model(x, t)
            loss += L2_reg * l2_norm(model.params())
            loss.backward()
            optimizer.update()

        model(x_train, t_train)
        acc_train = model.accuracy
        model(x_test, t_test)
        acc_test = model.accuracy
        print("{:15}|{:20}|{:20}".format(e, acc_train.data, acc_test.data))
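# l2_norm is called above but not shown; a plausible definition that sums the
# squared entries of every parameter (the name, exact form, and the assumption
# that chainer0.functions is available as F are all guesses about the rest of
# the script):
def l2_norm(params):
    total = None
    for p in params:
        sq = F.sum(p * p)
        total = sq if total is None else total + sq
    return total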
def test_grad(self):

    def taylor_sine(x):
        # Taylor approximation to sine function
        ans = currterm = x
        i = 0
        while abs(currterm).data > 0.001:
            currterm = -currterm * x**2 / ((2 * i + 3) * (2 * i + 2))
            ans = ans + currterm
            i += 1
        return ans

    x = Variable(np.array([np.pi]))
    y = taylor_sine(x)
    y.backward()
    #print('Gradient of sin(pi) is', x.grad[0])

    # The true derivative is cos(pi) = -1; the small discrepancy comes from
    # truncating the series once terms drop below 1e-3.
    expected = -0.9998995297042175
    self.assertEqual(x.grad[0], expected)
def test_forward(self):
    y = Normal(np.array(0.0), np.array(1.0)).prob(Variable(np.array(0)))
    expected = 0.3989422804014327
    self.assertEqual(y.data, expected)
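# The expected value is simply the standard normal density at x = 0,
# exp(-0**2 / 2) / sqrt(2 * pi) = 1 / sqrt(2 * pi):
#
#   >>> float(1.0 / np.sqrt(2.0 * np.pi))
#   0.3989422804014327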
import numpy as np

from chainer0 import Function, Variable
from chainer0.functions.basic_math import Add, Mul

x = Variable(np.array([2.0]))
y = x**2 + x + 1.0
y.backward(enable_double_backprop=True)
dx = x.grad_var

print('y', y.data)
print('dx', x.grad)
assert y.data == 7.    # y  = x^2 + x + 1 = 7 at x = 2
assert x.grad == 5.    # y' = 2x + 1      = 5 at x = 2

x.cleargrad()
dx.backward()
print('ddx', x.grad)
assert x.grad == 2.    # y'' = 2

dx = x.grad_var
x.cleargrad()
dx.backward()
print('dddx', x.grad)
assert x.grad == 0.    # y''' = 0
import numpy as np
import matplotlib.pyplot as plt

import chainer0
from chainer0 import Function, Variable
import chainer0.functions as F

x_data = np.linspace(-7, 7, 200)
x = Variable(x_data)
y = F.tanh(x)
y.backward(enable_double_backprop=True)

vals = [y.data.flatten()]
for i in range(4):
    vals.append(x.grad.flatten())
    dx = x.grad_var
    x.cleargrad()
    dx.backward(enable_double_backprop=True)

'''
for i in range(4):
    vals.append(x.grad.flatten())
    dx = x.grad_var
    chainer0.grad(dx)
'''

for i, v in enumerate(vals):
    plt.plot(x_data, v)
plt.show()
def test_forward2(self):
    x = Variable(np.random.rand(10, 20, 30))
    y = F.sum(x, axis=1)
    expected = np.sum(x.data, axis=1)
    self.assertTrue(np.allclose(y.data, expected))
def test_forward(self):
    x = Variable(np.random.rand(10))
    y = F.sum(x)
    expected = np.sum(x.data)
    self.assertTrue(np.allclose(y.data, expected))
def grad_descent_sqrt_iter(a):
    return lambda x: x - 0.05 * (x**2 - a)


def sqrt(a, guess=10.):
    return fixed_point(newton_sqrt_iter, a, guess, distance, 1e-4)
    #return fixed_point(grad_descent_sqrt_iter, a, guess, distance, 1e-4)


def distance(x, y):
    x_data = x.data if isinstance(x, Variable) else x
    y_data = y.data if isinstance(y, Variable) else y
    return np.abs(x_data - y_data)


x = Variable(np.array(2.))
y = F.sqrt(x)
gy, = chainer0.grad([y], [x])
ggy, = chainer0.grad([gy], [x])

x2 = Variable(np.array(2.))
y2 = sqrt(x2)
gy2, = chainer0.grad([y2], [x2])
ggy2, = chainer0.grad([gy2], [x2])

print(y)
print(y2)
print()
print(gy)
print(gy2)
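# The script above calls newton_sqrt_iter and fixed_point without showing
# them; a plausible pair consistent with how they are invoked (both
# definitions are assumptions about the rest of the file):
def newton_sqrt_iter(a):
    # Newton's update for f(x) = x**2 - a:  x <- (x + a / x) / 2
    return lambda x: 0.5 * (x + a / x)

def fixed_point(f, a, init, distance, threshold):
    # Iterate x <- f(a)(x) until successive values are closer than threshold.
    update = f(a)
    prev, curr = init, update(init)
    while distance(curr, prev) > threshold:
        prev, curr = curr, update(curr)
    return curr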
def logistic_predictions(weights, inputs):
    score = matmul(inputs, weights)
    return sigmoid(score)


def training_loss(weights):
    # Training loss is the negative log-likelihood of the training labels.
    preds = logistic_predictions(weights, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -sum(log(label_probabilities))


# Build a toy dataset.
inputs = Variable(np.array([[0.52, 1.12, 0.77],
                            [0.88, -1.08, 0.15],
                            [0.52, 0.06, -1.30],
                            [0.74, -2.49, 1.39]]))
#targets = Variable(np.array([[True], [True], [False], [True]]))
#weights = Variable(np.array([[0.0], [0.0], [0.0]]))
targets = Variable(np.array([True, True, False, True]))
weights = Variable(np.array([0.0, 0.0, 0.0]))

# Check the loss at the initial weights: every prediction is 0.5, so the
# loss is 4 * ln(2) = 2.772588...
print("Initial loss:", training_loss(weights).data)
assert training_loss(weights).data == 2.772588722239781

# The original snippet is truncated after the loop header; a plausible
# gradient-descent completion:
for i in range(100):
    weights.cleargrad()
    loss = training_loss(weights)
    loss.backward()
    weights.data -= weights.grad * 0.01

print("Trained loss:", training_loss(weights).data)
import numpy as np
import heapq
import matplotlib.pyplot as plt

import chainer0
from chainer0 import Function, Variable
import chainer0.functions as F
from chainer0.computational_graph import get_dot_graph

x = Variable(np.array([1.0]), name='x')
#y = F.sin(x)
#y = (y + F.exp(x) - 0.5) * y
#y.backward()

y = F.tanh(x)
y.backward()

for i in range(3):
    gx = x.grad_var
    x.cleargrad()
    gx.backward()

txt = get_dot_graph(gx)
print(txt)
import numpy as np
import matplotlib.pyplot as plt

import chainer0
from chainer0 import Function, Variable
import chainer0.functions as F

x_data = np.linspace(-7, 7, 200)
x = Variable(x_data)
y = F.tanh(x)
gx, = chainer0.grad([y], [x], enable_double_backprop=True)

vals = [y.data.flatten()]
for i in range(4):
    vals.append(gx.data.flatten())
    gx, = chainer0.grad([gx], [x], enable_double_backprop=True)

for i, v in enumerate(vals):
    plt.plot(x_data, v)
plt.show()
def test_forward2(self):
    y = Normal(np.array(0.0), np.array(1.0)).prob(Variable(np.array(0.5)))
    # Standard normal density at 0.5: exp(-0.5 * 0.5**2) / sqrt(2 * pi)
    expected = 0.3520653267642995
    self.assertEqual(y.data, expected)