def __init__(self, n_input, n_hidden, n_output):
    # stateful (shared) variables
    self.l1 = L.Linear(n_input, n_hidden)
    self.l2 = L.Linear(n_hidden, n_hidden)
    self.l3 = L.Linear(n_hidden, n_hidden)
    self.fc = L.Linear(n_hidden, n_output)
    # self.optimizer = O.SGD2(list(self.get_params()), lr=1e-3, momentum=0.95, nestrov=True)
    self.optimizer = O.Adam(lr=1e-3)

    # computation graphs
    x = tt.matrix(name="x")
    t = tt.lvector(name="t")
    act = tt.nnet.relu
    h = act(self.l1(x))
    h = act(self.l2(h))
    h = act(self.l3(h))
    y = tt.nnet.softmax(self.fc(h))
    loss = tt.sum(tt.nnet.categorical_crossentropy(y, t))
    acc = tt.sum(tt.eq(tt.argmax(y, axis=1), t))

    # functions
    self.train = theano.function(
        [x, t], [loss, acc],
        updates=self.optimizer.updates(self.get_params(), loss))
    self.test = theano.function([x], y)
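# A minimal usage sketch (not part of the original): assumes the __init__
# above belongs to a class named MLP (hypothetical name here) and
# MNIST-like 784-dimensional inputs with 10 classes.
import numpy
import theano

model = MLP(n_input=784, n_hidden=100, n_output=10)
x_batch = numpy.random.randn(32, 784).astype(theano.config.floatX)
t_batch = numpy.random.randint(0, 10, size=32).astype("int64")

loss, acc = model.train(x_batch, t_batch)  # one Adam step; summed loss and correct count
probs = model.test(x_batch)                # (32, 10) softmax probabilities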
def __init__(self, n_output):
    # stateful (shared) variables
    self.conv1 = L.Conv2D(1, 10, kernel=(5, 5))
    self.conv2 = L.Conv2D(10, 20, kernel=(5, 5))
    self.dropout = L.Dropout()
    self.fc1 = L.Linear(320, 50)
    self.fc2 = L.Linear(50, n_output)
    self.optimizer = O.Adam(lr=1e-3)

    # computation graphs
    x = tt.tensor4(name="x")
    t = tt.lvector(name="t")
    act = tt.nnet.relu
    h = self.conv1(x)
    h = pool_2d(h, (2, 2), mode="max", ignore_border=True)
    h = act(h)
    h = self.conv2(h)
    h = self.dropout(h)
    h = pool_2d(h, (2, 2), mode="max", ignore_border=True)
    h = act(h)
    h = h.reshape([x.shape[0], -1])
    h = act(self.fc1(h))
    h = self.dropout(h)
    h = self.fc2(h)
    y = tt.nnet.softmax(h)
    loss = tt.sum(tt.nnet.categorical_crossentropy(y, t))
    acc = tt.sum(tt.eq(tt.argmax(y, axis=1), t))

    # functions
    self.train = theano.function(
        [x, t], [loss, acc],
        updates=self.optimize(loss, self.optimizer),
        givens={self.is_train: numpy.int8(True)})
    self.test = theano.function(
        [x, t], [loss, acc],
        givens={self.is_train: numpy.int8(False)})
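# The is_train flag swapped in via givens above is assumed to be an int8
# shared variable (presumably defined in a base class) that L.Dropout
# branches on. A minimal standalone sketch of that pattern, as an
# illustration rather than the library's actual Dropout code:
import numpy
import theano
import theano.tensor as tt
from theano.tensor.shared_randomstreams import RandomStreams

is_train = theano.shared(numpy.int8(0), name="is_train")
srng = RandomStreams(seed=0)

x = tt.matrix("x")
keep = 0.5  # keep probability
mask = srng.binomial(size=x.shape, p=keep, dtype=theano.config.floatX)
h = tt.switch(is_train, x * mask / keep, x)  # inverted dropout at train time

f_train = theano.function([x], h, givens={is_train: numpy.int8(1)})
f_test = theano.function([x], h, givens={is_train: numpy.int8(0)})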
def __init__(self, lr=1e-3, activation=tt.nnet.sigmoid, n_layers=3,
             weight_init=I.XavierUniform(), n_rand=128, n_g_hidden=512,
             n_d_hidden=32, n_input=2, n_d_output=1):
    self.n_rand = n_rand
    self.rng = tt.shared_randomstreams.RandomStreams()
    self.g = L.MLP(n_rand, n_g_hidden, n_input, n_layers=n_layers,
                   activation=activation, weight_init=weight_init)
    self.d = L.MLP(n_input, n_d_hidden, n_d_output, n_layers=n_layers,
                   activation=activation, weight_init=weight_init)

    n_sample = tt.lscalar("n_sample")
    self.generate = theano.function(
        [n_sample],
        self.g(self.rng.normal((n_sample, n_rand))))

    xs = tt.matrix("xs")
    d_loss = self.d_loss(xs)
    d_optimizer = O.Adam(lr)
    self.train_d = theano.function(
        [xs], d_loss,
        updates=self.d.optimize(d_loss, d_optimizer))

    g_loss = self.g_loss(n_sample)
    g_optimizer = O.Adam(lr)
    self.train_g = theano.function(
        [n_sample], g_loss,
        updates=self.g.optimize(g_loss, g_optimizer))
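# d_loss and g_loss are used above but not shown in this section. A minimal
# sketch under the standard minimax GAN formulation, assuming self.d ends in
# a sigmoid so its output is a probability (an illustration, not the
# original methods):
def d_loss(self, xs):
    # discriminator: push D(x) toward 1 on real data, D(G(z)) toward 0 on fakes
    fake = self.g(self.rng.normal((xs.shape[0], self.n_rand)))
    return -tt.mean(tt.log(self.d(xs)) + tt.log(1.0 - self.d(fake)))

def g_loss(self, n_sample):
    # non-saturating generator loss: push D(G(z)) toward 1
    fake = self.g(self.rng.normal((n_sample, self.n_rand)))
    return -tt.mean(tt.log(self.d(fake)))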
def predict(self, xs):
    # RNN.predict: thread the recurrent hidden state (h1, h2) through
    # consecutive calls so prediction can continue a sequence
    ys, self.h1, self.h2 = self.predict_fun(xs, self.h1, self.h2)
    return ys

n_step = 0.01
n_batch = 4
n_seq = 50
times = numpy.arange(n_seq * n_batch, dtype=theano.config.floatX).reshape(
    n_batch, n_seq, 1).transpose(1, 0, 2)
nxs = numpy.sin(times / n_step)

for o in [
        O.SGD(lr=1e-1, momentum=0.95, nestrov=True),
        O.Adadelta(lr=1.0),
        O.Adam(lr=1e-4),
]:
    log(o)
    model = RNN(o)
    initial_predict = model.predict(nxs[:-1])
    initial_loss = None
    start = time()
    for i in range(100):
        x_data = nxs[:-1]
        t_data = nxs[1:]
        l = model.train(x_data, t_data)
        if initial_loss is None:
            initial_loss = l
        if i % 10 == 0:
            log("loss", l)
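# Hypothetical stand-ins for the helpers the script assumes; neither is shown
# in this section. Presumably the elapsed time (via start) and the drop from
# initial_loss are reported after the loop to compare the three optimizers.
from time import time

def log(*args):
    # minimal stand-in: the original log helper is not shown
    print(*args)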