def _gpu_matrix_dot(matrix_a, matrix_b, matrix_c=None): """ Performs matrix multiplication. Attempts to use the GPU if it is available. If the matrix multiplication is too big to fit on the GPU, this falls back to the CPU after issuing a warning. Parameters ---------- matrix_a : ndarray Left operand of the product. matrix_b : ndarray Right operand of the product. matrix_c : ndarray, optional Preallocated output array; if given, the result is written into it in place. """ if not hasattr(ZCA._gpu_matrix_dot, 'theano_func'): ma, mb = T.matrices('A', 'B') mc = T.dot(ma, mb) ZCA._gpu_matrix_dot.theano_func = \ theano.function([ma, mb], mc, allow_input_downcast=True) theano_func = ZCA._gpu_matrix_dot.theano_func try: if matrix_c is None: return theano_func(matrix_a, matrix_b) else: matrix_c[...] = theano_func(matrix_a, matrix_b) return matrix_c except MemoryError: warnings.warn('Matrix multiplication too big to fit on GPU. ' 'Re-doing with CPU. Consider using ' 'THEANO_FLAGS="device=cpu" for your next ' 'preprocessor run') return np.dot(matrix_a, matrix_b, matrix_c)
def __init__(self, gen_params, # dictionary of generative model parameters GEN_MODEL, # class that inherits from GenerativeModel rec_params, # dictionary of approximate posterior ("recognition model") parameters REC_MODEL, # class that inherits from RecognitionModel xDim=2, # dimensionality of latent state yDim=2 # dimensionality of observations ): # instantiate rng's self.srng = RandomStreams(seed=234) self.nrng = np.random.RandomState(124) #--------------------------------------------------------- ## actual model parameters self.X, self.Y = T.matrices('X','Y') # symbolic variables for the data self.xDim = xDim self.yDim = yDim # instantiate our prior & recognition models self.mrec = REC_MODEL(rec_params, self.Y, self.xDim, self.yDim, self.srng, self.nrng) self.mprior = GEN_MODEL(gen_params, self.xDim, self.yDim, srng=self.srng, nrng = self.nrng) self.isTrainingRecognitionModel = True; self.isTrainingGenerativeModel = True;
def sample_parallel(): print "parallel" x, y = T.matrices("a", "b") diff = x - y abs_diff = abs(diff) diff_sq = diff**2 # takes two matrices as input, returns a list of three matrices f = theano.function([x, y], [diff, abs_diff, diff_sq]) print f([[0,1],[2,3]], [[10,11],[12,13]]) print
def test_grad(self, cls_ofg): x, y, z = T.matrices('xyz') e = x + y * z op = cls_ofg([x, y, z], [e]) f = op(x, y, z) f = f - T.grad(T.sum(f), y) fn = function([x, y, z], f) xv = np.ones((2, 2), dtype=config.floatX) yv = np.ones((2, 2), dtype=config.floatX) * 3 zv = np.ones((2, 2), dtype=config.floatX) * 5 assert np.all(11.0 == fn(xv, yv, zv))
def test_grad(self): x, y, z = T.matrices('xyz') e = x + y * z op = OpFromGraph([x, y, z], [e], mode='FAST_RUN', grad_depth=2) f = op(x, y, z) f = f - T.grad(T.sum(f), y) fn = function([x, y, z], f) xv = numpy.ones((2, 2), dtype=config.floatX) yv = numpy.ones((2, 2), dtype=config.floatX)*3 zv = numpy.ones((2, 2), dtype=config.floatX)*5 assert numpy.all(11.0 == fn(xv, yv, zv))
def test_connection_pattern(self, cls_ofg): # Basic case x, y, z = T.matrices('xyz') out1 = x * y out2 = y * z op1 = cls_ofg([x, y, z], [out1, out2]) results = op1.connection_pattern(None) expect_result = [[True, False], [True, True], [False, True]] assert results == expect_result # Graph with ops that don't have a 'full' connection pattern # and with ops that have multiple outputs m, n, p, q = T.matrices('mnpq') o1, o2 = op1(m, n, p) out1, out2 = op1(o1, q, o2) op2 = cls_ofg([m, n, p, q], [out1, out2]) results = op2.connection_pattern(None) expect_result = [[True, False], [True, True], [False, True], [True, True]] assert results == expect_result # Inner graph where some computation doesn't rely on explicit inputs srng = RandomStreams(seed=234) rv_u = srng.uniform((2, 2)) x, y = T.matrices('xy') out1 = x + rv_u out2 = y + 3 out3 = 3 + rv_u op3 = cls_ofg([x, y], [out1, out2, out3]) results = op3.connection_pattern(None) expect_result = [[True, False, False], [False, True, False], [True, False, True]] assert results == expect_result
def _build(self): if self._debug: theano.config.compute_test_value = 'warn' X,W = T.matrices('X','W') if self._debug: X.tag.test_value = np.random.rand(3,1) W.tag.test_value = np.random.rand(5,3) Z = T.dot(W,X) A = self._activation(Z) self._fpropagate = function([X, W],A) self._layers = [] self._generate_initial_weights()
def test_grad_grad(self): x, y, z = T.matrices('xyz') e = x + y * z op = OpFromGraph([x, y, z], [e]) f = op(x, y, z) f = f - T.grad(T.sum(f), y) f = f - T.grad(T.sum(f), y) fn = function([x, y, z], f) xv = numpy.ones((2, 2), dtype=config.floatX) yv = numpy.ones((2, 2), dtype=config.floatX) * 3 zv = numpy.ones((2, 2), dtype=config.floatX) * 5 assert numpy.allclose(6.0, fn(xv, yv, zv))
def test_straightforward(self): x, y, z = T.matrices('xyz') e = x + y * z op = OpFromGraph([x, y, z], [e], mode='FAST_RUN') f = op(x, y, z) - op(y, z, x) # (1+3*5=array of 16) - (3+1*5=array of 8) fn = function([x, y, z], f) xv = numpy.ones((2, 2), dtype=config.floatX) yv = numpy.ones((2, 2), dtype=config.floatX)*3 zv = numpy.ones((2, 2), dtype=config.floatX)*5 #print function, function.__module__ #print fn.maker.fgraph.toposort() fn(xv, yv, zv) assert numpy.all(8.0 == fn(xv, yv, zv)) assert numpy.all(8.0 == fn(xv, yv, zv))
def __init__(self, shape): self.in_size, self.out_size = shape self.W = init_weights(shape) self.b = init_bias(self.out_size) self.gW = init_gradws(shape) self.gb = init_bias(self.out_size) D, X = T.matrices("D", "X") def _active(X): return T.nnet.sigmoid(T.dot(X, self.W) + self.b) self.active = theano.function(inputs = [X], outputs = _active(X)) def _derive(D, X): return D * ((1 - X) * X) self.derive = theano.function( inputs = [D, X], outputs = _derive(D, X) ) def _propagate(D): return T.dot(D, self.W.T) self.propagate = theano.function(inputs = [D], outputs = _propagate(D)) x, dy = T.rows("x","dy") updates_grad = [(self.gW, self.gW + T.dot(x.T, dy)), (self.gb, self.gb + dy)] self.grad = theano.function( inputs = [x, dy], updates = updates_grad ) updates_clear = [ (self.gW, self.gW * 0), (self.gb, self.gb * 0)] self.clear_grad = theano.function( inputs = [], updates = updates_clear ) lr = T.scalar() t = T.scalar() updates_w = [ (self.W, self.W - self.gW * lr / t), (self.b, self.b - self.gb * lr / t)] self.update = theano.function( inputs = [lr, t], updates = updates_w )
def test_setitem(self): x, y, z, w = T.matrices('x', 'y', 'z', 'w') i1 = TheanoInterval(x, y) i2 = TheanoInterval(z, w) i1[:, 1:3] = i2 f = function([x, y, z, w], [i1.lower, i1.upper]) ex_x = A([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) ex_z = A([[20, 30], [50, 60], [80, 90]]) ex_y = 100 * ex_x ex_w = 100 * ex_z l, u = f(ex_x, ex_y, ex_z, ex_w) rl = A([[1., 20., 30.], [4., 50., 60.], [7., 80., 90.]]) ru = rl * 100 assert_array_equal(l, rl) assert_array_equal(u, ru)
def test_size_changes(self): x, y, z = T.matrices('xyz') e = T.dot(x, y) op = OpFromGraph([x, y], [e], mode='FAST_RUN') f = op(x, op(y, z)) fn = function([x, y, z], f) xv = numpy.ones((2, 3), dtype=config.floatX) yv = numpy.ones((3, 4), dtype=config.floatX)*3 zv = numpy.ones((4, 5), dtype=config.floatX)*5 res = fn(xv, yv, zv) assert res.shape == (2, 5) assert numpy.all(180.0 == res) res = fn(xv, yv, zv) assert res.shape == (2, 5) assert numpy.all(180.0 == res)
def test_size_changes(self, cls_ofg): x, y, z = tt.matrices("xyz") e = tt.dot(x, y) op = cls_ofg([x, y], [e]) f = op(x, op(y, z)) fn = function([x, y, z], f) xv = np.ones((2, 3), dtype=config.floatX) yv = np.ones((3, 4), dtype=config.floatX) * 3 zv = np.ones((4, 5), dtype=config.floatX) * 5 res = fn(xv, yv, zv) assert res.shape == (2, 5) assert np.all(180.0 == res) res = fn(xv, yv, zv) assert res.shape == (2, 5) assert np.all(180.0 == res)
def test_straightforward(self): x, y, z = T.matrices('xyz') e = x + y * z op = OpFromGraph([x, y, z], [e], mode='FAST_RUN') f = op(x, y, z) - op(y, z, x) # (1+3*5=array of 16) - (3+1*5=array of 8) fn = function([x, y, z], f) xv = numpy.ones((2, 2), dtype=config.floatX) yv = numpy.ones((2, 2), dtype=config.floatX) * 3 zv = numpy.ones((2, 2), dtype=config.floatX) * 5 #print function, function.__module__ #print fn.maker.fgraph.toposort() fn(xv, yv, zv) assert numpy.all(8.0 == fn(xv, yv, zv)) assert numpy.all(8.0 == fn(xv, yv, zv))
def test_size_changes(self, cls_ofg): x, y, z = T.matrices('xyz') e = T.dot(x, y) op = cls_ofg([x, y], [e]) f = op(x, op(y, z)) fn = function([x, y, z], f) xv = np.ones((2, 3), dtype=config.floatX) yv = np.ones((3, 4), dtype=config.floatX) * 3 zv = np.ones((4, 5), dtype=config.floatX) * 5 res = fn(xv, yv, zv) assert res.shape == (2, 5) assert np.all(180.0 == res) res = fn(xv, yv, zv) assert res.shape == (2, 5) assert np.all(180.0 == res)
def test_size_changes(self): x, y, z = T.matrices('xyz') e = T.dot(x, y) op = OpFromGraph([x, y], [e]) f = op(x, op(y, z)) fn = function([x, y, z], f) xv = numpy.ones((2, 3), dtype=config.floatX) yv = numpy.ones((3, 4), dtype=config.floatX) * 3 zv = numpy.ones((4, 5), dtype=config.floatX) * 5 res = fn(xv, yv, zv) assert res.shape == (2, 5) assert numpy.all(180.0 == res) res = fn(xv, yv, zv) assert res.shape == (2, 5) assert numpy.all(180.0 == res)
def test_connection_pattern(self): # Basic case x, y, z = T.matrices('xyz') out1 = x * y out2 = y * z op1 = OpFromGraph([x, y, z], [out1, out2]) results = op1.connection_pattern(None) expect_result = [[True, False], [True, True], [False, True]] assert results == expect_result # Graph with ops that don't have a 'full' connection pattern # and with ops that have multiple outputs m, n, p, q = T.matrices('mnpq') o1, o2 = op1(m, n, p) out1, out2 = op1(o1, q, o2) op2 = OpFromGraph([m, n, p, q], [out1, out2]) results = op2.connection_pattern(None) expect_result = [[True, False], [True, True], [False, True], [True, True]] assert results == expect_result # Inner graph where some computation doesn't rely on explicit inputs srng = RandomStreams(seed=234) rv_u = srng.uniform((2, 2)) x, y = T.matrices('xy') out1 = x + rv_u out2 = y + 3 out3 = 3 + rv_u op3 = OpFromGraph([x, y], [out1, out2, out3]) results = op3.connection_pattern(None) expect_result = [[True, False, False], [False, True, False], [True, False, True]] assert results == expect_result
def test_straightforward(self, cls_ofg): x, y, z = T.matrices('xyz') e = x + y * z op = cls_ofg([x, y, z], [e]) # (1+3*5=array of 16) - (3+1*5=array of 8) f = op(x, y, z) - op(y, z, x) fn = function([x, y, z], f) xv = np.ones((2, 2), dtype=config.floatX) yv = np.ones((2, 2), dtype=config.floatX) * 3 zv = np.ones((2, 2), dtype=config.floatX) * 5 # print function, function.__module__ # print fn.maker.fgraph.toposort() fn(xv, yv, zv) assert np.all(8.0 == fn(xv, yv, zv)) assert np.all(8.0 == fn(xv, yv, zv))
def test_shared(self): x, y, z = T.matrices('xyz') s = shared(numpy.random.rand(2, 2).astype(config.floatX)) e = x + y * z + s op = OpFromGraph([x, y, z], [e]) # (1+3*5=array of 16) - (3+1*5=array of 8) f = op(x, y, z) - op(y, z, x) fn = function([x, y, z], f) xv = numpy.ones((2, 2), dtype=config.floatX) yv = numpy.ones((2, 2), dtype=config.floatX) * 3 zv = numpy.ones((2, 2), dtype=config.floatX) * 5 # print function, function.__module__ # print fn.maker.fgraph.toposort() assert numpy.allclose(8.0, fn(xv, yv, zv)) assert numpy.allclose(8.0, fn(xv, yv, zv))
def test_getitem(self): x, y = T.matrices('x', 'y') i = TheanoInterval(x, y) i0, i1, i2 = i[0, 0], i[1, 1], i[2, 2] l0, l1, l2 = i0.lower, i1.lower, i2.lower u0, u1, u2 = i0.upper, i1.upper, i2.upper f = function([x, y], [l0, l1, l2, u0, u1, u2]) ex_x = A([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) ex_y = ex_x * 10 [rl0, rl1, rl2, ru0, ru1, ru2] = f(ex_x, ex_y) assert_equal(rl0, 1) assert_equal(rl1, 5) assert_equal(rl2, 9) assert_equal(ru0, 10) assert_equal(ru1, 50) assert_equal(ru2, 90)
def test_shared(self, cls_ofg): x, y, z = T.matrices('xyz') s = shared(np.random.rand(2, 2).astype(config.floatX)) e = x + y * z + s op = cls_ofg([x, y, z], [e]) # (1+3*5=array of 16) - (3+1*5=array of 8) f = op(x, y, z) - op(y, z, x) fn = function([x, y, z], f) xv = np.ones((2, 2), dtype=config.floatX) yv = np.ones((2, 2), dtype=config.floatX) * 3 zv = np.ones((2, 2), dtype=config.floatX) * 5 # print function, function.__module__ # print fn.maker.fgraph.toposort() assert np.allclose(8.0, fn(xv, yv, zv)) assert np.allclose(8.0, fn(xv, yv, zv))
def __init__(self, shape, X): prefix = "Softmax_" self.in_size, self.out_size = shape self.W = init_weights(shape, prefix + "W") self.b = init_bias(self.out_size, prefix + "b") self.gW = init_gradws(shape, prefix + "gW") self.gb = init_bias(self.out_size, prefix + "gb") D = T.matrices("D") self.X = X def _active(X): return T.nnet.softmax(T.dot(X, self.W) + self.b) self.active = theano.function(inputs = [self.X], outputs = _active(self.X)) def _propagate(D): return T.dot(D, self.W.T) self.propagate = theano.function(inputs = [D], outputs = _propagate(D)) x, dy = T.rows("x","dy") updates_grad = [(self.gW, self.gW + T.dot(x.T, dy)), (self.gb, self.gb + dy)] self.grad = theano.function( inputs = [x, dy], updates = updates_grad ) updates_clear = [ (self.gW, self.gW * 0), (self.gb, self.gb * 0)] self.clear_grad = theano.function( inputs = [], updates = updates_clear ) lr = T.scalar() t = T.scalar() updates_w = [ (self.W, self.W - self.gW * lr / t), (self.b, self.b - self.gb * lr / t)] self.update = theano.function( inputs = [lr, t], updates = updates_w ) self.params = [self.W, self.b]
def test_max(self): al = A([[1, 2], [3, 4]]) au = A([[2, 2], [4, 7]]) bl = A([[0, 3], [3, -4]]) bu = A([[2, 4], [3, -3]]) alt, aut, blt, but = T.matrices('alt', 'aut', 'blt', 'but') ai = TheanoInterval(alt, aut) bi = TheanoInterval(blt, but) ci = ai.max(bi) d = {alt: al, aut: au, blt: bl, but: bu} res = ci.eval(d) rl = res[0] ru = res[1] ansl = A([[1, 3], [3, 4]]) ansu = A([[2, 4], [4, 7]]) array_almost_equal(rl, ansl) array_almost_equal(ru, ansu)
def test_shared_grad(self, cls_ofg): x, y, z = tt.matrices("xyz") s = shared(np.random.rand(2, 2).astype(config.floatX)) e = x + y * z + s op = cls_ofg([x, y, z], [e]) f = op(x, y, z) f = f - tt.grad(tt.sum(f), y) fn = function([x, y, z], f) xv = np.ones((2, 2), dtype=config.floatX) yv = np.ones((2, 2), dtype=config.floatX) * 3 zv = np.ones((2, 2), dtype=config.floatX) * 5 assert np.allclose(11.0 + s.get_value(), fn(xv, yv, zv)) # grad again the shared variable f = op(x, y, z) f = f - tt.grad(tt.sum(f), s) fn = function([x, y, z], f) assert np.allclose(15.0 + s.get_value(), fn(xv, yv, zv))
def test_shared_grad(self): x, y, z = T.matrices('xyz') s = shared(numpy.random.rand(2, 2).astype(config.floatX)) e = x + y * z + s op = OpFromGraph([x, y, z], [e]) f = op(x, y, z) f = f - T.grad(T.sum(f), y) fn = function([x, y, z], f) xv = numpy.ones((2, 2), dtype=config.floatX) yv = numpy.ones((2, 2), dtype=config.floatX) * 3 zv = numpy.ones((2, 2), dtype=config.floatX) * 5 assert numpy.allclose(11.0 + s.get_value(), fn(xv, yv, zv)) # grad again the shared variable f = op(x, y, z) f = f - T.grad(T.sum(f), s) fn = function([x, y, z], f) assert numpy.allclose(15.0 + s.get_value(), fn(xv, yv, zv))
def main(): """A simple test case.""" rgb1, rgb2 = T.matrices('rgb1', 'rgb2') jab1 = srgb_to_ucs(rgb1, 100, 20, 20, **Surrounds.AVERAGE) jab2 = srgb_to_ucs(rgb2, 100, 20, 20, **Surrounds.AVERAGE) loss = delta_e(jab1, jab2)**2 grad_ = T.grad(loss, rgb2) grad = theano.function([rgb1, rgb2], grad_) # Inversion of CAM02-UCS via gradient descent target = floatX([[0.25, 0.25, 1]]) x = np.zeros_like(target) + 0.5 print(x) for i in range(1500): g = grad(target, x) x -= 1e-6 * g if i % 100 == 99: print(x)
def test_shared_grad(self): x, y, z = T.matrices('xyz') s = shared(numpy.random.rand(2, 2).astype(config.floatX)) e = x + y * z + s op = OpFromGraph([x, y, z], [e], mode='FAST_RUN') f = op(x, y, z) f = f - T.grad(T.sum(f), y) fn = function([x, y, z], f) xv = numpy.ones((2, 2), dtype=config.floatX) yv = numpy.ones((2, 2), dtype=config.floatX) * 3 zv = numpy.ones((2, 2), dtype=config.floatX) * 5 assert numpy.allclose(11.0 + s.get_value(), fn(xv, yv, zv)) # grad again the shared variable f = op(x, y, z) f = f - T.grad(T.sum(f), s) fn = function([x, y, z], f) assert numpy.allclose(15.0 + s.get_value(), fn(xv, yv, zv))
def test_shared_grad(self, cls_ofg): x, y, z = T.matrices('xyz') s = shared(np.random.rand(2, 2).astype(config.floatX)) e = x + y * z + s op = cls_ofg([x, y, z], [e]) f = op(x, y, z) f = f - T.grad(T.sum(f), y) fn = function([x, y, z], f) xv = np.ones((2, 2), dtype=config.floatX) yv = np.ones((2, 2), dtype=config.floatX) * 3 zv = np.ones((2, 2), dtype=config.floatX) * 5 assert np.allclose(11.0 + s.get_value(), fn(xv, yv, zv)) # grad again the shared variable f = op(x, y, z) f = f - T.grad(T.sum(f), s) fn = function([x, y, z], f) assert np.allclose(15.0 + s.get_value(), fn(xv, yv, zv))
def test_ops1(self): # __add__, __sub__, __mul__, x, y, z, w = T.matrices('x', 'y', 'z', 'w') i1 = TheanoInterval(x, y) i2 = TheanoInterval(z, w) l1 = A([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) u1 = l1 * 10 + 1 l2 = l1 * 10 + 2 u2 = l1 * 10 + 3 r_add1 = i1 + i2 r_add2 = i1 + x r_sub1 = i1 - i2 r_sub2 = i1 - x r_mul1 = i1 * i2 r_mul2 = i1 * x fres = [r_add1.lower, r_add1.upper, r_add2.lower, r_add2.upper, r_sub1.lower, r_sub1.upper, r_sub2.lower, r_sub2.upper, r_mul1.lower, r_mul1.upper, r_mul2.lower, r_mul2.upper] f = function([x, y, z, w], fres) add1l, add1u, add2l, add2u, \ sub1l, sub1u, sub2l, sub2u, \ mul1l, mul1u, mul2l, mul2u, = f(l1, u1, l2, u2) ops_results = [add1l, add1u, add2l, add2u, sub1l, sub1u, sub2l, sub2u, mul1l, mul1u, mul2l, mul2u] r_add1l = l1 + l2 r_add2l = l1 + l1 r_add1u = u1 + u2 r_add2u = u1 + l1 r_sub1l = l1 - u2 r_sub2l = l1 - l1 r_sub1u = u1 - l2 r_sub2u = u1 - l1 r_mul1l = l1 * l2 r_mul2l = l1 * l1 r_mul1u = u1 * u2 r_mul2u = u1 * l1 results = [r_add1l, r_add1u, r_add2l, r_add2u, r_sub1l, r_sub1u, r_sub2l, r_sub2u, r_mul1l, r_mul1u, r_mul2l, r_mul2u] for i in range(len(ops_results)): array_almost_equal(ops_results[i], results[i])
def test_top_with_numpy_generator(): # Random dataset dataset = np.random.normal(0, 1, (1000, 10)) # target = 0 if sum>.5 else 1 target = 0. + (np.dot(dataset, np.ones((10, 1))) > .5) def dataset_generator(): return NumpyDatasetGenerator(dataset=(dataset, target), batch_size=100) W = theano.shared(np.random.normal(0, 1, (10, 1)).astype(top.up.floatX)) X, T = tensor.matrices('X', 'Y') Y = tensor.nnet.sigmoid(tensor.dot(X, W)) cost = tensor.nnet.binary_crossentropy(Y, T).mean() opt = top.Optimizer(W, cost, input=[X, T], method='sgd', learning_rate=.005) opt.iterate_epochs(1000, dataset_generator) print W.get_value()
def test_eval(self): txl, txu, tyl, tyu = T.matrices('xl', 'xu', 'yl', 'yu') xl = A([[1, 2], [3, 4]]) xu = A([[2, 4], [6, 9]]) yl = A([[-1, -5], [0, 3]]) yu = A([[4, 2], [0, 3]]) ix = TheanoInterval(txl, txu) iy = TheanoInterval(tyl, tyu) iz = ix + iy d = {txl: xl, txu: xu, tyl: yl, tyu: yu} zl, zu = iz.eval(d) array_almost_equal(zl, xl + yl) array_almost_equal(zu, xu + yu) i2 = TheanoInterval(theano.shared(1), theano.shared(3)) i2l, i2u = i2.eval() assert_equal(i2l, 1) assert_equal(i2u, 3) i2l, i2u = i2.eval({}) assert_equal(i2l, 1) assert_equal(i2u, 3)
def SGD(eta, minibatch_size, n_minibatch, n_epochs): # Testing and Validation data are the outputs of the last inputs. print 'Calling SGD() ..' index = T.iscalar('index') x, y = T.matrices('x', 'y') tree = TreeLSTM(x, n_in) updates = [(param, param - eta * gparam) for param, gparam in zip(tree.params, T.grad(tree.loss(y), tree.params))] train_fn = theano.function([index], tree.loss(y), updates=updates, givens={x: train_x[:, minibatch_size * n_in * index: minibatch_size * n_in * (index + 1)], y: train_y[:, minibatch_size * index: minibatch_size * (index + 1)]} ) # Compilation over ################# ## TRAIN MODEL ## ################# for epoch in range(n_epochs): for idx in range(n_minibatch): train_fn(idx)
def compile(self,X,n_negative_samples=None): if n_negative_samples is None: n_negative_samples = 1000 pos_samples = X.loc[:, self.column_ranges.keys()].values.astype(floatX) pos_data, neg_data = T.matrices('SigData', 'BckData') pos_w, neg_w, parameters = T.vectors('SigW', 'BckW', 'parameters') neg_samples, neg_weight = self.generate_negative_samples(n_negative_samples=n_negative_samples, strategy=self.sampling_strategy) givens = {pos_data: pos_samples, neg_data: neg_samples, neg_w: neg_weight} pdf = self.prepare_pdf() pdfs, summands = pdf(pos_data, neg_data, neg_weights=neg_w, weights=parameters) result = - T.mean(pos_w * T.log(pdfs)) self.Tfunction = theano.function([parameters,pos_w], result, givens=givens) self.Tderivative = theano.function([parameters,pos_w], T.grad(result, parameters), givens=givens) self.X=X
def __init__(self, layers, mini_batch_size): """Takes a list of `layers`, describing the network architecture, and a value for the `mini_batch_size` to be used during training by stochastic gradient descent. """ self.layers = layers self.mini_batch_size = mini_batch_size self.params = [ param for layer in self.layers for param in layer.params ] self.x = T.matrices("x") self.y = T.ivector("y") init_layer = self.layers[0] init_layer.set_inpt(self.x, self.x, self.mini_batch_size) for j in range(1, len(self.layers)): prev_layer, layer = self.layers[j - 1], self.layers[j] layer.set_inpt(prev_layer.output, prev_layer.output_dropout, self.mini_batch_size) self.output = self.layers[-1].output self.output_dropout = self.layers[-1].output_dropout
def test_4_conditionals(): a, b = T.scalars('a', 'b') x, y = T.matrices('x', 'y') f_switch = theano.function([a, b, x, y], T.switch(T.lt(a, b), T.mean(x), T.mean(y))) f_lazy_ifelse = theano.function([a, b, x, y], ifelse(T.lt(a, b), T.mean(x), T.mean(y))) x_val = np.ones((100, 100), dtype=theano.config.floatX)*1 y_val = np.ones((100, 100), dtype=theano.config.floatX)*2 # vectorized switch is going to evaluate both options np.testing.assert_almost_equal( f_switch(1, 2, x_val, y_val), 1 ) # lazy evaluation is going to evaluate only single option np.testing.assert_almost_equal( f_lazy_ifelse(2, 1, x_val, y_val), 2 )
def test_reshape(self): tl, tu = T.matrices('l', 'u') xl = A([[1, 2, 3], [4, 5, 6]]) xu = xl + 3 i = TheanoInterval(tl, tu) i1 = i.reshape((1, 6)) i2 = i.reshape((2, 3)) i3 = i.reshape((3, 2)) i4 = i.reshape((6, 1)) [l1, u1] = i1.eval({tl: xl, tu: xu}) [l2, u2] = i2.eval({tl: xl, tu: xu}) [l3, u3] = i3.eval({tl: xl, tu: xu}) [l4, u4] = i4.eval({tl: xl, tu: xu}) assert_array_equal(l1, xl.reshape((1, 6))) assert_array_equal(l2, xl.reshape((2, 3))) assert_array_equal(l3, xl.reshape((3, 2))) assert_array_equal(l4, xl.reshape((6, 1))) assert_array_equal(u1, xu.reshape((1, 6))) assert_array_equal(u2, xu.reshape((2, 3))) assert_array_equal(u3, xu.reshape((3, 2))) assert_array_equal(u4, xu.reshape((6, 1)))
def test_broadcasting_pattern(self): from lasagne.layers import ElemwiseSumLayer, InputLayer import lasagne import theano.tensor as T import numpy as np import theano a, b = T.matrices('a', 'b') a_ = np.ones((2, 1), dtype=theano.config.floatX) b_ = np.ones((2, 5), dtype=theano.config.floatX) l_a = InputLayer((2, 1)) l_b = InputLayer((2, 5)) l_o = ElemwiseSumLayer([l_a, l_b]) shp = l_o.output_shape # set broadcastable table output = lasagne.layers.get_output(l_o, { l_a: a, l_b: b }).eval({ a: a_, b: b_ }) np.testing.assert_array_almost_equal(output, np.ones((2, 5)) + 1.0) assert shp == output.shape # test that None dimensions are not modified l_a = InputLayer((2, None)) l_b = InputLayer((2, None)) l_o = ElemwiseSumLayer([l_a, l_b]) shp = l_o.output_shape # set broadcastable table a = T.addbroadcast(a, 1) output = lasagne.layers.get_output(l_o, { l_a: a, l_b: b }).eval({ a: a_, b: b_ }) np.testing.assert_array_almost_equal(output, np.ones((2, 5)) + 1.0) assert shp == (2, None)
self.z = theano.dot(self.inpts, self.W) + self.B self.pool_size = pool_size self.filter_shape = filter_shape conv_out = conv.conv2d(self.inpts, image_shape=self.image_shape, filters=self.W, filter_shape=self.filter_shape) pool_out = downsample.pool_2d(input=conv_out, ds=self.pool_size, ignore_border=True) self.out = T.nnet.sigmoid(pool_out) x = T.matrices("x") y = T.matrices("y") ly1 = convlyer(x, filter_shape=(5, 5), image_shape=(28, 28), pool_size=(2, 2)) ly2 = lyers(ly1.out, 12, 100, activation="relu") ly3 = lyers(ly2.out, 100, 10, activation="sigm") lamda = 0.1 cost = T.mean(1 / 2 * T.square(ly3.outputs - y)) params = [param for ly in [ly1, ly2, ly3] for param in ly.params] grads = T.grad(cost, params) update = [(param, param - lamda * grad) for param, grad in zip(params, grads)] train = theano.function([x, y], cost, updates=update) predict = theano.function([x], ly3.outputs) for t_data in training_data[0:10]:
def test_gpu_rowwise_switch(): assert theano.config.device.startswith("gpu"), "Need to test on GPU!" data = [ # 4 x 2 (np.array([[0.22323515, 0.36703175], [0.82260513, 0.3461504], [0.82362652, 0.81626087], [0.95270008, 0.2226797]]), np.array([[0.36341551, 0.20102882], [0.24144639, 0.45237923], [0.39951822, 0.7348066], [0.16649647, 0.60306537]]), np.array([1, 0, 1, 1]), np.array([[0.22323515, 0.36703175], [0.24144639, 0.45237923], [0.82362652, 0.81626087], [0.95270008, 0.2226797]])), # 2 x 3 x 4 (np.array([[[0.48769062, 0.82649632, 0.2047115, 0.41437615], [0.25290664, 0.87164914, 0.80968588, 0.49295084], [0.71438099, 0.97913502, 0.37598001, 0.76958707]], [[0.37605973, 0.538358, 0.74304674, 0.84346291], [0.95310617, 0.61540292, 0.49881143, 0.1028554], [0.83481996, 0.90969569, 0.40410424, 0.34419989]]]), np.array([[[0.7289117, 0.97323253, 0.19070121, 0.64164653], [0.26816493, 0.76093069, 0.95284825, 0.77350426], [0.55415519, 0.39431256, 0.86588665, 0.50031027]], [[0.1980869, 0.7753601, 0.26810868, 0.3628802], [0.2488143, 0.21278388, 0.09724567, 0.58457886], [0.12295105, 0.75321368, 0.37258797, 0.27756972]]]), np.array([1, 0]), np.array([[[0.48769062, 0.82649632, 0.2047115, 0.41437615], [0.25290664, 0.87164914, 0.80968588, 0.49295084], [0.71438099, 0.97913502, 0.37598001, 0.76958707]], [[0.1980869, 0.7753601, 0.26810868, 0.3628802], [0.2488143, 0.21278388, 0.09724567, 0.58457886], [0.12295105, 0.75321368, 0.37258797, 0.27756972]]])) ] A2, B2 = T.matrices("AB") A3, B3 = T.tensor3("A"), T.tensor3("B") mask = T.ivector("mask") switch2 = T.switch(mask.dimshuffle(0, "x"), A2, B2) switch3 = T.switch(mask.dimshuffle(0, "x", "x"), A3, B3) f2 = theano.function([A2, B2, mask], switch2) f3 = theano.function([A3, B3, mask], switch3) print "Graph of 2dim switch:" theano.printing.debugprint(f2.maker.fgraph.outputs[0]) print "Graph of 3dim switch:" theano.printing.debugprint(f3.maker.fgraph.outputs[0]) for instance in data: # Retrieve appropriate function func = f2 if instance[0].ndim == 2 else f3 # Cast to float-friendly types instance = [ x.astype(np.float32) if x.dtype.kind == 'f' else x.astype(np.int32) for x in instance ] yield tuple([_test_gpu_rowwise_switch_inner, func] + instance)
import theano import theano.tensor as T import numpy as np from breze.arch.construct.layer.distributions import DiagGauss n, m = 10, 5 A, B = T.matrices('A', 'B') X = np.empty((3, 4), dtype=np.float32) print 'X shape =', X.shape # # this works # S = theano.shared(np.random.randn(n, m)) # sample = T.tile(S[np.newaxis, :, :], (A.shape[0], 1, 1), ndim=3) # this does not work mean_val, var_val = (np.random.randn(n, m) for _ in xrange(2)) var_val = var_val**2 + 1e-5 mean_raw, var_raw = (theano.shared(v) for v in (mean_val, var_val)) mean, var = (T.tile(v[np.newaxis, :, :], (A.shape[0], 1, 1), ndim=3) for v in (mean_raw, var_raw)) gaus = DiagGauss(mean, var) sample = gaus.sample() foo_sample_A = theano.function([A], sample) print 'foo_sample_A.shape =', foo_sample_A(X).shape # (3, 10, 5) sample += B[0, 0] * 0 # to avoid unused input error
def __init__(self, state = 'x', measurement = 'z', motion_transition = None, measurement_transition = None): self.N = len(state.split(' ')) self.M = len(measurement.split(' ')) self.X, self.Z = T.fvectors('X','Z') self.P, self.Q, self.R = T.fmatrices('P','Q','R') self.F, self.H = T.matrices('F','H') self.dt = T.scalar('dt') self.X_ = T.dot(self.F, self.X) self.fX_ = G.jacobian(T.flatten(self.X_), self.X) self.P_ = T.dot(T.dot(self.fX_, self.P), T.transpose(self.fX_)) + self.dt * self.Q self.h = T.dot(self.H, self.X_) self.y = self.Z - self.h self.hX_ = G.jacobian(self.h, self.X_) self.matrix_inv = T.nlinalg.MatrixInverse() self.S = T.dot(T.dot(self.hX_, self.P_), T.transpose(self.hX_)) + self.R self.K = T.dot(T.dot(self.P_, T.transpose(self.hX_)), self.matrix_inv(self.S)) self.X__ = self.X_ + T.dot(self.K, self.y) self.P__ = T.dot(T.identity_like(self.P) - T.dot(self.K, self.hX_), self.P_) self.prediction = theano.function(inputs = [self.X, self.P, self.Q, self.F, self.dt], outputs = [self.X_, self.P_], allow_input_downcast = True) self.update = theano.function(inputs = [self.X, self.Z, self.P, self.Q, self.R, self.F, self.H, self.dt], outputs = [self.X__, self.P__], allow_input_downcast = True) if motion_transition is None: self.motion_transition = np.eye(self.N) else: self.motion_transition = np.array(motion_transition) if measurement_transition is None: self.measurement_transition = np.eye(self.M) else: self.measurement_transition = np.array(measurement_transition)
import theano import theano.tensor as T import numpy as np # from OkapiV2.Layers.Activations import SoftmaxLayer from OkapiV2 import Losses x, y = T.matrices('xy') # regular softmax and crossentropy sm = T.nnet.softmax(x) cm1 = T.nnet.categorical_crossentropy(sm, y) g1 = T.grad(cm1.mean(), x) # numerically stable log-softmax with crossentropy '''xdev = x-x.max(1, keepdims=True) lsm = xdev - T.log(T.sum(T.exp(xdev), axis=1, keepdims=True))''' # lsm = SoftmaxLayer().get_output(x, None) + 1e-7 '''sm2 = T.exp(lsm) cm2 = -T.sum(y*lsm, axis=1)''' cm2 = Losses.AltSoftmaxLoss().get_train_loss(x, y, [None]) # cm2 = T.nnet.categorical_crossentropy(sm2, y) g2 = T.grad(cm2.mean(), x) # create some inputs into a softmax that are large and labels a = np.exp(10*np.random.rand(5, 10).astype(theano.config.floatX)) # create some one-hot coded labels b = np.eye(5, 10).astype(theano.config.floatX) # show equivalence of softmax and exponentiated numerically stable log-softmax '''f1 = theano.function([x], [sm, sm2]) sm1, sm2 = f1(a)
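The commented-out block above is the usual log-sum-exp shift for a numerically stable log-softmax. As a side note (not part of the original snippet, and independent of the OkapiV2 Losses helper), a minimal NumPy sketch of the same trick:

import numpy as np

def log_softmax(x):
    # Subtracting the row-wise max leaves log-softmax unchanged but keeps exp() from overflowing.
    xdev = x - x.max(axis=1, keepdims=True)
    return xdev - np.log(np.sum(np.exp(xdev), axis=1, keepdims=True))

logits = np.array([[1000.0, 1001.0, 1002.0]])
print(log_softmax(logits))                             # finite: about [-2.41, -1.41, -0.41]
print(np.log(np.exp(logits) / np.exp(logits).sum()))   # naive softmax-then-log overflows and yields nan

The large inputs built above (exp of 10 * random values) are exactly the regime where the naive path loses precision.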
import time import numpy import theano from theano import tensor as tt from theano.ifelse import ifelse a, b = tt.scalars('a', 'b') x, y = tt.matrices('x', 'y') z_switch = tt.switch(tt.lt(a, b), tt.mean(x), tt.mean(y)) z_lazy = ifelse(tt.lt(a, b), tt.mean(x), tt.mean(y)) f_switch = theano.function([a, b, x, y], z_switch) f_lazyifelse = theano.function([a, b, x, y], z_lazy) val1 = 0. val2 = 1. big_mat1 = numpy.ones((10000, 1000)) big_mat2 = numpy.ones((10000, 1000)) n_times = 10 tic = time.clock() for i in xrange(n_times): f_switch(val1, val2, big_mat1, big_mat2) print 'time spent evaluating both values %f sec' % (time.clock() - tic) tic = time.clock() for i in xrange(n_times):
# IfElse vs. Switch # (1) Both ops build a condition over symbolic variables. # (2) IfElse takes a boolean condition and two variables as inputs. # (3) Switch takes a tensor as condition and two variables as inputs. switch # is an elementwise operation and is thus more general than ifelse. # (4) Whereas switch evaluates both output variables, ifelse is lazy and # only evaluates one variable with respect to the condition. # Example from theano import tensor as T from theano.ifelse import ifelse import theano, time, numpy a, b = T.scalars('a', 'b') x, y = T.matrices('x', 'y') z_switch = T.switch(T.lt(a, b), T.mean(x), T.mean(y)) z_lazy = ifelse(T.lt(a, b), T.mean(x), T.mean(y)) f_switch = theano.function([a, b, x, y], z_switch, mode=theano.Mode(linker='vm')) f_lazyifelse = theano.function([a, b, x, y], z_lazy, mode=theano.Mode(linker='vm')) val1 = 0.0 val2 = 1.0 big_mat1 = numpy.ones((10000, 20000), dtype=numpy.float32) big_mat2 = numpy.ones((10000, 20000), dtype=numpy.float32)
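The snippet above stops after building the two functions. Mirroring the timing pattern used in the other switch/ifelse examples in this collection, a hedged continuation (the constants and labels here are illustrative, not from the original) would compare them like this:

import time

n_times = 10

tic = time.time()
for i in range(n_times):
    f_switch(val1, val2, big_mat1, big_mat2)       # switch evaluates both T.mean(x) and T.mean(y)
print('switch:      %f sec' % (time.time() - tic))

tic = time.time()
for i in range(n_times):
    f_lazyifelse(val1, val2, big_mat1, big_mat2)   # ifelse only evaluates the branch picked by a < b
print('lazy ifelse: %f sec' % (time.time() - tic))

With the 'vm' linker, the lazy version is expected to do roughly half the work here, since only one of the two means is ever computed.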
x=T.dscalar('x') y=T.dscalar('y') z=x+y f=function([x,y],z) print f(2,3) print f(132.5,322.4) print z.eval({x:1,y:2}) # Add two matrices x=T.dmatrix('x') y=T.dmatrix('y') z=x+y f=function([x,y],z) print f([[1,2],[1,2]],[[10,20],[10,20]]) # multiple outputs a,b=T.matrices('a','b') diff=a-b abs_diff = abs(diff) diff_squared = diff ** 2 f=theano.function([a,b],[diff,abs_diff,diff_squared]) d,e,f=f([[1,2,3],[10,20,30]],[[100,200,300],[1,2,3]]) print "diff is : \n" print d print "abs_diff is : \n" print e print "diff_squared is : \n" print f
import theano.tensor as T from theano import function a, b = T.matrices('a', 'b') diff = a - b abs_diff = abs(diff) diff_squared = diff ** 2 f = function([a, b], [diff, abs_diff, diff_squared]) print f([[1, 1], [1, 1]], [[0, 1], [2, 3]])
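For reference (worked out by hand, not printed in the original), the element-wise results of that call are:

import numpy as np

a_val = np.array([[1., 1.], [1., 1.]])
b_val = np.array([[0., 1.], [2., 3.]])
# These match the three Theano outputs: diff, abs_diff, diff_squared
print(a_val - b_val)          # [[ 1.  0.] [-1. -2.]]
print(abs(a_val - b_val))     # [[ 1.  0.] [ 1.  2.]]
print((a_val - b_val) ** 2)   # [[ 1.  0.] [ 1.  4.]]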
from theano import function import theano from theano.ifelse import ifelse import time if __name__ == '__main__': from minitest import * inject(numpy.allclose, 'must_close') # http://deeplearning.net/software/theano/tutorial/conditions.html with test("if"): a,b = T.scalars('a', 'b') x,y = T.matrices('x', 'y') z_lazy = ifelse(T.lt(a, b), T.mean(x), T.mean(y)) f_lazyifelse = theano.function([a, b, x, y], z_lazy, mode=theano.Mode(linker='vm')) val1 = 0. val2 = 1. big_mat1 = numpy.ones((2, 2)) big_mat2 = numpy.ones((2, 2)) f_lazyifelse(val1, val2, big_mat1, big_mat2).must_close( [1.0]) with test("if with value"): a = T.scalar()
def make_train_fun( self, agent, sequence_length=25, # how many steps to make before updating weights observation_shape=(1, 64, 64), # same as env.observation_space.shape reward_scale=1e-3, #rewards are multiplied by this. May be useful if they are large. gamma=0.99, #discount from TD ): """Compiles a function to train for one step""" #make replay environment observations = T.tensor(theano.config.floatX, broadcastable=(False, ) * (2 + len(observation_shape)), name="observations[b,t,color,width,height]") actions = T.imatrix("actions[b,t]") rewards, is_alive = T.matrices("rewards[b,t]", "is_alive[b,t]") prev_memory = [l.input_var for l in agent.agent_states.values()] replay = SessionBatchEnvironment(observations, [observation_shape], actions=actions, rewards=rewards, is_alive=is_alive) #replay sessions _, _, _, _, (logits_seq, V_seq) = agent.get_sessions( replay, session_length=sequence_length, experience_replay=True, initial_hidden=prev_memory, unroll_scan= False, #speeds up compilation 10x, slows down training by 20% (still 4x faster than TF :P ) ) rng_updates = agent.get_automatic_updates( ) #updates of random states (will be passed to a function) # compute pi(a|s) and log(pi(a|s)) manually [use logsoftmax] # we can't guarantee that theano optimizes logsoftmax automatically since it's still in dev logits_flat = logits_seq.reshape([-1, logits_seq.shape[-1]]) policy_seq = T.nnet.softmax(logits_flat).reshape(logits_seq.shape) logpolicy_seq = T.nnet.logsoftmax(logits_flat).reshape( logits_seq.shape) # get policy gradient elwise_actor_loss, elwise_critic_loss = a2c.get_elementwise_objective( policy=logpolicy_seq, treat_policy_as_logpolicy=True, state_values=V_seq[:, :, 0], actions=replay.actions[0], rewards=replay.rewards * reward_scale, is_alive=replay.is_alive, gamma_or_gammas=gamma, n_steps=None, return_separate=True) # add losses with magic numbers # (you can change them more or less harmlessly, this usually just makes learning faster/slower) # also regularize to prioritize exploration reg_logits = T.mean(logits_seq**2) reg_entropy = T.mean(T.sum(policy_seq * logpolicy_seq, axis=-1)) loss = 0.1 * elwise_actor_loss.mean() + 0.25 * elwise_critic_loss.mean( ) + 1e-3 * reg_entropy + 1e-2 * reg_logits # Compute weight updates, clip by norm grads = T.grad(loss, self.weights) grads = lasagne.updates.total_norm_constraint(grads, 10) updates = lasagne.updates.adam(grads, self.weights, 1e-4) # compile train function inputs = [observations, actions, rewards, is_alive] + prev_memory return theano.function(inputs, updates=rng_updates + updates, allow_input_downcast=True)
def __init__(self, rng, layer, shape, X, is_train = 1, batch_size = 1, p = 0.5): prefix = "GRU_" self.in_size, self.out_size = shape self.W_xr = init_weights((self.in_size, self.out_size), prefix + "W_xr" + "_" + layer) self.W_hr = init_weights((self.out_size, self.out_size), prefix + "W_hr" + "_" + layer) self.b_r = init_bias(self.out_size, prefix + "b_r" + "_" + layer) self.W_xz = init_weights((self.in_size, self.out_size), prefix + "W_xz" + "_" + layer) self.W_hz = init_weights((self.out_size, self.out_size), prefix + "W_hz" + "_" + layer) self.b_z = init_bias(self.out_size, prefix + "b_z" + "_" + layer) self.W_xh = init_weights((self.in_size, self.out_size), prefix + "W_xh" + "_" + layer) self.W_hh = init_weights((self.out_size, self.out_size), prefix + "W_hh" + "_" + layer) self.b_h = init_bias(self.out_size, prefix + "b_h" + "_" + layer) # for gradients self.gW_xr = init_gradws((self.in_size, self.out_size), prefix + "gW_xr" + "_" + layer) self.gW_hr = init_gradws((self.out_size, self.out_size), prefix + "gW_h" + "_" + layer) self.gb_r = init_bias(self.out_size, prefix + "gb_r" + "_" + layer) self.gW_xz = init_gradws((self.in_size, self.out_size), prefix + "gW_xz" + "_" + layer) self.gW_hz = init_gradws((self.out_size, self.out_size), prefix + "gW_hz" + "_" + layer) self.gb_z = init_bias(self.out_size, prefix + "gb_z" + "_" + layer) self.gW_xh = init_gradws((self.in_size, self.out_size), prefix + "gW_xh" + "_" + layer) self.gW_hh = init_gradws((self.out_size, self.out_size), prefix + "gW_hh" + "_" + layer) self.gb_h = init_bias(self.out_size, prefix + "gb_h" + "_" + layer) def _active(x, pre_h): r = T.nnet.sigmoid(T.dot(x, self.W_xr) + T.dot(pre_h, self.W_hr) + self.b_r) z = T.nnet.sigmoid(T.dot(x, self.W_xz) + T.dot(pre_h, self.W_hz) + self.b_z) gh = T.tanh(T.dot(x, self.W_xh) + T.dot(r * pre_h, self.W_hh) + self.b_h) h = z * pre_h + (1 - z) * gh return r, z, gh, h self.X = X H = T.matrix("H") [r, z, gh, h], updates = theano.scan(_active, sequences=[self.X], outputs_info=[None, None, None, H]) self.active = theano.function( inputs = [self.X, H], outputs = [r, z, gh, h] ) h = T.reshape(h, (self.X.shape[0], self.out_size)) # dropout if p > 0: srng = T.shared_randomstreams.RandomStreams(rng.randint(999999)) mask = srng.binomial(n = 1, p = 1-p, size = h.shape, dtype = theano.config.floatX) self.activation = T.switch(T.eq(is_train, 1), h * mask, h * (1 - p)) # is_train = 1 else: self.activation = T.switch(T.eq(is_train, 1), h, h) # is_train # TODO ->scan def _derive(prop, r, post_r, z, gh, pre_h, post_dh, post_dgh, post_dr, post_dz): dh = prop + T.dot(post_dr, self.W_hr.T) + T.dot(post_dz, self.W_hz.T) + T.dot(post_dgh * post_r, self.W_hh.T) + post_dh * z dgh = dh * (1 - z) * (1 - gh ** 2) dr = T.dot(dgh * pre_h, self.W_hh.T) * ((1 - r) * r) dz = (dh * (pre_h - gh)) * ((1 - z) * z) return dh, dgh, dr, dz prop, r, z, gh, pre_h, post_dh, post_dgh, post_dr, post_dz, post_r = \ T.matrices("prop", "r", "z", "gh", "pre_h", "post_dh", "post_dgh", "post_dr", "post_dz", "post_r") self.derive = theano.function( inputs = [prop, r, post_r, z, gh, pre_h, post_dh, post_dgh, post_dr, post_dz], outputs = _derive(prop, r, post_r, z, gh, pre_h, post_dh, post_dgh, post_dr, post_dz) ) x, dz, dr, dgh = T.rows("x", "dz", "dr", "dgh") updates_grad = [(self.gW_xr, self.gW_xr + T.dot(x.T, dr)), (self.gW_xz, self.gW_xz + T.dot(x.T, dz)), (self.gW_xh, self.gW_xh + T.dot(x.T, dgh)), (self.gW_hr, self.gW_hr + T.dot(pre_h.T, dr)), (self.gW_hz, self.gW_hz + T.dot(pre_h.T, dz)), (self.gW_hh, self.gW_hh + T.dot((r * 
pre_h).T, dgh)), (self.gb_r, self.gb_r + dr), (self.gb_z, self.gb_z + dz), (self.gb_h, self.gb_h + dgh)] self.grad = theano.function( inputs = [x, r, pre_h, dz, dr, dgh], updates = updates_grad ) updates_clear = [ (self.gW_xr, self.gW_xr * 0), (self.gW_xz, self.gW_xz * 0), (self.gW_xh, self.gW_xh * 0), (self.gW_hr, self.gW_hr * 0), (self.gW_hz, self.gW_hz * 0), (self.gW_hh, self.gW_hh * 0), (self.gb_r, self.gb_r * 0), (self.gb_z, self.gb_z * 0), (self.gb_h, self.gb_h * 0)] self.clear_grad = theano.function( inputs = [], updates = updates_clear ) lr = T.scalar() t = T.scalar() tm1 = T.scalar() updates_w = [ (self.W_xr, self.W_xr - self.gW_xr * lr / t), (self.W_xz, self.W_xz - self.gW_xz * lr / t), (self.W_xh, self.W_xh - self.gW_xh * lr / t), (self.W_hr, self.W_hr - self.gW_hr * lr / tm1), (self.W_hz, self.W_hz - self.gW_hz * lr / tm1), (self.W_hh, self.W_hh - self.gW_hh * lr / tm1), (self.b_r, self.b_r - self.gb_r * lr / t), (self.b_z, self.b_z - self.gb_z * lr / t), (self.b_h, self.b_h - self.gb_h * lr / t)] self.update = theano.function( inputs = [lr, t, tm1], updates = updates_w ) DZ, DR, DGH = T.matrices("DZ", "DR", "DGH") def _propagate(DR, DZ, DGH): return (T.dot(DR, self.W_xr.T) + T.dot(DZ, self.W_xz.T) + T.dot(DGH, self.W_xh.T)) self.propagate = theano.function(inputs = [DR, DZ, DGH], outputs = _propagate(DR, DZ, DGH)) self.params = [self.W_xr, self.W_hr, self.b_r, self.W_xz, self.W_hz, self.b_z, self.W_xh, self.W_hh, self.b_h]
# Dataset generation parameters ns = 300 # number of source points nt = 20 # number of target points nb_tr = 3 # number of trials for averaging num_src = 5 # number of source domains # variables to store the results lambdaWa = [] Wdista = [] MMDa = [] true_errora = [] # variables used in theano for mmd calculation Xth, Yth = T.matrices('X', 'Y') sigmath = T.scalar('sigma') fn = theano.function([Xth, Yth, sigmath], mmd.rbf_mmd2(Xth, Yth, sigma=sigmath)) a, b = np.ones((ns, )) / ns, np.ones( (nt, )) / nt # empirical distributions for source and target domains reg = 1e-1 # entropic regularization for \lambda computation Mb = ot.utils.dist0(ns) # cost matrix on bins Mb /= Mb.max() # normalization if plot_moons: # to plot the data (avoid when len(theta_range)>5) fig, axes = plt.subplots(len(theta_range), num_src, figsize=(21, 16)) for j, it in enumerate(theta_range): lambdaW = []
from theano import tensor as T from theano.ifelse import ifelse import theano, time, numpy a,b = T.matrices('a', 'b') x,y = T.matrices('x', 'y') #I would think of it as just another operator that acts on three symbolic variables: if the first is true, return the second, else return the third. #But for many operators (like - and +) theano has overloaded them for symbolic variables, so you probably don't notice the difference. #For example, if a and b are numbers, then c=a+b creates a variable c with the value of a+b. If a and b are symbolic variables, then c=a+b creates another symbolic variable c, that will apply (element-wise) addition to a and b when the corresponding function gets called/evaluated. #Here's an introduction to theano operators and graphs. http://deeplearning.net/software/theano/extending/graphstructures.html val1 = numpy.zeros((100, 100)) val2 = numpy.ones((100, 100)) big_mat1 = numpy.ones((100, 100)) big_mat2 = numpy.zeros((100, 100)) z_switch = T.switch(T.lt(a-b,0),big_mat1, big_mat2) #set = (a,b) #z_lazy = ifelse(T.lt(*set), big_mat1, big_mat2) f_switch = theano.function([a,b], z_switch, mode=theano.Mode(linker='vm')) #f_lazyifelse = theano.function([a,b], z_lazy, # mode=theano.Mode(linker='vm')) n_times = 10 tic = time.clock()
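The comments above make the point that operators on symbolic variables only build a graph node, and nothing is computed until a compiled function is called. A minimal standalone sketch of that point (variable names here are illustrative, not from the snippet):

import theano
import theano.tensor as T

u, v = T.scalars('u', 'v')
w = u + v                        # w is a new symbolic variable; no addition happens yet
f = theano.function([u, v], w)   # compiling the graph produces a callable
print(f(2, 3))                   # 5.0 -- the addition is performed only here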
def make_gradient(self, steps=30, interpolator='PchipInterpolator', bg=BgColors.NEUTRAL, diff_weight=1e4, callback=None): global opfunc start = time.perf_counter() def _loss(y, ideal_jab, ideal_diff): jab = ucs.symbolic.srgb_to_ucs(y, 80, 16, ucs.srgb_to_xyz(bg)[1] * 80, **Surrounds.AVERAGE) diff = jab[1:, :] - jab[:-1, :] ucs_loss = T.sum(T.sqr(jab - ideal_jab)) diff_loss = T.mean(T.sqr(diff - ideal_diff)) return ucs_loss + diff_loss * diff_weight if opfunc is None: rgb, _ideal_jab, _ideal_diff = T.matrices('rgb', 'ideal_jab', 'ideal_diff') loss_sym = _loss(rgb, _ideal_jab, _ideal_diff) grad_sym = T.grad(loss_sym, rgb) # Ensure this function is compiled ucs.srgb_to_ucs([1, 1, 1]) print('Building opfunc()...', file=sys.stderr) opfunc = theano.function([rgb, _ideal_jab, _ideal_diff], [loss_sym, grad_sym], allow_input_downcast=True, on_unused_input='ignore') print('Done building functions in {:.3g} seconds.'.format( time.perf_counter() - start), file=sys.stderr) # If the method was called only to precompile Theano functions, return early if self.x is None: return if self.colorspace == 'rgb': conds = Conditions(Y_w=100, Y_b=ucs.srgb_to_xyz(self.bg)[1] * 100) jmh = ucs.jab_to_jmh(ucs.srgb_to_ucs(self.y, conds)) elif self.colorspace == 'jmh': jmh = self.y.copy() jmh[:, 2] = ucs.H_to_h(self.y[:, 2]) else: raise ValueError('colorspace must be RGB or JMH') jmh[:, 2] = np.rad2deg(np.unwrap(np.deg2rad(jmh[:, 2]))) if self.periodic: jmh[-1] = jmh[0] interp = interpolate.CubicSpline(self.x, jmh, axis=0, bc_type='periodic') else: if not hasattr(interpolate, interpolator): raise ValueError( 'interpolator must exist in scipy.interpolate') interp = getattr(interpolate, interpolator)(self.x, jmh, axis=0) ideal_jmh = np.zeros((steps, 3)) x = np.linspace(self.x[0], self.x[-1], steps) for i, n in enumerate(x): ideal_jmh[i] = interp(n) ideal_jab = ucs.jmh_to_jab(ideal_jmh) ideal_diff = ideal_jab[1:, :] - ideal_jab[:-1, :] y = floatX(np.random.uniform(-1e-8, 1e-8, size=ideal_jab.shape)) + 0.5 opt = AdamOptimizer(y, opfunc=lambda y: opfunc(y, ideal_jab, ideal_diff), proj=lambda y: np.clip(y, 0, 1)) for i in opt: if i % 100 == 0: loss_ = float(opfunc(y, ideal_jab, ideal_diff)[0]) if callback is not None: callback('Iteration {:d}, loss = {:.3f}'.format(i, loss_)) # i = 0 # y_shape = y.shape # def lbfgs_callback(y_opt): # nonlocal i # i += 1 # if i % 100 == 0: # loss_ = float(opfunc(y_opt.reshape(y_shape), ideal_jab, ideal_diff)[0]) # if callback is not None: # callback('Iteration {:d}, loss = {:.3f}'.format(i, loss_)) # y = lbfgs(y, lambda y: opfunc(y, ideal_jab, ideal_diff), callback=lbfgs_callback) done = time.perf_counter() s = ( 'Loss was {:.3f} after {:d} iterations; make_gradient() took {:.3f} seconds.' ).format( float(opfunc(y, ideal_jab, ideal_diff)[0]), i, done - start, ) return x, y, s
def SGD(eta, n_minibatch, n_epochs, valid_steps, valid_interval, valid_headwords): # Testing and Validation data are the outputs of the last inputs. print 'Calling SGD() ..' t0 = time.time() index = T.iscalar('index') x, y = T.matrices('x', 'y') print 'The length of valid_headwords is', len(valid_headwords) print 'n_epochs * n_minibatch * n_in is', n_epochs * n_minibatch * n_in / valid_interval # minibatch size needs to be added later in assert statement. assert len(valid_headwords) >= (n_epochs * n_minibatch)/ valid_interval, 'The length of valid_headwords need to be' \ 'greater than n_epochs * n_minibatch/ valid_interval' # Need to put (* minibatch_size) when support for minibatches is introduced. model = SentenceLSTMLayers(n_in_tree, n_nodes) model_output = model.output(x) valid_output = model.output(x, n_steps=valid_steps, valid=True) loss = model.loss(y, model_output) pred = model.pred(model_output) validation_pred = model.pred(valid_output) params = model.params updates = [(param, param - eta * gparam) for param, gparam in zip(params, T.grad(loss, params))] # No need to return the updates by scan function within RNNStackedLayers. Only needed when shared variables are # updated within step function. This is not the case here. See this source train_fn = theano.function([index], [loss, pred], updates=updates, givens={x: train_x[:, n_in * index: n_in * (index + 1)], y: train_y[:, index: index + 1]}) valid_fn = theano.function([x], validation_pred) # Compilation over ################# ## TRAIN MODEL ## ################# words_seen = 0 match_idx = 0 valid_idx = 0 training_loss = [] for i in range(n_epochs): print 'The current epoch number is', i t1 = time.time() for idx in range(n_minibatch): train_loss, train_pred = train_fn(idx) training_loss.append(train_loss) if (i*n_minibatch+idx) % valid_interval == 0: valid_op = [] valid_headword = valid_headwords[valid_idx] valid_input = word_vecs[valid_headword] print '---------------------------------------------------------------' assert valid_input.shape[0] == vec_dims, 'ASSERTION 1 FALSE' print 'The validation headword for validation index =', valid_idx, 'is', \ valid_headword valid_pred = valid_fn(valid_input) valid_op.append(valid_pred) print 'The validation prediction is', ' '.join([mappings_words[idx] for idx in valid_op]) print 'Time taken by this epoch is', time.time()-t1 print 'Time taken by __main__ and training is', time.time()-t0 print 'Total words seen is', words_seen print 'Total words matched is', match_idx print 'Ratio is', match_idx/words_seen
from __future__ import absolute_import, print_function, division import time import numpy import theano from theano import tensor as tt from six.moves import xrange from theano.ifelse import ifelse a, b = tt.scalars('a', 'b') x, y = tt.matrices('x', 'y') z_switch = tt.switch(tt.lt(a, b), tt.mean(x), tt.mean(y)) z_lazy = ifelse(tt.lt(a, b), tt.mean(x), tt.mean(y)) f_switch = theano.function([a, b, x, y], z_switch) f_lazyifelse = theano.function([a, b, x, y], z_lazy) val1 = 0. val2 = 1. big_mat1 = numpy.ones((10000, 1000)) big_mat2 = numpy.ones((10000, 1000)) n_times = 10 tic = time.clock() for i in xrange(n_times): f_switch(val1, val2, big_mat1, big_mat2) print('time spent evaluating both values %f sec' % (time.clock() - tic))
def test_clone(self): # Data for unit testing X_unit = ['abcdef', 'abcdef', 'qwerty'] X_unit = [[ord(c) for c in w] for w in X_unit] X_unit = np.array(X_unit, dtype='int8') n_alerts_unit, l_alerts_unit = X_unit.shape mask_unit = np.ones(X_unit.shape, dtype='int8') # Dimensions n_alerts = None l_alerts = None n_alphabet = 2**7 # All ASCII chars num_units = 10 # Symbolic variables input_var, input_var2 = T.imatrices('inputs', 'inputs2') mask_var, mask_var2 = T.matrices('masks', 'masks2') target_var = T.dvector('targets') # build net for testing l_in = InputLayer(shape=(n_alerts, l_alerts), input_var=input_var, name='INPUT-LAYER') l_emb = EmbeddingLayer(l_in, n_alphabet, n_alphabet, W=np.eye(n_alphabet), name='EMBEDDING-LAYER') l_emb.params[l_emb.W].remove('trainable') # Fix weight l_mask = InputLayer(shape=(n_alerts, l_alerts), input_var=mask_var, name='MASK-INPUT-LAYER') l_lstm = LSTMLayer(l_emb, num_units=num_units, name='LSTM-LAYER', mask_input=l_mask) l_slice = SliceLayer(l_lstm, indices=-1, axis=1, name="SLICE-LAYER") # Only last timestep net = l_slice # clone l_in2 = InputLayer(shape=(n_alerts, l_alerts), input_var=input_var2, name='INPUT-LAYER2') l_mask2 = InputLayer(shape=(n_alerts, l_alerts), input_var=mask_var2, name='MASK-INPUT-LAYER2') net2 = lstm_rnn_tied_weights.clone(net, l_in2, l_mask2) self.assertNotEqual(repr(net), repr(net2)) pred_unit = layers.get_output(net, inputs={ l_in: input_var, l_mask: mask_var }).eval({ input_var: X_unit, mask_var: mask_unit }) pred_unit2 = layers.get_output(net2, inputs={ l_in2: input_var2, l_mask2: mask_var2 }).eval({ input_var2: X_unit, mask_var2: mask_unit }) self.assert_array_equal(pred_unit, pred_unit2)
def test_gpu_rowwise_switch(): assert theano.config.device.startswith("gpu"), "Need to test on GPU!" data = [ # 4 x 2 (np.array([[ 0.22323515, 0.36703175], [ 0.82260513, 0.3461504 ], [ 0.82362652, 0.81626087], [ 0.95270008, 0.2226797 ]]), np.array([[ 0.36341551, 0.20102882], [ 0.24144639, 0.45237923], [ 0.39951822, 0.7348066 ], [ 0.16649647, 0.60306537]]), np.array([1, 0, 1, 1]), np.array([[ 0.22323515, 0.36703175], [ 0.24144639, 0.45237923], [ 0.82362652, 0.81626087], [ 0.95270008, 0.2226797 ]])), # 2 x 3 x 4 (np.array([[[ 0.48769062, 0.82649632, 0.2047115 , 0.41437615], [ 0.25290664, 0.87164914, 0.80968588, 0.49295084], [ 0.71438099, 0.97913502, 0.37598001, 0.76958707]], [[ 0.37605973, 0.538358 , 0.74304674, 0.84346291], [ 0.95310617, 0.61540292, 0.49881143, 0.1028554 ], [ 0.83481996, 0.90969569, 0.40410424, 0.34419989]]]), np.array([[[ 0.7289117 , 0.97323253, 0.19070121, 0.64164653], [ 0.26816493, 0.76093069, 0.95284825, 0.77350426], [ 0.55415519, 0.39431256, 0.86588665, 0.50031027]], [[ 0.1980869 , 0.7753601 , 0.26810868, 0.3628802 ], [ 0.2488143 , 0.21278388, 0.09724567, 0.58457886], [ 0.12295105, 0.75321368, 0.37258797, 0.27756972]]]), np.array([1, 0]), np.array([[[ 0.48769062, 0.82649632, 0.2047115 , 0.41437615], [ 0.25290664, 0.87164914, 0.80968588, 0.49295084], [ 0.71438099, 0.97913502, 0.37598001, 0.76958707]], [[ 0.1980869 , 0.7753601 , 0.26810868, 0.3628802 ], [ 0.2488143 , 0.21278388, 0.09724567, 0.58457886], [ 0.12295105, 0.75321368, 0.37258797, 0.27756972]]])) ] A2, B2 = T.matrices("AB") A3, B3 = T.tensor3("A"), T.tensor3("B") mask = T.ivector("mask") switch2 = T.switch(mask.dimshuffle(0, "x"), A2, B2) switch3 = T.switch(mask.dimshuffle(0, "x", "x"), A3, B3) f2 = theano.function([A2, B2, mask], switch2) f3 = theano.function([A3, B3, mask], switch3) print "Graph of 2dim switch:" theano.printing.debugprint(f2.maker.fgraph.outputs[0]) print "Graph of 3dim switch:" theano.printing.debugprint(f3.maker.fgraph.outputs[0]) for instance in data: # Retrieve appropriate function func = f2 if instance[0].ndim == 2 else f3 # Cast to float-friendly types instance = [x.astype(np.float32) if x.dtype.kind == 'f' else x.astype(np.int32) for x in instance] yield tuple([_test_gpu_rowwise_switch_inner, func] + instance)
def __init__(self, input, n_in, n_out): hidden_size = 36 batch_size = 32 self._w_h = init_weights((n_in, hidden_size)) self._b_h = init_b_weights((1, hidden_size)) # self._b_h = init_b_weights((hidden_size,)) self._w_h2 = init_weights((hidden_size, hidden_size)) self._b_h2 = init_b_weights((1, hidden_size)) # self._b_h2 = init_b_weights((hidden_size,)) # self._w_o = init_tanh(hidden_size, n_out) self._w_o = init_weights((hidden_size, n_out)) self._b_o = init_b_weights((1, n_out)) # self._b_o = init_b_weights((n_out,)) self.updateTargetModel() self._w_h_old = init_weights((n_in, hidden_size)) self._w_h2_old = init_weights((hidden_size, hidden_size)) self._w_o_old = init_tanh(hidden_size, n_out) # print ("Initial W " + str(self._w_o.get_value()) ) self._learning_rate = 0.00025 self._discount_factor = 0.99 self._weight_update_steps = 5000 self._updates = 0 # data types for model State = T.dmatrix("State") State.tag.test_value = np.random.rand(batch_size, 2) ResultState = T.dmatrix("ResultState") ResultState.tag.test_value = np.random.rand(batch_size, 2) Reward = T.col("Reward") Reward.tag.test_value = np.random.rand(batch_size, 1) Action = T.icol("Action") Action.tag.test_value = np.zeros((batch_size, 1), dtype=np.dtype('int32')) # Q_val = T.fmatrix() # model = T.nnet.sigmoid(T.dot(State, self._w) + self._b.reshape((1, -1))) # self._model = theano.function(inputs=[State], outputs=model, allow_input_downcast=True) _py_xA = self.model(State, self._w_h, self._b_h, self._w_h2, self._b_h2, self._w_o, self._b_o, 0.0, 0.0) _py_xB = self.model(State, self._w_h_old, self._b_h_old, self._w_h2_old, self._b_h2_old, self._w_o_old, self._b_o_old, 0.0, 0.0) self._y_predA = T.argmax(_py_xA, axis=1) self._y_predB = T.argmax(_py_xB, axis=1) self._q_funcA = T.mean( (self.model(State, self._w_h, self._b_h, self._w_h2, self._b_h2, self._w_o, self._b_o, 0.0, 0.0))[T.arange(batch_size), Action.reshape((-1, ))].reshape((-1, 1))) self._q_funcB = T.mean( (self.model(State, self._w_h_old, self._b_h_old, self._w_h2_old, self._b_h2_old, self._w_o_old, self._b_o_old, 0.0, 0.0))[T.arange(batch_size), Action.reshape((-1, ))].reshape((-1, 1))) # q_val = py_x # noisey_q_val = self.model(ResultState, self._w_h, self._b_h, self._w_h2, self._b_h2, self._w_o, self._b_o, 0.2, 0.5) # L1 norm ; one regularization option is to enforce L1 norm to # be small self._L1_A = (abs(self._w_h).sum() + abs(self._w_h2).sum() + abs(self._w_o).sum()) self._L1_B = (abs(self._w_h_old).sum() + abs(self._w_h2_old).sum() + abs(self._w_o_old).sum()) self._L1_reg = 0.0 self._L2_reg = 0.001 # L2 norm ; one regularization option is to enforce # L2 norm to be small self._L2_A = ((self._w_h**2).sum() + (self._w_h2**2).sum() + (self._w_o**2).sum()) self._L2_B = ((self._w_h_old**2).sum() + (self._w_h2_old**2).sum() + (self._w_o_old**2).sum()) # cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y)) # delta = ((Reward.reshape((-1, 1)) + (self._discount_factor * T.max(self.model(ResultState), axis=1, keepdims=True)) ) - self.model(State)) deltaA = ((Reward + (self._discount_factor * T.max(self.model( ResultState, self._w_h_old, self._b_h_old, self._w_h2_old, self._b_h2_old, self._w_o_old, self._b_o_old, 0.2, 0.5), axis=1, keepdims=True))) - (self.model(State, self._w_h, self._b_h, self._w_h2, self._b_h2, self._w_o, self._b_o, 0.2, 0.5))[T.arange(Action.shape[0]), Action.reshape((-1, ))].reshape((-1, 1))) deltaB = ( (Reward + (self._discount_factor * T.max(self.model(ResultState, self._w_h, self._b_h, self._w_h2, self._b_h2, self._w_o, self._b_o, 0.2, 0.5), 
axis=1, keepdims=True))) - (self.model(State, self._w_h_old, self._b_h_old, self._w_h2_old, self._b_h2_old, self._w_o_old, self._b_o_old, 0.2, 0.5))[T.arange(Action.shape[0]), Action.reshape((-1, ))].reshape((-1, 1))) # bellman_cost = T.mean( 0.5 * ((delta) ** 2 )) bellman_costA = T.mean(0.5 * ((deltaA)**2)) + ( self._L2_reg * self._L2_A) + (self._L1_reg * self._L1_A) bellman_costB = T.mean(0.5 * ((deltaB)**2)) + ( self._L2_reg * self._L2_B) + (self._L1_reg * self._L1_B) paramsA = [ self._w_h, self._b_h, self._w_h2, self._b_h2, self._w_o, self._b_o ] paramsB = [ self._w_h_old, self._b_h_old, self._w_h2_old, self._b_h2_old, self._w_o_old, self._b_o_old ] # updates = sgd(bellman_cost, params, lr=self._learning_rate) updatesA = rlTDSGD(self._q_funcA, T.mean(deltaA), paramsA, lr=self._learning_rate) updatesB = rlTDSGD(self._q_funcB, T.mean(deltaB), paramsB, lr=self._learning_rate) # updates = RMSprop(bellman_cost, params, lr=self._learning_rate) # updates = RMSpropRL(q_func, T.mean(delta), params, lr=self._learning_rate) # updates = lasagne.updates.rmsprop(bellman_cost, params, self._learning_rate, 0.95, 0.01) # updatesA = lasagne.updates.rmsprop(self._q_funcA, paramsA, self._learning_rate * -T.mean(deltaA), 0.95, 0.01) # updatesB = lasagne.updates.rmsprop(self._q_funcB, paramsB, self._learning_rate * -T.mean(deltaB), 0.95, 0.01) self._trainA = theano.function( inputs=[State, Action, Reward, ResultState], outputs=bellman_costA, updates=updatesA, allow_input_downcast=True) self._trainB = theano.function( inputs=[State, Action, Reward, ResultState], outputs=bellman_costB, updates=updatesB, allow_input_downcast=True) self._bellman_errorA = theano.function( inputs=[State, Action, Reward, ResultState], outputs=deltaA, allow_input_downcast=True) self._bellman_errorB = theano.function( inputs=[State, Action, Reward, ResultState], outputs=deltaB, allow_input_downcast=True) self._q_valuesA = theano.function(inputs=[State], outputs=_py_xA, allow_input_downcast=True) self._q_valuesB = theano.function(inputs=[State], outputs=_py_xB, allow_input_downcast=True) self._py_xA = theano.function(inputs=[State], outputs=_py_xA, allow_input_downcast=True) self._py_xB = theano.function(inputs=[State], outputs=_py_xB, allow_input_downcast=True) x, y = T.matrices('x', 'y') z_lazy = ifelse(T.gt(T.max(x, axis=1)[0], T.max(y, axis=1)[0]), T.argmax(x, axis=1), T.argmax(y, axis=1)) self._f_lazyifelse = theano.function([x, y], z_lazy, mode=theano.Mode(linker='vm'))