def test_multiple_outputs(self):
    m = tensor.matrix('m')
    v = tensor.vector('v')
    m_ = tensor.matrix('m_')
    v_ = tensor.vector('v_')

    mval = self.rng.uniform(size=(3, 7)).astype(theano.config.floatX)
    vval = self.rng.uniform(size=(7,)).astype(theano.config.floatX)
    m_val = self.rng.uniform(size=(3, 7)).astype(theano.config.floatX)
    v_val = self.rng.uniform(size=(7,)).astype(theano.config.floatX)

    rop_out1 = tensor.Rop([m, v, m + v], [m, v], [m_, v_])
    rop_out12 = tensor.Rop_via_Lop([m, v, m + v], [m, v], [m_, v_])
    assert isinstance(rop_out1, list)
    assert isinstance(rop_out12, list)
    assert len(rop_out1) == 3
    assert len(rop_out12) == 3

    rop_out2 = tensor.Rop((m, v, m + v), [m, v], [m_, v_])
    rop_out22 = tensor.Rop_via_Lop((m, v, m + v), [m, v], [m_, v_])
    assert isinstance(rop_out2, tuple)
    assert isinstance(rop_out22, tuple)
    assert len(rop_out2) == 3
    assert len(rop_out22) == 3

    all_outs = []
    for o in rop_out1, rop_out2:
        all_outs.extend(o)
    f = theano.function([m, v, m_, v_], all_outs)
    f(mval, vval, m_val, v_val)

    all_outs = []
    for o in rop_out12, rop_out22:
        all_outs.extend(o)
    f = theano.function([m, v, m_, v_], all_outs)
    f(mval, vval, m_val, v_val)
def test_rop_lop():
    mx = tensor.matrix('mx')
    mv = tensor.matrix('mv')
    v = tensor.vector('v')
    y = matrix_inverse(mx).sum(axis=0)

    yv = tensor.Rop(y, mx, mv)
    yv2 = tensor.Rop_via_Lop(y, mx, mv)
    rop_f = function([mx, mv], [yv, yv2])

    sy, _ = theano.scan(lambda i, y, x, v: (tensor.grad(y[i], x) * v).sum(),
                        sequences=tensor.arange(y.shape[0]),
                        non_sequences=[y, mx, mv])
    scan_f = function([mx, mv], sy)

    rng = np.random.RandomState(utt.fetch_seed())
    vx = np.asarray(rng.randn(4, 4), theano.config.floatX)
    vv = np.asarray(rng.randn(4, 4), theano.config.floatX)

    v1 = scan_f(vx, vv)
    v2, v3 = rop_f(vx, vv)
    assert _allclose(v2, v1), ('Rop mismatch: %s %s' % (v2, v1))
    assert _allclose(v3, v1), ('Rop_via_Lop mismatch: %s %s' % (v3, v1))

    raised = False
    try:
        tensor.Rop(theano.clone(y, replace={mx: break_op(mx)}), mx, mv)
    except ValueError:
        raised = True
    if not raised:
        raise Exception('Op did not raise an error even though the function'
                        ' is not differentiable')

    raised = False
    try:
        tensor.Rop_via_Lop(theano.clone(y, replace={mx: break_op(mx)}),
                           mx, mv)
    except theano.gradient.NullTypeGradError:
        raised = True
    except theano.gradient.DisconnectedInputError:
        raised = True
    if not raised:
        raise Exception('Rop_via_Lop for Op did not raise an error even '
                        'though the function is not differentiable')

    vv = np.asarray(rng.uniform(size=(4,)), theano.config.floatX)
    yv = tensor.Lop(y, mx, v)
    lop_f = function([mx, v], yv)

    sy = tensor.grad((v * y).sum(), mx)
    scan_f = function([mx, v], sy)

    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert _allclose(v1, v2), ('LOP mismatch: %s %s' % (v1, v2))
def test_conv(self):
    for conv_op in [conv.conv2d, conv2d]:
        for border_mode in ['valid', 'full']:
            image_shape = (2, 2, 4, 5)
            filter_shape = (2, 2, 2, 3)
            image_dim = len(image_shape)
            filter_dim = len(filter_shape)

            input = tensor.TensorType(theano.config.floatX,
                                      [False] * image_dim)(name='input')
            filters = tensor.TensorType(theano.config.floatX,
                                        [False] * filter_dim)(name='filter')
            ev_input = tensor.TensorType(theano.config.floatX,
                                         [False] * image_dim)(name='ev_input')
            ev_filters = tensor.TensorType(theano.config.floatX,
                                           [False] * filter_dim)(name='ev_filters')

            def sym_conv2d(input, filters):
                return conv_op(input, filters, border_mode=border_mode)

            output = sym_conv2d(input, filters).flatten()
            yv = tensor.Rop(output, [input, filters], [ev_input, ev_filters])
            yv2 = tensor.Rop_via_Lop(output, [input, filters],
                                     [ev_input, ev_filters])
            mode = None
            if theano.config.mode == "FAST_COMPILE":
                mode = "FAST_RUN"
            rop_f = function([input, filters, ev_input, ev_filters],
                             [yv, yv2], on_unused_input='ignore', mode=mode)
            sy, _ = theano.scan(
                lambda i, y, x1, x2, v1, v2:
                    (tensor.grad(y[i], x1) * v1).sum() +
                    (tensor.grad(y[i], x2) * v2).sum(),
                sequences=tensor.arange(output.shape[0]),
                non_sequences=[output, input, filters, ev_input, ev_filters],
                mode=mode)
            scan_f = function([input, filters, ev_input, ev_filters], sy,
                              on_unused_input='ignore', mode=mode)

            dtype = theano.config.floatX
            image_data = np.random.random(image_shape).astype(dtype)
            filter_data = np.random.random(filter_shape).astype(dtype)
            ev_image_data = np.random.random(image_shape).astype(dtype)
            ev_filter_data = np.random.random(filter_shape).astype(dtype)

            v1, v2 = rop_f(image_data, filter_data,
                           ev_image_data, ev_filter_data)
            v3 = scan_f(image_data, filter_data,
                        ev_image_data, ev_filter_data)
            assert np.allclose(v1, v3), ("Rop mismatch: %s %s" % (v1, v3))
            assert np.allclose(v2, v3), ("Rop_via_Lop mismatch: %s %s" % (v2, v3))
def test_invalid_input(self):
    success = False
    try:
        tensor.Rop(0., [tensor.matrix()], [tensor.vector()])
        tensor.Rop_via_Lop(0., [tensor.matrix()], [tensor.vector()])
        success = True
    except ValueError:
        pass
    assert not success
def test_downsample(self):
    rng = np.random.RandomState(utt.fetch_seed())
    # ws, shp
    examples = (
        ((2,), (16,)),
        ((2,), (4, 16,)),
        ((2,), (4, 2, 16,)),
        ((1, 1), (4, 2, 16, 16)),
        ((2, 2), (4, 2, 16, 16)),
        ((3, 3), (4, 2, 16, 16)),
        ((3, 2), (4, 2, 16, 16)),
        ((3, 2, 2), (3, 2, 16, 16, 16)),
        ((2, 3, 2), (3, 2, 16, 16, 16)),
        ((2, 2, 3), (3, 2, 16, 16, 16)),
        ((2, 2, 3, 2), (3, 2, 6, 6, 6, 5)),
    )

    for example, ignore_border in itertools.product(examples, [True, False]):
        (ws, shp) = example
        vx = rng.rand(*shp)
        vex = rng.rand(*shp)

        x = theano.shared(vx)
        ex = theano.shared(vex)

        maxpool_op = Pool(ignore_border, ndim=len(ws))
        a_pooled = maxpool_op(x, ws).flatten()
        yv = tensor.Rop(a_pooled, x, ex)
        yv2 = tensor.Rop_via_Lop(a_pooled, x, ex)
        mode = None
        if theano.config.mode == "FAST_COMPILE":
            mode = "FAST_RUN"
        rop_f = function([], [yv, yv2], on_unused_input='ignore', mode=mode)
        sy, _ = theano.scan(lambda i, y, x, v: (tensor.grad(y[i], x) * v).sum(),
                            sequences=tensor.arange(a_pooled.shape[0]),
                            non_sequences=[a_pooled, x, ex],
                            mode=mode)
        scan_f = function([], sy, on_unused_input='ignore', mode=mode)

        v1, v2 = rop_f()
        v3 = scan_f()
        assert np.allclose(v1, v3), ("Rop mismatch: %s %s" % (v1, v3))
        assert np.allclose(v2, v3), ("Rop_via_Lop mismatch: %s %s" % (v2, v3))
def check_rop_lop(self, y, out_shape):
    """
    As check_mat_rop_lop, except the input is self.x, which is a
    vector. The output is still a vector.
    """
    # TEST ROP
    vx = np.asarray(self.rng.uniform(size=self.in_shape),
                    theano.config.floatX)
    vv = np.asarray(self.rng.uniform(size=self.in_shape),
                    theano.config.floatX)

    yv = tensor.Rop(y, self.x, self.v)
    yv2 = tensor.Rop_via_Lop(y, self.x, self.v)
    rop_f = function([self.x, self.v], [yv, yv2], on_unused_input='ignore')
    J, _ = theano.scan(lambda i, y, x: tensor.grad(y[i], x),
                       sequences=tensor.arange(y.shape[0]),
                       non_sequences=[y, self.x])
    sy = tensor.dot(J, self.v)

    scan_f = function([self.x, self.v], sy, on_unused_input='ignore')

    v1, v2 = rop_f(vx, vv)
    v3 = scan_f(vx, vv)
    assert np.allclose(v1, v3), ('ROP mismatch: %s %s' % (v1, v3))
    assert np.allclose(v2, v3), ('ROP_VIA_LOP mismatch: %s %s' % (v2, v3))

    known_fail = False
    try:
        self.check_nondiff_rop(
            theano.clone(y, replace={self.x: break_op(self.x)}))
    except AssertionError:
        known_fail = True

    # TEST LOP
    vx = np.asarray(self.rng.uniform(size=self.in_shape),
                    theano.config.floatX)
    vv = np.asarray(self.rng.uniform(size=out_shape),
                    theano.config.floatX)

    yv = tensor.Lop(y, self.x, self.v)
    lop_f = function([self.x, self.v], yv, on_unused_input='ignore')
    J, _ = theano.scan(lambda i, y, x: tensor.grad(y[i], x),
                       sequences=tensor.arange(y.shape[0]),
                       non_sequences=[y, self.x])
    sy = tensor.dot(self.v, J)

    scan_f = function([self.x, self.v], sy)

    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert np.allclose(v1, v2), ('LOP mismatch: %s %s' % (v1, v2))

    if known_fail:
        raise SkipTest('Rop does not handle non-differentiable inputs '
                       'correctly. Bug exposed by fixing Add.grad method.')
def check_mat_rop_lop(self, y, out_shape):
    """
    Test the Rop/Lop when the input is a matrix and the output is a vector.

    :param y: the output variable of the op applied to self.mx
    :param out_shape: Used to generate a random tensor corresponding
                      to the evaluation point of the Rop (i.e. the
                      tensor with which you multiply the Jacobian).
                      It should be a tuple of ints.

    If the Op has more than one input, one of them must be mx, while
    the others must be shared variables / constants. We test only
    against the input self.mx, so you must call
    check_mat_rop_lop/check_rop_lop for the other inputs.

    We expect all inputs/outputs to have dtype floatX.

    If you want to test an Op with a matrix output, add a sum after
    the Op you want to test.
    """
    vx = np.asarray(self.rng.uniform(size=self.mat_in_shape),
                    theano.config.floatX)
    vv = np.asarray(self.rng.uniform(size=self.mat_in_shape),
                    theano.config.floatX)
    yv = tensor.Rop(y, self.mx, self.mv)
    yv2 = tensor.Rop_via_Lop(y, self.mx, self.mv)
    rop_f = function([self.mx, self.mv], [yv, yv2], on_unused_input='ignore')
    sy, _ = theano.scan(lambda i, y, x, v: (tensor.grad(y[i], x) * v).sum(),
                        sequences=tensor.arange(y.shape[0]),
                        non_sequences=[y, self.mx, self.mv])
    scan_f = function([self.mx, self.mv], sy, on_unused_input='ignore')

    v1, v2 = rop_f(vx, vv)
    v3 = scan_f(vx, vv)
    assert np.allclose(v1, v3), ('ROP mismatch: %s %s' % (v1, v3))
    assert np.allclose(v2, v3), ('ROP_VIA_LOP mismatch: %s %s' % (v2, v3))

    self.check_nondiff_rop(
        theano.clone(y, replace={self.mx: break_op(self.mx)}))

    vv = np.asarray(self.rng.uniform(size=out_shape), theano.config.floatX)
    yv = tensor.Lop(y, self.mx, self.v)
    lop_f = function([self.mx, self.v], yv)

    sy = tensor.grad((self.v * y).sum(), self.mx)
    scan_f = function([self.mx, self.v], sy)

    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert np.allclose(v1, v2), ('LOP mismatch: %s %s' % (v1, v2))
def test_Rop_dot_bug_18Oct2013_Jeremiah(self):
    # This test refers to a bug reported by Jeremiah Lowin on 18th Oct
    # 2013. The bug occurs when, through a dot operation, there is only
    # one differentiable path (i.e. there is no gradient wrt one of
    # the inputs).
    x = tensor.arange(20.0).reshape([1, 20])
    v = theano.shared(np.ones([20]))
    d = tensor.dot(x, v).sum()
    tensor.Rop(tensor.grad(d, v), v, v)
    # Note that technically we need disconnected_outputs here, as the
    # gradient is independent of v.
    tensor.Rop_via_Lop(tensor.grad(d, v), v, v,
                       disconnected_outputs="ignore")
def __call__(self, v, cost, parameters, damp):
    # compute the Gauss-Newton matrix right-multiplied by `v`
    if self.use_rop_via_lop:
        Jv = T.Rop_via_Lop(self._s, parameters, v)
    else:
        Jv = T.Rop(self._s, parameters, v)
    # HJv: Hessian of the cost wrt the output `self._s`, applied to Jv.
    # JHJv: J^T H J v, the Gauss-Newton curvature-vector product.
    HJv = T.grad(T.sum(T.grad(cost, self._s) * Jv), self._s,
                 consider_constant=[Jv])
    JHJv = T.grad(T.sum(HJv * self._s), parameters,
                  consider_constant=[HJv, Jv])

    # apply Tikhonov damping
    JHJv = [JHJvi + damp * vi for JHJvi, vi in zip(JHJv, v)]
    return JHJv
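# A minimal, self-contained sketch of the same Jv -> HJv -> J^T H J v pattern
# on a toy tanh layer with a quadratic loss. Everything below (the model, the
# variable names, the shapes) is illustrative only and not part of the code
# above; swapping T.Rop for T.Rop_via_Lop exercises the alternative code path.
import numpy as np
import theano
import theano.tensor as T

floatX = theano.config.floatX
x = T.vector('x')
t = T.vector('t')
W = theano.shared(np.random.randn(3, 3).astype(floatX), name='W')
v = theano.shared(np.random.randn(3, 3).astype(floatX), name='v')

s = T.tanh(T.dot(W, x))        # network output (plays the role of self._s)
cost = ((s - t) ** 2).sum()    # quadratic loss

Jv = T.Rop(s, W, v)                                   # J v
HJv = T.grad(T.sum(T.grad(cost, s) * Jv), s,
             consider_constant=[Jv])                  # H (J v)
JHJv = T.grad(T.sum(HJv * s), W,
              consider_constant=[HJv, Jv])            # J^T H J v

gv_fn = theano.function([x, t], JHJv)
print(gv_fn(np.ones(3, dtype=floatX), np.zeros(3, dtype=floatX)))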
def test_rop(self, cls_ofg):
    a = T.vector()
    M = T.matrix()
    b = T.dot(a, M)
    op_matmul = cls_ofg([a, M], [b])

    x = T.vector()
    W = T.matrix()
    y = op_matmul(x, W)
    du = T.vector()
    dv = T.Rop(y, x, du)
    # This used to fail because of an issue with OpFromGraph; the issue
    # has been resolved. See https://github.com/Theano/Theano/pull/6400
    dv2 = T.Rop_via_Lop(y, x, du)
    fn = function([x, W, du], [dv, dv2])

    xval = np.random.rand(16).astype(config.floatX)
    Wval = np.random.rand(16, 16).astype(config.floatX)
    duval = np.random.rand(16).astype(config.floatX)
    dvval = np.dot(duval, Wval)
    dvval2, dvval3 = fn(xval, Wval, duval)
    assert np.allclose(dvval2, dvval)
    assert np.allclose(dvval3, dvval)
def check_nondiff_rop(self, y):
    """
    If your op is not differentiable (so you can't define Rop), test
    that an error is raised.
    """
    raised = False
    try:
        tensor.Rop(y, self.x, self.v)
    except ValueError:
        raised = True
    if not raised:
        self.fail('Op did not raise an error even though the function'
                  ' is not differentiable')

    raised = False
    try:
        tensor.Rop_via_Lop(y, self.x, self.v)
    except theano.gradient.NullTypeGradError:
        raised = True
    except theano.gradient.DisconnectedInputError:
        raised = True
    if not raised:
        self.fail('Rop_via_Lop for Op did not raise an error even though '
                  'the function is not differentiable')
def test_pool2d():
    shps = [(1, 12), (1, 1, 12), (1, 1, 1, 12), (1, 1, 2, 2),
            (1, 1, 1, 1), (1, 1, 4, 4), (1, 1, 10, 11), (1, 2, 2, 2),
            (3, 5, 4, 4), (25, 1, 7, 7), (1, 1, 12, 12), (1, 1, 2, 14),
            (1, 1, 12, 14), (1, 1, 14, 14), (1, 1, 16, 16), (1, 1, 18, 18),
            (1, 1, 24, 24), (1, 6, 24, 24), (10, 1, 24, 24), (10, 6, 24, 24),
            (30, 6, 12, 12), (30, 2, 24, 24), (30, 6, 24, 24),
            (10, 10, 10, 11), (1, 1, 10, 1025), (1, 1, 10, 1023),
            (1, 1, 1025, 10), (1, 1, 1023, 10), (3, 2, 16, 16, 16),
            (3, 2, 6, 6, 6, 5), (3, 2, 6, 6, 6, 5, 7)]
    np.random.RandomState(utt.fetch_seed()).shuffle(shps)
    test_ws = (2, 2), (3, 2), (1, 1)
    test_st = (2, 2), (3, 2), (1, 1)
    test_mode = ['max', 'sum', 'average_inc_pad', 'average_exc_pad']

    ref_mode = copy.copy(mode_without_gpu)
    ref_mode.check_py_code = False
    gpu_mode = mode_with_gpu.excluding("cudnn")
    gpu_mode.check_py_code = False

    for shp in shps:
        for mode, ws, st in itertools.product(test_mode, test_ws, test_st):
            if ws[0] > shp[-2] or ws[1] > shp[-1]:
                continue
            for ignore_border, pad in zip((True, False), [(1, 1), (0, 0)]):
                if pad[0] >= ws[0] or pad[1] >= ws[1]:
                    continue
                if mode == 'average_exc_pad' and (pad[0] > 0 or pad[1] > 0):
                    continue
                # print('test_pool2d', shp, ws, st, pad, mode, ignore_border)
                ds_op = Pool(ndim=len(ws), mode=mode,
                             ignore_border=ignore_border)

                a = theano.shared(rand(*shp), 'a')
                a_pooled = ds_op(tensor.as_tensor_variable(a), ws, st, pad)

                f = theano.function([], a_pooled, mode=gpu_mode)
                f2 = theano.function([], a_pooled, mode=ref_mode)

                assert any([isinstance(node.op, GpuPool)
                            for node in f.maker.fgraph.toposort()])
                assert any([isinstance(node.op, Pool)
                            for node in f2.maker.fgraph.toposort()])

                assert np.allclose(f(), f2()), (shp, ws, st, pad, mode,
                                                ignore_border)

                a_pooled_grad = tensor.grad(a_pooled.sum(), a)

                g = theano.function([], a_pooled_grad, mode=gpu_mode)
                g2 = theano.function([], a_pooled_grad, mode=ref_mode)

                if mode == 'max':
                    gop = GpuMaxPoolGrad
                    gop2 = MaxPoolGrad
                else:
                    gop = GpuAveragePoolGrad
                    gop2 = AveragePoolGrad
                assert any([isinstance(node.op, gop)
                            for node in g.maker.fgraph.toposort()])
                assert any([isinstance(node.op, gop2)
                            for node in g2.maker.fgraph.toposort()])

                assert np.allclose(g(), g2()), (shp, ws, st, pad, mode,
                                                ignore_border)

                # test rop and grad grad for max pooling
                # for average pooling grad grad is just average pooling grad
                if mode != 'max':
                    continue

                ea = theano.shared(rand(*shp), 'ea')

                gr = theano.function([], tensor.Rop(a_pooled, a, ea),
                                     mode=gpu_mode)
                gr2 = theano.function([], tensor.Rop(a_pooled, a, ea),
                                      mode=ref_mode)
                gr3 = theano.function([], tensor.Rop_via_Lop(a_pooled, a, ea),
                                      mode=gpu_mode)
                gr4 = theano.function([], tensor.Rop_via_Lop(a_pooled, a, ea),
                                      mode=ref_mode)

                assert any([isinstance(node.op, GpuDownsampleFactorMaxGradGrad)
                            for node in gr.maker.fgraph.toposort()])
                assert any([isinstance(node.op, DownsampleFactorMaxGradGrad)
                            for node in gr2.maker.fgraph.toposort()])
                assert any([isinstance(node.op, DownsampleFactorMaxGradGrad)
                            for node in gr3.maker.fgraph.toposort()])
                assert any([isinstance(node.op, DownsampleFactorMaxGradGrad)
                            for node in gr4.maker.fgraph.toposort()])

                assert np.allclose(gr(), gr2()), (shp, ws, st, pad, mode,
                                                  ignore_border)
                assert np.allclose(gr(), gr3()), (shp, ws, st, pad, mode,
                                                  ignore_border)
                assert np.allclose(gr(), gr4()), (shp, ws, st, pad, mode,
                                                  ignore_border)

                ggf = gradient.Lop(tensor.grad((a_pooled ** 2).sum(), a), a, a)

                gg = theano.function([], ggf, mode=gpu_mode)
                gg2 = theano.function([], ggf, mode=ref_mode)

                assert any([isinstance(node.op, GpuDownsampleFactorMaxGradGrad)
                            for node in gg.maker.fgraph.toposort()])
                assert any([isinstance(node.op, DownsampleFactorMaxGradGrad)
                            for node in gg2.maker.fgraph.toposort()])

                assert np.allclose(gg(), gg2()), (shp, ws, st, pad, mode,
                                                  ignore_border)