Example #1
    def test_multiple_outputs(self):
        m = tensor.matrix('m')
        v = tensor.vector('v')
        m_ = tensor.matrix('m_')
        v_ = tensor.vector('v_')

        mval = self.rng.uniform(size=(3, 7)).astype(theano.config.floatX)
        vval = self.rng.uniform(size=(7, )).astype(theano.config.floatX)
        m_val = self.rng.uniform(size=(3, 7)).astype(theano.config.floatX)
        v_val = self.rng.uniform(size=(7, )).astype(theano.config.floatX)

        rop_out1 = tensor.Rop([m, v, m + v], [m, v], [m_, v_])
        rop_out12 = tensor.Rop_via_Lop([m, v, m + v], [m, v], [m_, v_])
        assert isinstance(rop_out1, list)
        assert isinstance(rop_out12, list)
        assert len(rop_out1) == 3
        assert len(rop_out12) == 3
        rop_out2 = tensor.Rop((m, v, m + v), [m, v], [m_, v_])
        rop_out22 = tensor.Rop_via_Lop((m, v, m + v), [m, v], [m_, v_])
        assert isinstance(rop_out2, tuple)
        assert isinstance(rop_out22, tuple)
        assert len(rop_out2) == 3
        assert len(rop_out22) == 3

        all_outs = []
        for o in rop_out1, rop_out2:
            all_outs.extend(o)
        f = theano.function([m, v, m_, v_], all_outs)
        f(mval, vval, m_val, v_val)

        all_outs = []
        for o in rop_out12, rop_out22:
            all_outs.extend(o)
        f = theano.function([m, v, m_, v_], all_outs)
        f(mval, vval, m_val, v_val)
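These tests exercise Rop's handling of multiple outputs. As a reminder of the semantics, here is a minimal sketch (assuming only a standard Theano install; variable names are illustrative, not part of the original test) showing that Rop computes a Jacobian-vector product, which for y = m + v reduces to m_ + v_:

import numpy as np
import theano
import theano.tensor as tensor

m = tensor.matrix('m')
v = tensor.vector('v')
m_ = tensor.matrix('m_')
v_ = tensor.vector('v_')

# For y = m + v both Jacobians are identities, so the Jacobian-vector
# product J_m . m_ + J_v . v_ is simply m_ + v_ (with v_ broadcast).
jvp = tensor.Rop(m + v, [m, v], [m_, v_])
f = theano.function([m, v, m_, v_], jvp)

rng = np.random.RandomState(0)
mval = rng.uniform(size=(3, 7)).astype(theano.config.floatX)
vval = rng.uniform(size=(7,)).astype(theano.config.floatX)
m_val = rng.uniform(size=(3, 7)).astype(theano.config.floatX)
v_val = rng.uniform(size=(7,)).astype(theano.config.floatX)
assert np.allclose(f(mval, vval, m_val, v_val), m_val + v_val)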
Example #2
def test_rop_lop():
    mx = tensor.matrix('mx')
    mv = tensor.matrix('mv')
    v = tensor.vector('v')
    y = matrix_inverse(mx).sum(axis=0)

    yv = tensor.Rop(y, mx, mv)
    yv2 = tensor.Rop_via_Lop(y, mx, mv)
    rop_f = function([mx, mv], [yv, yv2])

    sy, _ = theano.scan(lambda i, y, x, v: (tensor.grad(y[i], x) * v).sum(),
                        sequences=tensor.arange(y.shape[0]),
                        non_sequences=[y, mx, mv])
    scan_f = function([mx, mv], sy)

    rng = np.random.RandomState(utt.fetch_seed())
    vx = np.asarray(rng.randn(4, 4), theano.config.floatX)
    vv = np.asarray(rng.randn(4, 4), theano.config.floatX)

    v1 = scan_f(vx, vv)
    v2, v3 = rop_f(vx, vv)

    assert _allclose(v2, v1), ('Rop mismatch: %s %s' % (v2, v1))
    assert _allclose(v3, v1), ('Rop_via_Lop mismatch: %s %s' % (v3, v1))

    raised = False
    try:
        tensor.Rop(theano.clone(y, replace={mx: break_op(mx)}), mx, mv)
    except ValueError:
        raised = True
    if not raised:
        raise Exception(('Op did not raise an error even though the function'
                         ' is not differentiable'))

    raised = False
    try:
        tensor.Rop_via_Lop(theano.clone(y, replace={mx: break_op(mx)}), mx, mv)
    except theano.gradient.NullTypeGradError:
        raised = True
    except theano.gradient.DisconnectedInputError:
        raised = True

    if not raised:
        raise Exception((
            'Rop_via_Lop for Op did not raise an error even though the function'
            ' is not differentiable'))

    vv = np.asarray(rng.uniform(size=(4, )), theano.config.floatX)
    yv = tensor.Lop(y, mx, v)
    lop_f = function([mx, v], yv)

    sy = tensor.grad((v * y).sum(), mx)
    scan_f = function([mx, v], sy)

    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert _allclose(v1, v2), ('LOP mismatch: %s %s' % (v1, v2))
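The test above checks Rop and Lop against scan-based references separately; the two operators are also adjoint to each other. A small sketch of that duality check (an illustration assuming a working Theano setup, not part of the original test):

import numpy as np
import theano
import theano.tensor as tensor

x = tensor.vector('x')
y = tensor.tanh(x)      # any differentiable op would do here
u = tensor.vector('u')  # direction for the R-operator
w = tensor.vector('w')  # direction for the L-operator

# w . Rop(y, x, u) and Lop(y, x, w) . u contract the same Jacobian J
# from opposite sides, so both should equal w^T J u.
lhs = (w * tensor.Rop(y, x, u)).sum()
rhs = (tensor.Lop(y, x, w) * u).sum()
f = theano.function([x, u, w], [lhs, rhs])

rng = np.random.RandomState(42)
xv, uv, wv = [rng.randn(5).astype(theano.config.floatX) for _ in range(3)]
a, b = f(xv, uv, wv)
assert np.allclose(a, b)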
Example #3
    def test_conv(self):
        for conv_op in [conv.conv2d, conv2d]:
            for border_mode in ['valid', 'full']:
                image_shape = (2, 2, 4, 5)
                filter_shape = (2, 2, 2, 3)
                image_dim = len(image_shape)
                filter_dim = len(filter_shape)
                input = tensor.TensorType(theano.config.floatX,
                                          [False] * image_dim)(name='input')
                filters = tensor.TensorType(theano.config.floatX, [False] *
                                            filter_dim)(name='filter')
                ev_input = tensor.TensorType(theano.config.floatX, [False] *
                                             image_dim)(name='ev_input')
                ev_filters = tensor.TensorType(theano.config.floatX, [False] *
                                               filter_dim)(name='ev_filters')

                def sym_conv2d(input, filters):
                    return conv_op(input, filters, border_mode=border_mode)

                output = sym_conv2d(input, filters).flatten()
                yv = tensor.Rop(output, [input, filters],
                                [ev_input, ev_filters])
                yv2 = tensor.Rop_via_Lop(output, [input, filters],
                                         [ev_input, ev_filters])
                mode = None
                if theano.config.mode == "FAST_COMPILE":
                    mode = "FAST_RUN"
                rop_f = function([input, filters, ev_input, ev_filters],
                                 [yv, yv2],
                                 on_unused_input='ignore',
                                 mode=mode)
                sy, _ = theano.scan(lambda i, y, x1, x2, v1, v2:
                                    (tensor.grad(y[i], x1) * v1).sum() +
                                    (tensor.grad(y[i], x2) * v2).sum(),
                                    sequences=tensor.arange(output.shape[0]),
                                    non_sequences=[
                                        output, input, filters, ev_input,
                                        ev_filters
                                    ],
                                    mode=mode)
                scan_f = function([input, filters, ev_input, ev_filters],
                                  sy,
                                  on_unused_input='ignore',
                                  mode=mode)
                dtype = theano.config.floatX
                image_data = np.random.random(image_shape).astype(dtype)
                filter_data = np.random.random(filter_shape).astype(dtype)
                ev_image_data = np.random.random(image_shape).astype(dtype)
                ev_filter_data = np.random.random(filter_shape).astype(dtype)
                v1, v2 = rop_f(image_data, filter_data, ev_image_data,
                               ev_filter_data)
                v3 = scan_f(image_data, filter_data, ev_image_data,
                            ev_filter_data)
                assert np.allclose(v1, v3), ("Rop mismatch: %s %s" % (v1, v3))
                assert np.allclose(v2, v3), ("Rop_via_Lop mismatch: %s %s" %
                                             (v2, v3))
Example #4
    def test_invalid_input(self):
        success = False

        try:
            tensor.Rop(0., [tensor.matrix()], [tensor.vector()])
            tensor.Rop_via_Lop(0., [tensor.matrix()], [tensor.vector()])
            success = True
        except ValueError:
            pass

        assert not success
Example #5
    def test_downsample(self):
        rng = np.random.RandomState(utt.fetch_seed())
        # ws, shp
        examples = (
            ((2,), (16,)),
            ((2,), (4, 16)),
            ((2,), (4, 2, 16)),
            ((1, 1), (4, 2, 16, 16)),
            ((2, 2), (4, 2, 16, 16)),
            ((3, 3), (4, 2, 16, 16)),
            ((3, 2), (4, 2, 16, 16)),
            ((3, 2, 2), (3, 2, 16, 16, 16)),
            ((2, 3, 2), (3, 2, 16, 16, 16)),
            ((2, 2, 3), (3, 2, 16, 16, 16)),
            ((2, 2, 3, 2), (3, 2, 6, 6, 6, 5)),
        )

        for example, ignore_border in itertools.product(
                examples, [True, False]):
            (ws, shp) = example
            vx = rng.rand(*shp)
            vex = rng.rand(*shp)

            x = theano.shared(vx)
            ex = theano.shared(vex)

            maxpool_op = Pool(ignore_border, ndim=len(ws))
            a_pooled = maxpool_op(x, ws).flatten()
            yv = tensor.Rop(a_pooled, x, ex)
            yv2 = tensor.Rop_via_Lop(a_pooled, x, ex)
            mode = None
            if theano.config.mode == "FAST_COMPILE":
                mode = "FAST_RUN"
            rop_f = function([], [yv, yv2],
                             on_unused_input='ignore',
                             mode=mode)
            sy, _ = theano.scan(lambda i, y, x, v:
                                (tensor.grad(y[i], x) * v).sum(),
                                sequences=tensor.arange(a_pooled.shape[0]),
                                non_sequences=[a_pooled, x, ex],
                                mode=mode)
            scan_f = function([], sy, on_unused_input='ignore', mode=mode)
            v1, v2 = rop_f()
            v3 = scan_f()
            assert np.allclose(v1, v3), ("Rop mismatch: %s %s" % (v1, v3))
            assert np.allclose(v2, v3), (
                "Rop_via_Lop mismatch: %s %s" % (v2, v3))
Example #6
    def check_rop_lop(self, y, out_shape):
        """
        As check_mat_rop_lop, except the input is self.x, which is a
        vector. The output is still a vector.
        """
        # TEST ROP
        vx = np.asarray(self.rng.uniform(size=self.in_shape),
                        theano.config.floatX)
        vv = np.asarray(self.rng.uniform(size=self.in_shape),
                        theano.config.floatX)

        yv = tensor.Rop(y, self.x, self.v)
        yv2 = tensor.Rop_via_Lop(y, self.x, self.v)
        rop_f = function([self.x, self.v], [yv, yv2], on_unused_input='ignore')
        J, _ = theano.scan(lambda i, y, x: tensor.grad(y[i], x),
                           sequences=tensor.arange(y.shape[0]),
                           non_sequences=[y, self.x])
        sy = tensor.dot(J, self.v)

        scan_f = function([self.x, self.v], sy, on_unused_input='ignore')

        v1, v2 = rop_f(vx, vv)
        v3 = scan_f(vx, vv)
        assert np.allclose(v1, v3), ('ROP mismatch: %s %s' % (v1, v3))
        assert np.allclose(v2, v3), ('ROP_VIA_LOP mismatch: %s %s' % (v2, v3))
        known_fail = False
        try:
            self.check_nondiff_rop(
                theano.clone(y, replace={self.x: break_op(self.x)}))
        except AssertionError:
            known_fail = True

        # TEST LOP

        vx = np.asarray(self.rng.uniform(size=self.in_shape),
                        theano.config.floatX)
        vv = np.asarray(self.rng.uniform(size=out_shape), theano.config.floatX)

        yv = tensor.Lop(y, self.x, self.v)
        lop_f = function([self.x, self.v], yv, on_unused_input='ignore')
        J, _ = theano.scan(lambda i, y, x: tensor.grad(y[i], x),
                           sequences=tensor.arange(y.shape[0]),
                           non_sequences=[y, self.x])
        sy = tensor.dot(self.v, J)

        scan_f = function([self.x, self.v], sy)

        v1 = lop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert np.allclose(v1, v2), ('LOP mismatch: %s %s' % (v1, v2))

        if known_fail:
            raise SkipTest('Rop does not handle non-differentiable inputs '
                           'correctly. Bug exposed by fixing Add.grad method.')
Example #7
    def check_mat_rop_lop(self, y, out_shape):
        """
        Test the Rop/Lop when input is a matrix and the output is a vector

        :param y: the output variable of the op applied to self.mx
        :param out_shape: Used to generate a random tensor
                          corresponding to the evaluation point of the Rop
                          (i.e. the tensor with which you multiply the
                          Jacobian). It should be a tuple of ints.

        If the Op has more than one input, one of them must be mx, while
        the others must be shared variables / constants. We test only
        against the input self.mx, so you must call
        check_mat_rop_lop/check_rop_lop for the other inputs.

        We expect all inputs/outputs to have dtype floatX.

        If you want to test an Op with an output matrix, add a sum
        after the Op you want to test.
        """
        vx = np.asarray(self.rng.uniform(size=self.mat_in_shape),
                        theano.config.floatX)
        vv = np.asarray(self.rng.uniform(size=self.mat_in_shape),
                        theano.config.floatX)
        yv = tensor.Rop(y, self.mx, self.mv)
        yv2 = tensor.Rop_via_Lop(y, self.mx, self.mv)
        rop_f = function([self.mx, self.mv], [yv, yv2],
                         on_unused_input='ignore')
        sy, _ = theano.scan(lambda i, y, x, v:
                            (tensor.grad(y[i], x) * v).sum(),
                            sequences=tensor.arange(y.shape[0]),
                            non_sequences=[y, self.mx, self.mv])
        scan_f = function([self.mx, self.mv], sy, on_unused_input='ignore')

        v1, v2 = rop_f(vx, vv)
        v3 = scan_f(vx, vv)

        assert np.allclose(v1, v3), ('ROP mismatch: %s %s' % (v1, v3))
        assert np.allclose(v2, v3), ('ROP_VIA_LOP mismatch: %s %s' % (v2, v3))

        self.check_nondiff_rop(
            theano.clone(y, replace={self.mx: break_op(self.mx)}))

        vv = np.asarray(self.rng.uniform(size=out_shape), theano.config.floatX)
        yv = tensor.Lop(y, self.mx, self.v)
        lop_f = function([self.mx, self.v], yv)

        sy = tensor.grad((self.v * y).sum(), self.mx)
        scan_f = function([self.mx, self.v], sy)

        v1 = lop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert np.allclose(v1, v2), ('LOP mismatch: %s %s' % (v1, v2))
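As the docstring notes, an Op with a matrix output can be reduced to this vector-output harness by appending a sum. A minimal sketch of that pattern, to be used inside a test method of this class (the op and out_shape are illustrative assumptions):

# Reduce a matrix-valued op, e.g. dot(mx, mx), to a vector output first:
y = tensor.dot(self.mx, self.mx).sum(axis=0)
self.check_mat_rop_lop(y, (self.mat_in_shape[1],))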
Example #8
    def test_Rop_dot_bug_18Oct2013_Jeremiah(self):
        # This test refers to a bug reported by Jeremiah Lowin on 18th Oct
        # 2013. The bug occurs when, through a dot operation, there is only
        # one differentiable path (i.e. there is no gradient with respect to
        # one of the inputs).
        x = tensor.arange(20.0).reshape([1, 20])
        v = theano.shared(np.ones([20]))
        d = tensor.dot(x, v).sum()
        tensor.Rop(tensor.grad(d, v), v, v)
        # Note that technically we need disconnected_outputs here, as the
        # gradient is independent of v.
        tensor.Rop_via_Lop(tensor.grad(d, v),
                           v,
                           v,
                           disconnected_outputs="ignore")
Example #9
    def __call__(self, v, cost, parameters, damp):
        # compute Gauss-Newton Matrix right-multiplied by `v`
        if self.use_rop_via_lop:
            Jv = T.Rop_via_Lop(self._s, parameters, v)
        else:
            Jv = T.Rop(self._s, parameters, v)
        HJv = T.grad(T.sum(T.grad(cost, self._s) * Jv),
                     self._s,
                     consider_constant=[Jv])
        JHJv = T.grad(T.sum(HJv * self._s),
                      parameters,
                      consider_constant=[HJv, Jv])

        # apply Tikhonov damping
        JHJv = [JHJvi + damp * vi for JHJvi, vi in zip(JHJv, v)]
        return JHJv
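A hypothetical usage sketch for the operator above, restating its body for a single parameter (the least-squares model, variable names, and shapes are assumptions added for illustration): it builds the damped Gauss-Newton product G v = J^T H J v + damp * v and compiles it.

import numpy as np
import theano
import theano.tensor as T

X = T.matrix('X')
t = T.vector('t')
W = theano.shared(np.zeros(3, dtype=theano.config.floatX), name='W')
s = T.dot(X, W)                  # network output, plays the role of self._s
cost = ((s - t) ** 2).mean()

v = T.vector('v')                # direction in parameter space
Jv = T.Rop(s, W, v)              # J v
HJv = T.grad(T.sum(T.grad(cost, s) * Jv), s, consider_constant=[Jv])
JHJv = T.grad(T.sum(HJv * s), W, consider_constant=[HJv, Jv])

damp = T.scalar('damp')
Gv = JHJv + damp * v             # Tikhonov-damped Gauss-Newton product
f = theano.function([X, t, v, damp], Gv)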
Example #10
    def test_rop(self, cls_ofg):
        a = T.vector()
        M = T.matrix()
        b = T.dot(a, M)
        op_matmul = cls_ofg([a, M], [b])
        x = T.vector()
        W = T.matrix()
        y = op_matmul(x, W)
        du = T.vector()
        dv = T.Rop(y, x, du)
        # This used to fail because of an issue with OpFromGraph, which has
        # been resolved. See https://github.com/Theano/Theano/pull/6400
        dv2 = T.Rop_via_Lop(y, x, du)
        fn = function([x, W, du], [dv, dv2])
        xval = np.random.rand(16).astype(config.floatX)
        Wval = np.random.rand(16, 16).astype(config.floatX)
        duval = np.random.rand(16).astype(config.floatX)
        dvval = np.dot(duval, Wval)
        dvval2, dvval3 = fn(xval, Wval, duval)
        assert np.allclose(dvval2, dvval)
        assert np.allclose(dvval3, dvval)
Example #11
    def check_nondiff_rop(self, y):
        """
        If your op is not differentiable (so you cannot define Rop),
        test that an error is raised.
        """
        raised = False
        try:
            tensor.Rop(y, self.x, self.v)
        except ValueError:
            raised = True
        if not raised:
            self.fail(('Op did not raise an error even though the function'
                       ' is not differentiable'))
        raised = False
        try:
            tensor.Rop_via_Lop(y, self.x, self.v)
        except theano.gradient.NullTypeGradError:
            raised = True
        except theano.gradient.DisconnectedInputError:
            raised = True

        if not raised:
            self.fail((
                'Rop_via_Lop for Op did not raise an error even though the function'
                ' is not differentiable'))
Example #12
def test_pool2d():
    shps = [
        (1, 12),
        (1, 1, 12),
        (1, 1, 1, 12),
        (1, 1, 2, 2),
        (1, 1, 1, 1),
        (1, 1, 4, 4),
        (1, 1, 10, 11),
        (1, 2, 2, 2),
        (3, 5, 4, 4),
        (25, 1, 7, 7),
        (1, 1, 12, 12),
        (1, 1, 2, 14),
        (1, 1, 12, 14),
        (1, 1, 14, 14),
        (1, 1, 16, 16),
        (1, 1, 18, 18),
        (1, 1, 24, 24),
        (1, 6, 24, 24),
        (10, 1, 24, 24),
        (10, 6, 24, 24),
        (30, 6, 12, 12),
        (30, 2, 24, 24),
        (30, 6, 24, 24),
        (10, 10, 10, 11),
        (1, 1, 10, 1025),
        (1, 1, 10, 1023),
        (1, 1, 1025, 10),
        (1, 1, 1023, 10),
        (3, 2, 16, 16, 16),
        (3, 2, 6, 6, 6, 5),
        (3, 2, 6, 6, 6, 5, 7),
    ]

    np.random.RandomState(utt.fetch_seed()).shuffle(shps)
    test_ws = (2, 2), (3, 2), (1, 1)
    test_st = (2, 2), (3, 2), (1, 1)
    test_mode = ['max', 'sum', 'average_inc_pad', 'average_exc_pad']

    ref_mode = copy.copy(mode_without_gpu)
    ref_mode.check_py_code = False
    gpu_mode = mode_with_gpu.excluding("cudnn")
    gpu_mode.check_py_code = False

    for shp in shps:
        for mode, ws, st in itertools.product(test_mode, test_ws, test_st):
            if ws[0] > shp[-2] or ws[1] > shp[-1]:
                continue
            for ignore_border, pad in zip((True, False), [(1, 1), (0, 0)]):
                if pad[0] >= ws[0] or pad[1] >= ws[1]:
                    continue
                if mode == 'average_exc_pad' and (pad[0] > 0 or pad[1] > 0):
                    continue
                # print('test_pool2d', shp, ws, st, pad, mode, ignore_border)
                ds_op = Pool(ndim=len(ws),
                             mode=mode,
                             ignore_border=ignore_border)

                a = theano.shared(rand(*shp), 'a')
                a_pooled = ds_op(tensor.as_tensor_variable(a), ws, st, pad)

                f = theano.function([], a_pooled, mode=gpu_mode)
                f2 = theano.function([], a_pooled, mode=ref_mode)

                assert any([
                    isinstance(node.op, GpuPool)
                    for node in f.maker.fgraph.toposort()
                ])
                assert any([
                    isinstance(node.op, Pool)
                    for node in f2.maker.fgraph.toposort()
                ])
                assert np.allclose(f(), f2()), (shp, ws, st, pad, mode,
                                                ignore_border)

                a_pooled_grad = tensor.grad(a_pooled.sum(), a)

                g = theano.function([], a_pooled_grad, mode=gpu_mode)
                g2 = theano.function([], a_pooled_grad, mode=ref_mode)

                if mode == 'max':
                    gop = GpuMaxPoolGrad
                    gop2 = MaxPoolGrad
                else:
                    gop = GpuAveragePoolGrad
                    gop2 = AveragePoolGrad
                assert any([
                    isinstance(node.op, gop)
                    for node in g.maker.fgraph.toposort()
                ])
                assert any([
                    isinstance(node.op, gop2)
                    for node in g2.maker.fgraph.toposort()
                ])

                assert np.allclose(g(), g2()), (shp, ws, st, pad, mode,
                                                ignore_border)

                # test rop and grad grad for max pooling
                # for average pooling grad grad is just average pooling grad
                if mode != 'max':
                    continue

                ea = theano.shared(rand(*shp), 'ea')

                gr = theano.function([],
                                     tensor.Rop(a_pooled, a, ea),
                                     mode=gpu_mode)
                gr2 = theano.function([],
                                      tensor.Rop(a_pooled, a, ea),
                                      mode=ref_mode)
                gr3 = theano.function([],
                                      tensor.Rop_via_Lop(a_pooled, a, ea),
                                      mode=gpu_mode)
                gr4 = theano.function([],
                                      tensor.Rop_via_Lop(a_pooled, a, ea),
                                      mode=ref_mode)

                assert any([
                    isinstance(node.op, GpuDownsampleFactorMaxGradGrad)
                    for node in gr.maker.fgraph.toposort()
                ])
                assert any([
                    isinstance(node.op, DownsampleFactorMaxGradGrad)
                    for node in gr2.maker.fgraph.toposort()
                ])
                assert any([
                    isinstance(node.op, DownsampleFactorMaxGradGrad)
                    for node in gr3.maker.fgraph.toposort()
                ])
                assert any([
                    isinstance(node.op, DownsampleFactorMaxGradGrad)
                    for node in gr4.maker.fgraph.toposort()
                ])
                assert np.allclose(gr(), gr2()), (shp, ws, st, pad, mode,
                                                  ignore_border)
                assert np.allclose(gr(), gr3()), (shp, ws, st, pad, mode,
                                                  ignore_border)
                assert np.allclose(gr(), gr4()), (shp, ws, st, pad, mode,
                                                  ignore_border)

                ggf = gradient.Lop(tensor.grad((a_pooled**2).sum(), a), a, a)

                gg = theano.function([], ggf, mode=gpu_mode)
                gg2 = theano.function([], ggf, mode=ref_mode)

                assert any([
                    isinstance(node.op, GpuDownsampleFactorMaxGradGrad)
                    for node in gg.maker.fgraph.toposort()
                ])
                assert any([
                    isinstance(node.op, DownsampleFactorMaxGradGrad)
                    for node in gg2.maker.fgraph.toposort()
                ])
                assert np.allclose(gg(), gg2()), (shp, ws, st, pad, mode,
                                                  ignore_border)