Example #1
    def test_multiple_outputs(self):
        m = tensor.matrix('m')
        v = tensor.vector('v')
        m_ = tensor.matrix('m_')
        v_ = tensor.vector('v_')

        mval = self.rng.uniform(size=(3, 7)).astype(theano.config.floatX)
        vval = self.rng.uniform(size=(7, )).astype(theano.config.floatX)
        m_val = self.rng.uniform(size=(3, 7)).astype(theano.config.floatX)
        v_val = self.rng.uniform(size=(7, )).astype(theano.config.floatX)

        rop_out1 = tensor.Rop([m, v, m + v], [m, v], [m_, v_])
        assert isinstance(rop_out1, list)
        assert len(rop_out1) == 3
        rop_out2 = tensor.Rop((m, v, m + v), [m, v], [m_, v_])
        assert isinstance(rop_out2, tuple)
        assert len(rop_out2) == 3
        lop_out1 = tensor.Lop([m, v, m + v], (m, v), [m_, v_])
        assert isinstance(lop_out1, tuple)
        assert len(lop_out1) == 2
        lop_out2 = tensor.Lop((m, v, m + v), [m, v], [m_, v_])
        assert isinstance(lop_out2, list)
        assert len(lop_out2) == 2

        all_outs = []
        for o in rop_out1, rop_out2, lop_out1, lop_out2:
            all_outs.extend(o)
        f = theano.function([m, v, m_, v_], all_outs)
        f(mval, vval, m_val, v_val)
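For context: Rop(f, wrt, v) builds the Jacobian-times-vector product of f along v, while Lop(f, wrt, u) builds the vector-times-Jacobian product; the test above exercises the list/tuple handling of both. Below is a minimal single-output sketch, assuming only standard theano and numpy (the variable names are made up for illustration):

import numpy as np
import theano
import theano.tensor as tensor

x = tensor.vector('x')
W = tensor.matrix('W')
y = tensor.dot(W, x)                  # Jacobian of y with respect to x is W

v = tensor.vector('v')                # direction in input space
u = tensor.vector('u')                # weights on the outputs
jvp = tensor.Rop(y, x, v)             # Jacobian-vector product, equals W.dot(v)
vjp = tensor.Lop(y, x, u)             # vector-Jacobian product, equals W.T.dot(u)

# x drops out of both products here, hence on_unused_input='ignore'.
f = theano.function([W, x, v, u], [jvp, vjp], on_unused_input='ignore')

Wval = np.random.rand(3, 5).astype(theano.config.floatX)
xval = np.random.rand(5).astype(theano.config.floatX)
vval = np.random.rand(5).astype(theano.config.floatX)
uval = np.random.rand(3).astype(theano.config.floatX)
jv, vj = f(Wval, xval, vval, uval)    # jv == Wval.dot(vval), vj == Wval.T.dot(uval)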
Example #2
File: net.py Project: yueyub/deepchaos
    def __init__(self,
                 input_dim,
                 n_hidden_units,
                 n_hidden_layers,
                 nonlinearity='tanh',
                 bias_sigma=0.0,
                 weight_sigma=1.25,
                 input_layer=None,
                 flip=False,
                 output_dim=None):
        #if input_layer is not None:
        #    assert input_layer.output_shape[1] == input_dim
        self.input_dim = input_dim
        self.n_hidden_units = n_hidden_units
        self.n_hidden_layers = n_hidden_layers
        self.nonlinearity = nonlinearity
        self.bias_sigma = bias_sigma
        self.weight_sigma = weight_sigma
        self.input_layer = input_layer

        if output_dim is None:
            output_dim = n_hidden_units
        self.output_dim = output_dim

        model = Sequential()
        if input_layer is not None:
            model.add(input_layer)
        for i in xrange(n_hidden_layers):
            nunits = n_hidden_units if i < n_hidden_layers - 1 else output_dim
            if flip:
                model.add(
                    Activation(nonlinearity,
                               input_shape=(input_dim, ),
                               name='_a%d' % i))
                model.add(Dense(nunits, name='_d%d' % i))
            else:
                model.add(
                    Dense(nunits, input_shape=(input_dim, ), name='_d%d' % i))
                if i < n_hidden_layers - 1 or self.output_dim == self.n_hidden_units:
                    model.add(Activation(nonlinearity, name='_a%d' % i))
                else:
                    # Theano optimizes the nonlinearity away when it can, which breaks things downstream.
                    # Give it something that it won't optimize out.
                    model.add(
                        Activation(lambda x: T.minimum(x, 999999.999),
                                   name='_a%d' % i))

        model.build()
        self.model = model
        self.weights = model.get_weights()
        self.dense_layers = filter(lambda x: x.name.startswith('_d'),
                                   model.layers)
        self.hs = [h.output for h in self.dense_layers]
        self.act_layers = filter(lambda x: x.name.startswith('_a'),
                                 model.layers)
        self.f_acts = self.f_jac = self.f_jac_hess = self.f_act = None

        vec = K.ones_like(self.model.input)
        self.Js = [T.Rop(h, self.model.input, vec) for h in self.hs]
        self.Hs = [T.Rop(J, self.model.input, vec) for J in self.Js]
Example #3
            def Gv_step(*gv_args):
                idx = TT.cast(gv_args[0], 'int32')
                nw_inps = [x[idx * options['cbs']: \
                             (idx + 1) * options['cbs']] for x in
                           loc_inputs]
                replace = dict(zip(model.inputs, nw_inps))
                nw_outs = safe_clone(model.gc_outs, replace)
                final_results = dict(
                    zip(model.params, [None] * len(model.params)))
                for nw_out, out_operator in zip(nw_outs,
                                                model.gc_outs_operator):
                    loc_params = [
                        x for x in model.params
                        if x in theano.gof.graph.inputs([nw_out])
                    ]
                    loc_args = [
                        x for x, y in zip(args, model.params)
                        if y in theano.gof.graph.inputs([nw_out])
                    ]
                    if out_operator == 'softmax':
                        factor = const(options['cbs']) * (nw_out + eps)
                    elif out_operator == 'sigmoid':
                        factor = const(
                            options['cbs'])  # * nw_out * (1 - nw_out)
                    else:
                        factor = const(options['cbs'])
                    if out_operator != 'sigmoid':
                        loc_Gvs = TT.Lop(nw_out, loc_params,
                                     TT.Rop(nw_out, loc_params, loc_args) /\
                                     factor)
                    else:
                        tnwout = TT.nnet.sigmoid(nw_out)
                        loc_Gvs = TT.Lop(nw_out, loc_params,
                                         TT.Rop(nw_out, loc_params,
                                                loc_args) *\
                                         tnwout * (1 - tnwout)/ factor)

                    for lp, lgv in zip(loc_params, loc_Gvs):
                        if final_results[lp] is None:
                            final_results[lp] = lgv
                        else:
                            final_results[lp] += lgv

                Gvs = [
                    ogv + final_results[param]
                    for (ogv, param) in zip(gv_args[1:], model.params)
                ]
                return [gv_args[0] + const(1)] + Gvs

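                # NOTE: the statements below are unreachable; they come after the return above.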
                nw_cost, nw_preactiv_out = safe_clone(
                    [model.train_cost, model.preactiv_out], replace)
                nw_gvs = TT.Lop(
                    nw_preactiv_out, model.params,
                    TT.Rop(TT.grad(nw_cost, nw_preactiv_out), model.params,
                           args))

                Gvs = [ogv + ngv for (ogv, ngv) in zip(gv_args[1:], nw_gvs)]
                return [gv_args[0] + const(1)] + Gvs
Example #4
    def check_rop_lop(self, y, out_shape):
        """
        As check_mat_rop_lop, except the input is self.x which is a
        vector. The output is still a vector.
        """
        # TEST ROP
        vx = np.asarray(self.rng.uniform(size=self.in_shape),
                        theano.config.floatX)
        vv = np.asarray(self.rng.uniform(size=self.in_shape),
                        theano.config.floatX)

        yv = tensor.Rop(y, self.x, self.v)
        rop_f = function([self.x, self.v], yv, on_unused_input="ignore")
        J, _ = theano.scan(
            lambda i, y, x: tensor.grad(y[i], x),
            sequences=tensor.arange(y.shape[0]),
            non_sequences=[y, self.x],
        )
        sy = tensor.dot(J, self.v)

        scan_f = function([self.x, self.v], sy, on_unused_input="ignore")

        v1 = rop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert np.allclose(v1, v2), "ROP mismatch: %s %s" % (v1, v2)

        known_fail = False
        try:
            tensor.Rop(theano.clone(y, replace={self.x: break_op(self.x)}),
                       self.x, self.v)
        except ValueError:
            known_fail = True

        # TEST LOP

        vx = np.asarray(self.rng.uniform(size=self.in_shape),
                        theano.config.floatX)
        vv = np.asarray(self.rng.uniform(size=out_shape), theano.config.floatX)

        yv = tensor.Lop(y, self.x, self.v)
        lop_f = function([self.x, self.v], yv, on_unused_input="ignore")
        J, _ = theano.scan(
            lambda i, y, x: tensor.grad(y[i], x),
            sequences=tensor.arange(y.shape[0]),
            non_sequences=[y, self.x],
        )
        sy = tensor.dot(self.v, J)

        scan_f = function([self.x, self.v], sy)

        v1 = lop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert np.allclose(v1, v2), "LOP mismatch: %s %s" % (v1, v2)

        if known_fail:
            pytest.skip("Rop does not handle non-differentiable inputs "
                        "correctly. Bug exposed by fixing Add.grad method.")
Example #5
def test_rop_lop():
    mx = tensor.matrix('mx')
    mv = tensor.matrix('mv')
    v = tensor.vector('v')
    y = matrix_inverse(mx).sum(axis=0)

    yv = tensor.Rop(y, mx, mv)
    yv2 = tensor.Rop_via_Lop(y, mx, mv)
    rop_f = function([mx, mv], [yv, yv2])

    sy, _ = theano.scan(lambda i, y, x, v: (tensor.grad(y[i], x) * v).sum(),
                        sequences=tensor.arange(y.shape[0]),
                        non_sequences=[y, mx, mv])
    scan_f = function([mx, mv], sy)

    rng = np.random.RandomState(utt.fetch_seed())
    vx = np.asarray(rng.randn(4, 4), theano.config.floatX)
    vv = np.asarray(rng.randn(4, 4), theano.config.floatX)

    v1 = scan_f(vx, vv)
    v2, v3 = rop_f(vx, vv)

    assert _allclose(v2, v1), ('Rop mismatch: %s %s' % (v2, v1))
    assert _allclose(v3, v1), ('Rop_via_Lop mismatch: %s %s' % (v3, v1))

    raised = False
    try:
        tensor.Rop(theano.clone(y, replace={mx: break_op(mx)}), mx, mv)
    except ValueError:
        raised = True
    if not raised:
        raise Exception(('Op did not raise an error even though the function'
                         ' is not differentiable'))

    raised = False
    try:
        tensor.Rop_via_Lop(theano.clone(y, replace={mx: break_op(mx)}), mx, mv)
    except theano.gradient.NullTypeGradError:
        raised = True
    except theano.gradient.DisconnectedInputError:
        raised = True

    if not raised:
        raise Exception((
            'Rop_via_Lop for Op did not raise an error even though the function'
            ' is not differentiable'))

    vv = np.asarray(rng.uniform(size=(4, )), theano.config.floatX)
    yv = tensor.Lop(y, mx, v)
    lop_f = function([mx, v], yv)

    sy = tensor.grad((v * y).sum(), mx)
    scan_f = function([mx, v], sy)

    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert _allclose(v1, v2), ('LOP mismatch: %s %s' % (v1, v2))
Example #6
    def Gvs(self, *args):
        # Contribution of hid_sig
        nw_args1 = TT.Lop(
            self.hid_sig, self.params,
            TT.Rop(self.hid_sig, self.params, args) /
            ((1 - self.hid_sig) * self.hid_sig * self.mbs))
        nw_args2 = TT.Lop(
            self.hid_sftmax, self.params,
            TT.Rop(self.hid_sftmax, self.params, args) /
            (self.hid_sftmax * self.mbs))

        return [x + y for x, y in zip(nw_args1, nw_args2)]
Example #7
 def compute_Gv(*args):
     (hid_sig, hid_sftmax) = self.get_hiddens()
     nw_args1 = TT.Lop(
         hid_sig, self.params,
         TT.Rop(hid_sig, self.params, args) /
         ((1 - hid_sig) * hid_sig * self.batchsize))
     nw_args2 = TT.Lop(
         hid_sftmax, self.params,
         TT.Rop(hid_sftmax, self.params, args) /
         (hid_sftmax * self.batchsize))
     fin_vals = [x + y for x, y in zip(nw_args1, nw_args2)]
     new_vals = safe_clone(fin_vals, [self.X, self.Y],
                           [self.loc_x, self.loc_y])
     return new_vals, {}
Example #8
 def gauss_vect_mult(v):
     """
     Multiply a vector by the Gauss-Newton matrix JHJ'
       where J is the Jacobian between output and params and H is the Hessian between costs and output
       H should be diagonal and positive.
     Also add the ridge
     """
     Jv = T.Rop(output, params, v)
     HJv = T.Rop(T.grad(opt_cost, output), output, Jv)
     JHJv = T.Lop(output, params, HJv)
     if not isinstance(JHJv, list):
         JHJv = [JHJv]
     JHJv = [a + ridge * b for a, b in zip(JHJv, v)]
     return JHJv
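The helper above relies on the free names output, params, opt_cost and ridge from its enclosing scope. A minimal, self-contained sketch of the same J'HJ-plus-ridge pattern on a hypothetical toy model (everything below is an assumption for illustration, not part of the original project):

import numpy as np
import theano
import theano.tensor as T

floatX = theano.config.floatX
x = T.vector('x')                          # network input
t = T.vector('t')                          # target
W = theano.shared(np.random.randn(3, 4).astype(floatX), name='W')
b = theano.shared(np.zeros(3, dtype=floatX), name='b')
params = [W, b]
output = T.nnet.sigmoid(T.dot(W, x) + b)   # plays the role of `output` above
opt_cost = T.sum((output - t) ** 2)        # plays the role of `opt_cost` above
ridge = 1e-4

v = [T.matrix('vW'), T.vector('vb')]       # one direction per parameter
Jv = T.Rop(output, params, v)
HJv = T.Rop(T.grad(opt_cost, output), output, Jv)
JHJv = T.Lop(output, params, HJv)
JHJv = [a + ridge * b_ for a, b_ in zip(JHJv, v)]
# For squared error the Hessian w.r.t. the output is constant, so `t` drops
# out of the product; hence on_unused_input='ignore'.
gauss_newton_v = theano.function([x, t] + v, JHJv, on_unused_input='ignore')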
Example #9
    def _compute_nary_hessian_vector_product(self, gradients, arguments):
        """Returns a function accepting `2 * len(arguments)` arguments to
        compute a Hessian-vector product of a multivariate function.

        Notes
        -----
        The implementation is based on TensorFlow's '_hessian_vector_product'
        function in 'tensorflow.python.ops.gradients_impl'.
        """
        argument_types = [argument.type() for argument in arguments]
        try:
            Rop = T.Rop(gradients, arguments, argument_types)
        except NotImplementedError:
            proj = [
                T.sum(gradient * disconnected_grad(argument_type))
                for gradient, argument_type in zip(gradients, argument_types)
            ]
            proj_grad = [
                T.grad(proj_elem,
                       arguments,
                       disconnected_inputs="ignore",
                       return_disconnected="None") for proj_elem in proj
            ]
            proj_grad_transpose = map(list, zip(*proj_grad))
            proj_grad_stack = [
                T.stacklists([c for c in row if c is not None])
                for row in proj_grad_transpose
            ]
            Rop = [T.sum(stack, axis=0) for stack in proj_grad_stack]
        return self._compile_function_without_warnings(
            list(itertools.chain(arguments, argument_types)), Rop)
Example #10
 def _get_updates_for(self, param, grad):
     D_tm1 = shared_like(param, 'D_ewma')
     Hv = TT.Rop(grad, param, self.rng.normal(param.shape))
     D_t = self.ewma * D_tm1 + (1 - self.ewma) * Hv * Hv
     den = TT.sqrt(D_t) + self.epsilon
     yield D_tm1, D_t
     yield param, param - grad * self.learning_rate / den
Example #11
    def get_theano_fn(self, args, kwargs):
        self.trace(*args, **kwargs)

        fn_inputs, fn_outputs, graph = self.get_theano_variables(
            self.s_inputs, self.s_outputs)

        if np.any([o.ndim != 0 for o in fn_outputs]):
            raise TypeError('HessianVector requires scalar outputs.')

        # get wrt variables. If none were specified, use inputs.
        wrt = utils.as_seq(self.wrt)
        if len(wrt) == 0:
            wrt = [i for i in fn_inputs]
        else:
            wrt = [graph[self.get_symbolic(w)] for w in wrt]

        grads = utils.flat_from_doc([tt.grad(o, wrt=wrt) for o in fn_outputs])

        sym_vecs = tuple(
            tt.TensorType(dtype=w.dtype, broadcastable=[False] * w.ndim)()
            for w in wrt)
        hess_vec = tt.Rop(grads, wrt, sym_vecs)

        if len(hess_vec) == 1:
            hess_vec = hess_vec[0]

        # compile function
        fn = theano.function(inputs=fn_inputs + sym_vecs,
                             outputs=hess_vec,
                             on_unused_input='ignore')

        return fn
Example #12
File: theano_test.py Project: yochju/odl
def test_theano_operator():
    """Test the ODL->Theano operator wrapper."""
    # Define ODL operator
    matrix = np.random.rand(3, 2)
    odl_op = odl.MatrixOperator(matrix)

    # Define evaluation points
    x = [1., 2.]
    dy = [1., 2., 3.]

    # Create Theano placeholders
    x_theano = T.dvector()
    dy_theano = T.dvector()

    # Create Theano layer from odl operator
    odl_op_layer = odl.contrib.theano.TheanoOperator(odl_op)

    # Build computation graphs
    y_theano = odl_op_layer(x_theano)
    y_theano_func = theano.function([x_theano], y_theano)
    dy_theano_func = theano.function([x_theano, dy_theano],
                                     T.Lop(y_theano, x_theano, dy_theano))

    # Evaluate using Theano
    result = y_theano_func(x)
    expected = odl_op(x)

    assert all_almost_equal(result, expected)

    # Evaluate the adjoint of the derivative, called gradient in Theano
    result = dy_theano_func(x, dy)
    expected = odl_op.derivative(x).adjoint(dy)

    assert all_almost_equal(result, expected)
Example #13
def hessian_rop_wrt_list(cost, wrt_list, v, g_vec=None, g_list=None):
    """
    Compute an expression for the Hessian of cost with respect to wrt_list,
    right-multiplied by a column vector v.
    """
    if wrt_list == []:
        raise Exception("wrt_list must not be empty!")

    if g_vec is None:
        if g_list is None:
            g_list = T.grad(cost, wrt_list)
        g_vec = T.concatenate(g_list, axis=0)

    # Compute the Hessian \dot vector Rop
    wrt_flat = []
    for wrt in wrt_list:
        if wrt.ndim < 1:
            wrt = T.shape_padright(wrt, n_ones=1)
        elif wrt.ndim > 1:
            wrt = T.flatten(wrt)
        wrt_flat.append(wrt)

    # Concatenate wrt into a single vector
    wrt = T.concatenate(wrt_flat, axis=0)

    # Compute the Rop
    Hv = T.Rop(g_vec, wrt, v)
    return Hv
Example #14
def hessian(objective, argument):
    """
    Compute the directional derivative of the gradient
    (which is equal to the hessian multiplied by direction).
    """
    g = T.grad(objective, argument)

    # Create a new tensor A, which has the same type (i.e. same dimensionality)
    # as argument.
    A = argument.type()

    try:
        # First attempt efficient 'R-op', this directly calculates the
        # directional derivative of the gradient, rather than explicitly
        # calculating the hessian and then multiplying.
        R = T.Rop(g, argument, A)
    except NotImplementedError:
        shp = T.shape(argument)
        H = T.jacobian(g.flatten(),
                       argument).reshape(T.concatenate([shp, shp]), 2 * A.ndim)
        R = T.tensordot(H, A, A.ndim)

    try:
        hess = theano.function([argument, A], R, on_unused_input='raise')
    except theano.compile.UnusedInputError:
        warn('Theano detected unused input - suggests hessian may be zero or '
             'constant.')
        hess = theano.function([argument, A], R, on_unused_input='ignore')
    return hess
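A small usage sketch for the helper above (assuming it is defined in scope together with its imports, i.e. theano, theano.tensor as T and warnings.warn; the toy objective below is an arbitrary illustration):

import numpy as np
import theano.tensor as T

arg = T.vector('arg')
obj = T.sum(arg ** 3)                   # toy objective; its Hessian is diag(6 * arg)
hvp = hessian(obj, arg)                 # compiled (point, direction) -> H(point) @ direction

point = np.array([1.0, 2.0, 3.0], dtype=arg.dtype)
direction = np.array([0.0, 1.0, 0.0], dtype=arg.dtype)
print(hvp(point, direction))            # equals 6 * point * direction elementwise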
Example #15
            def Gv_step(*gv_args):
                idx = TT.cast(gv_args[0], 'int32')
                nw_inps = [x[idx * options['cbs']: \
                             (idx + 1) * options['cbs']] for x in
                           loc_inputs]
                replace = dict(zip(model.inputs, nw_inps))
                nw_outs = safe_clone(model.outs, replace)
                final_results = dict(zip(model.params, [None] * len(model.params)))
                for nw_out, out_operator in zip(nw_outs, model.outs_operator):
                    loc_params = [x for x in model.params
                                  if x in theano.gof.graph.inputs([nw_out])]
                    loc_args = [x for x, y in zip(cgv, model.params)
                                if y in theano.gof.graph.inputs([nw_out])]
                    if out_operator == 'softmax':
                        factor = const(options['cbs']) * nw_out
                    elif out_operator == 'sigmoid':
                        factor = const(options['cbs']) * nw_out * (1 - nw_out)
                    else:
                        factor = const(options['cbs'])

                    loc_Gvs = TT.Lop(nw_out, loc_params,
                                     TT.Rop(nw_out, loc_params, loc_args) /\
                                     factor)

                    for lp, lgv in zip(loc_params, loc_Gvs):
                        if final_results[lp] is None:
                            final_results[lp] = lgv
                        else:
                            final_results[lp] += lgv

                Gvs = [ogv + final_results[param]
                       for (ogv, param) in zip(gv_args[1:], model.params)]
                return [gv_args[0] + const(1)] + Gvs
Example #16
            def compute_Ax(x):

                # There are three ways to compute the Fisher-vector product:

                # 1. https://github.com/joschu/modular_rl/blob/master/modular_rl/trpo.py#L54
                # Use theano.gradient.disconnected_grad and call theano.tensor.grad() twice.
                # WARNING: In our case (with the attention mechanism) it is extremely slow.

                # 2. http://deeplearning.net/software/theano/tutorial/gradients.html#hessian-times-a-vector
                # Use only theano.tensor.Rop, but you will need to calculate the fixed_output outside
                # of the compiled function, because disconnected_grad will not work with Rop.

                # 3. https://github.com/pascanur/natgrad/blob/master/model_convMNIST_standard.py
                # Rop divided by the output, because the metric F is based on the gradient of log(output).
                # Here we also split the vector of parameters. Not checked, but it may be
                # faster than supplying a few vectors to minresQLP.

                xs = []
                offset = 0
                for p in params:
                    shape = p.get_value().shape
                    size = np.prod(shape)
                    xs.append(x[offset:offset + size].reshape(shape))
                    offset += size

                jvp = T.Rop(new_output, params, xs) / (
                    new_output * self.batch_size * self.history + TINY)
                fvp = T.Lop(new_output, params, jvp)
                fvp = T.concatenate([g.flatten() for g in fvp])

                return [fvp], {}
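For comparison, here is a minimal sketch of approach 1 mentioned in the comments above (two calls to theano.tensor.grad plus theano.gradient.disconnected_grad, no Rop). The names mean_kl, params and xs are hypothetical; xs is assumed to be a list of direction tensors, one per parameter:

import theano.tensor as T
from theano.gradient import disconnected_grad

def fisher_vector_product(mean_kl, params, xs):
    # Returns H*xs, where H is the Hessian of mean_kl with respect to params.
    grads = T.grad(mean_kl, params)
    # Inner product <grad, xs>, with xs held constant via disconnected_grad.
    inner = sum((g * disconnected_grad(x)).sum() for g, x in zip(grads, xs))
    return T.grad(inner, params)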
Example #17
    def buildObjective(self):
        """
            Construct Theano expressions for loss, gradient and gauss-newton mv-multiplication
        """
        p = T.vector(name='p')
        (bhid, bvis, W) = self.unwrap(p)
        X = T.matrix(name='X')

        y = T.nnet.sigmoid(T.dot(X, W) + bhid)
        zinner = T.dot(y, W.T) + bvis
        z = T.nnet.sigmoid(zinner)
        L = -T.sum(X * T.log(z) + (1 - X) * T.log(1 - z), axis=1)
        loss = T.sum(L) / self.n
        loss += 0.5 * self.reg * T.dot(p, p)

        # compute the gradients of the cost of the `dA` with respect
        # to its parameters
        g = T.grad(loss, p)

        self.obj = function([X, p], (loss, g))

        v = T.vector(name='v')

        # Essentially the Jacobian up to (but not including) the final sigmoid is used,
        # in the form J.T*H*J, where H is the Hessian of just the last nonlinearity
        Jv = T.Rop(zinner, p, v)
        HJv = T.grad(T.sum(T.grad(loss, zinner) * Jv),
                     zinner,
                     consider_constant=[Jv])
        Gp = T.grad(T.sum(HJv * zinner), p, consider_constant=[HJv, Jv])
        Gp = Gp + self.reg * v

        self.gnprod = function([X, p, v], Gp)
Example #18
def build_hess_p(x, mask, ctx, cost):
    p = tensor.matrix(name='p', dtype='float32')
    ctx_grad = tensor.grad(cost, ctx)
    ctx_hess_p = tensor.Rop(ctx_grad, ctx, p)
    f_ctx_hess_p = theano.function([x, mask, ctx, p], ctx_hess_p)

    return f_ctx_hess_p
Example #19
 def _compile_theano_functions(self):
     p = self.number_dense_jacob_columns
     u = tt.vector('u')
     y = self.generator(u, self.constants)
     u_rep = tt.tile(u, (p, 1))
     y_rep = self.generator(u_rep, self.constants)
     diag_jacob = tt.grad(tt.sum(y), u)[p:]
     m = tt.zeros((p, u.shape[0]))
     m = tt.set_subtensor(m[:p, :p], tt.eye(p))
     dense_jacob = tt.Rop(y_rep, u_rep, m).T
     energy = self.base_energy(u) + (
         0.5 * tt.log(nla.det(
             tt.eye(p) + (dense_jacob.T / diag_jacob**2).dot(dense_jacob)
         )) +
         tt.log(diag_jacob).sum()
     )
     energy_grad = tt.grad(energy, u)
     dy_du = tt.join(1, dense_jacob, tt.diag(diag_jacob))
     self.generator_func = _timed_func_compilation(
         [u], y, 'generator function')
     self.generator_jacob = _timed_func_compilation(
         [u], dy_du, 'generator Jacobian')
     self._energy_grad = _timed_func_compilation(
         [u], energy_grad, 'energy gradient')
     self.base_energy_func = _timed_func_compilation(
         [u], self.base_energy(u), 'base energy function')
Example #20
 def check_nondiff_rop(self, y):
     """
     If your op is not differentiable (so you can't define Rop)
     test that an error is raised.
     """
     with pytest.raises(ValueError):
         tensor.Rop(y, self.x, self.v)
Example #21
    def __init__(self, t_cost, t_traj_info, t_inputs, params, reg=1e-5):
        t_new_params = [
            _np2theano(p.name, p.get_value(borrow=True)) for p in params
        ]

        t_mean = t_traj_info['act_mean']
        t_mean = t_mean.reshape((-1, t_mean.shape[-1]))
        t_logstd = t_traj_info['act_logstd']
        t_logstd = t_logstd.reshape((-1, t_logstd.shape[-1]))
        t_new_mean = t_traj_info['new_act_mean']
        t_new_mean = t_new_mean.reshape((-1, t_new_mean.shape[-1]))
        t_new_logstd = t_traj_info['new_act_logstd']
        t_new_logstd = t_new_logstd.reshape((-1, t_new_logstd.shape[-1]))

        print 'Compiling cost function ... ',
        s = time()
        self.cost = theano.function(inputs=t_inputs,
                                    outputs=t_cost,
                                    on_unused_input='ignore')
        print 'finished in %f seconds' % (time() - s)

        print 'Building cost grad function ... ',
        s = time()
        _t_cost_grad = T.grad(-t_cost, wrt=params)
        print 'finished in %f seconds' % (time() - s)

        print 'Compiling cost grad function ... ',
        s = time()
        self._cost_grad = theano.function(inputs=t_inputs,
                                          outputs=[t_cost] + _t_cost_grad,
                                          on_unused_input='ignore')
        print 'finished in %f seconds' % (time() - s)

        print 'Building Hx function ... ',
        s = time()
        mu = T.concatenate([t_new_mean, t_new_logstd], axis=-1)
        Jx = sum([T.Rop(mu, p, x) for (p, x) in zip(params, t_new_params)])
        M = T.tile(T.eye(2), (mu.shape[0], 1, 1))
        Jx = Jx.reshape((Jx.shape[0], Jx.shape[1], 1))
        Jx = T.tile(Jx, (1, 1, Jx.shape[1]))
        MJx = Jx
        JMJx = [
            T.Lop(MJx, p, x, disconnected_inputs='ignore')
            for (p, x) in zip(params, t_new_params)
        ]
        Hx = [h + reg * p for (h, p) in zip(JMJx, t_new_params)]
        print 'finished in %f seconds' % (time() - s)

        # TODO: Use mask to handle different lengths.

        print 'Compiling Hx function ...',
        s = time()
        self._constraint_Hx = theano.function(inputs=t_inputs + t_new_params,
                                              outputs=Hx,
                                              on_unused_input='ignore')

        self.constraint_Hx = lambda inputs, params: self._constraint_Hx(*(
            inputs + params))
        print 'finished in %f seconds' % (time() - s)
Example #22
def gauss_newton_product(cost, p, v, s):
    Jv = T.Rop(s, p, v)
    HJv = T.grad(T.sum(T.grad(cost, s)*Jv), s,
                 consider_constant=[Jv], disconnected_inputs='ignore')
    Gv = T.grad(T.sum(HJv*s), p,
                consider_constant=[HJv, Jv], disconnected_inputs='ignore')
    Gv = map(T.as_tensor_variable, Gv)  # for CudaNdarray
    return Gv
Example #23
 def test_Rop_dot_bug_18Oct2013_Jeremiah(self):
     # This test refers to a bug reported by Jeremiah Lowin on 18th Oct
     # 2013. The bug occurs when, through a dot operation, there is only
     # one differentiable path (i.e. there is no gradient with respect to
     # one of the inputs).
     x = tensor.arange(20.0).reshape([1, 20])
     v = theano.shared(np.ones([20]))
     d = tensor.dot(x, v).sum()
     tensor.Rop(tensor.grad(d, v), v, v)
Example #24
    def test_conv(self):
        for conv_op in [conv.conv2d, conv2d]:
            for border_mode in ["valid", "full"]:
                image_shape = (2, 2, 4, 5)
                filter_shape = (2, 2, 2, 3)
                image_dim = len(image_shape)
                filter_dim = len(filter_shape)
                input = tensor.TensorType(theano.config.floatX,
                                          [False] * image_dim)(name="input")
                filters = tensor.TensorType(theano.config.floatX, [False] *
                                            filter_dim)(name="filter")
                ev_input = tensor.TensorType(theano.config.floatX, [False] *
                                             image_dim)(name="ev_input")
                ev_filters = tensor.TensorType(theano.config.floatX, [False] *
                                               filter_dim)(name="ev_filters")

                def sym_conv2d(input, filters):
                    return conv_op(input, filters, border_mode=border_mode)

                output = sym_conv2d(input, filters).flatten()
                yv = tensor.Rop(output, [input, filters],
                                [ev_input, ev_filters])
                mode = None
                if theano.config.mode == "FAST_COMPILE":
                    mode = "FAST_RUN"
                rop_f = function(
                    [input, filters, ev_input, ev_filters],
                    yv,
                    on_unused_input="ignore",
                    mode=mode,
                )
                sy, _ = theano.scan(
                    lambda i, y, x1, x2, v1, v2:
                    (tensor.grad(y[i], x1) * v1).sum() +
                    (tensor.grad(y[i], x2) * v2).sum(),
                    sequences=tensor.arange(output.shape[0]),
                    non_sequences=[
                        output, input, filters, ev_input, ev_filters
                    ],
                    mode=mode,
                )
                scan_f = function(
                    [input, filters, ev_input, ev_filters],
                    sy,
                    on_unused_input="ignore",
                    mode=mode,
                )
                dtype = theano.config.floatX
                image_data = np.random.random(image_shape).astype(dtype)
                filter_data = np.random.random(filter_shape).astype(dtype)
                ev_image_data = np.random.random(image_shape).astype(dtype)
                ev_filter_data = np.random.random(filter_shape).astype(dtype)
                v1 = rop_f(image_data, filter_data, ev_image_data,
                           ev_filter_data)
                v2 = scan_f(image_data, filter_data, ev_image_data,
                            ev_filter_data)
                assert np.allclose(v1, v2), "Rop mismatch: %s %s" % (v1, v2)
Example #25
def test_rop_lop():
    mx = tensor.matrix("mx")
    mv = tensor.matrix("mv")
    v = tensor.vector("v")
    y = matrix_inverse(mx).sum(axis=0)

    yv = tensor.Rop(y, mx, mv)
    rop_f = function([mx, mv], yv)

    sy, _ = theano.scan(
        lambda i, y, x, v: (tensor.grad(y[i], x) * v).sum(),
        sequences=tensor.arange(y.shape[0]),
        non_sequences=[y, mx, mv],
    )
    scan_f = function([mx, mv], sy)

    rng = np.random.RandomState(utt.fetch_seed())
    vx = np.asarray(rng.randn(4, 4), theano.config.floatX)
    vv = np.asarray(rng.randn(4, 4), theano.config.floatX)

    v1 = rop_f(vx, vv)
    v2 = scan_f(vx, vv)

    assert _allclose(v1, v2), "ROP mismatch: %s %s" % (v1, v2)

    raised = False
    try:
        tensor.Rop(theano.clone(y, replace={mx: break_op(mx)}), mx, mv)
    except ValueError:
        raised = True
    if not raised:
        raise Exception(("Op did not raised an error even though the function"
                         " is not differentiable"))

    vv = np.asarray(rng.uniform(size=(4, )), theano.config.floatX)
    yv = tensor.Lop(y, mx, v)
    lop_f = function([mx, v], yv)

    sy = tensor.grad((v * y).sum(), mx)
    scan_f = function([mx, v], sy)

    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert _allclose(v1, v2), "LOP mismatch: %s %s" % (v1, v2)
Example #26
    def test_downsample(self):
        rng = np.random.RandomState(utt.fetch_seed())
        # ws, shp
        examples = (
            ((2,), (16,)),
            ((2,), (4, 16)),
            ((2,), (4, 2, 16)),
            ((1, 1), (4, 2, 16, 16)),
            ((2, 2), (4, 2, 16, 16)),
            ((3, 3), (4, 2, 16, 16)),
            ((3, 2), (4, 2, 16, 16)),
            ((3, 2, 2), (3, 2, 16, 16, 16)),
            ((2, 3, 2), (3, 2, 16, 16, 16)),
            ((2, 2, 3), (3, 2, 16, 16, 16)),
            ((2, 2, 3, 2), (3, 2, 6, 6, 6, 5)),
        )

        for example, ignore_border in itertools.product(
                examples, [True, False]):
            (ws, shp) = example
            vx = rng.rand(*shp)
            vex = rng.rand(*shp)

            x = theano.shared(vx)
            ex = theano.shared(vex)

            maxpool_op = Pool(ignore_border, ndim=len(ws))
            a_pooled = maxpool_op(x, ws).flatten()
            yv = tensor.Rop(a_pooled, x, ex)
            mode = None
            if theano.config.mode == "FAST_COMPILE":
                mode = "FAST_RUN"
            rop_f = function([], yv, on_unused_input="ignore", mode=mode)
            sy, _ = theano.scan(
                lambda i, y, x, v: (tensor.grad(y[i], x) * v).sum(),
                sequences=tensor.arange(a_pooled.shape[0]),
                non_sequences=[a_pooled, x, ex],
                mode=mode,
            )
            scan_f = function([], sy, on_unused_input="ignore", mode=mode)
            v1 = rop_f()
            v2 = scan_f()
            assert np.allclose(v1, v2), f"Rop mismatch: {v1} {v2}"
Example #27
File: test_rop.py Project: rsk2327/Theano
    def check_rop_lop(self, y, out_shape):
        """
        As check_mat_rop_lop, except the input is self.x which is a
        vector. The output is still a vector.

        """
        # TEST ROP
        vx = numpy.asarray(self.rng.uniform(size=self.in_shape),
                           theano.config.floatX)
        vv = numpy.asarray(self.rng.uniform(size=self.in_shape),
                           theano.config.floatX)

        yv = tensor.Rop(y, self.x, self.v)
        rop_f = function([self.x, self.v], yv, on_unused_input='ignore')
        J, _ = theano.scan(lambda i, y, x: tensor.grad(y[i], x),
                           sequences=tensor.arange(y.shape[0]),
                           non_sequences=[y, self.x])
        sy = tensor.dot(J, self.v)

        scan_f = function([self.x, self.v], sy, on_unused_input='ignore')

        v1 = rop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert numpy.allclose(v1, v2), ('ROP mismatch: %s %s' % (v1, v2))
        known_fail = False
        try:
            self.check_nondiff_rop(
                theano.clone(y, replace={self.x: break_op(self.x)}))
        except AssertionError:
            known_fail = True

        # TEST LOP

        vx = numpy.asarray(self.rng.uniform(size=self.in_shape),
                           theano.config.floatX)
        vv = numpy.asarray(self.rng.uniform(size=out_shape),
                           theano.config.floatX)

        yv = tensor.Lop(y, self.x, self.v)
        lop_f = function([self.x, self.v], yv, on_unused_input='ignore')
        J, _ = theano.scan(lambda i, y, x: tensor.grad(y[i], x),
                           sequences=tensor.arange(y.shape[0]),
                           non_sequences=[y, self.x])
        sy = tensor.dot(self.v, J)

        scan_f = function([self.x, self.v], sy)

        v1 = lop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert numpy.allclose(v1, v2), ('LOP mismatch: %s %s' % (v1, v2))

        if known_fail:
            raise KnownFailureTest(
                "Rop doesn't handle non-differentiable "
                "inputs correctly. Bug exposed by fixing Add.grad"
                " method.")
Example #28
    def test_invalid_input(self):
        success = False

        try:
            tensor.Rop(0., [tensor.matrix()], [tensor.vector()])
            success = True
        except ValueError:
            pass

        assert not success
Example #29
 def check_nondiff_rop(self, y):
     """ If you op is not differentiable(so you can't define Rop)
     test that an error is raised."""
     raised = False
     try:
         tmp = tensor.Rop(y, self.x, self.v)
     except ValueError:
         raised = True
     if not raised:
         self.fail(('Op did not raise an error even though the function'
                    ' is not differentiable'))
Example #30
    def __call__(self, v, cost, parameters, damp):
        # compute Gauss-Newton Matrix right-multiplied by `v`
        Jv = tt.Rop(self._s, parameters, v)
        HJv = tt.grad(
            tt.sum(tt.grad(cost, self._s) * Jv), self._s, consider_constant=[Jv]
        )
        JHJv = tt.grad(tt.sum(HJv * self._s), parameters, consider_constant=[HJv, Jv])

        # apply Tikhonov damping
        JHJv = [JHJvi + damp * vi for JHJvi, vi in zip(JHJv, v)]
        return JHJv