Example #1
    def check_rop_lop(self, y, out_shape):
        """
        As check_mat_rop_lop, except the input is self.x which is a
        vector. The output is still a vector.
        """
        # TEST ROP
        vx = np.asarray(self.rng.uniform(size=self.in_shape), aesara.config.floatX)
        vv = np.asarray(self.rng.uniform(size=self.in_shape), aesara.config.floatX)

        yv = tensor.Rop(y, self.x, self.v)
        rop_f = function([self.x, self.v], yv, on_unused_input="ignore")
        J, _ = aesara.scan(
            lambda i, y, x: tensor.grad(y[i], x),
            sequences=tensor.arange(y.shape[0]),
            non_sequences=[y, self.x],
        )
        sy = tensor.dot(J, self.v)

        scan_f = function([self.x, self.v], sy, on_unused_input="ignore")

        v1 = rop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert np.allclose(v1, v2), "ROP mismatch: {} {}".format(v1, v2)

        try:
            tensor.Rop(
                aesara.clone(y, replace={self.x: break_op(self.x)}), self.x, self.v
            )
        except ValueError:
            pytest.skip(
                "Rop does not handle non-differentiable inputs "
                "correctly. Bug exposed by fixing Add.grad method."
            )

        vx = np.asarray(self.rng.uniform(size=self.in_shape), aesara.config.floatX)
        vv = np.asarray(self.rng.uniform(size=out_shape), aesara.config.floatX)

        yv = tensor.Lop(y, self.x, self.v)
        lop_f = function([self.x, self.v], yv, on_unused_input="ignore")
        J, _ = aesara.scan(
            lambda i, y, x: tensor.grad(y[i], x),
            sequences=tensor.arange(y.shape[0]),
            non_sequences=[y, self.x],
        )
        sy = tensor.dot(self.v, J)

        scan_f = function([self.x, self.v], sy)

        v1 = lop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert np.allclose(v1, v2), "LOP mismatch: {} {}".format(v1, v2)
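As a minimal orientation (a standalone sketch, assuming the same aesara / tensor API used in the snippet above, and not part of the test class): Rop(y, x, v) is the Jacobian-times-vector product J @ v and Lop(y, x, v) is the vector-times-Jacobian product v @ J, which the test reproduces row by row with a scan over tensor.grad(y[i], x).

import numpy as np
import aesara
import aesara.tensor as tensor

x = tensor.vector("x")
v = tensor.vector("v")
y = x ** 2                        # elementwise, so the Jacobian is diag(2 * x)

jv = tensor.Rop(y, x, v)          # R-op: J @ v
vj = tensor.Lop(y, x, v)          # L-op: v @ J
f = aesara.function([x, v], [jv, vj])

xv = np.arange(3.0, dtype=aesara.config.floatX)
vv = np.ones(3, dtype=aesara.config.floatX)
print(f(xv, vv))                  # both equal 2 * x * v here, since J is diagonal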
Example #2
def inline_ofg_expansion(node):
    """
    This optimization expands internal graph of OpFromGraph.
    Only performed if node.op.is_inline == True
    Doing so can improve optimization at the cost of compilation speed.
    """
    op = node.op
    if not isinstance(op, OpFromGraph):
        return False
    if not op.is_inline:
        return False
    return aesara.clone(
        op.local_outputs,
        {u: v
         for u, v in zip(node.op.local_inputs, node.inputs)})
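A hedged sketch of the kind of graph this optimization targets (assuming OpFromGraph from aesara.compile.builders with its inline flag, which sets op.is_inline): an inlinable OpFromGraph node whose inner graph the expansion above can clone into the outer graph.

import aesara
import aesara.tensor as tensor
from aesara.compile.builders import OpFromGraph

a, b = tensor.scalars("a", "b")
# inline=True sets op.is_inline, the flag the optimization checks
op = OpFromGraph([a, b], [a * b + a], inline=True)

x, y = tensor.scalars("x", "y")
z = op(x, y)
f = aesara.function([x, y], z)    # the inner graph is expanded during optimization
print(f(2.0, 3.0))                # 8.0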
Example #3
    def check_mat_rop_lop(self, y, out_shape):
        """
        Test the Rop/Lop when input is a matrix and the output is a vector

        :param y: the output variable of the op applied to self.mx
        :param out_shape: Used to generate a random tensor
                          corresponding to the evaluation point of the Rop
                          (i.e. the tensor with which you multiply the
                          Jacobian). It should be a tuple of ints.

        If the Op has more than one input, one of them must be self.mx,
        while the others must be shared variables / constants. We test
        only against the input self.mx, so you must call
        check_mat_rop_lop/check_rop_lop for the other inputs.

        We expect all inputs/outputs to have dtype floatX.

        If you want to test an Op whose output is a matrix, add a sum
        after the Op you want to test.
        """
        vx = np.asarray(self.rng.uniform(size=self.mat_in_shape), aesara.config.floatX)
        vv = np.asarray(self.rng.uniform(size=self.mat_in_shape), aesara.config.floatX)
        yv = tensor.Rop(y, self.mx, self.mv)
        rop_f = function([self.mx, self.mv], yv, on_unused_input="ignore")
        sy, _ = aesara.scan(
            lambda i, y, x, v: (tensor.grad(y[i], x) * v).sum(),
            sequences=tensor.arange(y.shape[0]),
            non_sequences=[y, self.mx, self.mv],
        )
        scan_f = function([self.mx, self.mv], sy, on_unused_input="ignore")

        v1 = rop_f(vx, vv)
        v2 = scan_f(vx, vv)

        assert np.allclose(v1, v2), "ROP mismatch: {} {}".format(v1, v2)

        self.check_nondiff_rop(aesara.clone(y, replace={self.mx: break_op(self.mx)}))

        vv = np.asarray(self.rng.uniform(size=out_shape), aesara.config.floatX)
        yv = tensor.Lop(y, self.mx, self.v)
        lop_f = function([self.mx, self.v], yv)

        sy = tensor.grad((self.v * y).sum(), self.mx)
        scan_f = function([self.mx, self.v], sy)

        v1 = lop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert np.allclose(v1, v2), "LOP mismatch: {} {}".format(v1, v2)
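The last paragraph of the docstring (reduce a matrix-valued Op to a vector with a sum before testing) can be illustrated in isolation. This is a standalone sketch with tanh as an arbitrary stand-in Op, not code from the test class:

import numpy as np
import aesara
import aesara.tensor as tensor

mx = tensor.matrix("mx")
mv = tensor.matrix("mv")
y = tensor.tanh(mx).sum(axis=0)   # matrix-valued Op reduced to a vector output
rop_f = aesara.function([mx, mv], tensor.Rop(y, mx, mv))

shape = (3, 4)
vx = np.ones(shape, dtype=aesara.config.floatX)
vv = np.ones(shape, dtype=aesara.config.floatX)
print(rop_f(vx, vv))              # one entry per column of mx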
Example #4
def test_rop_lop():
    mx = tensor.matrix("mx")
    mv = tensor.matrix("mv")
    v = tensor.vector("v")
    y = matrix_inverse(mx).sum(axis=0)

    yv = tensor.Rop(y, mx, mv)
    rop_f = function([mx, mv], yv)

    sy, _ = aesara.scan(
        lambda i, y, x, v: (tensor.grad(y[i], x) * v).sum(),
        sequences=tensor.arange(y.shape[0]),
        non_sequences=[y, mx, mv],
    )
    scan_f = function([mx, mv], sy)

    rng = np.random.RandomState(utt.fetch_seed())
    vx = np.asarray(rng.randn(4, 4), aesara.config.floatX)
    vv = np.asarray(rng.randn(4, 4), aesara.config.floatX)

    v1 = rop_f(vx, vv)
    v2 = scan_f(vx, vv)

    assert _allclose(v1, v2), "ROP mismatch: {} {}".format(v1, v2)

    raised = False
    try:
        tensor.Rop(aesara.clone(y, replace={mx: break_op(mx)}), mx, mv)
    except ValueError:
        raised = True
    if not raised:
        raise Exception("Op did not raised an error even though the function"
                        " is not differentiable")

    vv = np.asarray(rng.uniform(size=(4, )), aesara.config.floatX)
    yv = tensor.Lop(y, mx, v)
    lop_f = function([mx, v], yv)

    sy = tensor.grad((v * y).sum(), mx)
    scan_f = function([mx, v], sy)

    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert _allclose(v1, v2), "LOP mismatch: {} {}".format(v1, v2)
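For comparison, the reference value J @ v that the scan above builds by hand can also be obtained from aesara.gradient.jacobian. This is a variant of my own, with tanh standing in for matrix_inverse, not part of the original test:

import numpy as np
import aesara
import aesara.tensor as tensor
from aesara.gradient import jacobian

mx = tensor.matrix("mx")
mv = tensor.matrix("mv")
y = tensor.tanh(mx).sum(axis=0)

J = jacobian(y, mx)               # stacks grad(y[i], mx); shape (3, 3, 3) below
jv = (J * mv.dimshuffle("x", 0, 1)).sum(axis=(1, 2))
f = aesara.function([mx, mv], [tensor.Rop(y, mx, mv), jv])

rng = np.random.RandomState(42)
vx = rng.rand(3, 3).astype(aesara.config.floatX)
vv = rng.rand(3, 3).astype(aesara.config.floatX)
a, b = f(vx, vv)
assert np.allclose(a, b)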
Example #5
    def infer_shape(self, node, shapes):
        out_shp = aesara.scan_module.scan_utils.infer_shape(
            self.local_outputs, self.local_inputs, shapes)

        # Clone the output shapes so that they are computed from the outer
        # inputs.
        # Note:
        # Here we could do it more simply, like:
        #      ret = [aesara.clone(shp, replace=repl) for shp in out_shp]
        # But doing it multiple times could duplicate common subgraphs between
        # the shape calls. The Aesara optimizer would clean this up later, but
        # it would mean extra work for the optimizer.
        repl = dict(zip(self.local_inputs, node.inputs))
        cloned = aesara.clone(reduce(tuple.__add__, out_shp), replace=repl)
        ret = []
        used = 0
        for i in range(len(out_shp)):
            nb = len(out_shp[i])
            ret.append(cloned[used:used + nb])
            used += nb

        return ret
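A standalone illustration of the single-clone pattern described in the Note above (the variables here are made up for the sketch): cloning all shape expressions in one aesara.clone call keeps the subgraphs they share, while cloning each output's shapes separately would duplicate them.

import numpy as np
import aesara
import aesara.tensor as tensor

a = tensor.vector("a")
b = tensor.vector("b")
common = a + b                                      # subgraph shared by both shapes
exprs = [common.shape[0], (2 * common).shape[0]]

x = tensor.vector("x")
y = tensor.vector("y")
cloned = aesara.clone(exprs, replace={a: x, b: y})  # one clone call for all of them
f = aesara.function([x, y], cloned)
print(f(np.ones(2, dtype=aesara.config.floatX),
        np.ones(2, dtype=aesara.config.floatX)))    # [2, 2]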
Example #6
    def inner_replacer(graph):
        new_graph = replacer(graph)

        other_inputs = []
        constants = []
        for input_ in gof.graph.inputs([new_graph]):
            if isinstance(input_, gof.Variable):
                if isinstance(input_, gof.Constant):
                    constants.append(input_)
                else:
                    other_inputs.append(input_)

        # foreign inputs are fgraph inputs and shared variables that we need
        # to access through inner inputs
        foreign_inputs = list(set(other_inputs) - set(outer_to_inner.values()))

        # skip further processing if there is nothing to do
        if not constants and not foreign_inputs:
            return new_graph

        replacements = []

        # constants just need to be replaced by copies that the inner
        # `fg` can take ownership of
        for input_ in constants:
            new_input = input_.clone()
            new_input.name = "%s_copied" % new_input.name
            replacements.append((input_, new_input))

        for outer_input in foreign_inputs:
            if getattr(outer_input, "update", False):
                # when aesara.scan() constructs a scan node, it detects
                # shared variables with updates and returns these updates
                # to the user.  we need to do the same thing for every new
                # use of such a variable that is introduced.  it's hard to
                # do that at this point.
                # shared variables with updates inside the inner graph of
                # OpFromGraph are not supported at all, so we don't support
                # introducing those either.
                raise NotImplementedError(
                    "Replacement introduces shared variable %s "
                    "which has an update associated with it into "
                    "the inner graph of %s. This is not currently "
                    "supported." % (outer_input, containing_op))
            # if this foreign input is not already available
            # as an inner input, connect it through a new
            # inner input
            if outer_input not in outer_to_inner.keys():
                inner_input = scan_utils.safe_new(outer_input, tag="_copy")
                outer_to_inner[outer_input] = inner_input
                extra_inner_inputs.append(inner_input)
                extra_outer_inputs.append(outer_input)
                # the inner FunctionGraph wants to know its inputs
                # beforehand, but we don't always know.  so add them
                # as we discover them.
                graph.owner.fgraph.add_input(inner_input)

        replacements.extend(outer_to_inner.items())

        (new_graph, ) = aesara.clone([new_graph],
                                     share_inputs=True,
                                     replace=replacements)
        return new_graph
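For context on the update handling that the NotImplementedError above refuses, here is a hedged sketch (standalone, with made-up names) of how a shared variable updated inside aesara.scan surfaces to the caller: the update comes back in the updates dictionary and has to be passed on to aesara.function, bookkeeping that a graph replacement cannot silently introduce.

import numpy as np
import aesara
import aesara.tensor as tensor

counter = aesara.shared(np.asarray(0.0, dtype=aesara.config.floatX), name="counter")
xs = tensor.vector("xs")

def step(x_t):
    # one output per step, plus an update for the shared variable
    return x_t + counter, {counter: counter + 1.0}

ys, updates = aesara.scan(step, sequences=[xs])
f = aesara.function([xs], ys, updates=updates)      # updates must be forwarded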
Example #7
    def _run(self, num_features, num_timesteps, batch_size, mode):
        # determine shapes of inputs and targets depending on the batch size
        if batch_size == 1:
            inputs_size = (num_timesteps, num_features)
            targets_size = (num_timesteps, 1)
        else:
            inputs_size = (num_timesteps, batch_size, num_features)
            targets_size = (num_timesteps, batch_size, 1)

        # make inputs and targets shared variables
        inputs = aesara.shared(
            self.rng.uniform(size=inputs_size).astype(config.floatX), borrow=True
        )
        targets = aesara.shared(
            self.rng.uniform(size=targets_size).astype(config.floatX), borrow=True
        )

        # create symbolic inputs and targets variables
        if batch_size == 1:
            x = tt.matrix("inputs")
            t = tt.matrix("targets")
        else:
            x = tt.tensor3("inputs")
            t = tt.tensor3("inputs")
        x.tag.test_value = inputs.get_value(borrow=True)
        t.tag.test_value = targets.get_value(borrow=True)

        # create a set of parameters for a simple RNN
        W_xh = aesara.shared(
            (0.01 * self.rng.uniform(size=(num_features, 10))).astype(config.floatX),
            borrow=True,
        )
        W_hh = aesara.shared(
            (0.01 * self.rng.uniform(size=(10, 10))).astype(config.floatX), borrow=True
        )
        W_hy = aesara.shared(
            (0.01 * self.rng.uniform(size=(10, 1))).astype(config.floatX), borrow=True
        )
        b_h = aesara.shared(np.zeros(10).astype(config.floatX), borrow=True)
        b_y = aesara.shared(np.zeros(1).astype(config.floatX), borrow=True)

        params = [W_xh, W_hh, W_hy, b_h, b_y]

        # recurrent function
        def step(x_t, h_tm1):
            h = tt.tanh(tt.dot(h_tm1, W_hh) + tt.dot(x_t, W_xh) + b_h)
            return h

        # build recurrent graph
        if batch_size == 1:
            h_0 = tt.alloc(0.0, 10).astype(config.floatX)
        else:
            h_0 = tt.alloc(0.0, batch_size, 10).astype(config.floatX)
        h, updates = aesara.scan(step, sequences=[x], outputs_info=[h_0])
        # network output
        y = tt.dot(h, W_hy) + b_y

        # Create Gauss-Newton-Matrix object. Not really of any use here, but I
        # need it for Hessian-Free optimization.
        gn = GaussNewtonMatrix(y)

        # compute MSE
        cost = ((t - y) ** 2).sum(axis=1).mean()

        # Compute the cost at some other point in the parameter
        # space. Not really of any use here, but this is how I do it
        # during certain iterations of CG in the HF algorithm. There,
        # it's in fact `pi + current update proposal`.  For simplicity,
        # I just multiply by 2 here.
        cost_ = aesara.clone(cost, replace={pi: 2 * pi for pi in params})

        # Compute Gauss-Newton-Matrix times some vector `v` which is `p` in CG,
        # but for simplicity, I just take the parameters vector because it's
        # already there.
        Gv = gn(v=params, cost=cost, parameters=params, damp=tt.constant(1.0))

        # compile Aesara function
        f = aesara.function([], [cost_] + Gv, givens={x: inputs, t: targets}, mode=mode)
        # execute
        f()
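The aesara.clone call that builds cost_ above can be reduced to a toy sketch (w, x, and the quadratic cost below are made up): the same cost expression is re-evaluated at a scaled point in parameter space by substituting each shared parameter with a transformed copy of itself.

import numpy as np
import aesara
import aesara.tensor as tt

w = aesara.shared(np.asarray([1.0, 2.0], dtype=aesara.config.floatX), name="w")
x = tt.vector("x")
cost = tt.dot(x, w) ** 2

cost_at_2w = aesara.clone(cost, replace={w: 2 * w})  # same trick as cost_ above
f = aesara.function([x], [cost, cost_at_2w])
print(f(np.ones(2, dtype=aesara.config.floatX)))     # second value is 4x the first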