Exemple #1
0
    def L_op(self, inputs, outputs, output_grads):
        """Compose the upstream gradient with the gradients this Op emitted.

        The Op must have been built with ``compute_grad`` enabled, in which
        case its second output holds the gradients it computed itself.  Only
        the first input receives a gradient; the terms for inputs 1 and 2 are
        reported as undefined.
        """
        # Gradients already computed by the Op live in its second output.
        assert self.compute_grad and len(outputs) == 2
        op_gradients = outputs[1]
        assert op_gradients is not None

        def swap_leading_axes(tensor):
            # Exchange axes 0 and 1 of a 3-d, non-broadcastable tensor.
            shuffle = GpuDimShuffle(
                input_broadcastable=(False, False, False),
                new_order=(1, 0, 2),
            )
            return shuffle(tensor)

        # Chain rule: multiply the incoming gradient with the Op's own
        # gradients, then restore the original axis order.
        upstream = output_grads[0]
        chained = batched_dot(upstream, swap_leading_axes(op_gradients))
        return [
            swap_leading_axes(chained),
            grad_undefined(self, 1, inputs[1]),
            grad_undefined(self, 2, inputs[2]),
        ]
Exemple #2
0
    def L_op(self, inputs, outputs, out_grads):
        """Build the gradient terms for the inputs ``x`` and ``k``.

        Parameters
        ----------
        inputs
            The pair ``(x, k)``.
        outputs
            The outputs of this Op; the last one is used as the indices
            along ``self.axis`` (assumes the indices output is last when
            ``self.return_indices`` is set -- TODO confirm).
        out_grads
            Gradients w.r.t. the outputs; only the first one is used.

        Returns
        -------
        list
            ``[x_grad, k_grad]``.  ``k_grad`` is always undefined.  ``x_grad``
            is undefined when the indices are not returned; otherwise the
            first output gradient is scattered into a zero tensor shaped like
            ``x`` at the selected positions.
        """
        x, k = inputs
        k_grad = grad_undefined(self, 1, k, "topk: k is not differentiable")

        # The scatter below indexes with ``outputs[-1]``, so the indices
        # output must exist.  The previous guard,
        # ``not (return_indices or return_values)``, let the values-only case
        # through and used the *values* output as indices.
        if not self.return_indices:
            x_grad = grad_undefined(
                self,
                0,
                x,
                "topk: cannot get gradient without both indices and values",
            )
        else:
            x_shp = shape(x)
            z_grad = out_grads[0]
            ndim = x.ndim
            # Normalise a possibly negative axis.
            axis = self.axis % ndim
            # One index array per dimension: a broadcastable ``arange`` for
            # every dimension except ``axis``, where the Op's indices are used.
            grad_indices = [
                arange(x_shp[i]).dimshuffle([0] + ["x"] * (ndim - i - 1))
                if i != axis
                else outputs[-1]
                for i in range(ndim)
            ]
            x_grad = x.zeros_like(dtype=z_grad.dtype)
            x_grad = set_subtensor(x_grad[tuple(grad_indices)], z_grad)

        return [x_grad, k_grad]
Exemple #3
0
    def L_op(self, inputs, outputs, output_grads):
        """Compose the upstream gradient with the gradients this Op emitted.

        Requires ``compute_grad`` to be enabled so that the second output
        holds the Op's own gradients.  Only the first input receives a
        gradient; the terms for inputs 1 and 2 are reported as undefined.
        """
        assert self.compute_grad and len(outputs) == 2
        op_gradients = outputs[1]
        assert op_gradients is not None

        upstream = output_grads[0]
        # Swap axes 0 and 1 before the batched product, then swap them back.
        swapped = op_gradients.dimshuffle(1, 0, 2)
        chained = batched_dot(upstream, swapped)
        total_grad = chained.dimshuffle(1, 0, 2)

        undefined_1 = grad_undefined(self, 1, inputs[1])
        undefined_2 = grad_undefined(self, 2, inputs[2])
        return [total_grad, undefined_1, undefined_2]
Exemple #4
0
    def grad(self, inputs, grads):
        """Build the gradient terms for ``(o, W, h, inputIdx, outputIdx)``.

        The output gradient is passed through unchanged for ``o``.  The term
        for ``W`` comes from ``gpu_sparse_block_outer`` and the term for ``h``
        from ``gpu_sparse_block_gemv`` applied to ``W`` with axes 0/1 and 2/3
        swapped and the two index inputs exchanged.  Both index inputs get
        undefined gradients.
        """
        _o, W, h, inputIdx, outputIdx = inputs
        out_grad = grads[0]

        W_grad = gpu_sparse_block_outer(
            W.zeros_like(), h, out_grad, inputIdx, outputIdx
        )

        h_zeros = h.zeros_like()
        W_swapped = W.dimshuffle((1, 0, 3, 2))
        h_grad = gpu_sparse_block_gemv(
            h_zeros, W_swapped, out_grad, outputIdx, inputIdx
        )

        inputIdx_grad = grad_undefined(
            self, 3, inputIdx, "grad of inputIdx makes no sense"
        )
        outputIdx_grad = grad_undefined(
            self, 4, outputIdx, "grad of outputIdx makes no sense"
        )
        return [out_grad, W_grad, h_grad, inputIdx_grad, outputIdx_grad]
Exemple #5
0
    def grad(self, inputs, grads):
        """Build the gradient terms for ``(o, W, h, inputIdx, outputIdx)``.

        The output gradient is passed through unchanged for ``o``.  The term
        for ``W`` is produced by a ``SparseBlockOuter`` Op and the term for
        ``h`` by a ``SparseBlockGemv`` Op applied to ``W`` with axes 0/1 and
        2/3 swapped and the two index inputs exchanged.  Both helper Ops are
        created with this Op's ``inplace`` setting.  The index inputs get
        undefined gradients.
        """
        _o, W, h, inputIdx, outputIdx = inputs
        out_grad = grads[0]

        block_outer = SparseBlockOuter(self.inplace)
        block_gemv = SparseBlockGemv(self.inplace)

        W_grad = block_outer(W.zeros_like(), h, out_grad, inputIdx, outputIdx)

        h_zeros = h.zeros_like()
        W_swapped = W.dimshuffle((1, 0, 3, 2))
        h_grad = block_gemv(h_zeros, W_swapped, out_grad, outputIdx, inputIdx)

        inputIdx_grad = grad_undefined(
            self, 3, inputIdx, "grad of inputIdx makes no sense"
        )
        outputIdx_grad = grad_undefined(
            self, 4, outputIdx, "grad of outputIdx makes no sense"
        )
        return [out_grad, W_grad, h_grad, inputIdx_grad, outputIdx_grad]
Exemple #6
0
 def grad(self, inputs, output_grads):
     """Build the gradient terms for the inputs ``a`` and ``axis``.

     The first output gradient is indexed with the indices obtained from
     ``__get_argsort_indices`` to give the term for ``a``; the term for
     ``axis`` is undefined.
     """
     a, axis = inputs
     sort_indices = self.__get_argsort_indices(a, axis)
     upstream = output_grads[0]
     inp_grad = upstream[tuple(sort_indices)]

     undefined_reason = (
         "The gradient of sort is not defined "
         "with respect to the integer axes itself"
     )
     axis_grad = grad_undefined(self, 1, axis, undefined_reason)
     return [inp_grad, axis_grad]
Exemple #7
0
 def grad(self, inputs, output_grads):
     """Build the gradient terms for the inputs ``inp`` and ``axis``.

     The output is integer-valued, so the term for ``inp`` is a zero tensor
     shaped like ``inp``; the term for ``axis`` is undefined.
     """
     inp, axis = inputs
     zero_grad = inp.zeros_like()

     undefined_reason = (
         "argsort is not defined for non-integer axes so"
         " argsort(x, axis+eps) is undefined"
     )
     undefined_axis_grad = grad_undefined(self, 1, axis, undefined_reason)
     return [zero_grad, undefined_axis_grad]
Exemple #8
0
    def grad(self, inp, cost_grad):
        """
        Build the gradient terms for the inputs ``a``, ``val`` and ``offset``.

        Parameters
        ----------
        inp
            The triple ``(a, val, offset)``.
        cost_grad
            Gradients w.r.t. the outputs; only the first one is used and it
            is unpacked as a 2-d ``(height, width)`` shape.

        Returns
        -------
        list
            One term per input: the output gradient with its offset diagonal
            zeroed (for ``a``), the sum of the output gradient along that
            diagonal (for ``val``), and an undefined gradient (for
            ``offset``).  For complex ``a`` no gradient is provided for ``a``
            and ``val``.

        Notes
        -----
        The gradient is currently implemented for matrices only.
        """
        a, val, offset = inp
        grad = cost_grad[0]
        height, width = grad.shape

        # ``offset`` is an integer input; its gradient is undefined whatever
        # the dtype of ``a`` is.
        wr_offset = grad_undefined(
            self,
            2,
            offset,
            "offset is not defined for non-integer offset so"
            " fill_diagonal_offset(a,val,offset+eps) is undefined",
        )

        if a.dtype.startswith("complex"):
            # One term per input is required; this branch previously returned
            # only two terms for the three inputs.
            # NOTE(review): ``None`` terms are kept as before for ``a`` and
            # ``val``; confirm whether the gradient machinery still accepts
            # ``None`` or whether a NullType term should be returned instead.
            return [None, None, wr_offset]

        # only valid for matrices
        # Positions overwritten by ``val`` contribute nothing to ``a``.
        wr_a = fill_diagonal_offset(grad, 0, offset)

        offset_abs = abs_(offset)
        pos_offset_flag = ge(offset, 0)
        neg_offset_flag = lt(offset, 0)
        min_wh = minimum(width, height)

        # Flat index of the first diagonal element: ``offset`` for a
        # non-negative offset, ``|offset| * width`` for a negative one.
        start = offset * pos_offset_flag + offset_abs * width * neg_offset_flag
        # Number of elements on the offset diagonal.
        num_of_step = minimum(
            min_wh,
            width * pos_offset_flag + height * neg_offset_flag - offset_abs)

        # Stepping by one row plus one column walks down the diagonal of the
        # flattened matrix.
        step = a.shape[1] + 1
        end = start + step * num_of_step

        # input of slice should be integer
        start = aet.cast(start, "int32")
        step = aet.cast(step, "int32")
        end = aet.cast(end, "int32")

        # ``val`` was written to every diagonal position, so its gradient is
        # the sum of the output gradient over those positions.
        wr_val = grad.flatten()[start:end:step].sum()

        return [wr_a, wr_val, wr_offset]
Exemple #9
0
    def grad(self, inputs, outputs_gradients):
        """Build the gradient terms for ``a`` and each shape input.

        The output gradient is broadcast to the target shape, summed over the
        leading dimensions that ``a`` does not have, and then summed (keeping
        dims) over the dimensions where ``a`` is broadcastable but the target
        shape is not.  Every shape input gets an undefined gradient.
        """
        a, *target_shape = inputs
        (dout,) = outputs_gradients

        # Leading dimensions that broadcasting added in front of ``a``.
        leading_axes = list(range(dout.ndim - a.ndim))
        d_wrt_a = broadcast_to(dout, target_shape).sum(axis=leading_axes)

        # Dimensions of ``a`` that were stretched by broadcasting.
        _, shape_bcast = aet.alloc_validate_shape(target_shape)
        trailing_bcast = shape_bcast[-a.ndim :]
        bcast_sums = []
        for axis, (a_b, s_b) in enumerate(zip(a.broadcastable, trailing_bcast)):
            if a_b and not s_b:
                bcast_sums.append(axis)

        if bcast_sums:
            d_wrt_a = d_wrt_a.sum(axis=bcast_sums, keepdims=True)

        shape_grads = [
            grad_undefined(self, i, shp) for i, shp in enumerate(target_shape, 1)
        ]
        return [d_wrt_a] + shape_grads
Exemple #10
0
 def grad(self, inputs, output_grads):
     """Report the gradient w.r.t. the first input as undefined."""
     first_input = inputs[0]
     undefined = grad_undefined(self, 0, first_input)
     return [undefined]
Exemple #11
0
 def test_undefined_grad_func(self):
     """Compiling a graph that outputs an undefined gradient must fail."""
     inp = vector()
     undefined = grad_undefined(add, 0, inp)
     # Function compilation is expected to reject the undefined gradient.
     with pytest.raises(TypeError):
         aesara.function([inp], undefined, on_unused_input="ignore")
Exemple #12
0
 def grad(self, inputs, ograd):
     """Report an undefined gradient for every input of this Op."""
     undefined_terms = []
     for k, inp in enumerate(inputs):
         term = gradient.grad_undefined(
             self,
             k,
             inp,
             "No gradient defined through " "random sampling op",
         )
         undefined_terms.append(term)
     return undefined_terms
Exemple #13
0
 def grad(self, inp, grads):
     """Report the gradient w.r.t. the first input as undefined."""
     first_input = inp[0]
     undefined = grad_undefined(self, 0, first_input)
     return [undefined]
Exemple #14
0
 def grad(self, inp, grads):
     """Report the gradients w.r.t. the first two inputs as undefined."""
     first_undefined = grad_undefined(self, 0, inp[0])
     second_undefined = grad_undefined(self, 1, inp[1])
     return [first_undefined, second_undefined]
Exemple #15
0
    def grad(self, inp, grads):
        """Build the gradient terms for ``x``, ``neib_shape`` and ``neib_step``.

        Three cases are handled, in this order:

        1. ``self.mode`` is ``"valid"`` or ``"ignore_borders"`` and
           ``neib_shape`` compares equal to ``neib_step``: the term for ``x``
           is ``neibs2images(gz, neib_shape, x.shape, mode=self.mode)``.
        2. ``self.mode`` is ``"valid"``: the term for ``x`` is accumulated one
           neighbourhood position at a time with ``aesara.scan``.
        3. Otherwise the term for ``x`` is reported as not implemented.

        In every case the terms for ``neib_shape`` and ``neib_step`` are
        undefined.
        """
        x, neib_shape, neib_step = inp
        (gz, ) = grads

        if self.mode in ("valid", "ignore_borders"):
            if (neib_shape is neib_step or neib_shape == neib_step or
                    # ``==`` on Aesara Constants does not compare their data;
                    # the ``equals`` method does, so it is tried as well.
                (hasattr(neib_shape, "equals") and neib_shape.equals(neib_step)
                 )):
                return [
                    neibs2images(gz, neib_shape, x.shape, mode=self.mode),
                    grad_undefined(self, 1, neib_shape),
                    grad_undefined(self, 2, neib_step),
                ]

        if self.mode in ["valid"]:
            # Iterate over neighborhood positions, summing contributions.
            def pos2map(pidx, pgz, prior_result, neib_shape, neib_step):
                """
                Add the gradient contribution of a single neighborhood
                position (i, j) to the running result.

                pidx = Index of the position within the neighborhood.
                pgz  = Gradient of shape (batch_size*num_channels*neibs)
                prior_result  = Shape (batch_size, num_channels, rows, cols)
                neib_shape = Number of rows, cols in a neighborhood.
                neib_step  = Step sizes from image2neibs.
                """
                nrows, ncols = neib_shape
                rstep, cstep = neib_step
                batch_size, num_channels, rows, cols = prior_result.shape
                # Convert the flat position index into a (row, col) pair.
                i = pidx // ncols
                j = pidx - (i * ncols)
                # This position does not touch some img pixels in valid mode.
                result_indices = prior_result[:, :,
                                              i:(rows - nrows + i + 1):rstep,
                                              j:(cols - ncols + j + 1):cstep, ]
                # Shape of the strided window selected above.
                newshape = ((batch_size, num_channels) +
                            ((rows - nrows) // rstep + 1, ) +
                            ((cols - ncols) // cstep + 1, ))
                return inc_subtensor(result_indices, pgz.reshape(newshape))

            # One scan step per position inside a neighborhood.
            indices = arange(neib_shape[0] * neib_shape[1])
            # Swap the two axes of gz so it can be passed as a scan sequence
            # alongside ``indices``.
            pgzs = gz.dimshuffle((1, 0))
            result, _ = aesara.scan(
                fn=pos2map,
                sequences=[indices, pgzs],
                outputs_info=zeros(x.shape),
                non_sequences=[neib_shape, neib_step],
            )
            # The last scan step holds the sum over all positions.
            grad_input = result[-1]
            return [
                grad_input,
                grad_undefined(self, 1, neib_shape),
                grad_undefined(self, 2, neib_step),
            ]

        # No gradient w.r.t. ``x`` is available for the remaining cases.
        return [
            grad_not_implemented(self, 0, x),
            grad_undefined(self, 1, neib_shape),
            grad_undefined(self, 2, neib_step),
        ]