Example #1
0
def matmul(a: Variable, b: Variable) -> Variable:
    "Matrix multiplication."
    value = np.matmul(a.array, b.array)
    # Wrap the operands for the backward pass; presumably enable_broadcast
    # accounts for broadcast batch dimensions — see its definition.
    a_wrapped, b_wrapped = enable_broadcast(a, b, matmul=True)

    def grad_wrt_a(path_value):
        # Adjoint of matmul w.r.t. the left operand: contract with b^T on the right.
        return np.matmul(path_value, np.swapaxes(b.array, -2, -1))

    def grad_wrt_b(path_value):
        # Adjoint of matmul w.r.t. the right operand: contract with a^T on the left.
        return np.matmul(np.swapaxes(a.array, -2, -1), path_value)

    local_gradients = [
        (a_wrapped, grad_wrt_a),
        (b_wrapped, grad_wrt_b),
    ]
    return Variable(value, local_gradients)
Example #2
0
 def multiply_by_locgrad(path_value):
     # Local-gradient closure for a max-reduction over `axis`.
     # NOTE(review): relies on free variables from an enclosing scope not
     # shown here — `flatshape`, `idx`, `a`, `axis`; presumably captured from
     # a `maxax`-style function (cf. the identical closure inside `maxax`).
     # Build a one-hot mask in the flattened (rows, reduced-axis) layout,
     # marking the argmax position of each row.
     result = np.zeros(flatshape)
     result[np.arange(result.shape[0]), idx] = 1
     # Undo the flatten: reshape into `a`'s shape with `axis` and the last
     # axis exchanged, then swap them back into place.
     swapped_shape = list(a.shape)
     swapped_shape[axis], swapped_shape[-1] = swapped_shape[
         -1], swapped_shape[axis]
     result = result.reshape(swapped_shape)
     result = np.swapaxes(result, axis, -1)
     # `path_value` has size 1 along the reduced axis, so this broadcasts the
     # incoming gradient onto the argmax positions only.
     return path_value * result
Example #3
0
def maxax(a: Variable, axis: int) -> Variable:
    "Reduce an axis, `axis`, to its max value."
    # The implementation is more involved than a plain argmax/put_along_axis
    # pair because CuPy doesn't provide put_along_axis.
    axis = a.ndim + axis if axis < 0 else axis

    # Bring the reduced axis to the end and collapse everything else into rows.
    swapped = np.swapaxes(a.array, axis, -1)
    flat = swapped.reshape([-1, swapped.shape[-1]])
    flat_shape = flat.shape
    argmax_idx = np.argmax(flat, axis=-1)
    picked = np.take_along_axis(flat, argmax_idx[..., np.newaxis], -1)
    # Output keeps `a`'s rank, with the reduced axis collapsed to size 1.
    value = picked.reshape(
        tuple(1 if i == axis else v for i, v in enumerate(a.shape)))

    def multiply_by_locgrad(path_value):
        # One-hot mask in the flattened layout: 1 at each row's argmax.
        mask = np.zeros(flat_shape)
        mask[np.arange(flat_shape[0]), argmax_idx] = 1
        # Reshape back to `a`'s shape with `axis` and the last axis
        # exchanged, then swap them back into position.
        exchanged = list(a.shape)
        exchanged[axis], exchanged[-1] = exchanged[-1], exchanged[axis]
        mask = np.swapaxes(mask.reshape(exchanged), axis, -1)
        # `path_value` is size 1 along `axis`, so it broadcasts onto the
        # argmax positions only.
        return path_value * mask

    local_gradients = ((a, multiply_by_locgrad), )
    return Variable(value, local_gradients)
Example #4
0
def matrix_transpose(a: Variable) -> Variable:
    "Swap the end two axes."
    def backprop(path_value):
        # Transposing the last two axes is its own adjoint, so the incoming
        # gradient is simply transposed back.
        return np.swapaxes(path_value, -2, -1)

    value = np.swapaxes(a.array, -2, -1)
    return Variable(value, [(a, backprop)])